// helper program is in ~me/encodings.d to make more tables from wikipedia

/**
	This is meant to help get data from the wild into utf8 strings
	so you can work with them easily inside D.

	The main function is convertToUtf8(), which takes a byte array
	of your raw data (a byte array because it isn't really a D string
	yet until it is utf8), and a runtime string telling its current
	encoding.

	The current encoding argument is meant to come from the data's
	metadata, and is flexible on exact format - it is case insensitive
	and takes several variations on the names.

	This way, you should be able to send it the encoding string directly
	from an XML document, a HTTP header, or whatever you have, and it
	ought to just work.

	Example:
		auto data = cast(immutable(ubyte)[])
			std.file.read("my-windows-file.txt");
		string utf8String = convertToUtf8(data, "windows-1252");
		// utf8String can now be used


	The encodings currently implemented for decoding are:
		UTF-8 (a no-op; it simply casts the array to string)
		UTF-16 (little and big endian),
		UTF-32 (little and big endian),
		Windows-1251,
		Windows-1252,
		KOI8-R,
		ISO 8859 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, and 16.

	It treats ISO 8859-1, Latin-1, and Windows-1252 the same way, since
	those labels are pretty much de-facto the same thing in wild documents.


	This module currently makes no attempt to look at control characters.
*/
module arsd.characterencodings;

import std.string;
import std.array;
import std.conv;

/// Like convertToUtf8, but never throws an Exception: if the conversion
/// fails (unknown encoding label, malformed input) or the result does not
/// validate as UTF-8, it just strips all bytes > 127 from the raw data and
/// calls it done.
string convertToUtf8Lossy(immutable(ubyte)[] data, string dataCharacterEncoding) {
	try {
		auto ret = convertToUtf8(data, dataCharacterEncoding);
		import std.utf;
		// the "utf8" path is a plain cast, so check the result really is UTF-8
		validate(ret);
		return ret;
	} catch(Exception e) {
		// best effort fallback: keep only the plain ASCII bytes
		string ret;
		foreach(b; data)
			if(b < 128)
				ret ~= cast(char) b;
		return ret;
	}
}

/**
	Takes data from a given character encoding and returns it as UTF-8.

	Params:
		data = the raw bytes to decode
		dataCharacterEncoding = the name of the encoding `data` is in.
			Matching is case insensitive and ignores spaces, dashes, and
			underscores, so "UTF-16 LE", "utf_16le", and "utf16le" are all
			the same label.

	Returns:
		The decoded text. For UTF-8 and ASCII labels the input is cast to
		string without being validated. A byte order mark at the start of
		UTF-16 / UTF-32 data is kept in the output as U+FEFF.

	Throws:
		Exception if the encoding label is not recognized or if UTF-16 /
		UTF-32 data is not a whole number of code units long; the UTF-16 /
		UTF-32 paths can also throw std.utf.UTFException (an Exception)
		on invalid code units.
*/
string convertToUtf8(immutable(ubyte)[] data, string dataCharacterEncoding) {
	// just to normalize the passed string...
	auto encoding = dataCharacterEncoding.toLower();
	encoding = encoding.replace(" ", "");
	encoding = encoding.replace("-", "");
	encoding = encoding.replace("_", "");
	// should be good enough.

	switch(encoding) {
		default:
			throw new Exception("I don't know how to convert " ~ dataCharacterEncoding ~ " to UTF-8");
		// The UTF-16 / UTF-32 variants are assembled byte by byte so the
		// result does not depend on the byte order of the host machine.
		case "utf16":
			// no byte order in the label: honor a big endian BOM if there
			// is one, otherwise assume little endian
			return decodeUtf16(data,
				data.length >= 2 && data[0] == 0xfe && data[1] == 0xff);
		case "utf16le":
			return decodeUtf16(data, false);
		case "utf16be":
			return decodeUtf16(data, true);
		case "utf32":
			// same rule as utf16: big endian only if the BOM says so
			return decodeUtf32(data,
				data.length >= 4
				&& data[0] == 0x00 && data[1] == 0x00
				&& data[2] == 0xfe && data[3] == 0xff);
		case "utf32le":
			return decodeUtf32(data, false);
		case "utf32be":
			return decodeUtf32(data, true);
		case "ascii":
		case "usascii": // utf-8 is a superset of ascii
		case "utf8":
			// since the input is immutable, a plain cast is ok here.
			return cast(string) data;
		// and now the various 8 bit encodings we support.
		case "windows1252":
			return decodeImpl(data, ISO_8859_1, Windows_1252);
		case "windows1251":
			return decodeImpl(data, Windows_1251, Windows_1251_Lower);
		case "koi8r":
			return decodeImpl(data, KOI8_R, KOI8_R_Lower);
		case "latin1":
		case "iso88591":
			// Why am I putting Windows_1252 here? A lot of
			// stuff in the wild is mislabeled, so this will
			// do some good in the Just Works department.
			// Regardless, I don't handle the
			// control char set in that zone anyway right now.
			return decodeImpl(data, ISO_8859_1, Windows_1252);
		case "iso88592":
			return decodeImpl(data, ISO_8859_2);
		case "iso88593":
			return decodeImpl(data, ISO_8859_3);
		case "iso88594":
			return decodeImpl(data, ISO_8859_4);
		case "iso88595":
			return decodeImpl(data, ISO_8859_5);
		case "iso88596":
			return decodeImpl(data, ISO_8859_6);
		case "iso88597":
			return decodeImpl(data, ISO_8859_7);
		case "iso88598":
			return decodeImpl(data, ISO_8859_8);
		case "iso88599":
			return decodeImpl(data, ISO_8859_9);
		case "iso885910":
			return decodeImpl(data, ISO_8859_10);
		case "iso885911":
			return decodeImpl(data, ISO_8859_11);
		case "iso885913":
			return decodeImpl(data, ISO_8859_13);
		case "iso885914":
			return decodeImpl(data, ISO_8859_14);
		case "iso885915":
			return decodeImpl(data, ISO_8859_15);
		case "iso885916":
			return decodeImpl(data, ISO_8859_16);
	}

	assert(0);
}

// Decodes raw UTF-16 bytes in the given byte order and transcodes to UTF-8.
// Throws an Exception (rather than an array cast Error) on odd-length input.
private string decodeUtf16(in ubyte[] data, bool bigEndian) {
	if(data.length % 2 != 0)
		throw new Exception("UTF-16 data must be a multiple of two bytes long");

	auto units = new wchar[](data.length / 2);
	foreach(i, ref unit; units) {
		const first = data[i * 2];
		const second = data[i * 2 + 1];
		unit = cast(wchar) (bigEndian ? (first << 8) | second : (second << 8) | first);
	}

	return to!string(units);
}

// Decodes raw UTF-32 bytes in the given byte order and transcodes to UTF-8.
// Throws an Exception (rather than an array cast Error) on misaligned input.
private string decodeUtf32(in ubyte[] data, bool bigEndian) {
	if(data.length % 4 != 0)
		throw new Exception("UTF-32 data must be a multiple of four bytes long");

	auto units = new dchar[](data.length / 4);
	foreach(i, ref unit; units) {
		const quad = data[i * 4 .. i * 4 + 4];
		uint value;
		if(bigEndian)
			value = (cast(uint) quad[0] << 24) | (quad[1] << 16) | (quad[2] << 8) | quad[3];
		else
			value = (cast(uint) quad[3] << 24) | (quad[2] << 16) | (quad[1] << 8) | quad[0];
		unit = cast(dchar) value;
	}

	return to!string(units);
}

/// Tries to determine the current encoding based on the content.
/// Only really helps with the UTF variants.
/// Returns null if it can't be reasonably sure.
///
/// The returned labels ("UTF-8", "UTF-16 LE", "UTF-16 BE", "UTF-32 LE",
/// "UTF-32 BE") can be passed straight to convertToUtf8.
string tryToDetermineEncoding(in ubyte[] rawdata) {
	import std.utf;
	try {
		validate!string(cast(string) rawdata);
		// the odds of non stuff validating as utf-8 are pretty low
		return "UTF-8";
	} catch(UTFException t) {
		// it's definitely not UTF-8!
		// we'll look at the first few bytes. If there's a
		// BOM, it's probably UTF-16 or UTF-32

		// not checking for utf8 bom; if it was that, we
		// wouldn't be here.

		// The UTF-32 checks must come first: the UTF-32 LE BOM
		// (ff fe 00 00) starts with the UTF-16 LE BOM (ff fe).
		if(rawdata.length >= 4) {
			if(rawdata[0] == 0x00 && rawdata[1] == 0x00
				&& rawdata[2] == 0xfe && rawdata[3] == 0xff)
				return "UTF-32 BE";
			if(rawdata[0] == 0xff && rawdata[1] == 0xfe
				&& rawdata[2] == 0x00 && rawdata[3] == 0x00)
				return "UTF-32 LE";
		}

		if(rawdata.length >= 2) {
			if(rawdata[0] == 0xff && rawdata[1] == 0xfe)
				return "UTF-16 LE";
			if(rawdata[0] == 0xfe && rawdata[1] == 0xff)
				return "UTF-16 BE";
		}
	}

	// we don't know with enough confidence. The app will have to find another way.
	return null;
}

/**
	This function actually does the work for the 8 bit encodings, using
	the translation tables below.

	Params:
		data = the raw bytes to decode
		chars160to255 = characters for bytes 160 through 255; must hold
			exactly 96 entries
		chars128to159 = characters for bytes 128 through 159 (32 entries),
			or null to turn every byte in that range into a space
		chars0to127 = characters for bytes 0 through 127 (128 entries),
			or null to treat that range as plain ASCII

	Returns:
		the decoded text as UTF-8
*/
string decodeImpl(in ubyte[] data, in dchar[] chars160to255, in dchar[] chars128to159 = null, in dchar[] chars0to127 = null)
	in {
		assert(chars160to255.length == 256 - 160);
		assert(chars128to159 is null || chars128to159.length == 160 - 128);
		assert(chars0to127 is null || chars0to127.length == 128 - 0);
	}
	out(ret) {
		import std.utf;
		validate(ret);
	}
do {
	// an appender avoids reallocating on every single character; putting
	// a dchar into it encodes that character as UTF-8
	auto utf8 = appender!string();
	utf8.reserve(data.length);

	foreach(octet; data) {
		if(octet < 128) {
			if(chars0to127 !is null)
				utf8.put(chars0to127[octet]);
			else
				utf8.put(cast(char) octet); // ascii is the same
		} else if(octet < 160) {
			if(chars128to159 !is null)
				utf8.put(chars128to159[octet - 128]);
			else
				utf8.put(' ');
		} else {
			utf8.put(chars160to255[octet - 160]);
		}
	}

	return utf8.data;
}


// Here come the translation tables.

// this table gives characters for decimal 128 through 159.
// the < 128 characters are the same as ascii, and > 159 the same as
// iso 8859 1, seen below.
// Translation tables for the 8 bit encodings.
//
// Conventions used by every table in this section:
//   * a 32 entry table covers bytes 128 through 159 (0x80 - 0x9F),
//     a 96 entry table covers bytes 160 through 255 (0xA0 - 0xFF);
//   * byte positions the encoding leaves undefined hold a plain space;
//   * the no-break space (0xA0) and soft hyphen (0xAD) are written as
//     the escapes '\u00A0' and '\u00AD' so they stay visible in the source.

// Windows-1252, bytes 128 through 159.
immutable dchar[] Windows_1252 = [
	'€', ' ', '‚', 'ƒ', '„', '…', '†', '‡',
	'ˆ', '‰', 'Š', '‹', 'Œ', ' ', 'Ž', ' ',
	' ', '‘', '’', '“', '”', '•', '–', '—',
	'˜', '™', 'š', '›', 'œ', ' ', 'ž', 'Ÿ'];

// the following tables give the characters from decimal 160 up to 255
// in the given encodings.

immutable dchar[] ISO_8859_1 = [
	'\u00A0', '¡', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', 'º', '»', '¼', '½', '¾', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ'];

immutable dchar[] ISO_8859_2 = [
	'\u00A0', 'Ą', '˘', 'Ł', '¤', 'Ľ', 'Ś', '§',
	'¨', 'Š', 'Ş', 'Ť', 'Ź', '\u00AD', 'Ž', 'Ż',
	'°', 'ą', '˛', 'ł', '´', 'ľ', 'ś', 'ˇ',
	'¸', 'š', 'ş', 'ť', 'ź', '˝', 'ž', 'ż',
	'Ŕ', 'Á', 'Â', 'Ă', 'Ä', 'Ĺ', 'Ć', 'Ç',
	'Č', 'É', 'Ę', 'Ë', 'Ě', 'Í', 'Î', 'Ď',
	'Đ', 'Ń', 'Ň', 'Ó', 'Ô', 'Ő', 'Ö', '×',
	'Ř', 'Ů', 'Ú', 'Ű', 'Ü', 'Ý', 'Ţ', 'ß',
	'ŕ', 'á', 'â', 'ă', 'ä', 'ĺ', 'ć', 'ç',
	'č', 'é', 'ę', 'ë', 'ě', 'í', 'î', 'ď',
	'đ', 'ń', 'ň', 'ó', 'ô', 'ő', 'ö', '÷',
	'ř', 'ů', 'ú', 'ű', 'ü', 'ý', 'ţ', '˙'];

immutable dchar[] ISO_8859_3 = [
	'\u00A0', 'Ħ', '˘', '£', '¤', ' ', 'Ĥ', '§',
	'¨', 'İ', 'Ş', 'Ğ', 'Ĵ', '\u00AD', ' ', 'Ż',
	'°', 'ħ', '²', '³', '´', 'µ', 'ĥ', '·',
	'¸', 'ı', 'ş', 'ğ', 'ĵ', '½', ' ', 'ż',
	'À', 'Á', 'Â', ' ', 'Ä', 'Ċ', 'Ĉ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	' ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Ġ', 'Ö', '×',
	'Ĝ', 'Ù', 'Ú', 'Û', 'Ü', 'Ŭ', 'Ŝ', 'ß',
	'à', 'á', 'â', ' ', 'ä', 'ċ', 'ĉ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	' ', 'ñ', 'ò', 'ó', 'ô', 'ġ', 'ö', '÷',
	'ĝ', 'ù', 'ú', 'û', 'ü', 'ŭ', 'ŝ', '˙'];

immutable dchar[] ISO_8859_4 = [
	'\u00A0', 'Ą', 'ĸ', 'Ŗ', '¤', 'Ĩ', 'Ļ', '§',
	'¨', 'Š', 'Ē', 'Ģ', 'Ŧ', '\u00AD', 'Ž', '¯',
	'°', 'ą', '˛', 'ŗ', '´', 'ĩ', 'ļ', 'ˇ',
	'¸', 'š', 'ē', 'ģ', 'ŧ', 'Ŋ', 'ž', 'ŋ',
	'Ā', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Į',
	'Č', 'É', 'Ę', 'Ë', 'Ė', 'Í', 'Î', 'Ī',
	'Đ', 'Ņ', 'Ō', 'Ķ', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ų', 'Ú', 'Û', 'Ü', 'Ũ', 'Ū', 'ß',
	'ā', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'į',
	'č', 'é', 'ę', 'ë', 'ė', 'í', 'î', 'ī',
	'đ', 'ņ', 'ō', 'ķ', 'ô', 'õ', 'ö', '÷',
	'ø', 'ų', 'ú', 'û', 'ü', 'ũ', 'ū', '˙'];

immutable dchar[] ISO_8859_5 = [
	'\u00A0', 'Ё', 'Ђ', 'Ѓ', 'Є', 'Ѕ', 'І', 'Ї',
	'Ј', 'Љ', 'Њ', 'Ћ', 'Ќ', '\u00AD', 'Ў', 'Џ',
	'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ж', 'З',
	'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П',
	'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч',
	'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я',
	'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з',
	'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п',
	'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч',
	'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я',
	'№', 'ё', 'ђ', 'ѓ', 'є', 'ѕ', 'і', 'ї',
	'ј', 'љ', 'њ', 'ћ', 'ќ', '§', 'ў', 'џ'];

// The eight Arabic combining marks at 0xEB - 0xF2 (fathatan through sukun)
// are written as escapes U+064B - U+0652 so they do not visually attach
// to the surrounding quotes.
immutable dchar[] ISO_8859_6 = [
	'\u00A0', ' ', ' ', ' ', '¤', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', '،', '\u00AD', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', '؛', ' ', ' ', ' ', '؟',
	' ', 'ء', 'آ', 'أ', 'ؤ', 'إ', 'ئ', 'ا',
	'ب', 'ة', 'ت', 'ث', 'ج', 'ح', 'خ', 'د',
	'ذ', 'ر', 'ز', 'س', 'ش', 'ص', 'ض', 'ط',
	'ظ', 'ع', 'غ', ' ', ' ', ' ', ' ', ' ',
	'ـ', 'ف', 'ق', 'ك', 'ل', 'م', 'ن', 'ه',
	'و', 'ى', 'ي', '\u064B', '\u064C', '\u064D', '\u064E', '\u064F',
	'\u0650', '\u0651', '\u0652', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '];

immutable dchar[] ISO_8859_7 = [
	'\u00A0', '‘', '’', '£', '€', '₯', '¦', '§',
	'¨', '©', 'ͺ', '«', '¬', '\u00AD', ' ', '―',
	'°', '±', '²', '³', '΄', '΅', 'Ά', '·',
	'Έ', 'Ή', 'Ί', '»', 'Ό', '½', 'Ύ', 'Ώ',
	'ΐ', 'Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η',
	'Θ', 'Ι', 'Κ', 'Λ', 'Μ', 'Ν', 'Ξ', 'Ο',
	'Π', 'Ρ', ' ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ',
	'Ψ', 'Ω', 'Ϊ', 'Ϋ', 'ά', 'έ', 'ή', 'ί',
	'ΰ', 'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η',
	'θ', 'ι', 'κ', 'λ', 'μ', 'ν', 'ξ', 'ο',
	'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ',
	'ψ', 'ω', 'ϊ', 'ϋ', 'ό', 'ύ', 'ώ', ' '];

// Bytes 0xFD and 0xFE are the left-to-right mark (U+200E) and the
// right-to-left mark (U+200F); they used to be decoded as plain spaces.
immutable dchar[] ISO_8859_8 = [
	'\u00A0', ' ', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', '×', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', '÷', '»', '¼', '½', '¾', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', ' ', '‗',
	'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח',
	'ט', 'י', 'ך', 'כ', 'ל', 'ם', 'מ', 'ן',
	'נ', 'ס', 'ע', 'ף', 'פ', 'ץ', 'צ', 'ק',
	'ר', 'ש', 'ת', ' ', ' ', '\u200E', '\u200F', ' '];

immutable dchar[] ISO_8859_9 = [
	'\u00A0', '¡', '¢', '£', '¤', '¥', '¦', '§',
	'¨', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', '´', 'µ', '¶', '·',
	'¸', '¹', 'º', '»', '¼', '½', '¾', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ğ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'İ', 'Ş', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ğ', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
	'ø', 'ù', 'ú', 'û', 'ü', 'ı', 'ş', 'ÿ'];

immutable dchar[] ISO_8859_10 = [
	'\u00A0', 'Ą', 'Ē', 'Ģ', 'Ī', 'Ĩ', 'Ķ', '§',
	'Ļ', 'Đ', 'Š', 'Ŧ', 'Ž', '\u00AD', 'Ū', 'Ŋ',
	'°', 'ą', 'ē', 'ģ', 'ī', 'ĩ', 'ķ', '·',
	'ļ', 'đ', 'š', 'ŧ', 'ž', '―', 'ū', 'ŋ',
	'Ā', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Į',
	'Č', 'É', 'Ę', 'Ë', 'Ė', 'Í', 'Î', 'Ï',
	'Ð', 'Ņ', 'Ō', 'Ó', 'Ô', 'Õ', 'Ö', 'Ũ',
	'Ø', 'Ų', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'ā', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'į',
	'č', 'é', 'ę', 'ë', 'ė', 'í', 'î', 'ï',
	'ð', 'ņ', 'ō', 'ó', 'ô', 'õ', 'ö', 'ũ',
	'ø', 'ų', 'ú', 'û', 'ü', 'ý', 'þ', 'ĸ'];

immutable dchar[] ISO_8859_11 = [
	'\u00A0', 'ก', 'ข', 'ฃ', 'ค', 'ฅ', 'ฆ', 'ง',
	'จ', 'ฉ', 'ช', 'ซ', 'ฌ', 'ญ', 'ฎ', 'ฏ',
	'ฐ', 'ฑ', 'ฒ', 'ณ', 'ด', 'ต', 'ถ', 'ท',
	'ธ', 'น', 'บ', 'ป', 'ผ', 'ฝ', 'พ', 'ฟ',
	'ภ', 'ม', 'ย', 'ร', 'ฤ', 'ล', 'ฦ', 'ว',
	'ศ', 'ษ', 'ส', 'ห', 'ฬ', 'อ', 'ฮ', 'ฯ',
	'ะ', 'ั', 'า', 'ำ', 'ิ', 'ี', 'ึ', 'ื',
	'ุ', 'ู', 'ฺ', ' ', ' ', ' ', ' ', '฿',
	'เ', 'แ', 'โ', 'ใ', 'ไ', 'ๅ', 'ๆ', '็',
	'่', '้', '๊', '๋', '์', 'ํ', '๎', '๏',
	'๐', '๑', '๒', '๓', '๔', '๕', '๖', '๗',
	'๘', '๙', '๚', '๛', ' ', ' ', ' ', ' '];

// 0xD5 / 0xF5 are O with tilde (U+00D5 / U+00F5) in this encoding.
immutable dchar[] ISO_8859_13 = [
	'\u00A0', '”', '¢', '£', '¤', '„', '¦', '§',
	'Ø', '©', 'Ŗ', '«', '¬', '\u00AD', '®', 'Æ',
	'°', '±', '²', '³', '“', 'µ', '¶', '·',
	'ø', '¹', 'ŗ', '»', '¼', '½', '¾', 'æ',
	'Ą', 'Į', 'Ā', 'Ć', 'Ä', 'Å', 'Ę', 'Ē',
	'Č', 'É', 'Ź', 'Ė', 'Ģ', 'Ķ', 'Ī', 'Ļ',
	'Š', 'Ń', 'Ņ', 'Ó', 'Ō', 'Õ', 'Ö', '×',
	'Ų', 'Ł', 'Ś', 'Ū', 'Ü', 'Ż', 'Ž', 'ß',
	'ą', 'į', 'ā', 'ć', 'ä', 'å', 'ę', 'ē',
	'č', 'é', 'ź', 'ė', 'ģ', 'ķ', 'ī', 'ļ',
	'š', 'ń', 'ņ', 'ó', 'ō', 'õ', 'ö', '÷',
	'ų', 'ł', 'ś', 'ū', 'ü', 'ż', 'ž', '’'];

// 0xD5 / 0xF5 are O with tilde (U+00D5 / U+00F5) in this encoding.
immutable dchar[] ISO_8859_14 = [
	'\u00A0', 'Ḃ', 'ḃ', '£', 'Ċ', 'ċ', 'Ḋ', '§',
	'Ẁ', '©', 'Ẃ', 'ḋ', 'Ỳ', '\u00AD', '®', 'Ÿ',
	'Ḟ', 'ḟ', 'Ġ', 'ġ', 'Ṁ', 'ṁ', '¶', 'Ṗ',
	'ẁ', 'ṗ', 'ẃ', 'Ṡ', 'ỳ', 'Ẅ', 'ẅ', 'ṡ',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ŵ', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', 'Ṫ',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Ŷ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ŵ', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', 'ṫ',
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'ŷ', 'ÿ'];

// 0xD5 / 0xF5 are O with tilde (U+00D5 / U+00F5) in this encoding.
immutable dchar[] ISO_8859_15 = [
	'\u00A0', '¡', '¢', '£', '€', '¥', 'Š', '§',
	'š', '©', 'ª', '«', '¬', '\u00AD', '®', '¯',
	'°', '±', '²', '³', 'Ž', 'µ', '¶', '·',
	'ž', '¹', 'º', '»', 'Œ', 'œ', 'Ÿ', '¿',
	'À', 'Á', 'Â', 'Ã', 'Ä', 'Å', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'Ð', 'Ñ', 'Ò', 'Ó', 'Ô', 'Õ', 'Ö', '×',
	'Ø', 'Ù', 'Ú', 'Û', 'Ü', 'Ý', 'Þ', 'ß',
	'à', 'á', 'â', 'ã', 'ä', 'å', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'ð', 'ñ', 'ò', 'ó', 'ô', 'õ', 'ö', '÷',
	'ø', 'ù', 'ú', 'û', 'ü', 'ý', 'þ', 'ÿ'];

// 0xD0 is D with stroke (U+0110, the capital of the U+0111 at 0xF0), not
// the look-alike Latin-1 eth; it is written as an escape to make that
// explicit. 0xD5 / 0xF5 really are O with double acute here.
immutable dchar[] ISO_8859_16 = [
	'\u00A0', 'Ą', 'ą', 'Ł', '€', '„', 'Š', '§',
	'š', '©', 'Ș', '«', 'Ź', '\u00AD', 'ź', 'Ż',
	'°', '±', 'Č', 'ł', 'Ž', '”', '¶', '·',
	'ž', 'č', 'ș', '»', 'Œ', 'œ', 'Ÿ', 'ż',
	'À', 'Á', 'Â', 'Ă', 'Ä', 'Ć', 'Æ', 'Ç',
	'È', 'É', 'Ê', 'Ë', 'Ì', 'Í', 'Î', 'Ï',
	'\u0110', 'Ń', 'Ò', 'Ó', 'Ô', 'Ő', 'Ö', 'Ś',
	'Ű', 'Ù', 'Ú', 'Û', 'Ü', 'Ę', 'Ț', 'ß',
	'à', 'á', 'â', 'ă', 'ä', 'ć', 'æ', 'ç',
	'è', 'é', 'ê', 'ë', 'ì', 'í', 'î', 'ï',
	'đ', 'ń', 'ò', 'ó', 'ô', 'ő', 'ö', 'ś',
	'ű', 'ù', 'ú', 'û', 'ü', 'ę', 'ț', 'ÿ'];

// KOI8-R, bytes 128 through 159 (box drawing and math symbols).
immutable dchar[] KOI8_R_Lower = [
	'─', '│', '┌', '┐', '└', '┘', '├', '┤',
	'┬', '┴', '┼', '▀', '▄', '█', '▌', '▐',
	'░', '▒', '▓', '⌠', '■', '∙', '√', '≈',
	'≤', '≥', '\u00a0', '⌡', '°', '²', '·', '÷'];

// KOI8-R, bytes 160 through 255. 0xA3 is the small io and 0xB3 the capital
// io; 0xC0 - 0xDF are the small Cyrillic letters and 0xE0 - 0xFF the
// capital ones, in the same (non-alphabetical) KOI8 order.
immutable dchar[] KOI8_R = [
	'═', '║', '╒', 'ё', '╓', '╔', '╕', '╖',
	'╗', '╘', '╙', '╚', '╛', '╜', '╝', '╞',
	'╟', '╠', '╡', 'Ё', '╢', '╣', '╤', '╥',
	'╦', '╧', '╨', '╩', '╪', '╫', '╬', '©',
	'ю', 'а', 'б', 'ц', 'д', 'е', 'ф', 'г',
	'х', 'и', 'й', 'к', 'л', 'м', 'н', 'о',
	'п', 'я', 'р', 'с', 'т', 'у', 'ж', 'в',
	'ь', 'ы', 'з', 'ш', 'э', 'щ', 'ч', 'ъ',
	'Ю', 'А', 'Б', 'Ц', 'Д', 'Е', 'Ф', 'Г',
	'Х', 'И', 'Й', 'К', 'Л', 'М', 'Н', 'О',
	'П', 'Я', 'Р', 'С', 'Т', 'У', 'Ж', 'В',
	'Ь', 'Ы', 'З', 'Ш', 'Э', 'Щ', 'Ч', 'Ъ'];

// Windows-1251, bytes 128 through 159.
immutable dchar[] Windows_1251_Lower = [
	'Ђ', 'Ѓ', '‚', 'ѓ', '„', '…', '†', '‡',
	'€', '‰', 'Љ', '‹', 'Њ', 'Ќ', 'Ћ', 'Џ',
	'ђ', '‘', '’', '“', '”', '•', '–', '—',
	' ', '™', 'љ', '›', 'њ', 'ќ', 'ћ', 'џ'];

// Windows-1251, bytes 160 through 255.
immutable dchar[] Windows_1251 = [
	'\u00A0', 'Ў', 'ў', 'Ј', '¤', 'Ґ', '¦', '§',
	'Ё', '©', 'Є', '«', '¬', '\u00AD', '®', 'Ї',
	'°', '±', 'І', 'і', 'ґ', 'µ', '¶', '·',
	'ё', '№', 'є', '»', 'ј', 'Ѕ', 'ѕ', 'ї',
	'А', 'Б', 'В', 'Г', 'Д', 'Е', 'Ж', 'З',
	'И', 'Й', 'К', 'Л', 'М', 'Н', 'О', 'П',
	'Р', 'С', 'Т', 'У', 'Ф', 'Х', 'Ц', 'Ч',
	'Ш', 'Щ', 'Ъ', 'Ы', 'Ь', 'Э', 'Ю', 'Я',
	'а', 'б', 'в', 'г', 'д', 'е', 'ж', 'з',
	'и', 'й', 'к', 'л', 'м', 'н', 'о', 'п',
	'р', 'с', 'т', 'у', 'ф', 'х', 'ц', 'ч',
	'ш', 'щ', 'ъ', 'ы', 'ь', 'э', 'ю', 'я'];

// Catch a dropped or duplicated table entry at compile time instead of
// at the first decode of an affected encoding.
static assert(Windows_1252.length == 32);
static assert(KOI8_R_Lower.length == 32);
static assert(Windows_1251_Lower.length == 32);
static assert(ISO_8859_1.length == 96);
static assert(ISO_8859_2.length == 96);
static assert(ISO_8859_3.length == 96);
static assert(ISO_8859_4.length == 96);
static assert(ISO_8859_5.length == 96);
static assert(ISO_8859_6.length == 96);
static assert(ISO_8859_7.length == 96);
static assert(ISO_8859_8.length == 96);
static assert(ISO_8859_9.length == 96);
static assert(ISO_8859_10.length == 96);
static assert(ISO_8859_11.length == 96);
static assert(ISO_8859_13.length == 96);
static assert(ISO_8859_14.length == 96);
static assert(ISO_8859_15.length == 96);
static assert(ISO_8859_16.length == 96);
static assert(KOI8_R.length == 96);
static assert(Windows_1251.length == 96);