namespace ValidateLib.UtilityClasses
{
    /// <summary>
    /// Helpers for resolving encoding labels (for example <c>"latin1"</c> or <c>" UTF8 "</c>)
    /// to canonical encoding names, following the label table of the WHATWG Encoding Standard.
    /// </summary>
    public class EncodingUtilityClass
    {
        /// <summary>
        /// Tells whether <paramref name="encodingName"/> is a known encoding label.
        /// </summary>
        /// <param name="encodingName">The label to check; surrounding ASCII white space and letter case are ignored.</param>
        /// <returns><c>true</c> when the label resolves to an encoding name; otherwise <c>false</c> (including for <c>null</c>).</returns>
        public static bool IsValidEncodingName(string encodingName)
        {
            return GetEncodingNameFromLabel(encodingName) != null;
        }

        /// <summary>
        /// Resolves an encoding label to its canonical encoding name.
        /// </summary>
        /// <param name="label">The label to resolve; it is normalized with <see cref="TrimAndToLowerLabel"/> before the lookup.</param>
        /// <returns>The canonical encoding name, or <c>null</c> when the label is <c>null</c> or unknown.</returns>
        public static string? GetEncodingNameFromLabel(string label)
        {
            // A null label can never match; return early because a dictionary lookup
            // with a null key would throw ArgumentNullException.
            if (label is null)
            {
                return null;
            }

            string normalized = TrimAndToLowerLabel(label);

            // Single lookup instead of ContainsKey followed by the indexer.
            return LabelToEncodingMap.TryGetValue(normalized, out var encodingName) ? encodingName : null;
        }

        /// <summary>
        /// Strips leading and trailing ASCII white space from <paramref name="input"/> and lower-cases the rest.
        /// </summary>
        /// <param name="input">The text to normalize.</param>
        /// <returns>
        /// <paramref name="input"/> itself when it is <c>null</c> or empty, an empty string when it consists
        /// only of ASCII white space, otherwise the trimmed, lower-cased text.
        /// </returns>
        /// <remarks>
        /// Only characters with a code point of 127 or below that <see cref="char.IsWhiteSpace(char)"/> reports
        /// as white space are trimmed; non-ASCII white space (such as a no-break space) is kept.
        /// </remarks>
        public static string TrimAndToLowerLabel(string input)
        {
            if (string.IsNullOrEmpty(input))
            {
                return input;
            }

            int startIndex = 0;
            int endIndex = input.Length - 1;

            // Advance to the first character that is not ASCII white space.
            while (startIndex < input.Length && IsAsciiWhiteSpace(input[startIndex]))
            {
                startIndex++;
            }

            // Step back to the last character that is not ASCII white space.
            while (endIndex >= startIndex && IsAsciiWhiteSpace(input[endIndex]))
            {
                endIndex--;
            }

            if (startIndex > endIndex)
            {
                return string.Empty;
            }

            // Invariant casing: the culture-sensitive ToLower() would, under a Turkish culture,
            // turn 'I' into a dotless 'ı' and make labels such as "ISO-8859-1" fail the lookup.
            return input.Substring(startIndex, endIndex - startIndex + 1).ToLowerInvariant();
        }

        /// <summary>
        /// Tells whether <paramref name="c"/> is a white-space character within the ASCII range.
        /// </summary>
        private static bool IsAsciiWhiteSpace(char c)
        {
            return c <= 127 && char.IsWhiteSpace(c);
        }

        /// <summary>
        /// Maps each lower-case encoding label to its canonical encoding name.
        /// Keys must be lower-case because lookups use the output of <see cref="TrimAndToLowerLabel"/>.
        /// </summary>
        private static readonly Dictionary<string, string> LabelToEncodingMap = new Dictionary<string, string>
        {
            // The Encoding
            { "unicode-1-1-utf-8", "UTF-8" },
            { "unicode11utf8", "UTF-8" },
            { "unicode20utf8", "UTF-8" },
            { "utf-8", "UTF-8" },
            { "utf8", "UTF-8" },
            { "x-unicode20utf8", "UTF-8" },
            // Legacy single-byte encodings
            { "866", "IBM866" },
            { "cp866", "IBM866" },
            { "csibm866", "IBM866" },
            { "ibm866", "IBM866" },
            { "csisolatin2", "ISO-8859-2" },
            { "iso-8859-2", "ISO-8859-2" },
            { "iso-ir-101", "ISO-8859-2" },
            { "iso8859-2", "ISO-8859-2" },
            { "iso88592", "ISO-8859-2" },
            { "iso_8859-2", "ISO-8859-2" },
            { "iso_8859-2:1987", "ISO-8859-2" },
            { "l2", "ISO-8859-2" },
            { "latin2", "ISO-8859-2" },
            { "csisolatin3", "ISO-8859-3" },
            { "iso-8859-3", "ISO-8859-3" },
            { "iso-ir-109", "ISO-8859-3" },
            { "iso8859-3", "ISO-8859-3" },
            { "iso88593", "ISO-8859-3" },
            { "iso_8859-3", "ISO-8859-3" },
            { "iso_8859-3:1988", "ISO-8859-3" },
            { "l3", "ISO-8859-3" },
            { "latin3", "ISO-8859-3" },
            { "csisolatin4", "ISO-8859-4" },
            { "iso-8859-4", "ISO-8859-4" },
            { "iso-ir-110", "ISO-8859-4" },
            { "iso8859-4", "ISO-8859-4" },
            { "iso88594", "ISO-8859-4" },
            { "iso_8859-4", "ISO-8859-4" },
            { "iso_8859-4:1988", "ISO-8859-4" },
            { "l4", "ISO-8859-4" },
            { "latin4", "ISO-8859-4" },
            { "csisolatincyrillic", "ISO-8859-5" },
            { "cyrillic", "ISO-8859-5" },
            { "iso-8859-5", "ISO-8859-5" },
            { "iso-ir-144", "ISO-8859-5" },
            { "iso8859-5", "ISO-8859-5" },
            { "iso88595", "ISO-8859-5" },
            { "iso_8859-5", "ISO-8859-5" },
            { "iso_8859-5:1988", "ISO-8859-5" },
            { "arabic", "ISO-8859-6" },
            { "asmo-708", "ISO-8859-6" },
            { "csiso88596e", "ISO-8859-6" },
            { "csiso88596i", "ISO-8859-6" },
            { "csisolatinarabic", "ISO-8859-6" },
            { "ecma-114", "ISO-8859-6" },
            { "iso-8859-6", "ISO-8859-6" },
            { "iso-8859-6-e", "ISO-8859-6" },
            { "iso-8859-6-i", "ISO-8859-6" },
            { "iso-ir-127", "ISO-8859-6" },
            { "iso8859-6", "ISO-8859-6" },
            { "iso88596", "ISO-8859-6" },
            { "iso_8859-6", "ISO-8859-6" },
            { "iso_8859-6:1987", "ISO-8859-6" },
            { "csisolatingreek", "ISO-8859-7" },
            { "ecma-118", "ISO-8859-7" },
            { "elot_928", "ISO-8859-7" },
            { "greek", "ISO-8859-7" },
            { "greek8", "ISO-8859-7" },
            { "iso-8859-7", "ISO-8859-7" },
            { "iso-ir-126", "ISO-8859-7" },
            { "iso8859-7", "ISO-8859-7" },
            { "iso88597", "ISO-8859-7" },
            { "iso_8859-7", "ISO-8859-7" },
            { "iso_8859-7:1987", "ISO-8859-7" },
            { "sun_eu_greek", "ISO-8859-7" },
            { "csiso88598e", "ISO-8859-8" },
            { "csisolatinhebrew", "ISO-8859-8" },
            { "hebrew", "ISO-8859-8" },
            { "iso-8859-8", "ISO-8859-8" },
            { "iso-8859-8-e", "ISO-8859-8" },
            { "iso-ir-138", "ISO-8859-8" },
            { "iso8859-8", "ISO-8859-8" },
            { "iso88598", "ISO-8859-8" },
            { "iso_8859-8", "ISO-8859-8" },
            { "iso_8859-8:1988", "ISO-8859-8" },
            { "visual", "ISO-8859-8" },
            { "csiso88598i", "ISO-8859-8-I" },
            { "iso-8859-8-i", "ISO-8859-8-I" },
            { "logical", "ISO-8859-8-I" },
            { "csisolatin6", "ISO-8859-10" },
            { "iso-8859-10", "ISO-8859-10" },
            { "iso-ir-157", "ISO-8859-10" },
            { "iso8859-10", "ISO-8859-10" },
            { "iso885910", "ISO-8859-10" },
            { "l6", "ISO-8859-10" },
            { "latin6", "ISO-8859-10" },
            { "iso-8859-13", "ISO-8859-13" },
            { "iso8859-13", "ISO-8859-13" },
            { "iso885913", "ISO-8859-13" },
            { "iso-8859-14", "ISO-8859-14" },
            { "iso8859-14", "ISO-8859-14" },
            { "iso885914", "ISO-8859-14" },
            { "csisolatin9", "ISO-8859-15" },
            { "iso-8859-15", "ISO-8859-15" },
            { "iso8859-15", "ISO-8859-15" },
            { "iso885915", "ISO-8859-15" },
            { "iso_8859-15", "ISO-8859-15" },
            { "l9", "ISO-8859-15" },
            { "iso-8859-16", "ISO-8859-16" },
            { "cskoi8r", "KOI8-R" },
            { "koi", "KOI8-R" },
            { "koi8", "KOI8-R" },
            { "koi8-r", "KOI8-R" },
            { "koi8_r", "KOI8-R" },
            { "koi8-ru", "KOI8-U" },
            { "koi8-u", "KOI8-U" },
            { "csmacintosh", "macintosh" },
            { "mac", "macintosh" },
            { "macintosh", "macintosh" },
            { "x-mac-roman", "macintosh" },
            { "dos-874", "windows-874" },
            { "iso-8859-11", "windows-874" },
            { "iso8859-11", "windows-874" },
            { "iso885911", "windows-874" },
            { "tis-620", "windows-874" },
            // The canonical name is itself a label in the Encoding Standard.
            { "windows-874", "windows-874" },
            { "cp1250", "windows-1250" },
            { "windows-1250", "windows-1250" },
            { "x-cp1250", "windows-1250" },
            { "cp1251", "windows-1251" },
            { "windows-1251", "windows-1251" },
            { "x-cp1251", "windows-1251" },
            { "ansi_x3.4-1968", "windows-1252" },
            { "ascii", "windows-1252" },
            { "cp1252", "windows-1252" },
            { "cp819", "windows-1252" },
            { "csisolatin1", "windows-1252" },
            { "ibm819", "windows-1252" },
            { "iso-8859-1", "windows-1252" },
            { "iso-ir-100", "windows-1252" },
            { "iso8859-1", "windows-1252" },
            { "iso88591", "windows-1252" },
            { "iso_8859-1", "windows-1252" },
            { "iso_8859-1:1987", "windows-1252" },
            { "l1", "windows-1252" },
            { "latin1", "windows-1252" },
            { "us-ascii", "windows-1252" },
            { "windows-1252", "windows-1252" },
            { "x-cp1252", "windows-1252" },
            { "cp1253", "windows-1253" },
            { "windows-1253", "windows-1253" },
            { "x-cp1253", "windows-1253" },
            { "cp1254", "windows-1254" },
            { "csisolatin5", "windows-1254" },
            { "iso-8859-9", "windows-1254" },
            { "iso-ir-148", "windows-1254" },
            { "iso8859-9", "windows-1254" },
            { "iso88599", "windows-1254" },
            { "iso_8859-9", "windows-1254" },
            { "iso_8859-9:1989", "windows-1254" },
            { "l5", "windows-1254" },
            { "latin5", "windows-1254" },
            { "windows-1254", "windows-1254" },
            { "x-cp1254", "windows-1254" },
            { "cp1255", "windows-1255" },
            { "windows-1255", "windows-1255" },
            { "x-cp1255", "windows-1255" },
            { "cp1256", "windows-1256" },
            { "windows-1256", "windows-1256" },
            { "x-cp1256", "windows-1256" },
            { "cp1257", "windows-1257" },
            { "windows-1257", "windows-1257" },
            { "x-cp1257", "windows-1257" },
            { "cp1258", "windows-1258" },
            { "windows-1258", "windows-1258" },
            { "x-cp1258", "windows-1258" },
            { "x-mac-cyrillic", "x-mac-cyrillic" },
            // "x-mac-ukrainian" is a label of x-mac-cyrillic, not an encoding of its own.
            { "x-mac-ukrainian", "x-mac-cyrillic" },
            // Legacy multi-byte Chinese (simplified) encodings
            { "chinese", "GBK" },
            { "csgb2312", "GBK" },
            { "csiso58gb231280", "GBK" },
            { "gb2312", "GBK" },
            { "gb_2312", "GBK" },
            { "gb_2312-80", "GBK" },
            { "gbk", "GBK" },
            { "iso-ir-58", "GBK" },
            { "x-gbk", "GBK" },
            { "gb18030", "gb18030" },
            // Legacy multi-byte Chinese (traditional) encodings
            { "big5", "Big5" },
            { "big5-hkscs", "Big5" },
            { "cn-big5", "Big5" },
            { "csbig5", "Big5" },
            { "x-x-big5", "Big5" },
            // Legacy multi-byte Japanese encodings
            { "cseucpkdfmtjapanese", "EUC-JP" },
            { "euc-jp", "EUC-JP" },
            { "x-euc-jp", "EUC-JP" },
            { "csiso2022jp", "ISO-2022-JP" },
            { "iso-2022-jp", "ISO-2022-JP" },
            { "csshiftjis", "Shift_JIS" },
            { "ms932", "Shift_JIS" },
            { "ms_kanji", "Shift_JIS" },
            { "shift-jis", "Shift_JIS" },
            { "shift_jis", "Shift_JIS" },
            { "sjis", "Shift_JIS" },
            { "windows-31j", "Shift_JIS" },
            { "x-sjis", "Shift_JIS" },
            // Legacy multi-byte Korean encodings
            { "cseuckr", "EUC-KR" },
            { "csksc56011987", "EUC-KR" },
            { "euc-kr", "EUC-KR" },
            { "iso-ir-149", "EUC-KR" },
            { "korean", "EUC-KR" },
            { "ks_c_5601-1987", "EUC-KR" },
            { "ks_c_5601-1989", "EUC-KR" },
            { "ksc5601", "EUC-KR" },
            { "ksc_5601", "EUC-KR" },
            { "windows-949", "EUC-KR" },
            // Legacy miscellaneous encodings
            { "csiso2022kr", "replacement" },
            { "hz-gb-2312", "replacement" },
            { "iso-2022-cn", "replacement" },
            { "iso-2022-cn-ext", "replacement" },
            { "iso-2022-kr", "replacement" },
            // The canonical name is itself a label in the Encoding Standard.
            { "replacement", "replacement" },
            { "unicodefffe", "UTF-16BE" },
            { "utf-16be", "UTF-16BE" },
            { "csunicode", "UTF-16LE" },
            { "iso-10646-ucs-2", "UTF-16LE" },
            { "ucs-2", "UTF-16LE" },
            { "unicode", "UTF-16LE" },
            { "unicodefeff", "UTF-16LE" },
            { "utf-16", "UTF-16LE" },
            { "utf-16le", "UTF-16LE" },
            { "x-user-defined", "x-user-defined" }
        };
    }
}