kernel32/tests: Use ARRAY_SIZE instead of open coding it.
[wine.git] / tools / make_unicode
blob165afe53b62995b3cdd4de0a22aa256f3525708d
1 #!/usr/bin/perl -w
3 # Generate code page .c files from ftp.unicode.org descriptions
5 # Copyright 2000 Alexandre Julliard
7 # This library is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU Lesser General Public
9 # License as published by the Free Software Foundation; either
10 # version 2.1 of the License, or (at your option) any later version.
12 # This library is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # Lesser General Public License for more details.
17 # You should have received a copy of the GNU Lesser General Public
18 # License along with this library; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 use strict;
23 use XML::LibXML;
24 use Digest::SHA;
25 use Encode;
26 use Time::Local qw(timegm_modern);
# Versions of the upstream data sets. They are interpolated into the
# download URLs (and, for some entries, the file names) in %data_files.
my $UNIVERSION = "15.0.0";
my $CLDRVERSION = "41";
my $ISO639VERSION = "20220120";
my $TZVERSION = "2022a";

# External data files, keyed by a short identifier. Each entry holds:
#   url  - location of the file
#   name - file name to use; only set for some entries
#   sha  - 64-hex-digit checksum of the file
# NOTE(review): the digest is presumably SHA-256 (64 hex digits, and the
# script imports Digest::SHA) -- confirm against the download code.
# When bumping one of the version variables above, the matching "sha"
# value has to be updated as well.
my %data_files =
(
    ucd => { url => "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip", name => "UCD-$UNIVERSION.zip",
             sha => "5fbde400f3e687d25cc9b0a8d30d7619e76cb2f4c3e85ba9df8ec1312cb6718c" },
    unihan => { url => "https://www.unicode.org/Public/$UNIVERSION/ucd/Unihan.zip", name => "Unihan-$UNIVERSION.zip",
                sha => "24b154691fc97cb44267b925d62064297086b3f896b57a8181c7b6d42702a026" },
    idna => { url => "https://www.unicode.org/Public/idna/$UNIVERSION/IdnaMappingTable.txt", name => "IdnaMappingTable-$UNIVERSION.txt",
              sha => "cc8522199541d60326a42a8f91f8748fd15630a42502dd2cf4878e81e2066ead" },
    cldr => { url => "https://github.com/unicode-org/cldr/archive/refs/tags/release-$CLDRVERSION.zip",
              sha => "a2b7aee281ad2f497d47995808cf5e8f24123b0814ca47f7a824556aec8a0d91" },
    cldr33 => { url => "https://www.unicode.org/Public/cldr/33/cldr-common-33.0.zip",
                sha => "fa3490082c086d21257153609642f54fcf788fcfda4966fe67f3f6daca0d58b9" },
    sorting => { url => "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows 10 Sorting Weight Table.txt",
                 sha => "81fcfa1e5ed3e3a94d329959ff7d97d522ddf9d653d2c4d6ddcccc5cd4df663f" },
    codepages => { url => "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows Supported Code Page Data Files.zip",
                   sha => "5074e6dd253056ba61fc6c870c9a955467855129c6ad3a51761c386b301b125a" },
    iso639 => { url => "https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_$ISO639VERSION.zip",
                sha => "d912749d10c344835f052a9f31d13f13d5ffc99bc589e1eb88f2b4663e990881" },
    ksx1001 => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/KSX1001.TXT",
                 sha => "d8d2a35206ac0ea2865f5d801c9d6717f735bf46f263a658a64a960abe59e371" },
    jis0208 => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT",
                 sha => "1c571870457f19c97720631fa83ee491549a96ba1436da1296786a67d8632e87" },
    jis0212 => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT",
                 sha => "477820bb3055bbcc90880d788cd95607d221dc94457bae249231adecf13c12e6" },
    tzdata => { url => "https://data.iana.org/time-zones/releases/tzdata$TZVERSION.tar.gz",
                sha => "ef7fffd9f4f50f4f58328b35022a32a5a056b245c5cb3d6791dddb342f871664" },
);
# Default char for undefined mappings
my $DEF_CHAR = ord '?';

# Last valid Unicode character
my $MAX_CHAR = 0x10ffff;

# Backslash-separated key paths used by the rest of the script.
# NOTE(review): these look like registry key paths, and the leading "-" on
# most components is presumably interpreted by the code that emits them;
# neither is visible in this part of the file -- confirm before editing.
# The final "Time Zones" component of $zonekey intentionally carries no "-"
# in the original and is reproduced unchanged.
my $nlskey = "-SYSTEM\\-CurrentControlSet\\-Control\\-Nls";
my $zonekey = "-Software\\-Microsoft\\-Windows NT\\-CurrentVersion\\Time Zones";
# Code page description files to process, one path per code page, listed in
# ascending numeric order of the code page identifier.
# NOTE(review): the directory really is spelled "CodpageFiles" here; it is a
# runtime path and presumably has to match the layout of the downloaded
# code page archive -- do not "correct" the spelling without checking.
my @allfiles =
(
    "CodpageFiles/037.txt",
    "CodpageFiles/437.txt",
    "CodpageFiles/500.txt",
    "CodpageFiles/708.txt",
    "CodpageFiles/720.txt",
    "CodpageFiles/737.txt",
    "CodpageFiles/775.txt",
    "CodpageFiles/850.txt",
    "CodpageFiles/852.txt",
    "CodpageFiles/855.txt",
    "CodpageFiles/857.txt",
    "CodpageFiles/860.txt",
    "CodpageFiles/861.txt",
    "CodpageFiles/862.txt",
    "CodpageFiles/863.txt",
    "CodpageFiles/864.txt",
    "CodpageFiles/865.txt",
    "CodpageFiles/866.txt",
    "CodpageFiles/869.txt",
    "CodpageFiles/874.txt",
    "CodpageFiles/875.txt",
    "CodpageFiles/932.txt",
    "CodpageFiles/936.txt",
    "CodpageFiles/949.txt",
    "CodpageFiles/950.txt",
    "CodpageFiles/1026.txt",
    "CodpageFiles/1250.txt",
    "CodpageFiles/1251.txt",
    "CodpageFiles/1252.txt",
    "CodpageFiles/1253.txt",
    "CodpageFiles/1254.txt",
    "CodpageFiles/1255.txt",
    "CodpageFiles/1256.txt",
    "CodpageFiles/1257.txt",
    "CodpageFiles/1258.txt",
    "CodpageFiles/1361.txt",
    "CodpageFiles/10000.txt",
    "CodpageFiles/10001.txt",
    "CodpageFiles/10002.txt",
    "CodpageFiles/10003.txt",
    "CodpageFiles/10004.txt",
    "CodpageFiles/10005.txt",
    "CodpageFiles/10006.txt",
    "CodpageFiles/10007.txt",
    "CodpageFiles/10008.txt",
    "CodpageFiles/10010.txt",
    "CodpageFiles/10017.txt",
    "CodpageFiles/10021.txt",
    "CodpageFiles/10029.txt",
    "CodpageFiles/10079.txt",
    "CodpageFiles/10081.txt",
    "CodpageFiles/10082.txt",
    "CodpageFiles/20127.txt",
    "CodpageFiles/20866.txt",
    "CodpageFiles/21866.txt",
    "CodpageFiles/28591.txt",
    "CodpageFiles/28592.txt",
    "CodpageFiles/28593.txt",
    "CodpageFiles/28594.txt",
    "CodpageFiles/28595.txt",
    "CodpageFiles/28596.txt",
    "CodpageFiles/28597.txt",
    "CodpageFiles/28598.txt",
    "CodpageFiles/28599.txt",
    "CodpageFiles/28603.txt",
    "CodpageFiles/28605.txt",
);
# Names of the time zone source files to read.
# NOTE(review): presumably members of the tzdata archive listed in
# %data_files -- confirm against the code that opens them.
my @timezone_files = qw(africa antarctica asia australasia europe northamerica southamerica etcetera backward);
# Character type flags, keyed by class name. The CT_CTYPE1 flags occupy the
# low bits and the CT_CTYPE3 flags are stored shifted into the high 16 bits,
# so one integer can carry both sets.
# NOTE(review): "alpha" additionally sets bit 0x80000000; the reason is not
# visible in this part of the file -- keep it unless the consumer is checked.
my %ctype =
(
    # CT_CTYPE1
    "upper"         => 0x0001,
    "lower"         => 0x0002,
    "digit"         => 0x0004,
    "space"         => 0x0008,
    "punct"         => 0x0010,
    "cntrl"         => 0x0020,
    "blank"         => 0x0040,
    "xdigit"        => 0x0080,
    "alpha"         => 0x0100 | 0x80000000,
    "defin"         => 0x0200,
    # CT_CTYPE3 in high 16 bits
    "nonspacing"    => 0x00010000,
    "diacritic"     => 0x00020000,
    "vowelmark"     => 0x00040000,
    "symbol"        => 0x00080000,
    "katakana"      => 0x00100000,
    "hiragana"      => 0x00200000,
    "halfwidth"     => 0x00400000,
    "fullwidth"     => 0x00800000,
    "ideograph"     => 0x01000000,
    "kashida"       => 0x02000000,
    "lexical"       => 0x04000000,
    "highsurrogate" => 0x08000000,
    "lowsurrogate"  => 0x10000000,
);
# Numeric codes for the bracket type letters.
# NOTE(review): "o"/"c" presumably stand for open/close as used by the
# Unicode paired-bracket data -- confirm against the parsing code.
my %bracket_types =
(
    "o" => 0x0000,
    "c" => 0x0001,
);
# Numeric codes for syllabic category names; values are consecutive from
# 0x0000 ("Other") to 0x0023.
# NOTE(review): the keys look like Unicode Indic_Syllabic_Category values;
# the data file they are matched against is not visible here -- confirm.
my %indic_types =
(
    "Other"                       => 0x0000,
    "Bindu"                       => 0x0001,
    "Visarga"                     => 0x0002,
    "Avagraha"                    => 0x0003,
    "Nukta"                       => 0x0004,
    "Virama"                      => 0x0005,
    "Vowel_Independent"           => 0x0006,
    "Vowel_Dependent"             => 0x0007,
    "Vowel"                       => 0x0008,
    "Consonant_Placeholder"       => 0x0009,
    "Consonant"                   => 0x000a,
    "Consonant_Dead"              => 0x000b,
    "Consonant_Succeeding_Repha"  => 0x000c,
    "Consonant_Subjoined"         => 0x000d,
    "Consonant_Medial"            => 0x000e,
    "Consonant_Final"             => 0x000f,
    "Consonant_Head_Letter"       => 0x0010,
    "Modifying_Letter"            => 0x0011,
    "Tone_Letter"                 => 0x0012,
    "Tone_Mark"                   => 0x0013,
    "Register_Shifter"            => 0x0014,
    "Consonant_Preceding_Repha"   => 0x0015,
    "Pure_Killer"                 => 0x0016,
    "Invisible_Stacker"           => 0x0017,
    "Gemination_Mark"             => 0x0018,
    "Cantillation_Mark"           => 0x0019,
    "Non_Joiner"                  => 0x001a,
    "Joiner"                      => 0x001b,
    "Number_Joiner"               => 0x001c,
    "Number"                      => 0x001d,
    "Brahmi_Joining_Number"       => 0x001e,
    "Consonant_With_Stacker"      => 0x001f,
    "Consonant_Prefixed"          => 0x0020,
    "Syllable_Modifier"           => 0x0021,
    "Consonant_Killer"            => 0x0022,
    "Consonant_Initial_Postfixed" => 0x0023,
);
# Numeric codes for positional category names; values are consecutive from
# 0x01 to 0x10 (no entry maps to 0).
# NOTE(review): the keys look like Unicode Indic_Positional_Category values;
# the data file they are matched against is not visible here -- confirm.
my %matra_types =
(
    "Right"                    => 0x01,
    "Left"                     => 0x02,
    "Visual_Order_Left"        => 0x03,
    "Left_And_Right"           => 0x04,
    "Top"                      => 0x05,
    "Bottom"                   => 0x06,
    "Top_And_Bottom"           => 0x07,
    "Top_And_Right"            => 0x08,
    "Top_And_Left"             => 0x09,
    "Top_And_Left_And_Right"   => 0x0a,
    "Bottom_And_Right"         => 0x0b,
    "Top_And_Bottom_And_Right" => 0x0c,
    "Overstruck"               => 0x0d,
    "Invisible"                => 0x0e,
    "Bottom_And_Left"          => 0x0f,
    "Top_And_Bottom_And_Left"  => 0x10,
);
# Numeric codes for break class abbreviations; values are consecutive from
# 0x0001 ("BK") to 0x002b ("ZWJ").
# NOTE(review): the keys look like Unicode line-break class abbreviations;
# the data file they are matched against is not visible here -- confirm.
my %break_types =
(
    "BK"  => 0x0001,
    "CR"  => 0x0002,
    "LF"  => 0x0003,
    "CM"  => 0x0004,
    "SG"  => 0x0005,
    "GL"  => 0x0006,
    "CB"  => 0x0007,
    "SP"  => 0x0008,
    "ZW"  => 0x0009,
    "NL"  => 0x000a,
    "WJ"  => 0x000b,
    "JL"  => 0x000c,
    "JV"  => 0x000d,
    "JT"  => 0x000e,
    "H2"  => 0x000f,
    "H3"  => 0x0010,
    "XX"  => 0x0011,
    "OP"  => 0x0012,
    "CL"  => 0x0013,
    "CP"  => 0x0014,
    "QU"  => 0x0015,
    "NS"  => 0x0016,
    "EX"  => 0x0017,
    "SY"  => 0x0018,
    "IS"  => 0x0019,
    "PR"  => 0x001a,
    "PO"  => 0x001b,
    "NU"  => 0x001c,
    "AL"  => 0x001d,
    "ID"  => 0x001e,
    "IN"  => 0x001f,
    "HY"  => 0x0020,
    "BB"  => 0x0021,
    "BA"  => 0x0022,
    "SA"  => 0x0023,
    "AI"  => 0x0024,
    "B2"  => 0x0025,
    "HL"  => 0x0026,
    "CJ"  => 0x0027,
    "RI"  => 0x0028,
    "EB"  => 0x0029,
    "EM"  => 0x002a,
    "ZWJ" => 0x002b,
);
# Numeric codes for the vertical type abbreviations.
# NOTE(review): R/U/Tr/Tu presumably are Unicode Vertical_Orientation
# values -- confirm against the code that reads the property file.
my %vertical_types =
(
    "R"  => 0x0000,
    "U"  => 0x0001,
    "Tr" => 0x0002,
    "Tu" => 0x0003,
);
# Maps each two-letter Unicode general category to the combination of
# %ctype flags assigned to characters of that category. Every category sets
# at least "defin"; the remaining flags are OR-ed in per category.
# Must be declared after %ctype, whose values are read while this list is
# being built.
my %categories =
(
    "Lu" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"upper"}, # Letter, Uppercase
    "Ll" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"lower"}, # Letter, Lowercase
    "Lt" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"upper"}|$ctype{"lower"}, # Letter, Titlecase
    "Mn" => $ctype{"defin"}|$ctype{"nonspacing"}, # Mark, Non-Spacing
    "Mc" => $ctype{"defin"}, # Mark, Spacing Combining
    "Me" => $ctype{"defin"}, # Mark, Enclosing
    "Nd" => $ctype{"defin"}|$ctype{"digit"}, # Number, Decimal Digit
    "Nl" => $ctype{"defin"}|$ctype{"alpha"}, # Number, Letter
    "No" => $ctype{"defin"}, # Number, Other
    "Zs" => $ctype{"defin"}|$ctype{"space"}, # Separator, Space
    "Zl" => $ctype{"defin"}|$ctype{"space"}, # Separator, Line
    "Zp" => $ctype{"defin"}|$ctype{"space"}, # Separator, Paragraph
    "Cc" => $ctype{"defin"}|$ctype{"cntrl"}, # Other, Control
    "Cf" => $ctype{"defin"}|$ctype{"cntrl"}, # Other, Format
    "Cs" => $ctype{"defin"}, # Other, Surrogate
    "Co" => $ctype{"defin"}, # Other, Private Use
    "Cn" => $ctype{"defin"}, # Other, Not Assigned
    "Lm" => $ctype{"defin"}|$ctype{"alpha"}, # Letter, Modifier
    "Lo" => $ctype{"defin"}|$ctype{"alpha"}, # Letter, Other
    "Pc" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Connector
    "Pd" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Dash
    "Ps" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Open
    "Pe" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Close
    "Pi" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Initial quote
    "Pf" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Final quote
    "Po" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Other
    "Sm" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Math
    "Sc" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Currency
    "Sk" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Modifier
    "So" => $ctype{"defin"}|$ctype{"symbol"}  # Symbol, Other
);
# a few characters need additional categories that cannot be determined automatically
#
# Keyed by flag name (the same names used as keys of %ctype); each value is
# a reference to a list of code points, written as single values and
# inclusive ".." ranges.
# NOTE(review): 0x30ad is listed twice under "fullwidth". It is reproduced
# unchanged here; check whether a different code point was intended.
my %special_categories =
(
    "xdigit"     => [ ord('0')..ord('9'),ord('A')..ord('F'),ord('a')..ord('f'),
                      0xff10..0xff19, 0xff21..0xff26, 0xff41..0xff46 ],
    "space"      => [ 0x09..0x0d, 0x85 ],
    "blank"      => [ 0x09, 0x20, 0xa0, 0x3000, 0xfeff ],
    "cntrl"      => [ 0x070f, 0x200c, 0x200d,
                      0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e,
                      0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
                      0xfff9, 0xfffa, 0xfffb ],
    "punct"      => [ 0x24, 0x2b, 0x3c..0x3e, 0x5e, 0x60, 0x7c, 0x7e, 0xa2..0xbe,
                      0xd7, 0xf7 ],
    "digit"      => [ 0xb2, 0xb3, 0xb9 ],
    "lower"      => [ 0xaa, 0xba, 0x2071, 0x207f ],
    "nonspacing" => [ 0xc0..0xc5, 0xc7..0xcf, 0xd1..0xd6, 0xd8..0xdd, 0xe0..0xe5, 0xe7..0xef,
                      0xf1..0xf6, 0xf8..0xfd, 0xff, 0x6de, 0x1929..0x192b, 0x302e..0x302f ],
    "diacritic"  => [ 0x5e, 0x60, 0xb7, 0xd8, 0xf8 ],
    "symbol"     => [ 0x09..0x0d, 0x20..0x23, 0x25, 0x26, 0x28..0x2a, 0x2c, 0x2e..0x2f, 0x3a..0x40,
                      0x5b..0x60, 0x7b..0x7e, 0xa0..0xa9, 0xab..0xb1, 0xb4..0xb8, 0xbb, 0xbf,
                      0x02b9..0x02ba, 0x02c6..0x02cf ],
    "halfwidth"  => [ 0x20..0x7e, 0xa2..0xa3, 0xa5..0xa6, 0xac, 0xaf, 0x20a9 ],
    "fullwidth"  => [ 0x2018..0x2019, 0x201c..0x201d, 0x3000..0x3002, 0x300c..0x300d, 0x309b..0x309c,
                      0x30a1..0x30ab, 0x30ad, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9,
                      0x30bb, 0x30bd, 0x30bf, 0x30c1, 0x30c3, 0x30c4, 0x30c6, 0x30c8, 0x30ca..0x30cf,
                      0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de..0x30ed, 0x30ef, 0x30f2..0x30f3, 0x30fb,
                      0x3131..0x3164 ],
    "ideograph"  => [ 0x3006..0x3007 ],
    "lexical"    => [ 0x22, 0x24, 0x27, 0x2d, 0x2f, 0x3d, 0x40, 0x5c, 0x5e..0x60, 0x7e,
                      0xa8, 0xaa, 0xad, 0xaf, 0xb4, 0xb8, 0xba,
                      0x02b0..0x02b8, 0x02bc, 0x02c7, 0x02ca..0x02cb, 0x02cf, 0x02d8..0x02dd, 0x02e0..0x02e3,
                      0x037a, 0x0384..0x0385, 0x0387, 0x0559..0x055a, 0x0640, 0x1fbd..0x1fc1,
                      0x1fcd..0x1fcf, 0x1fdd..0x1fdf, 0x1fed..0x1fef, 0x1ffd..0x1ffe, 0x2010..0x2015,
                      0x2032..0x2034, 0x2038, 0x2043..0x2044, 0x207b..0x207c, 0x207f, 0x208b..0x208c,
                      0x2212, 0x2215..0x2216, 0x2500, 0x2504..0x2505, 0x2508..0x2509, 0x254c..0x254d,
                      0x3003, 0x301c, 0x3030..0x3035, 0x309b..0x309e, 0x30fd..0x30fe, 0xfe31..0xfe32,
                      0xfe58, 0xfe63, 0xfe66, 0xfe68..0xfe69, 0xfe6b, 0xff04, 0xff07, 0xff0d, 0xff0f,
                      0xff1d, 0xff20, 0xff3c, 0xff3e, 0xff40, 0xff5e ],
    "kashida"    => [ 0x0640 ],
);
# Numeric direction codes keyed by bidirectional class abbreviation.
# All explicit embedding, override and isolate controls (LRE .. PDI) share
# the single value 15; every other class has its own value.
my %directions =
(
    "L"   => 1,  # Left-to-Right
    "R"   => 2,  # Right-to-Left
    "AL"  => 12, # Right-to-Left Arabic
    "EN"  => 3,  # European Number
    "ES"  => 4,  # European Number Separator
    "ET"  => 5,  # European Number Terminator
    "AN"  => 6,  # Arabic Number
    "CS"  => 7,  # Common Number Separator
    "NSM" => 13, # Non-Spacing Mark
    "BN"  => 14, # Boundary Neutral
    "B"   => 8,  # Paragraph Separator
    "S"   => 9,  # Segment Separator
    "WS"  => 10, # Whitespace
    "ON"  => 11, # Other Neutrals
    "LRE" => 15, # Left-to-Right Embedding
    "LRO" => 15, # Left-to-Right Override
    "RLE" => 15, # Right-to-Left Embedding
    "RLO" => 15, # Right-to-Left Override
    "PDF" => 15, # Pop Directional Format
    "LRI" => 15, # Left-to-Right Isolate
    "RLI" => 15, # Right-to-Left Isolate
    "FSI" => 15, # First Strong Isolate
    "PDI" => 15  # Pop Directional Isolate
);
# C2_* type values keyed by bidirectional class abbreviation; the trailing
# comment on each entry names the C2_* constant the number stands for.
# Several classes collapse onto one value: AL shares C2_RIGHTTOLEFT with R,
# BN is C2_NOTAPPLICABLE, and NSM plus all embedding/override/isolate
# controls are C2_OTHERNEUTRAL.
my %c2_types =
(
    "L"   => 1,  # C2_LEFTTORIGHT
    "R"   => 2,  # C2_RIGHTTOLEFT
    "AL"  => 2,  # C2_RIGHTTOLEFT
    "EN"  => 3,  # C2_EUROPENUMBER
    "ES"  => 4,  # C2_EUROPESEPARATOR
    "ET"  => 5,  # C2_EUROPETERMINATOR
    "AN"  => 6,  # C2_ARABICNUMBER
    "CS"  => 7,  # C2_COMMONSEPARATOR
    "NSM" => 11, # C2_OTHERNEUTRAL
    "BN"  => 0,  # C2_NOTAPPLICABLE
    "B"   => 8,  # C2_BLOCKSEPARATOR
    "S"   => 9,  # C2_SEGMENTSEPARATOR
    "WS"  => 10, # C2_WHITESPACE
    "ON"  => 11, # C2_OTHERNEUTRAL
    "LRE" => 11, # C2_OTHERNEUTRAL
    "LRO" => 11, # C2_OTHERNEUTRAL
    "RLE" => 11, # C2_OTHERNEUTRAL
    "RLO" => 11, # C2_OTHERNEUTRAL
    "PDF" => 11, # C2_OTHERNEUTRAL
    "LRI" => 11, # C2_OTHERNEUTRAL
    "RLI" => 11, # C2_OTHERNEUTRAL
    "FSI" => 11, # C2_OTHERNEUTRAL
    "PDI" => 11  # C2_OTHERNEUTRAL
);
# Numeric codes keyed by bidirectional class abbreviation. Unlike
# %directions, every class -- including each embedding, override and isolate
# control -- has its own value; the values run consecutively from 0 to 22.
my %bidi_types =
(
    "ON"  => 0,  # Other Neutrals
    "L"   => 1,  # Left-to-Right
    "R"   => 2,  # Right-to-Left
    "AN"  => 3,  # Arabic Number
    "EN"  => 4,  # European Number
    "AL"  => 5,  # Right-to-Left Arabic
    "NSM" => 6,  # Non-Spacing Mark
    "CS"  => 7,  # Common Number Separator
    "ES"  => 8,  # European Number Separator
    "ET"  => 9,  # European Number Terminator
    "BN"  => 10, # Boundary Neutral
    "S"   => 11, # Segment Separator
    "WS"  => 12, # Whitespace
    "B"   => 13, # Paragraph Separator
    "RLO" => 14, # Right-to-Left Override
    "RLE" => 15, # Right-to-Left Embedding
    "LRO" => 16, # Left-to-Right Override
    "LRE" => 17, # Left-to-Right Embedding
    "PDF" => 18, # Pop Directional Format
    "LRI" => 19, # Left-to-Right Isolate
    "RLI" => 20, # Right-to-Left Isolate
    "FSI" => 21, # First Strong Isolate
    "PDI" => 22  # Pop Directional Isolate
);
# Numeric codes keyed by joining type. "D" (Dual_Joining) and "C"
# (Join_Causing) deliberately share the value 3. Two keys are group names
# rather than single-letter types: "ALAPH" and "DALATH RISH" (note the
# embedded space in the latter key).
my %joining_types =
(
    "U"           => 0, # Non_Joining
    "L"           => 1, # Left_Joining
    "R"           => 2, # Right_Joining
    "D"           => 3, # Dual_Joining
    "C"           => 3, # Join_Causing
    "ALAPH"       => 4, # Syriac ALAPH
    "DALATH RISH" => 5, # Syriac DALATH RISH group
    "T"           => 6, # Transparent
);
461 my @locales =
463 { name => "", lcid => 0x0000007f, file => "root", territory => "IV", sabbrevlangname => "IVL", sopentypelang =>"dflt" },
464 { name => "aa", dir => "seed", sopentypelang => "AFR" },
465 { name => "aa-DJ", dir => "seed" },
466 { name => "aa-ER", dir => "seed" },
467 { name => "aa-ET", dir => "seed" },
468 { name => "af", lcid => 0x00000036, oemcp => 850, sabbrevlangname => "AFK", sopentypelang => "AFK" },
469 { name => "af-NA" },
470 { name => "af-ZA", lcid => 0x00000436 },
471 { name => "agq" },
472 { name => "agq-CM" },
473 { name => "ak", sopentypelang => "TWI" },
474 { name => "ak-GH" },
475 { name => "am", lcid => 0x0000005e, sabbrevlangname => "AMH" },
476 { name => "am-ET", lcid => 0x0000045e },
477 { name => "ar", lcid => 0x00000001, territory => "SA", oemcp => 720, group => 13 },
478 { name => "ar-001" },
479 { name => "ar-AE", lcid => 0x00003801, sabbrevlangname => "ARU" },
480 { name => "ar-BH", lcid => 0x00003c01, sabbrevlangname => "ARH" },
481 { name => "ar-DJ" },
482 { name => "ar-DZ", lcid => 0x00001401, sabbrevlangname => "ARG", nativedigits => "0123456789" },
483 { name => "ar-EG", lcid => 0x00000c01, sabbrevlangname => "ARE" },
484 { name => "ar-EH" },
485 { name => "ar-ER" },
486 { name => "ar-IL" },
487 { name => "ar-IQ", lcid => 0x00000801, sabbrevlangname => "ARI" },
488 { name => "ar-JO", lcid => 0x00002c01, sabbrevlangname => "ARJ" },
489 { name => "ar-KM" },
490 { name => "ar-KW", lcid => 0x00003401, sabbrevlangname => "ARK" },
491 { name => "ar-LB", lcid => 0x00003001, sabbrevlangname => "ARB" },
492 { name => "ar-LY", lcid => 0x00001001, sabbrevlangname => "ARL", nativedigits => "0123456789" },
493 { name => "ar-MA", lcid => 0x00001801, sabbrevlangname => "ARM", nativedigits => "0123456789" },
494 { name => "ar-MR" },
495 { name => "ar-OM", lcid => 0x00002001, sabbrevlangname => "ARO" },
496 { name => "ar-PS" },
497 { name => "ar-QA", lcid => 0x00004001, sabbrevlangname => "ARQ" },
498 { name => "ar-SA", lcid => 0x00000401, sabbrevlangname => "ARA" },
499 { name => "ar-SD" },
500 { name => "ar-SO" },
501 { name => "ar-SS" },
502 { name => "ar-SY", lcid => 0x00002801, sabbrevlangname => "ARS" },
503 { name => "ar-TD" },
504 { name => "ar-TN", lcid => 0x00001c01, sabbrevlangname => "ART", nativedigits => "0123456789" },
505 { name => "ar-YE", lcid => 0x00002401, sabbrevlangname => "ARY" },
506 { name => "arn", lcid => 0x0000007a, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed", sabbrevlangname => "MPD", sopentypelang => "MAP" },
507 { name => "arn-CL", lcid => 0x0000047a, dir => "seed" },
508 { name => "arn-Latn", alias => "arn" },
509 { name => "arn-Latn-CL", alias => "arn-CL" },
510 { name => "as", lcid => 0x0000004d, slist => ",", group => 15 },
511 { name => "as-IN", lcid => 0x0000044d },
512 { name => "asa" },
513 { name => "asa-TZ" },
514 { name => "ast" },
515 { name => "ast-ES" },
516 { name => "az", lcid => 0x0000002c, oemcp => 857, ebcdiccp => 20905, group => 2 },
517 { name => "az-Cyrl", lcid => 0x0000742c, oemcp => 866, ebcdiccp => 20880, group => 5, sabbrevlangname => "AZC" },
518 { name => "az-Cyrl-AZ", lcid => 0x0000082c },
519 { name => "az-Latn", lcid => 0x0000782c },
520 { name => "az-Latn-AZ", lcid => 0x0000042c },
521 { name => "ba", lcid => 0x0000006d, oemcp => 866, group => 5, dir => "seed", sabbrevlangname => "BAS", sopentypelang => "BSH" },
522 { name => "ba-Cyrl", alias => "ba" },
523 { name => "ba-Cyrl-RU", alias => "ba-RU" },
524 { name => "ba-RU", lcid => 0x0000046d, dir => "seed" },
525 { name => "bas" },
526 { name => "bas-CM" },
527 { name => "be", lcid => 0x00000023, oemcp => 866, ebcdiccp => 500, group => 5 },
528 { name => "be-BY", lcid => 0x00000423 },
529 { name => "bem" },
530 { name => "bem-ZM" },
531 { name => "bez" },
532 { name => "bez-TZ" },
533 { name => "bg", lcid => 0x00000002, oemcp => 866, ebcdiccp => 21025, group => 5, sabbrevlangname => "BGR", sopentypelang => "BGR" },
534 { name => "bg-BG", lcid => 0x00000402 },
535 { name => "bin", lcid => 0x00000066, oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "EDO" },
536 { name => "bin-NG", lcid => 0x00000466, file => "bin", dir => "exemplars" },
537 { name => "bm", sopentypelang => "BMB" },
538 { name => "bm-Latn", file => "bm" },
539 { name => "bm-Latn-ML", file => "bm_ML" },
540 { name => "bm-ML", alias => "bm-Latn-ML" },
541 { name => "bn", lcid => 0x00000045, slist => ",", group => 15, sabbrevlangname => "BNB" },
542 { name => "bn-BD", lcid => 0x00000845 },
543 { name => "bn-IN", lcid => 0x00000445, sabbrevlangname => "BNG" },
544 { name => "bo", lcid => 0x00000051, slist => ",", group => 15, sabbrevlangname => "BOB", sopentypelang => "TIB" },
545 { name => "bo-CN", lcid => 0x00000451 },
546 { name => "bo-IN", slist => "," },
547 { name => "bo-Tibt", alias => "bo" },
548 { name => "bo-Tibt-CN", alias => "bo-CN" },
549 { name => "bo-Tibt-IN", alias => "bo-IN" },
550 { name => "br", lcid => 0x0000007e, oemcp => 850, ebcdiccp => 20297 },
551 { name => "br-FR", lcid => 0x0000047e },
552 { name => "br-Latn", alias => "br" },
553 { name => "br-Latn-FR", alias => "br-FR" },
554 { name => "brx" },
555 { name => "brx-IN" },
556 { name => "bs", lcid => 0x0000781a, oemcp => 852, maccp => 10082, ebcdiccp => 870, group => 2, sabbrevlangname => "BSB" },
557 { name => "bs-Cyrl", lcid => 0x0000641a, oemcp => 855, group => 5, sabbrevlangname => "BSC" },
558 { name => "bs-Cyrl-BA", lcid => 0x0000201a },
559 { name => "bs-Latn", lcid => 0x0000681a },
560 { name => "bs-Latn-BA", lcid => 0x0000141a },
561 { name => "byn", dir => "seed", sopentypelang => "BIL" },
562 { name => "byn-ER", dir => "seed" },
563 { name => "ca", lcid => 0x00000003, oemcp => 850 },
564 { name => "ca-AD", maccp => 65001 },
565 { name => "ca-ES", lcid => 0x00000403 },
566 { name => "ca-ES-valencia", lcid => 0x00000803, file => "ca_ES_VALENCIA", sabbrevlangname => "VAL" },
567 { name => "ca-FR", maccp => 65001 },
568 { name => "ca-IT", maccp => 65001 },
569 { name => "ccp" },
570 { name => "ccp-BD", alias => "ccp-Cakm-BD" },
571 { name => "ccp-Cakm", file => "ccp" },
572 { name => "ccp-Cakm-BD", file => "ccp_BD" },
573 { name => "ccp-Cakm-IN", file => "ccp_IN" },
574 { name => "ccp-IN", alias => "ccp-Cakm-IN" },
575 { name => "ce" },
576 { name => "ce-RU" },
577 { name => "ceb" },
578 { name => "ceb-Latn", file => "ceb" },
579 { name => "ceb-Latn-PH", file => "ceb_PH" },
580 { name => "ceb-PH", alias => "ceb-Latn-PH" },
581 { name => "cgg" },
582 { name => "cgg-UG" },
583 { name => "chr", lcid => 0x0000005c, slist => ",", sabbrevlangname => "CRE" },
584 { name => "chr-Cher", lcid => 0x00007c5c, file => "chr" },
585 { name => "chr-Cher-US", lcid => 0x0000045c, file => "chr_US" },
586 { name => "chr-US", alias => "chr-Cher-US" },
587 { name => "ckb", alias => "ku" },
588 { name => "ckb-IQ", alias => "ku-Arab-IQ" },
589 { name => "ckb-IR", alias => "ku-Arab-IR" },
590 { name => "co", lcid => 0x00000083, oemcp => 850, ebcdiccp => 20297, dir => "seed" },
591 { name => "co-FR", lcid => 0x00000483, dir => "seed" },
592 { name => "co-Latn", alias => "co" },
593 { name => "co-Latn-FR", alias => "co-FR" },
594 { name => "cs", lcid => 0x00000005, oemcp => 852, group => 2, sabbrevlangname => "CSY", sopentypelang => "CSY" },
595 { name => "cs-CZ", lcid => 0x00000405 },
596 { name => "cu", dir => "seed", sopentypelang => "CSL" },
597 { name => "cu-RU", dir => "seed" },
598 { name => "cy", lcid => 0x00000052, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "CYM", sopentypelang => "WEL" },
599 { name => "cy-GB", lcid => 0x00000452 },
600 { name => "da", lcid => 0x00000006, oemcp => 850, ebcdiccp => 20277 },
601 { name => "da-DK", lcid => 0x00000406 },
602 { name => "da-GL", maccp => 65001 },
603 { name => "dav" },
604 { name => "dav-KE" },
605 { name => "de", lcid => 0x00000007, oemcp => 850, ebcdiccp => 20273 },
606 { name => "de-AT", lcid => 0x00000c07, sabbrevlangname => "DEA" },
607 { name => "de-BE" },
608 { name => "de-CH", lcid => 0x00000807, sabbrevlangname => "DES" },
609 { name => "de-DE", lcid => 0x00000407 },
610 { name => "de-DE_phoneb", lcid => 0x00010407, alias => "de-DE" },
611 { name => "de-DE-u-co-phonebk", alias => "de-DE_phoneb" },
612 { name => "de-IT", oemcp => 65001 },
613 { name => "de-LI", lcid => 0x00001407, sabbrevlangname => "DEC" },
614 { name => "de-LU", lcid => 0x00001007, sabbrevlangname => "DEL" },
615 { name => "dje", sopentypelang => "DJR" },
616 { name => "dje-NE" },
617 { name => "doi", sopentypelang => "DGR" },
618 { name => "doi-IN", alias => "doi-Deva-IN" },
619 { name => "doi-Deva", file => "doi" },
620 { name => "doi-Deva-IN", file => "doi_IN" },
621 { name => "dsb", lcid => 0x00007c2e, sparent => "hsb", oemcp => 850, ebcdiccp => 870, sabbrevlangname => "DSB", sopentypelang => "LSB" },
622 { name => "dsb-DE", lcid => 0x0000082e },
623 { name => "dua" },
624 { name => "dua-CM" },
625 { name => "dv", lcid => 0x00000065, slist => "\x{060c}", group => 13, dir => "seed", nativedigits => "0123456789" },
626 { name => "dv-MV", lcid => 0x00000465, dir => "seed" },
627 { name => "dyo" },
628 { name => "dyo-SN" },
629 { name => "dz", sopentypelang => "DZN" },
630 { name => "dz-BT", lcid => 0x00000c51, sabbrevlangname => "ZZZ" },
631 { name => "ebu" },
632 { name => "ebu-KE" },
633 { name => "ee" },
634 { name => "ee-GH" },
635 { name => "ee-TG" },
636 { name => "el", lcid => 0x00000008, oemcp => 737, group => 4 },
637 { name => "el-CY" },
638 { name => "el-GR", lcid => 0x00000408 },
639 { name => "en", lcid => 0x00000009, oemcp => 437, slist => ",", sabbrevlangname => "ENU" },
640 { name => "en-001", oemcp => 850 },
641 { name => "en-029", lcid => 0x00002409, file => "en", oemcp => 850, sabbrevlangname => "ENB" },
642 { name => "en-150", oemcp => 65001 },
643 { name => "en-AE", lcid => 0x00004c09, oemcp => 65001, sabbrevlangname => "ZZZ" },
644 { name => "en-AG", oemcp => 850 },
645 { name => "en-AI", oemcp => 850 },
646 { name => "en-AS", oemcp => 850 },
647 { name => "en-AT", oemcp => 65001 },
648 { name => "en-AU", lcid => 0x00000c09, oemcp => 850, sabbrevlangname => "ENA" },
649 { name => "en-BB", oemcp => 850 },
650 { name => "en-BE", oemcp => 850 },
651 { name => "en-BI", oemcp => 65001 },
652 { name => "en-BM", oemcp => 850 },
653 { name => "en-BS", oemcp => 850 },
654 { name => "en-BW", oemcp => 850 },
655 { name => "en-BZ", lcid => 0x00002809, oemcp => 850, sabbrevlangname => "ENL" },
656 { name => "en-CA", lcid => 0x00001009, oemcp => 850, ebcdiccp => 37, sabbrevlangname => "ENC" },
657 { name => "en-CC", oemcp => 850 },
658 { name => "en-CH", oemcp => 65001 },
659 { name => "en-CK", oemcp => 850 },
660 { name => "en-CM", oemcp => 850 },
661 { name => "en-CX", oemcp => 850 },
662 { name => "en-CY", oemcp => 65001 },
663 { name => "en-DE", oemcp => 65001 },
664 { name => "en-DG", oemcp => 850 },
665 { name => "en-DK", oemcp => 65001 },
666 { name => "en-DM", oemcp => 850 },
667 { name => "en-ER", oemcp => 850 },
668 { name => "en-FI", oemcp => 65001 },
669 { name => "en-FJ", oemcp => 850 },
670 { name => "en-FK", oemcp => 850 },
671 { name => "en-FM", oemcp => 850 },
672 { name => "en-GB", lcid => 0x00000809, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "ENG" },
673 { name => "en-GD", oemcp => 850 },
674 { name => "en-GG", oemcp => 850 },
675 { name => "en-GH", oemcp => 850 },
676 { name => "en-GI", oemcp => 850 },
677 { name => "en-GM", oemcp => 850 },
678 { name => "en-GU", oemcp => 850 },
679 { name => "en-GY", oemcp => 850 },
680 { name => "en-HK", lcid => 0x00003c09, oemcp => 850, sabbrevlangname => "ENH" },
681 { name => "en-ID", lcid => 0x00003809, file => "en", oemcp => 850, sabbrevlangname => "ZZZ" },
682 { name => "en-IE", lcid => 0x00001809, oemcp => 850, sabbrevlangname => "ENI" },
683 { name => "en-IL", oemcp => 65001 },
684 { name => "en-IM", oemcp => 850 },
685 { name => "en-IN", lcid => 0x00004009, sabbrevlangname => "ENN" },
686 { name => "en-IO", oemcp => 850 },
687 { name => "en-JE", oemcp => 850 },
688 { name => "en-JM", lcid => 0x00002009, oemcp => 850, sabbrevlangname => "ENJ" },
689 { name => "en-KE", oemcp => 850 },
690 { name => "en-KI", oemcp => 850 },
691 { name => "en-KN", oemcp => 850 },
692 { name => "en-KY", oemcp => 850 },
693 { name => "en-LC", oemcp => 850 },
694 { name => "en-LR", oemcp => 850 },
695 { name => "en-LS", oemcp => 850 },
696 { name => "en-MG", oemcp => 850 },
697 { name => "en-MH", oemcp => 850 },
698 { name => "en-MO", oemcp => 850 },
699 { name => "en-MP", oemcp => 850 },
700 { name => "en-MS", oemcp => 850 },
701 { name => "en-MT", oemcp => 850 },
702 { name => "en-MU", oemcp => 850 },
703 { name => "en-MW", oemcp => 850 },
704 { name => "en-MY", lcid => 0x00004409, sabbrevlangname => "ENM" },
705 { name => "en-NA", oemcp => 850 },
706 { name => "en-NF", oemcp => 850 },
707 { name => "en-NG", oemcp => 850 },
708 { name => "en-NL", oemcp => 65001 },
709 { name => "en-NR", oemcp => 850 },
710 { name => "en-NU", oemcp => 850 },
711 { name => "en-NZ", lcid => 0x00001409, oemcp => 850, sabbrevlangname => "ENZ" },
712 { name => "en-PG", oemcp => 850 },
713 { name => "en-PH", lcid => 0x00003409, ebcdiccp => 500, sabbrevlangname => "ENP" },
714 { name => "en-PK", oemcp => 850 },
715 { name => "en-PN", oemcp => 850 },
716 { name => "en-PR", oemcp => 850 },
717 { name => "en-PW", oemcp => 850 },
718 { name => "en-RW", oemcp => 850 },
719 { name => "en-SB", oemcp => 850 },
720 { name => "en-SC", oemcp => 850 },
721 { name => "en-SD", oemcp => 850 },
722 { name => "en-SE", oemcp => 65001 },
723 { name => "en-SG", lcid => 0x00004809, sabbrevlangname => "ENE" },
724 { name => "en-SH", oemcp => 850 },
725 { name => "en-SI", oemcp => 65001 },
726 { name => "en-SL", oemcp => 850 },
727 { name => "en-SS", oemcp => 850 },
728 { name => "en-SX", oemcp => 850 },
729 { name => "en-SZ", oemcp => 850 },
730 { name => "en-TC", oemcp => 850 },
731 { name => "en-TK", oemcp => 850 },
732 { name => "en-TO", oemcp => 850 },
733 { name => "en-TT", lcid => 0x00002c09, oemcp => 850, sabbrevlangname => "ENT" },
734 { name => "en-TV", oemcp => 850 },
735 { name => "en-TZ", oemcp => 850 },
736 { name => "en-UG", oemcp => 850 },
737 { name => "en-UM", oemcp => 850 },
738 { name => "en-US", lcid => 0x00000409 },
739 { name => "en-VC", oemcp => 850 },
740 { name => "en-VG", oemcp => 850 },
741 { name => "en-VI", oemcp => 850 },
742 { name => "en-VU", oemcp => 850 },
743 { name => "en-WS", oemcp => 850 },
744 { name => "en-ZA", lcid => 0x00001c09, ebcdiccp => 500, sabbrevlangname => "ENS" },
745 { name => "en-ZM", oemcp => 850 },
746 { name => "en-ZW", lcid => 0x00003009, ebcdiccp => 500, sabbrevlangname => "ENW" },
747 { name => "eo", sopentypelang => "NTO" },
748 { name => "eo-001" },
749 { name => "es", lcid => 0x0000000a, oemcp => 850, ebcdiccp => 20284, sabbrevlangname => "ESP", sopentypelang => "ESP" },
750 { name => "es-419", lcid => 0x0000580a, sabbrevlangname => "ESJ" },
751 { name => "es-AR", lcid => 0x00002c0a, sabbrevlangname => "ESS" },
752 { name => "es-BO", lcid => 0x0000400a, sabbrevlangname => "ESB" },
753 { name => "es-BR", oemcp => 65001 },
754 { name => "es-BZ", oemcp => 65001 },
755 { name => "es-CL", lcid => 0x0000340a, sabbrevlangname => "ESL" },
756 { name => "es-CO", lcid => 0x0000240a, sabbrevlangname => "ESO" },
757 { name => "es-CR", lcid => 0x0000140a, sabbrevlangname => "ESC" },
758 { name => "es-CU", lcid => 0x00005c0a, sabbrevlangname => "ESK" },
759 { name => "es-DO", lcid => 0x00001c0a, sabbrevlangname => "ESD" },
760 { name => "es-EA" },
761 { name => "es-EC", lcid => 0x0000300a, sabbrevlangname => "ESF" },
762 { name => "es-ES", lcid => 0x00000c0a, sabbrevlangname => "ESN" },
763 { name => "es-ES_tradnl", lcid => 0x0000040a, file => "es_ES" },
764 { name => "es-ES-u-co-trad", alias => "es-ES_tradnl" },
765 { name => "es-GQ" },
766 { name => "es-GT", lcid => 0x0000100a, sabbrevlangname => "ESG" },
767 { name => "es-HN", lcid => 0x0000480a, sabbrevlangname => "ESH" },
768 { name => "es-IC" },
769 { name => "es-MX", lcid => 0x0000080a, sabbrevlangname => "ESM" },
770 { name => "es-NI", lcid => 0x00004c0a, sabbrevlangname => "ESI" },
771 { name => "es-PA", lcid => 0x0000180a, sabbrevlangname => "ESA" },
772 { name => "es-PE", lcid => 0x0000280a, sabbrevlangname => "ESR" },
773 { name => "es-PH" },
774 { name => "es-PR", lcid => 0x0000500a, sabbrevlangname => "ESU" },
775 { name => "es-PY", lcid => 0x00003c0a, sabbrevlangname => "ESZ" },
776 { name => "es-SV", lcid => 0x0000440a, sabbrevlangname => "ESE" },
777 { name => "es-US", lcid => 0x0000540a, sabbrevlangname => "EST" },
778 { name => "es-UY", lcid => 0x0000380a, sabbrevlangname => "ESY" },
779 { name => "es-VE", lcid => 0x0000200a, sabbrevlangname => "ESV" },
780 { name => "et", lcid => 0x00000025, oemcp => 775, group => 3, sabbrevlangname => "ETI", sopentypelang => "ETI" },
781 { name => "et-EE", lcid => 0x00000425 },
782 { name => "eu", lcid => 0x0000002d, oemcp => 850, maccp => 65001, sabbrevlangname => "EUQ", sopentypelang => "EUQ" },
783 { name => "eu-ES", lcid => 0x0000042d },
784 { name => "ewo" },
785 { name => "ewo-CM" },
786 { name => "fa", lcid => 0x00000029, inegnumber => 3, oemcp => 720, slist => "\x{061b}", group => 13, sabbrevlangname => "FAR", sopentypelang => "FAR" },
787 { name => "fa-AF", alias => "prs-AF" },
788 { name => "fa-IR", lcid => 0x00000429 },
789 { name => "ff", lcid => 0x00000067, oemcp => 850, ebcdiccp => 20297 },
790 { name => "ff-CM", alias => "ff-Latn-CM" },
791 { name => "ff-GN", alias => "ff-Latn-GN" },
792 { name => "ff-MR", alias => "ff-Latn-MR" },
793 { name => "ff-NG", alias => "ff-Latn-NG" },
794 { name => "ff-SN", alias => "ff-Latn-SN" },
795 { name => "ff-Adlm", oemcp => 65001 },
796 { name => "ff-Adlm-BF" },
797 { name => "ff-Adlm-CM" },
798 { name => "ff-Adlm-GH" },
799 { name => "ff-Adlm-GM" },
800 { name => "ff-Adlm-GN" },
801 { name => "ff-Adlm-GW" },
802 { name => "ff-Adlm-LR" },
803 { name => "ff-Adlm-MR" },
804 { name => "ff-Adlm-NE" },
805 { name => "ff-Adlm-NG" },
806 { name => "ff-Adlm-SL" },
807 { name => "ff-Adlm-SN" },
808 { name => "ff-Latn", lcid => 0x00007c67 },
809 { name => "ff-Latn-BF", oemcp => 65001 },
810 { name => "ff-Latn-CM" },
811 { name => "ff-Latn-GH", oemcp => 65001 },
812 { name => "ff-Latn-GM", oemcp => 65001 },
813 { name => "ff-Latn-GN" },
814 { name => "ff-Latn-GW", oemcp => 65001 },
815 { name => "ff-Latn-LR", oemcp => 65001 },
816 { name => "ff-Latn-MR" },
817 { name => "ff-Latn-NE", oemcp => 65001 },
818 { name => "ff-Latn-NG", lcid => 0x00000467, sabbrevlangname => "ZZZ" },
819 { name => "ff-Latn-SL", oemcp => 65001 },
820 { name => "ff-Latn-SN", lcid => 0x00000867 },
821 { name => "fi", lcid => 0x0000000b, oemcp => 850, ebcdiccp => 20278 },
822 { name => "fi-FI", lcid => 0x0000040b },
823 { name => "fil", lcid => 0x00000064, oemcp => 437, ebcdiccp => 500, sabbrevlangname => "FPO", sopentypelang => "PIL" },
824 { name => "fil-PH", lcid => 0x00000464 },
825 { name => "fil-Latn", alias => "fil" },
826 { name => "fil-Latn-PH", alias => "fil-PH" },
827 { name => "fo", lcid => 0x00000038, oemcp => 850, maccp => 10079, ebcdiccp => 20277, sabbrevlangname => "FOS", sopentypelang => "FOS" },
828 { name => "fo-DK", oemcp => 65001, maccp => 65001 },
829 { name => "fo-FO", lcid => 0x00000438 },
830 { name => "fr", lcid => 0x0000000c, oemcp => 850, ebcdiccp => 20297 },
831 { name => "fr-029", lcid => 0x00001c0c, file => "fr", sabbrevlangname => "ZZZ" },
832 { name => "fr-BE", lcid => 0x0000080c, sabbrevlangname => "FRB" },
833 { name => "fr-BF" },
834 { name => "fr-BI" },
835 { name => "fr-BJ" },
836 { name => "fr-BL" },
837 { name => "fr-CA", lcid => 0x00000c0c, sabbrevlangname => "FRC" },
838 { name => "fr-CD", lcid => 0x0000240c, sabbrevlangname => "FRD" },
839 { name => "fr-CF" },
840 { name => "fr-CG" },
841 { name => "fr-CH", lcid => 0x0000100c, sabbrevlangname => "FRS" },
842 { name => "fr-CI", lcid => 0x0000300c, sabbrevlangname => "FRI" },
843 { name => "fr-CM", lcid => 0x00002c0c, sabbrevlangname => "FRE" },
844 { name => "fr-DJ" },
845 { name => "fr-DZ" },
846 { name => "fr-FR", lcid => 0x0000040c },
847 { name => "fr-GA" },
848 { name => "fr-GF" },
849 { name => "fr-GN" },
850 { name => "fr-GP" },
851 { name => "fr-GQ" },
852 { name => "fr-HT", lcid => 0x00003c0c, sabbrevlangname => "FRH" },
853 { name => "fr-KM" },
854 { name => "fr-LU", lcid => 0x0000140c, sabbrevlangname => "FRL" },
855 { name => "fr-MA", lcid => 0x0000380c, sabbrevlangname => "FRO" },
856 { name => "fr-MC", lcid => 0x0000180c, sabbrevlangname => "FRM" },
857 { name => "fr-MF" },
858 { name => "fr-MG" },
859 { name => "fr-ML", lcid => 0x0000340c, sabbrevlangname => "FRF" },
860 { name => "fr-MQ" },
861 { name => "fr-MR" },
862 { name => "fr-MU" },
863 { name => "fr-NC" },
864 { name => "fr-NE" },
865 { name => "fr-PF" },
866 { name => "fr-PM" },
867 { name => "fr-RE", lcid => 0x0000200c, sabbrevlangname => "FRR" },
868 { name => "fr-RW" },
869 { name => "fr-SC" },
870 { name => "fr-SN", lcid => 0x0000280c, sabbrevlangname => "FRN" },
871 { name => "fr-SY" },
872 { name => "fr-TD" },
873 { name => "fr-TG" },
874 { name => "fr-TN" },
875 { name => "fr-VU" },
876 { name => "fr-WF" },
877 { name => "fr-YT" },
878 { name => "fur", sopentypelang => "FRL" },
879 { name => "fur-IT" },
880 { name => "fuv-NG", alias => "ff-Latn-NG" },
881 { name => "fy", lcid => 0x00000062, oemcp => 850, sabbrevlangname => "FYN", sopentypelang => "FRI" },
882 { name => "fy-NL", lcid => 0x00000462 },
883 { name => "ga", lcid => 0x0000003c, oemcp => 850, sabbrevlangname => "IRE", sopentypelang => "IRI" },
884 { name => "ga-GB", oemcp => 65001 },
885 { name => "ga-IE", lcid => 0x0000083c },
886 { name => "gd", lcid => 0x00000091, oemcp => 850, ebcdiccp => 20285, sopentypelang => "GAE" },
887 { name => "gd-GB", lcid => 0x00000491 },
888 { name => "gd-Latn", alias => "gd" },
889 { name => "gl", lcid => 0x00000056, oemcp => 850, sabbrevlangname => "GLC", sopentypelang => "GAL" },
890 { name => "gl-ES", lcid => 0x00000456 },
891 { name => "gn", lcid => 0x00000074, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed", sopentypelang => "GUA" },
892 { name => "gn-PY", lcid => 0x00000474, dir => "seed" },
893 { name => "gsw", lcid => 0x00000084, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "ZZZ", sopentypelang => "ALS" },
894 { name => "gsw-CH" },
895 { name => "gsw-FR", lcid => 0x00000484, sabbrevlangname => "GSW" },
896 { name => "gsw-LI" },
897 { name => "gu", lcid => 0x00000047, slist => ",", group => 15 },
898 { name => "gu-IN", lcid => 0x00000447 },
899 { name => "guz" },
900 { name => "guz-KE" },
901 { name => "gv", sopentypelang => "MNX" },
902 { name => "gv-GB", file => "gv" },
903 { name => "gv-IM" },
904 { name => "ha", lcid => 0x00000068, oemcp => 437 },
905 { name => "ha-GH", alias => "ha-Latn-GH" },
906 { name => "ha-Latn", lcid => 0x00007c68, file => "ha" },
907 { name => "ha-Latn-GH", file => "ha_GH", ebcdiccp => 500 },
908 { name => "ha-Latn-NE", file => "ha_NE", ebcdiccp => 500 },
909 { name => "ha-Latn-NG", lcid => 0x00000468, file => "ha_NG" },
910 { name => "ha-NE", alias => "ha-Latn-NE" },
911 { name => "ha-NG", alias => "ha-Latn-NG" },
912 { name => "haw", lcid => 0x00000075, oemcp => 437 },
913 { name => "haw-Latn", alias => "haw" },
914 { name => "haw-Latn-US", alias => "haw-US" },
915 { name => "haw-US", lcid => 0x00000475 },
916 { name => "he", lcid => 0x0000000d, oemcp => 862, slist => ",", group => 12, sopentypelang => "IWR" },
917 { name => "he-IL", lcid => 0x0000040d },
918 { name => "hi", lcid => 0x00000039, slist => ",", group => 15 },
919 { name => "hi-IN", lcid => 0x00000439 },
920 { name => "hr", lcid => 0x0000001a, inegnumber => 2, oemcp => 852, maccp => 10082, group => 2 },
921 { name => "hr-BA", lcid => 0x0000101a, ebcdiccp => 870, inegnumber => 1, sabbrevlangname => "HRB" },
922 { name => "hr-HR", lcid => 0x0000041a },
923 { name => "hsb", lcid => 0x0000002e, oemcp => 850, ebcdiccp => 870, sopentypelang => "USB" },
924 { name => "hsb-DE", lcid => 0x0000042e },
925 { name => "hu", lcid => 0x0000000e, oemcp => 852, group => 2 },
926 { name => "hu-HU", lcid => 0x0000040e },
927 { name => "hu-HU_technl", lcid => 0x0001040e, alias => "hu-HU" },
928 { name => "hy", lcid => 0x0000002b, slist => ",", group => 17 },
929 { name => "hy-AM", lcid => 0x0000042b },
930 { name => "ia" },
931 { name => "ia-001" },
932 ## name => "ibb", lcid => 0x00000069 },
933 ## name => "ibb-NG", lcid => 0x00000469 },
934 { name => "id", lcid => 0x00000021, oemcp => 850 },
935 { name => "id-ID", lcid => 0x00000421 },
936 { name => "ig", lcid => 0x00000070, oemcp => 437 },
937 { name => "ig-Latn", alias => "ig" },
938 { name => "ig-Latn-NG", alias => "ig-NG" },
939 { name => "ig-NG", lcid => 0x00000470 },
940 { name => "ii", lcid => 0x00000078, group => 9, sopentypelang => "YIM" },
941 { name => "ii-CN", lcid => 0x00000478 },
942 { name => "ii-Yiii", alias => "ii" },
943 { name => "ii-Yiii-CN", alias => "ii-CN" },
944 { name => "is", lcid => 0x0000000f, oemcp => 850, maccp => 10079, ebcdiccp => 20871 },
945 { name => "is-IS", lcid => 0x0000040f },
946 { name => "it", lcid => 0x00000010, oemcp => 850, ebcdiccp => 20280 },
947 { name => "it-CH", lcid => 0x00000810, ebcdiccp => 500, sabbrevlangname => "ITS" },
948 { name => "it-IT", lcid => 0x00000410 },
949 { name => "it-SM" },
950 { name => "it-VA", oemcp => 65001 },
951 { name => "iu", lcid => 0x0000005d, oemcp => 437, slist => ",", sortlocale => "iu-Latn-CA", dir => "seed", sabbrevlangname => "IUK", sopentypelang => "INU" },
952 { name => "iu-Cans", lcid => 0x0000785d, file => "iu", oemcp => 65001, dir => "seed", sabbrevlangname => "IUS" },
953 { name => "iu-Cans-CA", lcid => 0x0000045d, file => "iu_CA", dir => "seed" },
954 { name => "iu-Latn", lcid => 0x00007c5d, dir => "seed" },
955 { name => "iu-Latn-CA", lcid => 0x0000085d, dir => "seed" },
956 { name => "ja", lcid => 0x00000011, ireadinglayout => 2, oemcp => 932, slist => ",", sscripts => "Hani Hira Jpan Kana", group => 7, sopentypelang => "JAN" },
957 { name => "ja-JP", lcid => 0x00000411 },
958 { name => "ja-JP_radstr", lcid => 0x00040411, alias => "ja-JP" },
959 { name => "ja-JP-u-co-unihan", alias => "ja-JP_radstr" },
960 { name => "jgo" },
961 { name => "jgo-CM" },
962 { name => "jmc" },
963 { name => "jmc-TZ" },
964 { name => "jv", oemcp => 850, nativedigits => "0123456789" },
965 { name => "jv-ID", alias => "jv-Latn-ID" },
966 ## name => "jv-Java" },
967 ## name => "jv-Java-ID" },
968 { name => "jv-Latn", file => "jv" },
969 { name => "jv-Latn-ID", file => "jv_ID" },
970 { name => "ka", lcid => 0x00000037, group => 16 },
971 { name => "ka-GE", lcid => 0x00000437 },
972 { name => "ka-GE_modern", lcid => 0x00010437, alias => "ka-GE" },
973 { name => "kab", sopentypelang => "KAB0" },
974 { name => "kab-DZ" },
975 { name => "kam", sopentypelang => "KMB" },
976 { name => "kam-KE" },
977 { name => "kde" },
978 { name => "kde-TZ" },
979 { name => "kea" },
980 { name => "kea-CV" },
981 { name => "kgp" },
982 { name => "kgp-BR" },
983 { name => "khq" },
984 { name => "khq-ML" },
985 { name => "ki" },
986 { name => "ki-KE" },
987 { name => "kk", lcid => 0x0000003f, group => 5, sabbrevlangname => "KKZ" },
988 { name => "kk-Cyrl", alias => "kk" },
989 { name => "kk-Cyrl-KZ", alias => "kk-KZ" },
990 { name => "kk-KZ", lcid => 0x0000043f },
991 { name => "kkj" },
992 { name => "kkj-CM" },
993 { name => "kl", lcid => 0x0000006f, oemcp => 850, ebcdiccp => 20277, sopentypelang => "GRN" },
994 { name => "kl-GL", lcid => 0x0000046f },
995 { name => "kln", sopentypelang => "KAL" },
996 { name => "kln-KE" },
997 { name => "km", lcid => 0x00000053, inegnumber => 2, slist => ",", group => 15 },
998 { name => "km-KH", lcid => 0x00000453 },
999 { name => "kn", lcid => 0x0000004b, slist => ",", group => 15, sabbrevlangname => "KDI" },
1000 { name => "kn-IN", lcid => 0x0000044b },
1001 { name => "ko", lcid => 0x00000012, ireadinglayout => 2, slist => ",", oemcp => 949, ebcdiccp => 20833, sscripts => "Hang Hani Kore", group => 8 },
1002 { name => "ko-KP", oemcp => 65001 },
1003 { name => "ko-KR", lcid => 0x00000412 },
1004 { name => "kok", lcid => 0x00000057, slist => ",", group => 15, sabbrevlangname => "KNK" },
1005 { name => "kok-IN", lcid => 0x00000457 },
1006 { name => "kr", lcid => 0x00000071, sortlocale => "kr-Latn-NG", oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "KNR" },
1007 { name => "kr-Latn", file => "kr", dir => "exemplars" },
1008 { name => "kr-Latn-NG", lcid => 0x00000471, file => "kr", dir => "exemplars" },
1009 { name => "kr-NG", alias => "kr-Latn-NG" },
1010 { name => "ks", lcid => 0x00000060, group => 15, sabbrevlangname => "ZZZ", sopentypelang => "KSH" },
1011 { name => "ks-Arab", lcid => 0x00000460 },
1012 { name => "ks-Arab-IN" },
1013 { name => "ks-Deva", slist => "," },
1014 { name => "ks-Deva-IN", lcid => 0x00000860 },
1015 { name => "ks-IN", alias => "ks-Arab-IN" },
1016 { name => "ksb" },
1017 { name => "ksb-TZ" },
1018 { name => "ksf" },
1019 { name => "ksf-CM" },
1020 { name => "ksh", sopentypelang => "KSH0" },
1021 { name => "ksh-DE" },
1022 { name => "ku", lcid => 0x00000092, file => "ckb", slist => "\x{061b}", sortlocale => "ku-Arab-IQ", oemcp => 720 },
1023 { name => "ku-Arab", lcid => 0x00007c92, file => "ckb", group => 13 },
1024 { name => "ku-Arab-IQ", lcid => 0x00000492, file => "ckb_IQ" },
1025 { name => "ku-Arab-IR", file => "ckb_IR", oemcp => 65001 },
1026 { name => "kw" },
1027 { name => "kw-GB" },
1028 { name => "ky", lcid => 0x00000040, oemcp => 866, group => 5, sabbrevlangname => "KYR" },
1029 { name => "ky-Cyrl", alias => "ky" },
1030 { name => "ky-Cyrl-KG", alias => "ky-KG" },
1031 { name => "ky-KG", lcid => 0x00000440 },
1032 { name => "la", lcid => 0x00000076, oemcp => 437, slist => ",", dir => "seed", sabbrevlangname => "ZZZ" },
1033 { name => "la-VA", lcid => 0x00000476, dir => "seed" },
1034 { name => "la-001", alias => "la-VA" },
1035 { name => "lag" },
1036 { name => "lag-TZ" },
1037 { name => "lb", lcid => 0x0000006e, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "LBX" },
1038 { name => "lb-LU", lcid => 0x0000046e },
1039 { name => "lg" },
1040 { name => "lg-UG" },
1041 { name => "lkt" },
1042 { name => "lkt-US" },
1043 { name => "ln" },
1044 { name => "ln-AO" },
1045 { name => "ln-CD" },
1046 { name => "ln-CF" },
1047 { name => "ln-CG" },
1048 { name => "lo", lcid => 0x00000054, group => 15 },
1049 { name => "lo-LA", lcid => 0x00000454 },
1050 { name => "lrc" },
1051 { name => "lrc-IQ" },
1052 { name => "lrc-IR" },
1053 { name => "lt", lcid => 0x00000027, oemcp => 775, group => 3, sabbrevlangname => "LTH", sopentypelang => "LTH" },
1054 { name => "lt-LT", lcid => 0x00000427 },
1055 { name => "lu" },
1056 { name => "lu-CD" },
1057 { name => "luo" },
1058 { name => "luo-KE" },
1059 { name => "luy", sopentypelang => "LUH" },
1060 { name => "luy-KE" },
1061 { name => "lv", lcid => 0x00000026, oemcp => 775, group => 3, sabbrevlangname => "LVI", sopentypelang => "LVI" },
1062 { name => "lv-LV", lcid => 0x00000426 },
1063 { name => "mai" },
1064 { name => "mai-IN" },
1065 { name => "mas" },
1066 { name => "mas-KE" },
1067 { name => "mas-TZ" },
1068 { name => "mer" },
1069 { name => "mer-KE" },
1070 { name => "mfe" },
1071 { name => "mfe-MU" },
1072 { name => "mg" },
1073 { name => "mg-MG" },
1074 { name => "mgh" },
1075 { name => "mgh-MZ" },
1076 { name => "mgo" },
1077 { name => "mgo-CM" },
1078 { name => "mi", lcid => 0x00000081, slist => "," },
1079 { name => "mi-Latn", alias => "mi" },
1080 { name => "mi-Latn-NZ", alias => "mi-NZ" },
1081 { name => "mi-NZ", lcid => 0x00000481 },
1082 { name => "mk", lcid => 0x0000002f, oemcp => 866, ebcdiccp => 500, group => 5, sabbrevlangname => "MKI" },
1083 { name => "mk-MK", lcid => 0x0000042f },
1084 { name => "ml", lcid => 0x0000004c, group => 15, sabbrevlangname => "MYM", sopentypelang => "MLR" },
1085 { name => "ml-IN", lcid => 0x0000044c },
1086 { name => "mn", lcid => 0x00000050, oemcp => 866, sopentypelang => "MNG" },
1087 { name => "mn-Cyrl", lcid => 0x00007850, file => "mn", sabbrevlangname => "MNN" },
1088 { name => "mn-Cyrl-MN", alias => "mn-MN" },
1089 { name => "mn-MN", lcid => 0x00000450, sparent => "mn-Cyrl", group => 5 },
1090 { name => "mn-Mong", lcid => 0x00007c50, oemcp => 65001, slist => ",", group => 15, dir => "seed", sabbrevlangname => "MNG", nativedigits => "0123456789" },
1091 { name => "mn-Mong-CN", lcid => 0x00000850, dir => "seed" },
1092 { name => "mn-Mong-MN", lcid => 0x00000c50, dir => "seed", sabbrevlangname => "MNM" },
1093 { name => "mni", lcid => 0x00000058, slist => ",", sabbrevlangname => "ZZZ" },
1094 { name => "mni-IN", lcid => 0x00000458, file => "mni_Beng_IN" },
1095 { name => "mni-Beng" },
1096 { name => "mni-Beng-IN", alias => "mni-IN" },
1097 { name => "moh", lcid => 0x0000007c, oemcp => 850, ebcdiccp => 37, slist => ",", dir => "seed", sabbrevlangname => "MWK" },
1098 { name => "moh-CA", lcid => 0x0000047c, dir => "seed" },
1099 { name => "moh-Latn", alias => "moh" },
1100 { name => "moh-Latn-CA", alias => "moh-CA" },
1101 { name => "mr", lcid => 0x0000004e, slist => ",", group => 15 },
1102 { name => "mr-IN", lcid => 0x0000044e },
1103 { name => "ms", lcid => 0x0000003e, oemcp => 850, sabbrevlangname => "MSL", sopentypelang => "MLY" },
1104 { name => "ms-BN", lcid => 0x0000083e, sabbrevlangname => "MSB" },
1105 { name => "ms-ID" },
1106 { name => "ms-Latn", alias => "ms" },
1107 { name => "ms-Latn-BN", alias => "ms-BN" },
1108 { name => "ms-Latn-MY", alias => "ms-MY" },
1109 { name => "ms-Latn-SG", alias => "ms-SG" },
1110 { name => "ms-MY", lcid => 0x0000043e },
1111 { name => "ms-SG" },
1112 { name => "mt", lcid => 0x0000003a, sopentypelang => "MTS" },
1113 { name => "mt-MT", lcid => 0x0000043a },
1114 { name => "mua" },
1115 { name => "mua-CM" },
1116 { name => "my", lcid => 0x00000055, sopentypelang => "BRM" },
1117 { name => "my-MM", lcid => 0x00000455 },
1118 { name => "mzn" },
1119 { name => "mzn-IR" },
1120 { name => "naq" },
1121 { name => "naq-NA" },
1122 { name => "nb", lcid => 0x00007c14, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NOR", sopentypelang => "NOR" },
1123 { name => "nb-NO", lcid => 0x00000414 },
1124 { name => "nb-SJ" },
1125 { name => "nd", sopentypelang => "NDB" },
1126 { name => "nd-ZW" },
1127 { name => "nds" },
1128 { name => "nds-DE" },
1129 { name => "nds-NL" },
1130 { name => "ne", lcid => 0x00000061, slist => "," },
1131 { name => "ne-IN", lcid => 0x00000861, sabbrevlangname => "NEI" },
1132 { name => "ne-NP", lcid => 0x00000461, group => 15 },
1133 { name => "nl", lcid => 0x00000013, oemcp => 850 },
1134 { name => "nl-AW" },
1135 { name => "nl-BE", lcid => 0x00000813, sabbrevlangname => "NLB" },
1136 { name => "nl-BQ" },
1137 { name => "nl-CW" },
1138 { name => "nl-NL", lcid => 0x00000413 },
1139 { name => "nl-SR" },
1140 { name => "nl-SX" },
1141 { name => "nmg" },
1142 { name => "nmg-CM" },
1143 { name => "nn", lcid => 0x00007814, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NON", sopentypelang => "NYN" },
1144 { name => "nn-NO", lcid => 0x00000814 },
1145 { name => "nnh" },
1146 { name => "nnh-CM" },
1147 { name => "no", lcid => 0x00000014, oemcp => 850, ebcdiccp => 20277, sortlocale => "nb-NO" },
1148 { name => "nqo", idigits => 3, inegnumber => 3, slist => "\x{060c}", dir => "seed", sopentypelang => "NKO" },
1149 { name => "nqo-GN", dir => "seed" },
1150 { name => "nr", dir => "seed", sopentypelang => "NDB" },
1151 { name => "nr-ZA", dir => "seed" },
1152 { name => "nso", lcid => 0x0000006c, oemcp => 850, dir => "seed", sopentypelang => "SOT" },
1153 { name => "nso-ZA", lcid => 0x0000046c, dir => "seed" },
1154 { name => "nus" },
1155 { name => "nus-SD", alias => "nus-SS" },
1156 { name => "nus-SS" },
1157 { name => "nyn", sopentypelang => "NKL" },
1158 { name => "nyn-UG" },
1159 { name => "oc", lcid => 0x00000082, oemcp => 850, ebcdiccp => 20297, dir => "seed" },
1160 { name => "oc-FR", lcid => 0x00000482, dir => "seed" },
1161 { name => "oc-Latn", alias => "oc" },
1162 { name => "oc-Latn-FR", alias => "oc-FR" },
1163 { name => "om", lcid => 0x00000072, sopentypelang => "ORO" },
1164 { name => "om-ET", lcid => 0x00000472 },
1165 { name => "om-KE" },
1166 { name => "or", lcid => 0x00000048, slist => ",", group => 15 },
1167 { name => "or-IN", lcid => 0x00000448 },
1168 { name => "os" },
1169 { name => "os-GE" },
1170 { name => "os-RU" },
1171 { name => "pa", lcid => 0x00000046, slist => "," },
1172 { name => "pa-Arab", lcid => 0x00007c46, slist => ";", inegnumber => 2, oemcp => 720, group => 13, sabbrevlangname => "PAP" },
1173 { name => "pa-Arab-PK", lcid => 0x00000846 },
1174 { name => "pa-Guru" },
1175 { name => "pa-Guru-IN", alias => "pa-IN" },
1176 { name => "pa-IN", lcid => 0x00000446, sparent => "pa-Guru", file => "pa_Guru_IN", group => 15 },
1177 ## name => "pap", lcid => 0x00000079 },
1178 ## name => "pap-029", lcid => 0x00000479 },
1179 { name => "pcm" },
1180 { name => "pcm-NG", alias => "pcm-Latn-NG" },
1181 { name => "pcm-Latn", file => "pcm" },
1182 { name => "pcm-Latn-NG", file => "pcm_NG" },
1183 { name => "pl", lcid => 0x00000015, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "PLK", sopentypelang => "PLK" },
1184 { name => "pl-PL", lcid => 0x00000415 },
1185 ## name => "prg" },
1186 ## name => "prg-001" },
1187 { name => "prs", lcid => 0x0000008c, file => "fa", inegnumber => 3, oemcp => 720, group => 13, sopentypelang => "DRI" },
1188 { name => "prs-AF", lcid => 0x0000048c, file => "fa_AF" },
1189 { name => "prs-Arab", alias => "prs" },
1190 { name => "prs-Arab-AF", alias => "prs-AF" },
1191 { name => "ps", lcid => 0x00000063, group => 13, sabbrevlangname => "PAS", sopentypelang => "PAS" },
1192 { name => "ps-AF", lcid => 0x00000463 },
1193 { name => "ps-PK" },
1194 { name => "pt", lcid => 0x00000016, oemcp => 850, sabbrevlangname => "PTB", sopentypelang => "PTG" },
1195 { name => "pt-AO" },
1196 { name => "pt-BR", lcid => 0x00000416 },
1197 { name => "pt-CH", oemcp => 65001 },
1198 { name => "pt-CV" },
1199 { name => "pt-GQ", oemcp => 65001 },
1200 { name => "pt-GW" },
1201 { name => "pt-LU", oemcp => 65001 },
1202 { name => "pt-MO" },
1203 { name => "pt-MZ" },
1204 { name => "pt-PT", lcid => 0x00000816, sabbrevlangname => "PTG" },
1205 { name => "pt-ST" },
1206 { name => "pt-TL" },
1207 ## name => "qps-Latn-x-sh", lcid => 0x80000901 },
1208 ## name => "qps-ploc", lcid => 0x80000501 },
1209 ## name => "qps-ploca", lcid => 0x800005fe },
1210 ## name => "qps-plocm", lcid => 0x800009ff },
1211 { name => "qu", alias => "quz" },
1212 { name => "qu-BO", alias => "quz-BO" },
1213 { name => "qu-EC", alias => "quz-EC" },
1214 { name => "qu-PE", alias => "quz-PE" },
1215 { name => "quc", lcid => 0x00000086, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed" },
1216 { name => "quc-Latn", lcid => 0x00007c86, file => "quc", dir => "seed" },
1217 { name => "quc-Latn-GT", lcid => 0x00000486, file => "quc_GT", dir => "seed" },
1218 { name => "qut", alias => "quc" },
1219 { name => "qut-GT", alias => "quc-Latn-GT" },
1220 { name => "quz", lcid => 0x0000006b, file => "qu", territory => "BO", oemcp => 850, ebcdiccp => 20284, slist => "," },
1221 { name => "quz-BO", lcid => 0x0000046b, file => "qu_BO" },
1222 { name => "quz-EC", lcid => 0x0000086b, file => "qu_EC" },
1223 { name => "quz-Latn", alias => "quz" },
1224 { name => "quz-Latn-BO", alias => "quz-BO" },
1225 { name => "quz-Latn-EC", alias => "quz-EC" },
1226 { name => "quz-Latn-PE", alias => "quz-PE" },
1227 { name => "quz-PE", lcid => 0x00000c6b, file => "qu_PE" },
1228 { name => "rm", lcid => 0x00000017, oemcp => 850, ebcdiccp => 20273, sabbrevlangname => "RMC", sopentypelang => "RMS" },
1229 { name => "rm-CH", lcid => 0x00000417 },
1230 { name => "rn" },
1231 { name => "rn-BI" },
1232 { name => "ro", lcid => 0x00000018, oemcp => 852, ebcdiccp => 20880, sabbrevlangname => "ROM", sopentypelang => "ROM" },
1233 { name => "ro-MD", lcid => 0x00000818, maccp => 65001, sabbrevlangname => "ROD" },
1234 { name => "ro-RO", lcid => 0x00000418, group => 2 },
1235 { name => "rof" },
1236 { name => "rof-TZ" },
1237 { name => "ru", lcid => 0x00000019, oemcp => 866 },
1238 { name => "ru-BY", maccp => 65001 },
1239 { name => "ru-KG", maccp => 65001 },
1240 { name => "ru-KZ", maccp => 65001 },
1241 { name => "ru-MD", lcid => 0x00000819, maccp => 65001, sabbrevlangname => "RUM" },
1242 { name => "ru-RU", lcid => 0x00000419, group => 5 },
1243 { name => "ru-UA", maccp => 65001 },
1244 { name => "rw", lcid => 0x00000087, oemcp => 437, sopentypelang => "RUA" },
1245 { name => "rw-RW", lcid => 0x00000487 },
1246 { name => "rwk" },
1247 { name => "rwk-TZ" },
1248 { name => "sa", lcid => 0x0000004f, slist => ",", group => 15 },
1249 { name => "sa-Deva", alias => "sa" },
1250 { name => "sa-Deva-IN", alias => "sa-IN" },
1251 { name => "sa-IN", lcid => 0x0000044f },
1252 { name => "sah", lcid => 0x00000085, oemcp => 866, group => 5, sopentypelang => "YAK" },
1253 { name => "sah-Cyrl", alias => "sah" },
1254 { name => "sah-Cyrl-RU", alias => "sah-RU" },
1255 { name => "sah-RU", lcid => 0x00000485 },
1256 { name => "saq" },
1257 { name => "saq-KE" },
1258 { name => "sat" },
1259 { name => "sat-Olck" },
1260 { name => "sat-Olck-IN" },
1261 { name => "sbp" },
1262 { name => "sbp-TZ" },
1263 { name => "sc" },
1264 { name => "sc-IT" },
1265 { name => "sd", lcid => 0x00000059, inegnumber => 3, oemcp => 720, sabbrevlangname => "SIP" },
1266 { name => "sd-Arab", lcid => 0x00007c59, group => 13 },
1267 { name => "sd-Arab-PK", lcid => 0x00000859 },
1268 { name => "sd-Deva", inegnumber => 1, slist => ",", oemcp => 65001, group => 15 },
1269 { name => "sd-Deva-IN", lcid => 0x00000459, sabbrevlangname => "ZZZ" },
1270 { name => "sd-PK", alias => "sd-Arab-PK" },
1271 { name => "se", lcid => 0x0000003b, oemcp => 850, ebcdiccp => 20277, sopentypelang => "NSM" },
1272 { name => "se-FI", lcid => 0x00000c3b, ebcdiccp => 20278, sabbrevlangname => "SMG" },
1273 { name => "se-NO", lcid => 0x0000043b },
1274 { name => "se-SE", lcid => 0x0000083b, ebcdiccp => 20278, sabbrevlangname => "SMF" },
1275 { name => "se-Latn", alias => "se" },
1276 { name => "se-Latn-FI", alias => "se-FI" },
1277 { name => "se-Latn-NO", alias => "se-NO" },
1278 { name => "se-Latn-SE", alias => "se-SE" },
1279 { name => "seh" },
1280 { name => "seh-MZ" },
1281 { name => "ses" },
1282 { name => "ses-ML" },
1283 { name => "sg", sopentypelang => "SGO" },
1284 { name => "sg-CF" },
1285 { name => "shi" },
1286 { name => "shi-Latn" },
1287 { name => "shi-Latn-MA" },
1288 { name => "shi-Tfng" },
1289 { name => "shi-Tfng-MA" },
1290 { name => "si", lcid => 0x0000005b, group => 15, sopentypelang => "SNH" },
1291 { name => "si-LK", lcid => 0x0000045b },
1292 { name => "sk", lcid => 0x0000001b, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "SKY", sopentypelang => "SKY" },
1293 { name => "sk-SK", lcid => 0x0000041b },
1294 { name => "sl", lcid => 0x00000024, oemcp => 852, ebcdiccp => 20880, group => 2 },
1295 { name => "sl-SI", lcid => 0x00000424 },
1296 { name => "sma", lcid => 0x0000783b, sparent => "se", ebcdiccp => 20278, dir => "seed", sabbrevlangname => "SMB", sopentypelang => "SSM" },
1297 { name => "sma-Latn", alias => "sma" },
1298 { name => "sma-Latn-NO", alias => "sma-NO" },
1299 { name => "sma-Latn-SE", alias => "sma-SE" },
1300 { name => "sma-NO", lcid => 0x0000183b, ebcdiccp => 20277, dir => "seed", sabbrevlangname => "SMA" },
1301 { name => "sma-SE", lcid => 0x00001c3b, dir => "seed" },
1302 { name => "smj", lcid => 0x00007c3b, sparent => "se", ebcdiccp => 20278, dir => "seed", sabbrevlangname => "SMK", sopentypelang => "LSM" },
1303 { name => "smj-Latn", alias => "smj" },
1304 { name => "smj-Latn-NO", alias => "smj-NO" },
1305 { name => "smj-Latn-SE", alias => "smj-SE" },
1306 { name => "smj-NO", lcid => 0x0000103b, ebcdiccp => 20277, dir => "seed", sabbrevlangname => "SMJ" },
1307 { name => "smj-SE", lcid => 0x0000143b, dir => "seed" },
1308 { name => "smn", lcid => 0x0000703b, sparent => "se", ebcdiccp => 20278, sopentypelang => "ISM" },
1309 { name => "smn-FI", lcid => 0x0000243b },
1310 { name => "smn-Latn", alias => "smn" },
1311 { name => "smn-Latn-FI", alias => "smn-FI" },
1312 { name => "sms", lcid => 0x0000743b, sparent => "se", ebcdiccp => 20278, dir => "seed", sopentypelang => "SKS" },
1313 { name => "sms-FI", lcid => 0x0000203b, dir => "seed" },
1314 { name => "sms-Latn", alias => "sms" },
1315 { name => "sms-Latn-FI", alias => "sms-FI" },
1316 { name => "sn", sopentypelang => "SNA0" },
1317 { name => "sn-Latn", file => "sn" },
1318 { name => "sn-Latn-ZW", file => "sn_ZW" },
1319 { name => "sn-ZW", alias => "sn-Latn-ZW" },
1320 { name => "so", lcid => 0x00000077, sopentypelang => "SML" },
1321 { name => "so-DJ" },
1322 { name => "so-ET" },
1323 { name => "so-KE" },
1324 { name => "so-SO", lcid => 0x00000477 },
1325 { name => "sq", lcid => 0x0000001c, oemcp => 852, ebcdiccp => 20880, group => 2 },
1326 { name => "sq-AL", lcid => 0x0000041c },
1327 { name => "sq-MK" },
1328 { name => "sq-XK" },
1329 { name => "sr", lcid => 0x00007c1a, sortlocale => "sr-Latn-RS", oemcp => 852, group => 2, sabbrevlangname => "SRB", sopentypelang => "SRB" },
1330 { name => "sr-Cyrl", lcid => 0x00006c1a, oemcp => 855, ebcdiccp => 21025, group => 5, sabbrevlangname => "SRO" },
1331 { name => "sr-Cyrl-BA", lcid => 0x00001c1a, sabbrevlangname => "SRN" },
1332 { name => "sr-Cyrl-ME", lcid => 0x0000301a, sabbrevlangname => "SRQ" },
1333 { name => "sr-Cyrl-RS", lcid => 0x0000281a },
1334 { name => "sr-Cyrl-XK" },
1335 { name => "sr-Latn", lcid => 0x0000701a, sabbrevlangname => "SRM" },
1336 { name => "sr-Latn-BA", lcid => 0x0000181a, maccp => 10082, ebcdiccp => 870, sabbrevlangname => "SRS" },
1337 { name => "sr-Latn-ME", lcid => 0x00002c1a, sabbrevlangname => "SRP" },
1338 { name => "sr-Latn-RS", lcid => 0x0000241a, sabbrevlangname => "SRM" },
1339 { name => "sr-Latn-XK" },
1340 ## name => "sr-Cyrl-CS", lcid => 0x00000c1a },
1341 ## name => "sr-Latn-CS", lcid => 0x0000081a },
1342 { name => "ss", dir => "seed", sopentypelang => "SWZ" },
1343 { name => "ss-SZ", dir => "seed" },
1344 { name => "ss-ZA", dir => "seed" },
1345 { name => "ssy", dir => "seed" },
1346 { name => "ssy-ER", dir => "seed" },
1347 { name => "st", lcid => 0x00000030, dir => "seed" },
1348 { name => "st-LS", dir => "seed" },
1349 { name => "st-ZA", lcid => 0x00000430, dir => "seed" },
1350 { name => "su" },
1351 { name => "su-Latn" },
1352 { name => "su-Latn-ID" },
1353 { name => "sv", lcid => 0x0000001d, oemcp => 850, ebcdiccp => 20278, sabbrevlangname => "SVE", sopentypelang => "SVE" },
1354 { name => "sv-AX" },
1355 { name => "sv-FI", lcid => 0x0000081d, sabbrevlangname => "SVF" },
1356 { name => "sv-SE", lcid => 0x0000041d, sabbrevlangname => "SVE" },
1357 { name => "sw", lcid => 0x00000041, territory => "KE", oemcp => 437, ebcdiccp => 500, sabbrevlangname => "SWK", sopentypelang => "SWK" },
1358 { name => "sw-CD" },
1359 { name => "sw-KE", lcid => 0x00000441 },
1360 { name => "sw-TZ" },
1361 { name => "sw-UG" },
1362 { name => "swc-CD", alias => "sw-CD" },
1363 { name => "syr", lcid => 0x0000005a, slist => ",", group => 13, dir => "seed" },
1364 { name => "syr-SY", lcid => 0x0000045a, dir => "seed" },
1365 { name => "syr-Syrc", alias => "syr" },
1366 { name => "syr-Syrc-SY", alias => "syr-SY" },
1367 { name => "ta", lcid => 0x00000049, slist => ",", group => 15, sabbrevlangname => "TAI" },
1368 { name => "ta-IN", lcid => 0x00000449 },
1369 { name => "ta-LK", lcid => 0x00000849, sabbrevlangname => "TAM" },
1370 { name => "ta-MY" },
1371 { name => "ta-SG" },
1372 { name => "te", lcid => 0x0000004a, group => 15 },
1373 { name => "te-IN", lcid => 0x0000044a },
1374 { name => "teo" },
1375 { name => "teo-KE" },
1376 { name => "teo-UG" },
1377 { name => "tg", lcid => 0x00000028, oemcp => 866, group => 5, sabbrevlangname => "TAJ", sopentypelang => "TAJ" },
1378 { name => "tg-Cyrl", lcid => 0x00007c28, file => "tg" },
1379 { name => "tg-Cyrl-TJ", lcid => 0x00000428, file => "tg_TJ" },
1380 { name => "tg-TJ", alias => "tg-Cyrl-TJ" },
1381 { name => "th", lcid => 0x0000001e, oemcp => 874, ebcdiccp => 20838, slist => ",", group => 11 },
1382 { name => "th-TH", lcid => 0x0000041e },
1383 { name => "ti", lcid => 0x00000073, territory => "ER", sopentypelang => "TGY" },
1384 { name => "ti-ER", lcid => 0x00000873 },
1385 { name => "ti-ET", lcid => 0x00000473, sabbrevlangname => "TIE" },
1386 { name => "tig", dir => "seed", sopentypelang => "TGR" },
1387 { name => "tig-ER", dir => "seed" },
1388 { name => "tig-Ethi-ER", alias => "tig-ER" },
1389 { name => "tk", lcid => 0x00000042, oemcp => 852, ebcdiccp => 20880, group => 2, sopentypelang => "TKM" },
1390 { name => "tk-Latn", alias => "tk" },
1391 { name => "tk-Latn-TM", alias => "tk-TM" },
1392 { name => "tk-TM", lcid => 0x00000442 },
1393 { name => "tn", lcid => 0x00000032, oemcp => 850, dir => "seed", sopentypelang => "TNA" },
1394 { name => "tn-BW", lcid => 0x00000832, dir => "seed", sabbrevlangname => "TSB" },
1395 { name => "tn-ZA", lcid => 0x00000432, dir => "seed" },
1396 { name => "to", sopentypelang => "TGN" },
1397 { name => "to-TO" },
1398 { name => "tr", lcid => 0x0000001f, oemcp => 857, ebcdiccp => 20905, group => 6, sabbrevlangname => "TRK", sopentypelang => "TRK" },
1399 { name => "tr-CY" },
1400 { name => "tr-TR", lcid => 0x0000041f },
1401 { name => "ts", lcid => 0x00000031, dir => "seed", sopentypelang => "TSG" },
1402 { name => "ts-ZA", lcid => 0x00000431, dir => "seed" },
1403 { name => "tt", lcid => 0x00000044, oemcp => 866, group => 5, sabbrevlangname => "TTT" },
1404 { name => "tt-Cyrl", alias => "tt" },
1405 { name => "tt-Cyrl-RU", alias => "tt-RU" },
1406 { name => "tt-RU", lcid => 0x00000444 },
1407 { name => "twq" },
1408 { name => "twq-NE" },
1409 { name => "tzm", lcid => 0x0000005f, sortlocale => "tzm-Latn-DZ", oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "TZA" },
1410 { name => "tzm-Latn", lcid => 0x00007c5f, territory => "DZ", file => "tzm" },
1411 { name => "tzm-Latn-MA", file => "tzm_MA", oemcp => 65001 },
1412 { name => "tzm-Latn-DZ", lcid => 0x0000085f, file => "tzm" },
1413 { name => "tzm-MA", alias => "tzm-Latn-MA" },
1414 { name => "tzm-DZ", alias => "tzm-Latn-DZ" },
1415 ## name => "tzm-Arab", group => 13 },
1416 ## name => "tzm-Arab-MA", lcid => 0x0000045f },
1417 ## name => "tzm-Tfng", lcid => 0x0000785f },
1418 ## name => "tzm-Tfng-MA", lcid => 0x0000105f },
1419 { name => "ug", lcid => 0x00000080, oemcp => 720, slist => ",", group => 13, sopentypelang => "UYG", nativedigits => "0123456789" },
1420 { name => "ug-Arab", alias => "ug" },
1421 { name => "ug-Arab-CN", alias => "ug-CN" },
1422 { name => "ug-CN", lcid => 0x00000480 },
1423 { name => "uk", lcid => 0x00000022, oemcp => 866, maccp => 10017, ebcdiccp => 500, group => 5 },
1424 { name => "uk-UA", lcid => 0x00000422 },
1425 { name => "ur", lcid => 0x00000020, oemcp => 720 },
1426 { name => "ur-IN", lcid => 0x00000820, maccp => 65001, sabbrevlangname => "URI" },
1427 { name => "ur-PK", lcid => 0x00000420, group => 13 },
1428 { name => "uz", lcid => 0x00000043, oemcp => 857, maccp => 10029, group => 2 },
1429 { name => "uz-Arab", oemcp => 65001, maccp => 65001 },
1430 { name => "uz-Arab-AF" },
1431 { name => "uz-Cyrl", lcid => 0x00007843, oemcp => 866, maccp => 10007, group => 5, sabbrevlangname => "UZC" },
1432 { name => "uz-Cyrl-UZ", lcid => 0x00000843 },
1433 { name => "uz-Latn", lcid => 0x00007c43 },
1434 { name => "uz-Latn-UZ", lcid => 0x00000443 },
1435 { name => "vai" },
1436 { name => "vai-Latn" },
1437 { name => "vai-Latn-LR" },
1438 { name => "vai-Vaii" },
1439 { name => "vai-Vaii-LR" },
1440 { name => "ve", lcid => 0x00000033, dir => "seed", sabbrevlangname => "ZZZ" },
1441 { name => "ve-ZA", lcid => 0x00000433, dir => "seed" },
1442 { name => "vi", lcid => 0x0000002a, oemcp => 1258, slist => ",", group => 14, sabbrevlangname => "VIT", sopentypelang => "VIT" },
1443 { name => "vi-VN", lcid => 0x0000042a },
1444 { name => "vo", dir => "seed" },
1445 { name => "vo-001", dir => "seed" },
1446 { name => "vun" },
1447 { name => "vun-TZ" },
1448 { name => "wa", dir => "seed", oemcp => 850 },
1449 { name => "wa-BE", dir => "seed" },
1450 { name => "wae" },
1451 { name => "wae-CH" },
1452 { name => "wal", dir => "seed" },
1453 { name => "wal-ET", dir => "seed" },
1454 { name => "wo", lcid => 0x00000088, oemcp => 850, ebcdiccp => 20297, sopentypelang => "WLF" },
1455 { name => "wo-Latn", alias => "wo" },
1456 { name => "wo-Latn-SN", alias => "wo-SN" },
1457 { name => "wo-SN", lcid => 0x00000488 },
1458 { name => "x-IV_mathan", lcid => 0x0001007f, alias => "" },
1459 { name => "xh", lcid => 0x00000034, oemcp => 850, sopentypelang => "XHS" },
1460 { name => "xh-ZA", lcid => 0x00000434 },
1461 { name => "xog" },
1462 { name => "xog-UG" },
1463 { name => "yav" },
1464 { name => "yav-CM" },
1465 { name => "yi", lcid => 0x0000003d, sabbrevlangname => "ZZZ", sopentypelang => "JII" },
1466 { name => "yi-001", lcid => 0x0000043d },
1467 { name => "yo", lcid => 0x0000006a, oemcp => 437, sopentypelang => "YBA" },
1468 { name => "yo-BJ", ebcdiccp => 500 },
1469 { name => "yo-Latn", alias => "yo" },
1470 { name => "yo-Latn-NG", alias => "yo-NG" },
1471 { name => "yo-NG", lcid => 0x0000046a },
1472 { name => "yrl" },
1473 { name => "yrl-BR" },
1474 { name => "yrl-CO" },
1475 { name => "yrl-VE" },
1476 { name => "yue" },
1477 { name => "yue-Hans" },
1478 { name => "yue-Hans-CN" },
1479 { name => "yue-Hant" },
1480 { name => "yue-Hant-HK" },
1481 { name => "zgh" },
1482 { name => "zgh-MA", alias => "zgh-Tfng-MA" },
1483 { name => "zgh-Tfng", file => "zgh" },
1484 { name => "zgh-Tfng-MA", file => "zgh_MA" },
1485 { name => "zh", lcid => 0x00007804, ireadinglayout => 2, oemcp => 936, slist => ",", sscripts => "Hani Hans", sabbrevlangname => "CHS", sopentypelang => "ZHS", nativedigits => "0123456789" },
1486 { name => "zh-CN", lcid => 0x00000804, file => "zh_Hans_CN", sparent => "zh-Hans" },
1487 { name => "zh-CN_phoneb", lcid => 0x00050804, alias => "zh-CN" },
1488 { name => "zh-CN_stroke", lcid => 0x00020804, alias => "zh-CN" },
1489 { name => "zh-Hans", lcid => 0x00000004, group => 10 },
1490 { name => "zh-Hans-CN", alias => "zh-CN" },
1491 { name => "zh-Hans-CN-u-co-phonebk", alias => "zh-CN_phoneb" },
1492 { name => "zh-Hans-CN-u-co-stroke", alias => "zh-CN_stroke" },
1493 { name => "zh-Hans-HK", slist => ";", nativedigits => "" },
1494 { name => "zh-Hans-MO", slist => ";", nativedigits => "" },
1495 { name => "zh-Hans-SG", alias => "zh-SG" },
1496 { name => "zh-Hans-SG-u-co-phonebk", alias => "zh-SG_phoneb" },
1497 { name => "zh-Hans-SG-u-co-stroke", alias => "zh-SG_stroke" },
1498 { name => "zh-Hant", lcid => 0x00007c04, sortlocale => "zh-HK", ireadinglayout => 2, oemcp => 950, slist => ",", sscripts => "Hani Hant", group => 9, sabbrevlangname => "CHT", sopentypelang => "ZHH" },
1499 { name => "zh-Hant-HK", alias => "zh-HK" },
1500 { name => "zh-Hant-HK-u-co-unihan", alias => "zh-HK_radstr" },
1501 { name => "zh-Hant-MO", alias => "zh-MO" },
1502 { name => "zh-Hant-MO-u-co-stroke", alias => "zh-MO_stroke" },
1503 { name => "zh-Hant-MO-u-co-unihan", alias => "zh-MO_radstr" },
1504 { name => "zh-Hant-TW", alias => "zh-TW" },
1505 { name => "zh-Hant-TW-u-co-phonetic", alias => "zh-TW_pronun" },
1506 { name => "zh-Hant-TW-u-co-unihan", alias => "zh-TW_radstr" },
1507 { name => "zh-HK", lcid => 0x00000c04, file => "zh_Hant_HK", sparent => "zh-Hant", sabbrevlangname => "ZHH" },
1508 { name => "zh-HK_radstr", lcid => 0x00040c04, alias => "zh-HK" },
1509 { name => "zh-MO", lcid => 0x00001404, file => "zh_Hant_MO", sparent => "zh-Hant", sabbrevlangname => "ZHM", sopentypelang => "ZHT" },
1510 { name => "zh-MO_radstr", lcid => 0x00041404, alias => "zh-MO" },
1511 { name => "zh-MO_stroke", lcid => 0x00021404, alias => "zh-MO" },
1512 { name => "zh-SG", lcid => 0x00001004, file => "zh_Hans_SG", sparent => "zh-Hans", sabbrevlangname => "ZHI" },
1513 { name => "zh-SG_phoneb", lcid => 0x00051004, alias => "zh-SG" },
1514 { name => "zh-SG_stroke", lcid => 0x00021004, alias => "zh-SG" },
1515 { name => "zh-TW", lcid => 0x00000404, file => "zh_Hant_TW", sparent => "zh-Hant", sopentypelang => "ZHT" },
1516 { name => "zh-TW_pronun", lcid => 0x00030404, alias => "zh-TW" },
1517 { name => "zh-TW_radstr", lcid => 0x00040404, alias => "zh-TW" },
1518 { name => "zu", lcid => 0x00000035, oemcp => 850 },
1519 { name => "zu-ZA", lcid => 0x00000435 },
# Calendar table, one hash per calendar.
# Every entry has a numeric `id`; depending on the entry it also carries a
# `name`, a `type` together with the `locale` it is associated with, a list
# of `eras`, and/or an `itwodigityearmax` value.
# NOTE(review): the ids presumably correspond to the Windows CAL_* calendar
# identifiers and `type` to a CLDR calendar name - confirm against the code
# that consumes @calendars.
1522 my @calendars =
1524 { id => 1, name => "Gregorian", itwodigityearmax => 2049 },
1525 { id => 2, type => "gregorian", locale => "en-US", itwodigityearmax => 2049 },
1526 { id => 3, type => "japanese", locale => "ja-JP", eras => [ 232..236 ] },
1527 { id => 4, type => "roc", locale => "zh-TW", eras => [ 1 ] },
1528 { id => 5, type => "dangi", locale => "ko-KR", eras => [ 0 ] },
1529 { id => 6, type => "islamic", locale => "ar-SA", itwodigityearmax => 1451 },
1530 { id => 7, type => "buddhist", locale => "th-TH", eras => [ 0 ] },
1531 { id => 8, type => "hebrew", locale => "he-IL", itwodigityearmax => 5810 },
1532 { id => 9, type => "gregorian", locale => "fr-FR", itwodigityearmax => 2049 },
1533 { id => 10, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1534 { id => 11, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1535 { id => 12, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1536 { id => 13, name => "Julian", locale => "en-US", itwodigityearmax => 2049 },
1537 { id => 14, name => "Japanese Lunisolar" },
1538 { id => 15, name => "Chinese Lunisolar" },
1539 { id => 16, name => "Saka" },
1540 { id => 17, name => "Lunar ETO Chinese" },
1541 { id => 18, name => "Lunar ETO Korean" },
1542 { id => 19, name => "Lunar ETO Rokuyou" },
1543 { id => 20, name => "Korean Lunisolar" },
1544 { id => 21, name => "Taiwan Lunisolar" },
1545 { id => 22, type => "persian", locale => "prs-AF", itwodigityearmax => 1429 },
1546 { id => 23, type => "islamic-umalqura", locale => "ar-SA", itwodigityearmax => 1451 },
# Geographic location table, one hash per location, listed in ascending id
# order.
# Every entry has a numeric `id`.  Most entries have a `name`, which is
# either a two-letter territory code or a three-digit region code.  Some
# entries have only a `parent` (the name of another entry) and a few have
# neither.  `region` and `sintlsymbol` appear on individual entries only.
# Note that ids 125 (Jan Mayen) and 220 (Svalbard) both use the name "SJ".
# NOTE(review): the ids presumably are Windows GEOID values - confirm
# against the code that consumes @geoids.
1549 my @geoids =
1551 { id => 2, name => "AG" }, # Antigua and Barbuda
1552 { id => 3, name => "AF" }, # Afghanistan
1553 { id => 4, name => "DZ" }, # Algeria
1554 { id => 5, name => "AZ" }, # Azerbaijan
1555 { id => 6, name => "AL" }, # Albania
1556 { id => 7, name => "AM" }, # Armenia
1557 { id => 8, name => "AD" }, # Andorra
1558 { id => 9, name => "AO" }, # Angola
1559 { id => 10, name => "AS" }, # American Samoa
1560 { id => 11, name => "AR" }, # Argentina
1561 { id => 12, name => "AU" }, # Australia
1562 { id => 14, name => "AT" }, # Austria
1563 { id => 17, name => "BH" }, # Bahrain
1564 { id => 18, name => "BB" }, # Barbados
1565 { id => 19, name => "BW" }, # Botswana
1566 { id => 20, name => "BM" }, # Bermuda
1567 { id => 21, name => "BE" }, # Belgium
1568 { id => 22, name => "BS" }, # Bahamas, The
1569 { id => 23, name => "BD" }, # Bangladesh
1570 { id => 24, name => "BZ" }, # Belize
1571 { id => 25, name => "BA" }, # Bosnia and Herzegovina
1572 { id => 26, name => "BO" }, # Bolivia
1573 { id => 27, name => "MM" }, # Myanmar
1574 { id => 28, name => "BJ" }, # Benin
1575 { id => 29, name => "BY" }, # Belarus
1576 { id => 30, name => "SB" }, # Solomon Islands
1577 { id => 32, name => "BR" }, # Brazil
1578 { id => 34, name => "BT" }, # Bhutan
1579 { id => 35, name => "BG" }, # Bulgaria
1580 { id => 37, name => "BN" }, # Brunei
1581 { id => 38, name => "BI" }, # Burundi
1582 { id => 39, name => "CA" }, # Canada
1583 { id => 40, name => "KH" }, # Cambodia
1584 { id => 41, name => "TD" }, # Chad
1585 { id => 42, name => "LK" }, # Sri Lanka
1586 { id => 43, name => "CG" }, # Congo
1587 { id => 44, name => "CD" }, # Congo (DRC)
1588 { id => 45, name => "CN" }, # China
1589 { id => 46, name => "CL" }, # Chile
1590 { id => 49, name => "CM" }, # Cameroon
1591 { id => 50, name => "KM" }, # Comoros
1592 { id => 51, name => "CO" }, # Colombia
1593 { id => 54, name => "CR" }, # Costa Rica
1594 { id => 55, name => "CF" }, # Central African Republic
1595 { id => 56, name => "CU" }, # Cuba
1596 { id => 57, name => "CV" }, # Cape Verde
1597 { id => 59, name => "CY" }, # Cyprus
1598 { id => 61, name => "DK" }, # Denmark
1599 { id => 62, name => "DJ" }, # Djibouti
1600 { id => 63, name => "DM" }, # Dominica
1601 { id => 65, name => "DO" }, # Dominican Republic
1602 { id => 66, name => "EC" }, # Ecuador
1603 { id => 67, name => "EG" }, # Egypt
1604 { id => 68, name => "IE" }, # Ireland
1605 { id => 69, name => "GQ" }, # Equatorial Guinea
1606 { id => 70, name => "EE" }, # Estonia
1607 { id => 71, name => "ER" }, # Eritrea
1608 { id => 72, name => "SV" }, # El Salvador
1609 { id => 73, name => "ET" }, # Ethiopia
1610 { id => 75, name => "CZ" }, # Czech Republic
1611 { id => 77, name => "FI" }, # Finland
1612 { id => 78, name => "FJ" }, # Fiji Islands
1613 { id => 80, name => "FM" }, # Micronesia
1614 { id => 81, name => "FO" }, # Faroe Islands
1615 { id => 84, name => "FR" }, # France
1616 { id => 86, name => "GM" }, # Gambia, The
1617 { id => 87, name => "GA" }, # Gabon
1618 { id => 88, name => "GE" }, # Georgia
1619 { id => 89, name => "GH" }, # Ghana
1620 { id => 90, name => "GI" }, # Gibraltar
1621 { id => 91, name => "GD" }, # Grenada
1622 { id => 93, name => "GL" }, # Greenland
1623 { id => 94, name => "DE" }, # Germany
1624 { id => 98, name => "GR" }, # Greece
1625 { id => 99, name => "GT" }, # Guatemala
1626 { id => 100, name => "GN" }, # Guinea
1627 { id => 101, name => "GY" }, # Guyana
1628 { id => 103, name => "HT" }, # Haiti
1629 { id => 104, name => "HK" }, # Hong Kong S.A.R.
1630 { id => 106, name => "HN" }, # Honduras
1631 { id => 108, name => "HR" }, # Croatia
1632 { id => 109, name => "HU" }, # Hungary
1633 { id => 110, name => "IS" }, # Iceland
1634 { id => 111, name => "ID" }, # Indonesia
1635 { id => 113, name => "IN" }, # India
1636 { id => 114, name => "IO" }, # British Indian Ocean Territory
1637 { id => 116, name => "IR" }, # Iran
1638 { id => 117, name => "IL" }, # Israel
1639 { id => 118, name => "IT" }, # Italy
1640 { id => 119, name => "CI" }, # Côte d'Ivoire
1641 { id => 121, name => "IQ" }, # Iraq
1642 { id => 122, name => "JP" }, # Japan
1643 { id => 124, name => "JM" }, # Jamaica
1644 { id => 125, name => "SJ" }, # Jan Mayen
1645 { id => 126, name => "JO" }, # Jordan
1646 { id => 127, parent => "UM" }, # Johnston Atoll
1647 { id => 129, name => "KE" }, # Kenya
1648 { id => 130, name => "KG" }, # Kyrgyzstan
1649 { id => 131, name => "KP" }, # North Korea
1650 { id => 133, name => "KI" }, # Kiribati
1651 { id => 134, name => "KR" }, # Korea
1652 { id => 136, name => "KW" }, # Kuwait
1653 { id => 137, name => "KZ" }, # Kazakhstan
1654 { id => 138, name => "LA" }, # Laos
1655 { id => 139, name => "LB" }, # Lebanon
1656 { id => 140, name => "LV" }, # Latvia
1657 { id => 141, name => "LT" }, # Lithuania
1658 { id => 142, name => "LR" }, # Liberia
1659 { id => 143, name => "SK" }, # Slovakia
1660 { id => 145, name => "LI" }, # Liechtenstein
1661 { id => 146, name => "LS" }, # Lesotho
1662 { id => 147, name => "LU" }, # Luxembourg
1663 { id => 148, name => "LY" }, # Libya
1664 { id => 149, name => "MG" }, # Madagascar
1665 { id => 151, name => "MO" }, # Macao S.A.R.
1666 { id => 152, name => "MD" }, # Moldova
1667 { id => 154, name => "MN" }, # Mongolia
1668 { id => 156, name => "MW" }, # Malawi
1669 { id => 157, name => "ML" }, # Mali
1670 { id => 158, name => "MC" }, # Monaco
1671 { id => 159, name => "MA" }, # Morocco
1672 { id => 160, name => "MU" }, # Mauritius
1673 { id => 162, name => "MR" }, # Mauritania
1674 { id => 163, name => "MT" }, # Malta
1675 { id => 164, name => "OM" }, # Oman
1676 { id => 165, name => "MV" }, # Maldives
1677 { id => 166, name => "MX" }, # Mexico
1678 { id => 167, name => "MY" }, # Malaysia
1679 { id => 168, name => "MZ" }, # Mozambique
1680 { id => 173, name => "NE" }, # Niger
1681 { id => 174, name => "VU" }, # Vanuatu
1682 { id => 175, name => "NG" }, # Nigeria
1683 { id => 176, name => "NL" }, # Netherlands
1684 { id => 177, name => "NO" }, # Norway
1685 { id => 178, name => "NP" }, # Nepal
1686 { id => 180, name => "NR" }, # Nauru
1687 { id => 181, name => "SR" }, # Suriname
1688 { id => 182, name => "NI" }, # Nicaragua
1689 { id => 183, name => "NZ" }, # New Zealand
1690 { id => 184, name => "PS" }, # Palestinian Authority
1691 { id => 185, name => "PY" }, # Paraguay
1692 { id => 187, name => "PE" }, # Peru
1693 { id => 190, name => "PK" }, # Pakistan
1694 { id => 191, name => "PL" }, # Poland
1695 { id => 192, name => "PA" }, # Panama
1696 { id => 193, name => "PT" }, # Portugal
1697 { id => 194, name => "PG" }, # Papua New Guinea
1698 { id => 195, name => "PW" }, # Palau
1699 { id => 196, name => "GW" }, # Guinea-Bissau
1700 { id => 197, name => "QA" }, # Qatar
1701 { id => 198, name => "RE" }, # Reunion
1702 { id => 199, name => "MH" }, # Marshall Islands
1703 { id => 200, name => "RO" }, # Romania
1704 { id => 201, name => "PH" }, # Philippines
1705 { id => 202, name => "PR" }, # Puerto Rico
1706 { id => 203, name => "RU" }, # Russia
1707 { id => 204, name => "RW" }, # Rwanda
1708 { id => 205, name => "SA" }, # Saudi Arabia
1709 { id => 206, name => "PM" }, # St. Pierre and Miquelon
1710 { id => 207, name => "KN" }, # St. Kitts and Nevis
1711 { id => 208, name => "SC" }, # Seychelles
1712 { id => 209, name => "ZA" }, # South Africa
1713 { id => 210, name => "SN" }, # Senegal
1714 { id => 212, name => "SI" }, # Slovenia
1715 { id => 213, name => "SL" }, # Sierra Leone
1716 { id => 214, name => "SM" }, # San Marino
1717 { id => 215, name => "SG" }, # Singapore
1718 { id => 216, name => "SO" }, # Somalia
1719 { id => 217, name => "ES" }, # Spain
1720 { id => 218, name => "LC" }, # St. Lucia
1721 { id => 219, name => "SD" }, # Sudan
1722 { id => 220, name => "SJ" }, # Svalbard
1723 { id => 221, name => "SE" }, # Sweden
1724 { id => 222, name => "SY" }, # Syria
1725 { id => 223, name => "CH" }, # Switzerland
1726 { id => 224, name => "AE" }, # United Arab Emirates
1727 { id => 225, name => "TT" }, # Trinidad and Tobago
1728 { id => 227, name => "TH" }, # Thailand
1729 { id => 228, name => "TJ" }, # Tajikistan
1730 { id => 231, name => "TO" }, # Tonga
1731 { id => 232, name => "TG" }, # Togo
1732 { id => 233, name => "ST" }, # São Tomé and Príncipe
1733 { id => 234, name => "TN" }, # Tunisia
1734 { id => 235, name => "TR" }, # Turkey
1735 { id => 236, name => "TV" }, # Tuvalu
1736 { id => 237, name => "TW" }, # Taiwan
1737 { id => 238, name => "TM" }, # Turkmenistan
1738 { id => 239, name => "TZ" }, # Tanzania
1739 { id => 240, name => "UG" }, # Uganda
1740 { id => 241, name => "UA" }, # Ukraine
1741 { id => 242, name => "GB" }, # United Kingdom
1742 { id => 244, name => "US" }, # United States
1743 { id => 245, name => "BF" }, # Burkina Faso
1744 { id => 246, name => "UY" }, # Uruguay
1745 { id => 247, name => "UZ" }, # Uzbekistan
1746 { id => 248, name => "VC" }, # St. Vincent and the Grenadines
1747 { id => 249, name => "VE" }, # Bolivarian Republic of Venezuela
1748 { id => 251, name => "VN" }, # Vietnam
1749 { id => 252, name => "VI" }, # Virgin Islands
1750 { id => 253, name => "VA" }, # Vatican City
1751 { id => 254, name => "NA" }, # Namibia
1752 { id => 257, name => "EH" }, # Western Sahara (disputed)
1753 { id => 258, parent => "UM" }, # Wake Island
1754 { id => 259, name => "WS" }, # Samoa
1755 { id => 260, name => "SZ" }, # Swaziland
1756 { id => 261, name => "YE" }, # Yemen
1757 { id => 263, name => "ZM" }, # Zambia
1758 { id => 264, name => "ZW" }, # Zimbabwe
1759 { id => 269, name => "CS" }, # Serbia and Montenegro (Former)
1760 { id => 270, name => "ME" }, # Montenegro
1761 { id => 271, name => "RS" }, # Serbia
1762 { id => 273, name => "CW" }, # Curaçao
1763 { id => 276, name => "SS" }, # South Sudan
1764 { id => 300, name => "AI" }, # Anguilla
1765 { id => 301, name => "AQ" }, # Antarctica
1766 { id => 302, name => "AW" }, # Aruba
1767 { id => 303, parent => "SH" }, # Ascension Island
1768 { id => 304, parent => "053" }, # Ashmore and Cartier Islands
1769 { id => 305, parent => "UM" }, # Baker Island
1770 { id => 306, name => "BV" }, # Bouvet Island
1771 { id => 307, name => "KY" }, # Cayman Islands
1772 { id => 308, name => "830", parent => "155" }, # Channel Islands
1773 { id => 309, name => "CX" }, # Christmas Island
1774 { id => 310, parent => "009" }, # Clipperton Island
1775 { id => 311, name => "CC" }, # Cocos (Keeling) Islands
1776 { id => 312, name => "CK" }, # Cook Islands
1777 { id => 313, parent => "053" }, # Coral Sea Islands
1778 { id => 314, parent => "IO" }, # Diego Garcia
1779 { id => 315, name => "FK" }, # Falkland Islands (Islas Malvinas)
1780 { id => 317, name => "GF" }, # French Guiana
1781 { id => 318, name => "PF" }, # French Polynesia
1782 { id => 319, name => "TF" }, # French Southern and Antarctic Lands
1783 { id => 321, name => "GP" }, # Guadeloupe
1784 { id => 322, name => "GU" }, # Guam
1785 { id => 323 }, # Guantanamo Bay
1786 { id => 324, name => "GG" }, # Guernsey
1787 { id => 325, name => "HM" }, # Heard Island and McDonald Islands
1788 { id => 326, parent => "UM" }, # Howland Island
1789 { id => 327, parent => "UM" }, # Jarvis Island
1790 { id => 328, name => "JE" }, # Jersey
1791 { id => 329, parent => "UM" }, # Kingman Reef
1792 { id => 330, name => "MQ" }, # Martinique
1793 { id => 331, name => "YT" }, # Mayotte
1794 { id => 332, name => "MS" }, # Montserrat
1795 { id => 333, name => "AN", region => 1 }, # Netherlands Antilles (Former)
1796 { id => 334, name => "NC" }, # New Caledonia
1797 { id => 335, name => "NU" }, # Niue
1798 { id => 336, name => "NF" }, # Norfolk Island
1799 { id => 337, name => "MP" }, # Northern Mariana Islands
1800 { id => 338, parent => "UM" }, # Palmyra Atoll
1801 { id => 339, name => "PN" }, # Pitcairn Islands
1802 { id => 340, parent => "MP" }, # Rota Island
1803 { id => 341, parent => "MP" }, # Saipan
1804 { id => 342, name => "GS" }, # South Georgia and the South Sandwich Islands
1805 { id => 343, name => "SH" }, # St. Helena
1806 { id => 346, parent => "MP" }, # Tinian Island
1807 { id => 347, name => "TK" }, # Tokelau
1808 { id => 348, parent => "SH" }, # Tristan da Cunha
1809 { id => 349, name => "TC" }, # Turks and Caicos Islands
1810 { id => 351, name => "VG" }, # Virgin Islands, British
1811 { id => 352, name => "WF" }, # Wallis and Futuna
1812 { id => 742, name => "002" }, # Africa
1813 { id => 2129, name => "142" }, # Asia
1814 { id => 10541, name => "150" }, # Europe
1815 { id => 15126, name => "IM" }, # Man, Isle of
1816 { id => 19618, name => "MK" }, # Macedonia, Former Yugoslav Republic of
1817 { id => 20900, name => "054" }, # Melanesia
1818 { id => 21206, name => "057" }, # Micronesia
1819 { id => 21242, parent => "UM" }, # Midway Islands
1820 { id => 23581, name => "021" }, # Northern America
1821 { id => 26286, name => "061" }, # Polynesia
1822 { id => 27082, name => "013" }, # Central America
1823 { id => 27114, name => "009" }, # Oceania
1824 { id => 30967, name => "SX" }, # Sint Maarten (Dutch part)
1825 { id => 31396, name => "005" }, # South America
1826 { id => 31706, name => "MF" }, # Saint Martin (French part)
1827 { id => 39070, name => "001" }, # World
1828 { id => 42483, name => "011" }, # Western Africa
1829 { id => 42484, name => "017" }, # Middle Africa
1830 { id => 42487, name => "015" }, # Northern Africa
1831 { id => 47590, name => "143" }, # Central Asia
1832 { id => 47599, name => "035" }, # South-Eastern Asia
1833 { id => 47600, name => "030" }, # Eastern Asia
1834 { id => 47603, name => "014" }, # Eastern Africa
1835 { id => 47609, name => "151" }, # Eastern Europe
1836 { id => 47610, name => "039" }, # Southern Europe
1837 { id => 47611, name => "145" }, # Middle East
1838 { id => 47614, name => "034" }, # Southern Asia
1839 { id => 7299303, name => "TL" }, # Democratic Republic of Timor-Leste
1840 { id => 9914689, name => "XK" }, # Kosovo
1841 { id => 10026358, name => "019" }, # Americas
1842 { id => 10028789, name => "AX" }, # Åland Islands
1843 { id => 10039880, name => "029", sintlsymbol => "XCD" }, # Caribbean
1844 { id => 10039882, name => "154" }, # Northern Europe
1845 { id => 10039883, name => "018" }, # Southern Africa
1846 { id => 10210824, name => "155" }, # Western Europe
1847 { id => 10210825, name => "053" }, # Australia and New Zealand
1848 { id => 161832015, name => "BL" }, # Saint Barthélemy
1849 { id => 161832256, name => "UM" }, # U.S. Minor Outlying Islands
1850 { id => 161832257, name => "419", parent => "019" }, # Latin America and the Caribbean
1851 { id => 161832258, name => "BQ" }, # Bonaire, Sint Eustatius and Saba
# File-level state shared by the subs below.  The arrays are sparse tables
# indexed by code point unless noted otherwise.
# NOTE(review): the purpose of tables that are not mentioned in a comment
# here is not visible from this part of the file.
1854 my @cp2uni = ();
1855 my @glyph2uni = ();
1856 my @lead_bytes = ();
1857 my @uni2cp = ();
# lower/upper case mappings, filled by load_data from UnicodeData.txt
1858 my @tolower_table = ();
1859 my @toupper_table = ();
# filled by load_data from the digit field of UnicodeData.txt
1860 my @digitmap_table = ();
1861 my @halfwidth_table = ();
1862 my @fullwidth_table = ();
1863 my @cjk_compat_table = ();
1864 my @chinese_traditional_table = ();
1865 my @chinese_simplified_table = ();
# category flags, initial joining type and bidi direction, filled by load_data
1866 my @category_table = ();
1867 my @initial_joining_table = ();
1868 my @direction_table = ();
# decomposition data consulted by get_composition: each defined entry of
# the decomposition tables is a reference to a list of code points
1869 my @decomp_table = ();
# combining class per code point; load_data stores 0x100 for category "Co"
1870 my @combining_class_table = ();
1871 my @decomp_compat_table = ();
# true entries mark characters that get_composition must not compose
1872 my @comp_exclusions = ();
1873 my @idna_decomp_table = ();
1874 my @idna_disallowed = ();
1875 my %registry_keys;
1876 my $default_char;
1877 my $default_wchar;
# one (initially empty) array per joining form
1879 my %joining_forms =
1881 "isolated" => [],
1882 "final" => [],
1883 "initial" => [],
1884 "medial" => []
# url (plus ":member" for archives) of the file last opened by open_data_file
1887 my $current_data_file;
# Convert a list of Unicode code points to UTF-16 code units.
# Code points below 0x10000 are returned unchanged; every other code point
# is replaced by its high/low surrogate pair.
sub to_utf16(@)
{
    my @units = map {
        my $offset = $_ - 0x10000;
        $offset < 0 ? $_ : (0xd800 | ($offset >> 10), 0xdc00 | ($offset & 0x3ff));
    } @_;
    return @units;
}
################################################################
# fetch a unicode.org file and open it
#
# $id   - key into %data_files; the entry supplies the download url, the
#         expected SHA-256 and optionally the cache file name
# $name - archive member to read when the cached file is a .zip or .tar.gz
#
# The file is downloaded once into $XDG_CACHE_HOME/wine (falling back to
# $HOME/.cache/wine) and verified against the expected checksum on every
# call.  Returns the FILE glob opened for reading and records the source in
# $current_data_file.  Dies on an unknown id, a failed download, a checksum
# mismatch or a failed open.
sub open_data_file($@)
{
    my ($id, $name) = @_;
    my $data = $data_files{$id} or die "unknown data file $id";
    my $cache = ($ENV{XDG_CACHE_HOME} || "$ENV{HOME}/.cache") . "/wine";
    local *FILE;

    my $url = $data->{url};
    my $filename = "$cache/" . ($data->{name} || ($url =~ s/.*\/([^\/]+)$/$1/r));
    unless (-f $filename)
    {
        # download under a temporary name so that a failed or interrupted
        # transfer never leaves a truncated file in the cache; such a file
        # would be picked up by the -f test above on every later run and
        # rejected by the checksum until it is deleted by hand
        my $partial = "$filename.part";
        print "Fetching $url...\n";
        system "mkdir", "-p", $cache;
        if (system "wget", "-q", "-O", $partial, $url)
        {
            unlink $partial;
            die "cannot fetch $url";
        }
        rename $partial, $filename or die "cannot rename $partial to $filename";
    }

    my $sha = Digest::SHA->new( "sha256" )->addfile( $filename )->hexdigest;
    die "invalid checksum $sha for $filename" unless $sha eq $data->{sha};

    if ($filename =~ /\.zip$/)
    {
        open FILE, "-|", "unzip", "-p", $filename, $name or die "cannot extract $name from $filename";
    }
    elsif ($filename =~ /\.tar\.gz$/)
    {
        open FILE, "-|", "tar", "-x", "-f", $filename, "-O", $name or die "cannot extract $name from $filename";
    }
    else
    {
        # three-argument open: the file name is never parsed for a mode
        open FILE, "<", $filename or die "cannot open $filename";
    }
    $current_data_file = $name ? "$url:$name" : $url;
    return *FILE;
}
################################################################
# load a unicode.org file as XML data
#
# Takes the same ($id, $name) arguments as open_data_file, parses the
# opened stream with XML::LibXML and returns the resulting document.
sub load_xml_data_file($@)
{
    my ($id, $name) = @_;
    my $FILE = open_data_file( $id, $name );
    my $xml = XML::LibXML->load_xml( IO => $FILE );
    # close the handle that was returned to us: the bareword FILE is
    # localized inside open_data_file, so here it no longer refers to the
    # stream that was just read
    close $FILE;
    return $xml;
}
################################################################
# recursively get the decomposition for a character
#
# $char  - code point to expand
# $table - reference to an array indexed by code point; every defined
#          entry is a reference to the list of code points it maps to
#
# Returns $char itself when the table has no entry for it, otherwise the
# concatenation of the expansions of all code points in its entry.
sub get_decomposition($$);
sub get_decomposition($$)
{
    my ($char, $table) = @_;
    my $mapping = $table->[$char];

    return $char unless defined $mapping;
    my @expanded = map { get_decomposition( $_, $table ) } @{$mapping};
    return @expanded;
}
################################################################
# get the composition that results in a given character
#
# $ch     - code point looked up in @decomp_table
# $compat - 1 additionally consults @decomp_compat_table, 2 additionally
#           consults @idna_decomp_table
#
# Returns the decomposition of $ch when it is usable as a composition,
# or an empty list otherwise.
sub get_composition($$)
{
    my ($ch, $compat) = @_;
    my $decomp = $decomp_table[$ch];

    return () unless defined $decomp;             # no decomposition
    my @ret = @{$decomp};
    return () if @ret < 2;                        # singleton decomposition
    return () if $comp_exclusions[$ch];           # composition exclusion
    return () if $combining_class_table[$ch];     # non-starter

    my ($first, $second) = @ret;
    return () if $combining_class_table[$first];  # first char is non-starter

    if ($compat == 1)
    {
        # first char has compat decomposition
        return () if !defined $decomp_table[$first] && defined $decomp_compat_table[$first];
    }
    elsif ($compat == 2)
    {
        my $idna = $idna_decomp_table[$first];
        if (defined $idna)
        {
            # first char has IDNA decomposition
            return () unless defined $decomp_table[$first];
            # first char's decomposition has IDNA decomposition
            return () if defined $idna_decomp_table[$idna->[0]];
        }
        # second char has IDNA decomposition
        return () if defined $idna_decomp_table[$second];
    }
    return @ret;
}
################################################################
# recursively build decompositions
#
# Takes a decomposition table indexed by code point and returns a new
# table in which every defined source entry is replaced by a reference to
# its full expansion (via get_decomposition) converted with to_utf16.
sub build_decompositions(@)
{
    my @src = @_;
    my @dst;

    foreach my $code (grep { defined $src[$_] } 0 .. $#src)
    {
        $dst[$code] = [ to_utf16( get_decomposition( $code, \@src )) ];
    }
    return @dst;
}
################################################################
# compose Hangul sequences
#
# Walks the given list of code points and merges
#   - a leading consonant followed by a vowel into the LV syllable, and
#   - an LV syllable followed by a trailing consonant into the LVT syllable.
# All other code points are copied through unchanged.
sub compose_hangul(@)
{
    my ($SBASE, $LBASE, $VBASE, $TBASE) = (0xac00, 0x1100, 0x1161, 0x11a7);
    my ($LCOUNT, $VCOUNT, $TCOUNT) = (19, 21, 28);
    my $SCOUNT = $LCOUNT * $VCOUNT * $TCOUNT;

    my @seq = @_;
    my @ret;
    my $pos = 0;

    while ($pos < @seq)
    {
        # from here on $pos is the index of the character following $ch
        my $ch = $seq[$pos++];

        if ($pos < @seq)
        {
            my ($l, $v) = ($ch - $LBASE, $seq[$pos] - $VBASE);
            if ($l >= 0 && $l < $LCOUNT && $v >= 0 && $v < $VCOUNT)
            {
                $ch = $SBASE + ($l * $VCOUNT + $v) * $TCOUNT;
                $pos++;
            }
        }
        if ($pos < @seq)
        {
            my ($s, $t) = ($ch - $SBASE, $seq[$pos] - $TBASE);
            # only syllables without a trailing consonant can take one
            if ($s >= 0 && $s < $SCOUNT && $s % $TCOUNT == 0 && $t > 0 && $t < $TCOUNT)
            {
                $ch += $t;
                $pos++;
            }
        }
        push @ret, $ch;
    }
    return @ret;
}
################################################################
# remove linguistic-only mappings from the case table
#
# $upper, $lower - references to the upper and lower case tables, indexed
#                  by code point; both are modified in place
#
# An entry is kept only if the other table maps its target back to the
# entry's own index.  The upper table is filtered first, so the lower
# table is checked against the already filtered upper table.
sub remove_linguistic_mappings($$)
{
    my ($upper, $lower) = @_;

    # remove case mappings that don't round-trip

    foreach my $pair ([ $upper, $lower ], [ $lower, $upper ])
    {
        my ($table, $reverse) = @{$pair};
        foreach my $code (0 .. $#{$table})
        {
            my $target = $table->[$code];
            next unless defined $target;
            my $back = $reverse->[$target];
            $table->[$code] = undef unless defined $back && $back == $code;
        }
    }
}
################################################################
# read in the Unicode database files
#
# Fills the file-level tables from four data files:
#   UnicodeData.txt            categories, directions, case mappings,
#                              digits, combining classes, decompositions
#   CompositionExclusions.txt  composition exclusions
#   IdnaMappingTable.txt       IDNA mappings and disallowed characters
#   Unihan_Variants.txt        traditional/simplified Chinese variants
sub load_data()
{
    my $start;

    # now build mappings from the decomposition field of the Unicode database

    my $UNICODE_DATA = open_data_file( "ucd", "UnicodeData.txt" );
    while (<$UNICODE_DATA>)
    {
        # Decode the fields ...
        my ($code, $name, $cat, $comb, $bidi,
            $decomp, $dec, $dig, $num, $mirror,
            $oldname, $comment, $upper, $lower, $title) = split /;/;
        my $src = hex $code;

        die "unknown category $cat" unless defined $categories{$cat};
        die "unknown directionality $bidi" unless defined $directions{$bidi};

        $category_table[$src] = $categories{$cat};
        $direction_table[$src] = $bidi;

        # categories Mn, Me and Cf start with joining type T, everything else with U
        my $is_mark_or_format = ($cat eq "Mn" || $cat eq "Me" || $cat eq "Cf");
        $initial_joining_table[$src] = $joining_types{$is_mark_or_format ? "T" : "U"};

        $tolower_table[$src] = hex $lower if $lower ne "";
        $toupper_table[$src] = hex $upper if $upper ne "";
        $category_table[$src] |= $ctype{"digit"} if $dec ne "";
        $digitmap_table[$src] = ord $dig if $dig ne "";
        $combining_class_table[$src] = ($cat eq "Co") ? 0x100 : $comb; # Private Use

        # extra ctype flags derived from the direction and the character names
        $category_table[$src] |= $ctype{"nonspacing"} if $bidi eq "NSM";
        $category_table[$src] |= $ctype{"diacritic"} if $name =~ /^(COMBINING)|(MODIFIER LETTER)\W/;
        $category_table[$src] |= $ctype{"vowelmark"} if $name =~ /\sVOWEL/ || $oldname =~ /\sVOWEL/;
        $category_table[$src] |= $ctype{"halfwidth"} if $name =~ /^HALFWIDTH\s/;
        $category_table[$src] |= $ctype{"fullwidth"} if $name =~ /^FULLWIDTH\s/;
        $category_table[$src] |= $ctype{"hiragana"} if $name =~ /(HIRAGANA)|(\WKANA\W)/;
        $category_table[$src] |= $ctype{"katakana"} if $name =~ /(KATAKANA)|(\WKANA\W)/;
        foreach my $ideograph_name (qr/^<CJK Ideograph/, qr/^CJK COMPATIBILITY IDEOGRAPH/, qr/^HANGZHOU/)
        {
            $category_table[$src] |= $ctype{"ideograph"} if $name =~ $ideograph_name;
        }
        $category_table[$src] |= $ctype{"highsurrogate"} if $name =~ /High Surrogate/;
        $category_table[$src] |= $ctype{"lowsurrogate"} if $name =~ /Low Surrogate/;

        # copy the category and direction for everything between First/Last pairs
        $start = $src if $name =~ /, First>/;
        if ($name =~ /, Last>/)
        {
            for (; $start < $src; $start++)
            {
                $category_table[$start] = $category_table[$src];
                $direction_table[$start] = $direction_table[$src];
                $combining_class_table[$start] = $combining_class_table[$src];
            }
        }

        next if $decomp eq "";  # no decomposition, skip it

        if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)/)
        {
            # tagged decomposition: drop the <tag> and keep the whole sequence
            my (undef, $chars) = split /\s+/, $decomp, 2;
            $decomp_compat_table[$src] = [ map { hex $_; } split /\s+/, $chars ];
        }

        if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
        {
            # decomposition of the form "<foo> 1234" -> use char if type is known
            my ($kind, $dst) = ($1, hex $2);
            if ($kind eq "narrow")
            {
                $halfwidth_table[$dst] = $src;
                $fullwidth_table[$src] = $dst;
            }
            elsif ($kind eq "wide")
            {
                next if $dst == 0x5c;  # don't remap backslash
                $fullwidth_table[$dst] = $src;
                $halfwidth_table[$src] = $dst;
            }
            elsif ($kind eq "font" || $kind eq "square" || $kind eq "circle")
            {
                $fullwidth_table[$src] = $dst if $src >= 0x10000;
            }
            elsif ($kind eq "isolated" || $kind eq "final" || $kind eq "initial" || $kind eq "medial")
            {
                ${joining_forms{$kind}}[$dst] = $src;
            }
        }
        elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
        {
            # decomposition "<compat> 0020 1234" -> combining accent
            # (nothing is stored for these)
        }
        elsif ($decomp =~ /^([0-9a-fA-F]+)/)
        {
            # store decomposition
            if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
            {
                $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
            }
            elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
            {
                # Single char decomposition
                my $dst = hex $1;
                $decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ];
                if ($name =~ /^CJK COMPATIBILITY IDEOGRAPH/)
                {
                    $cjk_compat_table[$src] = $dst;
                    $fullwidth_table[$src] = $dst if $src >= 0x10000;
                }
            }
        }
    }
    close $UNICODE_DATA;

    # patch the category of some special characters

    # a decomposable character inherits the category bits of its first component
    foreach my $i (0 .. $#decomp_table)
    {
        next unless defined $decomp_table[$i];
        $category_table[$i] |= $category_table[$decomp_table[$i]->[0]];
    }
    foreach my $cat (keys %special_categories)
    {
        my $flag = $ctype{$cat};
        $category_table[$_] |= $flag foreach @{$special_categories{$cat}};
    }
    # two-character decompositions inherit the diacritic bit of the second character
    foreach my $i (0 .. $#decomp_compat_table)
    {
        my $seq = $decomp_compat_table[$i];
        next unless defined $seq;
        next unless @{$seq} == 2;
        $category_table[$i] |= $category_table[$seq->[1]] & $ctype{"diacritic"};
    }

    # load the composition exclusions

    my $EXCL = open_data_file( "ucd", "CompositionExclusions.txt" );
    while (<$EXCL>)
    {
        s/\#.*//;  # remove comments
        if (/^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s*$/)
        {
            $comp_exclusions[$_] = 1 foreach hex $1 .. hex $2;
        }
        elsif (/^([0-9a-fA-F]+)\s*$/)
        {
            $comp_exclusions[hex $1] = 1;
        }
    }
    close $EXCL;

    # load the IDNA mappings

    # start from the compatibility decompositions, then apply the IDNA table on top
    @idna_decomp_table = @decomp_compat_table;
    my $IDNA = open_data_file( "idna", "IdnaMappingTable.txt" );
    while (<$IDNA>)
    {
        s/\#.*//;  # remove comments
        next if /^\s*$/;
        my ($char, $type, $mapping) = split /;/;
        my ($ch1, $ch2);
        if ($char =~ /([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)/)
        {
            ($ch1, $ch2) = (hex $1, hex $2);
        }
        elsif ($char =~ /([0-9a-fA-F]+)/)
        {
            $ch1 = $ch2 = hex $1;
        }

        # the order of these tests matters: status names can contain several
        # of the keywords, and the first matching test wins
        if ($type =~ /mapped/ || $type =~ /deviation/)
        {
            $mapping =~ s/^\s*(([0-9a-fA-F]+\s+)+)\s*$/$1/;
            my @seq = map { hex $_; } split /\s+/, $mapping;
            foreach my $i ($ch1 .. $ch2)
            {
                # an empty mapping is stored as a single 0
                $idna_decomp_table[$i] = @seq ? \@seq : [ 0 ];
            }
        }
        elsif ($type =~ /valid/)
        {
            # keep whatever was copied from the compatibility decompositions
        }
        elsif ($type =~ /ignored/)
        {
            $idna_decomp_table[$_] = [ 0 ] foreach $ch1 .. $ch2;
        }
        elsif ($type =~ /disallowed/)
        {
            foreach my $i ($ch1 .. $ch2)
            {
                $idna_decomp_table[$i] = undef;
                $idna_disallowed[$i] = 1;
            }
        }
    }
    close $IDNA;

    # load the Unihan mappings

    my $UNIHAN = open_data_file( "unihan", "Unihan_Variants.txt" );
    while (<$UNIHAN>)
    {
        s/\#.*//;  # remove comments
        next if /^\s*$/;
        # only entries with exactly four hex digits on both sides are considered
        if (/^U\+([0-9a-fA-F]{4})\s+kTraditionalVariant\s+U\+([0-9a-fA-F]{4})$/)
        {
            next if hex $1 < 0x4dc0;  # skip extension A
            $chinese_traditional_table[hex $1] = hex $2;
        }
        elsif (/^U\+([0-9a-fA-F]{4})\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]{4})$/)
        {
            next if hex $1 < 0x4dc0;  # skip extension A
            $chinese_simplified_table[hex $1] = hex $2;
        }
    }
    close $UNIHAN;

    # compatibility ideographs fall back to their decomposition unless that
    # decomposition itself has a simplified variant
    foreach my $i (0xf900..0xfaff)
    {
        my $base = $cjk_compat_table[$i];
        next unless defined $base;
        next if defined $chinese_simplified_table[$base];
        $chinese_simplified_table[$i] = $base;
    }
}
################################################################
# add a new registry key
#
# Registers "$base\$key" in %registry_keys with $defval as the first
# element of its value list. A key that is already present is left alone.
sub add_registry_key($$$)
{
    my ($base, $key, $defval) = @_;
    my $path = "$base\\$key";
    unless (defined $registry_keys{$path})
    {
        $registry_keys{$path} = [ $defval ];
    }
}
################################################################
# add a new registry value with explicit type
#
# $value is the already formatted "<type> <data>" text; it is appended
# to the key's list as "'$name' = $value". The key is created (with an
# undefined default value) if it does not exist yet.
sub add_registry_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    add_registry_key( $base, $key, undef );
    my $entries = $registry_keys{"$base\\$key"};
    push @{$entries}, "'$name' = $value";
}
################################################################
# add a new registry string value
#
# Single quotes inside $value are doubled before the value is stored
# with the "s" type prefix.
sub add_registry_string_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    $value =~ s/\'/\'\'/g;
    my $formatted = "s '$value'";
    add_registry_value( $base, $key, $name, $formatted );
}
################################################################
# add a new registry dword value
#
# The value is stored verbatim with the "d" type prefix.
sub add_registry_dword_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my $formatted = "d $value";
    add_registry_value( $base, $key, $name, $formatted );
}
################################################################
# add a new registry binary value
#
# $value is a byte string; it is stored as lowercase hex, two digits
# per byte, with the "b" type prefix.
sub add_registry_binary_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my @bytes = unpack( "C*", $value );
    my $hex = join "", map { sprintf "%02x", $_; } @bytes;
    add_registry_value( $base, $key, $name, "b " . $hex );
}
################################################################
# define a new lead byte
#
# Records the byte in @lead_bytes and marks its @cp2uni slot with 0.
# Does nothing when the slot is already defined.
sub add_lead_byte($)
{
    my ($byte) = @_;
    return if defined $cp2uni[$byte];
    $cp2uni[$byte] = 0;
    push @lead_bytes, $byte;
}
################################################################
# define a new char mapping
#
# Records $cp -> $uni in @cp2uni and $uni -> $cp in @uni2cp. The first
# mapping defined for a slot wins; later ones do not override it.
# A code page value above 0xff also registers its high byte as a lead byte.
sub add_mapping($$)
{
    my ($cp, $uni) = @_;
    $cp2uni[$cp] //= $uni;
    $uni2cp[$uni] //= $cp;
    add_lead_byte( $cp >> 8 ) if $cp > 0xff;
}
################################################################
# get a mapping including glyph chars for MB_USEGLYPHCHARS
#
# Returns a copy of the given table in which every index that has a
# defined entry in @glyph2uni is replaced by that entry.
sub get_glyphs_mapping(@)
{
    my @table = @_;

    foreach my $idx (0 .. $#glyph2uni)
    {
        next unless defined $glyph2uni[$idx];
        $table[$idx] = $glyph2uni[$idx];
    }
    return @table;
}
################################################################
# build EUC-JP table from the JIS 0208/0212 files
#
# Resets the global mapping tables, fills them with the fixed mappings
# followed by the contents of the "jis0208" and "jis0212" data files,
# and writes the result as code page 20932.
sub dump_eucjp_codepage()
{
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = 0x30fb;

    # ASCII chars
    add_mapping( $_, $_ ) foreach 0x00 .. 0x7f;

    # lead bytes
    add_lead_byte( $_ ) foreach 0x8e, 0xa1 .. 0xfe;

    # JIS X 0201 right plane
    add_mapping( 0x8e00 + $_, 0xfec0 + $_ ) foreach 0xa1 .. 0xdf;

    # undefined chars
    $cp2uni[$_] = $_ foreach 0x80 .. 0x8d, 0x8f .. 0x9f;
    $cp2uni[0xa0] = 0xf8f0;
    $cp2uni[0xff] = 0xf8f3;

    # Fix backslash conversion
    add_mapping( 0xa1c0, 0xff3c );

    # Add private mappings for rows undefined in JIS 0208/0212
    # (all rows with trail bytes 0xa1..0xfe first, then all rows with
    # trail bytes 0x21..0x7e; the private code points are handed out in
    # that order)
    my $private = 0xe000;
    foreach my $trail ([ 0xa1, 0xfe ], [ 0x21, 0x7e ])
    {
        my ($lo_first, $lo_last) = @{$trail};
        foreach my $hi (0xf5 .. 0xfe)
        {
            foreach my $lo ($lo_first .. $lo_last)
            {
                add_mapping( ($hi << 8) + $lo, $private++ );
            }
        }
    }

    my $INPUT = open_data_file( "jis0208" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        # the first column is skipped; the second one is the JIS code
        die "Unrecognized line $_\n"
            unless /^0x[0-9a-fA-F]+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/;
        add_mapping( 0x8080 + hex $1, hex $2 );
    }
    close $INPUT;

    $INPUT = open_data_file( "jis0212" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        die "Unrecognized line $_\n"
            unless /^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/;
        add_mapping( 0x8000 + hex $1, hex $2 );
    }
    close $INPUT;

    output_codepage_file( 20932 );
}
################################################################
# build Korean Wansung table from the KSX1001 file
#
# @_ is a Unicode -> cp949 table (indexed by Unicode value). The global
# mapping tables are reset, filled from the "ksx1001" data file, then
# completed with the cp949 entries that fit, and written as code page 20949.
sub dump_krwansung_codepage(@)
{
    my @cp949 = @_;
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = 0x3f;
    $default_wchar = 0x003f;

    # ASCII and undefined chars
    foreach my $i (0x00 .. 0x9f) { add_mapping( $i, $i ); }
    add_mapping( 0xa0, 0xf8e6 );
    add_mapping( 0xad, 0xf8e7 );
    add_mapping( 0xae, 0xf8e8 );
    add_mapping( 0xaf, 0xf8e9 );
    add_mapping( 0xfe, 0xf8ea );
    add_mapping( 0xff, 0xf8eb );

    my $INPUT = open_data_file( "ksx1001" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            add_mapping( 0x8080 + hex $1, hex $2 );
            next;
        }
        die "Unrecognized line $_\n";
    }
    close $INPUT;

    # get some extra mappings from cp 949
    # (snapshot of the lead bytes defined so far; a plain loop is used
    # because the assignment is done only for its side effect)
    my @defined_lb;
    foreach my $lead (@lead_bytes) { $defined_lb[$lead] = 1; }

    foreach my $i (0x0000 .. 0xffff)
    {
        next if ($i >= 0x1100 && $i <= 0x11ff);  # range not used in 20949
        next unless defined $cp949[$i];
        if ($cp949[$i] >= 0xff)
        {
            # only add chars for lead bytes that exist in 20949
            my $hi = $cp949[$i] >> 8;
            my $lo = $cp949[$i] & 0xff;
            next unless $defined_lb[$hi];
            next unless $lo >= 0xa1 && $lo <= 0xfe;
        }
        # add_mapping() never overrides an existing entry
        add_mapping( $cp949[$i], $i );
    }

    output_codepage_file( 20949 );
}
################################################################
# dump an array of integers
#
# Returns the array formatted as a comma-separated list of zero-padded
# hex constants ($bit_width / 4 digits each), eight per line. Undefined
# entries are printed as $default. An empty array still produces one
# entry holding $default.
sub dump_array($$@)
{
    my ($bit_width, $default, @array) = @_;
    my $format = sprintf "0x%%0%ux", $bit_width / 4;
    my $last = @array ? $#array : 0;
    my $ret = " ";
    foreach my $idx (0 .. $last)
    {
        $ret .= sprintf( $format, $array[$idx] // $default );
        next if $idx == $last;  # no separator after the final entry
        $ret .= ($idx % 8 == 7) ? ",\n " : ", ";
    }
    return $ret;
}
################################################################
# dump an SBCS mapping table in binary format
#
# Writes to the OUTPUT handle, in order: the header words, 12 zero
# bytes, the code page -> Unicode table (preceded by the offset word),
# the optional glyph table, two zero words, and the 64K Unicode -> code
# page byte table. All words are 16-bit little-endian.
sub dump_binary_sbcs_table($)
{
    my $codepage = shift;

    my @header = ( 13, $codepage, 1, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );
    my $has_glyphs = @glyph2uni ? 1 : 0;
    my $wc_offset = 256 + 3 + ($has_glyphs ? 256 : 0);

    print OUTPUT pack( "S<*", @header );
    print OUTPUT pack( "C12", (0) x 12 );

    # undefined code page slots are written as 0
    my @to_unicode = map { $_ || 0; } @cp2uni[0 .. 255];
    print OUTPUT pack( "S<*", $wc_offset, @to_unicode );

    # glyph table: a 256 count word followed by the table, or a single 0 word
    my @glyph_section = $has_glyphs ? ( 256, get_glyphs_mapping(@cp2uni[0 .. 255]) ) : ( 0 );
    print OUTPUT pack( "S<*", @glyph_section );

    print OUTPUT pack( "S<*", 0, 0 );

    # unmapped Unicode values fall back to the default char
    my @to_codepage = map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
    print OUTPUT pack( "C*", @to_codepage );
}
################################################################
# dump a DBCS mapping table in binary format
#
# Writes to the OUTPUT handle, in order: the header words, the 12-byte
# lead byte range field, the first-byte -> Unicode table (preceded by
# the offset word), the lead byte offset table, one 256-entry table per
# lead byte, and the 64K Unicode -> code page word table. All words are
# 16-bit little-endian. As a side effect the @cp2uni slot of every lead
# byte is set to 0.
sub dump_binary_dbcs_table($)
{
    my $codepage = shift;
    my @lb_ranges = get_lb_ranges();
    my @header = ( 13, $codepage, 2, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );

    # each lead byte gets the offset of its own 256-entry trail byte table
    my @offsets = (0) x 256;
    my $pos = 0;
    foreach my $i (@lead_bytes)
    {
        $offsets[$i] = ($pos += 256);
        $cp2uni[$i] = 0;
    }

    my $wc_offset = 256 + 3 + 256 * (1 + scalar @lead_bytes);

    print OUTPUT pack "S<*", @header;
    # Pad the range list with a real list of zeros. "0 x 12" would be the
    # string "000000000000", i.e. a single argument, and the field would
    # only be filled because pack() substitutes missing arguments.
    print OUTPUT pack "C12", @lb_ranges, (0) x 12;
    print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];
    print OUTPUT pack "S<*", 0, scalar @lb_ranges / 2, @offsets;

    foreach my $i (@lead_bytes)
    {
        my $base = $i << 8;
        print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_wchar; } @cp2uni[$base .. $base + 255];
    }

    print OUTPUT pack "S<", 4;
    print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
}
################################################################
# get the list of defined lead byte ranges
#
# Returns a flat list of (first, last) pairs, one pair per run of
# consecutive values in @lead_bytes, in ascending order.
sub get_lb_ranges()
{
    my @ranges = ();
    my %is_lead = map { $_ => 1 } @lead_bytes;

    my $in_range = 0;
    foreach my $byte (0 .. 255)
    {
        my $lead = $is_lead{$byte} ? 1 : 0;
        next if $lead == $in_range;
        # entering a run records its first byte, leaving it records the
        # last byte that was still part of the run
        push @ranges, $lead ? $byte : $byte - 1;
        $in_range = $lead;
    }
    push @ranges, 0xff if $in_range;
    return @ranges;
}
################################################################
# dump the Indic Syllabic Category table
#
# Builds one table from two data files: the low byte of an entry is the
# syllabic category (IndicSyllabicCategory.txt), the positional category
# (IndicPositionalCategory.txt) is OR-ed in shifted left by 8 bits.
# The table is written to $filename as a two-level mapping.
sub dump_indic($)
{
    my $filename = shift;
    my @indic_table;

    my $INPUT = open_data_file( "ucd", "IndicSyllabicCategory.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown indic $type" unless defined $indic_types{$type};

        # entries reaching beyond 0xffff are dropped as a whole
        next unless $first < 65536 and $last < 65536;
        $indic_table[$_] = $indic_types{$type} foreach $first .. $last;
    }
    close $INPUT;

    # remember the first file name for the header comment; judging by
    # its use here, $current_data_file changes when the next file is opened
    my $prev_data_file = $current_data_file;
    $INPUT = open_data_file( "ucd", "IndicPositionalCategory.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown matra $type" unless defined $matra_types{$type};

        $indic_table[$_] |= $matra_types{$type} << 8 foreach $first .. $last;
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Indic Syllabic Category */\n";
    print OUTPUT "/* generated from $prev_data_file */\n";
    print OUTPUT "/* and from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "indic_syllabic_table", $indic_types{'Other'}, 16, @indic_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Line Break Properties table
#
# Reads LineBreak.txt (single code points and ranges, with two- or
# three-character class names) and writes the classes to $filename as a
# three-level mapping. Dies on an unknown class or an unparsable line.
sub dump_linebreak($)
{
    my $filename = shift;
    my @break_table;

    my $INPUT = open_data_file( "ucd", "LineBreak.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z

        # three-character classes are tried before two-character ones
        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        elsif (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown breaktype $type" unless defined $break_types{$type};

        $break_table[$_] = $break_types{$type} foreach $first .. $last;
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Line Break Properties */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_linebreak_table", $break_types{'XX'}, 16, @break_table );

    close OUTPUT;
    save_file($filename);
}
# Script name -> script id. The ids are consecutive and follow the order
# of the list below, starting at 0 for "Unknown", so new scripts must be
# appended at the end to keep the existing ids stable.
my %scripts = do
{
    my $id = 0;
    map { $_ => $id++ }
    (
        qw(Unknown Common Inherited Arabic Armenian Avestan Balinese Bamum
           Batak Bengali Bopomofo Brahmi Braille Buginese Buhid
           Canadian_Aboriginal Carian Cham Cherokee Coptic Cuneiform Cypriot
           Cyrillic Deseret Devanagari Egyptian_Hieroglyphs Ethiopic Georgian
           Glagolitic Gothic Greek Gujarati Gurmukhi Han Hangul Hanunoo
           Hebrew Hiragana Imperial_Aramaic Inscriptional_Pahlavi
           Inscriptional_Parthian Javanese Kaithi Kannada Katakana Kayah_Li
           Kharoshthi Khmer Lao Latin Lepcha Limbu Linear_B Lisu Lycian
           Lydian Malayalam Mandaic Meetei_Mayek Mongolian Myanmar
           New_Tai_Lue Nko Ogham Ol_Chiki Old_Italic Old_Persian
           Old_South_Arabian Old_Turkic Oriya Osmanya Phags_Pa Phoenician
           Rejang Runic Samaritan Saurashtra Shavian Sinhala Sundanese
           Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tai_Tham Tai_Viet
           Tamil Telugu Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi),
        # Win8/Win8.1
        qw(Chakma Meroitic_Cursive Meroitic_Hieroglyphs Miao Sharada
           Sora_Sompeng Takri),
        # Win10
        qw(Bassa_Vah Caucasian_Albanian Duployan Elbasan Grantha Khojki
           Khudawadi Linear_A Mahajani Manichaean Mende_Kikakui Modi Mro
           Nabataean Old_North_Arabian Old_Permic Pahawh_Hmong Palmyrene
           Pau_Cin_Hau Psalter_Pahlavi Siddham Tirhuta Warang_Citi),
        # Win10 RS1
        qw(Adlam Ahom Anatolian_Hieroglyphs Bhaiksuki Hatran Marchen Multani
           Newa Old_Hungarian Osage SignWriting Tangut),
        # Win10 RS4
        qw(Masaram_Gondi Nushu Soyombo Zanabazar_Square),
        # Win10 1903
        qw(Dogra Gunjala_Gondi Hanifi_Rohingya Makasar Medefaidrin
           Old_Sogdian Sogdian),
        # Win10 2004
        qw(Elymaic Nyiakeng_Puachue_Hmong Nandinagari Wancho),
        # Win11
        qw(Chorasmian Dives_Akuru Khitan_Small_Script Yezidi),
    );
};
################################################################
# dump Script IDs table
#
# Reads Scripts.txt and writes two files: "$filename.h" with the
# unicode_script_id enum built from %scripts, and "$filename.c" with
# the per-character script ids as a three-level mapping. Scripts that
# are not listed in %scripts are silently left at id 0 (Unknown).
sub dump_scripts($)
{
    my $filename = shift;
    my @scripts_table;

    my $INPUT = open_data_file( "ucd", "Scripts.txt" );
    # Fill the table
    # Unknown script id is always 0, so undefined scripts are automatically treated as such
    while (<$INPUT>)
    {
        my $type = "";

        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            $type = $2;
            if (defined $scripts{$type})
            {
                $scripts_table[hex $1] = $scripts{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            $type = $3;
            if (defined $scripts{$type})
            {
                foreach my $i (hex $1 .. hex $2)
                {
                    $scripts_table[$i] = $scripts{$type};
                }
            }
            next;
        }
        # lines matching neither form are ignored
    }

    close $INPUT;

    my $header = "$filename.h";
    open OUTPUT,">$header.new" or die "Cannot create $header";
    print "Building $header\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";

    print OUTPUT "enum unicode_script_id {\n";
    foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts)
    {
        print OUTPUT " Script_$script = $scripts{$script},\n";
    }
    # valid as the highest id only while the ids in %scripts are consecutive from 0
    print OUTPUT " Script_LastId = ", (scalar keys %scripts) - 1, "\n";
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($header);

    $filename = "$filename.c";
    # report the .c file on failure, not the header that was already written
    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_scripts_table", 0, 16, @scripts_table );
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the BiDi mirroring table
#
# Reads the "code; mirrored code" pairs of BidiMirroring.txt and writes
# them to $filename as a two-level mapping with 0 as the default value.
sub dump_mirroring($)
{
    my $filename = shift;
    my @mirror_table = ();

    my $INPUT = open_data_file( "ucd", "BidiMirroring.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        die "malformed line $_" unless /^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+)/;
        my ($char, $mirrored) = (hex($1), hex($2));
        $mirror_table[$char] = $mirrored;
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi mirroring */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";
    dump_two_level_mapping( "wine_mirror_map", 0, 16, @mirror_table );
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Bidi Brackets
#
# Reads BidiBrackets.txt and writes a two-level mapping to $filename.
# Each entry holds the distance from the bracket to its paired bracket
# in the low byte and the bracket type (from %bracket_types) shifted
# left by 8 bits. Dies on an unknown type, on a pair that is 128 or
# more code points apart, or on an unparsable line.
sub dump_bracket($)
{
    my $filename = shift;
    my @bracket_table;

    my $INPUT = open_data_file( "ucd", "BidiBrackets.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+);\s*([con])/)
        {
            my $type = $3;
            die "unknown bracket $type" unless defined $bracket_types{$type};
            die "characters too distant $1 and $2" if abs(hex($2) - hex($1)) >= 128;
            # Store the distance as a two's complement byte: with a modulus
            # of 256 a distance of -1 becomes 0xff. A modulus of 255 would
            # yield 0xfe, which reads back as -2 when taken as a signed byte.
            $bracket_table[hex $1] = (hex($2) - hex($1)) % 256;
            $bracket_table[hex $1] += $bracket_types{$type} << 8;
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Bidirectional Bracket table */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "bidi_bracket_table", 0, 16, @bracket_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Arabic shaping table
#
# Starts from @initial_joining_table, overrides it with the joining
# types of ArabicShaping.txt and writes the result to $filename as a
# two-level mapping. Also writes wine_shaping_forms, which lists the
# isolated, final, initial and medial form of every character in
# 0x600..0x6ff (the character itself when no form is known).
sub dump_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        # only the code point and the joining type are used here
        die "malformed line $_" unless /^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/;
        my ($char, $type) = (hex($1), $2);
        $joining_table[$char] = $joining_types{$type};
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_shaping_table", 0, 16, @joining_table );

    print OUTPUT "\nconst unsigned short DECLSPEC_HIDDEN wine_shaping_forms[256][4] =\n{\n";
    foreach my $ch (0x600 .. 0x6ff)
    {
        my @forms = map { ${joining_forms{$_}}[$ch] || $ch } ("isolated", "final", "initial", "medial");
        printf OUTPUT " { 0x%04x, 0x%04x, 0x%04x, 0x%04x },\n", @forms;
    }
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Arabic shaping table
#
# Starts from @initial_joining_table and overrides it from
# ArabicShaping.txt: characters of the joining groups "ALAPH" and
# "DALATH RISH" get the %joining_types entry of their group, all other
# characters the entry of their joining type. The result is written to
# $filename as a three-level mapping.
sub dump_arabic_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        # The joining group may consist of several space-separated words;
        # a plain \w+ would cut "DALATH RISH" down to "DALATH" and the
        # group test below could never succeed for it.
        if (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+(?: \w+)*)/)
        {
            my $type = $2;
            my $group = $3;

            if ($group eq "ALAPH" || $group eq "DALATH RISH")
            {
                $joining_table[hex $1] = $joining_types{$group};
            }
            else
            {
                $joining_table[hex $1] = $joining_types{$type};
            }

            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "arabic_shaping_table", 0, 16, @joining_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Vertical Orientation table
#
# Reads VerticalOrientation.txt and writes the orientation types to
# $filename as a two-level mapping whose default is the 'R' type.
# When $unix is true, a "#pragma makedep unix" block is added to the
# generated file.
sub dump_vertical($$)
{
    my ($filename, $unix) = @_;
    my @vertical_table;

    my $INPUT = open_data_file( "ucd", "VerticalOrientation.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my ($char, $type) = (hex($1), $2);
            die "unknown vertical $type" unless defined $vertical_types{$type};
            # single code points above 0xffff are not stored
            $vertical_table[$char] = $vertical_types{$type} if $char < 65536;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*/)
        {
            my ($first, $last, $type) = (hex($1), hex($2), $3);
            die "unknown vertical $type" unless defined $vertical_types{$type};
            $vertical_table[$_] = $vertical_types{$type} foreach $first .. $last;
        }
        else
        {
            die "malformed line $_";
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Vertical Orientation */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    if ($unix)
    {
        print OUTPUT "#if 0\n";
        print OUTPUT "#pragma makedep unix\n";
        print OUTPUT "#endif\n\n";
    }
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "vertical_orientation_table", $vertical_types{'R'}, 16, @vertical_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# compress a mapping table by removing identical rows
#
# Splits the table (everything after the first two arguments) into
# $rows rows of equal length, with undefined entries replaced by $def.
# Returns $rows offsets followed by the merged row data; each offset is
# $rows plus the position of that row inside the data, so it indexes
# directly into the returned list. A row already contained in the data
# is shared, and a row whose start equals the tail of the data overlaps it.
sub compress_array($$@)
{
    my $rows = shift;
    my $def = shift;
    my @table = @_;
    my $len = @table / $rows;
    my @array;
    my $data = "";

    # try to merge table rows
    foreach my $row (0 .. $rows - 1)
    {
        # one character per value, so string searches work on whole values
        my @values = map { defined($_) ? $_ : $def; } @table[($row * $len)..(($row + 1) * $len - 1)];
        my $rowtxt = pack "U*", @values;
        my $pos = index $data, $rowtxt;
        if ($pos == -1)
        {
            # check if the tail of the data can match the start of the new row
            my $first = substr( $rowtxt, 0, 1 );
            my $overlap = length($data) - 1;
            while ($overlap > 0)
            {
                # jump to the next place in the tail where the row could start
                my $skip = index( substr( $data, -$overlap ), $first );
                last if $skip == -1;
                $overlap -= $skip;
                if (substr( $data, -$overlap ) eq substr( $rowtxt, 0, $overlap ))
                {
                    # drop the shared tail, it comes back with the row below
                    substr( $data, -$overlap ) = "";
                    last;
                }
                $overlap--;
            }
            $pos = length $data;
            $data .= $rowtxt;
        }
        $array[$row] = $rows + $pos;
    }
    return @array, unpack "U*", $data;
}
3272 ################################################################
3273 # dump a char -> value mapping table using two-level tables
# Arguments: C array name, default value, element size in bits (16 selects
# "unsigned short", anything else "unsigned int"), then the values for
# characters 0..65535.  The table is written to the OUTPUT file handle.
sub dump_two_level_mapping($$$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = ($size == 16) ? "unsigned short" : "unsigned int";

    # 4096 rows of 16 values, then 256 rows of 16 row offsets
    my (@row_array, @data);
    (@row_array[0..4095], @data) = compress_array( 4096, $def, @values[0..65535] );
    my (@array, @row_data);
    (@array[0..255], @row_data) = compress_array( 256, 0, @row_array );

    # rebase the level 2 offsets to their position in the final array
    my $rebase = @row_data + 256 - 4096;
    $_ += $rebase foreach @row_data;

    my $total = @array + @row_data + @data;
    printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%d] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @row_data );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @data );
}
3292 ################################################################
3293 # dump a char -> value mapping table using three-level tables
# Arguments: C array name, default value, element size in bits, then the
# values for characters 0..$MAX_CHAR.  The size is taken from the list part
# of the prototype.  The table is written to the OUTPUT file handle.
sub dump_three_level_mapping($$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = ($size == 16) ? "unsigned short" : "unsigned int";
    my $level3 = ($MAX_CHAR + 1) / 16;
    my $level2 = $level3 / 16;
    my $level1 = $level2 / 16;
    my @array3 = compress_array( $level3, $def, @values[0..$MAX_CHAR] );
    my @array2 = compress_array( $level2, 0, @array3[0..$level3-1] );
    my @array1 = compress_array( $level1, 0, @array2[0..$level2-1] );

    # rebase the data parts of the two upper levels to their final position
    my $rebase2 = @array1 + @array2 - $level2 - $level3;
    $_ += $rebase2 foreach @array2[$level2..$#array2];
    my $rebase1 = @array1 - $level2;
    $_ += $rebase1 foreach @array1[$level1..$#array1];

    my $total = @array1 + (@array2 - $level2) + (@array3 - $level3);
    printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%u] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array1[0..$level1-1] );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @array1[$level1..$#array1] );
    printf OUTPUT " /* level 3 offsets */\n%s,\n", dump_array( $size, 0, @array2[$level2..$#array2] );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @array3[$level3..$#array3] );
}
3317 ################################################################
3318 # dump a binary case mapping table in l_intl.nls format
# Takes a char -> char mapping table and returns the packed binary table.
# Tables that do not extend past 0x10000 entries produce 16-bit data only;
# larger ones get an additional part for the characters above 0xffff whose
# values are stored as 32-bit integers.
sub dump_binary_case_table(@)
{
    my @table = @_;
    my @res;

    # store every mapping as the (32-bit wrapped) distance from its source
    my @difftable;
    foreach my $ch (0 .. $#table)
    {
        next unless defined $table[$ch];
        $difftable[$ch] = ($table[$ch] - $ch) & 0xffffffff;
    }

    my (@low_array1, @low_array2, @low_data, @low_row_data);
    (@low_array2[0..4095], @low_data) = compress_array( 4096, 0, @difftable[0..65535] );
    (@low_array1[0..255], @low_row_data) = compress_array( 256, 0, @low_array2 );

    if (@table <= 0x10000)
    {
        my $row_base = @low_array1 + @low_row_data - 4096;
        push @res, @low_array1;
        push @res, map { $_ + $row_base } @low_row_data;
        push @res, @low_data;
        return pack "S<*", 1 + scalar @res, @res;
    }

    my (@high_array1, @high_array2, @high_data, @high_row_data);
    (@high_array2[0..32767], @high_data) = compress_array( 32768, 0, @difftable[65536..$MAX_CHAR] );
    (@high_array1[0..1023], @high_row_data) = compress_array( 1024, 0, @high_array2 );

    # every base below is computed from the size of @res at that point
    push @res, map { $_ + 1024 } @low_array1;
    my $high_base = @res + @low_row_data + @low_data;
    push @res, map { $_ + $high_base } @high_array1;
    my $low_row_base = @res + @low_row_data - 4096;
    push @res, map { $_ + $low_row_base } @low_row_data;
    push @res, @low_data;
    my $high_row_base = @res + @high_row_data;
    push @res, map { 2 * ($_ - 32768) + $high_row_base } @high_row_data;
    return pack( "S<*", 1 + scalar @res + 2 * scalar @high_data, @res ) . pack( "L<*", @high_data );
}
3357 ################################################################
3358 # dump case mappings for l_intl.nls
# Takes the output file name.  The upper and lower case tables for
# characters 0..65535 (with the linguistic mappings removed) are written to
# "$filename.new" after a version word, then save_file() is called.
sub dump_intl_nls($)
{
    my $filename = shift;

    my @upper_table = @toupper_table;
    my @lower_table = @tolower_table;
    remove_linguistic_mappings( \@upper_table, \@lower_table );

    my $upper = dump_binary_case_table( @upper_table[0..65535] );
    my $lower = dump_binary_case_table( @lower_table[0..65535] );

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";

    binmode OUTPUT;
    print OUTPUT pack "S<", 1;  # version
    print OUTPUT $upper;
    print OUTPUT $lower;
    close OUTPUT;
    save_file($filename);
}
3381 ################################################################
3382 # dump the bidi direction table
# Takes the output file name.  Every defined entry of @direction_table is
# converted through %bidi_types and the result is written as a three-level
# C table named bidi_direction_table, with "L" as the default type.
sub dump_bidi_dir_table($)
{
    my $filename = shift;
    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    # plain print: none of these strings is meant to be a printf format, and
    # the file name could contain a '%'
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi direction table */\n";
    print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    my @table;

    for (my $i = 0; $i < @direction_table; $i++)
    {
        $table[$i] = $bidi_types{$direction_table[$i]} if defined $direction_table[$i];
    }

    dump_three_level_mapping( "bidi_direction_table", $bidi_types{"L"}, 16, @table );

    close OUTPUT;
    save_file($filename);
}
# rotate an 8-bit value left by the given number of bits
sub rol($$)
{
    my ($byte, $count) = @_;
    my $shifted_out = $byte << $count;
    my $wrapped = $byte >> (8 - $count);
    return ($shifted_out | $wrapped) & 0xff;
}
3412 ################################################################
3413 # compress the character properties table
# Arguments: number of rows, then the flat properties table.  Returns one
# row id per row, followed by the contents of every row that had to be
# stored.  Undefined entries count as 0x7f.  Rows filled with a single
# predefined value reuse the fixed ids 0 and 0xfb..0xff and are not stored.
sub compress_char_props_table($@)
{
    my ($rows, @table) = @_;
    my $len = @table / $rows;
    my $last_id = 0;
    my @array = (0) x $rows;
    my %sequences;

    # add some predefined sequences
    foreach my $id (0, 0xfb .. 0xff)
    {
        my $key = pack "L*", (rol($id,5)) x $len;
        $sequences{$key} = $id;
    }

    # try to merge table rows
    foreach my $row (0 .. $rows - 1)
    {
        my @table_row = map { defined $_ ? $_ : 0x7f } @table[($row * $len)..(($row + 1) * $len - 1)];
        my $rowtxt = pack "L*", @table_row;
        unless (defined($sequences{$rowtxt}))
        {
            # first time this row is seen: give it the next id and store it
            $sequences{$rowtxt} = ++$last_id;
            push @array, @table_row;
        }
        $array[$row] = $sequences{$rowtxt};
    }
    return @array;
}
3446 ################################################################
3447 # dump a normalization table in binary format
# Takes the output file name; the normalization form ("nfc", "nfd", "nfkc",
# "nfkd" or "idna") is extracted from a name of the form ".../norm<form>.nls".
# The table is written to "$filename.new", passed to save_file(), and then
# registered under the "Normalization" registry key.
3448 sub dump_norm_table($)
3450 my $filename = shift;
# form ids: bit 0 selects a composing form and bit 2 a compatibility form
# (see $compose and $compat below)
3452 my %forms = ( "nfc" => 1, "nfd" => 2, "nfkc" => 5, "nfkd" => 6, "idna" => 13 );
3453 my %decomp = ( "nfc" => \@decomp_table,
3454 "nfd" => \@decomp_table,
3455 "nfkc" => \@decomp_compat_table,
3456 "nfkd" => \@decomp_compat_table ,
3457 "idna" => \@idna_decomp_table );
3459 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3460 print "Building $filename\n";
3462 my $type = $filename;
3463 $type =~ s!.*/norm(\w+)\.nls!$1!;
3465 my $compose = $forms{$type} & 1;
# 0 for the canonical forms, 1 for nfkc/nfkd, 2 for idna
3466 my $compat = !!($forms{$type} & 4) + ($type eq "idna");
3468 my @version = split /\./, $UNIVERSION;
3470 # combining classes
# every combining class below 0x100 that is in use gets a dense index in
# @classes; @class_values is the reverse list, padded to an even length
3472 my @classes;
3473 my @class_values;
3475 foreach my $c (grep defined, @combining_class_table)
3477 $classes[$c] = 1 if $c < 0x100;
3479 for (my $i = 0; $i < @classes; $i++)
3481 next unless defined $classes[$i];
3482 $classes[$i] = @class_values;
3483 push @class_values, $i;
3485 push @class_values, 0 if (@class_values % 2);
3486 die "too many classes" if @class_values >= 0x40;
3488 # character properties
# one property byte per character; the decompositions (as UTF-16 units) are
# collected in @decomposed and the compositions in @comp_hash_table, hashed
# on the pair returned by get_composition()
3490 my @char_props;
3491 my @decomposed;
3492 my @comp_hash_table;
3493 my $comp_hash_size = $compose ? 254 : 0;
3495 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3497 next unless defined $combining_class_table[$i];
3498 if (defined $decomp{$type}->[$i])
3500 my @dec = get_decomposition( $i, $decomp{$type} );
3501 if ($compose && (my @comp = get_composition( $i, $compat )))
3503 my $hash = ($comp[0] + 95 * $comp[1]) % $comp_hash_size;
3504 push @{$comp_hash_table[$hash]}, to_utf16( @comp, $i );
# use the first non-zero combining class found in the decomposition
3506 my $val = 0;
3507 foreach my $d (@dec)
3509 $val = $combining_class_table[$d];
3510 last if $val;
3512 $char_props[$i] = $classes[$val];
3514 else
3516 $char_props[$i] = 0xbf;
3518 @dec = compose_hangul( @dec ) if $compose;
3519 @dec = to_utf16( @dec );
# sequences of 7 or more units get a terminating 0, since the length stored
# in the hash table below is capped at 7
3520 push @dec, 0 if @dec >= 7;
3521 $decomposed[$i] = \@dec;
3523 else
3525 if ($combining_class_table[$i] == 0x100)
3527 $char_props[$i] = 0x7f;
3529 elsif ($combining_class_table[$i])
3531 $char_props[$i] = $classes[$combining_class_table[$i]] | 0x80;
3533 elsif ($type eq "idna" && defined $idna_disallowed[$i])
3535 $char_props[$i] = 0xff;
3537 else
3539 $char_props[$i] = 0;
# second pass for composing forms: flag both characters of every composition
# pair; the flags differ depending on whether the second character has a
# non-zero combining class
3544 if ($compose)
3546 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3548 my @comp = get_composition( $i, $compat );
3549 next unless @comp;
3550 if ($combining_class_table[$comp[1]])
3552 $char_props[$comp[0]] |= 0x40 unless $char_props[$comp[0]] & 0x80;
3553 $char_props[$comp[1]] |= 0x40;
3555 else
3557 $char_props[$comp[0]] = ($char_props[$comp[0]] & ~0x40) | 0x80;
3558 $char_props[$comp[1]] |= 0xc0;
# the fixed ranges below override whatever was computed above
3563 # surrogates
3564 foreach my $i (0xd800..0xdbff) { $char_props[$i] = 0xdf; }
3565 foreach my $i (0xdc00..0xdfff) { $char_props[$i] = 0x9f; }
3567 # Hangul
3568 if ($type eq "nfc") { foreach my $i (0x1100..0x117f) { $char_props[$i] = 0xff; } }
3569 elsif ($compose) { foreach my $i (0x1100..0x11ff) { $char_props[$i] = 0xff; } }
3570 foreach my $i (0xac00..0xd7ff) { $char_props[$i] = 0xff; }
3572 # invalid chars
3573 if ($type eq "idna") { foreach my $i (0x00..0x1f, 0x7f) { $char_props[$i] = 0xff; } }
3574 foreach my $i (0xfdd0..0xfdef) { $char_props[$i] = 0xff; }
3575 foreach my $i (0x00..0x10)
3577 $char_props[($i << 16) | 0xfffe] = 0xff;
3578 $char_props[($i << 16) | 0xffff] = 0xff;
3581 # decomposition hash table
3583 my @decomp_hash_table;
3584 my @decomp_hash_index;
3585 my @decomp_hash_data;
3586 my $decomp_hash_size = 944;
3588 # build string of character data, reusing substrings when possible
# longest sequences first, so that shorter ones can be found inside them
3589 my $decomp_char_data = "";
3590 foreach my $i (sort { @{$b} <=> @{$a} } grep defined, @decomposed)
3592 my $str = pack "U*", @{$i};
3593 $decomp_char_data .= $str if index( $decomp_char_data, $str) == -1;
# each entry is [ character, (length << 13) | position in the character data ],
# stored in the bucket selected by the character modulo the hash size
3595 for (my $i = 0; $i < @decomposed; $i++)
3597 next unless defined $decomposed[$i];
3598 my $pos = index( $decomp_char_data, pack( "U*", @{$decomposed[$i]} ));
3599 die "sequence not found" if $pos == -1;
3600 my $len = @{$decomposed[$i]};
3601 $len = 7 if $len > 7;
3602 my $hash = $i % $decomp_hash_size;
3603 push @{$decomp_hash_table[$hash]}, [ $i, ($len << 13) | $pos ];
# the index normally holds the position of the bucket in @decomp_hash_data
# (in pairs); a bucket with a single entry whose property byte is 0xbf stores
# the packed length/position value in the index itself and adds no data
3605 for (my $i = 0; $i < $decomp_hash_size; $i++)
3607 $decomp_hash_index[$i] = @decomp_hash_data / 2;
3608 next unless defined $decomp_hash_table[$i];
3609 if (@{$decomp_hash_table[$i]} == 1)
3611 my $entry = $decomp_hash_table[$i]->[0];
3612 if ($char_props[$entry->[0]] == 0xbf)
3614 $decomp_hash_index[$i] = $entry->[1];
3615 next;
3618 foreach my $entry (@{$decomp_hash_table[$i]})
3620 push @decomp_hash_data, $entry->[0] & 0xffff, $entry->[1];
3623 push @decomp_hash_data, 0, 0;
3625 # composition hash table
# the index has one extra entry that marks the end of the last bucket
3627 my @comp_hash_index;
3628 my @comp_hash_data;
3629 if (@comp_hash_table)
3631 for (my $i = 0; $i < $comp_hash_size; $i++)
3633 $comp_hash_index[$i] = @comp_hash_data;
3634 push @comp_hash_data, @{$comp_hash_table[$i]} if defined $comp_hash_table[$i];
3636 $comp_hash_index[$comp_hash_size] = @comp_hash_data;
3637 push @comp_hash_data, 0, 0, 0;
# property bytes are compressed in rows of 128 characters
3640 my $level1 = ($MAX_CHAR + 1) / 128;
3641 my @rows = compress_char_props_table( $level1, @char_props[0..$MAX_CHAR] );
3643 my @header = ( $version[0], $version[1], $version[2], 0, $forms{$type}, $compat ? 18 : 3,
3644 0, $decomp_hash_size, $comp_hash_size, 0 );
3645 my @tables = (0) x 8;
# table offsets; they appear to be counted in 16-bit units, with the tables
# that are written as bytes below contributing half of their length
3647 $tables[0] = 16 + @header + @tables;
3648 $tables[1] = $tables[0] + @class_values / 2;
3649 $tables[2] = $tables[1] + $level1 / 2;
3650 $tables[3] = $tables[2] + (@rows - $level1) / 2;
3651 $tables[4] = $tables[3] + @decomp_hash_index;
3652 $tables[5] = $tables[4] + @decomp_hash_data;
3653 $tables[6] = $tables[5] + length $decomp_char_data;
3654 $tables[7] = $tables[6] + @comp_hash_index;
# the file starts with the name "norm<form>.nlp" stored as 16 16-bit units
3656 print OUTPUT pack "S<16", unpack "U*", "norm$type.nlp";
3657 print OUTPUT pack "S<*", @header;
3658 print OUTPUT pack "S<*", @tables;
3659 print OUTPUT pack "C*", @class_values;
3661 print OUTPUT pack "C*", @rows[0..$level1-1];
3662 print OUTPUT pack "C*", @rows[$level1..$#rows];
3663 print OUTPUT pack "S<*", @decomp_hash_index;
3664 print OUTPUT pack "S<*", @decomp_hash_data;
3665 print OUTPUT pack "S<*", unpack "U*", $decomp_char_data;
3666 print OUTPUT pack "S<*", @comp_hash_index;
3667 print OUTPUT pack "S<*", @comp_hash_data;
3669 close OUTPUT;
3670 save_file($filename);
3672 add_registry_string_value( $nlskey, "Normalization", sprintf( "%x", $forms{$type} ), "norm$type.nls" );
3676 ################################################################
3677 # output a codepage definition file from the global tables
# Takes the codepage number.  The file is written as nls/c_NNN.nls, in DBCS
# format when lead bytes have been registered and in SBCS format otherwise,
# and then added to the "Codepage" registry key.
sub output_codepage_file($)
{
    my ($codepage) = @_;

    my $nls_name = sprintf "c_%03d.nls", $codepage;
    my $output = "nls/$nls_name";
    open OUTPUT,">$output.new" or die "Cannot create $output";

    printf "Building %s\n", $output;
    if (@lead_bytes) { dump_binary_dbcs_table( $codepage ); }
    else { dump_binary_sbcs_table( $codepage ); }

    close OUTPUT;
    save_file($output);

    add_registry_string_value( $nlskey, "Codepage", sprintf( "%d", $codepage ), $nls_name );
}
3695 ################################################################
3696 # output a codepage table from a Microsoft-style mapping file
# Takes the name of the mapping file inside the "codepages" data set.
# The global codepage tables are reset, filled from the file, and written
# out through output_codepage_file(); codepage 949 additionally triggers
# dump_krwansung_codepage().  Dies on any line that is not recognized and
# when the file does not declare its codepage number.
sub dump_msdata_codepage($)
{
    my $filename = shift;

    my $state = "";
    my ($codepage, $count);
    my ($lb_cur, $lb_end);

    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = $DEF_CHAR;

    my $INPUT = open_data_file( "codepages", $filename );

    while (<$INPUT>)
    {
        next if /^;/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        last if /^ENDCODEPAGE/;

        if (/^CODEPAGE\s+(\d+)/)
        {
            $codepage = $1;
            next;
        }
        # the first field (character width) is matched but not needed here;
        # both default characters must be plain hex numbers
        if (/^CPINFO\s+(\d+)\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            $default_char = hex $2;
            $default_wchar = hex $3;
            next;
        }
        if (/^(MBTABLE|GLYPHTABLE|WCTABLE|DBCSRANGE|DBCSTABLE)\s+(\d+)/)
        {
            $state = $1;
            $count = $2;
            next;
        }
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            # in every table the first definition of an entry wins
            if ($state eq "MBTABLE")
            {
                my $cp = hex $1;
                my $uni = hex $2;
                $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
                next;
            }
            if ($state eq "GLYPHTABLE")
            {
                my $cp = hex $1;
                my $uni = hex $2;
                $glyph2uni[$cp] = $uni unless defined($glyph2uni[$cp]);
                next;
            }
            if ($state eq "WCTABLE")
            {
                my $uni = hex $1;
                my $cp = hex $2;
                $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
                next;
            }
            if ($state eq "DBCSRANGE")
            {
                my $start = hex $1;
                my $end = hex $2;
                for (my $i = $start; $i <= $end; $i++) { add_lead_byte( $i ); }
                $lb_cur = $start;
                $lb_end = $end;
                next;
            }
            if ($state eq "DBCSTABLE")
            {
                my $mb = hex $1;
                my $uni = hex $2;
                my $cp = ($lb_cur << 8) | $mb;
                $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
                # once the table of the current lead byte is complete move to
                # the next one, and expect a new range after the last one
                if (!--$count)
                {
                    if (++$lb_cur > $lb_end) { $state = "DBCSRANGE"; }
                }
                next;
            }
        }
        die "$filename: Unrecognized line $_\n";
    }
    close $INPUT;

    die "$filename: missing CODEPAGE line\n" unless defined $codepage;

    output_codepage_file( $codepage );

    if ($codepage == 949) { dump_krwansung_codepage( @uni2cp ); }
}
3793 ################################################################
3794 # align a string length
# Arguments: alignment, string.  Returns the string followed by as many zero
# bytes as needed to make its length a multiple of the alignment.
sub align_string($$)
{
    my ($align, $str) = @_;
    my $excess = length($str) % $align;
    return $str unless $excess;
    return $str . pack( "C*", (0) x ($align - $excess) );
}
3802 ################################################################
3803 # pad a string with zeros
# Arguments: minimum length, string.  Returns the string extended with zero
# bytes up to that length; longer strings are returned unchanged.
sub pad_string($$)
{
    my ($pad, $str) = @_;
    my $missing = $pad - length($str);
    return $str unless $missing > 0;
    return $str . pack( "C*", (0) x $missing );
}
3811 ################################################################
3812 # pack a GUID string
# Takes a string containing a GUID in the usual 8-4-4-4-12 hex digit form
# and returns its 16-byte binary form: the first three fields are stored in
# little-endian order, the remaining eight bytes in the order written.
# Dies when the string does not contain a GUID.
sub pack_guid($)
{
    # work on a lexical copy, so that the caller's $_ is left alone
    my $guid = shift;
    my @fields = $guid =~ /([0-9A-Fa-f]{8})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})-([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})/
        or die "malformed GUID '$guid'\n";
    return pack "L<S<2C8", map { hex($_) } @fields;
}
3820 ################################################################
3821 # comparison function for compression sort
# Sort callback working on $a and $b: shorter rows come first, rows of the
# same length are ordered by their elements at index 4 to 12.
sub cmp_compression
{
    my $order = scalar @{$a} <=> scalar @{$b};
    for (my $i = 4; !$order && $i <= 12; $i++)
    {
        $order = $a->[$i] <=> $b->[$i];
    }
    return $order;
}
3836 ################################################################
3837 # build a binary sort keys table
# Takes the output file name.  The "sorting" data file is parsed, then the
# sort key table, the case mapping tables, the char type table and the sort
# tables are written to "$filename.new" and passed to save_file().  The
# locale -> sort GUID associations are added to the "Sorting\Ids" registry
# key.  Returns the packed char type table.
3838 sub dump_sortkey_table($)
3840 my $filename = shift;
3841 my @keys;
3842 my ($part, $section, $subsection, $guid, $version, $ling_flag);
3843 my @multiple_weights;
3844 my @expansions;
3845 my @compressions;
3846 my %exceptions;
3847 my %guids;
3848 my %compr_flags;
3849 my %locales;
3850 my $default_guid = "00000001-57ee-1e5c-00b4-d0000bb1e11e";
3851 my $jamostr = "";
3853 my $re_hex = '0x[0-9A-Fa-f]+';
3854 my $re_key = '(\d+\s+\d+\s+\d+\s+\d+)';
3855 $guids{$default_guid} = { };
3857 my %flags = ( "HAS_3_BYTE_WEIGHTS" => 0x01, "REVERSEDIACRITICS" => 0x10, "DOUBLECOMPRESSION" => 0x20, "INVERSECASING" => 0x40 );
3859 my $KEYS = open_data_file( "sorting" );
# NOTE(review): the file name is interpolated into the printf format, so a
# '%' in it would be misinterpreted; print would be safer
3861 printf "Building $filename\n";
# The parser tracks the current part (SORTKEY / SORTTABLES) and section; each
# data line is handled according to "$part.$section", and a line that no
# handler accepts ends up at the die at the bottom of the loop.
3863 while (<$KEYS>)
3865 s/\s*;.*$//;
3866 next if /^\s*$/; # skip empty lines
3867 if (/^\s*(SORTKEY|SORTTABLES)/)
3869 $part = $1;
3870 next;
3872 if (/^\s*(ENDSORTKEY|ENDSORTTABLES)/)
3874 $part = $section = "";
3875 next;
3877 if (/^\s*(DEFAULT|RELEASE|REVERSEDIACRITICS|DOUBLECOMPRESSION|INVERSECASING|MULTIPLEWEIGHTS|EXPANSION|COMPATIBILITY|COMPRESSION|EXCEPTION|JAMOSORT)\s+/)
3879 $section = $1;
3880 $guid = undef;
3881 next;
3883 next unless $part;
3884 if ("$part.$section" eq "SORTKEY.DEFAULT")
3886 if (/^\s*($re_hex)\s+$re_key/)
3888 $keys[hex $1] = [ split(/\s+/,$2) ];
3889 next;
3892 elsif ("$part.$section" eq "SORTTABLES.RELEASE")
3894 if (/^\s*NLSVERSION\s+0x([0-9A-Fa-f]+)/)
3896 $version = hex $1;
3897 next;
3899 if (/^\s*DEFINEDVERSION\s+0x([0-9A-Fa-f]+)/)
3901 # ignore for now
3902 next;
3905 elsif ("$part.$section" eq "SORTTABLES.REVERSEDIACRITICS" ||
3906 "$part.$section" eq "SORTTABLES.DOUBLECOMPRESSION" ||
3907 "$part.$section" eq "SORTTABLES.INVERSECASING")
3909 if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)/)
3911 $guid = lc $1;
3912 $guids{$guid} = { } unless defined $guids{$guid};
3913 $guids{$guid}->{flags} |= $flags{$section};
3914 next;
3916 if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
3918 $locales{$1} = $guid;
3919 next;
3922 elsif ("$part.$section" eq "SORTTABLES.MULTIPLEWEIGHTS")
3924 if (/^\s*(\d+)\s+(\d+)/)
3926 push @multiple_weights, $1, $2;
3927 next;
3930 elsif ("$part.$section" eq "SORTTABLES.EXPANSION")
3932 if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
# an expanded character gets a key holding the index of its expansion pair,
# unless it already has a key
3934 my $pos = scalar @expansions / 2;
3935 $keys[hex $1] = [ 2, 0, $pos & 0xff, $pos >> 8 ] unless defined $keys[hex $1];
3936 push @expansions, hex $2, hex $3;
3937 next;
3940 elsif ("$part.$section" eq "SORTTABLES.COMPATIBILITY")
3942 if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
3944 $keys[hex $1] = $keys[hex $2];
3945 next;
3948 elsif ("$part.$section" eq "SORTTABLES.COMPRESSION")
3950 if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*([A-Z0-9_]+)?/)
# consecutive SORTGUID lines share one compression table: a new table is
# started only for the first GUID, or once rows have been read (a subsection
# is active) for the previous one
3952 if ($subsection || !$guid) # start a new one
3954 $guid = lc $1;
3955 $subsection = "";
3956 $guids{$guid} = { } unless defined $guids{$guid};
3957 $guids{$guid}->{flags} |= $flags{$2} if $2;
3958 $guids{$guid}->{compr} = @compressions;
3959 $exceptions{"$guid-"} = [ ] unless defined $exceptions{"$guid-"};
3960 $compr_flags{$guid} = [ ] unless defined $compr_flags{$guid};
3961 push @compressions, [ ];
3963 else # merge with current one
3965 $guids{lc $1} = { } unless defined $guids{lc $1};
3966 $guids{lc $1}->{flags} |= $flags{$2} if $2;
3967 $guids{lc $1}->{compr} = $guids{$guid}->{compr};
3968 $compr_flags{lc $1} = $compr_flags{$guid};
3970 next;
3972 if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
3974 $locales{$1} = $guid;
3975 next;
3977 if (/^\s*(TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT)/)
3979 $subsection = $1;
3980 next;
3982 if ($subsection && /^\s*(($re_hex\s+){2,8})$re_key/)
# a row is stored as the four key values followed by the character sequence
3984 my @comp = map { hex $_; } split(/\s+/,$1);
3985 push @{$compressions[$#compressions]}, [ split(/\s+/,$3), @comp ];
3986 # add compression flags
3987 $compr_flags{$guid}->[$comp[0]] |= @comp >= 6 ? 0xc0 : @comp >= 4 ? 0x80 : 0x40;
3988 next;
3991 elsif ("$part.$section" eq "SORTTABLES.EXCEPTION")
3993 if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*(LINGUISTIC_CASING)?/)
# exception tables are kept per GUID under the keys "<guid>+" (with
# LINGUISTIC_CASING) and "<guid>-" (without)
3995 $guid = lc $1;
3996 $guids{$guid} = { } unless defined $guids{lc $1};
3997 $ling_flag = ($2 ? "+" : "-");
3998 $exceptions{"$guid$ling_flag"} = [ ] unless defined $exceptions{"$guid$ling_flag"};
3999 next;
4001 if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
4003 $locales{$1} = $guid;
4004 next;
4006 if (/^\s*($re_hex)\s+$re_key/)
4008 $exceptions{"$guid$ling_flag"}->[hex $1] = [ split(/\s+/,$2) ];
4009 next;
4012 elsif ("$part.$section" eq "SORTTABLES.JAMOSORT")
4014 if (/^\s*$re_hex\s+(($re_hex\s*){5})/)
4016 $jamostr .= pack "C8", map { hex $_; } split /\s+/, $1;
4017 next;
4020 die "$current_data_file: $part.$section: unrecognized line $_\n";
4022 close $KEYS;
4024 # Sortkey table
# four bytes per character 0..0xffff, with the first two key values swapped;
# characters without a key get four zero bytes
4026 my $table;
4027 for (my $i = 0; $i < 0x10000; $i++)
4029 my @k = defined $keys[$i] ? @{$keys[$i]} : (0) x 4;
4030 $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
# Each exception table is appended to the sort key table: 0x100 32-bit block
# offsets (in units of 4-byte entries), then one 0x100-entry block for every
# 256-character range that contains an exception or a compression flag.
# Ranges without a block point into the default table.
4033 foreach my $id (sort keys %exceptions)
4035 my $pos = length($table) / 4;
4036 my @exc = @{$exceptions{$id}};
4037 my @filled;
4038 my $key = (substr( $id, -1 ) eq "+" ? "ling_except" : "except");
4039 my $guid = substr( $id, 0, -1 );
4040 $guids{$guid}->{$key} = $pos;
4041 $pos += 0x100;
# NOTE(review): perlsyn documents "my ... if COND" as having undefined
# behaviour; declaring @flags first and assigning it conditionally would
# avoid relying on it
4042 my @flags = @{$compr_flags{$guid}} if defined $compr_flags{$guid};
4043 for (my $j = 0; $j < 0x10000; $j++)
4045 next unless defined $exc[$j] || defined $flags[$j];
4046 $filled[$j >> 8] = 1;
# skip to the end of this 256-character range
4047 $j |= 0xff;
4049 for (my $j = 0; $j < 0x100; $j++)
4051 $table .= pack "L<", $filled[$j] ? $pos : $j * 0x100;
4052 $pos += 0x100 if $filled[$j];
4054 for (my $j = 0; $j < 0x10000; $j++)
4056 next unless $filled[$j >> 8];
4057 my @k = defined $exc[$j] ? @{$exc[$j]} : defined $keys[$j] ? @{$keys[$j]} : (0) x 4;
4058 $k[3] |= $flags[$j] || 0;
4059 $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
4063 # Case mapping tables
4065 # standard table
4066 my @casemaps;
4067 my @upper = @toupper_table;
4068 my @lower = @tolower_table;
4069 remove_linguistic_mappings( \@upper, \@lower );
4070 $casemaps[0] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
4072 # linguistic table
4073 $casemaps[1] = pack( "S<*", 1) . dump_binary_case_table( @toupper_table ) . dump_binary_case_table( @tolower_table );
4075 # Turkish table
4076 @upper = @toupper_table;
4077 @lower = @tolower_table;
4078 $upper[ord 'i'] = 0x130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
4079 $lower[ord 'I'] = 0x131; # LATIN SMALL LETTER DOTLESS I
4080 $casemaps[2] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
4081 my $casemaps = align_string( 8, $casemaps[0] . $casemaps[1] . $casemaps[2] );
4083 # Char type table
# every distinct combination of three 16-bit values (low half of the category,
# the %c2_types value of the direction, high half of the category) is stored
# once in $types; @table maps each character to the index of its combination
4085 my @table;
4086 my $types = "";
4087 my %typestr;
4088 for (my $i = 0; $i < 0x10000; $i++)
4090 my $str = pack "S<3",
4091 ($category_table[$i] || 0) & 0xffff,
4092 defined($direction_table[$i]) ? $c2_types{$direction_table[$i]} : 0,
4093 ($category_table[$i] || 0) >> 16;
4095 if (!defined($typestr{$str}))
4097 $typestr{$str} = length($types) / 6;
4098 $types .= $str;
4100 $table[$i] = $typestr{$str};
4103 my (@rows, @array, @data, @row_data);
4104 (@rows[0..4095], @data) = compress_array( 4096, 0, @table[0..65535] );
4105 (@array[0..255], @row_data) = compress_array( 256, 0, @rows );
4106 for (my $i = 0; $i < 256; $i++) { $array[$i] *= 2; } # we need byte offsets
4107 for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += 2 * @row_data + 512 - 4096; }
# the two offset levels are stored as 16-bit values, the type indexes as bytes
4109 my $arraystr = pack("S<*", @array, @row_data) . pack("C*", @data);
4110 my $chartypes = pack "S<2", 4 + length($types) + length($arraystr), 2 + length($types);
4111 $chartypes = align_string( 8, $chartypes . $types . $arraystr );
4113 # Sort tables
4115 # guids
# NOTE(review): a hash in scalar context yields its key count only since
# Perl 5.26; older versions return bucket usage information, so
# "scalar keys %guids" would be the portable spelling
4116 my $sorttables = pack "L<2", $version, scalar %guids;
4117 foreach my $id (sort keys %guids)
4119 my %guid = %{$guids{$id}};
4120 my $flags = $guid{flags} || 0;
# offset of the case mapping table for this GUID inside $casemaps; GUIDs with
# a linguistic exception table skip over the linguistic table as well
4121 my $map = length($casemaps[0]) + (defined $guid{ling_except} ? length($casemaps[1]) : 0);
4122 $sorttables .= pack_guid($id) . pack "L<5",
4123 $flags,
4124 defined($guid{compr}) ? $guid{compr} : 0xffffffff,
4125 $guid{except} || 0,
4126 $guid{ling_except} || 0,
4127 $map / 2;
4130 # expansions
4131 $sorttables .= pack "L<S<*", scalar @expansions / 2, @expansions;
4133 # compressions
# one header per compression table (position of its first row, lowest and
# highest character used, number of rows for each sequence length), followed
# by the rows of all the tables
4134 $sorttables .= pack "L<", scalar @compressions;
4135 my $rowstr = "";
4136 foreach my $c (@compressions)
4138 my $pos = length($rowstr) / 2;
4139 my $min = 0xffff;
4140 my $max = 0;
4141 my @lengths = (0) x 8;
4142 foreach my $r (sort cmp_compression @{$c})
4144 my @row = @{$r};
4145 $lengths[scalar @row - 6]++;
4146 foreach my $val (@row[4..$#row])
4148 $min = $val if $min > $val;
4149 $max = $val if $max < $val;
4151 $rowstr .= align_string( 4, pack "S<*", @row[4..$#row] );
4152 $rowstr .= pack "C4", $row[1], $row[0], $row[2], $row[3];
4154 $sorttables .= pack "L<S<10", $pos, $min, $max, @lengths;
4156 $sorttables .= $rowstr;
4158 # multiple weights
4159 $sorttables .= align_string( 4, pack "L<C*", scalar @multiple_weights / 2, @multiple_weights );
4161 # jamo sort
4162 $sorttables .= pack("L<", length($jamostr) / 8) . $jamostr;
4164 # Locales
4166 add_registry_key( $nlskey, "Sorting\\Ids", "{$default_guid}" );
4167 foreach my $loc (sort keys %locales)
4169 # skip specific locales that match more general ones
4170 my @parts = split /[-_]/, $loc;
4171 next if @parts > 1 && defined($locales{$parts[0]}) && $locales{$parts[0]} eq $locales{$loc};
4172 next if @parts > 2 && defined($locales{"$parts[0]-$parts[1]"}) && $locales{"$parts[0]-$parts[1]"} eq $locales{$loc};
4173 add_registry_string_value( $nlskey, "Sorting\\Ids", $loc, "\{$locales{$loc}\}" );
4176 # File header
# four 32-bit byte offsets: the 16-byte header itself is followed by the sort
# key table, the case maps, the char types and the sort tables
4178 my @header;
4179 $header[0] = 16;
4180 $header[1] = $header[0] + length $table;
4181 $header[2] = $header[1] + length $casemaps;
4182 $header[3] = $header[2] + length $chartypes;
4184 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
4185 print OUTPUT pack "L<*", @header;
4186 print OUTPUT $table, $casemaps, $chartypes, $sorttables;
4187 close OUTPUT;
4188 save_file($filename);
4189 return $chartypes;
# locale records indexed by locale name
my %lcnames;

# Returns the name of the parent of a locale: the "sparent" or "parent"
# field of its record when there is one, otherwise the name without its last
# "-xxx" component, or "" when nothing can be stripped.  Returns undef for
# an empty name.
sub locale_parent($)
{
    my ($loc) = @_;

    return undef unless $loc;
    if (defined( my $info = $lcnames{$loc} ))
    {
        foreach my $field (qw(sparent parent))
        {
            return $info->{$field} if defined $info->{$field};
        }
    }
    return $loc =~ /(.*)-[0-9A-Za-z]+/ ? $1 : "";
}
# Sort callback working on $a and $b: compares two locale names as strings,
# treating A-Z as a-z and '_' as '-'.
sub compare_locales
{
    my @names = ($a, $b);
    tr/A-Z_/a-z-/ foreach @names;
    return $names[0] cmp $names[1];
}
4213 # query an xml key
# Arguments: xml object, query string.  Returns the text content of the
# first entry found, or undef when the query yields nothing; a warning is
# printed when there is more than one entry.
sub xml_query($$)
{
    my ($xml, $query) = @_;
    my $found = $xml->find( $query );
    if (!$found) { return undef; }
    if (@{$found} > 1) { printf STDERR "multiple entries for %s\n", $query; }
    return @{$found}[0]->textContent;
}
4223 # query an xml key for a locale, with fallback to the parents
# Arguments: locale record, query string.  The locale and then each of its
# parents (as given by locale_parent) is searched for the first entry whose
# text is not the "↑↑↑" marker.  Returns its text content, or undef
# when none is found.
sub loc_query($$)
{
    my ($loc, $query) = @_;

    $loc = $lcnames{"en-US"} unless $loc->{name};  # fallback to "en-US" for root locale

    my $cur = $loc->{name};
    while (defined $cur)
    {
        my $entry = $lcnames{$cur};
        next unless defined $entry;
        my $ret = $entry->{xml}->find( $query );
        next unless $ret;
        printf STDERR "%s: multiple entries for %s\n", $cur, $query if (@{$ret} > 1);
        next if @{$ret}[0]->textContent eq "\x{2191}\x{2191}\x{2191}";  # "↑↑↑"
        return @{$ret}[0]->textContent;
    }
    continue
    {
        # also reached through "next": move up to the parent locale
        $cur = locale_parent( $cur );
    }
    return undef;
}
4243 # retrieve a locale field entry by going up the parents tree
# Arguments: locale record, field name, default value.  The field is looked
# up in the locale itself, in "en-US" for a locale without a name, along the
# alias chain, and finally in the parents reached through "sparent" or by
# stripping the last "-xxx" component of the name.  Returns the default
# value when the field is not found.
sub locale_entry($$$)
{
    my ($loc, $field, $def) = @_;

    return $loc->{$field} if defined $loc->{$field};

    if (!$loc->{name})  # fallback to "en-US" for root locale
    {
        $loc = $lcnames{"en-US"};
        return $loc->{$field} if defined $loc->{$field};
    }
    while (defined( my $alias = $loc->{alias} ))  # resolve aliases
    {
        $loc = $lcnames{$alias};
        return $loc->{$field} if defined $loc->{$field};
    }

    my $cur = $loc->{name};
    while ($cur)
    {
        my $info = $lcnames{$cur};
        if (defined $info && defined $info->{sparent}) { $cur = $info->{sparent}; }
        elsif ($cur =~ /(.*)-[0-9A-Za-z]+/) { $cur = $1; }
        else { last; }

        my $ancestor = $lcnames{$cur};
        return $ancestor->{$field} if defined $ancestor && defined $ancestor->{$field};
    }
    return $def;
}
# string data collected so far
my $string_data;

# Returns the position of the given data inside the collected string data,
# in units of two bytes; the data is appended first if it is not already
# part of it.
sub add_str_data($)
{
    my ($txt) = @_;
    my $offset = index( $string_data, $txt );
    return $offset / 2 unless $offset == -1;

    $offset = length($string_data);
    $string_data .= $txt;
    return $offset / 2;
}
# Stores a string as a 16-bit length, the UTF-16LE text and a 16-bit zero.
# Returns the value of add_str_data() for it, or 0 for an undefined or
# empty string.
sub add_string($)
{
    my ($str) = @_;
    return 0 if !defined($str) || $str eq "";

    my $utf = encode( "UTF16LE", $str );
    my $count = pack "S<", length($utf) / 2;
    my $terminator = pack "S", 0;
    return add_str_data( $count . $utf . $terminator );
}
# Stores the given values as 32-bit integers, preceded by a 16-bit word
# holding twice their number.  Returns the value of add_str_data().
sub add_fontsig(@)
{
    my @values = @_;
    my $blob = pack "S<L<*", 2 * scalar(@values), @values;
    return add_str_data( $blob );
}
# Stores every string with add_string(), then a 16-bit count followed by
# the 32-bit values that add_string() returned.  Returns the value of
# add_str_data() for that array, or 0 for an empty list.
sub add_strarray(@)
{
    return 0 unless @_;
    my @offsets = map { add_string($_) } @_;
    my $blob = pack "S<L<*", scalar @_, @offsets;
    return add_str_data( $blob );
}
# Converts a number format into a grouping string with one character per
# group, whose code is the size of the group (rightmost group first).
# Formats that are not recognized yield a single group of 3.
sub format_to_grouping($)
{
    my ($format) = @_;

    if (my ($second, $first) = $format =~ /#,(#+),(#+0)/)
    {
        return chr(length($first)) . chr(length($second));
    }
    if (my ($single) = $format =~ /#,(#+0)/)
    {
        return chr(length($single));
    }
    # unknown formats are silently accepted
    return chr(3);
}
# Arguments: a name (not used at the moment) and a currency format made of
# a positive pattern optionally followed by ';' and a negative pattern.
# Returns the indexes of the first matching entries in the positive and
# negative pattern lists.  Unknown patterns map to index 0, and a missing
# negative pattern is derived from the positive one.
sub parse_currency_format($$)
{
    my ($name, $format) = @_;
    my ($posfmt, $negfmt) = split /;/, $format;
    my @pospatterns = ( "\xa4[^\xa0]*#",        # $1.1
                        "00[^\xa0]*\xa4",       # 1.1$
                        "\xa4.*\xa0.*#",        # $ 1.1
                        "00.*\xa0.*\xa4" );     # 1.1 $
    my @negpatterns = ( "\\(\xa4[^\xa0]*#",     # ($1.1)
                        "-\xa4[^\xa0]*#",       # -$1.1
                        "\xa4[^\xa0]*-#",       # $-1.1
                        "\xa4[^\xa0]*#.*00-",   # $1.1-
                        "00[^\xa0]*\xa4\\)",    # (1.1$)
                        "-#.*00[^\xa0]*\xa4",   # -1.1$
                        "00-[^\xa0]*\xa4",      # 1.1-$
                        "00[^\xa0]*\xa4-",      # 1.1$-
                        "-#.*00.*\xa0.*\xa4",   # -1.1 $
                        "-\xa4.*\xa0.*#",       # -$ 1.1
                        "00.*\xa0.*\xa4-",      # 1.1 $-
                        "\xa4.*\xa0.*#.*00-",   # $ 1.1-
                        "\xa4.*\xa0.*-#",       # $ -1.1
                        "00-.*\xa0.*\xa4",      # 1.1- $
                        "\\(\xa4.*\xa0.*#",     # ($ 1.1)
                        "00.*\xa0.*\xa4\\)");   # (1.1 $)

    my $pos = 0;
    foreach my $i (0 .. $#pospatterns)
    {
        my $re = $pospatterns[$i];
        next unless $posfmt =~ /$re/;
        $pos = $i;
        last;
    }

    my $neg;
    if (defined $negfmt)
    {
        $neg = 0;
        foreach my $i (0 .. $#negpatterns)
        {
            my $re = $negpatterns[$i];
            next unless $negfmt =~ /$re/;
            $neg = $i;
            last;
        }
    }
    else
    {
        # negative pattern that goes with each of the four positive ones
        $neg = (1, 5, 9, 8)[$pos];
    }

    return ($pos, $neg);
}
# Takes a percent format and returns two indexes: the index of the first
# matching pattern, and the same value again except that 3 becomes 7.
# When no pattern matches a message is printed and the number of patterns
# is used as index.
sub parse_percent_format($)
{
    my ($fmt) = @_;
    my @patterns = ( "0.+%",    # 1 %
                     "0%",      # 1%
                     "%#",      # %1
                     "%.+#" );  # % 1

    my $pos = scalar @patterns;
    foreach my $i (0 .. $#patterns)
    {
        my $re = $patterns[$i];
        next unless $fmt =~ /$re/;
        $pos = $i;
        last;
    }
    printf STDERR "unknown format '%s'\n", $fmt if ($pos == @patterns);
    return ($pos, $pos == 3 ? 7 : $pos);
}
# Rewrites the first occurrence of some date pattern letters: a run of G
# becomes "gg", LLLL/LLL become MMMM/MMM, a run of E or of three or more c
# becomes "dddd", and a lone y is expanded to "yyyy".
sub convert_date_format($)
{
    my ($fmt) = @_;
    for ($fmt)
    {
        s/G+/gg/;
        s/LLLL/MMMM/;
        s/LLL/MMM/;
        s/E+/dddd/;
        s/ccc+/dddd/;
        # a single y: in the middle, at the start, or at the end of the string
        s/([^gy])y([^y])/$1yyyy$2/;
        s/^y([^y])/yyyy$1/;
        s/([^gy])y$/$1yyyy/;
    }
    return $fmt;
}
# Rewrite the day-period markers of a time format string: the first run
# of "a" and the first run of "B" are each replaced by "tt".  The rest of
# the string is returned unchanged.
sub convert_time_format($)
{
    my $pattern = shift;
    foreach my $marker (qr/a+/, qr/B+/)
    {
        $pattern =~ s/$marker/tt/;
    }
    return $pattern;
}
# Load the ISO 639 language code table.
#
# Reads iso-639-3.tab from the "iso639" data set and returns a hash that
# maps the two-letter code found in the fourth column of a line to the
# three-letter code found in its third column.  Lines that do not have
# that shape are ignored.
sub load_iso639()
{
    my %iso639;
    my $DATA = open_data_file( "iso639", "iso-639-3_Code_Tables_$ISO639VERSION/iso-639-3.tab" );
    # read into a lexical: a bare while (<$DATA>) would overwrite the caller's $_
    while (my $line = <$DATA>)
    {
        next unless $line =~ /^\s*[a-z]{3}\s+[a-z]{3}\s+([a-z]{3})\s+([a-z]{2})\s/;
        $iso639{$2} = $1;
    }
    close $DATA;
    return %iso639;
}
4423 ################################################################
4424 # build the locale table for locale.nls
4425 sub build_locale_data()
4427 my $base = "cldr-release-$CLDRVERSION";
4428 my $suppl = load_xml_data_file( "cldr", "$base/common/supplemental/supplementalData.xml" );
4429 my $subtags = load_xml_data_file( "cldr", "$base/common/supplemental/likelySubtags.xml" );
4430 my $numbers = load_xml_data_file( "cldr", "$base/common/supplemental/numberingSystems.xml" );
4431 # obsolete phone data from CLDR version 33
4432 my $phone = load_xml_data_file( "cldr33", "common/supplemental/telephoneCodeData.xml" );
4433 my %iso639 = load_iso639();
4434 $string_data = pack "S2", 0, 0; # offset 0 == empty string
4436 %lcnames = map { $_->{name} => $_ } @locales;
4438 my %lcids;
4439 foreach my $loc (@locales) { $lcids{$loc->{lcid}} = $loc if defined $loc->{lcid}; }
4441 my %days = ( "sun" => 0, "mon" => 1, "tue" => 2, "wed" => 3, "thu" => 4, "fri" => 5, "sat" => 6 );
4443 # assign locale parents
4445 foreach my $loc (@locales)
4447 next if $loc->{name} eq "";
4448 next if defined $loc->{parent};
4449 (my $unix_name = $loc->{name}) =~ s/-/_/g;
4450 my $parent = xml_query( $suppl, "/supplementalData/parentLocales/parentLocale[contains(concat(' ',\@locales,' '),' $unix_name ')]/\@parent" );
4451 if ($parent)
4453 $parent =~ s/_/-/g;
4454 $parent = "" if $parent eq "root";
4456 elsif ($loc->{name} =~ /(.*)-[0-9A-Za-z]+/) { $parent = $1; }
4457 $loc->{parent} = $parent || "";
4460 # load per-locale XML files
4462 foreach my $loc (@locales)
4464 next if defined $loc->{alias};
4465 (my $file = $loc->{file} || $loc->{name}) =~ s/-/_/g;
4466 $file = "$base/" . ($loc->{dir} || "common") . "/main/$file.xml";
4467 my $xml = load_xml_data_file( "cldr", $file );
4468 $loc->{xml} = $xml;
4469 $loc->{language} ||= xml_query( $xml, "/ldml/identity/language/\@type" );
4470 $loc->{territory} ||= xml_query( $xml, "/ldml/identity/territory/\@type" );
4471 $loc->{script} = xml_query( $xml, "/ldml/identity/script/\@type" );
4472 if (!defined($loc->{territory}) && $loc->{name} =~ /-([A-Z]{2}|[0-9]{3})$/) { $loc->{territory} = $1; }
4473 if (!defined($loc->{script}) && $loc->{name} =~ /-([A-Z][a-z]{3})(-[A-Z]{2})?$/) { $loc->{script} = $1; }
4476 # assign a default territory and sort locale
4478 foreach my $loc (@locales)
4480 next if defined $loc->{alias};
4481 next if defined $loc->{territory};
4482 my $id = $loc->{sortlocale};
4483 if (defined $id && ($id =~ /[-_]([A-Z0-9]+)$/))
4485 $loc->{territory} = $1;
4486 next;
4488 my @children = grep /^$loc->{name}-[A-Z0-9]+$/ && !defined $lcnames{$_}->{alias}, keys %lcnames;
4489 if (@children == 1)
4491 $id = $children[0];
4493 else
4495 my $name = $loc->{file} || $loc->{name};
4496 $name =~ s/-(Arab|Beng|Cyrl|Deva|Guru|Hans|Hant|Latn|Tfng|Vaii)$//;
4497 $name =~ s/-/_/g;
4498 $id = xml_query( $subtags, "/supplementalData/likelySubtags/likelySubtag[\@from='$name']/\@to" );
4499 $id =~ s/_/-/g if $id;
4501 if ($id =~ /[-_]([A-Z0-9]+)$/)
4503 $loc->{territory} = $1;
4504 next if defined $loc->{sortlocale};
4505 next unless $id =~ /^$loc->{name}/;
4506 while (defined $lcnames{$id} && defined $lcnames{$id}->{alias}) { $id = $lcnames{$id}->{alias}; }
4507 $loc->{sortlocale} = $id if defined $lcnames{$id};
4508 next;
4510 print STDERR "no territory found for $loc->{name}\n";
4513 # fill geoid table
4515 my %geotable;
4516 foreach my $geo (@geoids)
4518 my $name = $geo->{name};
4519 next unless defined $name;
4520 $geo->{alias} = $geotable{$name} if defined $geotable{$name};
4521 $geotable{$name} ||= $geo;
4523 foreach my $loc (@locales)
4525 next if defined $loc->{alias};
4526 my $territory = $loc->{territory};
4527 $geotable{$territory} ||= { name => $territory };
4529 foreach my $name (keys %geotable)
4531 my $geo = $geotable{$name};
4532 $geo->{dialcode} = xml_query( $phone, "(/supplementalData/telephoneCodeData/codesByTerritory[\@territory='$name']/telephoneCountryCode)[1]/\@code" );
4533 if ($name =~ /\d+/)
4535 $geo->{uncode} = $name;
4536 next;
4538 $geo->{iso2} = $name;
4539 $geo->{iso3} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@alpha3");
4540 $geo->{uncode} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@numeric");
4541 $geo->{sintlsymbol} ||= xml_query( $suppl, "(/supplementalData/currencyData/region[\@iso3166='$name']/currency[not(\@to)])[1]/\@iso4217") || "XXX";
4542 $geo->{sintlsymbol} =~ s/XXX/XDR/;
4544 foreach my $geo (@geoids)
4546 $geo->{parentid} = $geotable{$geo->{parent}}->{id} if defined $geo->{parent};
4547 next if defined $geo->{iso2};
4548 next if defined $geo->{alias};
4549 next unless defined $geo->{uncode};
4550 my @contains;
4551 my $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and not(\@status)]/\@contains");
4552 push @contains, split /\s+/, $list if defined $list;
4553 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and \@status='deprecated']/\@contains");
4554 push @contains, split /\s+/, $list if defined $list;
4555 while (@contains)
4557 my $territory = pop @contains;
4558 if (defined $geotable{$territory})
4560 $geotable{$territory}->{parentid} ||= $geo->{id};
4562 elsif ($territory =~ /\d+/)
4564 # expand region recursively
4565 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$territory' and not(\@status)]/\@contains" );
4566 push @contains, split /\s+/, $list if defined $list;
4571 # assign calendars to their locale
4573 foreach my $cal (@calendars)
4575 next unless defined $cal->{locale};
4576 my $loc = $lcnames{$cal->{locale}};
4577 $loc->{calendar} = [ ] unless defined $loc->{calendar};
4578 push @{$loc->{calendar}}, $cal;
4581 # assign default lcid to aliases
4583 foreach my $loc (@locales)
4585 next unless defined $loc->{alias};
4586 next if defined $loc->{lcid};
4587 my $alias = $loc->{alias};
4588 my $lcid = $lcnames{$alias}->{lcid} || 0x1000;
4589 $loc->{lcid} = $lcid | 0x80000000;
4592 # assign sort aliases to parent locale
4594 foreach my $loc (@locales)
4596 next unless $loc->{name} =~ /_/;
4597 next unless defined $loc->{alias};
4598 my $alias = $loc->{alias};
4599 my $parent = $lcnames{$alias};
4600 my $basename = $parent->{name};
4601 while (1)
4603 @{$parent->{sortnames}}[($loc->{lcid} >> 16) - 1] = $loc->{name};
4604 $alias = locale_parent( $alias );
4605 last unless $alias && defined $lcnames{$alias};
4606 $parent = $lcnames{$alias};
4607 last if defined $parent->{sortbase} && $parent->{sortbase} ne $basename;
4608 $parent->{sortbase} = $basename;
4612 # assign an array index to all locales
4614 my $idx = 0;
4615 foreach my $loc (@locales)
4617 next if defined $loc->{alias};
4618 $loc->{idx} = $idx++;
4620 foreach my $loc (@locales)
4622 my $alias = $loc->{alias};
4623 next unless defined $alias;
4624 while (defined $lcnames{$alias}->{alias}) { $alias = $lcnames{$alias}->{alias}; }
4625 $loc->{idx} = $lcnames{$alias}->{idx};
4628 # output lcids table
4630 my $lcid_data = "";
4631 foreach my $id (sort { $a <=> $b } keys %lcids)
4633 my $loc = $lcids{$id};
4634 $lcid_data .= pack "L<S<2", $id, $loc->{idx}, add_string($loc->{name});
4637 # output lcnames table
4639 my $lcname_data = "";
4640 foreach my $name (sort compare_locales keys %lcnames)
4642 my $loc = $lcnames{$name};
4643 $lcname_data .= pack "S<2L<", add_string($name), $loc->{idx}, $loc->{lcid} || 0x1000;
4646 # output locales array
4648 my $locale_data = "";
4649 my $default_lcid = 0x8001;
4650 foreach my $loc (@locales)
4652 next if defined $loc->{alias};
4653 my $sname = $loc->{name};
4654 my $language = $loc->{language};
4655 my $territory = $loc->{territory};
4656 my $script = $loc->{script};
4657 my $neutral = ($sname && $sname !~ /-$territory/);
4658 my $sparent = $loc->{sparent} || (($sname =~ /(.*)-[0-9A-Za-z]+/) ? $1 : $loc->{parent});
4659 my $unique_lcid = $loc->{lcid};
4660 unless (defined $unique_lcid) { $unique_lcid = $default_lcid++; }
4661 my $geo = $geotable{$territory};
4662 my $territory_match = "contains(concat(' ',normalize-space(\@territories),' '),' $territory ')";
4664 # languages and scripts
4666 my $ssortlocale = $loc->{sortlocale} || ($neutral ? "$sname-$territory" : $sname);
4667 my $idefaultlanguage = defined $lcnames{$ssortlocale} ? $lcnames{$ssortlocale}->{lcid} : undef;
4668 $idefaultlanguage = $lcnames{"en-US"}->{lcid} unless $ssortlocale;
4669 (my $siso639langname = $sname) =~ s/-.*$//;
4670 my $siso639langname2 = $iso639{$siso639langname} || $siso639langname;
4671 my $sopentypelang = sprintf "%-4s", locale_entry( $loc, "sopentypelang", uc $siso639langname2 );
4672 my $sabbrevlangname = defined $loc->{lcid} ? locale_entry( $loc, "sabbrevlangname", uc $siso639langname2 ) : "ZZZ";
4673 my $siso3166ctryname2 = $geo->{iso3} || $geo->{uncode};
4674 my $senglanguage = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" ) || "";
4675 my $sengcountry = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" ) || "";
4676 my $snativelangname = loc_query( $loc, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" );
4677 my $snativectryname = loc_query( $loc, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" );
4678 $sengcountry =~ s/South Korea/Korea/;
4679 $snativelangname ||= $senglanguage;
4680 $snativectryname ||= $sengcountry;
4681 if ($script)
4683 my $engscript = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4684 my $nativescript = loc_query( $loc, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4685 $senglanguage .= " ($engscript)" if $engscript;
4686 $snativelangname .= " ($nativescript)" if $nativescript;
4688 my $sengdisplayname = $neutral ? $senglanguage : "$senglanguage ($sengcountry)";
4689 my $snativedisplayname = $neutral ? $snativelangname : "$snativelangname ($snativectryname)";
4690 $sengdisplayname =~ s/\) \(/, /;
4691 $snativedisplayname =~ s/\) \(/, /;
4692 my $sscripts = locale_entry( $loc, "sscripts", $script ) || xml_query( $suppl, "/supplementalData/languageData/language[\@type='$language' and not(\@alt)]/\@scripts" );
4693 $sscripts = (join ";", (sort split / /, ($sscripts || "Latn"))) . ";";
4694 my $ireadinglayout = locale_entry( $loc, "ireadinglayout", 0 );
4695 my $charlayout = loc_query( $loc, "/ldml/layout/orientation/characterOrder" );
4696 if ($charlayout eq "right-to-left")
4698 $ireadinglayout = 1;
4700 elsif ($charlayout eq "top-to-bottom")
4702 my $linelayout = loc_query( $loc, "/ldml/layout/orientation/lineOrder" );
4703 $ireadinglayout = $linelayout eq "right-to-left" ? 2 : 3;
4705 my $igeoid = $geo->{id} || 0;
4707 # numbers
4709 my $sdecimal = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/decimal" );
4710 my $slist = locale_entry( $loc, "slist", ";" );
4711 my $smondecimalsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyDecimal" ) || $sdecimal;
4712 my $sthousand = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/group" );
4713 $sthousand =~ s/\x{202f}/\x{00a0}/;
4714 my $smonthousandsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyGroup" ) || $sthousand;
4715 my $spositivesign = "";
4716 my $snegativesign = "-";
4717 my $spercent = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/percentSign" );
4718 my $snan = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/nan" );
4719 my $sposinfinity = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/infinity" );
4720 my $sneginfinity = $sposinfinity ? "-$sposinfinity" : "";
4721 my $sgrouping = format_to_grouping( loc_query( $loc, "/ldml/numbers/decimalFormats[\@numberSystem='latn']/decimalFormatLength[not(\@type)]/decimalFormat/pattern" ));
4722 my $percentformat = loc_query( $loc, "/ldml/numbers/percentFormats[\@numberSystem='latn']/percentFormatLength[not(\@type)]/percentFormat/pattern" );
4723 my $currencyformat = loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='accounting']/pattern" ) ||
4724 loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='standard']/pattern" );
4725 my $smongrouping = format_to_grouping( $currencyformat );
4726 my ($icurrency, $inegcurr) = parse_currency_format( $sname, $currencyformat );
4727 my ($ipospercent, $inegpercent) = parse_percent_format( $percentformat );
4728 my $native_numbering = loc_query( $loc, "/ldml/numbers/otherNumberingSystems/native" );
4729 my @snativedigits = split //, (locale_entry( $loc, "nativedigits", "" ) || xml_query( $numbers, "/supplementalData/numberingSystems/numberingSystem[\@id='$native_numbering']/\@digits" ));
4730 my $digitsubstitution = !(ord($snativedigits[0]) >= 0x600 && ord($snativedigits[0]) <= 0x6ff);
4731 my $measure = defined xml_query( $suppl, "/supplementalData/measurementData/measurementSystem[\@type='US' and $territory_match]" );
4732 my $papersize = defined xml_query( $suppl, "/supplementalData/measurementData/paperSize[\@type='US-Letter' and $territory_match]" );
4734 # currencies
4736 my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
4737 my $scurrency = $geo->{scurrency} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[\@alt='narrow']" );
4738 $scurrency ||= loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[not(\@alt)]" );
4739 $geo->{scurrency} = $scurrency if $scurrency;
4740 $scurrency ||= $sintlsymbol;
4741 my $sengcurrname = $loc->{sengcurrname} || loc_query( $lcnames{en}, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" );
4742 my $snativecurrname = $loc->{sengcurrname} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" ) || $sengcurrname;
4743 my $icurrdigits = xml_query( $suppl, "/supplementalData/currencyData/fractions/info[\@iso4217='$sintlsymbol']/\@digits" );
4744 $icurrdigits = 2 unless defined $icurrdigits;
4746 # calendars
4748 my $firstday = xml_query( $suppl, "/supplementalData/weekData/firstDay[not(\@alt) and $territory_match]/\@day" );
4749 my $ifirstdayofweek = $firstday ? $days{$firstday} : 1;
4750 my $firstweekofyear = (xml_query( $suppl, "/supplementalData/weekData/minDays[$territory_match]/\@count" ) || 0) == 4 ? 2 : 0;
4751 my $serastring = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/eras/eraAbbr/era[\@type='1' and not(\@alt)]" );
4752 my (@sdayname, @sabbrevdayname, @sshortestdayname);
4753 foreach my $d (sort { $days{$a} <=> $days{$b} } keys %days)
4755 my $n = $days{$d};
4756 my %name;
4757 foreach my $type (qw(wide abbreviated short))
4759 $name{$type} = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/days/dayContext[\@type='format']/dayWidth[\@type='$type']/day[\@type='$d' and not(\@alt)]" );
4761 push @sdayname, $name{wide};
4762 push @sabbrevdayname, $name{abbreviated} || $name{wide};
4763 push @sshortestdayname, $name{short} || $name{abbreviated} || $name{wide};
4765 my (@smonthname, @sabbrevmonthname, @sgenitivemonth, @sabbrevgenitivemonth);
4766 foreach my $n (1..13)
4768 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='wide']/month[\@type='$n']" );
4769 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4770 my $genitive = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n']" );
4771 my $abbrevgen = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4772 push @smonthname, $name || $genitive || "";
4773 push @sabbrevmonthname, $abbrev || $abbrevgen || $name || $genitive || "";
4774 push @sgenitivemonth, $genitive || "";
4775 push @sabbrevgenitivemonth, $abbrevgen || $genitive || "";
4777 @sgenitivemonth = () if join("|",@smonthname) eq join("|",@sgenitivemonth);
4778 @sabbrevgenitivemonth = () if join("|",@sabbrevmonthname) eq join("|",@sabbrevgenitivemonth);
4779 my %caltypes = ( "gregorian" => 1, "japanese" => 3, "chinese" => 4, "dangi" => 5, "islamic" => 6, "buddhist" => 7, "hebrew" => 8,
4780 "persian" => 22, "islamic-civil" => 23, "islamic-umalqura" => 23 );
4781 my $calpref = xml_query( $suppl, "/supplementalData/calendarPreferenceData/calendarPreference[$territory_match]/\@ordering" ) || "gregorian";
4782 my $icalendartype;
4783 my @scalnames;
4784 foreach my $c (split /\s+/, $calpref)
4786 next unless defined $caltypes{$c};
4787 $icalendartype .= chr($caltypes{$c});
4788 $scalnames[$caltypes{$c} - 1] = loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$c']" );
4791 # date/time formats
4793 my $s1159 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='am' and not(\@alt)]" );
4794 my $s2359 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='pm' and not (\@alt)]" );
4795 my $sshortestam = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='am' and not(\@alt)]" );
4796 my $sshortestpm = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='pm' and not (\@alt)]" );
4797 my @stimeformat = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='medium']/timeFormat/pattern[not(\@alt)]" ));
4798 push @stimeformat, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4799 pop @stimeformat if $stimeformat[0] eq $stimeformat[1];
4800 @stimeformat = map convert_time_format($_), @stimeformat;
4801 my @sshorttime = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='short']/timeFormat/pattern[not(\@alt)]" ));
4802 push @sshorttime, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hm' and not(\@alt)]" );
4803 pop @sshorttime if $sshorttime[0] eq $sshorttime[1];
4804 @sshorttime = map convert_time_format($_), @sshorttime;
4805 my @sshortdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
4806 push @sshortdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
4807 @sshortdate = map convert_date_format($_), @sshortdate;
4808 my @slongdate = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" ));
4809 push @slongdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
4810 @slongdate = map convert_date_format($_), @slongdate;
4811 my @smonthday = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMd' and not(\@alt)]" ));
4812 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Md' and not(\@alt)]" );
4813 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMd' and not(\@alt)]" );
4814 @smonthday = map convert_date_format($_), @smonthday;
4815 my @syearmonth = map convert_date_format($_), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMM' and not(\@alt)]" );
4816 my @sduration = map convert_time_format( lc $_ ), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4817 my $srelativelongdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMEd' and not(\@alt)]" ) ||
4818 loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMEd' and not(\@alt)]" );
4819 $srelativelongdate = convert_date_format( $srelativelongdate );
4821 if (defined $loc->{calendar})
4823 foreach my $cal (@{$loc->{calendar}})
4825 $cal->{sshortdate} = \@sshortdate;
4826 $cal->{syearmonth} = \@syearmonth;
4827 $cal->{slongdate} = \@slongdate;
4828 $cal->{serastring} = [ $serastring ];
4829 $cal->{sdayname} = \@sdayname;
4830 $cal->{sabbrevdayname} = \@sabbrevdayname;
4831 $cal->{smonthname} = \@smonthname;
4832 $cal->{sabbrevmonthname} = \@sabbrevmonthname;
4833 $cal->{scalname} = $scalnames[$cal->{id}];
4834 $cal->{smonthday} = \@smonthday;
4835 $cal->{sshortestdayname} = \@sshortestdayname;
4836 $cal->{sabbreverastring} = [ $serastring ];
4837 $cal->{sshortestdayname} = \@sshortestdayname;
4838 $cal->{srelativelongdate} = $srelativelongdate;
4842 # codepages
4844 my %ansicpmap = ( 437 => 1252, 720 => 1256, 737 => 1253, 775 => 1257, 850 => 1252,
4845 852 => 1250, 855 => 1251, 866 => 1251, 857 => 1254, 862 => 1255 );
4846 my %maccpmap = ( 437 => 10000, 720 => 10004, 737 => 10006, 775 => 10029, 850 => 10000,
4847 852 => 10029, 855 => 10007, 857 => 10081, 862 => 10005, 866 => 10007,
4848 874 => 10021, 932 => 10001, 936 => 10008, 949 => 10003, 950 => 10002,
4849 1258 => 10000 );
4850 my %ebcdiccpmap = ( 437 => 37, 720 => 20420, 737 => 20273, 866 => 20880, 932 => 20290 );
4851 my %codepagemasks = ( 874 => [ 0x01000000, 0x00000000, 0x00000000, 0, 0x00010000, 0x00000000, 0x00010000, 0x00000000 ],
4852 932 => [ 0x00000000, 0x28c70000, 0x00000010, 0, 0x00020000, 0x00000000, 0x00020000, 0x00000000 ],
4853 936 => [ 0x00000000, 0x28010000, 0x00000002, 0, 0x00040000, 0x00000000, 0x00040000, 0x00000000 ],
4854 949 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00080000, 0x00000000, 0x00080000, 0x00000000 ],
4855 950 => [ 0x00000000, 0x28c10000, 0x00000012, 0, 0x00100000, 0x00000000, 0x00100000, 0x00000000 ],
4856 1258 => [ 0x2000000f, 0x00000000, 0x00000000, 0, 0x00000100, 0x00008000, 0x00000100, 0x00008000 ],
4857 866 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x00020000, 0x00000004, 0x02020000 ],
4858 862 => [ 0x00000800, 0x40000000, 0x00000000, 0, 0x00000020, 0x00200000, 0x00000020, 0x00200000 ],
4859 857 => [ 0x0000001f, 0x00000000, 0x00000000, 0, 0x00000010, 0x01000000, 0x00000010, 0x01000000 ],
4860 855 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x02000000, 0x00000004, 0x02000000 ],
4861 852 => [ 0x00000027, 0x00000000, 0x00000000, 0, 0x00000002, 0x04000000, 0x00000002, 0x04000000 ],
4862 775 => [ 0x00000007, 0x00000000, 0x00000000, 0, 0x00000080, 0x08000000, 0x00000080, 0x08000000 ],
4863 737 => [ 0x00000080, 0x00000000, 0x00000000, 0, 0x00000008, 0x10000000, 0x00000008, 0x10010000 ],
4864 720 => [ 0x00002000, 0x00000000, 0x00000000, 0, 0x00000040, 0x20000000, 0x00000040, 0x20080000 ],
4865 850 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x40000000, 0x0000019f, 0xdfd70000 ],
4866 437 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x80000000, 0x0000019f, 0xdfd70000 ],
4867 65001 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00000000, 0x00000000, 0x0000019f, 0xdfd70000 ] );
4868 my $oemcp = locale_entry( $loc, "oemcp", 65001 );
4869 my $maccp = locale_entry( $loc, "maccp", undef ) || $maccpmap{$oemcp} || 65001;
4870 my $ebcdiccp = locale_entry( $loc, "ebcdiccp", undef ) || $ebcdiccpmap{$oemcp} || 500;
4871 $ebcdiccp = 500 if (defined $loc->{oemcp} && $loc->{oemcp} == 65001) || (defined $loc->{maccp} && $loc->{maccp} == 65001);
4872 my $ansicp = $ansicpmap{$oemcp} || $oemcp;
4873 my @fontsig = (0) x 8;
4874 my $sig = locale_entry( $loc, "fontsig", [] );
4875 foreach my $i (0..7) { $fontsig[$i] |= $codepagemasks{$oemcp}->[$i]; }
4876 foreach my $i (0..$#{$sig}) { $fontsig[$i] |= $sig->[$i]; }
4877 $fontsig[3] |= 1 << 31;
4878 $fontsig[3] |= 1 << 27 if $ireadinglayout == 1;
4879 $fontsig[3] |= 1 << 28 if $ireadinglayout == 3;
4881 # special cases for invariant locale
4883 unless ($loc->{name})
4885 $siso639langname = "iv";
4886 $siso639langname2 = "ivl";
4887 $senglanguage = $snativelangname = "Invariant Language";
4888 $sengcountry = $snativectryname = "Invariant Country";
4889 $sengdisplayname = "Invariant Language (Invariant Country)";
4890 $snativedisplayname = "Invariant Language (Invariant Region)";
4891 $sengcurrname = $snativecurrname = "International Monetary Fund";
4892 $scurrency = "\x{00a4}";
4893 $ifirstdayofweek = 0;
4894 $igeoid = $geotable{"US"}->{id};
4895 @stimeformat = ("HH:mm:ss");
4896 @sshortdate = ("MM/dd/yyyy", "yyyy-MM-dd");
4897 @slongdate = ("dddd, dd MMMM yyyy");
4898 @syearmonth = ("yyyy MMMM");
4899 @smonthday = ("MMMM dd", "MMMM d", "M/d", "MMM d");
4900 @sshorttime = ("HH:mm", "hh:mm tt", "H:mm", "h:mm tt");
4901 $srelativelongdate = "dddd, MMMM dd";
4902 $sposinfinity = "Infinity";
4903 $sneginfinity = "-Infinity";
4904 $spositivesign = "+";
4905 $ipospercent = $inegpercent = 0;
4908 # output data
4910 $locale_data .= pack "L<2",
4911 add_string( $sname ), # name
4912 add_string( $sopentypelang ); # LOCALE_SOPENTYPELANGUAGETAG
4914 $locale_data .= pack "S<14",
4915 $loc->{lcid} || 0x1000, # LOCALE_ILANGUAGE
4916 $unique_lcid, # unique_lcid
4917 locale_entry( $loc, "idigits", 2 ), # LOCALE_IDIGITS
4918 locale_entry( $loc, "inegnumber", 1 ), # LOCALE_INEGNUMBER
4919 $icurrdigits, # LOCALE_ICURRDIGITS
4920 $icurrency, # LOCALE_ICURRENCY
4921 $inegcurr, # LOCALE_INEGCURR
4922 locale_entry( $loc, "ilzero", 1 ), # LOCALE_ILZERO
4923 !$neutral, # LOCALE_INEUTRAL
4924 $ifirstdayofweek, # LOCALE_IFIRSTDAYOFWEEK
4925 $firstweekofyear, # LOCALE_IFIRSTWEEKOFYEAR
4926 $geo->{dialcode} || 1 , # LOCALE_ICOUNTRY,
4927 $measure, # LOCALE_IMEASURE
4928 $digitsubstitution; # LOCALE_IDIGITSUBSTITUTION
4930 $locale_data .= pack "L<18",
4931 add_string( $sgrouping ), # LOCALE_SGROUPING
4932 add_string( $smongrouping ), # LOCALE_SMONGROUPING
4933 add_string( $slist ), # LOCALE_SLIST
4934 add_string( $sdecimal ), # LOCALE_SDECIMAL
4935 add_string( $sthousand ), # LOCALE_STHOUSAND
4936 add_string( $scurrency ), # LOCALE_SCURRENCY
4937 add_string( $smondecimalsep ), # LOCALE_SMONDECIMALSEP
4938 add_string( $smonthousandsep ), # LOCALE_SMONTHOUSANDSEP
4939 add_string( $spositivesign ), # LOCALE_SPOSITIVESIGN
4940 add_string( $snegativesign ), # LOCALE_SNEGATIVESIGN
4941 add_string( $s1159 ), # LOCALE_S1159
4942 add_string( $s2359 ), # LOCALE_S2359
4943 add_strarray( @snativedigits ), # LOCALE_SNATIVEDIGITS
4944 add_strarray( @stimeformat ), # LOCALE_STIMEFORMAT
4945 add_strarray( @sshortdate ), # LOCALE_SSHORTDATE
4946 add_strarray( @slongdate ), # LOCALE_SLONGDATE
4947 add_strarray( @syearmonth ), # LOCALE_SYEARMONTH
4948 add_strarray( @sduration ); # LOCALE_SDURATION
4950 $locale_data .= pack "S<8",
4951 $idefaultlanguage || 0x1000, # LOCALE_IDEFAULTLANGUAGE
4952 $ansicp, # LOCALE_IDEFAULTANSICODEPAGE
4953 $oemcp, # LOCALE_IDEFAULTCODEPAGE
4954 $maccp, # LOCALE_IDEFAULTMACCODEPAGE
4955 $ebcdiccp, # LOCALE_IDEFAULTEBCDICCODEPAGE
4956 $igeoid < 65536 ? $igeoid : 39070, # old_geoid
4957 $papersize ? 1 : 9, # LOCALE_IPAPERSIZE
4958 0; # FIXME # islamic_cal
4960 $locale_data .= pack "L<24",
4961 add_string( $icalendartype ), # LOCALE_ICALENDARTYPE
4962 add_string( $sabbrevlangname ), # LOCALE_SABBREVLANGNAME
4963 add_string( $siso639langname ), # LOCALE_SISO639LANGNAME
4964 add_string( $senglanguage ), # LOCALE_SENGLANGUAGE
4965 add_string( $snativelangname ), # LOCALE_SNATIVELANGNAME
4966 add_string( $sengcountry ), # LOCALE_SENGCOUNTRY
4967 add_string( $snativectryname ), # LOCALE_SNATIVECTRYNAME
4968 add_string( $siso3166ctryname2 ), # LOCALE_SABBREVCTRYNAME
4969 add_string( $territory ), # LOCALE_SISO3166CTRYNAME
4970 add_string( $sintlsymbol ), # LOCALE_SINTLSYMBOL
4971 add_string( $sengcurrname ), # LOCALE_SENGCURRNAME
4972 add_string( $snativecurrname ), # LOCALE_SNATIVECURRNAME
4973 add_fontsig( @fontsig ), # LOCALE_FONTSIGNATURE
4974 add_string( $siso639langname2 ), # LOCALE_SISO639LANGNAME2
4975 add_string( $siso3166ctryname2 ), # LOCALE_SISO3166CTRYNAME2
4976 add_string( $sparent ), # LOCALE_SPARENT
4977 add_strarray( @sdayname ), # LOCALE_SDAYNAME
4978 add_strarray( @sabbrevdayname ), # LOCALE_SABBREVDAYNAME
4979 add_strarray( @smonthname ), # LOCALE_SMONTHNAME
4980 add_strarray( @sabbrevmonthname ), # LOCALE_SABBREVMONTHNAME
4981 add_strarray( @sgenitivemonth ), # LOCALE_SGENITIVEMONTH
4982 add_strarray( @sabbrevgenitivemonth ), # LOCALE_SABBREVGENITIVEMONTH
4983 add_strarray( @scalnames ), # LOCALE_SCALNAMES
4984 add_strarray( @{$loc->{sortnames}} ); # LOCALE_SSORTNAMES
4986 $locale_data .= pack "S<6",
4987 $inegpercent, # LOCALE_INEGATIVEPERCENT
4988 $ipospercent, # LOCALE_IPOSITIVEPERCENT
4989 0, # unknown
4990 $ireadinglayout, # LOCALE_IREADINGLAYOUT
4991 0x2a, # unknown
4992 0x2a; # unknown
4994 $locale_data .= pack "L<24",
4995 0, # unknown
4996 add_string( $sengdisplayname ), # LOCALE_SENGLISHDISPLAYNAME
4997 add_string( $snativedisplayname ), # LOCALE_SNATIVEDISPLAYNAME
4998 add_string( $spercent ), # LOCALE_SPERCENT
4999 add_string( $snan ), # LOCALE_SNAN
5000 add_string( $sposinfinity ), # LOCALE_SPOSINFINITY
5001 add_string( $sneginfinity ), # LOCALE_SNEGINFINITY
5002 0, # unknown
5003 add_string( $serastring ), # CAL_SERASTRING
5004 add_string( $serastring ), # CAL_SABBREVERASTRING
5005 0, # unknown
5006 add_string( $ssortlocale ), # LOCALE_SCONSOLEFALLBACKNAME
5007 add_strarray( @sshorttime ), # LOCALE_SSHORTTIME
5008 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
5009 0, # unknown
5010 add_string( $ssortlocale ), # LOCALE_SSORTLOCALE
5011 add_string( "0409:00000409" ), # FIXME # LOCALE_SKEYBOARDSTOINSTALL
5012 add_string( $sscripts ), # LOCALE_SSCRIPTS
5013 add_string( $srelativelongdate ), # LOCALE_SRELATIVELONGDATE
5014 $igeoid, # LOCALE_IGEOID
5015 add_string( $sshortestam || "a" ), # LOCALE_SSHORTESTAM
5016 add_string( $sshortestpm || "p" ), # LOCALE_SSHORTESTPM
5017 add_strarray( @smonthday ), # LOCALE_SMONTHDAY
5018 add_string( "k0-windows-us" ) # FIXME # keyboard_layout
5021 # output language groups
5023 my %groups;
5024 add_registry_key( $nlskey, "Locale", "00000409" );
5025 foreach my $loc (@locales)
5027 next unless defined $loc->{lcid};
5028 next if ($loc->{lcid} & 0x80000000);
5029 next if !defined($loc->{alias}) && $loc->{name} !~ /-$loc->{territory}/; # skip neutral locales
5030 my $group = locale_entry( $loc, "group", 1 );
5031 my $name = sprintf( "%08x", $loc->{lcid} );
5032 my $val = sprintf( "%x", $group );
5033 add_registry_string_value( $nlskey, "Locale", $name, $val ) unless ($loc->{lcid} & 0x000f0000);
5034 add_registry_string_value( $nlskey, "Locale\\Alternate Sorts", $name, $val ) if $loc->{name} =~ /_/;
5035 $groups{$val} = 1;
5037 foreach my $group (keys %groups) { add_registry_string_value( $nlskey, "Language Groups", $group, "1" ); }
5039 # output calendar data
5041 my $calendar_data = "";
5042 foreach my $cal (@calendars)
5044 my $scalname = $cal->{name};
5045 my $iyearoffsetrange = 0;
5046 my $itwodigityearmax = $cal->{itwodigityearmax};
5047 my @sshortdate;
5048 my @syearmonth;
5049 my @slongdate;
5050 my @serastring;
5051 my @sdayname;
5052 my @sabbrevdayname;
5053 my @smonthname;
5054 my @sabbrevmonthname;
5055 my @smonthday;
5056 my @sabbreverastring;
5057 my @sshortestdayname;
5059 my $type = $cal->{type};
5060 if (defined $cal->{locale} && defined $type)
5062 my $loc = $lcnames{$cal->{locale}};
5063 my $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
5064 push @sshortdate, $fmt if $fmt;
5065 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMd' and not(\@alt)]" );
5066 push @sshortdate, $fmt if $fmt;
5067 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
5068 push @sshortdate, $fmt if $fmt;
5069 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMMMd' and not(\@alt)]" );
5070 push @sshortdate, $fmt if $fmt;
5071 @sshortdate = map convert_date_format($_), @sshortdate;
5072 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" );
5073 push @slongdate, $fmt if $fmt;
5074 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
5075 push @slongdate, $fmt if $fmt;
5076 @slongdate = map convert_date_format($_), @slongdate;
5078 foreach my $n (1..13)
5080 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n' and not(\@yeartype)]" );
5081 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n' and not(\@yeartype)]" );
5082 push @smonthname, $name || "";
5083 push @sabbrevmonthname, $abbrev || $name || "";
5086 $scalname ||= loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$type']" );
5087 if (defined $cal->{eras})
5089 my @eras;
5090 my $idx = 1;
5091 foreach my $era (@{$cal->{eras}})
5093 my $start = xml_query( $suppl, "/supplementalData/calendarData/calendar[\@type='$type']/eras/era[\@type='$era']/\@start" );
5094 next unless $start =~ /^(-?\d+)-(\d+)-(\d+)/;
5095 my ($year, $mon, $day, $zero, $first) = ($1, $2, $3, $1 - 1, 1);
5096 if ($zero < 0)
5098 $first -= $zero;
5099 $year = 1;
5100 $itwodigityearmax = 2049 - $zero;
5102 unshift @eras, pack( "S<8", 6, $idx++, $year, $mon, $day, $zero, $first, 0 );
5103 push @serastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraAbbr/era[\@type='$era']" );
5104 push @sabbreverastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraNarrow/era[\@type='$era']" );
5106 $iyearoffsetrange = add_str_data( pack "S<L<*", scalar @eras, map { add_str_data($_); } @eras );
5110 @sshortdate = @{$cal->{sshortdate}} if defined $cal->{sshortdate} && !@sshortdate;
5111 @syearmonth = @{$cal->{syearmonth}} if defined $cal->{syearmonth};
5112 @slongdate = @{$cal->{slongdate}} if defined $cal->{slongdate} && !@slongdate;
5113 @serastring = @{$cal->{serastring}} if defined $cal->{serastring} && !@serastring;
5114 @sdayname = @{$cal->{sdayname}} if defined $cal->{sdayname};
5115 @sabbrevdayname = @{$cal->{sabbrevdayname}} if defined $cal->{sabbrevdayname};
5116 @smonthname = @{$cal->{smonthname}} if defined $cal->{smonthname} && !join("",@smonthname);
5117 @sabbrevmonthname = @{$cal->{sabbrevmonthname}} if defined $cal->{sabbrevmonthname} && !join("",@sabbrevmonthname);
5118 @smonthday = @{$cal->{smonthday}} if defined $cal->{smonthday};
5119 @sabbreverastring = @{$cal->{sabbreverastring}} if defined $cal->{sabbreverastring} && !@sabbreverastring;
5120 @sshortestdayname = @{$cal->{sshortestdayname}} if defined $cal->{sshortestdayname};
5121 my $srelativelongdate = $cal->{srelativelongdate};
5123 @serastring = ("A.D.") unless @serastring;
5124 @sabbreverastring = ("AD") unless @sabbreverastring;
5126 if ($cal->{id} != 1) # calendar 1 is a placeholder, information is fetched from locale instead
5128 @sshortdate = ("") unless @sshortdate;
5129 @syearmonth = ("") unless @syearmonth;
5130 @slongdate = ("") unless @slongdate;
5131 @sdayname = ("") x 7 unless @sdayname;
5132 @sabbrevdayname = ("") x 7 unless @sabbrevdayname;
5133 @sshortestdayname = ("") x 7 unless @sshortestdayname;
5134 @smonthname = ("") x 13 unless @smonthname;
5135 @sabbrevmonthname = ("") x 13 unless @sabbrevmonthname;
5136 @smonthday = ("") unless @smonthday;
5139 $calendar_data .= pack "S<2L<17",
5140 $cal->{id}, # CAL_ICALINTVALUE
5141 $itwodigityearmax || 99, # CAL_ITWODIGITYEARMAX
5142 add_strarray( @sshortdate ), # CAL_SSHORTDATE
5143 add_strarray( @syearmonth ), # CAL_SYEARMONTH
5144 add_strarray( @slongdate ), # CAL_SLONGDATE
5145 add_strarray( @serastring ), # CAL_SERASTRING
5146 $iyearoffsetrange, # CAL_IYEAROFFSETRANGE
5147 add_strarray( @sdayname ), # CAL_SDAYNAME
5148 add_strarray( @sabbrevdayname ), # CAL_SABBREVDAYNAME
5149 add_strarray( @smonthname ), # CAL_SMONTHNAME
5150 add_strarray( @sabbrevmonthname ), # CAL_SABBREVMONTHNAME
5151 add_string( $scalname ), # CAL_SCALNAME
5152 add_strarray( @smonthday ), # CAL_SMONTHDAY
5153 add_strarray( @sabbreverastring ), # CAL_SABBREVERASTRING
5154 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
5155 add_string( $srelativelongdate ); # CAL_SRELATIVELONGDATE
5158 # output locale header
5160 my $nb_lcids = scalar keys %lcids;
5161 my $nb_locales = scalar grep { !defined $_->{alias} } @locales;
5162 my $nb_lcnames = scalar keys %lcnames;
5163 my $locale_size = length($locale_data) / $nb_locales;
5164 my $nb_calendars = scalar @calendars;
5165 my $calendar_size = length($calendar_data) / $nb_calendars;
5166 my $lcids_offset = 19 * 4; # size of header
5167 my $lcnames_offset = $lcids_offset + length $lcid_data;
5168 my $locales_offset = $lcnames_offset + length $lcname_data;
5169 my $calendar_offset = $locales_offset + length $locale_data;
5170 my $strings_offset = $calendar_offset + length $calendar_data;
5172 my $locale_header = pack "L<7S<4L<S<2L<3S<2L<4",
5173 8, # offset
5175 7, # version
5176 0x5344534e, # magic
5177 0, 0, 0,
5179 $nb_lcids,
5180 $nb_locales,
5181 $locale_size,
5182 $locales_offset,
5183 $nb_lcnames,
5185 $lcids_offset,
5186 $lcnames_offset,
5188 $nb_calendars,
5189 $calendar_size,
5190 $calendar_offset,
5191 $strings_offset,
5192 0, 0;
5194 return align_string( 4, $locale_header . $lcid_data . $lcname_data . $locale_data . $calendar_data . $string_data );
################################################################
# build the charmaps table for locale.nls
#
# Adds a few extra entries to the global digit, halfwidth and fullwidth
# tables, then returns the concatenated binary case tables in this order:
# digits, CJK compatibility, hiragana, katakana, halfwidth, fullwidth,
# traditional Chinese, simplified Chinese.
sub build_charmaps_data()
{
    my $data = "";

    # MAP_FOLDDIGITS
    my @ascii_digits = (ord('0') .. ord('9'));
    # each entry is [ first code point, first digit, last digit ]
    my @digit_ranges =
    (
        [ 0x3007,  0, 0 ],  # Ideographic Zero
        [ 0x0c78,  0, 3 ],  # Telugu Fraction Digits
        [ 0x0c7c,  1, 3 ],  # Telugu Fraction Digits
        [ 0x3021,  1, 9 ],  # Hangzhou Numerals
        [ 0xa8e0,  0, 9 ],  # Combining Devanagari Digits
        [ 0x10107, 1, 9 ],  # Aegean Numbers
        [ 0x10320, 1, 1 ],  # Old Italic Numerals
        [ 0x10321, 5, 5 ],  # Old Italic Numerals
    );
    foreach my $range (@digit_ranges)
    {
        my ($start, $lo, $hi) = @{$range};
        foreach my $digit ($lo .. $hi)
        {
            $digitmap_table[$start + $digit - $lo] = $ascii_digits[$digit];
        }
    }
    $data .= dump_binary_case_table( @digitmap_table );

    # CJK compatibility map
    $data .= dump_binary_case_table( @cjk_compat_table );

    # LCMAP_HIRAGANA/KATAKANA
    # the two tables are inverses of each other, 0x60 code points apart
    my (@hiragana_table, @katakana_table);
    foreach my $hira (0x3041..0x3096, 0x309d..0x309e)
    {
        my $kata = $hira + 0x60;
        $hiragana_table[$kata] = $hira;
        $katakana_table[$hira] = $kata;
    }
    $data .= dump_binary_case_table( @hiragana_table );
    $data .= dump_binary_case_table( @katakana_table );

    # LCMAP_HALFWIDTH/FULLWIDTH
    my %halfwidth_extra =
    (
        0x2018 => 0x0027,
        0x2019 => 0x0027,
        0x201c => 0x0022,
        0x201d => 0x0022,
        0x309b => 0xff9e,
        0x309c => 0xff9f,
    );
    my %fullwidth_extra =
    (
        0x309b => 0x3099,
        0x309c => 0x309a,
    );
    foreach my $ch (keys %halfwidth_extra) { $halfwidth_table[$ch] = $halfwidth_extra{$ch}; }
    foreach my $ch (keys %fullwidth_extra) { $fullwidth_table[$ch] = $fullwidth_extra{$ch}; }
    $data .= dump_binary_case_table( @halfwidth_table );
    $data .= dump_binary_case_table( @fullwidth_table );

    # LCMAP_TRADITIONAL/SIMPLIFIED_CHINESE
    $data .= dump_binary_case_table( @chinese_traditional_table );
    $data .= dump_binary_case_table( @chinese_simplified_table );

    # FIXME: some more unknown tables here

    return $data;
}
5248 ################################################################
5249 # build the geoids table for locale.nls
5250 sub build_geoids_data()
5252 my $data = "";
5253 my %index;
5254 my $idx = 0;
5255 my @geo_header = (0x00650067, 0x0000006f, 0, 4 * 7, scalar @geoids, 0, 0);
5257 foreach my $geo (@geoids)
5259 my $id = $geo->{id};
5260 $geo = $geo->{alias} if defined $geo->{alias};
5261 my $lat = "0.000";
5262 my $long = "0.000";
5263 my $iso2 = $geo->{iso2} || "XX";
5264 my $iso3 = $geo->{iso3} || "XX";
5265 my $isregion = $geo->{region} || (defined $geo->{uncode} && !defined $geo->{iso2});
5266 my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
5267 my $scurrency = $geo->{scurrency} || "\x{00a4}";
5269 $data .= pack( "L<", $id );
5270 $data .= pad_string( 24, encode( "UTF16LE", $lat ));
5271 $data .= pad_string( 24, encode( "UTF16LE", $long ));
5272 $data .= pack( "L<2", $isregion ? 14 : 16, $geo->{parentid} || 39070 );
5273 $data .= pad_string( 8, encode( "UTF16LE", $iso2 ));
5274 $data .= pad_string( 8, encode( "UTF16LE", $iso3 ));
5275 $data .= pack( "S<2", $geo->{uncode} || 0, $geo->{dialcode} || 0 );
5276 $data .= pad_string( 8, encode( "UTF16LE", $sintlsymbol ));
5277 $data .= pad_string( 16, encode( "UTF16LE", $scurrency ));
5278 $index{$geo->{name}} = $idx if $geo->{name};
5279 $idx++;
5281 $index{"XX"} = $index{"001"};
5283 $geo_header[5] = $geo_header[3] + length $data;
5284 $geo_header[6] = scalar keys %index;
5286 foreach my $name (sort keys %index)
5288 $data .= pad_string( 8, encode( "UTF16LE", $name ));
5289 $data .= pack "L<", $index{$name};
5292 $geo_header[2] = $geo_header[3] + length $data;
5293 return pack( "L<7", @geo_header ) . $data;
5297 ################################################################
5298 # build a binary locale table
5299 sub dump_locales($$)
5301 my ($filename, $chartypes) = @_;
5303 printf "Building $filename\n";
5305 my $locale_data = build_locale_data();
5306 my $charmaps_data = build_charmaps_data();
5307 my $geoids_data = build_geoids_data();
5308 my $scripts_data = ""; # FIXME
5310 my @header = ( 0 ) x 8;
5311 $header[0] = 4 * scalar @header; # chartypes offset
5312 $header[4] = $header[0] + length $chartypes; # locales offset
5313 $header[5] = $header[4] + length $locale_data; # charmaps offset
5314 $header[6] = $header[5] + length $charmaps_data; # geoids offset
5315 $header[7] = $header[6] + length $geoids_data; # scripts offset
5317 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
5318 print OUTPUT pack "L<*", @header;
5319 print OUTPUT $chartypes, $locale_data, $charmaps_data, $geoids_data, $scripts_data;
5320 close OUTPUT;
5321 save_file($filename);
5325 ################################################################
5326 # return the day of week of the first of the month
5327 sub month_first_dow($$)
5329 my ($year, $month) = @_;
5330 my @time = gmtime( timegm_modern( 0, 0, 0, 1, $month - 1, $year ));
5331 return $time[6];
5335 ################################################################
5336 # compare system time values
5337 sub compare_systime($$)
5339 my ($a, $b) = @_;
5340 return $a->[0] <=> $b->[0] ||
5341 $a->[1] <=> $b->[1] ||
5342 $a->[2] <=> $b->[2] ||
5343 $a->[3] <=> $b->[3] ||
5344 $a->[4] <=> $b->[4] ||
5345 $a->[5] <=> $b->[5] ||
5346 $a->[6] <=> $b->[6];
5350 ################################################################
5351 # compare the zone transition date with the rule date
5352 sub compare_transition_date($$$$)
5354 my ($stdoff, $isdst, $zone, $rule) = @_;
5356 if (scalar @{$zone} <= 1)
5358 return (!defined($zone->[0]) || $zone->[0] > $rule->[0]) ? 1 : -1;
5361 my @date = parse_transition_date( $stdoff, $isdst, $zone->[0], $zone->[1], $zone->[2], $zone->[3] || 0 );
5362 return compare_systime( \@date, $rule );
5366 ################################################################
5367 # get the Windows zone names from the CLDR data
5368 sub load_windows_zones()
5370 my $current_name;
5371 my %names;
5372 my $base = "cldr-release-$CLDRVERSION";
5373 my $INPUT = open_data_file( "cldr", "$base/common/supplemental/windowsZones.xml" );
5374 while (<$INPUT>)
5376 if (/<!-- +(\(UTC.*) -->.*/)
5378 $current_name = $1;
5380 if (/<mapZone other="(.*)" territory="001" type="(.*)"\/>/)
5382 $names{$1} = [ $current_name, $2 ];
5385 close $INPUT;
5386 return %names;
5390 ################################################################
5391 # parse a transition date specification from the tzdata files
5392 sub parse_transition_date($$@)
5394 use integer;
5395 my ($stdoff, $isdst, $year, $in, $on, $at) = @_;
5397 $on = "1" unless defined $on;
5398 $at = "0" unless defined $at;
5400 my %months = ( Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6,
5401 Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12 );
5402 my %days = ( Sun => 0, Mon => 1, Tue => 2, Wed => 3, Thu => 4, Fri => 5, Sat => 6 );
5404 my $mon = $in ? $months{$in} : 1;
5405 my ($week, $dow, $flag, $time, $sec);
5406 my $first = month_first_dow( $year, $mon );
5408 if ($on =~ /^last(.*)$/)
5410 $week = 5;
5411 $dow = $days{$1};
5413 elsif ($on =~ /^(.*)>=(\d+)$/)
5415 $dow = $days{$1};
5416 my $diff = ($first + 6 - $dow) % 7;
5417 $week = $2 >= 25 ? 5 : ($2 + 6 + $diff) / 7;
5419 elsif ($on =~ /^(.*)<=(\d+)$/)
5421 $dow = $days{$1};
5422 my $diff = ($first + $2 + 6 - $dow) % 7;
5423 $week = ($2 + 6 - $diff) / 7;
5424 if (!$week)
5426 $week = 5;
5427 if (!--$mon) { $mon = 12; $year--; }
5430 elsif ($on =~ /^\d+$/)
5432 $dow = ($first + $on - 1) % 7;
5433 $week = $on >= 25 ? 5 : ($on + 6) / 7;
5435 else
5437 die "unsupported date specification $year $in $on $at";
5440 if ($at =~ /^(\d+):(\d+):(\d+)([uws]?)$/)
5442 $time = $1 * 60 + $2;
5443 $sec = $3;
5444 $flag = $4;
5446 elsif ($at =~ /^(\d+):(\d+)([uws]?)$/)
5448 $time = $1 * 60 + $2;
5449 $flag = $3;
5451 elsif ($at =~ /^(\d+)([uws]?)$/)
5453 $time = $1 * 60;
5454 $flag = $2;
5456 else
5458 die "unsupported time specification $year $in $on $at";
5461 $flag ||= "w";
5462 $time -= $stdoff if $flag eq "u";
5463 $time += 60 if !$isdst && $flag ne "w";
5465 if ($time < 0) # previous day
5467 $week-- if $week < 5 && $dow == month_first_dow( $year, $mon );
5468 $week-- if $week == 5 && $dow == month_first_dow( $year + ($mon == 12), $mon % 12 + 1 );
5469 if (!$week)
5471 $week = 5;
5472 if (!--$mon) { $mon = 12; $year--; }
5474 $dow = ($dow + 6) % 7;
5475 $time += 24 * 60;
5478 return ($year, $mon, $week, $dow, $time / 60, $time % 60, $sec || 0);
5482 ################################################################
5483 # parse a system time value as a SYSTEMTIME structure
5484 sub pack_systime(@)
5486 my ($year, $mon, $week, $dow, $hour, $min, $sec) = @_;
5487 return pack "S<8", 0, $mon, $dow, $week, $hour < 24 ? ($hour, $min, $sec, 0) : (23, 59, 59, 999);
5491 ################################################################
5492 # parse a timezone offset from the tzdata files
5493 sub parse_tz_offset($)
5495 my ($hour, $min) = split /:/, shift;
5496 $min ||= 0;
5497 return $hour < 0 ? -$hour * 60 + $min : -$hour * 60 - $min; # invert sign
5501 ################################################################
5502 # build the timezone data
5503 sub dump_timezones($@)
5505 my $filename = shift;
5506 my $FIRST_YEAR = 2000;
5507 my $LAST_YEAR = 2030;
5509 my %names = load_windows_zones();
5510 my %zones;
5511 my %rules;
5512 my %links;
5513 my %res_indices;
5515 printf "Building $filename\n";
5517 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
5518 print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
5519 print OUTPUT "#include \"winresrc.h\"\n\n";
5520 print OUTPUT "#pragma makedep po\n\n";
5521 print OUTPUT "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n\n";
5522 print OUTPUT "STRINGTABLE\n{\n";
5524 # load tzdata files
5526 foreach my $filename (@_)
5528 my $FILE = open_data_file( "tzdata", $filename );
5529 my $zonename;
5530 while (<$FILE>)
5532 chomp;
5533 s/\#.*$//;
5534 next if /^\s*$/;
5535 my @fields = split /\s+/;
5536 if ($fields[0] eq "Zone" || ($zonename && $fields[0] eq ""))
5538 shift @fields;
5539 $zonename = shift @fields unless $zonename;
5540 my ($stdoff, $rules, $dummy, @date) = @fields;
5541 $zones{$zonename} ||= [ ];
5542 push @{$zones{$zonename}}, [ parse_tz_offset( $stdoff ), $rules, @date ];
5543 $zonename = undef unless @date; # last entry doesn't have an until date
5544 next;
5546 if ($fields[0] eq "Rule")
5548 shift @fields;
5549 my ($rulename, $from, $to, $dummy, $in, $on, $at, $save) = @fields;
5550 $to = $from if $to eq "only";
5551 $to = $LAST_YEAR if $to eq "max";
5552 push @{$rules{$rulename}}, [ parse_tz_offset( $save ), $from, $to, $in, $on, $at ];
5553 next;
5555 if ($fields[0] eq "Link")
5557 $links{$fields[2]} = $fields[1];
5558 next;
5560 die "unrecognized line $_";
5562 close $FILE;
5565 foreach my $name (sort { uc($a) cmp uc($b) } keys %names)
5567 my ($display, $zone) = @{$names{$name}};
5568 $zone = $links{$zone} if defined $links{$zone};
5570 # build list of transitions
5572 my @transitions;
5573 my @from_date = ( 1 );
5574 my $last_stdoff = 0;
5575 for (my $i = 0; $i < scalar @{$zones{$zone}}; $i++)
5577 my ($stdoff, $rule, @until_date) = @{$zones{$zone}->[$i]};
5578 my $isdst = ($last_stdoff != $stdoff);
5579 $from_date[0] ||= $LAST_YEAR;
5580 my @systime = parse_transition_date( $stdoff, $isdst, @from_date );
5581 push @transitions, [ $stdoff, -1, \@systime ];
5583 if (defined $rules{$rule})
5585 foreach my $r (@{$rules{$rule}})
5587 my ($offset, $from, $to, $in, $on, $at) = @{$r};
5588 foreach my $year ($from..$to)
5590 next if $year < $from_date[0];
5591 next if $until_date[0] && $year > $until_date[0];
5592 my @systime = parse_transition_date( $stdoff, !!$offset, $year, $in, $on, $at );
5593 next if compare_transition_date( $stdoff, $isdst, \@until_date, \@systime ) <= 0;
5594 my $ret = compare_transition_date( $stdoff, $isdst, \@from_date, \@systime );
5595 next if $ret > 0;
5596 pop @transitions if !$ret; # remove transition if there's a dst change at the same time
5597 push @transitions, [ $stdoff, $offset, \@systime ];
5601 @from_date = @until_date;
5602 $last_stdoff = $stdoff;
5604 @transitions = sort { compare_systime( $a->[2], $b->[2] ) } @transitions;
5606 # build per-year dynamic info
5608 my @info;
5609 my $last_dstoff = 0;
5610 my $last_dst = 0;
5611 my $year = $FIRST_YEAR;
5612 while ($year <= $LAST_YEAR)
5614 if (@transitions && $transitions[0]->[2]->[0] < $year)
5616 $last_stdoff = $transitions[0]->[0];
5617 shift @transitions;
5618 next;
5620 my ($std, $dst, @trans);
5621 my $cur_stdoff = $last_stdoff;
5622 my $cur_dstoff = ($name =~ /^UTC/) ? 0 : -60;
5623 while (@transitions && $transitions[0]->[2]->[0] == $year)
5625 my $t = shift @transitions;
5626 my ($stdoff, $dstoff, $systime) = @{$t};
5627 $systime = pack_systime( @{$systime} );
5628 if (!$dstoff) # std
5630 $cur_stdoff = $stdoff unless $std;
5631 $std = $systime;
5633 elsif ($dstoff != -1) # dst
5635 $cur_dstoff = $dstoff unless $dst;
5636 $dst ||= $systime;
5638 elsif ($stdoff != $last_stdoff) # rule transition
5640 # Handle a special case: Samoa moved to the other side of
5641 # the date line between 2011-12-03 and 2012-01-01,
5642 # entirely skipping the day 2011-12-31. We ignore this
5643 # change because it happens on a year boundary and more
5644 # importantly it would generate on offset of -25 hours,
5645 # which some programs (e.g., Mono) do not like. See
5646 # https://bugs.winehq.org/show_bug.cgi?id=51758
5648 if ($last_stdoff - $stdoff < 24 * 60)
5650 @trans = ($last_stdoff, $stdoff, $systime);
5651 $cur_stdoff = $stdoff;
5654 elsif ($dst) # rule transition with no stdoff change
5656 $std = $systime;
5658 $last_dstoff = ($dstoff == -1) ? 0 : $dstoff;
5660 $last_stdoff = $cur_stdoff;
5662 if ($cur_dstoff > 0) # swap std and dst to ensure that offset is negative
5664 ($std, $dst) = ($dst, $std);
5665 $cur_stdoff += $cur_dstoff;
5666 $cur_dstoff = -$cur_dstoff;
5669 if (@trans)
5671 # heuristic to prefer switching dst
5672 if ($last_dst == $year - 1 || (!$last_dst && $trans[0] > $trans[1]))
5674 $dst ||= $trans[2];
5675 $cur_stdoff = $trans[0];
5676 $cur_dstoff = $trans[1] - $trans[0];
5678 else
5680 $std ||= $trans[2];
5681 $cur_stdoff = $trans[1];
5682 $cur_dstoff = $trans[0] - $trans[1];
5686 if ($std || $dst)
5688 $std ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
5689 $dst ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
5690 $last_dst = $year;
5692 else
5694 $std = pack "S<8", 0;
5695 $dst = pack "S<8", 0;
5696 $cur_stdoff += $last_dstoff;
5698 $info[$year++] = pack( "l<3", $cur_stdoff, 0, $cur_dstoff ) . $std . $dst;
5701 # output registry keys
5703 my $std_name = $name eq "UTC" ? "Coordinated Universal Time" : $name;
5704 my $dlt_name = $std_name =~ s/Standard Time/Daylight Time/r;
5705 my $res_idx = hex( substr( Digest::SHA::sha1_hex($name), -3, 3 )) << 4;
5706 $res_idx += 16 while exists $res_indices{$res_idx};
5707 $res_indices{$res_idx} = 1;
5709 add_registry_string_value( $zonekey, $name, "Display", $display );
5710 add_registry_string_value( $zonekey, $name, "Std", $std_name );
5711 add_registry_string_value( $zonekey, $name, "Dlt", $dlt_name );
5712 add_registry_string_value( $zonekey, $name, "MUI_Std", sprintf( "\@tzres.dll,-%u", $res_idx ));
5713 add_registry_string_value( $zonekey, $name, "MUI_Dlt", sprintf( "\@tzres.dll,-%u", $res_idx + 1 ));
5714 add_registry_string_value( $zonekey, $name, "MUI_Display", sprintf( "\@tzres.dll,-%u", $res_idx + 2 ));
5715 add_registry_binary_value( $zonekey, $name, "TZI", $info[$LAST_YEAR] );
5717 printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx, $std_name;
5718 printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx + 1, $dlt_name;
5719 printf OUTPUT "%7d \"%s\"\n", $res_idx + 2, $display;
5721 my $first_year = $FIRST_YEAR;
5722 my $last_year = $LAST_YEAR;
5723 $last_year-- while $last_year > $FIRST_YEAR && $info[$last_year] eq $info[$last_year - 1];
5724 $first_year++ while $first_year < $last_year && $info[$first_year] eq $info[$last_year];
5726 next if $last_year <= $first_year;
5728 foreach my $i ($first_year..$last_year)
5730 add_registry_binary_value( $zonekey, "$name\\Dynamic DST", $i, $info[$i] );
5732 add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "FirstEntry", $first_year );
5733 add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "LastEntry", $last_year );
5736 print OUTPUT "}\n";
5737 close OUTPUT;
5738 save_file($filename);
5742 ################################################################
5743 # build the script to create registry keys
5744 sub dump_registry_script($%)
5746 my ($filename, %keys) = @_;
5747 my $indent = 1;
5748 my @prev;
5750 printf "Building %s\n", $filename;
5751 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
5752 print OUTPUT "HKLM\n{\n";
5753 foreach my $k (sort { ($a =~ tr/a-z\\/A-Z\001/r) cmp ($b =~ tr/a-z\\/A-Z\001/r) } keys %keys)
5755 my @subkeys = split /\\/, $k;
5756 while (@prev && @subkeys && $prev[0] eq $subkeys[0]) { shift @prev; shift @subkeys; }
5757 while (@prev) { printf OUTPUT "%*s}\n", 4 * --$indent, ""; shift @prev; }
5758 my ($def, @vals) = @{$keys{$k}};
5759 for (my $i = 0; $i < @subkeys; $i++)
5761 my $name = $subkeys[$i];
5762 my $prefix = "";
5763 if ($name =~ /^-/)
5765 $name =~ s/^-//;
5766 $prefix = "NoRemove ";
5768 if ($name =~ /\s/)
5770 $name = "'$name'";
5772 printf OUTPUT "%*s%s%s%s\n%*s{\n", 4 * $indent, "", $prefix, $name,
5773 $i == $#subkeys && $def ? " = s '$def'" : "", 4 * $indent, "";
5774 $indent++;
5776 foreach my $v (sort @vals) { printf OUTPUT "%*sval $v\n", 4 * $indent, ""; }
5777 @prev = split /\\/, $k;
5779 while (@prev) { printf OUTPUT "%*s}\n", 4 * --$indent, ""; shift @prev; }
5780 printf OUTPUT "}\n";
5781 close OUTPUT;
5782 save_file($filename);
5786 ################################################################
5787 # save a file if modified
5788 sub save_file($)
5790 my $file = shift;
5791 if (-f $file && !system "cmp $file $file.new >/dev/null")
5793 unlink "$file.new";
5795 else
5797 rename "$file.new", "$file";
5802 ################################################################
5803 # main routine
5805 chdir ".." if -f "./make_unicode";
5806 load_data();
5807 dump_bidi_dir_table( "dlls/gdi32/uniscribe/direction.c" );
5808 dump_bidi_dir_table( "dlls/dwrite/direction.c" );
5809 dump_mirroring( "dlls/gdi32/uniscribe/mirror.c" );
5810 dump_mirroring( "dlls/dwrite/mirror.c" );
5811 dump_bracket( "dlls/gdi32/uniscribe/bracket.c" );
5812 dump_bracket( "dlls/dwrite/bracket.c" );
5813 dump_shaping( "dlls/gdi32/uniscribe/shaping.c" );
5814 dump_arabic_shaping( "dlls/dwrite/shapers/arabic_table.c" );
5815 dump_linebreak( "dlls/gdi32/uniscribe/linebreak.c" );
5816 dump_linebreak( "dlls/dwrite/linebreak.c" );
5817 dump_scripts( "dlls/dwrite/scripts" );
5818 dump_indic( "dlls/gdi32/uniscribe/indicsyllable.c" );
5819 dump_vertical( "dlls/win32u/vertical.c", 1 );
5820 dump_vertical( "dlls/wineps.drv/vertical.c", 0 );
5821 dump_intl_nls("nls/l_intl.nls");
5822 dump_norm_table( "nls/normnfc.nls" );
5823 dump_norm_table( "nls/normnfd.nls" );
5824 dump_norm_table( "nls/normnfkc.nls" );
5825 dump_norm_table( "nls/normnfkd.nls" );
5826 dump_norm_table( "nls/normidna.nls" );
5827 my $chartypes = dump_sortkey_table( "nls/sortdefault.nls" );
5828 dump_locales( "nls/locale.nls", $chartypes );
5829 foreach my $file (@allfiles) { dump_msdata_codepage( $file ); }
5830 dump_eucjp_codepage();
5831 dump_timezones( "dlls/tzres/tzres.rc", @timezone_files );
5832 dump_registry_script( "dlls/kernelbase/kernelbase.rgs", %registry_keys );
5834 exit 0;
5836 # Local Variables:
5837 # compile-command: "./make_unicode"
5838 # End: