windowscodecs: Silence fixme for IID_CMetaBitmapRenderTarget.
[wine.git] / tools / make_unicode
blob5ad8251bea0de513a7da84b5673956a14a54b544
1 #!/usr/bin/perl -w
#
# Generate code page .c files from ftp.unicode.org descriptions
#
# Copyright 2000 Alexandre Julliard
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
22 use strict;
23 use XML::LibXML;
24 use Digest::SHA;
25 use Encode;
26 use Time::Local qw(timegm_modern);
# Versions of the upstream data sets.  Each one is interpolated into the
# download URLs (and some of the file names) in %data_files below, so a
# version bump normally also requires updating the matching "sha" value.
my $UNIVERSION = "15.1.0";
my $CLDRVERSION = "44";
my $ISO639VERSION = "20230123";
my $TZVERSION = "2024a";

# Upstream data files, keyed by a short identifier.  Every entry carries:
#   url  - download location
#   sha  - expected digest of the file as 64 hex digits (presumably SHA-256,
#          given the Digest::SHA import -- confirm against the code that
#          verifies it)
#   name - optional; only present on the version-specific Unicode files
#          (presumably the local file name to use -- confirm against the
#          code that fetches the files)
my %data_files =
(
    ucd       => { url => "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip", name => "UCD-$UNIVERSION.zip",
                   sha => "cb1c663d053926500cd501229736045752713a066bd75802098598b7a7056177" },
    unihan    => { url => "https://www.unicode.org/Public/$UNIVERSION/ucd/Unihan.zip", name => "Unihan-$UNIVERSION.zip",
                   sha => "a0226610e324bcf784ac380e11f4cbf533ee1e6b3d028b0991bf8c0dc3f85853" },
    idna      => { url => "https://www.unicode.org/Public/idna/$UNIVERSION/IdnaMappingTable.txt", name => "IdnaMappingTable-$UNIVERSION.txt",
                   sha => "402cbd285f1f952fcd0834b63541d54f69d3d8f1b8f8599bf71a1a14935f82c4" },
    cldr      => { url => "https://github.com/unicode-org/cldr/archive/refs/tags/release-$CLDRVERSION.zip",
                   sha => "38d04cf28ccfee8b86d2feecebf99d5dc6d3317f53f87ba53b1e774f6395573c" },
    cldr33    => { url => "https://www.unicode.org/Public/cldr/33/cldr-common-33.0.zip",
                   sha => "fa3490082c086d21257153609642f54fcf788fcfda4966fe67f3f6daca0d58b9" },
    sorting   => { url => "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows 10 Sorting Weight Table.txt",
                   sha => "81fcfa1e5ed3e3a94d329959ff7d97d522ddf9d653d2c4d6ddcccc5cd4df663f" },
    codepages => { url => "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows Supported Code Page Data Files.zip",
                   sha => "5074e6dd253056ba61fc6c870c9a955467855129c6ad3a51761c386b301b125a" },
    iso639    => { url => "https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_$ISO639VERSION.zip",
                   sha => "884faa6cc5ac5181ed7969eed75355c1bc665447614cf4c06c62e87b38fe6a97" },
    ksx1001   => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/KSX1001.TXT",
                   sha => "d8d2a35206ac0ea2865f5d801c9d6717f735bf46f263a658a64a960abe59e371" },
    jis0208   => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT",
                   sha => "1c571870457f19c97720631fa83ee491549a96ba1436da1296786a67d8632e87" },
    jis0212   => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT",
                   sha => "477820bb3055bbcc90880d788cd95607d221dc94457bae249231adecf13c12e6" },
    tzdata    => { url => "https://data.iana.org/time-zones/releases/tzdata$TZVERSION.tar.gz",
                   sha => "0d0434459acbd2059a7a8da1f3304a84a86591f6ed69c6248fffa502b6edffe3" },
);
# Default char for undefined mappings
my $DEF_CHAR = ord '?';

# Last valid Unicode character
my $MAX_CHAR = 0x10ffff;

# Backslash-separated key paths.  Most components carry a leading "-"; its
# meaning is defined by whatever consumes these strings (not visible here).
# NOTE(review): the final "Time Zones" component of $zonekey has no "-"
# prefix, unlike every other component -- confirm this is intentional.
my $nlskey = "-SYSTEM\\-CurrentControlSet\\-Control\\-Nls";
my $zonekey = "-Software\\-Microsoft\\-Windows NT\\-CurrentVersion\\Time Zones";
# Code page description files to process, as relative paths.
# NOTE(review): "CodpageFiles" (sic) presumably mirrors the directory name
# used inside the downloaded code page archive -- confirm before "fixing"
# the spelling.
my @allfiles =
(
    "CodpageFiles/037.txt",
    "CodpageFiles/437.txt",
    "CodpageFiles/500.txt",
    "CodpageFiles/708.txt",
    "CodpageFiles/720.txt",
    "CodpageFiles/737.txt",
    "CodpageFiles/775.txt",
    "CodpageFiles/850.txt",
    "CodpageFiles/852.txt",
    "CodpageFiles/855.txt",
    "CodpageFiles/857.txt",
    "CodpageFiles/860.txt",
    "CodpageFiles/861.txt",
    "CodpageFiles/862.txt",
    "CodpageFiles/863.txt",
    "CodpageFiles/864.txt",
    "CodpageFiles/865.txt",
    "CodpageFiles/866.txt",
    "CodpageFiles/869.txt",
    "CodpageFiles/874.txt",
    "CodpageFiles/875.txt",
    "CodpageFiles/932.txt",
    "CodpageFiles/936.txt",
    "CodpageFiles/949.txt",
    "CodpageFiles/950.txt",
    "CodpageFiles/1026.txt",
    "CodpageFiles/1250.txt",
    "CodpageFiles/1251.txt",
    "CodpageFiles/1252.txt",
    "CodpageFiles/1253.txt",
    "CodpageFiles/1254.txt",
    "CodpageFiles/1255.txt",
    "CodpageFiles/1256.txt",
    "CodpageFiles/1257.txt",
    "CodpageFiles/1258.txt",
    "CodpageFiles/1361.txt",
    "CodpageFiles/10000.txt",
    "CodpageFiles/10001.txt",
    "CodpageFiles/10002.txt",
    "CodpageFiles/10003.txt",
    "CodpageFiles/10004.txt",
    "CodpageFiles/10005.txt",
    "CodpageFiles/10006.txt",
    "CodpageFiles/10007.txt",
    "CodpageFiles/10008.txt",
    "CodpageFiles/10010.txt",
    "CodpageFiles/10017.txt",
    "CodpageFiles/10021.txt",
    "CodpageFiles/10029.txt",
    "CodpageFiles/10079.txt",
    "CodpageFiles/10081.txt",
    "CodpageFiles/10082.txt",
    "CodpageFiles/20127.txt",
    "CodpageFiles/20866.txt",
    "CodpageFiles/21866.txt",
    "CodpageFiles/28591.txt",
    "CodpageFiles/28592.txt",
    "CodpageFiles/28593.txt",
    "CodpageFiles/28594.txt",
    "CodpageFiles/28595.txt",
    "CodpageFiles/28596.txt",
    "CodpageFiles/28597.txt",
    "CodpageFiles/28598.txt",
    "CodpageFiles/28599.txt",
    "CodpageFiles/28603.txt",
    "CodpageFiles/28605.txt",
);
# Names of the time zone data files to read (presumably members of the
# tzdata archive listed in %data_files -- confirm against the reader).
my @timezone_files = qw(africa antarctica asia australasia europe northamerica southamerica etcetera backward);
# Character type flags, keyed by type name.  CT_CTYPE1 flags occupy the low
# 16 bits and CT_CTYPE3 flags the high 16 bits of a single value.
# NOTE(review): "alpha" additionally sets bit 0x80000000, which falls outside
# the documented CT_CTYPE1 range; its purpose is not visible in this part of
# the file -- confirm against the code that consumes the flags.
my %ctype =
(
    # CT_CTYPE1
    "upper"         => 0x0001,
    "lower"         => 0x0002,
    "digit"         => 0x0004,
    "space"         => 0x0008,
    "punct"         => 0x0010,
    "cntrl"         => 0x0020,
    "blank"         => 0x0040,
    "xdigit"        => 0x0080,
    "alpha"         => 0x0100 | 0x80000000,
    "defin"         => 0x0200,
    # CT_CTYPE3 in high 16 bits
    "nonspacing"    => 0x00010000,
    "diacritic"     => 0x00020000,
    "vowelmark"     => 0x00040000,
    "symbol"        => 0x00080000,
    "katakana"      => 0x00100000,
    "hiragana"      => 0x00200000,
    "halfwidth"     => 0x00400000,
    "fullwidth"     => 0x00800000,
    "ideograph"     => 0x01000000,
    "kashida"       => 0x02000000,
    "lexical"       => 0x04000000,
    "highsurrogate" => 0x08000000,
    "lowsurrogate"  => 0x10000000,
);
# Numeric codes for the two bracket type letters "o" and "c" (presumably the
# open/close values of the Unicode Bidi_Paired_Bracket_Type property --
# confirm against the code that parses the bracket data).
my %bracket_types =
(
    "o" => 0x0000,
    "c" => 0x0001,
);
# Numeric codes for Indic syllabic category names.  The codes are consecutive
# from 0x0000 ("Other") to 0x0023; the key spellings presumably match the
# values found in the Unicode Indic syllabic category data file -- confirm
# against the parser.
my %indic_types =
(
    "Other"                       => 0x0000,
    "Bindu"                       => 0x0001,
    "Visarga"                     => 0x0002,
    "Avagraha"                    => 0x0003,
    "Nukta"                       => 0x0004,
    "Virama"                      => 0x0005,
    "Vowel_Independent"           => 0x0006,
    "Vowel_Dependent"             => 0x0007,
    "Vowel"                       => 0x0008,
    "Consonant_Placeholder"       => 0x0009,
    "Consonant"                   => 0x000a,
    "Consonant_Dead"              => 0x000b,
    "Consonant_Succeeding_Repha"  => 0x000c,
    "Consonant_Subjoined"         => 0x000d,
    "Consonant_Medial"            => 0x000e,
    "Consonant_Final"             => 0x000f,
    "Consonant_Head_Letter"       => 0x0010,
    "Modifying_Letter"            => 0x0011,
    "Tone_Letter"                 => 0x0012,
    "Tone_Mark"                   => 0x0013,
    "Register_Shifter"            => 0x0014,
    "Consonant_Preceding_Repha"   => 0x0015,
    "Pure_Killer"                 => 0x0016,
    "Invisible_Stacker"           => 0x0017,
    "Gemination_Mark"             => 0x0018,
    "Cantillation_Mark"           => 0x0019,
    "Non_Joiner"                  => 0x001a,
    "Joiner"                      => 0x001b,
    "Number_Joiner"               => 0x001c,
    "Number"                      => 0x001d,
    "Brahmi_Joining_Number"       => 0x001e,
    "Consonant_With_Stacker"      => 0x001f,
    "Consonant_Prefixed"          => 0x0020,
    "Syllable_Modifier"           => 0x0021,
    "Consonant_Killer"            => 0x0022,
    "Consonant_Initial_Postfixed" => 0x0023,
);
# Numeric codes for positional category names ("Right", "Top_And_Bottom",
# ...).  The codes are consecutive from 0x01 to 0x10; 0 is not assigned to
# any name in this table.
my %matra_types =
(
    "Right"                    => 0x01,
    "Left"                     => 0x02,
    "Visual_Order_Left"        => 0x03,
    "Left_And_Right"           => 0x04,
    "Top"                      => 0x05,
    "Bottom"                   => 0x06,
    "Top_And_Bottom"           => 0x07,
    "Top_And_Right"            => 0x08,
    "Top_And_Left"             => 0x09,
    "Top_And_Left_And_Right"   => 0x0a,
    "Bottom_And_Right"         => 0x0b,
    "Top_And_Bottom_And_Right" => 0x0c,
    "Overstruck"               => 0x0d,
    "Invisible"                => 0x0e,
    "Bottom_And_Left"          => 0x0f,
    "Top_And_Bottom_And_Left"  => 0x10,
);
# Numeric codes for two/three-letter break class abbreviations (presumably
# the Unicode Line_Break property values -- confirm against the parser).
# The codes are consecutive from 0x0001 ("BK") to 0x0030 ("VI"); 0 is not
# assigned to any class in this table.
my %break_types =
(
    "BK"  => 0x0001,
    "CR"  => 0x0002,
    "LF"  => 0x0003,
    "CM"  => 0x0004,
    "SG"  => 0x0005,
    "GL"  => 0x0006,
    "CB"  => 0x0007,
    "SP"  => 0x0008,
    "ZW"  => 0x0009,
    "NL"  => 0x000a,
    "WJ"  => 0x000b,
    "JL"  => 0x000c,
    "JV"  => 0x000d,
    "JT"  => 0x000e,
    "H2"  => 0x000f,
    "H3"  => 0x0010,
    "XX"  => 0x0011,
    "OP"  => 0x0012,
    "CL"  => 0x0013,
    "CP"  => 0x0014,
    "QU"  => 0x0015,
    "NS"  => 0x0016,
    "EX"  => 0x0017,
    "SY"  => 0x0018,
    "IS"  => 0x0019,
    "PR"  => 0x001a,
    "PO"  => 0x001b,
    "NU"  => 0x001c,
    "AL"  => 0x001d,
    "ID"  => 0x001e,
    "IN"  => 0x001f,
    "HY"  => 0x0020,
    "BB"  => 0x0021,
    "BA"  => 0x0022,
    "SA"  => 0x0023,
    "AI"  => 0x0024,
    "B2"  => 0x0025,
    "HL"  => 0x0026,
    "CJ"  => 0x0027,
    "RI"  => 0x0028,
    "EB"  => 0x0029,
    "EM"  => 0x002a,
    "ZWJ" => 0x002b,
    "AK"  => 0x002c,
    "AP"  => 0x002d,
    "AS"  => 0x002e,
    "VF"  => 0x002f,
    "VI"  => 0x0030,
);
# Numeric codes for the orientation abbreviations "R", "U", "Tr" and "Tu"
# (presumably the Unicode Vertical_Orientation property values -- confirm
# against the parser).
my %vertical_types =
(
    "R"  => 0x0000,
    "U"  => 0x0001,
    "Tr" => 0x0002,
    "Tu" => 0x0003,
);
# Maps each Unicode general category abbreviation to the combination of
# %ctype flags that characters of that category start with.  Every category
# includes "defin"; flags that cannot be derived from the category alone are
# listed per code point in %special_categories.
my %categories =
(
    "Lu" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"upper"}, # Letter, Uppercase
    "Ll" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"lower"}, # Letter, Lowercase
    "Lt" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"upper"}|$ctype{"lower"}, # Letter, Titlecase
    "Mn" => $ctype{"defin"}|$ctype{"nonspacing"}, # Mark, Non-Spacing
    "Mc" => $ctype{"defin"}, # Mark, Spacing Combining
    "Me" => $ctype{"defin"}, # Mark, Enclosing
    "Nd" => $ctype{"defin"}|$ctype{"digit"}, # Number, Decimal Digit
    "Nl" => $ctype{"defin"}|$ctype{"alpha"}, # Number, Letter
    "No" => $ctype{"defin"}, # Number, Other
    "Zs" => $ctype{"defin"}|$ctype{"space"}, # Separator, Space
    "Zl" => $ctype{"defin"}|$ctype{"space"}, # Separator, Line
    "Zp" => $ctype{"defin"}|$ctype{"space"}, # Separator, Paragraph
    "Cc" => $ctype{"defin"}|$ctype{"cntrl"}, # Other, Control
    "Cf" => $ctype{"defin"}|$ctype{"cntrl"}, # Other, Format
    "Cs" => $ctype{"defin"}, # Other, Surrogate
    "Co" => $ctype{"defin"}, # Other, Private Use
    "Cn" => $ctype{"defin"}, # Other, Not Assigned
    "Lm" => $ctype{"defin"}|$ctype{"alpha"}, # Letter, Modifier
    "Lo" => $ctype{"defin"}|$ctype{"alpha"}, # Letter, Other
    "Pc" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Connector
    "Pd" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Dash
    "Ps" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Open
    "Pe" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Close
    "Pi" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Initial quote
    "Pf" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Final quote
    "Po" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Other
    "Sm" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Math
    "Sc" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Currency
    "Sk" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Modifier
    "So" => $ctype{"defin"}|$ctype{"symbol"}  # Symbol, Other
);
# a few characters need additional categories that cannot be determined automatically
#
# Each key is a flag name that also appears in %ctype; each value is a
# reference to the list of code points that should receive that flag.
# NOTE(review): the "fullwidth" list contains 0x30ad twice.  This looks like
# harmless redundancy, but it is kept as-is because the code that consumes
# these lists is not visible here -- confirm before removing it.
my %special_categories =
(
    "xdigit"     => [ ord('0')..ord('9'),ord('A')..ord('F'),ord('a')..ord('f'),
                      0xff10..0xff19, 0xff21..0xff26, 0xff41..0xff46 ],
    "space"      => [ 0x09..0x0d, 0x85 ],
    "blank"      => [ 0x09, 0x20, 0xa0, 0x3000, 0xfeff ],
    "cntrl"      => [ 0x070f, 0x200c, 0x200d,
                      0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e,
                      0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
                      0xfff9, 0xfffa, 0xfffb ],
    "punct"      => [ 0x24, 0x2b, 0x3c..0x3e, 0x5e, 0x60, 0x7c, 0x7e, 0xa2..0xbe,
                      0xd7, 0xf7 ],
    "digit"      => [ 0xb2, 0xb3, 0xb9 ],
    "lower"      => [ 0xaa, 0xba, 0x2071, 0x207f ],
    "nonspacing" => [ 0xc0..0xc5, 0xc7..0xcf, 0xd1..0xd6, 0xd8..0xdd, 0xe0..0xe5, 0xe7..0xef,
                      0xf1..0xf6, 0xf8..0xfd, 0xff, 0x6de, 0x1929..0x192b, 0x302e..0x302f ],
    "diacritic"  => [ 0x5e, 0x60, 0xb7, 0xd8, 0xf8 ],
    "symbol"     => [ 0x09..0x0d, 0x20..0x23, 0x25, 0x26, 0x28..0x2a, 0x2c, 0x2e..0x2f, 0x3a..0x40,
                      0x5b..0x60, 0x7b..0x7e, 0xa0..0xa9, 0xab..0xb1, 0xb4..0xb8, 0xbb, 0xbf,
                      0x02b9..0x02ba, 0x02c6..0x02cf ],
    "halfwidth"  => [ 0x20..0x7e, 0xa2..0xa3, 0xa5..0xa6, 0xac, 0xaf, 0x20a9 ],
    "fullwidth"  => [ 0x2018..0x2019, 0x201c..0x201d, 0x3000..0x3002, 0x300c..0x300d, 0x309b..0x309c,
                      0x30a1..0x30ab, 0x30ad, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9,
                      0x30bb, 0x30bd, 0x30bf, 0x30c1, 0x30c3, 0x30c4, 0x30c6, 0x30c8, 0x30ca..0x30cf,
                      0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de..0x30ed, 0x30ef, 0x30f2..0x30f3, 0x30fb,
                      0x3131..0x3164 ],
    "ideograph"  => [ 0x3006..0x3007 ],
    "lexical"    => [ 0x22, 0x24, 0x27, 0x2d, 0x2f, 0x3d, 0x40, 0x5c, 0x5e..0x60, 0x7e,
                      0xa8, 0xaa, 0xad, 0xaf, 0xb4, 0xb8, 0xba,
                      0x02b0..0x02b8, 0x02bc, 0x02c7, 0x02ca..0x02cb, 0x02cf, 0x02d8..0x02dd, 0x02e0..0x02e3,
                      0x037a, 0x0384..0x0385, 0x0387, 0x0559..0x055a, 0x0640, 0x1fbd..0x1fc1,
                      0x1fcd..0x1fcf, 0x1fdd..0x1fdf, 0x1fed..0x1fef, 0x1ffd..0x1ffe, 0x2010..0x2015,
                      0x2032..0x2034, 0x2038, 0x2043..0x2044, 0x207b..0x207c, 0x207f, 0x208b..0x208c,
                      0x2212, 0x2215..0x2216, 0x2500, 0x2504..0x2505, 0x2508..0x2509, 0x254c..0x254d,
                      0x3003, 0x301c, 0x3030..0x3035, 0x309b..0x309e, 0x30fd..0x30fe, 0xfe31..0xfe32,
                      0xfe58, 0xfe63, 0xfe66, 0xfe68..0xfe69, 0xfe6b, 0xff04, 0xff07, 0xff0d, 0xff0f,
                      0xff1d, 0xff20, 0xff3c, 0xff3e, 0xff40, 0xff5e ],
    "kashida"    => [ 0x0640 ],
);
# Numeric direction code for each bidirectional class abbreviation.  All
# explicit embedding/override/isolate formatting classes (LRE ... PDI) share
# the single code 15.
my %directions =
(
    "L"   => 1,  # Left-to-Right
    "R"   => 2,  # Right-to-Left
    "AL"  => 12, # Right-to-Left Arabic
    "EN"  => 3,  # European Number
    "ES"  => 4,  # European Number Separator
    "ET"  => 5,  # European Number Terminator
    "AN"  => 6,  # Arabic Number
    "CS"  => 7,  # Common Number Separator
    "NSM" => 13, # Non-Spacing Mark
    "BN"  => 14, # Boundary Neutral
    "B"   => 8,  # Paragraph Separator
    "S"   => 9,  # Segment Separator
    "WS"  => 10, # Whitespace
    "ON"  => 11, # Other Neutrals
    "LRE" => 15, # Left-to-Right Embedding
    "LRO" => 15, # Left-to-Right Override
    "RLE" => 15, # Right-to-Left Embedding
    "RLO" => 15, # Right-to-Left Override
    "PDF" => 15, # Pop Directional Format
    "LRI" => 15, # Left-to-Right Isolate
    "RLI" => 15, # Right-to-Left Isolate
    "FSI" => 15, # First Strong Isolate
    "PDI" => 15  # Pop Directional Isolate
);
# C2_* type code for each bidirectional class abbreviation; the trailing
# comment on each entry names the C2_* constant the number stands for.
# Several classes collapse onto one code: "AL" shares C2_RIGHTTOLEFT with
# "R", and NSM plus all explicit formatting classes map to C2_OTHERNEUTRAL.
my %c2_types =
(
    "L"   => 1,  # C2_LEFTTORIGHT
    "R"   => 2,  # C2_RIGHTTOLEFT
    "AL"  => 2,  # C2_RIGHTTOLEFT
    "EN"  => 3,  # C2_EUROPENUMBER
    "ES"  => 4,  # C2_EUROPESEPARATOR
    "ET"  => 5,  # C2_EUROPETERMINATOR
    "AN"  => 6,  # C2_ARABICNUMBER
    "CS"  => 7,  # C2_COMMONSEPARATOR
    "NSM" => 11, # C2_OTHERNEUTRAL
    "BN"  => 0,  # C2_NOTAPPLICABLE
    "B"   => 8,  # C2_BLOCKSEPARATOR
    "S"   => 9,  # C2_SEGMENTSEPARATOR
    "WS"  => 10, # C2_WHITESPACE
    "ON"  => 11, # C2_OTHERNEUTRAL
    "LRE" => 11, # C2_OTHERNEUTRAL
    "LRO" => 11, # C2_OTHERNEUTRAL
    "RLE" => 11, # C2_OTHERNEUTRAL
    "RLO" => 11, # C2_OTHERNEUTRAL
    "PDF" => 11, # C2_OTHERNEUTRAL
    "LRI" => 11, # C2_OTHERNEUTRAL
    "RLI" => 11, # C2_OTHERNEUTRAL
    "FSI" => 11, # C2_OTHERNEUTRAL
    "PDI" => 11  # C2_OTHERNEUTRAL
);
# Numeric code for each bidirectional class abbreviation.  Unlike
# %directions, every class gets its own distinct code here, running
# consecutively from 0 ("ON") to 22 ("PDI").
my %bidi_types =
(
    "ON"  => 0,  # Other Neutrals
    "L"   => 1,  # Left-to-Right
    "R"   => 2,  # Right-to-Left
    "AN"  => 3,  # Arabic Number
    "EN"  => 4,  # European Number
    "AL"  => 5,  # Right-to-Left Arabic
    "NSM" => 6,  # Non-Spacing Mark
    "CS"  => 7,  # Common Number Separator
    "ES"  => 8,  # European Number Separator
    "ET"  => 9,  # European Number Terminator
    "BN"  => 10, # Boundary Neutral
    "S"   => 11, # Segment Separator
    "WS"  => 12, # Whitespace
    "B"   => 13, # Paragraph Separator
    "RLO" => 14, # Right-to-Left Override
    "RLE" => 15, # Right-to-Left Embedding
    "LRO" => 16, # Left-to-Right Override
    "LRE" => 17, # Left-to-Right Embedding
    "PDF" => 18, # Pop Directional Format
    "LRI" => 19, # Left-to-Right Isolate
    "RLI" => 20, # Right-to-Left Isolate
    "FSI" => 21, # First Strong Isolate
    "PDI" => 22  # Pop Directional Isolate
);
# Numeric code for each joining type / joining group name.  "D" (Dual_Joining)
# and "C" (Join_Causing) deliberately share the code 3; the two Syriac
# entries are group names rather than single-letter joining types.
my %joining_types =
(
    "U"           => 0, # Non_Joining
    "L"           => 1, # Left_Joining
    "R"           => 2, # Right_Joining
    "D"           => 3, # Dual_Joining
    "C"           => 3, # Join_Causing
    "ALAPH"       => 4, # Syriac ALAPH
    "DALATH RISH" => 5, # Syriac DALATH RISH group
    "T"           => 6, # Transparent
);
466 my @locales =
468 { name => "", lcid => 0x0000007f, file => "root", territory => "IV", sabbrevlangname => "IVL", sopentypelang =>"dflt" },
469 { name => "aa", sopentypelang => "AFR" },
470 { name => "aa-DJ" },
471 { name => "aa-ER" },
472 { name => "aa-ET" },
473 { name => "af", lcid => 0x00000036, oemcp => 850, sabbrevlangname => "AFK", sopentypelang => "AFK" },
474 { name => "af-NA" },
475 { name => "af-ZA", lcid => 0x00000436 },
476 { name => "agq" },
477 { name => "agq-CM" },
478 { name => "ak", sopentypelang => "TWI" },
479 { name => "ak-GH" },
480 { name => "am", lcid => 0x0000005e, sabbrevlangname => "AMH" },
481 { name => "am-ET", lcid => 0x0000045e },
482 { name => "ar", lcid => 0x00000001, territory => "SA", oemcp => 720, group => 13 },
483 { name => "ar-001" },
484 { name => "ar-AE", lcid => 0x00003801, sabbrevlangname => "ARU" },
485 { name => "ar-BH", lcid => 0x00003c01, sabbrevlangname => "ARH" },
486 { name => "ar-DJ" },
487 { name => "ar-DZ", lcid => 0x00001401, sabbrevlangname => "ARG", nativedigits => "0123456789" },
488 { name => "ar-EG", lcid => 0x00000c01, sabbrevlangname => "ARE" },
489 { name => "ar-EH" },
490 { name => "ar-ER" },
491 { name => "ar-IL" },
492 { name => "ar-IQ", lcid => 0x00000801, sabbrevlangname => "ARI" },
493 { name => "ar-JO", lcid => 0x00002c01, sabbrevlangname => "ARJ" },
494 { name => "ar-KM" },
495 { name => "ar-KW", lcid => 0x00003401, sabbrevlangname => "ARK" },
496 { name => "ar-LB", lcid => 0x00003001, sabbrevlangname => "ARB" },
497 { name => "ar-LY", lcid => 0x00001001, sabbrevlangname => "ARL", nativedigits => "0123456789" },
498 { name => "ar-MA", lcid => 0x00001801, sabbrevlangname => "ARM", nativedigits => "0123456789" },
499 { name => "ar-MR" },
500 { name => "ar-OM", lcid => 0x00002001, sabbrevlangname => "ARO" },
501 { name => "ar-PS" },
502 { name => "ar-QA", lcid => 0x00004001, sabbrevlangname => "ARQ" },
503 { name => "ar-SA", lcid => 0x00000401, sabbrevlangname => "ARA" },
504 { name => "ar-SD" },
505 { name => "ar-SO" },
506 { name => "ar-SS" },
507 { name => "ar-SY", lcid => 0x00002801, sabbrevlangname => "ARS" },
508 { name => "ar-TD" },
509 { name => "ar-TN", lcid => 0x00001c01, sabbrevlangname => "ART", nativedigits => "0123456789" },
510 { name => "ar-YE", lcid => 0x00002401, sabbrevlangname => "ARY" },
511 { name => "arn", lcid => 0x0000007a, oemcp => 850, ebcdiccp => 20284, slist => ",", sabbrevlangname => "MPD", sopentypelang => "MAP" },
512 { name => "arn-CL", lcid => 0x0000047a },
513 { name => "arn-Latn", alias => "arn" },
514 { name => "arn-Latn-CL", alias => "arn-CL" },
515 { name => "as", lcid => 0x0000004d, slist => ",", group => 15 },
516 { name => "as-IN", lcid => 0x0000044d },
517 { name => "asa" },
518 { name => "asa-TZ" },
519 { name => "ast" },
520 { name => "ast-ES" },
521 { name => "az", lcid => 0x0000002c, oemcp => 857, ebcdiccp => 20905, group => 2 },
522 { name => "az-Cyrl", lcid => 0x0000742c, oemcp => 866, ebcdiccp => 20880, group => 5, sabbrevlangname => "AZC" },
523 { name => "az-Cyrl-AZ", lcid => 0x0000082c },
524 { name => "az-Latn", lcid => 0x0000782c },
525 { name => "az-Latn-AZ", lcid => 0x0000042c },
526 { name => "ba", lcid => 0x0000006d, oemcp => 866, group => 5, sabbrevlangname => "BAS", sopentypelang => "BSH" },
527 { name => "ba-Cyrl", alias => "ba" },
528 { name => "ba-Cyrl-RU", alias => "ba-RU" },
529 { name => "ba-RU", lcid => 0x0000046d },
530 { name => "bas" },
531 { name => "bas-CM" },
532 { name => "be", lcid => 0x00000023, oemcp => 866, ebcdiccp => 500, group => 5 },
533 { name => "be-BY", lcid => 0x00000423 },
534 { name => "bem" },
535 { name => "bem-ZM" },
536 { name => "bew" },
537 { name => "bew-ID" },
538 { name => "bez" },
539 { name => "bez-TZ" },
540 { name => "bg", lcid => 0x00000002, oemcp => 866, ebcdiccp => 21025, group => 5, sabbrevlangname => "BGR", sopentypelang => "BGR" },
541 { name => "bg-BG", lcid => 0x00000402 },
542 { name => "bin", lcid => 0x00000066, oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "EDO" },
543 { name => "bin-NG", lcid => 0x00000466, file => "bin", dir => "exemplars" },
544 { name => "blo" },
545 { name => "blo-BJ" },
546 { name => "bm", sopentypelang => "BMB" },
547 { name => "bm-Latn", file => "bm" },
548 { name => "bm-Latn-ML", file => "bm_ML" },
549 { name => "bm-ML", alias => "bm-Latn-ML" },
550 { name => "bn", lcid => 0x00000045, slist => ",", group => 15, sabbrevlangname => "BNB" },
551 { name => "bn-BD", lcid => 0x00000845 },
552 { name => "bn-IN", lcid => 0x00000445, sabbrevlangname => "BNG" },
553 { name => "bo", lcid => 0x00000051, slist => ",", group => 15, sabbrevlangname => "BOB", sopentypelang => "TIB" },
554 { name => "bo-CN", lcid => 0x00000451 },
555 { name => "bo-IN", slist => "," },
556 { name => "bo-Tibt", alias => "bo" },
557 { name => "bo-Tibt-CN", alias => "bo-CN" },
558 { name => "bo-Tibt-IN", alias => "bo-IN" },
559 { name => "br", lcid => 0x0000007e, oemcp => 850, ebcdiccp => 20297 },
560 { name => "br-FR", lcid => 0x0000047e },
561 { name => "br-Latn", alias => "br" },
562 { name => "br-Latn-FR", alias => "br-FR" },
563 { name => "brx" },
564 { name => "brx-IN" },
565 { name => "bs", lcid => 0x0000781a, oemcp => 852, maccp => 10082, ebcdiccp => 870, group => 2, sabbrevlangname => "BSB" },
566 { name => "bs-Cyrl", lcid => 0x0000641a, oemcp => 855, group => 5, sabbrevlangname => "BSC" },
567 { name => "bs-Cyrl-BA", lcid => 0x0000201a },
568 { name => "bs-Latn", lcid => 0x0000681a },
569 { name => "bs-Latn-BA", lcid => 0x0000141a },
570 { name => "byn", sopentypelang => "BIL" },
571 { name => "byn-ER" },
572 { name => "ca", lcid => 0x00000003, oemcp => 850 },
573 { name => "ca-AD", maccp => 65001 },
574 { name => "ca-ES", lcid => 0x00000403 },
575 { name => "ca-ES-valencia", lcid => 0x00000803, file => "ca_ES_VALENCIA", sabbrevlangname => "VAL" },
576 { name => "ca-FR", maccp => 65001 },
577 { name => "ca-IT", maccp => 65001 },
578 { name => "ccp" },
579 { name => "ccp-BD", alias => "ccp-Cakm-BD" },
580 { name => "ccp-Cakm", file => "ccp" },
581 { name => "ccp-Cakm-BD", file => "ccp_BD" },
582 { name => "ccp-Cakm-IN", file => "ccp_IN" },
583 { name => "ccp-IN", alias => "ccp-Cakm-IN" },
584 { name => "ce" },
585 { name => "ce-RU" },
586 { name => "ceb" },
587 { name => "ceb-Latn", file => "ceb" },
588 { name => "ceb-Latn-PH", file => "ceb_PH" },
589 { name => "ceb-PH", alias => "ceb-Latn-PH" },
590 { name => "cgg" },
591 { name => "cgg-UG" },
592 { name => "chr", lcid => 0x0000005c, slist => ",", sabbrevlangname => "CRE" },
593 { name => "chr-Cher", lcid => 0x00007c5c, file => "chr" },
594 { name => "chr-Cher-US", lcid => 0x0000045c, file => "chr_US" },
595 { name => "chr-US", alias => "chr-Cher-US" },
596 { name => "ckb", alias => "ku" },
597 { name => "ckb-IQ", alias => "ku-Arab-IQ" },
598 { name => "ckb-IR", alias => "ku-Arab-IR" },
599 { name => "co", lcid => 0x00000083, oemcp => 850, ebcdiccp => 20297 },
600 { name => "co-FR", lcid => 0x00000483 },
601 { name => "co-Latn", alias => "co" },
602 { name => "co-Latn-FR", alias => "co-FR" },
603 { name => "cs", lcid => 0x00000005, oemcp => 852, group => 2, sabbrevlangname => "CSY", sopentypelang => "CSY" },
604 { name => "cs-CZ", lcid => 0x00000405 },
605 { name => "csw" },
606 { name => "csw-CA" },
607 { name => "cu", sopentypelang => "CSL" },
608 { name => "cu-RU" },
609 { name => "cy", lcid => 0x00000052, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "CYM", sopentypelang => "WEL" },
610 { name => "cy-GB", lcid => 0x00000452 },
611 { name => "da", lcid => 0x00000006, oemcp => 850, ebcdiccp => 20277 },
612 { name => "da-DK", lcid => 0x00000406 },
613 { name => "da-GL", maccp => 65001 },
614 { name => "dav" },
615 { name => "dav-KE" },
616 { name => "de", lcid => 0x00000007, oemcp => 850, ebcdiccp => 20273 },
617 { name => "de-AT", lcid => 0x00000c07, sabbrevlangname => "DEA" },
618 { name => "de-BE" },
619 { name => "de-CH", lcid => 0x00000807, sabbrevlangname => "DES" },
620 { name => "de-DE", lcid => 0x00000407 },
621 { name => "de-DE_phoneb", lcid => 0x00010407, alias => "de-DE" },
622 { name => "de-DE-u-co-phonebk", alias => "de-DE_phoneb" },
623 { name => "de-IT", oemcp => 65001 },
624 { name => "de-LI", lcid => 0x00001407, sabbrevlangname => "DEC" },
625 { name => "de-LU", lcid => 0x00001007, sabbrevlangname => "DEL" },
626 { name => "dje", sopentypelang => "DJR" },
627 { name => "dje-NE" },
628 { name => "doi", sopentypelang => "DGR" },
629 { name => "doi-IN", alias => "doi-Deva-IN" },
630 { name => "doi-Deva", file => "doi" },
631 { name => "doi-Deva-IN", file => "doi_IN" },
632 { name => "dsb", lcid => 0x00007c2e, sparent => "hsb", oemcp => 850, ebcdiccp => 870, sabbrevlangname => "DSB", sopentypelang => "LSB" },
633 { name => "dsb-DE", lcid => 0x0000082e },
634 { name => "dua" },
635 { name => "dua-CM" },
636 { name => "dv", lcid => 0x00000065, slist => "\x{060c}", group => 13, nativedigits => "0123456789" },
637 { name => "dv-MV", lcid => 0x00000465 },
638 { name => "dyo" },
639 { name => "dyo-SN" },
640 { name => "dz", sopentypelang => "DZN" },
641 { name => "dz-BT", lcid => 0x00000c51, sabbrevlangname => "ZZZ" },
642 { name => "ebu" },
643 { name => "ebu-KE" },
644 { name => "ee" },
645 { name => "ee-GH" },
646 { name => "ee-TG" },
647 { name => "el", lcid => 0x00000008, oemcp => 737, group => 4 },
648 { name => "el-CY" },
649 { name => "el-GR", lcid => 0x00000408 },
650 { name => "en", lcid => 0x00000009, oemcp => 437, slist => ",", sabbrevlangname => "ENU" },
651 { name => "en-001", oemcp => 850 },
652 { name => "en-029", lcid => 0x00002409, file => "en", oemcp => 850, sabbrevlangname => "ENB" },
653 { name => "en-150", oemcp => 65001 },
654 { name => "en-AE", lcid => 0x00004c09, oemcp => 65001, sabbrevlangname => "ZZZ" },
655 { name => "en-AG", oemcp => 850 },
656 { name => "en-AI", oemcp => 850 },
657 { name => "en-AS", oemcp => 850 },
658 { name => "en-AT", oemcp => 65001 },
659 { name => "en-AU", lcid => 0x00000c09, oemcp => 850, sabbrevlangname => "ENA" },
660 { name => "en-BB", oemcp => 850 },
661 { name => "en-BE", oemcp => 850 },
662 { name => "en-BI", oemcp => 65001 },
663 { name => "en-BM", oemcp => 850 },
664 { name => "en-BS", oemcp => 850 },
665 { name => "en-BW", oemcp => 850 },
666 { name => "en-BZ", lcid => 0x00002809, oemcp => 850, sabbrevlangname => "ENL" },
667 { name => "en-CA", lcid => 0x00001009, oemcp => 850, ebcdiccp => 37, sabbrevlangname => "ENC" },
668 { name => "en-CC", oemcp => 850 },
669 { name => "en-CH", oemcp => 65001 },
670 { name => "en-CK", oemcp => 850 },
671 { name => "en-CM", oemcp => 850 },
672 { name => "en-CX", oemcp => 850 },
673 { name => "en-CY", oemcp => 65001 },
674 { name => "en-DE", oemcp => 65001 },
675 { name => "en-DG", oemcp => 850 },
676 { name => "en-DK", oemcp => 65001 },
677 { name => "en-DM", oemcp => 850 },
678 { name => "en-ER", oemcp => 850 },
679 { name => "en-FI", oemcp => 65001 },
680 { name => "en-FJ", oemcp => 850 },
681 { name => "en-FK", oemcp => 850 },
682 { name => "en-FM", oemcp => 850 },
683 { name => "en-GB", lcid => 0x00000809, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "ENG" },
684 { name => "en-GD", oemcp => 850 },
685 { name => "en-GG", oemcp => 850 },
686 { name => "en-GH", oemcp => 850 },
687 { name => "en-GI", oemcp => 850 },
688 { name => "en-GM", oemcp => 850 },
689 { name => "en-GU", oemcp => 850 },
690 { name => "en-GY", oemcp => 850 },
691 { name => "en-HK", lcid => 0x00003c09, oemcp => 850, sabbrevlangname => "ENH" },
692 { name => "en-ID", lcid => 0x00003809, oemcp => 850, sabbrevlangname => "ZZZ" },
693 { name => "en-IE", lcid => 0x00001809, oemcp => 850, sabbrevlangname => "ENI" },
694 { name => "en-IL", oemcp => 65001 },
695 { name => "en-IM", oemcp => 850 },
696 { name => "en-IN", lcid => 0x00004009, sabbrevlangname => "ENN" },
697 { name => "en-IO", oemcp => 850 },
698 { name => "en-JE", oemcp => 850 },
699 { name => "en-JM", lcid => 0x00002009, oemcp => 850, sabbrevlangname => "ENJ" },
700 { name => "en-KE", oemcp => 850 },
701 { name => "en-KI", oemcp => 850 },
702 { name => "en-KN", oemcp => 850 },
703 { name => "en-KY", oemcp => 850 },
704 { name => "en-LC", oemcp => 850 },
705 { name => "en-LR", oemcp => 850 },
706 { name => "en-LS", oemcp => 850 },
707 { name => "en-MG", oemcp => 850 },
708 { name => "en-MH", oemcp => 850 },
709 { name => "en-MO", oemcp => 850 },
710 { name => "en-MP", oemcp => 850 },
711 { name => "en-MS", oemcp => 850 },
712 { name => "en-MT", oemcp => 850 },
713 { name => "en-MU", oemcp => 850 },
714 { name => "en-MW", oemcp => 850 },
715 { name => "en-MY", lcid => 0x00004409, sabbrevlangname => "ENM" },
716 { name => "en-NA", oemcp => 850 },
717 { name => "en-NF", oemcp => 850 },
718 { name => "en-NG", oemcp => 850 },
719 { name => "en-NL", oemcp => 65001 },
720 { name => "en-NR", oemcp => 850 },
721 { name => "en-NU", oemcp => 850 },
722 { name => "en-NZ", lcid => 0x00001409, oemcp => 850, sabbrevlangname => "ENZ" },
723 { name => "en-PG", oemcp => 850 },
724 { name => "en-PH", lcid => 0x00003409, ebcdiccp => 500, sabbrevlangname => "ENP" },
725 { name => "en-PK", oemcp => 850 },
726 { name => "en-PN", oemcp => 850 },
727 { name => "en-PR", oemcp => 850 },
728 { name => "en-PW", oemcp => 850 },
729 { name => "en-RW", oemcp => 850 },
730 { name => "en-SB", oemcp => 850 },
731 { name => "en-SC", oemcp => 850 },
732 { name => "en-SD", oemcp => 850 },
733 { name => "en-SE", oemcp => 65001 },
734 { name => "en-SG", lcid => 0x00004809, sabbrevlangname => "ENE" },
735 { name => "en-SH", oemcp => 850 },
736 { name => "en-SI", oemcp => 65001 },
737 { name => "en-SL", oemcp => 850 },
738 { name => "en-SS", oemcp => 850 },
739 { name => "en-SX", oemcp => 850 },
740 { name => "en-SZ", oemcp => 850 },
741 { name => "en-TC", oemcp => 850 },
742 { name => "en-TK", oemcp => 850 },
743 { name => "en-TO", oemcp => 850 },
744 { name => "en-TT", lcid => 0x00002c09, oemcp => 850, sabbrevlangname => "ENT" },
745 { name => "en-TV", oemcp => 850 },
746 { name => "en-TZ", oemcp => 850 },
747 { name => "en-UG", oemcp => 850 },
748 { name => "en-UM", oemcp => 850 },
749 { name => "en-US", lcid => 0x00000409 },
750 { name => "en-VC", oemcp => 850 },
751 { name => "en-VG", oemcp => 850 },
752 { name => "en-VI", oemcp => 850 },
753 { name => "en-VU", oemcp => 850 },
754 { name => "en-WS", oemcp => 850 },
755 { name => "en-ZA", lcid => 0x00001c09, ebcdiccp => 500, sabbrevlangname => "ENS" },
756 { name => "en-ZM", oemcp => 850 },
757 { name => "en-ZW", lcid => 0x00003009, ebcdiccp => 500, sabbrevlangname => "ENW" },
758 { name => "eo", sopentypelang => "NTO" },
759 { name => "eo-001" },
760 { name => "es", lcid => 0x0000000a, oemcp => 850, ebcdiccp => 20284, sabbrevlangname => "ESP", sopentypelang => "ESP" },
761 { name => "es-419", lcid => 0x0000580a, sabbrevlangname => "ESJ" },
762 { name => "es-AR", lcid => 0x00002c0a, sabbrevlangname => "ESS" },
763 { name => "es-BO", lcid => 0x0000400a, sabbrevlangname => "ESB" },
764 { name => "es-BR", oemcp => 65001 },
765 { name => "es-BZ", oemcp => 65001 },
766 { name => "es-CL", lcid => 0x0000340a, sabbrevlangname => "ESL" },
767 { name => "es-CO", lcid => 0x0000240a, sabbrevlangname => "ESO" },
768 { name => "es-CR", lcid => 0x0000140a, sabbrevlangname => "ESC" },
769 { name => "es-CU", lcid => 0x00005c0a, sabbrevlangname => "ESK" },
770 { name => "es-DO", lcid => 0x00001c0a, sabbrevlangname => "ESD" },
771 { name => "es-EA" },
772 { name => "es-EC", lcid => 0x0000300a, sabbrevlangname => "ESF" },
773 { name => "es-ES", lcid => 0x00000c0a, sabbrevlangname => "ESN" },
774 { name => "es-ES_tradnl", lcid => 0x0000040a, file => "es_ES" },
775 { name => "es-ES-u-co-trad", alias => "es-ES_tradnl" },
776 { name => "es-GQ" },
777 { name => "es-GT", lcid => 0x0000100a, sabbrevlangname => "ESG" },
778 { name => "es-HN", lcid => 0x0000480a, sabbrevlangname => "ESH" },
779 { name => "es-IC" },
780 { name => "es-MX", lcid => 0x0000080a, sabbrevlangname => "ESM" },
781 { name => "es-NI", lcid => 0x00004c0a, sabbrevlangname => "ESI" },
782 { name => "es-PA", lcid => 0x0000180a, sabbrevlangname => "ESA" },
783 { name => "es-PE", lcid => 0x0000280a, sabbrevlangname => "ESR" },
784 { name => "es-PH" },
785 { name => "es-PR", lcid => 0x0000500a, sabbrevlangname => "ESU" },
786 { name => "es-PY", lcid => 0x00003c0a, sabbrevlangname => "ESZ" },
787 { name => "es-SV", lcid => 0x0000440a, sabbrevlangname => "ESE" },
788 { name => "es-US", lcid => 0x0000540a, sabbrevlangname => "EST" },
789 { name => "es-UY", lcid => 0x0000380a, sabbrevlangname => "ESY" },
790 { name => "es-VE", lcid => 0x0000200a, sabbrevlangname => "ESV" },
791 { name => "et", lcid => 0x00000025, oemcp => 775, group => 3, sabbrevlangname => "ETI", sopentypelang => "ETI" },
792 { name => "et-EE", lcid => 0x00000425 },
793 { name => "eu", lcid => 0x0000002d, oemcp => 850, maccp => 65001, sabbrevlangname => "EUQ", sopentypelang => "EUQ" },
794 { name => "eu-ES", lcid => 0x0000042d },
795 { name => "ewo" },
796 { name => "ewo-CM" },
797 { name => "fa", lcid => 0x00000029, inegnumber => 3, oemcp => 720, slist => "\x{061b}", group => 13, sabbrevlangname => "FAR", sopentypelang => "FAR" },
798 { name => "fa-AF", alias => "prs-AF" },
799 { name => "fa-IR", lcid => 0x00000429 },
800 { name => "ff", lcid => 0x00000067, oemcp => 850, ebcdiccp => 20297 },
801 { name => "ff-CM", alias => "ff-Latn-CM" },
802 { name => "ff-GN", alias => "ff-Latn-GN" },
803 { name => "ff-MR", alias => "ff-Latn-MR" },
804 { name => "ff-NG", alias => "ff-Latn-NG" },
805 { name => "ff-SN", alias => "ff-Latn-SN" },
806 { name => "ff-Adlm", oemcp => 65001 },
807 { name => "ff-Adlm-BF" },
808 { name => "ff-Adlm-CM" },
809 { name => "ff-Adlm-GH" },
810 { name => "ff-Adlm-GM" },
811 { name => "ff-Adlm-GN" },
812 { name => "ff-Adlm-GW" },
813 { name => "ff-Adlm-LR" },
814 { name => "ff-Adlm-MR" },
815 { name => "ff-Adlm-NE" },
816 { name => "ff-Adlm-NG" },
817 { name => "ff-Adlm-SL" },
818 { name => "ff-Adlm-SN" },
819 { name => "ff-Latn", lcid => 0x00007c67 },
820 { name => "ff-Latn-BF", oemcp => 65001 },
821 { name => "ff-Latn-CM" },
822 { name => "ff-Latn-GH", oemcp => 65001 },
823 { name => "ff-Latn-GM", oemcp => 65001 },
824 { name => "ff-Latn-GN" },
825 { name => "ff-Latn-GW", oemcp => 65001 },
826 { name => "ff-Latn-LR", oemcp => 65001 },
827 { name => "ff-Latn-MR" },
828 { name => "ff-Latn-NE", oemcp => 65001 },
829 { name => "ff-Latn-NG", lcid => 0x00000467, sabbrevlangname => "ZZZ" },
830 { name => "ff-Latn-SL", oemcp => 65001 },
831 { name => "ff-Latn-SN", lcid => 0x00000867 },
832 { name => "fi", lcid => 0x0000000b, oemcp => 850, ebcdiccp => 20278 },
833 { name => "fi-FI", lcid => 0x0000040b },
834 { name => "fil", lcid => 0x00000064, oemcp => 437, ebcdiccp => 500, sabbrevlangname => "FPO", sopentypelang => "PIL" },
835 { name => "fil-PH", lcid => 0x00000464 },
836 { name => "fil-Latn", alias => "fil" },
837 { name => "fil-Latn-PH", alias => "fil-PH" },
838 { name => "fo", lcid => 0x00000038, oemcp => 850, maccp => 10079, ebcdiccp => 20277, sabbrevlangname => "FOS", sopentypelang => "FOS" },
839 { name => "fo-DK", oemcp => 65001, maccp => 65001 },
840 { name => "fo-FO", lcid => 0x00000438 },
841 { name => "fr", lcid => 0x0000000c, oemcp => 850, ebcdiccp => 20297 },
842 { name => "fr-029", lcid => 0x00001c0c, file => "fr", sabbrevlangname => "ZZZ" },
843 { name => "fr-BE", lcid => 0x0000080c, sabbrevlangname => "FRB" },
844 { name => "fr-BF" },
845 { name => "fr-BI" },
846 { name => "fr-BJ" },
847 { name => "fr-BL" },
848 { name => "fr-CA", lcid => 0x00000c0c, sabbrevlangname => "FRC" },
849 { name => "fr-CD", lcid => 0x0000240c, sabbrevlangname => "FRD" },
850 { name => "fr-CF" },
851 { name => "fr-CG" },
852 { name => "fr-CH", lcid => 0x0000100c, sabbrevlangname => "FRS" },
853 { name => "fr-CI", lcid => 0x0000300c, sabbrevlangname => "FRI" },
854 { name => "fr-CM", lcid => 0x00002c0c, sabbrevlangname => "FRE" },
855 { name => "fr-DJ" },
856 { name => "fr-DZ" },
857 { name => "fr-FR", lcid => 0x0000040c },
858 { name => "fr-GA" },
859 { name => "fr-GF" },
860 { name => "fr-GN" },
861 { name => "fr-GP" },
862 { name => "fr-GQ" },
863 { name => "fr-HT", lcid => 0x00003c0c, sabbrevlangname => "FRH" },
864 { name => "fr-KM" },
865 { name => "fr-LU", lcid => 0x0000140c, sabbrevlangname => "FRL" },
866 { name => "fr-MA", lcid => 0x0000380c, sabbrevlangname => "FRO" },
867 { name => "fr-MC", lcid => 0x0000180c, sabbrevlangname => "FRM" },
868 { name => "fr-MF" },
869 { name => "fr-MG" },
870 { name => "fr-ML", lcid => 0x0000340c, sabbrevlangname => "FRF" },
871 { name => "fr-MQ" },
872 { name => "fr-MR" },
873 { name => "fr-MU" },
874 { name => "fr-NC" },
875 { name => "fr-NE" },
876 { name => "fr-PF" },
877 { name => "fr-PM" },
878 { name => "fr-RE", lcid => 0x0000200c, sabbrevlangname => "FRR" },
879 { name => "fr-RW" },
880 { name => "fr-SC" },
881 { name => "fr-SN", lcid => 0x0000280c, sabbrevlangname => "FRN" },
882 { name => "fr-SY" },
883 { name => "fr-TD" },
884 { name => "fr-TG" },
885 { name => "fr-TN" },
886 { name => "fr-VU" },
887 { name => "fr-WF" },
888 { name => "fr-YT" },
889 { name => "fur", sopentypelang => "FRL" },
890 { name => "fur-IT" },
891 { name => "fuv-NG", alias => "ff-Latn-NG" },
892 { name => "fy", lcid => 0x00000062, oemcp => 850, sabbrevlangname => "FYN", sopentypelang => "FRI" },
893 { name => "fy-NL", lcid => 0x00000462 },
894 { name => "ga", lcid => 0x0000003c, oemcp => 850, sabbrevlangname => "IRE", sopentypelang => "IRI" },
895 { name => "ga-GB", oemcp => 65001 },
896 { name => "ga-IE", lcid => 0x0000083c },
897 { name => "gd", lcid => 0x00000091, oemcp => 850, ebcdiccp => 20285, sopentypelang => "GAE" },
898 { name => "gd-GB", lcid => 0x00000491 },
899 { name => "gd-Latn", alias => "gd" },
900 { name => "gl", lcid => 0x00000056, oemcp => 850, sabbrevlangname => "GLC", sopentypelang => "GAL" },
901 { name => "gl-ES", lcid => 0x00000456 },
902 { name => "gn", lcid => 0x00000074, oemcp => 850, ebcdiccp => 20284, slist => ",", sopentypelang => "GUA" },
903 { name => "gn-PY", lcid => 0x00000474 },
904 { name => "gsw", lcid => 0x00000084, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "ZZZ", sopentypelang => "ALS" },
905 { name => "gsw-CH" },
906 { name => "gsw-FR", lcid => 0x00000484, sabbrevlangname => "GSW" },
907 { name => "gsw-LI" },
908 { name => "gu", lcid => 0x00000047, slist => ",", group => 15 },
909 { name => "gu-IN", lcid => 0x00000447 },
910 { name => "guz" },
911 { name => "guz-KE" },
912 { name => "gv", sopentypelang => "MNX" },
913 { name => "gv-GB", file => "gv" },
914 { name => "gv-IM" },
915 { name => "ha", lcid => 0x00000068, oemcp => 437 },
916 { name => "ha-GH", alias => "ha-Latn-GH" },
917 { name => "ha-Latn", lcid => 0x00007c68, file => "ha" },
918 { name => "ha-Latn-GH", file => "ha_GH", ebcdiccp => 500 },
919 { name => "ha-Latn-NE", file => "ha_NE", ebcdiccp => 500 },
920 { name => "ha-Latn-NG", lcid => 0x00000468, file => "ha_NG" },
921 { name => "ha-NE", alias => "ha-Latn-NE" },
922 { name => "ha-NG", alias => "ha-Latn-NG" },
923 { name => "haw", lcid => 0x00000075, oemcp => 437 },
924 { name => "haw-Latn", alias => "haw" },
925 { name => "haw-Latn-US", alias => "haw-US" },
926 { name => "haw-US", lcid => 0x00000475 },
927 { name => "he", lcid => 0x0000000d, oemcp => 862, slist => ",", group => 12, sopentypelang => "IWR" },
928 { name => "he-IL", lcid => 0x0000040d },
929 { name => "hi", lcid => 0x00000039, slist => ",", group => 15 },
930 { name => "hi-IN", lcid => 0x00000439 },
931 { name => "hr", lcid => 0x0000001a, inegnumber => 2, oemcp => 852, maccp => 10082, group => 2 },
932 { name => "hr-BA", lcid => 0x0000101a, ebcdiccp => 870, inegnumber => 1, sabbrevlangname => "HRB" },
933 { name => "hr-HR", lcid => 0x0000041a },
934 { name => "hsb", lcid => 0x0000002e, oemcp => 850, ebcdiccp => 870, sopentypelang => "USB" },
935 { name => "hsb-DE", lcid => 0x0000042e },
936 { name => "hu", lcid => 0x0000000e, oemcp => 852, group => 2 },
937 { name => "hu-HU", lcid => 0x0000040e },
938 { name => "hu-HU_technl", lcid => 0x0001040e, alias => "hu-HU" },
939 { name => "hy", lcid => 0x0000002b, slist => ",", group => 17 },
940 { name => "hy-AM", lcid => 0x0000042b },
941 { name => "ia" },
942 { name => "ia-001" },
943 ## name => "ibb", lcid => 0x00000069 },
944 ## name => "ibb-NG", lcid => 0x00000469 },
945 { name => "id", lcid => 0x00000021, oemcp => 850 },
946 { name => "id-ID", lcid => 0x00000421 },
947 { name => "ie" },
948 { name => "ie-EE" },
949 { name => "ig", lcid => 0x00000070, oemcp => 437 },
950 { name => "ig-Latn", alias => "ig" },
951 { name => "ig-Latn-NG", alias => "ig-NG" },
952 { name => "ig-NG", lcid => 0x00000470 },
953 { name => "ii", lcid => 0x00000078, group => 9, sopentypelang => "YIM" },
954 { name => "ii-CN", lcid => 0x00000478 },
955 { name => "ii-Yiii", alias => "ii" },
956 { name => "ii-Yiii-CN", alias => "ii-CN" },
957 { name => "is", lcid => 0x0000000f, oemcp => 850, maccp => 10079, ebcdiccp => 20871 },
958 { name => "is-IS", lcid => 0x0000040f },
959 { name => "it", lcid => 0x00000010, oemcp => 850, ebcdiccp => 20280 },
960 { name => "it-CH", lcid => 0x00000810, ebcdiccp => 500, sabbrevlangname => "ITS" },
961 { name => "it-IT", lcid => 0x00000410 },
962 { name => "it-SM" },
963 { name => "it-VA", oemcp => 65001 },
964 { name => "iu", lcid => 0x0000005d, oemcp => 437, slist => ",", sortlocale => "iu-Latn-CA", sabbrevlangname => "IUK", sopentypelang => "INU" },
965 { name => "iu-Cans", lcid => 0x0000785d, file => "iu", oemcp => 65001, sabbrevlangname => "IUS" },
966 { name => "iu-Cans-CA", lcid => 0x0000045d, file => "iu_CA" },
967 { name => "iu-Latn", lcid => 0x00007c5d },
968 { name => "iu-Latn-CA", lcid => 0x0000085d },
969 { name => "ja", lcid => 0x00000011, ireadinglayout => 2, oemcp => 932, slist => ",", sscripts => "Hani Hira Jpan Kana", group => 7, sopentypelang => "JAN" },
970 { name => "ja-JP", lcid => 0x00000411 },
971 { name => "ja-JP_radstr", lcid => 0x00040411, alias => "ja-JP" },
972 { name => "ja-JP-u-co-unihan", alias => "ja-JP_radstr" },
973 { name => "jgo" },
974 { name => "jgo-CM" },
975 { name => "jmc" },
976 { name => "jmc-TZ" },
977 { name => "jv", oemcp => 850, nativedigits => "0123456789" },
978 { name => "jv-ID", alias => "jv-Latn-ID" },
979 ## name => "jv-Java" },
980 ## name => "jv-Java-ID" },
981 { name => "jv-Latn", file => "jv" },
982 { name => "jv-Latn-ID", file => "jv_ID" },
983 { name => "ka", lcid => 0x00000037, group => 16 },
984 { name => "ka-GE", lcid => 0x00000437 },
985 { name => "ka-GE_modern", lcid => 0x00010437, alias => "ka-GE" },
986 { name => "kab", sopentypelang => "KAB0" },
987 { name => "kab-DZ" },
988 { name => "kam", sopentypelang => "KMB" },
989 { name => "kam-KE" },
990 { name => "kde" },
991 { name => "kde-TZ" },
992 { name => "kea" },
993 { name => "kea-CV" },
994 { name => "kgp" },
995 { name => "kgp-BR" },
996 { name => "khq" },
997 { name => "khq-ML" },
998 { name => "ki" },
999 { name => "ki-KE" },
1000 { name => "kk", lcid => 0x0000003f, group => 5, sabbrevlangname => "KKZ" },
1001 { name => "kk-Cyrl", alias => "kk" },
1002 { name => "kk-Cyrl-KZ", alias => "kk-KZ" },
1003 { name => "kk-KZ", lcid => 0x0000043f },
1004 { name => "kkj" },
1005 { name => "kkj-CM" },
1006 { name => "kl", lcid => 0x0000006f, oemcp => 850, ebcdiccp => 20277, sopentypelang => "GRN" },
1007 { name => "kl-GL", lcid => 0x0000046f },
1008 { name => "kln", sopentypelang => "KAL" },
1009 { name => "kln-KE" },
1010 { name => "km", lcid => 0x00000053, inegnumber => 2, slist => ",", group => 15 },
1011 { name => "km-KH", lcid => 0x00000453 },
1012 { name => "kn", lcid => 0x0000004b, slist => ",", group => 15, sabbrevlangname => "KDI" },
1013 { name => "kn-IN", lcid => 0x0000044b },
1014 { name => "ko", lcid => 0x00000012, ireadinglayout => 2, slist => ",", oemcp => 949, ebcdiccp => 20833, sscripts => "Hang Hani Kore", group => 8 },
1015 { name => "ko-KP", oemcp => 65001 },
1016 { name => "ko-KR", lcid => 0x00000412 },
1017 { name => "kok", lcid => 0x00000057, slist => ",", group => 15, sabbrevlangname => "KNK" },
1018 { name => "kok-IN", lcid => 0x00000457 },
1019 { name => "kr", lcid => 0x00000071, sortlocale => "kr-Latn-NG", oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "KNR" },
1020 { name => "kr-Latn", file => "kr", dir => "exemplars" },
1021 { name => "kr-Latn-NG", lcid => 0x00000471, file => "kr", dir => "exemplars" },
1022 { name => "kr-NG", alias => "kr-Latn-NG" },
1023 { name => "ks", lcid => 0x00000060, group => 15, sabbrevlangname => "ZZZ", sopentypelang => "KSH" },
1024 { name => "ks-Arab", lcid => 0x00000460 },
1025 { name => "ks-Arab-IN" },
1026 { name => "ks-Deva", slist => "," },
1027 { name => "ks-Deva-IN", lcid => 0x00000860 },
1028 { name => "ks-IN", alias => "ks-Arab-IN" },
1029 { name => "ksb" },
1030 { name => "ksb-TZ" },
1031 { name => "ksf" },
1032 { name => "ksf-CM" },
1033 { name => "ksh", sopentypelang => "KSH0" },
1034 { name => "ksh-DE" },
1035 { name => "ku", lcid => 0x00000092, file => "ckb", slist => "\x{061b}", sortlocale => "ku-Arab-IQ", oemcp => 720 },
1036 { name => "ku-Arab", lcid => 0x00007c92, file => "ckb", group => 13 },
1037 { name => "ku-Arab-IQ", lcid => 0x00000492, file => "ckb_IQ" },
1038 { name => "ku-Arab-IR", file => "ckb_IR", oemcp => 65001 },
1039 { name => "kw" },
1040 { name => "kw-GB" },
1041 { name => "ky", lcid => 0x00000040, oemcp => 866, group => 5, sabbrevlangname => "KYR" },
1042 { name => "ky-Cyrl", alias => "ky" },
1043 { name => "ky-Cyrl-KG", alias => "ky-KG" },
1044 { name => "ky-KG", lcid => 0x00000440 },
1045 { name => "la", lcid => 0x00000076, oemcp => 437, slist => ",", sabbrevlangname => "ZZZ" },
1046 { name => "la-VA", lcid => 0x00000476 },
1047 { name => "la-001", alias => "la-VA" },
1048 { name => "lag" },
1049 { name => "lag-TZ" },
1050 { name => "lb", lcid => 0x0000006e, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "LBX" },
1051 { name => "lb-LU", lcid => 0x0000046e },
1052 { name => "lg" },
1053 { name => "lg-UG" },
1054 { name => "lkt" },
1055 { name => "lkt-US" },
1056 { name => "ln" },
1057 { name => "ln-AO" },
1058 { name => "ln-CD" },
1059 { name => "ln-CF" },
1060 { name => "ln-CG" },
1061 { name => "lo", lcid => 0x00000054, group => 15 },
1062 { name => "lo-LA", lcid => 0x00000454 },
1063 { name => "lrc" },
1064 { name => "lrc-IQ" },
1065 { name => "lrc-IR" },
1066 { name => "lt", lcid => 0x00000027, oemcp => 775, group => 3, sabbrevlangname => "LTH", sopentypelang => "LTH" },
1067 { name => "lt-LT", lcid => 0x00000427 },
1068 { name => "lu" },
1069 { name => "lu-CD" },
1070 { name => "luo" },
1071 { name => "luo-KE" },
1072 { name => "luy", sopentypelang => "LUH" },
1073 { name => "luy-KE" },
1074 { name => "lv", lcid => 0x00000026, oemcp => 775, group => 3, sabbrevlangname => "LVI", sopentypelang => "LVI" },
1075 { name => "lv-LV", lcid => 0x00000426 },
1076 { name => "mai" },
1077 { name => "mai-IN" },
1078 { name => "mas" },
1079 { name => "mas-KE" },
1080 { name => "mas-TZ" },
1081 { name => "mer" },
1082 { name => "mer-KE" },
1083 { name => "mfe" },
1084 { name => "mfe-MU" },
1085 { name => "mg" },
1086 { name => "mg-MG" },
1087 { name => "mgh" },
1088 { name => "mgh-MZ" },
1089 { name => "mgo" },
1090 { name => "mgo-CM" },
1091 { name => "mi", lcid => 0x00000081, slist => "," },
1092 { name => "mi-Latn", alias => "mi" },
1093 { name => "mi-Latn-NZ", alias => "mi-NZ" },
1094 { name => "mi-NZ", lcid => 0x00000481 },
1095 { name => "mic" },
1096 { name => "mic-CA" },
1097 { name => "mk", lcid => 0x0000002f, oemcp => 866, ebcdiccp => 500, group => 5, sabbrevlangname => "MKI" },
1098 { name => "mk-MK", lcid => 0x0000042f },
1099 { name => "ml", lcid => 0x0000004c, group => 15, sabbrevlangname => "MYM", sopentypelang => "MLR" },
1100 { name => "ml-IN", lcid => 0x0000044c },
1101 { name => "mn", lcid => 0x00000050, oemcp => 866, sopentypelang => "MNG" },
1102 { name => "mn-Cyrl", lcid => 0x00007850, file => "mn", sabbrevlangname => "MNN" },
1103 { name => "mn-Cyrl-MN", alias => "mn-MN" },
1104 { name => "mn-MN", lcid => 0x00000450, sparent => "mn-Cyrl", group => 5 },
1105 { name => "mn-Mong", lcid => 0x00007c50, oemcp => 65001, slist => ",", group => 15, sabbrevlangname => "MNG", nativedigits => "0123456789" },
1106 { name => "mn-Mong-CN", lcid => 0x00000850 },
1107 { name => "mn-Mong-MN", lcid => 0x00000c50, sabbrevlangname => "MNM" },
1108 { name => "mni", lcid => 0x00000058, slist => ",", sabbrevlangname => "ZZZ" },
1109 { name => "mni-IN", lcid => 0x00000458, file => "mni_Beng_IN" },
1110 { name => "mni-Beng" },
1111 { name => "mni-Beng-IN", alias => "mni-IN" },
1112 { name => "moh", lcid => 0x0000007c, oemcp => 850, ebcdiccp => 37, slist => ",", sabbrevlangname => "MWK" },
1113 { name => "moh-CA", lcid => 0x0000047c },
1114 { name => "moh-Latn", alias => "moh" },
1115 { name => "moh-Latn-CA", alias => "moh-CA" },
1116 { name => "mr", lcid => 0x0000004e, slist => ",", group => 15 },
1117 { name => "mr-IN", lcid => 0x0000044e },
1118 { name => "ms", lcid => 0x0000003e, oemcp => 850, sabbrevlangname => "MSL", sopentypelang => "MLY" },
1119 { name => "ms-BN", lcid => 0x0000083e, sabbrevlangname => "MSB" },
1120 { name => "ms-ID" },
1121 { name => "ms-Latn", alias => "ms" },
1122 { name => "ms-Latn-BN", alias => "ms-BN" },
1123 { name => "ms-Latn-MY", alias => "ms-MY" },
1124 { name => "ms-Latn-SG", alias => "ms-SG" },
1125 { name => "ms-MY", lcid => 0x0000043e },
1126 { name => "ms-SG" },
1127 { name => "mt", lcid => 0x0000003a, sopentypelang => "MTS" },
1128 { name => "mt-MT", lcid => 0x0000043a },
1129 { name => "mua" },
1130 { name => "mua-CM" },
1131 { name => "my", lcid => 0x00000055, sopentypelang => "BRM" },
1132 { name => "my-MM", lcid => 0x00000455 },
1133 { name => "mzn" },
1134 { name => "mzn-IR" },
1135 { name => "naq" },
1136 { name => "naq-NA" },
1137 { name => "nb", lcid => 0x00007c14, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NOR", sopentypelang => "NOR" },
1138 { name => "nb-NO", lcid => 0x00000414 },
1139 { name => "nb-SJ" },
1140 { name => "nd", sopentypelang => "NDB" },
1141 { name => "nd-ZW" },
1142 { name => "nds" },
1143 { name => "nds-DE" },
1144 { name => "nds-NL" },
1145 { name => "ne", lcid => 0x00000061, slist => "," },
1146 { name => "ne-IN", lcid => 0x00000861, sabbrevlangname => "NEI" },
1147 { name => "ne-NP", lcid => 0x00000461, group => 15 },
1148 { name => "nl", lcid => 0x00000013, oemcp => 850 },
1149 { name => "nl-AW" },
1150 { name => "nl-BE", lcid => 0x00000813, sabbrevlangname => "NLB" },
1151 { name => "nl-BQ" },
1152 { name => "nl-CW" },
1153 { name => "nl-NL", lcid => 0x00000413 },
1154 { name => "nl-SR" },
1155 { name => "nl-SX" },
1156 { name => "nmg" },
1157 { name => "nmg-CM" },
1158 { name => "nn", lcid => 0x00007814, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NON", sopentypelang => "NYN" },
1159 { name => "nn-NO", lcid => 0x00000814 },
1160 { name => "nnh" },
1161 { name => "nnh-CM" },
1162 { name => "no", lcid => 0x00000014, oemcp => 850, ebcdiccp => 20277, sortlocale => "nb-NO" },
1163 { name => "nqo", idigits => 3, inegnumber => 3, slist => "\x{060c}", sopentypelang => "NKO" },
1164 { name => "nqo-GN" },
1165 { name => "nr", sopentypelang => "NDB" },
1166 { name => "nr-ZA" },
1167 { name => "nso", lcid => 0x0000006c, oemcp => 850, sopentypelang => "SOT" },
1168 { name => "nso-ZA", lcid => 0x0000046c },
1169 { name => "nus" },
1170 { name => "nus-SD", alias => "nus-SS" },
1171 { name => "nus-SS" },
1172 { name => "nyn", sopentypelang => "NKL" },
1173 { name => "nyn-UG" },
1174 { name => "oc", lcid => 0x00000082, oemcp => 850, ebcdiccp => 20297 },
1175 { name => "oc-FR", lcid => 0x00000482 },
1176 { name => "oc-Latn", alias => "oc" },
1177 { name => "oc-Latn-FR", alias => "oc-FR" },
1178 { name => "om", lcid => 0x00000072, sopentypelang => "ORO" },
1179 { name => "om-ET", lcid => 0x00000472 },
1180 { name => "om-KE" },
1181 { name => "or", lcid => 0x00000048, slist => ",", group => 15 },
1182 { name => "or-IN", lcid => 0x00000448 },
1183 { name => "os" },
1184 { name => "os-GE" },
1185 { name => "os-RU" },
1186 { name => "pa", lcid => 0x00000046, slist => "," },
1187 { name => "pa-Arab", lcid => 0x00007c46, slist => ";", inegnumber => 2, oemcp => 720, group => 13, sabbrevlangname => "PAP" },
1188 { name => "pa-Arab-PK", lcid => 0x00000846 },
1189 { name => "pa-Guru" },
1190 { name => "pa-Guru-IN", alias => "pa-IN" },
1191 { name => "pa-IN", lcid => 0x00000446, sparent => "pa-Guru", file => "pa_Guru_IN", group => 15 },
1192 { name => "pap", lcid => 0x00000079, oemcp => 850, sopentypelang => "PAP0" },
1193 ## name => "pap-029", lcid => 0x00000479 },
1194 { name => "pcm" },
1195 { name => "pcm-NG", alias => "pcm-Latn-NG" },
1196 { name => "pcm-Latn", file => "pcm" },
1197 { name => "pcm-Latn-NG", file => "pcm_NG" },
1198 { name => "pl", lcid => 0x00000015, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "PLK", sopentypelang => "PLK" },
1199 { name => "pl-PL", lcid => 0x00000415 },
1200 { name => "prg" },
1201 { name => "prg-001", file => "prg" },
1202 { name => "prg-PL" },
1203 { name => "prs", lcid => 0x0000008c, file => "fa", inegnumber => 3, oemcp => 720, group => 13, sopentypelang => "DRI" },
1204 { name => "prs-AF", lcid => 0x0000048c, file => "fa_AF" },
1205 { name => "prs-Arab", alias => "prs" },
1206 { name => "prs-Arab-AF", alias => "prs-AF" },
1207 { name => "ps", lcid => 0x00000063, group => 13, sabbrevlangname => "PAS", sopentypelang => "PAS" },
1208 { name => "ps-AF", lcid => 0x00000463 },
1209 { name => "ps-PK" },
1210 { name => "pt", lcid => 0x00000016, oemcp => 850, sabbrevlangname => "PTB", sopentypelang => "PTG" },
1211 { name => "pt-AO" },
1212 { name => "pt-BR", lcid => 0x00000416 },
1213 { name => "pt-CH", oemcp => 65001 },
1214 { name => "pt-CV" },
1215 { name => "pt-GQ", oemcp => 65001 },
1216 { name => "pt-GW" },
1217 { name => "pt-LU", oemcp => 65001 },
1218 { name => "pt-MO" },
1219 { name => "pt-MZ" },
1220 { name => "pt-PT", lcid => 0x00000816, sabbrevlangname => "PTG" },
1221 { name => "pt-ST" },
1222 { name => "pt-TL" },
1223 ## name => "qps-Latn-x-sh", lcid => 0x80000901 },
1224 ## name => "qps-ploc", lcid => 0x80000501 },
1225 ## name => "qps-ploca", lcid => 0x800005fe },
1226 ## name => "qps-plocm", lcid => 0x800009ff },
1227 { name => "qu", alias => "quz" },
1228 { name => "qu-BO", alias => "quz-BO" },
1229 { name => "qu-EC", alias => "quz-EC" },
1230 { name => "qu-PE", alias => "quz-PE" },
1231 { name => "quc", lcid => 0x00000086, oemcp => 850, ebcdiccp => 20284, slist => "," },
1232 { name => "quc-Latn", lcid => 0x00007c86, file => "quc" },
1233 { name => "quc-Latn-GT", lcid => 0x00000486, file => "quc_GT" },
1234 { name => "qut", alias => "quc" },
1235 { name => "qut-GT", alias => "quc-Latn-GT" },
1236 { name => "quz", lcid => 0x0000006b, file => "qu", territory => "BO", oemcp => 850, ebcdiccp => 20284, slist => "," },
1237 { name => "quz-BO", lcid => 0x0000046b, file => "qu_BO" },
1238 { name => "quz-EC", lcid => 0x0000086b, file => "qu_EC" },
1239 { name => "quz-Latn", alias => "quz" },
1240 { name => "quz-Latn-BO", alias => "quz-BO" },
1241 { name => "quz-Latn-EC", alias => "quz-EC" },
1242 { name => "quz-Latn-PE", alias => "quz-PE" },
1243 { name => "quz-PE", lcid => 0x00000c6b, file => "qu_PE" },
1244 { name => "rm", lcid => 0x00000017, oemcp => 850, ebcdiccp => 20273, sabbrevlangname => "RMC", sopentypelang => "RMS" },
1245 { name => "rm-CH", lcid => 0x00000417 },
1246 { name => "rn" },
1247 { name => "rn-BI" },
1248 { name => "ro", lcid => 0x00000018, oemcp => 852, ebcdiccp => 20880, sabbrevlangname => "ROM", sopentypelang => "ROM" },
1249 { name => "ro-MD", lcid => 0x00000818, maccp => 65001, sabbrevlangname => "ROD" },
1250 { name => "ro-RO", lcid => 0x00000418, group => 2 },
1251 { name => "rof" },
1252 { name => "rof-TZ" },
1253 { name => "ru", lcid => 0x00000019, oemcp => 866 },
1254 { name => "ru-BY", maccp => 65001 },
1255 { name => "ru-KG", maccp => 65001 },
1256 { name => "ru-KZ", maccp => 65001 },
1257 { name => "ru-MD", lcid => 0x00000819, maccp => 65001, sabbrevlangname => "RUM" },
1258 { name => "ru-RU", lcid => 0x00000419, group => 5 },
1259 { name => "ru-UA", maccp => 65001 },
1260 { name => "rw", lcid => 0x00000087, oemcp => 437, sopentypelang => "RUA" },
1261 { name => "rw-RW", lcid => 0x00000487 },
1262 { name => "rwk" },
1263 { name => "rwk-TZ" },
1264 { name => "sa", lcid => 0x0000004f, slist => ",", group => 15 },
1265 { name => "sa-Deva", alias => "sa" },
1266 { name => "sa-Deva-IN", alias => "sa-IN" },
1267 { name => "sa-IN", lcid => 0x0000044f },
1268 { name => "sah", lcid => 0x00000085, oemcp => 866, group => 5, sopentypelang => "YAK" },
1269 { name => "sah-Cyrl", alias => "sah" },
1270 { name => "sah-Cyrl-RU", alias => "sah-RU" },
1271 { name => "sah-RU", lcid => 0x00000485 },
1272 { name => "saq" },
1273 { name => "saq-KE" },
1274 { name => "sat" },
1275 { name => "sat-Olck" },
1276 { name => "sat-Olck-IN" },
1277 { name => "sbp" },
1278 { name => "sbp-TZ" },
1279 { name => "sc" },
1280 { name => "sc-IT" },
1281 { name => "sd", lcid => 0x00000059, inegnumber => 3, oemcp => 720, sabbrevlangname => "SIP" },
1282 { name => "sd-Arab", lcid => 0x00007c59, group => 13 },
1283 { name => "sd-Arab-PK", lcid => 0x00000859 },
1284 { name => "sd-Deva", inegnumber => 1, slist => ",", oemcp => 65001, group => 15 },
1285 { name => "sd-Deva-IN", lcid => 0x00000459, sabbrevlangname => "ZZZ" },
1286 { name => "sd-PK", alias => "sd-Arab-PK" },
1287 { name => "se", lcid => 0x0000003b, oemcp => 850, ebcdiccp => 20277, sopentypelang => "NSM" },
1288 { name => "se-FI", lcid => 0x00000c3b, ebcdiccp => 20278, sabbrevlangname => "SMG" },
1289 { name => "se-NO", lcid => 0x0000043b },
1290 { name => "se-SE", lcid => 0x0000083b, ebcdiccp => 20278, sabbrevlangname => "SMF" },
1291 { name => "se-Latn", alias => "se" },
1292 { name => "se-Latn-FI", alias => "se-FI" },
1293 { name => "se-Latn-NO", alias => "se-NO" },
1294 { name => "se-Latn-SE", alias => "se-SE" },
1295 { name => "seh" },
1296 { name => "seh-MZ" },
1297 { name => "ses" },
1298 { name => "ses-ML" },
1299 { name => "sg", sopentypelang => "SGO" },
1300 { name => "sg-CF" },
1301 { name => "shi" },
1302 { name => "shi-Latn" },
1303 { name => "shi-Latn-MA" },
1304 { name => "shi-Tfng" },
1305 { name => "shi-Tfng-MA" },
1306 { name => "si", lcid => 0x0000005b, group => 15, sopentypelang => "SNH" },
1307 { name => "si-LK", lcid => 0x0000045b },
1308 { name => "sk", lcid => 0x0000001b, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "SKY", sopentypelang => "SKY" },
1309 { name => "sk-SK", lcid => 0x0000041b },
1310 { name => "skr" },
1311 { name => "skr-PK" },
1312 { name => "sl", lcid => 0x00000024, oemcp => 852, ebcdiccp => 20880, group => 2 },
1313 { name => "sl-SI", lcid => 0x00000424 },
1314 { name => "sma", lcid => 0x0000783b, sparent => "se", ebcdiccp => 20278, sabbrevlangname => "SMB", sopentypelang => "SSM" },
1315 { name => "sma-Latn", alias => "sma" },
1316 { name => "sma-Latn-NO", alias => "sma-NO" },
1317 { name => "sma-Latn-SE", alias => "sma-SE" },
1318 { name => "sma-NO", lcid => 0x0000183b, ebcdiccp => 20277, sabbrevlangname => "SMA" },
1319 { name => "sma-SE", lcid => 0x00001c3b },
1320 { name => "smj", lcid => 0x00007c3b, sparent => "se", ebcdiccp => 20278, sabbrevlangname => "SMK", sopentypelang => "LSM" },
1321 { name => "smj-Latn", alias => "smj" },
1322 { name => "smj-Latn-NO", alias => "smj-NO" },
1323 { name => "smj-Latn-SE", alias => "smj-SE" },
1324 { name => "smj-NO", lcid => 0x0000103b, ebcdiccp => 20277, sabbrevlangname => "SMJ" },
1325 { name => "smj-SE", lcid => 0x0000143b },
1326 { name => "smn", lcid => 0x0000703b, sparent => "se", ebcdiccp => 20278, sopentypelang => "ISM" },
1327 { name => "smn-FI", lcid => 0x0000243b },
1328 { name => "smn-Latn", alias => "smn" },
1329 { name => "smn-Latn-FI", alias => "smn-FI" },
1330 { name => "sms", lcid => 0x0000743b, sparent => "se", ebcdiccp => 20278, sopentypelang => "SKS" },
1331 { name => "sms-FI", lcid => 0x0000203b },
1332 { name => "sms-Latn", alias => "sms" },
1333 { name => "sms-Latn-FI", alias => "sms-FI" },
1334 { name => "sn", sopentypelang => "SNA0" },
1335 { name => "sn-Latn", file => "sn" },
1336 { name => "sn-Latn-ZW", file => "sn_ZW" },
1337 { name => "sn-ZW", alias => "sn-Latn-ZW" },
1338 { name => "so", lcid => 0x00000077, sopentypelang => "SML" },
1339 { name => "so-DJ" },
1340 { name => "so-ET" },
1341 { name => "so-KE" },
1342 { name => "so-SO", lcid => 0x00000477 },
1343 { name => "sq", lcid => 0x0000001c, oemcp => 852, ebcdiccp => 20880, group => 2 },
1344 { name => "sq-AL", lcid => 0x0000041c },
1345 { name => "sq-MK" },
1346 { name => "sq-XK" },
1347 { name => "sr", lcid => 0x00007c1a, sortlocale => "sr-Latn-RS", oemcp => 852, group => 2, sabbrevlangname => "SRB", sopentypelang => "SRB" },
1348 { name => "sr-Cyrl", lcid => 0x00006c1a, oemcp => 855, ebcdiccp => 21025, group => 5, sabbrevlangname => "SRO" },
1349 { name => "sr-Cyrl-BA", lcid => 0x00001c1a, sabbrevlangname => "SRN" },
1350 { name => "sr-Cyrl-ME", lcid => 0x0000301a, sabbrevlangname => "SRQ" },
1351 { name => "sr-Cyrl-RS", lcid => 0x0000281a },
1352 { name => "sr-Cyrl-XK" },
1353 { name => "sr-Latn", lcid => 0x0000701a, sabbrevlangname => "SRM" },
1354 { name => "sr-Latn-BA", lcid => 0x0000181a, maccp => 10082, ebcdiccp => 870, sabbrevlangname => "SRS" },
1355 { name => "sr-Latn-ME", lcid => 0x00002c1a, sabbrevlangname => "SRP" },
1356 { name => "sr-Latn-RS", lcid => 0x0000241a, sabbrevlangname => "SRM" },
1357 { name => "sr-Latn-XK" },
1358 ## name => "sr-Cyrl-CS", lcid => 0x00000c1a },
1359 ## name => "sr-Latn-CS", lcid => 0x0000081a },
1360 { name => "ss", sopentypelang => "SWZ" },
1361 { name => "ss-SZ" },
1362 { name => "ss-ZA" },
1363 { name => "ssy" },
1364 { name => "ssy-ER" },
1365 { name => "st", lcid => 0x00000030 },
1366 { name => "st-LS" },
1367 { name => "st-ZA", lcid => 0x00000430 },
1368 { name => "su" },
1369 { name => "su-Latn" },
1370 { name => "su-Latn-ID" },
1371 { name => "sv", lcid => 0x0000001d, oemcp => 850, ebcdiccp => 20278, sabbrevlangname => "SVE", sopentypelang => "SVE" },
1372 { name => "sv-AX" },
1373 { name => "sv-FI", lcid => 0x0000081d, sabbrevlangname => "SVF" },
1374 { name => "sv-SE", lcid => 0x0000041d, sabbrevlangname => "SVE" },
1375 { name => "sw", lcid => 0x00000041, territory => "KE", oemcp => 437, ebcdiccp => 500, sabbrevlangname => "SWK", sopentypelang => "SWK" },
1376 { name => "sw-CD" },
1377 { name => "sw-KE", lcid => 0x00000441 },
1378 { name => "sw-TZ" },
1379 { name => "sw-UG" },
1380 { name => "swc-CD", alias => "sw-CD" },
1381 { name => "syr", lcid => 0x0000005a, slist => ",", group => 13 },
1382 { name => "syr-SY", lcid => 0x0000045a },
1383 { name => "syr-Syrc", alias => "syr" },
1384 { name => "syr-Syrc-SY", alias => "syr-SY" },
1385 { name => "ta", lcid => 0x00000049, slist => ",", group => 15, sabbrevlangname => "TAI" },
1386 { name => "ta-IN", lcid => 0x00000449 },
1387 { name => "ta-LK", lcid => 0x00000849, sabbrevlangname => "TAM" },
1388 { name => "ta-MY" },
1389 { name => "ta-SG" },
1390 { name => "te", lcid => 0x0000004a, group => 15 },
1391 { name => "te-IN", lcid => 0x0000044a },
1392 { name => "teo" },
1393 { name => "teo-KE" },
1394 { name => "teo-UG" },
1395 { name => "tg", lcid => 0x00000028, oemcp => 866, group => 5, sabbrevlangname => "TAJ", sopentypelang => "TAJ" },
1396 { name => "tg-Cyrl", lcid => 0x00007c28, file => "tg" },
1397 { name => "tg-Cyrl-TJ", lcid => 0x00000428, file => "tg_TJ" },
1398 { name => "tg-TJ", alias => "tg-Cyrl-TJ" },
1399 { name => "th", lcid => 0x0000001e, oemcp => 874, ebcdiccp => 20838, slist => ",", group => 11 },
1400 { name => "th-TH", lcid => 0x0000041e },
1401 { name => "ti", lcid => 0x00000073, territory => "ER", sopentypelang => "TGY" },
1402 { name => "ti-ER", lcid => 0x00000873 },
1403 { name => "ti-ET", lcid => 0x00000473, sabbrevlangname => "TIE" },
1404 { name => "tig", sopentypelang => "TGR" },
1405 { name => "tig-ER" },
1406 { name => "tig-Ethi-ER", alias => "tig-ER" },
1407 { name => "tk", lcid => 0x00000042, oemcp => 852, ebcdiccp => 20880, group => 2, sopentypelang => "TKM" },
1408 { name => "tk-Latn", alias => "tk" },
1409 { name => "tk-Latn-TM", alias => "tk-TM" },
1410 { name => "tk-TM", lcid => 0x00000442 },
1411 { name => "tn", lcid => 0x00000032, oemcp => 850, sopentypelang => "TNA" },
1412 { name => "tn-BW", lcid => 0x00000832, sabbrevlangname => "TSB" },
1413 { name => "tn-ZA", lcid => 0x00000432 },
1414 { name => "to", sopentypelang => "TGN" },
1415 { name => "to-TO" },
1416 { name => "tr", lcid => 0x0000001f, oemcp => 857, ebcdiccp => 20905, group => 6, sabbrevlangname => "TRK", sopentypelang => "TRK" },
1417 { name => "tr-CY" },
1418 { name => "tr-TR", lcid => 0x0000041f },
1419 { name => "ts", lcid => 0x00000031, sopentypelang => "TSG" },
1420 { name => "ts-ZA", lcid => 0x00000431 },
1421 { name => "tt", lcid => 0x00000044, oemcp => 866, group => 5, sabbrevlangname => "TTT" },
1422 { name => "tt-Cyrl", alias => "tt" },
1423 { name => "tt-Cyrl-RU", alias => "tt-RU" },
1424 { name => "tt-RU", lcid => 0x00000444 },
1425 { name => "twq" },
1426 { name => "twq-NE" },
1427 { name => "tyv" },
1428 { name => "tyv-RU" },
1429 { name => "tzm", lcid => 0x0000005f, sortlocale => "tzm-Latn-DZ", oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "TZA" },
1430 { name => "tzm-Latn", lcid => 0x00007c5f, territory => "DZ", file => "tzm" },
1431 { name => "tzm-Latn-MA", file => "tzm_MA", oemcp => 65001 },
1432 { name => "tzm-Latn-DZ", lcid => 0x0000085f, file => "tzm" },
1433 { name => "tzm-MA", alias => "tzm-Latn-MA" },
1434 { name => "tzm-DZ", alias => "tzm-Latn-DZ" },
1435 ## name => "tzm-Arab", group => 13 },
1436 ## name => "tzm-Arab-MA", lcid => 0x0000045f },
1437 ## name => "tzm-Tfng", lcid => 0x0000785f },
1438 ## name => "tzm-Tfng-MA", lcid => 0x0000105f },
1439 { name => "ug", lcid => 0x00000080, oemcp => 720, slist => ",", group => 13, sopentypelang => "UYG", nativedigits => "0123456789" },
1440 { name => "ug-Arab", alias => "ug" },
1441 { name => "ug-Arab-CN", alias => "ug-CN" },
1442 { name => "ug-CN", lcid => 0x00000480 },
1443 { name => "uk", lcid => 0x00000022, oemcp => 866, maccp => 10017, ebcdiccp => 500, group => 5 },
1444 { name => "uk-UA", lcid => 0x00000422 },
1445 { name => "ur", lcid => 0x00000020, oemcp => 720 },
1446 { name => "ur-IN", lcid => 0x00000820, maccp => 65001, sabbrevlangname => "URI" },
1447 { name => "ur-PK", lcid => 0x00000420, group => 13 },
1448 { name => "uz", lcid => 0x00000043, oemcp => 857, maccp => 10029, group => 2 },
1449 { name => "uz-Arab", oemcp => 65001, maccp => 65001 },
1450 { name => "uz-Arab-AF" },
1451 { name => "uz-Cyrl", lcid => 0x00007843, oemcp => 866, maccp => 10007, group => 5, sabbrevlangname => "UZC" },
1452 { name => "uz-Cyrl-UZ", lcid => 0x00000843 },
1453 { name => "uz-Latn", lcid => 0x00007c43 },
1454 { name => "uz-Latn-UZ", lcid => 0x00000443 },
1455 { name => "vai" },
1456 { name => "vai-Latn" },
1457 { name => "vai-Latn-LR" },
1458 { name => "vai-Vaii" },
1459 { name => "vai-Vaii-LR" },
1460 { name => "ve", lcid => 0x00000033, sabbrevlangname => "ZZZ" },
1461 { name => "ve-ZA", lcid => 0x00000433 },
1462 { name => "vi", lcid => 0x0000002a, oemcp => 1258, slist => ",", group => 14, sabbrevlangname => "VIT", sopentypelang => "VIT" },
1463 { name => "vi-VN", lcid => 0x0000042a },
1464 { name => "vmw" },
1465 { name => "vmw-MZ" },
1466 { name => "vo" },
1467 { name => "vo-001" },
1468 { name => "vun" },
1469 { name => "vun-TZ" },
1470 { name => "wa", oemcp => 850 },
1471 { name => "wa-BE" },
1472 { name => "wae" },
1473 { name => "wae-CH" },
1474 { name => "wal" },
1475 { name => "wal-ET" },
1476 { name => "wo", lcid => 0x00000088, oemcp => 850, ebcdiccp => 20297, sopentypelang => "WLF" },
1477 { name => "wo-Latn", alias => "wo" },
1478 { name => "wo-Latn-SN", alias => "wo-SN" },
1479 { name => "wo-SN", lcid => 0x00000488 },
1480 { name => "x-IV_mathan", lcid => 0x0001007f, alias => "" },
1481 { name => "xh", lcid => 0x00000034, oemcp => 850, sopentypelang => "XHS" },
1482 { name => "xh-ZA", lcid => 0x00000434 },
1483 { name => "xnr" },
1484 { name => "xnr-IN" },
1485 { name => "xog" },
1486 { name => "xog-UG" },
1487 { name => "yav" },
1488 { name => "yav-CM" },
1489 { name => "yi", lcid => 0x0000003d, sabbrevlangname => "ZZZ", sopentypelang => "JII" },
1490 { name => "yi-001", lcid => 0x0000043d, file => "yi" },
1491 { name => "yi-UA" },
1492 { name => "yo", lcid => 0x0000006a, oemcp => 437, sopentypelang => "YBA" },
1493 { name => "yo-BJ", ebcdiccp => 500 },
1494 { name => "yo-Latn", alias => "yo" },
1495 { name => "yo-Latn-NG", alias => "yo-NG" },
1496 { name => "yo-NG", lcid => 0x0000046a },
1497 { name => "yrl" },
1498 { name => "yrl-BR" },
1499 { name => "yrl-CO" },
1500 { name => "yrl-VE" },
1501 { name => "yue" },
1502 { name => "yue-Hans" },
1503 { name => "yue-Hans-CN" },
1504 { name => "yue-Hant" },
1505 { name => "yue-Hant-HK" },
1506 { name => "zgh" },
1507 { name => "zgh-MA", alias => "zgh-Tfng-MA" },
1508 { name => "zgh-Tfng", file => "zgh" },
1509 { name => "zgh-Tfng-MA", file => "zgh_MA" },
1510 { name => "za" },
1511 { name => "za-CN" },
1512 { name => "zh", lcid => 0x00007804, ireadinglayout => 2, oemcp => 936, slist => ",", sscripts => "Hani Hans", sabbrevlangname => "CHS", sopentypelang => "ZHS", nativedigits => "0123456789" },
1513 { name => "zh-CN", lcid => 0x00000804, file => "zh_Hans_CN", sparent => "zh-Hans" },
1514 { name => "zh-CN_phoneb", lcid => 0x00050804, alias => "zh-CN" },
1515 { name => "zh-CN_stroke", lcid => 0x00020804, alias => "zh-CN" },
1516 { name => "zh-Hans", lcid => 0x00000004, group => 10 },
1517 { name => "zh-Hans-CN", alias => "zh-CN" },
1518 { name => "zh-Hans-CN-u-co-phonebk", alias => "zh-CN_phoneb" },
1519 { name => "zh-Hans-CN-u-co-stroke", alias => "zh-CN_stroke" },
1520 { name => "zh-Hans-HK", slist => ";", nativedigits => "" },
1521 { name => "zh-Hans-MO", slist => ";", nativedigits => "" },
1522 { name => "zh-Hans-SG", alias => "zh-SG" },
1523 { name => "zh-Hans-SG-u-co-phonebk", alias => "zh-SG_phoneb" },
1524 { name => "zh-Hans-SG-u-co-stroke", alias => "zh-SG_stroke" },
1525 { name => "zh-Hant", lcid => 0x00007c04, sortlocale => "zh-HK", ireadinglayout => 2, oemcp => 950, slist => ",", sscripts => "Hani Hant", group => 9, sabbrevlangname => "CHT", sopentypelang => "ZHH" },
1526 { name => "zh-Hant-HK", alias => "zh-HK" },
1527 { name => "zh-Hant-HK-u-co-unihan", alias => "zh-HK_radstr" },
1528 { name => "zh-Hant-MO", alias => "zh-MO" },
1529 { name => "zh-Hant-MO-u-co-stroke", alias => "zh-MO_stroke" },
1530 { name => "zh-Hant-MO-u-co-unihan", alias => "zh-MO_radstr" },
1531 { name => "zh-Hant-TW", alias => "zh-TW" },
1532 { name => "zh-Hant-TW-u-co-phonetic", alias => "zh-TW_pronun" },
1533 { name => "zh-Hant-TW-u-co-unihan", alias => "zh-TW_radstr" },
1534 { name => "zh-HK", lcid => 0x00000c04, file => "zh_Hant_HK", sparent => "zh-Hant", sabbrevlangname => "ZHH" },
1535 { name => "zh-HK_radstr", lcid => 0x00040c04, alias => "zh-HK" },
1536 { name => "zh-MO", lcid => 0x00001404, file => "zh_Hant_MO", sparent => "zh-Hant", sabbrevlangname => "ZHM", sopentypelang => "ZHT" },
1537 { name => "zh-MO_radstr", lcid => 0x00041404, alias => "zh-MO" },
1538 { name => "zh-MO_stroke", lcid => 0x00021404, alias => "zh-MO" },
1539 { name => "zh-SG", lcid => 0x00001004, file => "zh_Hans_SG", sparent => "zh-Hans", sabbrevlangname => "ZHI" },
1540 { name => "zh-SG_phoneb", lcid => 0x00051004, alias => "zh-SG" },
1541 { name => "zh-SG_stroke", lcid => 0x00021004, alias => "zh-SG" },
1542 { name => "zh-TW", lcid => 0x00000404, file => "zh_Hant_TW", sparent => "zh-Hant", sopentypelang => "ZHT" },
1543 { name => "zh-TW_pronun", lcid => 0x00030404, alias => "zh-TW" },
1544 { name => "zh-TW_radstr", lcid => 0x00040404, alias => "zh-TW" },
1545 { name => "zu", lcid => 0x00000035, oemcp => 850 },
1546 { name => "zu-ZA", lcid => 0x00000435 },
# Table of calendar definitions, one hash per calendar.
# Every entry has a numeric "id"; the other keys seen here are optional:
#   name             - literal calendar name
#   type / locale    - a calendar type string and the locale tag paired with it
#                      (presumably CLDR identifiers; confirm against the code that reads this table)
#   eras             - list of era indices
#   itwodigityearmax - numeric year limit (semantics defined by the consumer of this table)
1549 my @calendars =
1551 { id => 1, name => "Gregorian", itwodigityearmax => 2049 },
1552 { id => 2, type => "gregorian", locale => "en-US", itwodigityearmax => 2049 },
1553 { id => 3, type => "japanese", locale => "ja-JP", eras => [ 232..236 ] },
1554 { id => 4, type => "roc", locale => "zh-TW", eras => [ 1 ] },
1555 { id => 5, type => "dangi", locale => "ko-KR", eras => [ 0 ] },
1556 { id => 6, type => "islamic", locale => "ar-SA", itwodigityearmax => 1451 },
1557 { id => 7, type => "buddhist", locale => "th-TH", eras => [ 0 ] },
1558 { id => 8, type => "hebrew", locale => "he-IL", itwodigityearmax => 5810 },
1559 { id => 9, type => "gregorian", locale => "fr-FR", itwodigityearmax => 2049 },
1560 { id => 10, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1561 { id => 11, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1562 { id => 12, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1563 { id => 13, name => "Julian", locale => "en-US", itwodigityearmax => 2049 },
1564 { id => 14, name => "Japanese Lunisolar" },
1565 { id => 15, name => "Chinese Lunisolar" },
1566 { id => 16, name => "Saka" },
1567 { id => 17, name => "Lunar ETO Chinese" },
1568 { id => 18, name => "Lunar ETO Korean" },
1569 { id => 19, name => "Lunar ETO Rokuyou" },
1570 { id => 20, name => "Korean Lunisolar" },
1571 { id => 21, name => "Taiwan Lunisolar" },
1572 { id => 22, type => "persian", locale => "prs-AF", itwodigityearmax => 1429 },
1573 { id => 23, type => "islamic-umalqura", locale => "ar-SA", itwodigityearmax => 1451 },
1576 my @geoids =
1578 { id => 2, name => "AG" }, # Antigua and Barbuda
1579 { id => 3, name => "AF" }, # Afghanistan
1580 { id => 4, name => "DZ" }, # Algeria
1581 { id => 5, name => "AZ" }, # Azerbaijan
1582 { id => 6, name => "AL" }, # Albania
1583 { id => 7, name => "AM" }, # Armenia
1584 { id => 8, name => "AD" }, # Andorra
1585 { id => 9, name => "AO" }, # Angola
1586 { id => 10, name => "AS" }, # American Samoa
1587 { id => 11, name => "AR" }, # Argentina
1588 { id => 12, name => "AU" }, # Australia
1589 { id => 14, name => "AT" }, # Austria
1590 { id => 17, name => "BH" }, # Bahrain
1591 { id => 18, name => "BB" }, # Barbados
1592 { id => 19, name => "BW" }, # Botswana
1593 { id => 20, name => "BM" }, # Bermuda
1594 { id => 21, name => "BE" }, # Belgium
1595 { id => 22, name => "BS" }, # Bahamas, The
1596 { id => 23, name => "BD" }, # Bangladesh
1597 { id => 24, name => "BZ" }, # Belize
1598 { id => 25, name => "BA" }, # Bosnia and Herzegovina
1599 { id => 26, name => "BO" }, # Bolivia
1600 { id => 27, name => "MM" }, # Myanmar
1601 { id => 28, name => "BJ" }, # Benin
1602 { id => 29, name => "BY" }, # Belarus
1603 { id => 30, name => "SB" }, # Solomon Islands
1604 { id => 32, name => "BR" }, # Brazil
1605 { id => 34, name => "BT" }, # Bhutan
1606 { id => 35, name => "BG" }, # Bulgaria
1607 { id => 37, name => "BN" }, # Brunei
1608 { id => 38, name => "BI" }, # Burundi
1609 { id => 39, name => "CA" }, # Canada
1610 { id => 40, name => "KH" }, # Cambodia
1611 { id => 41, name => "TD" }, # Chad
1612 { id => 42, name => "LK" }, # Sri Lanka
1613 { id => 43, name => "CG" }, # Congo
1614 { id => 44, name => "CD" }, # Congo (DRC)
1615 { id => 45, name => "CN" }, # China
1616 { id => 46, name => "CL" }, # Chile
1617 { id => 49, name => "CM" }, # Cameroon
1618 { id => 50, name => "KM" }, # Comoros
1619 { id => 51, name => "CO" }, # Colombia
1620 { id => 54, name => "CR" }, # Costa Rica
1621 { id => 55, name => "CF" }, # Central African Republic
1622 { id => 56, name => "CU" }, # Cuba
1623 { id => 57, name => "CV" }, # Cape Verde
1624 { id => 59, name => "CY" }, # Cyprus
1625 { id => 61, name => "DK" }, # Denmark
1626 { id => 62, name => "DJ" }, # Djibouti
1627 { id => 63, name => "DM" }, # Dominica
1628 { id => 65, name => "DO" }, # Dominican Republic
1629 { id => 66, name => "EC" }, # Ecuador
1630 { id => 67, name => "EG" }, # Egypt
1631 { id => 68, name => "IE" }, # Ireland
1632 { id => 69, name => "GQ" }, # Equatorial Guinea
1633 { id => 70, name => "EE" }, # Estonia
1634 { id => 71, name => "ER" }, # Eritrea
1635 { id => 72, name => "SV" }, # El Salvador
1636 { id => 73, name => "ET" }, # Ethiopia
1637 { id => 75, name => "CZ" }, # Czech Republic
1638 { id => 77, name => "FI" }, # Finland
1639 { id => 78, name => "FJ" }, # Fiji Islands
1640 { id => 80, name => "FM" }, # Micronesia
1641 { id => 81, name => "FO" }, # Faroe Islands
1642 { id => 84, name => "FR" }, # France
1643 { id => 86, name => "GM" }, # Gambia, The
1644 { id => 87, name => "GA" }, # Gabon
1645 { id => 88, name => "GE" }, # Georgia
1646 { id => 89, name => "GH" }, # Ghana
1647 { id => 90, name => "GI" }, # Gibraltar
1648 { id => 91, name => "GD" }, # Grenada
1649 { id => 93, name => "GL" }, # Greenland
1650 { id => 94, name => "DE" }, # Germany
1651 { id => 98, name => "GR" }, # Greece
1652 { id => 99, name => "GT" }, # Guatemala
1653 { id => 100, name => "GN" }, # Guinea
1654 { id => 101, name => "GY" }, # Guyana
1655 { id => 103, name => "HT" }, # Haiti
1656 { id => 104, name => "HK" }, # Hong Kong S.A.R.
1657 { id => 106, name => "HN" }, # Honduras
1658 { id => 108, name => "HR" }, # Croatia
1659 { id => 109, name => "HU" }, # Hungary
1660 { id => 110, name => "IS" }, # Iceland
1661 { id => 111, name => "ID" }, # Indonesia
1662 { id => 113, name => "IN" }, # India
1663 { id => 114, name => "IO" }, # British Indian Ocean Territory
1664 { id => 116, name => "IR" }, # Iran
1665 { id => 117, name => "IL" }, # Israel
1666 { id => 118, name => "IT" }, # Italy
1667 { id => 119, name => "CI" }, # Côte d'Ivoire
1668 { id => 121, name => "IQ" }, # Iraq
1669 { id => 122, name => "JP" }, # Japan
1670 { id => 124, name => "JM" }, # Jamaica
1671 { id => 125, name => "SJ" }, # Jan Mayen
1672 { id => 126, name => "JO" }, # Jordan
1673 { id => 127, parent => "UM" }, # Johnston Atoll
1674 { id => 129, name => "KE" }, # Kenya
1675 { id => 130, name => "KG" }, # Kyrgyzstan
1676 { id => 131, name => "KP" }, # North Korea
1677 { id => 133, name => "KI" }, # Kiribati
1678 { id => 134, name => "KR" }, # Korea
1679 { id => 136, name => "KW" }, # Kuwait
1680 { id => 137, name => "KZ" }, # Kazakhstan
1681 { id => 138, name => "LA" }, # Laos
1682 { id => 139, name => "LB" }, # Lebanon
1683 { id => 140, name => "LV" }, # Latvia
1684 { id => 141, name => "LT" }, # Lithuania
1685 { id => 142, name => "LR" }, # Liberia
1686 { id => 143, name => "SK" }, # Slovakia
1687 { id => 145, name => "LI" }, # Liechtenstein
1688 { id => 146, name => "LS" }, # Lesotho
1689 { id => 147, name => "LU" }, # Luxembourg
1690 { id => 148, name => "LY" }, # Libya
1691 { id => 149, name => "MG" }, # Madagascar
1692 { id => 151, name => "MO" }, # Macao S.A.R.
1693 { id => 152, name => "MD" }, # Moldova
1694 { id => 154, name => "MN" }, # Mongolia
1695 { id => 156, name => "MW" }, # Malawi
1696 { id => 157, name => "ML" }, # Mali
1697 { id => 158, name => "MC" }, # Monaco
1698 { id => 159, name => "MA" }, # Morocco
1699 { id => 160, name => "MU" }, # Mauritius
1700 { id => 162, name => "MR" }, # Mauritania
1701 { id => 163, name => "MT" }, # Malta
1702 { id => 164, name => "OM" }, # Oman
1703 { id => 165, name => "MV" }, # Maldives
1704 { id => 166, name => "MX" }, # Mexico
1705 { id => 167, name => "MY" }, # Malaysia
1706 { id => 168, name => "MZ" }, # Mozambique
1707 { id => 173, name => "NE" }, # Niger
1708 { id => 174, name => "VU" }, # Vanuatu
1709 { id => 175, name => "NG" }, # Nigeria
1710 { id => 176, name => "NL" }, # Netherlands
1711 { id => 177, name => "NO" }, # Norway
1712 { id => 178, name => "NP" }, # Nepal
1713 { id => 180, name => "NR" }, # Nauru
1714 { id => 181, name => "SR" }, # Suriname
1715 { id => 182, name => "NI" }, # Nicaragua
1716 { id => 183, name => "NZ" }, # New Zealand
1717 { id => 184, name => "PS" }, # Palestinian Authority
1718 { id => 185, name => "PY" }, # Paraguay
1719 { id => 187, name => "PE" }, # Peru
1720 { id => 190, name => "PK" }, # Pakistan
1721 { id => 191, name => "PL" }, # Poland
1722 { id => 192, name => "PA" }, # Panama
1723 { id => 193, name => "PT" }, # Portugal
1724 { id => 194, name => "PG" }, # Papua New Guinea
1725 { id => 195, name => "PW" }, # Palau
1726 { id => 196, name => "GW" }, # Guinea-Bissau
1727 { id => 197, name => "QA" }, # Qatar
1728 { id => 198, name => "RE" }, # Reunion
1729 { id => 199, name => "MH" }, # Marshall Islands
1730 { id => 200, name => "RO" }, # Romania
1731 { id => 201, name => "PH" }, # Philippines
1732 { id => 202, name => "PR" }, # Puerto Rico
1733 { id => 203, name => "RU" }, # Russia
1734 { id => 204, name => "RW" }, # Rwanda
1735 { id => 205, name => "SA" }, # Saudi Arabia
1736 { id => 206, name => "PM" }, # St. Pierre and Miquelon
1737 { id => 207, name => "KN" }, # St. Kitts and Nevis
1738 { id => 208, name => "SC" }, # Seychelles
1739 { id => 209, name => "ZA" }, # South Africa
1740 { id => 210, name => "SN" }, # Senegal
1741 { id => 212, name => "SI" }, # Slovenia
1742 { id => 213, name => "SL" }, # Sierra Leone
1743 { id => 214, name => "SM" }, # San Marino
1744 { id => 215, name => "SG" }, # Singapore
1745 { id => 216, name => "SO" }, # Somalia
1746 { id => 217, name => "ES" }, # Spain
1747 { id => 218, name => "LC" }, # St. Lucia
1748 { id => 219, name => "SD" }, # Sudan
1749 { id => 220, name => "SJ" }, # Svalbard
1750 { id => 221, name => "SE" }, # Sweden
1751 { id => 222, name => "SY" }, # Syria
1752 { id => 223, name => "CH" }, # Switzerland
1753 { id => 224, name => "AE" }, # United Arab Emirates
1754 { id => 225, name => "TT" }, # Trinidad and Tobago
1755 { id => 227, name => "TH" }, # Thailand
1756 { id => 228, name => "TJ" }, # Tajikistan
1757 { id => 231, name => "TO" }, # Tonga
1758 { id => 232, name => "TG" }, # Togo
1759 { id => 233, name => "ST" }, # São Tomé and Príncipe
1760 { id => 234, name => "TN" }, # Tunisia
1761 { id => 235, name => "TR" }, # Turkey
1762 { id => 236, name => "TV" }, # Tuvalu
1763 { id => 237, name => "TW" }, # Taiwan
1764 { id => 238, name => "TM" }, # Turkmenistan
1765 { id => 239, name => "TZ" }, # Tanzania
1766 { id => 240, name => "UG" }, # Uganda
1767 { id => 241, name => "UA" }, # Ukraine
1768 { id => 242, name => "GB" }, # United Kingdom
1769 { id => 244, name => "US" }, # United States
1770 { id => 245, name => "BF" }, # Burkina Faso
1771 { id => 246, name => "UY" }, # Uruguay
1772 { id => 247, name => "UZ" }, # Uzbekistan
1773 { id => 248, name => "VC" }, # St. Vincent and the Grenadines
1774 { id => 249, name => "VE" }, # Bolivarian Republic of Venezuela
1775 { id => 251, name => "VN" }, # Vietnam
1776 { id => 252, name => "VI" }, # Virgin Islands
1777 { id => 253, name => "VA" }, # Vatican City
1778 { id => 254, name => "NA" }, # Namibia
1779 { id => 257, name => "EH" }, # Western Sahara (disputed)
1780 { id => 258, parent => "UM" }, # Wake Island
1781 { id => 259, name => "WS" }, # Samoa
1782 { id => 260, name => "SZ" }, # Swaziland
1783 { id => 261, name => "YE" }, # Yemen
1784 { id => 263, name => "ZM" }, # Zambia
1785 { id => 264, name => "ZW" }, # Zimbabwe
1786 { id => 269, name => "CS" }, # Serbia and Montenegro (Former)
1787 { id => 270, name => "ME" }, # Montenegro
1788 { id => 271, name => "RS" }, # Serbia
1789 { id => 273, name => "CW" }, # Curaçao
1790 { id => 276, name => "SS" }, # South Sudan
1791 { id => 300, name => "AI" }, # Anguilla
1792 { id => 301, name => "AQ" }, # Antarctica
1793 { id => 302, name => "AW" }, # Aruba
1794 { id => 303, parent => "SH" }, # Ascension Island
1795 { id => 304, parent => "053" }, # Ashmore and Cartier Islands
1796 { id => 305, parent => "UM" }, # Baker Island
1797 { id => 306, name => "BV" }, # Bouvet Island
1798 { id => 307, name => "KY" }, # Cayman Islands
1799 { id => 308, name => "830", parent => "155" }, # Channel Islands
1800 { id => 309, name => "CX" }, # Christmas Island
1801 { id => 310, parent => "009" }, # Clipperton Island
1802 { id => 311, name => "CC" }, # Cocos (Keeling) Islands
1803 { id => 312, name => "CK" }, # Cook Islands
1804 { id => 313, parent => "053" }, # Coral Sea Islands
1805 { id => 314, parent => "IO" }, # Diego Garcia
1806 { id => 315, name => "FK" }, # Falkland Islands (Islas Malvinas)
1807 { id => 317, name => "GF" }, # French Guiana
1808 { id => 318, name => "PF" }, # French Polynesia
1809 { id => 319, name => "TF" }, # French Southern and Antarctic Lands
1810 { id => 321, name => "GP" }, # Guadeloupe
1811 { id => 322, name => "GU" }, # Guam
1812 { id => 323 }, # Guantanamo Bay
1813 { id => 324, name => "GG" }, # Guernsey
1814 { id => 325, name => "HM" }, # Heard Island and McDonald Islands
1815 { id => 326, parent => "UM" }, # Howland Island
1816 { id => 327, parent => "UM" }, # Jarvis Island
1817 { id => 328, name => "JE" }, # Jersey
1818 { id => 329, parent => "UM" }, # Kingman Reef
1819 { id => 330, name => "MQ" }, # Martinique
1820 { id => 331, name => "YT" }, # Mayotte
1821 { id => 332, name => "MS" }, # Montserrat
1822 { id => 333, name => "AN", region => 1 }, # Netherlands Antilles (Former)
1823 { id => 334, name => "NC" }, # New Caledonia
1824 { id => 335, name => "NU" }, # Niue
1825 { id => 336, name => "NF" }, # Norfolk Island
1826 { id => 337, name => "MP" }, # Northern Mariana Islands
1827 { id => 338, parent => "UM" }, # Palmyra Atoll
1828 { id => 339, name => "PN" }, # Pitcairn Islands
1829 { id => 340, parent => "MP" }, # Rota Island
1830 { id => 341, parent => "MP" }, # Saipan
1831 { id => 342, name => "GS" }, # South Georgia and the South Sandwich Islands
1832 { id => 343, name => "SH" }, # St. Helena
1833 { id => 346, parent => "MP" }, # Tinian Island
1834 { id => 347, name => "TK" }, # Tokelau
1835 { id => 348, parent => "SH" }, # Tristan da Cunha
1836 { id => 349, name => "TC" }, # Turks and Caicos Islands
1837 { id => 351, name => "VG" }, # Virgin Islands, British
1838 { id => 352, name => "WF" }, # Wallis and Futuna
1839 { id => 742, name => "002" }, # Africa
1840 { id => 2129, name => "142" }, # Asia
1841 { id => 10541, name => "150" }, # Europe
1842 { id => 15126, name => "IM" }, # Man, Isle of
1843 { id => 19618, name => "MK" }, # Macedonia, Former Yugoslav Republic of
1844 { id => 20900, name => "054" }, # Melanesia
1845 { id => 21206, name => "057" }, # Micronesia
1846 { id => 21242, parent => "UM" }, # Midway Islands
1847 { id => 23581, name => "021" }, # Northern America
1848 { id => 26286, name => "061" }, # Polynesia
1849 { id => 27082, name => "013" }, # Central America
1850 { id => 27114, name => "009" }, # Oceania
1851 { id => 30967, name => "SX" }, # Sint Maarten (Dutch part)
1852 { id => 31396, name => "005" }, # South America
1853 { id => 31706, name => "MF" }, # Saint Martin (French part)
1854 { id => 39070, name => "001" }, # World
1855 { id => 42483, name => "011" }, # Western Africa
1856 { id => 42484, name => "017" }, # Middle Africa
1857 { id => 42487, name => "015" }, # Northern Africa
1858 { id => 47590, name => "143" }, # Central Asia
1859 { id => 47599, name => "035" }, # South-Eastern Asia
1860 { id => 47600, name => "030" }, # Eastern Asia
1861 { id => 47603, name => "014" }, # Eastern Africa
1862 { id => 47609, name => "151" }, # Eastern Europe
1863 { id => 47610, name => "039" }, # Southern Europe
1864 { id => 47611, name => "145" }, # Middle East
1865 { id => 47614, name => "034" }, # Southern Asia
1866 { id => 7299303, name => "TL" }, # Democratic Republic of Timor-Leste
1867 { id => 9914689, name => "XK" }, # Kosovo
1868 { id => 10026358, name => "019" }, # Americas
1869 { id => 10028789, name => "AX" }, # Åland Islands
1870 { id => 10039880, name => "029", sintlsymbol => "XCD" }, # Caribbean
1871 { id => 10039882, name => "154" }, # Northern Europe
1872 { id => 10039883, name => "018" }, # Southern Africa
1873 { id => 10210824, name => "155" }, # Western Europe
1874 { id => 10210825, name => "053" }, # Australia and New Zealand
1875 { id => 161832015, name => "BL" }, # Saint Barthélemy
1876 { id => 161832256, name => "UM" }, # U.S. Minor Outlying Islands
1877 { id => 161832257, name => "419", parent => "019" }, # Latin America and the Caribbean
1878 { id => 161832258, name => "BQ" }, # Bonaire, Sint Eustatius and Saba
# File-level working tables shared by the subs below.
# The tables that load_data() fills from UnicodeData.txt are indexed by
# Unicode code point.
1881 my @cp2uni = ();
1882 my @glyph2uni = ();
1883 my @lead_bytes = ();
1884 my @uni2cp = ();
# simple case mappings: code point -> code point (set from the lower/upper fields)
1885 my @tolower_table = ();
1886 my @toupper_table = ();
# set to "ord" of the digit field for code points that have one
1887 my @digitmap_table = ();
# for "<narrow> XXXX" decompositions: indexed by the target XXXX, value is the source code point
1888 my @halfwidth_table = ();
1889 my @fullwidth_table = ();
1890 my @cjk_compat_table = ();
1891 my @chinese_traditional_table = ();
1892 my @chinese_simplified_table = ();
# %categories value for the general category, OR-ed with %ctype flag bits
1893 my @category_table = ();
# %joining_types value: "T" for categories Mn/Me/Cf, "U" otherwise
1894 my @initial_joining_table = ();
# bidi class string taken verbatim from UnicodeData.txt
1895 my @direction_table = ();
# array refs of code points; read by get_composition()
1896 my @decomp_table = ();
# combining class from UnicodeData.txt, or 0x100 for private use (category "Co")
1897 my @combining_class_table = ();
# array refs of code points taken from tagged ("<tag> ...") decompositions
1898 my @decomp_compat_table = ();
# truthy entries block composition in get_composition()
1899 my @comp_exclusions = ();
# array refs; consulted by get_composition() when its $compat argument is 2
1900 my @idna_decomp_table = ();
1901 my @idna_disallowed = ();
1902 my %registry_keys;
1903 my $default_char;
1904 my $default_wchar;
# one (initially empty) list per joining form name
1906 my %joining_forms =
1908 "isolated" => [],
1909 "final" => [],
1910 "initial" => [],
1911 "medial" => []
# "url:name" (or just the url) of the data file most recently opened by open_data_file()
1914 my $current_data_file;
# Convert a list of Unicode code points into a list of UTF-16 code units.
# Values below 0x10000 are passed through unchanged; anything else is split
# into a high (0xd800-based) and low (0xdc00-based) surrogate pair.
sub to_utf16(@)
{
    my @units = map
    {
        $_ < 0x10000
            ? $_
            : (0xd800 | (($_ - 0x10000) >> 10), 0xdc00 | (($_ - 0x10000) & 0x3ff));
    } @_;
    return @units;
}
################################################################
# fetch a unicode.org file and open it
#
# Arguments:
#   $id   - key into %data_files selecting the url, cache name and sha256
#   $name - optional member to extract when the cached file is a .zip or
#           .tar.gz archive
#
# The file is downloaded with wget into $XDG_CACHE_HOME/wine (or
# $HOME/.cache/wine) unless it is already cached there, and its sha256 is
# checked against the expected value.  Returns a read handle (a glob) on the
# file, or on a pipe from unzip/tar for archives, and records what was opened
# in $current_data_file.  Dies on any failure.
sub open_data_file($@)
{
    my ($id, $name) = @_;
    my $data = $data_files{$id};
    die "unknown data file $id" unless defined $data;
    my $cache = ($ENV{XDG_CACHE_HOME} || "$ENV{HOME}/.cache") . "/wine";
    # the handle is returned by value, so the global FILE glob is left untouched
    local *FILE;

    my $url = $data->{url};
    # default cache name is the last path component of the url
    my $filename = "$cache/" . ($data->{name} || ($url =~ s/.*\/([^\/]+)$/$1/r));
    unless (-f $filename)
    {
        print "Fetching $url...\n";
        system "mkdir", "-p", $cache;
        if (system( "wget", "-q", "-O", $filename, $url ) != 0)
        {
            # wget -O creates/truncates the output file before downloading;
            # remove the leftover so the next run retries the download instead
            # of failing the checksum on a partial file
            unlink $filename;
            die "cannot fetch $url";
        }
    }

    my $sha = Digest::SHA->new( "sha256" )->addfile( $filename )->hexdigest;
    die "invalid checksum $sha for $filename" unless $sha eq $data->{sha};

    if ($filename =~ /\.zip$/)
    {
        open FILE, "-|", "unzip", "-p", $filename, $name or die "cannot extract $name from $filename";
    }
    elsif ($filename =~ /\.tar\.gz$/)
    {
        open FILE, "-|", "tar", "-x", "-f", $filename, "-O", $name or die "cannot extract $name from $filename";
    }
    else
    {
        # three-argument open: the path is never interpreted as a mode or command
        open FILE, "<", $filename or die "cannot open $filename";
    }
    $current_data_file = $name ? "$url:$name" : $url;
    return *FILE;
}
################################################################
# load a unicode.org file as XML data
#
# Takes the same ($id, $name) arguments as open_data_file(), parses the
# opened stream with XML::LibXML and returns the resulting document.
sub load_xml_data_file($@)
{
    my ($id, $name) = @_;
    my $FILE = open_data_file( $id, $name );
    my $xml = XML::LibXML->load_xml( IO => $FILE );
    # close the handle that was actually returned; open_data_file() opens it
    # on a localized glob, so the bareword FILE here would be a different handle
    close $FILE;
    return $xml;
}
################################################################
# recursively get the decomposition for a character
#
# $table is a reference to an array indexed by code point whose defined
# entries are array refs of code points.  A character without an entry
# decomposes to itself; otherwise each element of its entry is expanded in
# turn and the results are concatenated.
sub get_decomposition($$);
sub get_decomposition($$)
{
    my ($char, $table) = @_;

    my $mapping = $table->[$char];
    return $char unless defined $mapping;

    my @expanded = map { get_decomposition( $_, $table ) } @{$mapping};
    return @expanded;
}
################################################################
# get the composition that results in a given character
#
# Returns the decomposition of $ch from @decomp_table when that sequence may
# be recomposed into $ch, or an empty list when it may not.  $compat selects
# extra filtering: 1 also consults @decomp_compat_table, 2 also consults
# @idna_decomp_table.
sub get_composition($$)
{
    my ($ch, $compat) = @_;

    my $decomp = $decomp_table[$ch];
    return () unless defined $decomp;              # no decomposition

    my @parts = @{$decomp};
    return () if @parts < 2;                       # singleton decomposition
    return () if $comp_exclusions[$ch];            # composition exclusion
    return () if $combining_class_table[$ch];      # non-starter

    my ($first, $second) = @parts[0, 1];
    return () if $combining_class_table[$first];   # first char is non-starter

    if ($compat == 1)
    {
        # first char has compat decomposition
        return () if !defined $decomp_table[$first] && defined $decomp_compat_table[$first];
    }
    if ($compat == 2)
    {
        # first char has IDNA decomposition
        return () if !defined $decomp_table[$first] && defined $idna_decomp_table[$first];
        # first char's decomposition has IDNA decomposition
        return () if defined $idna_decomp_table[$first] &&
                     defined $idna_decomp_table[$idna_decomp_table[$first]->[0]];
        # second char has IDNA decomposition
        return () if defined $idna_decomp_table[$second];
    }
    return @parts;
}
################################################################
# recursively build decompositions
#
# Takes a decomposition table (list indexed by code point, entries are array
# refs) and returns a new table in which every defined entry is replaced by
# its full recursive decomposition, encoded as UTF-16 code units.
sub build_decompositions(@)
{
    my @src = @_;
    my @dst;

    foreach my $code (grep { defined $src[$_] } 0 .. $#src)
    {
        $dst[$code] = [ to_utf16( get_decomposition( $code, \@src ) ) ];
    }
    return @dst;
}
################################################################
# compose Hangul sequences
#
# Walks the given code point sequence and replaces each leading consonant +
# vowel pair by the corresponding LV syllable, and each LV syllable followed
# by a trailing consonant by the LVT syllable.  Everything else is copied
# through unchanged.  Returns the resulting sequence.
sub compose_hangul(@)
{
    my ($s_base, $l_base, $v_base, $t_base) = (0xac00, 0x1100, 0x1161, 0x11a7);
    my ($l_count, $v_count, $t_count) = (19, 21, 28);
    my $s_count = $l_count * $v_count * $t_count;

    my @input = @_;
    my @output;
    my $pos = 0;

    while ($pos < @input)
    {
        my $cur = $input[$pos];

        # leading consonant followed by a vowel -> LV syllable
        if ($cur >= $l_base && $cur < $l_base + $l_count && $pos < $#input)
        {
            my $vowel = $input[$pos + 1];
            if ($vowel >= $v_base && $vowel < $v_base + $v_count)
            {
                $cur = $s_base + (($cur - $l_base) * $v_count + ($vowel - $v_base)) * $t_count;
                $pos++;
            }
        }

        # LV syllable (no trailing consonant yet) followed by one -> LVT syllable
        if ($cur >= $s_base && $cur < $s_base + $s_count &&
            ($cur - $s_base) % $t_count == 0 && $pos < $#input)
        {
            my $trail = $input[$pos + 1];
            # $t_base itself is excluded: it stands for "no trailing consonant"
            if ($trail > $t_base && $trail < $t_base + $t_count)
            {
                $cur += $trail - $t_base;
                $pos++;
            }
        }

        push @output, $cur;
        $pos++;
    }
    return @output;
}
################################################################
# remove linguistic-only mappings from the case table
#
# $upper and $lower are references to arrays indexed by code point.  Both are
# modified in place: an entry is reset to undef when the other table does not
# map its value back to the same code point.  The upper table is pruned
# first, and the lower table is then checked against the pruned upper table.
sub remove_linguistic_mappings($$)
{
    my ($upper, $lower) = @_;

    # remove case mappings that don't round-trip

    foreach my $pass ([ $upper, $lower ], [ $lower, $upper ])
    {
        my ($table, $inverse) = @{$pass};

        foreach my $code (0 .. @{$table} - 1)
        {
            my $mapped = $table->[$code];
            next unless defined $mapped;

            my $back = $inverse->[$mapped];
            $table->[$code] = undef unless defined $back && $back == $code;
        }
    }
}
2095 ################################################################
2096 # read in the Unicode database files
#
# Takes no arguments and fills the file-level tables from four data
# files, in this order:
#   UnicodeData.txt           - category, direction, joining, case, digit,
#                               combining class, width and decomposition tables
#   CompositionExclusions.txt - @comp_exclusions
#   IdnaMappingTable.txt      - @idna_decomp_table and @idna_disallowed
#   Unihan_Variants.txt       - Chinese traditional/simplified tables
# Dies on a general category or bidi class that is not listed in
# %categories / %directions.
2097 sub load_data()
# first code point of the current "<..., First>" / "<..., Last>" range
2099 my $start;
2101 # now build mappings from the decomposition field of the Unicode database
2103 my $UNICODE_DATA = open_data_file( "ucd", "UnicodeData.txt" );
2104 while (<$UNICODE_DATA>)
2106 # Decode the fields ...
2107 my ($code, $name, $cat, $comb, $bidi,
2108 $decomp, $dec, $dig, $num, $mirror,
2109 $oldname, $comment, $upper, $lower, $title) = split /;/;
2110 my $src = hex $code;
2112 die "unknown category $cat" unless defined $categories{$cat};
2113 die "unknown directionality $bidi" unless defined $directions{$bidi};
2115 $category_table[$src] = $categories{$cat};
2116 $direction_table[$src] = $bidi;
# default joining type: "T" for categories Mn, Me and Cf, "U" for the rest
2117 if ($cat eq "Mn" || $cat eq "Me" || $cat eq "Cf")
2119 $initial_joining_table[$src] = $joining_types{"T"};
2121 else
2123 $initial_joining_table[$src] = $joining_types{"U"};
2126 if ($lower ne "")
2128 $tolower_table[$src] = hex $lower;
2130 if ($upper ne "")
2132 $toupper_table[$src] = hex $upper;
2134 if ($dec ne "")
2136 $category_table[$src] |= $ctype{"digit"};
2138 if ($dig ne "")
# note: stores the character code of the digit field (ord), not its numeric value
2140 $digitmap_table[$src] = ord $dig;
2142 $combining_class_table[$src] = ($cat ne "Co") ? $comb : 0x100; # Private Use
# the remaining category flags are derived from the bidi class and the character names
2144 $category_table[$src] |= $ctype{"nonspacing"} if $bidi eq "NSM";
2145 $category_table[$src] |= $ctype{"diacritic"} if $name =~ /^(COMBINING)|(MODIFIER LETTER)\W/;
2146 $category_table[$src] |= $ctype{"vowelmark"} if $name =~ /\sVOWEL/ || $oldname =~ /\sVOWEL/;
2147 $category_table[$src] |= $ctype{"halfwidth"} if $name =~ /^HALFWIDTH\s/;
2148 $category_table[$src] |= $ctype{"fullwidth"} if $name =~ /^FULLWIDTH\s/;
2149 $category_table[$src] |= $ctype{"hiragana"} if $name =~ /(HIRAGANA)|(\WKANA\W)/;
2150 $category_table[$src] |= $ctype{"katakana"} if $name =~ /(KATAKANA)|(\WKANA\W)/;
2151 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^<CJK Ideograph/;
2152 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^CJK COMPATIBILITY IDEOGRAPH/;
2153 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^HANGZHOU/;
2154 $category_table[$src] |= $ctype{"highsurrogate"} if $name =~ /High Surrogate/;
2155 $category_table[$src] |= $ctype{"lowsurrogate"} if $name =~ /Low Surrogate/;
2157 # copy the category and direction for everything between First/Last pairs
2158 if ($name =~ /, First>/) { $start = $src; }
2159 if ($name =~ /, Last>/)
2161 while ($start < $src)
2163 $category_table[$start] = $category_table[$src];
2164 $direction_table[$start] = $direction_table[$src];
2165 $combining_class_table[$start] = $combining_class_table[$src];
2166 $start++;
2170 next if $decomp eq ""; # no decomposition, skip it
# tagged decomposition: keep everything after the <tag> as the compatibility sequence
2172 if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)/)
2174 my @seq = map { hex $_; } (split /\s+/, (split /\s+/, $decomp, 2)[1]);
2175 $decomp_compat_table[$src] = \@seq;
2178 if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
2180 # decomposition of the form "<foo> 1234" -> use char if type is known
2181 my $dst = hex $2;
2182 if ($1 eq "narrow")
2184 $halfwidth_table[$dst] = $src;
2185 $fullwidth_table[$src] = $dst;
2187 elsif ($1 eq "wide")
2189 next if $dst == 0x5c; # don't remap backslash
2190 $fullwidth_table[$dst] = $src;
2191 $halfwidth_table[$src] = $dst;
2193 elsif ($1 eq "font" || $1 eq "square" || $1 eq "circle")
2195 $fullwidth_table[$src] = $dst if $src >= 0x10000;
2197 elsif ($1 eq "isolated" || $1 eq "final" || $1 eq "initial" || $1 eq "medial")
# record the presentation form, indexed by the base character
2199 ${joining_forms{$1}}[$dst] = $src;
2202 elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
2204 # decomposition "<compat> 0020 1234" -> combining accent
2206 elsif ($decomp =~ /^([0-9a-fA-F]+)/)
2208 # store decomposition
# untagged decompositions of one or two characters go into both tables
2209 if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
2211 $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
2213 elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
2215 my $dst = hex $1;
2216 # Single char decomposition
2217 $decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ];
2218 if ($name =~ /^CJK COMPATIBILITY IDEOGRAPH/)
2220 $cjk_compat_table[$src] = $dst;
2221 $fullwidth_table[$src] = $dst if $src >= 0x10000;
2226 close $UNICODE_DATA;
2228 # patch the category of some special characters
# a decomposable character inherits the category flags of its first component
2230 for (my $i = 0; $i < @decomp_table; $i++)
2232 next unless defined $decomp_table[$i];
2233 $category_table[$i] |= $category_table[$decomp_table[$i]->[0]];
2235 foreach my $cat (keys %special_categories)
2237 my $flag = $ctype{$cat};
2238 foreach my $i (@{$special_categories{$cat}}) { $category_table[$i] |= $flag; }
# a two-character compatibility decomposition passes on the diacritic flag of its second character
2240 for (my $i = 0; $i < @decomp_compat_table; $i++)
2242 next unless defined $decomp_compat_table[$i];
2243 next unless @{$decomp_compat_table[$i]} == 2;
2244 $category_table[$i] |= $category_table[$decomp_compat_table[$i]->[1]] & $ctype{"diacritic"};
2247 # load the composition exclusions
2249 my $EXCL = open_data_file( "ucd", "CompositionExclusions.txt" );
2250 while (<$EXCL>)
2252 s/\#.*//; # remove comments
2253 if (/^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s*$/)
2255 foreach my $i (hex $1 .. hex $2) { $comp_exclusions[$i] = 1; }
2257 elsif (/^([0-9a-fA-F]+)\s*$/)
2259 $comp_exclusions[hex $1] = 1;
2262 close $EXCL;
2264 # load the IDNA mappings
# start from the compatibility decompositions; the IDNA table entries below override them
2266 @idna_decomp_table = @decomp_compat_table;
2267 my $IDNA = open_data_file( "idna", "IdnaMappingTable.txt" );
2268 while (<$IDNA>)
2270 s/\#.*//; # remove comments
2271 next if /^\s*$/;
2272 my ($char, $type, $mapping) = split /;/;
# $ch1 .. $ch2 is the code point range of the line (a single char gives $ch1 == $ch2)
2273 my ($ch1, $ch2);
2274 if ($char =~ /([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)/)
2276 $ch1 = hex $1;
2277 $ch2 = hex $2;
2279 elsif ($char =~ /([0-9a-fA-F]+)/)
2281 $ch1 = $ch2 = hex $1;
2284 if ($type =~ /mapped/ || $type =~ /deviation/)
2286 $mapping =~ s/^\s*(([0-9a-fA-F]+\s+)+)\s*$/$1/;
2287 my @seq = map { hex $_; } split /\s+/, $mapping;
# an empty mapping is stored as [ 0 ], the same value used for ignored characters
2288 foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = @seq ? \@seq : [ 0 ]; }
# valid characters keep the entry copied from the compatibility table
2290 elsif ($type =~ /valid/)
2293 elsif ($type =~ /ignored/)
2295 foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = [ 0 ]; }
2297 elsif ($type =~ /disallowed/)
2299 foreach my $i ($ch1 .. $ch2)
2301 $idna_decomp_table[$i] = undef;
2302 $idna_disallowed[$i] = 1;
2306 close $IDNA;
2308 # load the Unihan mappings
# only variants where both code points have exactly four hex digits are used
2310 my $UNIHAN = open_data_file( "unihan", "Unihan_Variants.txt" );
2311 while (<$UNIHAN>)
2313 s/\#.*//; # remove comments
2314 next if /^\s*$/;
2315 if (/^U\+([0-9a-fA-F]{4})\s+kTraditionalVariant\s+U\+([0-9a-fA-F]{4})$/)
2317 next if hex $1 < 0x4dc0; # skip extension A
2318 $chinese_traditional_table[hex $1] = hex $2;
2320 elsif (/^U\+([0-9a-fA-F]{4})\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]{4})$/)
2322 next if hex $1 < 0x4dc0; # skip extension A
2323 $chinese_simplified_table[hex $1] = hex $2;
2326 close $UNIHAN;
# map CJK compatibility ideographs to their base character, unless the base has a simplified variant itself
2327 foreach my $i (0xf900..0xfaff)
2329 next unless defined $cjk_compat_table[$i];
2330 next if defined $chinese_simplified_table[$cjk_compat_table[$i]];
2331 $chinese_simplified_table[$i] = $cjk_compat_table[$i];
################################################################
# add a new registry key
#
# Registers "$base\$key" in %registry_keys with $defval as the first
# element of its value list.  A key that is already defined is left
# untouched.
sub add_registry_key($$$)
{
    my ($base, $key, $defval) = @_;
    my $path = "$base\\$key";

    $registry_keys{$path} = [ $defval ] unless defined $registry_keys{$path};
}
################################################################
# add a new registry value with explicit type
#
# $value must already carry its type prefix; the entry appended to the
# key's value list has the form "'name' = value".  The key is created
# (with an undefined default value) if it does not exist yet.
sub add_registry_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my $path = "$base\\$key";

    add_registry_key( $base, $key, undef );
    push @{$registry_keys{$path}}, "'$name' = $value";
}
################################################################
# add a new registry string value
#
# Single quotes inside the string are doubled before the value is
# stored with the "s" type prefix.
sub add_registry_string_value($$$$)
{
    my ($base, $key, $name, $value) = @_;

    # $value is a private copy, so the caller's string is not modified
    $value =~ s/\'/\'\'/g;

    add_registry_value( $base, $key, $name, "s '$value'" );
}
################################################################
# add a new registry dword value
#
# The value is stored as given, with the "d" type prefix.
sub add_registry_dword_value($$$$)
{
    my ($base, $key, $name, $value) = @_;

    add_registry_value( $base, $key, $name, "d $value" );
}
################################################################
# add a new registry binary value
#
# The value is stored with the "b" type prefix, followed by two
# lower-case hex digits for each unpacked "C" element of $value.
sub add_registry_binary_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my $hex = join "", map { sprintf "%02x", $_ } unpack( "C*", $value );

    add_registry_value( $base, $key, $name, "b " . $hex );
}
################################################################
# define a new lead byte
#
# Does nothing if the byte already has an entry in @cp2uni; otherwise
# the byte is appended to @lead_bytes and mapped to 0 in @cp2uni.
sub add_lead_byte($)
{
    my ($byte) = @_;

    return if defined $cp2uni[$byte];

    push @lead_bytes, $byte;
    $cp2uni[$byte] = 0;
}
################################################################
# define a new char mapping
#
# Records the codepage <-> Unicode pair in @cp2uni and @uni2cp.  In both
# directions the first mapping wins: an entry that is already defined is
# never replaced.  For a two-byte codepage value the high byte is
# registered as a lead byte.
sub add_mapping($$)
{
    my ($cp, $uni) = @_;

    $cp2uni[$cp] //= $uni;
    $uni2cp[$uni] //= $cp;

    if ($cp > 0xff)
    {
        add_lead_byte( $cp >> 8 );
    }
}
################################################################
# get a mapping including glyph chars for MB_USEGLYPHCHARS
#
# Returns a copy of the table passed as arguments in which every slot
# that has a defined entry in @glyph2uni is replaced by that entry.
sub get_glyphs_mapping(@)
{
    my @mapping = @_;

    foreach my $idx (0 .. $#glyph2uni)
    {
        next unless defined $glyph2uni[$idx];
        $mapping[$idx] = $glyph2uni[$idx];
    }
    return @mapping;
}
################################################################
# build EUC-JP table from the JIS 0208/0212 files
#
# Resets the global mapping tables, fills in the fixed part of the
# layout (ASCII, lead bytes, JIS X 0201 right plane, private-use rows),
# adds the mappings read from the "jis0208" and "jis0212" data files and
# finally writes codepage 20932.  add_mapping() never replaces an
# existing entry, so the order of the steps matters.
sub dump_eucjp_codepage()
{
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = 0x30fb;

    # ASCII chars
    foreach my $ch (0x00 .. 0x7f)
    {
        add_mapping( $ch, $ch );
    }

    # lead bytes
    foreach my $lead (0x8e, 0xa1 .. 0xfe)
    {
        add_lead_byte( $lead );
    }

    # JIS X 0201 right plane
    foreach my $ch (0xa1 .. 0xdf)
    {
        add_mapping( 0x8e00 + $ch, 0xfec0 + $ch );
    }

    # undefined chars
    foreach my $ch (0x80 .. 0x8d, 0x8f .. 0x9f)
    {
        $cp2uni[$ch] = $ch;
    }
    $cp2uni[0xa0] = 0xf8f0;
    $cp2uni[0xff] = 0xf8f3;

    # Fix backslash conversion
    add_mapping( 0xa1c0, 0xff3c );

    # Add private mappings for rows undefined in JIS 0208/0212:
    # first all rows with trail bytes 0xa1..0xfe, then the same rows
    # again with trail bytes 0x21..0x7e
    my $private = 0xe000;
    foreach my $trail ([ 0xa1, 0xfe ], [ 0x21, 0x7e ])
    {
        my ($lo_first, $lo_last) = @{$trail};
        foreach my $hi (0xf5 .. 0xfe)
        {
            foreach my $lo ($lo_first .. $lo_last)
            {
                add_mapping( ($hi << 8) + $lo, $private++ );
            }
        }
    }

    # JIS X 0208: the second column holds the JIS code, the third the Unicode char
    my $INPUT = open_data_file( "jis0208" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^$/ || /\x1a/;  # skip comments, empty lines and ^Z
        die "Unrecognized line $_\n"
            unless /^0x[0-9a-fA-F]+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/;
        add_mapping( 0x8080 + hex( $1 ), hex( $2 ) );
    }
    close $INPUT;

    # JIS X 0212: only the high bit of the lead byte is set
    $INPUT = open_data_file( "jis0212" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^$/ || /\x1a/;  # skip comments, empty lines and ^Z
        die "Unrecognized line $_\n"
            unless /^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/;
        add_mapping( 0x8000 + hex( $1 ), hex( $2 ) );
    }
    close $INPUT;

    output_codepage_file( 20932 );
}
################################################################
# build Korean Wansung table from the KSX1001 file
#
# The argument list is a Unicode -> cp949 mapping table (index = Unicode
# char, value = cp949 code).  The global tables are rebuilt from the
# "ksx1001" data file, completed with the cp949 entries that fit into
# the lead bytes defined so far, and written as codepage 20949.
sub dump_krwansung_codepage(@)
{
    my @cp949 = @_;

    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = 0x3f;
    $default_wchar = 0x003f;

    # ASCII and undefined chars
    foreach my $ch (0x00 .. 0x9f)
    {
        add_mapping( $ch, $ch );
    }
    add_mapping( 0xa0, 0xf8e6 );
    add_mapping( 0xad, 0xf8e7 );
    add_mapping( 0xae, 0xf8e8 );
    add_mapping( 0xaf, 0xf8e9 );
    add_mapping( 0xfe, 0xf8ea );
    add_mapping( 0xff, 0xf8eb );

    my $INPUT = open_data_file( "ksx1001" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^$/ || /\x1a/;  # skip comments, empty lines and ^Z
        die "Unrecognized line $_\n"
            unless /^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/;
        add_mapping( 0x8080 + hex( $1 ), hex( $2 ) );
    }
    close $INPUT;

    # get some extra mappings from cp 949
    my @defined_lb;
    foreach my $lead (@lead_bytes)
    {
        $defined_lb[$lead] = 1;
    }

    foreach my $uni (0x0000 .. 0xffff)
    {
        next if $uni >= 0x1100 && $uni <= 0x11ff;  # range not used in 20949

        my $cp = $cp949[$uni];
        next unless defined $cp;

        if ($cp >= 0xff)
        {
            # only add chars for lead bytes that exist in 20949
            next unless $defined_lb[$cp >> 8];

            my $trail = $cp & 0xff;
            next unless $trail >= 0xa1 && $trail <= 0xfe;
        }
        add_mapping( $cp, $uni );
    }

    output_codepage_file( 20949 );
}
################################################################
# dump an array of integers
#
# Returns the values formatted as C hex constants of $bit_width / 4
# digits, separated by commas with a line break after every eighth
# value; undefined entries are printed as $default.  An empty array
# yields a single $default entry.  There is no trailing separator.
sub dump_array($$@)
{
    my ($bit_width, $default, @values) = @_;
    my $format = sprintf "0x%%0%ux", $bit_width / 4;
    my $last = @values ? $#values : 0;
    my $text = " ";

    foreach my $idx (0 .. $last)
    {
        $text .= sprintf( $format, $values[$idx] // $default );
        next if $idx == $last;
        $text .= ($idx % 8 == 7) ? ",\n " : ", ";
    }
    return $text;
}
################################################################
# dump an SBCS mapping table in binary format
#
# Writes to the OUTPUT handle, in this order: the header words, 12 zero
# bytes, the offset of the Unicode -> codepage table followed by the 256
# codepage -> Unicode entries, the optional glyph table, two zero words
# and one byte for each of the first 65536 Unicode chars.
sub dump_binary_sbcs_table($)
{
    my $codepage = shift;

    my $has_glyphs = @glyph2uni ? 1 : 0;
    my $wc_offset = 256 + 3 + ($has_glyphs ? 256 : 0);
    my @header = ( 13, $codepage, 1, $default_char, $default_wchar,
                   $cp2uni[$default_char], $uni2cp[$default_wchar] );

    print OUTPUT pack( "S<*", @header );
    print OUTPUT pack( "C12", (0) x 12 );
    print OUTPUT pack( "S<*", $wc_offset, map { $_ || 0 } @cp2uni[0 .. 255] );

    # glyph table: an entry count followed by the table, or just a zero count
    if ($has_glyphs)
    {
        print OUTPUT pack( "S<*", 256, get_glyphs_mapping(@cp2uni[0 .. 255]) );
    }
    else
    {
        print OUTPUT pack( "S<*", 0 );
    }

    print OUTPUT pack( "S<*", 0, 0 );

    # chars without a mapping get the default char
    print OUTPUT pack( "C*", map { defined $_ ? $_ : $default_char } @uni2cp[0 .. 65535] );
}
################################################################
# dump a DBCS mapping table in binary format
#
# Writes to the OUTPUT handle, in this order: the header words, the
# lead byte ranges padded with zeros to 12 bytes, the offset of the
# Unicode -> codepage table followed by the 256 codepage -> Unicode
# entries, the lead byte offsets, one 256-entry table per lead byte, and
# one word for each of the first 65536 Unicode chars.
# As a side effect the @cp2uni entry of every lead byte is reset to 0.
sub dump_binary_dbcs_table($)
{
    my $codepage = shift;
    my @lb_ranges = get_lb_ranges();
    my @header = ( 13, $codepage, 2, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );

    # each lead byte gets the offset of its own 256-entry table
    my @offsets = (0) x 256;
    my $pos = 0;
    foreach my $i (@lead_bytes)
    {
        $offsets[$i] = ($pos += 256);
        $cp2uni[$i] = 0;
    }

    my $wc_offset = 256 + 3 + 256 * (1 + scalar @lead_bytes);

    print OUTPUT pack "S<*", @header;
    # The zero padding must be a list: "0 x 12" would be the single string
    # "000000000000", and the field would only come out right because
    # pack fills in missing arguments itself.
    print OUTPUT pack "C12", @lb_ranges, (0) x 12;
    print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];
    print OUTPUT pack "S<*", 0, scalar @lb_ranges / 2, @offsets;

    foreach my $i (@lead_bytes)
    {
        my $base = $i << 8;
        print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_wchar; } @cp2uni[$base .. $base + 255];
    }

    print OUTPUT pack "S<", 4;
    print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
}
################################################################
# get the list of defined lead byte ranges
#
# Returns a flat list of (first, last) pairs, one pair for each run of
# consecutive byte values present in @lead_bytes.
sub get_lb_ranges()
{
    my @is_lead;
    foreach my $lead (@lead_bytes)
    {
        $is_lead[$lead] = 1;
    }

    my @ranges;
    my $inside = 0;
    foreach my $byte (0 .. 255)
    {
        my $lead = $is_lead[$byte] ? 1 : 0;
        next if $lead == $inside;

        # state change: a range starts here, or it ended at the previous byte
        push @ranges, $lead ? $byte : $byte - 1;
        $inside = $lead;
    }

    # close a range that runs up to the last byte value
    push @ranges, 0xff if $inside;
    return @ranges;
}
################################################################
# dump the Indic Syllabic Category table
#
# Builds one table from two UCD files: the syllabic category goes into
# the low byte of each entry and the positional category (matra type)
# is or'ed in shifted left by 8 bits.  The result is written to
# $filename as a two-level mapping named "indic_syllabic_table".
# Dies on malformed lines and on unknown category names.
sub dump_indic($)
{
    my $filename = shift;
    my @indic_table;

    my $INPUT = open_data_file( "ucd", "IndicSyllabicCategory.txt" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^\s*$/ || /\x1a/;  # skip comments, empty lines and ^Z

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex( $1 ), hex( $1 ), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex( $1 ), hex( $2 ), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown indic $type" unless defined $indic_types{$type};

        # entries that reach beyond the 16-bit range are ignored as a whole
        next unless $first < 65536 && $last < 65536;
        foreach my $ch ($first .. $last)
        {
            $indic_table[$ch] = $indic_types{$type};
        }
    }
    close $INPUT;

    # remember the first file name before opening the second data file
    my $prev_data_file = $current_data_file;
    $INPUT = open_data_file( "ucd", "IndicPositionalCategory.txt" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^\s*$/ || /\x1a/;  # skip comments, empty lines and ^Z

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex( $1 ), hex( $1 ), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex( $1 ), hex( $2 ), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown matra $type" unless defined $matra_types{$type};

        foreach my $ch ($first .. $last)
        {
            $indic_table[$ch] |= $matra_types{$type} << 8;
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Indic Syllabic Category */\n";
    print OUTPUT "/* generated from $prev_data_file */\n";
    print OUTPUT "/* and from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "indic_syllabic_table", $indic_types{'Other'}, 16, @indic_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Line Break Properties table
#
# Reads LineBreak.txt, translates each break class through
# %break_types and writes the table to $filename as a three-level
# mapping named "wine_linebreak_table".  Three-character class names
# are tried before two-character ones.  Dies on malformed lines and on
# unknown class names.
sub dump_linebreak($)
{
    my $filename = shift;
    my @break_table;

    my $INPUT = open_data_file( "ucd", "LineBreak.txt" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^\s*$/ || /\x1a/;  # skip comments, empty lines and ^Z

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex( $1 ), hex( $1 ), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex( $1 ), hex( $2 ), $3);
        }
        elsif (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex( $1 ), hex( $1 ), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex( $1 ), hex( $2 ), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown breaktype $type" unless defined $break_types{$type};

        foreach my $ch ($first .. $last)
        {
            $break_table[$ch] = $break_types{$type};
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Line Break Properties */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_linebreak_table", $break_types{'XX'}, 16, @break_table );

    close OUTPUT;
    save_file($filename);
}
# Script name (as spelled in the UCD Scripts.txt file) -> numeric script id.
# dump_scripts() stores these ids in the script table and emits every entry
# as a Script_<name> enumerator.  Script_LastId is computed there as the
# number of keys minus one, so the ids are expected to stay contiguous
# starting at 0; new scripts should be appended with the next free id.
# The group comments below (Win8/Win8.1, Win10, ...) presumably name the
# Windows release that introduced the ids that follow them - confirm
# before relying on it.
2797 my %scripts =
2799 "Unknown" => 0,
2800 "Common" => 1,
2801 "Inherited" => 2,
2802 "Arabic" => 3,
2803 "Armenian" => 4,
2804 "Avestan" => 5,
2805 "Balinese" => 6,
2806 "Bamum" => 7,
2807 "Batak" => 8,
2808 "Bengali" => 9,
2809 "Bopomofo" => 10,
2810 "Brahmi" => 11,
2811 "Braille" => 12,
2812 "Buginese" => 13,
2813 "Buhid" => 14,
2814 "Canadian_Aboriginal" => 15,
2815 "Carian" => 16,
2816 "Cham" => 17,
2817 "Cherokee" => 18,
2818 "Coptic" => 19,
2819 "Cuneiform" => 20,
2820 "Cypriot" => 21,
2821 "Cyrillic" => 22,
2822 "Deseret" => 23,
2823 "Devanagari" => 24,
2824 "Egyptian_Hieroglyphs" => 25,
2825 "Ethiopic" => 26,
2826 "Georgian" => 27,
2827 "Glagolitic" => 28,
2828 "Gothic" => 29,
2829 "Greek" => 30,
2830 "Gujarati" => 31,
2831 "Gurmukhi" => 32,
2832 "Han" => 33,
2833 "Hangul" => 34,
2834 "Hanunoo" => 35,
2835 "Hebrew" => 36,
2836 "Hiragana" => 37,
2837 "Imperial_Aramaic" => 38,
2838 "Inscriptional_Pahlavi" => 39,
2839 "Inscriptional_Parthian" => 40,
2840 "Javanese" => 41,
2841 "Kaithi" => 42,
2842 "Kannada" => 43,
2843 "Katakana" => 44,
2844 "Kayah_Li" => 45,
2845 "Kharoshthi" => 46,
2846 "Khmer" => 47,
2847 "Lao" => 48,
2848 "Latin" => 49,
2849 "Lepcha" => 50,
2850 "Limbu" => 51,
2851 "Linear_B" => 52,
2852 "Lisu" => 53,
2853 "Lycian" => 54,
2854 "Lydian" => 55,
2855 "Malayalam" => 56,
2856 "Mandaic" => 57,
2857 "Meetei_Mayek" => 58,
2858 "Mongolian" => 59,
2859 "Myanmar" => 60,
2860 "New_Tai_Lue" => 61,
2861 "Nko" => 62,
2862 "Ogham" => 63,
2863 "Ol_Chiki" => 64,
2864 "Old_Italic" => 65,
2865 "Old_Persian" => 66,
2866 "Old_South_Arabian" => 67,
2867 "Old_Turkic" => 68,
2868 "Oriya" => 69,
2869 "Osmanya" => 70,
2870 "Phags_Pa" => 71,
2871 "Phoenician" => 72,
2872 "Rejang" => 73,
2873 "Runic" => 74,
2874 "Samaritan" => 75,
2875 "Saurashtra" => 76,
2876 "Shavian" => 77,
2877 "Sinhala" => 78,
2878 "Sundanese" => 79,
2879 "Syloti_Nagri" => 80,
2880 "Syriac" => 81,
2881 "Tagalog" => 82,
2882 "Tagbanwa" => 83,
2883 "Tai_Le" => 84,
2884 "Tai_Tham" => 85,
2885 "Tai_Viet" => 86,
2886 "Tamil" => 87,
2887 "Telugu" => 88,
2888 "Thaana" => 89,
2889 "Thai" => 90,
2890 "Tibetan" => 91,
2891 "Tifinagh" => 92,
2892 "Ugaritic" => 93,
2893 "Vai" => 94,
2894 "Yi" => 95,
2895 # Win8/Win8.1
2896 "Chakma" => 96,
2897 "Meroitic_Cursive" => 97,
2898 "Meroitic_Hieroglyphs" => 98,
2899 "Miao" => 99,
2900 "Sharada" => 100,
2901 "Sora_Sompeng" => 101,
2902 "Takri" => 102,
2903 # Win10
2904 "Bassa_Vah" => 103,
2905 "Caucasian_Albanian" => 104,
2906 "Duployan" => 105,
2907 "Elbasan" => 106,
2908 "Grantha" => 107,
2909 "Khojki" => 108,
2910 "Khudawadi" => 109,
2911 "Linear_A" => 110,
2912 "Mahajani" => 111,
2913 "Manichaean" => 112,
2914 "Mende_Kikakui" => 113,
2915 "Modi" => 114,
2916 "Mro" => 115,
2917 "Nabataean" => 116,
2918 "Old_North_Arabian" => 117,
2919 "Old_Permic" => 118,
2920 "Pahawh_Hmong" => 119,
2921 "Palmyrene" => 120,
2922 "Pau_Cin_Hau" => 121,
2923 "Psalter_Pahlavi" => 122,
2924 "Siddham" => 123,
2925 "Tirhuta" => 124,
2926 "Warang_Citi" => 125,
2927 # Win10 RS1
2928 "Adlam" => 126,
2929 "Ahom" => 127,
2930 "Anatolian_Hieroglyphs" => 128,
2931 "Bhaiksuki" => 129,
2932 "Hatran" => 130,
2933 "Marchen" => 131,
2934 "Multani" => 132,
2935 "Newa" => 133,
2936 "Old_Hungarian" => 134,
2937 "Osage" => 135,
2938 "SignWriting" => 136,
2939 "Tangut" => 137,
2940 # Win10 RS4
2941 "Masaram_Gondi" => 138,
2942 "Nushu" => 139,
2943 "Soyombo" => 140,
2944 "Zanabazar_Square" => 141,
2945 # Win10 1903
2946 "Dogra" => 142,
2947 "Gunjala_Gondi" => 143,
2948 "Hanifi_Rohingya" => 144,
2949 "Makasar" => 145,
2950 "Medefaidrin" => 146,
2951 "Old_Sogdian" => 147,
2952 "Sogdian" => 148,
2953 # Win10 2004
2954 "Elymaic" => 149,
2955 "Nyiakeng_Puachue_Hmong" => 150,
2956 "Nandinagari" => 151,
2957 "Wancho" => 152,
2958 # Win11
2959 "Chorasmian" => 153,
2960 "Dives_Akuru" => 154,
2961 "Khitan_Small_Script" => 155,
2962 "Yezidi" => 156,
################################################################
# dump Script IDs table
#
# Reads Scripts.txt and writes two files derived from $filename:
#   $filename.h - enum unicode_script_id with one Script_<name> entry per
#                 key of %scripts, sorted by id, plus Script_LastId
#   $filename.c - the char -> script id table as a three-level mapping
#                 named "wine_scripts_table"
# Script names that are not in %scripts, and lines that match neither
# pattern, are silently skipped.
sub dump_scripts($)
{
    my $filename = shift;
    my $header = "$filename.h";
    my @scripts_table;

    my $INPUT = open_data_file( "ucd", "Scripts.txt" );
    # Fill the table
    # Unknown script id is always 0, so undefined scripts are automatically treated as such
    while (<$INPUT>)
    {
        next if /^\#/;                  # skip comments
        next if /^\s*$/;                # skip empty lines
        next if /\x1a/;                 # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $2;
            if (defined $scripts{$type})
            {
                $scripts_table[hex $1] = $scripts{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my $type = $3;
            if (defined $scripts{$type})
            {
                foreach my $i (hex $1 .. hex $2)
                {
                    $scripts_table[$i] = $scripts{$type};
                }
            }
            next;
        }
    }
    close $INPUT;

    open OUTPUT,">$header.new" or die "Cannot create $header";
    print "Building $header\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";

    print OUTPUT "enum unicode_script_id {\n";
    foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts)
    {
        print OUTPUT " Script_$script = $scripts{$script},\n";
    }
    print OUTPUT " Script_LastId = ", (scalar keys %scripts) - 1, "\n";
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($header);

    $filename = "$filename.c";
    # report the file that actually failed to open (the .c file, not the header)
    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_scripts_table", 0, 16, @scripts_table );
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the BiDi mirroring table
#
# Reads BidiMirroring.txt (char ; mirrored char) and writes the table to
# $filename as a two-level mapping named "wine_mirror_map".  Dies on
# lines that do not have this form.
sub dump_mirroring($)
{
    my $filename = shift;
    my @mirror_table = ();

    my $INPUT = open_data_file( "ucd", "BidiMirroring.txt" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^$/ || /\x1a/;  # skip comments, empty lines and ^Z
        die "malformed line $_" unless /^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+)/;
        $mirror_table[hex( $1 )] = hex( $2 );
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi mirroring */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";
    dump_two_level_mapping( "wine_mirror_map", 0, 16, @mirror_table );
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Bidi Brackets
#
# Reads BidiBrackets.txt (char ; paired char ; type) and writes the
# table to $filename as a two-level mapping named "bidi_bracket_table".
# Each entry holds the bracket type in the high byte and the distance
# from the char to its paired bracket in the low byte.  Dies on
# malformed lines, on unknown bracket types and on pairs that are 128 or
# more code points apart.
sub dump_bracket($)
{
    my $filename = shift;
    my @bracket_table;

    my $INPUT = open_data_file( "ucd", "BidiBrackets.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;                  # skip comments
        next if /^\s*$/;                # skip empty lines
        next if /\x1a/;                 # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+);\s*([con])/)
        {
            my $type = $3;
            die "unknown bracket $type" unless defined $bracket_types{$type};
            die "characters too distant $1 and $2" if abs(hex($2) - hex($1)) >= 128;

            # Store the signed distance as a two's complement byte: the
            # result of % takes the sign of the right operand, so -1
            # becomes 0xff.  (A modulus of 255 would turn -1 into 0xfe,
            # which reads back as -2.)
            # NOTE(review): assumes the consumers of this table read the
            # low byte as a signed 8-bit value - confirm.
            my $delta = hex($2) - hex($1);
            $bracket_table[hex $1] = ($delta % 256) + ($bracket_types{$type} << 8);
            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Bidirectional Bracket table */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "bidi_bracket_table", 0, 16, @bracket_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Arabic shaping table
#
# Starts from the default joining types collected in
# @initial_joining_table, overrides them with the joining types listed
# in ArabicShaping.txt and writes to $filename:
#   - "wine_shaping_table", a two-level mapping of the joining types
#   - "wine_shaping_forms", the isolated/final/initial/medial forms of
#     the chars 0x600..0x6ff (the char itself where no form is known)
sub dump_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^\s*$/ || /\x1a/;  # skip comments, empty lines and ^Z
        die "malformed line $_"
            unless /^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/;
        $joining_table[hex( $1 )] = $joining_types{$2};
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_shaping_table", 0, 16, @joining_table );

    print OUTPUT "\nconst unsigned short wine_shaping_forms[256][4] =\n{\n";
    foreach my $ch (0x600 .. 0x6ff)
    {
        my @forms = (
            ${joining_forms{"isolated"}}[$ch] || $ch,
            ${joining_forms{"final"}}[$ch] || $ch,
            ${joining_forms{"initial"}}[$ch] || $ch,
            ${joining_forms{"medial"}}[$ch] || $ch,
        );
        printf OUTPUT " { 0x%04x, 0x%04x, 0x%04x, 0x%04x },\n", @forms;
    }
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Arabic shaping table
#
# Starts from the default joining types collected in
# @initial_joining_table and overrides them with the data from
# ArabicShaping.txt (char; name; joining type; joining group).  Chars of
# the joining groups "ALAPH" and "DALATH RISH" get the type registered
# under the group name, every other char the type registered under its
# joining type letter.  The result is written to $filename as a
# three-level mapping named "arabic_shaping_table".
sub dump_arabic_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        next if /^\#/;                  # skip comments
        next if /^\s*$/;                # skip empty lines
        next if /\x1a/;                 # skip ^Z
        # Joining group names may consist of several words ("DALATH RISH"),
        # so the last field must not stop at the first space; a plain \w+
        # would only ever see "DALATH".
        if (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+(?: \w+)*)/)
        {
            my $type = $2;
            my $group = $3;

            # fall back to the plain joining type if the group has no
            # entry of its own in %joining_types
            if (($group eq "ALAPH" || $group eq "DALATH RISH") && defined $joining_types{$group})
            {
                $joining_table[hex $1] = $joining_types{$group};
            }
            else
            {
                $joining_table[hex $1] = $joining_types{$type};
            }

            next;
        }
        die "malformed line $_";
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "arabic_shaping_table", 0, 16, @joining_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Vertical Orientation table
#
# Reads VerticalOrientation.txt, translates the orientation names
# through %vertical_types and writes the table to $filename as a
# two-level mapping named "vertical_orientation_table".  When $unix is
# true, a "#pragma makedep unix" marker is emitted before the include.
# Dies on malformed lines and on unknown orientation names.
sub dump_vertical($$)
{
    my ($filename, $unix) = @_;
    my @vertical_table;

    my $INPUT = open_data_file( "ucd", "VerticalOrientation.txt" );
    while (<$INPUT>)
    {
        next if /^\#/ || /^\s*$/ || /\x1a/;  # skip comments, empty lines and ^Z

        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my ($code, $type) = (hex( $1 ), $2);
            die "unknown vertical $type" unless defined $vertical_types{$type};

            # single chars outside of the 16-bit range are dropped
            $vertical_table[$code] = $vertical_types{$type} if $code < 65536;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*/)
        {
            my ($first, $last, $type) = (hex( $1 ), hex( $2 ), $3);
            die "unknown vertical $type" unless defined $vertical_types{$type};

            foreach my $ch ($first .. $last)
            {
                $vertical_table[$ch] = $vertical_types{$type};
            }
        }
        else
        {
            die "malformed line $_";
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Vertical Orientation */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    if ($unix)
    {
        print OUTPUT "#if 0\n";
        print OUTPUT "#pragma makedep unix\n";
        print OUTPUT "#endif\n\n";
    }
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "vertical_orientation_table", $vertical_types{'R'}, 16, @vertical_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# compress a mapping table by removing identical rows
#
# The table passed after $rows and $def is cut into $rows rows of equal
# length; undefined cells count as $def.  Rows are appended to a shared
# data pool unless the pool already contains them, and a new row may
# overlap the tail of the pool where the values match.
# Returns one offset per row ($rows + position of the row in the pool)
# followed by the values of the pool.
sub compress_array($$@)
{
    my ($rows, $def, @table) = @_;
    my $len = @table / $rows;
    my @offsets;
    my $pool = "";

    # try to merge table rows
    foreach my $row (0 .. $rows - 1)
    {
        my @cells = @table[($row * $len)..(($row + 1) * $len - 1)];
        my $rowtxt = pack "U*", map { $_ // $def } @cells;
        my $pos = index( $pool, $rowtxt );

        if ($pos < 0)
        {
            # check if the tail of the data can match the start of the new row
            my $first = substr( $rowtxt, 0, 1 );
            for (my $tail = length($pool) - 1; $tail > 0; $tail--)
            {
                # jump to the next place in the tail where the row could start
                my $skip = index( substr( $pool, -$tail ), $first );
                last if $skip == -1;
                $tail -= $skip;
                next unless substr( $pool, -$tail ) eq substr( $rowtxt, 0, $tail );

                # drop the overlapping tail, it comes back with the full row
                substr( $pool, -$tail ) = "";
                last;
            }
            $pos = length $pool;
            $pool .= $rowtxt;
        }
        $offsets[$row] = $rows + $pos;
    }
    return @offsets, unpack "U*", $pool;
}
################################################################
# dump a char -> value mapping table using two-level tables
#
# Arguments: the name of the C array, the default value for undefined
# chars, the element size in bits (16 gives "unsigned short", anything
# else "unsigned int") and the table itself, of which the first 65536
# entries are used.  The table is compressed in rows of 16 values, and
# the resulting 4096 row offsets again in rows of 16.  The C array is
# printed to OUTPUT as level 1 offsets, level 2 offsets and values.
sub dump_two_level_mapping($$$@)
{
    my ($name, $def, $size, @table) = @_;
    my $type = $size == 16 ? "unsigned short" : "unsigned int";

    # first level of compression: 4096 row offsets followed by the values
    my @row_array = compress_array( 4096, $def, @table[0..65535] );
    my @data = splice @row_array, 4096;

    # second level: 256 offsets followed by the compressed row offsets
    my @array = compress_array( 256, 0, @row_array );
    my @row_data = splice @array, 256;

    # rebase the row offsets onto the layout of the final array
    my $adjust = @row_data + 256 - 4096;
    $_ += $adjust foreach @row_data;

    printf OUTPUT "const %s %s[%d] =\n{\n", $type, $name, @array + @row_data + @data;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @row_data );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @data );
}
################################################################
# dump a char -> value mapping table using three-level tables
#
# Arguments: C array name, default value for undefined entries,
# element size in bits (16 selects "unsigned short", anything else
# "unsigned int"), then the values for characters 0..$MAX_CHAR.
# The C definition is printed to the OUTPUT handle.
sub dump_three_level_mapping($$@)
{
    my ($name, $def, $size, @values) = @_;
    my $type = ($size == 16) ? "unsigned short" : "unsigned int";

    # each level indexes groups of 16 entries of the level below
    my $level3 = ($MAX_CHAR + 1) / 16;
    my $level2 = $level3 / 16;
    my $level1 = $level2 / 16;

    my @array3 = compress_array( $level3, $def, @values[0..$MAX_CHAR] );
    my @array2 = compress_array( $level2, 0, @array3[0..$level3-1] );
    my @array1 = compress_array( $level1, 0, @array2[0..$level2-1] );

    # rebase the stored offsets onto the final concatenated array
    my $rebase2 = @array1 + @array2 - $level2 - $level3;
    foreach my $i ($level2 .. $#array2) { $array2[$i] += $rebase2; }
    my $rebase1 = @array1 - $level2;
    foreach my $i ($level1 .. $#array1) { $array1[$i] += $rebase1; }

    my $total = @array1 + (@array2 - $level2) + (@array3 - $level3);
    printf OUTPUT "const %s %s[%u] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array1[0..$level1-1] );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @array1[$level1..$#array1] );
    printf OUTPUT " /* level 3 offsets */\n%s,\n", dump_array( $size, 0, @array2[$level2..$#array2] );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @array3[$level3..$#array3] );
}
################################################################
# dump a binary case mapping table in l_intl.nls format
#
# Takes a table mapping each character to its case-converted value
# and returns the packed binary table.  Entries are stored as the
# difference to the character itself, masked to 32 bits.  Tables with
# more than 0x10000 entries get an extra set of tables for the
# characters above 0xffff, with 32-bit values.
sub dump_binary_case_table(@)
{
    my @table = @_;
    my @difftable;
    my @res;

    foreach my $ch (0 .. $#table)
    {
        $difftable[$ch] = ($table[$ch] - $ch) & 0xffffffff if defined $table[$ch];
    }

    my @low_array2 = compress_array( 4096, 0, @difftable[0..65535] );
    my @low_data = @low_array2[4096..$#low_array2];
    $#low_array2 = 4095;

    my @low_array1 = compress_array( 256, 0, @low_array2 );
    my @low_row_data = @low_array1[256..$#low_array1];
    $#low_array1 = 255;

    if (@table <= 0x10000)
    {
        push @res, @low_array1;
        my $base = @res + @low_row_data - 4096;
        push @res, map { $_ + $base } @low_row_data;
        push @res, @low_data;
        return pack "S<*", 1 + @res, @res;
    }

    my @high_array2 = compress_array( 32768, 0, @difftable[65536..$MAX_CHAR] );
    my @high_data = @high_array2[32768..$#high_array2];
    $#high_array2 = 32767;

    my @high_array1 = compress_array( 1024, 0, @high_array2 );
    my @high_row_data = @high_array1[1024..$#high_array1];
    $#high_array1 = 1023;

    # every offset is rebased on the size of the result built so far,
    # so each base is captured before the corresponding push
    push @res, map { $_ + 1024 } @low_array1;
    my $high_base = @res + @low_row_data + @low_data;
    push @res, map { $_ + $high_base } @high_array1;
    my $low_base = @res + @low_row_data - 4096;
    push @res, map { $_ + $low_base } @low_row_data;
    push @res, @low_data;
    my $high_row_base = @res + @high_row_data;
    push @res, map { 2 * ($_ - 32768) + $high_row_base } @high_row_data;
    return pack( "S<*", 1 + @res + 2 * @high_data, @res ) . pack( "L<*", @high_data );
}
################################################################
# dump case mappings for l_intl.nls
#
# Takes the output file name.  Works on copies of the global
# upper/lower case tables with the linguistic mappings removed, and
# writes a version word followed by the two binary case tables built
# from the first 65536 entries.
sub dump_intl_nls($)
{
    my $filename = shift;

    my @upper_table = @toupper_table;
    my @lower_table = @tolower_table;
    remove_linguistic_mappings( \@upper_table, \@lower_table );

    my $upper = dump_binary_case_table( @upper_table[0..65535] );
    my $lower = dump_binary_case_table( @lower_table[0..65535] );

    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    # print, not printf: the file name must not be used as a format string
    print "Building $filename\n";

    binmode OUTPUT;
    print OUTPUT pack "S<", 1;  # version
    print OUTPUT $upper;
    print OUTPUT $lower;
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the bidi direction table
#
# Takes the output file name.  Converts the global direction table to
# numeric bidi types and writes it as a three-level C table named
# "bidi_direction_table"; characters without a direction use the
# value of type "L".
sub dump_bidi_dir_table($)
{
    my $filename = shift;
    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    # print, not printf: none of these strings is meant to be a format
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi direction table */\n";
    print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    my @table;

    foreach my $i (0 .. $#direction_table)
    {
        $table[$i] = $bidi_types{$direction_table[$i]} if defined $direction_table[$i];
    }

    dump_three_level_mapping( "bidi_direction_table", $bidi_types{"L"}, 16, @table );

    close OUTPUT;
    save_file($filename);
}
# rotate an 8-bit value left by the given number of bits
sub rol($$)
{
    my ($byte, $count) = @_;
    my $high = $byte << $count;         # bits shifted up
    my $low = $byte >> (8 - $count);    # bits wrapping around
    return ($high | $low) & 0xff;
}
################################################################
# compress the character properties table
#
# Arguments: the number of rows, then the flat properties table.
# Returns one index per row followed by the contents of every newly
# created row.  Undefined entries are stored as 0x7f.  Rows made of a
# single repeated predefined value reuse that value's index instead
# of storing a new row.
sub compress_char_props_table($@)
{
    my ($rows, @table) = @_;
    my $len = @table / $rows;
    my $last_row = 0;
    my @result = (0) x $rows;
    my %known;

    # add some predefined sequences
    foreach my $value (0, 0xfb .. 0xff)
    {
        my $uniform = pack "L*", (rol($value,5)) x $len;
        $known{$uniform} = $value;
    }

    # try to merge table rows
    foreach my $row (0 .. $rows - 1)
    {
        my @table_row = map { defined $_ ? $_ : 0x7f } @table[($row * $len)..(($row + 1) * $len - 1)];
        my $rowtxt = pack "L*", @table_row;
        unless (defined($known{$rowtxt}))
        {
            # create a new row
            $known{$rowtxt} = ++$last_row;
            push @result, @table_row;
        }
        $result[$row] = $known{$rowtxt};
    }
    return @result;
}
3473 ################################################################
3474 # dump a normalization table in binary format
#
# Takes the output file name; the normalization form ("nfc", "nfd",
# "nfkc", "nfkd" or "idna") is extracted from the "norm<form>.nls"
# part of that name.  The table is written through the OUTPUT handle
# to "$filename.new", handed to save_file(), and registered under the
# "Normalization" key.
# NOTE(review): unlike dump_intl_nls, no binmode is applied to OUTPUT
# before the packed data is written -- confirm this is intentional.
3475 sub dump_norm_table($)
3477 my $filename = shift;
# form identifiers; they are stored in the header and their bits
# select composition (bit 0) and compatibility handling (bit 2) below
3479 my %forms = ( "nfc" => 1, "nfd" => 2, "nfkc" => 5, "nfkd" => 6, "idna" => 13 );
# decomposition table used for each form
3480 my %decomp = ( "nfc" => \@decomp_table,
3481 "nfd" => \@decomp_table,
3482 "nfkc" => \@decomp_compat_table,
3483 "nfkd" => \@decomp_compat_table ,
3484 "idna" => \@idna_decomp_table );
3486 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3487 print "Building $filename\n";
3489 my $type = $filename;
3490 $type =~ s!.*/norm(\w+)\.nls!$1!;
# $compose is non-zero for nfc, nfkc and idna;
# $compat is 0 for nfc/nfd, 1 for nfkc/nfkd and 2 for idna
3492 my $compose = $forms{$type} & 1;
3493 my $compat = !!($forms{$type} & 4) + ($type eq "idna");
3495 my @version = split /\./, $UNIVERSION;
3497 # combining classes
# @classes maps each combining class below 0x100 that is in use to its
# index in @class_values; @class_values is padded to an even count
3499 my @classes;
3500 my @class_values;
3502 foreach my $c (grep defined, @combining_class_table)
3504 $classes[$c] = 1 if $c < 0x100;
3506 for (my $i = 0; $i < @classes; $i++)
3508 next unless defined $classes[$i];
3509 $classes[$i] = @class_values;
3510 push @class_values, $i;
3512 push @class_values, 0 if (@class_values % 2);
3513 die "too many classes" if @class_values >= 0x40;
3515 # character properties
# one property value per character; characters with a decomposition
# also get their UTF-16 decomposition recorded in @decomposed, and
# composition pairs are collected into @comp_hash_table buckets
3517 my @char_props;
3518 my @decomposed;
3519 my @comp_hash_table;
3520 my $comp_hash_size = $compose ? 254 : 0;
3522 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3524 next unless defined $combining_class_table[$i];
3525 if (defined $decomp{$type}->[$i])
3527 my @dec = get_decomposition( $i, $decomp{$type} );
3528 if ($compose && (my @comp = get_composition( $i, $compat )))
3530 my $hash = ($comp[0] + 95 * $comp[1]) % $comp_hash_size;
3531 push @{$comp_hash_table[$hash]}, to_utf16( @comp, $i );
# use the class index of the first decomposed character that has a
# non-zero combining class
3533 my $val = 0;
3534 foreach my $d (@dec)
3536 $val = $combining_class_table[$d];
3537 last if $val;
3539 $char_props[$i] = $classes[$val];
3541 else
3543 $char_props[$i] = 0xbf;
3545 @dec = compose_hangul( @dec ) if $compose;
3546 @dec = to_utf16( @dec );
# sequences of 7 or more units get a terminating 0; their stored
# length is capped at 7 when the decomposition hash is built
3547 push @dec, 0 if @dec >= 7;
3548 $decomposed[$i] = \@dec;
3550 else
3552 if ($combining_class_table[$i] == 0x100)
3554 $char_props[$i] = 0x7f;
3556 elsif ($combining_class_table[$i])
3558 $char_props[$i] = $classes[$combining_class_table[$i]] | 0x80;
3560 elsif ($type eq "idna" && defined $idna_disallowed[$i])
3562 $char_props[$i] = 0xff;
3564 else
3566 $char_props[$i] = 0;
# second pass for composing forms: flag both members of every
# composition pair, depending on whether the second character has a
# non-zero combining class
3571 if ($compose)
3573 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3575 my @comp = get_composition( $i, $compat );
3576 next unless @comp;
3577 if ($combining_class_table[$comp[1]])
3579 $char_props[$comp[0]] |= 0x40 unless $char_props[$comp[0]] & 0x80;
3580 $char_props[$comp[1]] |= 0x40;
3582 else
3584 $char_props[$comp[0]] = ($char_props[$comp[0]] & ~0x40) | 0x80;
3585 $char_props[$comp[1]] |= 0xc0;
3590 # surrogates
3591 foreach my $i (0xd800..0xdbff) { $char_props[$i] = 0xdf; }
3592 foreach my $i (0xdc00..0xdfff) { $char_props[$i] = 0x9f; }
3594 # Hangul
3595 if ($type eq "nfc") { foreach my $i (0x1100..0x117f) { $char_props[$i] = 0xff; } }
3596 elsif ($compose) { foreach my $i (0x1100..0x11ff) { $char_props[$i] = 0xff; } }
3597 foreach my $i (0xac00..0xd7ff) { $char_props[$i] = 0xff; }
3599 # invalid chars
3600 if ($type eq "idna") { foreach my $i (0x00..0x1f, 0x7f) { $char_props[$i] = 0xff; } }
3601 foreach my $i (0xfdd0..0xfdef) { $char_props[$i] = 0xff; }
3602 foreach my $i (0x00..0x10)
3604 $char_props[($i << 16) | 0xfffe] = 0xff;
3605 $char_props[($i << 16) | 0xffff] = 0xff;
3608 # decomposition hash table
3610 my @decomp_hash_table;
3611 my @decomp_hash_index;
3612 my @decomp_hash_data;
3613 my $decomp_hash_size = 944;
3615 # build string of character data, reusing substrings when possible
# longest sequences are added first so that shorter ones can be found
# inside them
3616 my $decomp_char_data = "";
3617 foreach my $i (sort { @{$b} <=> @{$a} } grep defined, @decomposed)
3619 my $str = pack "U*", @{$i};
3620 $decomp_char_data .= $str if index( $decomp_char_data, $str) == -1;
# each entry is [ character, (length << 13) | position ], bucketed by
# character modulo the hash size
3622 for (my $i = 0; $i < @decomposed; $i++)
3624 next unless defined $decomposed[$i];
3625 my $pos = index( $decomp_char_data, pack( "U*", @{$decomposed[$i]} ));
3626 die "sequence not found" if $pos == -1;
3627 my $len = @{$decomposed[$i]};
3628 $len = 7 if $len > 7;
3629 my $hash = $i % $decomp_hash_size;
3630 push @{$decomp_hash_table[$hash]}, [ $i, ($len << 13) | $pos ];
# the index normally points into @decomp_hash_data (in pairs); a
# bucket holding a single character whose property is 0xbf stores the
# length/position value directly in the index instead
3632 for (my $i = 0; $i < $decomp_hash_size; $i++)
3634 $decomp_hash_index[$i] = @decomp_hash_data / 2;
3635 next unless defined $decomp_hash_table[$i];
3636 if (@{$decomp_hash_table[$i]} == 1)
3638 my $entry = $decomp_hash_table[$i]->[0];
3639 if ($char_props[$entry->[0]] == 0xbf)
3641 $decomp_hash_index[$i] = $entry->[1];
3642 next;
3645 foreach my $entry (@{$decomp_hash_table[$i]})
3647 push @decomp_hash_data, $entry->[0] & 0xffff, $entry->[1];
3650 push @decomp_hash_data, 0, 0;
3652 # composition hash table
# only built when at least one composition was recorded above
3654 my @comp_hash_index;
3655 my @comp_hash_data;
3656 if (@comp_hash_table)
3658 for (my $i = 0; $i < $comp_hash_size; $i++)
3660 $comp_hash_index[$i] = @comp_hash_data;
3661 push @comp_hash_data, @{$comp_hash_table[$i]} if defined $comp_hash_table[$i];
3663 $comp_hash_index[$comp_hash_size] = @comp_hash_data;
3664 push @comp_hash_data, 0, 0, 0;
# character properties are compressed in rows of 128 characters
3667 my $level1 = ($MAX_CHAR + 1) / 128;
3668 my @rows = compress_char_props_table( $level1, @char_props[0..$MAX_CHAR] );
3670 my @header = ( $version[0], $version[1], $version[2], 0, $forms{$type}, $compat ? 18 : 3,
3671 0, $decomp_hash_size, $comp_hash_size, 0 );
# offsets of the successive tables, in 16-bit units: 16 words of
# name, then the header, then this offset array itself; byte tables
# count half their length
3672 my @tables = (0) x 8;
3674 $tables[0] = 16 + @header + @tables;
3675 $tables[1] = $tables[0] + @class_values / 2;
3676 $tables[2] = $tables[1] + $level1 / 2;
3677 $tables[3] = $tables[2] + (@rows - $level1) / 2;
3678 $tables[4] = $tables[3] + @decomp_hash_index;
3679 $tables[5] = $tables[4] + @decomp_hash_data;
3680 $tables[6] = $tables[5] + length $decomp_char_data;
3681 $tables[7] = $tables[6] + @comp_hash_index;
# the output order below must match the offsets computed above
3683 print OUTPUT pack "S<16", unpack "U*", "norm$type.nlp";
3684 print OUTPUT pack "S<*", @header;
3685 print OUTPUT pack "S<*", @tables;
3686 print OUTPUT pack "C*", @class_values;
3688 print OUTPUT pack "C*", @rows[0..$level1-1];
3689 print OUTPUT pack "C*", @rows[$level1..$#rows];
3690 print OUTPUT pack "S<*", @decomp_hash_index;
3691 print OUTPUT pack "S<*", @decomp_hash_data;
3692 print OUTPUT pack "S<*", unpack "U*", $decomp_char_data;
3693 print OUTPUT pack "S<*", @comp_hash_index;
3694 print OUTPUT pack "S<*", @comp_hash_data;
3696 close OUTPUT;
3697 save_file($filename);
3699 add_registry_string_value( $nlskey, "Normalization", sprintf( "%x", $forms{$type} ), "norm$type.nls" );
################################################################
# output a codepage definition file from the global tables
#
# Takes the codepage number.  Writes "nls/c_<codepage>.nls" using the
# single-byte dumper when no lead bytes are registered and the
# double-byte dumper otherwise, then records the file under the
# "Codepage" registry key.
sub output_codepage_file($)
{
    my ($codepage) = @_;

    my $nls_name = sprintf( "c_%03d.nls", $codepage );
    my $output = sprintf "nls/c_%03d.nls", $codepage;
    open OUTPUT,">$output.new" or die "Cannot create $output";

    printf "Building %s\n", $output;
    if (@lead_bytes)
    {
        dump_binary_dbcs_table( $codepage );
    }
    else
    {
        dump_binary_sbcs_table( $codepage );
    }

    close OUTPUT;
    save_file($output);

    add_registry_string_value( $nlskey, "Codepage", sprintf( "%d", $codepage ), $nls_name );
}
################################################################
# output a codepage table from a Microsoft-style mapping file
#
# Takes the name of the mapping file inside the "codepages" data set.
# Resets the global codepage tables, fills them from the file and
# calls output_codepage_file() for the codepage that the file
# declares.  Dies on an unrecognized line or a missing CODEPAGE line.
sub dump_msdata_codepage($)
{
    my $filename = shift;

    my $state = "";
    my ($codepage, $count);
    my ($lb_cur, $lb_end);

    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = $DEF_CHAR;

    my $INPUT = open_data_file( "codepages", $filename );

    while (<$INPUT>)
    {
        next if /^;/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        last if /^ENDCODEPAGE/;

        if (/^CODEPAGE\s+(\d+)/)
        {
            $codepage = $1;
            next;
        }
        # the first CPINFO field is not used here; both default
        # characters are hexadecimal
        if (/^CPINFO\s+\d+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            $default_char = hex $1;
            $default_wchar = hex $2;
            next;
        }
        # a table header selects how the following value pairs are used
        if (/^(MBTABLE|GLYPHTABLE|WCTABLE|DBCSRANGE|DBCSTABLE)\s+(\d+)/)
        {
            $state = $1;
            $count = $2;
            next;
        }
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            my ($first, $second) = (hex($1), hex($2));

            # in every table the first mapping seen for an entry wins
            if ($state eq "MBTABLE")
            {
                $cp2uni[$first] = $second unless defined($cp2uni[$first]);
                next;
            }
            if ($state eq "GLYPHTABLE")
            {
                $glyph2uni[$first] = $second unless defined($glyph2uni[$first]);
                next;
            }
            if ($state eq "WCTABLE")
            {
                $uni2cp[$first] = $second unless defined($uni2cp[$first]);
                next;
            }
            if ($state eq "DBCSRANGE")
            {
                # a range of lead bytes; the DBCSTABLEs that follow
                # describe them one after the other
                for (my $i = $first; $i <= $second; $i++) { add_lead_byte( $i ); }
                $lb_cur = $first;
                $lb_end = $second;
                next;
            }
            if ($state eq "DBCSTABLE")
            {
                my $cp = ($lb_cur << 8) | $first;
                $cp2uni[$cp] = $second unless defined($cp2uni[$cp]);
                if (!--$count)
                {
                    # table complete: move on to the next lead byte, or
                    # expect a new range once the current one is used up
                    if (++$lb_cur > $lb_end) { $state = "DBCSRANGE"; }
                }
                next;
            }
        }
        die "$filename: Unrecognized line $_\n";
    }
    close $INPUT;

    die "$filename: missing CODEPAGE line\n" unless defined $codepage;

    output_codepage_file( $codepage );

    if ($codepage == 949) { dump_krwansung_codepage( @uni2cp ); }
}
################################################################
# align a string length
#
# Returns the string padded with zero bytes up to the next multiple
# of the alignment; a string that is already aligned is returned
# unchanged.
sub align_string($$)
{
    my ($align, $str) = @_;
    my $extra = length($str) % $align;
    $str .= "\0" x ($align - $extra) if $extra;
    return $str;
}
################################################################
# pad a string with zeros
#
# Returns the string extended with zero bytes to the requested
# length; longer strings are returned unchanged.
sub pad_string($$)
{
    my ($pad, $str) = @_;
    my $missing = $pad - length($str);
    $str .= "\0" x $missing if $missing > 0;
    return $str;
}
################################################################
# pack a GUID string
#
# Takes a GUID in 8-4-4-4-12 hexadecimal notation and returns its
# 16-byte binary form: a little-endian 32-bit value, two little-endian
# 16-bit values and eight single bytes.  Dies if the string does not
# contain a GUID.  The caller's $_ is left untouched.
sub pack_guid($)
{
    my $guid = shift;
    my $byte = '([0-9A-Fa-f]{2})';
    my @fields = $guid =~ /([0-9A-Fa-f]{8})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{4})-$byte$byte-$byte$byte$byte$byte$byte$byte/
        or die "malformed GUID '$guid'\n";
    return pack "L<S<2C8", map { hex($_) } @fields;
}
################################################################
# comparison function for compression sort
#
# Sort callback working on the $a and $b array references: shorter
# rows sort first, rows of equal length are ordered numerically by
# their elements at indexes 4 to 12.
sub cmp_compression
{
    my $order = scalar @{$a} <=> scalar @{$b};
    return $order if $order;
    foreach my $idx (4 .. 12)
    {
        $order = $a->[$idx] <=> $b->[$idx];
        return $order if $order;
    }
    return $order;
}
################################################################
# build a binary sort keys table
#
# Takes the output file name.  Parses the "sorting" data file, then
# writes a file made of a 4-entry offset header followed by:
#  - the default sort key table plus one exception table per
#    sort GUID / casing variant,
#  - the standard, linguistic and Turkish case mapping tables,
#  - the character type table,
#  - the sort tables (GUIDs, expansions, compressions, multiple
#    weights, jamo sort).
# Locale -> sort GUID associations are added to the registry under
# "Sorting\Ids".  Returns the packed character type table.
sub dump_sortkey_table($)
{
    my $filename = shift;
    my @keys;
    my ($part, $section, $subsection, $guid, $version, $ling_flag);
    my @multiple_weights;
    my @expansions;
    my @compressions;
    my %exceptions;
    my %guids;
    my %compr_flags;
    my %locales;
    my $default_guid = "00000001-57ee-1e5c-00b4-d0000bb1e11e";
    my $jamostr = "";

    my $re_hex = '0x[0-9A-Fa-f]+';
    my $re_key = '(\d+\s+\d+\s+\d+\s+\d+)';
    $guids{$default_guid} = { };

    my %flags = ( "HAS_3_BYTE_WEIGHTS" => 0x01, "REVERSEDIACRITICS" => 0x10, "DOUBLECOMPRESSION" => 0x20, "INVERSECASING" => 0x40 );

    my $KEYS = open_data_file( "sorting" );

    # print, not printf: the file name must not be used as a format string
    print "Building $filename\n";

    while (<$KEYS>)
    {
        s/\s*;.*$//;
        next if /^\s*$/;  # skip empty lines
        if (/^\s*(SORTKEY|SORTTABLES)/)
        {
            $part = $1;
            next;
        }
        if (/^\s*(ENDSORTKEY|ENDSORTTABLES)/)
        {
            $part = $section = "";
            next;
        }
        if (/^\s*(DEFAULT|RELEASE|REVERSEDIACRITICS|DOUBLECOMPRESSION|INVERSECASING|MULTIPLEWEIGHTS|EXPANSION|COMPATIBILITY|COMPRESSION|EXCEPTION|JAMOSORT)\s+/)
        {
            $section = $1;
            $guid = undef;
            next;
        }
        next unless $part;
        if ("$part.$section" eq "SORTKEY.DEFAULT")
        {
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $keys[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.RELEASE")
        {
            if (/^\s*NLSVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                $version = hex $1;
                next;
            }
            if (/^\s*DEFINEDVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                # ignore for now
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.REVERSEDIACRITICS" ||
               "$part.$section" eq "SORTTABLES.DOUBLECOMPRESSION" ||
               "$part.$section" eq "SORTTABLES.INVERSECASING")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{$guid};
                $guids{$guid}->{flags} |= $flags{$section};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.MULTIPLEWEIGHTS")
        {
            if (/^\s*(\d+)\s+(\d+)/)
            {
                push @multiple_weights, $1, $2;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXPANSION")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                # the key of an expanded character holds the index of
                # its expansion pair
                my $pos = scalar @expansions / 2;
                $keys[hex $1] = [ 2, 0, $pos & 0xff, $pos >> 8 ] unless defined $keys[hex $1];
                push @expansions, hex $2, hex $3;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPATIBILITY")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                $keys[hex $1] = $keys[hex $2];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPRESSION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*([A-Z0-9_]+)?/)
            {
                if ($subsection || !$guid)  # start a new one
                {
                    $guid = lc $1;
                    $subsection = "";
                    $guids{$guid} = { } unless defined $guids{$guid};
                    $guids{$guid}->{flags} |= $flags{$2} if $2;
                    $guids{$guid}->{compr} = @compressions;
                    $exceptions{"$guid-"} = [ ] unless defined $exceptions{"$guid-"};
                    $compr_flags{$guid} = [ ] unless defined $compr_flags{$guid};
                    push @compressions, [ ];
                }
                else  # merge with current one
                {
                    $guids{lc $1} = { } unless defined $guids{lc $1};
                    $guids{lc $1}->{flags} |= $flags{$2} if $2;
                    $guids{lc $1}->{compr} = $guids{$guid}->{compr};
                    $compr_flags{lc $1} = $compr_flags{$guid};
                }
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*(TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT)/)
            {
                $subsection = $1;
                next;
            }
            if ($subsection && /^\s*(($re_hex\s+){2,8})$re_key/)
            {
                # stored rows are the 4 key values followed by the
                # compressed characters
                my @comp = map { hex $_; } split(/\s+/,$1);
                push @{$compressions[$#compressions]}, [ split(/\s+/,$3), @comp ];
                # add compression flags
                $compr_flags{$guid}->[$comp[0]] |= @comp >= 6 ? 0xc0 : @comp >= 4 ? 0x80 : 0x40;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXCEPTION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*(LINGUISTIC_CASING)?/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{lc $1};
                # exception tables are keyed by GUID plus "+" for the
                # linguistic casing variant and "-" otherwise
                $ling_flag = ($2 ? "+" : "-");
                $exceptions{"$guid$ling_flag"} = [ ] unless defined $exceptions{"$guid$ling_flag"};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $exceptions{"$guid$ling_flag"}->[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.JAMOSORT")
        {
            if (/^\s*$re_hex\s+(($re_hex\s*){5})/)
            {
                $jamostr .= pack "C8", map { hex $_; } split /\s+/, $1;
                next;
            }
        }
        die "$current_data_file: $part.$section: unrecognized line $_\n";
    }
    close $KEYS;

    # Sortkey table

    my $table;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my @k = defined $keys[$i] ? @{$keys[$i]} : (0) x 4;
        $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
    }

    foreach my $id (sort keys %exceptions)
    {
        my $pos = length($table) / 4;
        my @exc = @{$exceptions{$id}};
        my @filled;
        my $key = (substr( $id, -1 ) eq "+" ? "ling_except" : "except");
        my $guid = substr( $id, 0, -1 );
        $guids{$guid}->{$key} = $pos;
        $pos += 0x100;
        # unconditional declaration: "my ... if ..." has undefined behaviour
        my @flags = defined $compr_flags{$guid} ? @{$compr_flags{$guid}} : ();
        # find the 256-character blocks containing exceptions or flags
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless defined $exc[$j] || defined $flags[$j];
            $filled[$j >> 8] = 1;
            $j |= 0xff;  # skip to the end of this block
        }
        # block index: modified blocks point to their own copy, the
        # others to the corresponding block of the default table
        for (my $j = 0; $j < 0x100; $j++)
        {
            $table .= pack "L<", $filled[$j] ? $pos : $j * 0x100;
            $pos += 0x100 if $filled[$j];
        }
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless $filled[$j >> 8];
            my @k = defined $exc[$j] ? @{$exc[$j]} : defined $keys[$j] ? @{$keys[$j]} : (0) x 4;
            $k[3] |= $flags[$j] || 0;
            $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
        }
    }

    # Case mapping tables

    # standard table
    my @casemaps;
    my @upper = @toupper_table;
    my @lower = @tolower_table;
    remove_linguistic_mappings( \@upper, \@lower );
    $casemaps[0] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );

    # linguistic table
    $casemaps[1] = pack( "S<*", 1) . dump_binary_case_table( @toupper_table ) . dump_binary_case_table( @tolower_table );

    # Turkish table
    @upper = @toupper_table;
    @lower = @tolower_table;
    $upper[ord 'i'] = 0x130;  # LATIN CAPITAL LETTER I WITH DOT ABOVE
    $lower[ord 'I'] = 0x131;  # LATIN SMALL LETTER DOTLESS I
    $casemaps[2] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
    my $casemaps = align_string( 8, $casemaps[0] . $casemaps[1] . $casemaps[2] );

    # Char type table

    my @table;
    my $types = "";
    my %typestr;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my $str = pack "S<3",
                       ($category_table[$i] || 0) & 0xffff,
                       defined($direction_table[$i]) ? $c2_types{$direction_table[$i]} : 0,
                       ($category_table[$i] || 0) >> 16;

        # every distinct 6-byte type triplet is stored only once
        if (!defined($typestr{$str}))
        {
            $typestr{$str} = length($types) / 6;
            $types .= $str;
        }
        $table[$i] = $typestr{$str};
    }

    my (@rows, @array, @data, @row_data);
    (@rows[0..4095], @data) = compress_array( 4096, 0, @table[0..65535] );
    (@array[0..255], @row_data) = compress_array( 256, 0, @rows );
    for (my $i = 0; $i < 256; $i++) { $array[$i] *= 2; }  # we need byte offsets
    for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += 2 * @row_data + 512 - 4096; }

    my $arraystr = pack("S<*", @array, @row_data) . pack("C*", @data);
    my $chartypes = pack "S<2", 4 + length($types) + length($arraystr), 2 + length($types);
    $chartypes = align_string( 8, $chartypes . $types . $arraystr );

    # Sort tables

    # guids
    # "scalar keys": a hash in scalar context is only a key count
    # since Perl 5.26
    my $sorttables = pack "L<2", $version, scalar keys %guids;
    foreach my $id (sort keys %guids)
    {
        my %guid = %{$guids{$id}};
        my $flags = $guid{flags} || 0;
        my $map = length($casemaps[0]) + (defined $guid{ling_except} ? length($casemaps[1]) : 0);
        $sorttables .= pack_guid($id) . pack "L<5",
                       $flags,
                       defined($guid{compr}) ? $guid{compr} : 0xffffffff,
                       $guid{except} || 0,
                       $guid{ling_except} || 0,
                       $map / 2;
    }

    # expansions
    $sorttables .= pack "L<S<*", scalar @expansions / 2, @expansions;

    # compressions
    $sorttables .= pack "L<", scalar @compressions;
    my $rowstr = "";
    foreach my $c (@compressions)
    {
        my $pos = length($rowstr) / 2;
        my $min = 0xffff;
        my $max = 0;
        my @lengths = (0) x 8;
        foreach my $r (sort cmp_compression @{$c})
        {
            my @row = @{$r};
            # rows hold 4 key values plus 2 to 8 characters
            $lengths[scalar @row - 6]++;
            foreach my $val (@row[4..$#row])
            {
                $min = $val if $min > $val;
                $max = $val if $max < $val;
            }
            $rowstr .= align_string( 4, pack "S<*", @row[4..$#row] );
            $rowstr .= pack "C4", $row[1], $row[0], $row[2], $row[3];
        }
        $sorttables .= pack "L<S<10", $pos, $min, $max, @lengths;
    }
    $sorttables .= $rowstr;

    # multiple weights
    $sorttables .= align_string( 4, pack "L<C*", scalar @multiple_weights / 2, @multiple_weights );

    # jamo sort
    $sorttables .= pack("L<", length($jamostr) / 8) . $jamostr;

    # Locales

    add_registry_key( $nlskey, "Sorting\\Ids", "{$default_guid}" );
    foreach my $loc (sort keys %locales)
    {
        # skip specific locales that match more general ones
        my @parts = split /[-_]/, $loc;
        next if @parts > 1 && defined($locales{$parts[0]}) && $locales{$parts[0]} eq $locales{$loc};
        next if @parts > 2 && defined($locales{"$parts[0]-$parts[1]"}) && $locales{"$parts[0]-$parts[1]"} eq $locales{$loc};
        add_registry_string_value( $nlskey, "Sorting\\Ids", $loc, "\{$locales{$loc}\}" );
    }

    # File header

    my @header;
    $header[0] = 16;
    $header[1] = $header[0] + length $table;
    $header[2] = $header[1] + length $casemaps;
    $header[3] = $header[2] + length $chartypes;

    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    binmode OUTPUT;  # packed binary data, as in dump_intl_nls
    print OUTPUT pack "L<*", @header;
    print OUTPUT $table, $casemaps, $chartypes, $sorttables;
    close OUTPUT;
    save_file($filename);
    return $chartypes;
}
# locale descriptions indexed by locale name
my %lcnames;

# return the name of the parent of a locale
#
# An explicit {sparent} entry wins over an explicit {parent} entry;
# without either, the last "-xxx" component of the name is stripped.
# Returns "" for a name with nothing left to strip, and undef for an
# empty or undefined name.
sub locale_parent($)
{
    my ($loc) = @_;

    return undef unless $loc;

    my $info = $lcnames{$loc};
    if (defined $info)
    {
        foreach my $key ("sparent", "parent")
        {
            return $info->{$key} if defined $info->{$key};
        }
    }
    return $loc =~ /(.*)-[0-9A-Za-z]+/ ? $1 : "";
}
# sort callback comparing the locale names in $a and $b, ignoring
# case and treating "_" like "-"
sub compare_locales
{
    my ($left, $right) = ($a, $b);
    tr/A-Z_/a-z-/ foreach $left, $right;
    return $left cmp $right;
}
# query an xml key
#
# Runs the query through the document's find() method and returns the
# text content of the first result, or undef when the result is
# false.  A message is printed to STDERR when there are several
# results.
sub xml_query($$)
{
    my ($xml, $query) = @_;
    my $nodes = $xml->find( $query ) or return undef;
    my @found = @{$nodes};
    printf STDERR "multiple entries for %s\n", $query if @found > 1;
    return $found[0]->textContent;
}
# query an xml key for a locale, with fallback to the parents
#
# Starts at the given locale (or "en-US" for a locale without a name)
# and walks up through locale_parent() until a locale yields a result
# whose text is not the three-up-arrows placeholder.  Returns undef
# when no locale in the chain provides a value.
sub loc_query($$)
{
    my ($loc, $query) = @_;

    $loc = $lcnames{"en-US"} unless $loc->{name};  # fallback to "en-US" for root locale

    my $cur = $loc->{name};
    while (defined $cur)
    {
        my $entry = $lcnames{$cur};
        if (defined $entry)
        {
            my $nodes = $entry->{xml}->find( $query );
            if ($nodes)
            {
                printf STDERR "%s: multiple entries for %s\n", $cur, $query if (@{$nodes} > 1);
                my $text = @{$nodes}[0]->textContent;
                # three up arrows mean that the parent value applies
                return $text unless $text eq "\x{2191}\x{2191}\x{2191}";
            }
        }
        $cur = locale_parent( $cur );
    }
    return undef;
}
# retrieve a locale field entry by going up the parents tree
#
# Lookup order: the locale itself, "en-US" when the locale has no
# name, the chain of aliases, then the parents (an explicit {sparent}
# entry, or else the name with its last "-xxx" component removed).
# Returns the default when no locale in the chain defines the field.
sub locale_entry($$$)
{
    my ($loc, $field, $def) = @_;

    my $value = $loc->{$field};
    return $value if defined $value;

    if (!$loc->{name})  # fallback to "en-US" for root locale
    {
        $loc = $lcnames{"en-US"};
        return $loc->{$field} if defined $loc->{$field};
    }
    while (defined $loc->{alias})  # resolve aliases
    {
        $loc = $lcnames{$loc->{alias}};
        return $loc->{$field} if defined $loc->{$field};
    }

    for (my $cur = $loc->{name}; $cur; )
    {
        my $entry = $lcnames{$cur};
        if (defined $entry && defined $entry->{sparent})
        {
            $cur = $entry->{sparent};
        }
        elsif ($cur =~ /(.*)-[0-9A-Za-z]+/)
        {
            $cur = $1;
        }
        else
        {
            last;
        }
        my $parent = $lcnames{$cur};
        return $parent->{$field} if defined $parent && defined $parent->{$field};
    }
    return $def;
}
# accumulated string data; offsets into it are counted in 16-bit units
my $string_data;

# add a chunk of packed data to the string data, reusing an existing
# copy when there is one
#
# Returns the offset of the chunk in 16-bit units.  Since the result
# is the byte position divided by two, only copies found at an even
# byte position can be reused.
sub add_str_data($)
{
    my $txt = shift;
    $string_data = "" unless defined $string_data;
    my $ret = index( $string_data, $txt );
    # skip matches that are not on a 16-bit boundary
    $ret = index( $string_data, $txt, $ret + 1 ) while $ret != -1 && $ret % 2;
    if ($ret == -1)
    {
        $ret = length($string_data);
        $string_data .= $txt;
    }
    return $ret / 2;
}
# add a string to the string data and return its offset
#
# The string is stored as UTF-16LE, preceded by its length in 16-bit
# units and followed by a terminating zero.  Undefined and empty
# strings are not stored and yield offset 0.
sub add_string($)
{
    my ($str) = @_;
    return 0 if !defined($str) || $str eq "";
    my $utf = encode( "UTF16LE", $str );
    my $count = pack "S<", length($utf) / 2;
    my $terminator = pack "S", 0;
    return add_str_data( $count . $utf . $terminator );
}
# add a list of 32-bit values to the string data and return its
# offset; the values are preceded by twice their count
sub add_fontsig(@)
{
    my @values = @_;
    my $blob = pack "S<L<*", 2 * @values, @values;
    return add_str_data( $blob );
}
# add an array of strings to the string data and return its offset
#
# Every string is added first; the array itself is stored as the
# string count followed by the 32-bit offsets of the strings.  An
# empty list is not stored and yields offset 0.
sub add_strarray(@)
{
    return 0 unless @_;
    my @offsets;
    push @offsets, add_string( $_ ) foreach @_;
    return add_str_data( pack "S<L<*", scalar @offsets, @offsets );
}
# convert a number format pattern to a grouping string
#
# The result has one character per group, the code of each character
# being the group size, starting with the group next to the decimal
# separator.  Patterns without a recognized grouping yield a single
# group of 3.
sub format_to_grouping($)
{
    my ($format) = @_;
    return chr(length($2)) . chr(length($1)) if $format =~ /#,(#+),(#+0)/;
    return chr(length($1)) if $format =~ /#,(#+0)/;
    return chr(3);
}
# convert a currency format pattern to a pair of format indexes
#
# Arguments: a name (not used by the matching itself) and a pattern
# with the positive format optionally followed by ";" and the
# negative format.  Each index is the position of the first matching
# entry in the corresponding list below, or 0 when nothing matches.
# Without a negative format, the negative index is derived from the
# positive one.  Returns the list (positive index, negative index).
sub parse_currency_format($$)
{
    my ($name, $formats) = @_;
    my ($posfmt, $negfmt) = split /;/, $formats;
    my @pospatterns = ( "\xa4[^\xa0]*#",        # $1.1
                        "00[^\xa0]*\xa4",       # 1.1$
                        "\xa4.*\xa0.*#",        # $ 1.1
                        "00.*\xa0.*\xa4" );     # 1.1 $
    my @negpatterns = ( "\\(\xa4[^\xa0]*#",     # ($1.1)
                        "-\xa4[^\xa0]*#",       # -$1.1
                        "\xa4[^\xa0]*-#",       # $-1.1
                        "\xa4[^\xa0]*#.*00-",   # $1.1-
                        "00[^\xa0]*\xa4\\)",    # (1.1$)
                        "-#.*00[^\xa0]*\xa4",   # -1.1$
                        "00-[^\xa0]*\xa4",      # 1.1-$
                        "00[^\xa0]*\xa4-",      # 1.1$-
                        "-#.*00.*\xa0.*\xa4",   # -1.1 $
                        "-\xa4.*\xa0.*#",       # -$ 1.1
                        "00.*\xa0.*\xa4-",      # 1.1 $-
                        "\xa4.*\xa0.*#.*00-",   # $ 1.1-
                        "\xa4.*\xa0.*-#",       # $ -1.1
                        "00-.*\xa0.*\xa4",      # 1.1- $
                        "\\(\xa4.*\xa0.*#",     # ($ 1.1)
                        "00.*\xa0.*\xa4\\)");   # (1.1 $)

    # index of the first pattern matching the text, 0 for unknown formats
    my $first_match = sub
    {
        my ($text, @candidates) = @_;
        foreach my $idx (0 .. $#candidates)
        {
            return $idx if $text =~ /$candidates[$idx]/;
        }
        return 0;
    };

    my $pos = $first_match->( $posfmt, @pospatterns );
    my $neg = defined $negfmt ? $first_match->( $negfmt, @negpatterns )
                              : (1, 5, 9, 8)[$pos];  # negative layout implied by the positive one

    return ($pos, $neg);
}
# convert a percent format pattern to a pair of format indexes
#
# The first index is the position of the first matching pattern
# below; the second one is the same value, except that 3 becomes 7.
# When nothing matches, a message is printed to STDERR and the number
# of patterns is returned for both.
sub parse_percent_format($)
{
    my ($fmt) = @_;
    my @patterns = ( "0.+%",    # 1 %
                     "0%",      # 1%
                     "%#",      # %1
                     "%.+#" );  # % 1
    my $pos = 0;
    $pos++ while $pos < @patterns && $fmt !~ /$patterns[$pos]/;
    printf STDERR "unknown format '%s'\n", $fmt if ($pos == @patterns);
    my $neg = ($pos == 3) ? 7 : $pos;
    return ($pos, $neg);
}
# convert a date format pattern by rewriting some of its fields
#
# Only the first occurrence of each field is rewritten, in this
# order: era (G) to "gg", stand-alone month (LLLL, LLL) to the M form
# of the same length, day names (E, ccc) to "dddd", and an isolated
# "y" to "yyyy".
sub convert_date_format($)
{
    my ($fmt) = @_;
    for ($fmt)
    {
        s/G+/gg/;
        s/LLLL/MMMM/;
        s/LLL/MMM/;
        s/E+/dddd/;
        s/ccc+/dddd/;
        # a single "y" in the middle, at the start, or at the end
        s/([^gy])y([^y])/$1yyyy$2/;
        s/^y([^y])/yyyy$1/;
        s/([^gy])y$/$1yyyy/;
    }
    return $fmt;
}
# convert a time format pattern by rewriting some of its fields
#
# The first run of "a" and the first run of "B" both become "tt", and
# the first U+202F character becomes a plain space.
sub convert_time_format($)
{
    my ($fmt) = @_;
    for ($fmt)
    {
        s/a+/tt/;
        s/B+/tt/;
        s/\x{202f}/ /;
    }
    return $fmt;
}
# load the ISO 639-3 code table
#
# Returns a hash mapping the two-letter code found in the fourth
# column of a line to the three-letter code of its third column;
# lines that do not have all four codes are ignored.
sub load_iso639()
{
    my $path = "iso-639-3_Code_Tables_$ISO639VERSION/iso-639-3_$ISO639VERSION.tab";
    my $fh = open_data_file( "iso639", $path );
    my %iso639;
    while (<$fh>)
    {
        next unless /^\s*[a-z]{3}\s+[a-z]{3}\s+([a-z]{3})\s+([a-z]{2})\s/;
        $iso639{$2} = $1;
    }
    close $fh;
    return %iso639;
}
4451 ################################################################
4452 # build the locale table for locale.nls
4453 sub build_locale_data()
4455 my $base = "cldr-release-$CLDRVERSION";
4456 my $suppl = load_xml_data_file( "cldr", "$base/common/supplemental/supplementalData.xml" );
4457 my $subtags = load_xml_data_file( "cldr", "$base/common/supplemental/likelySubtags.xml" );
4458 my $numbers = load_xml_data_file( "cldr", "$base/common/supplemental/numberingSystems.xml" );
4459 # obsolete phone data from CLDR version 33
4460 my $phone = load_xml_data_file( "cldr33", "common/supplemental/telephoneCodeData.xml" );
4461 my %iso639 = load_iso639();
4462 $string_data = pack "S2", 0, 0; # offset 0 == empty string
4464 %lcnames = map { $_->{name} => $_ } @locales;
4466 my %lcids;
4467 foreach my $loc (@locales) { $lcids{$loc->{lcid}} = $loc if defined $loc->{lcid}; }
4469 my %days = ( "sun" => 0, "mon" => 1, "tue" => 2, "wed" => 3, "thu" => 4, "fri" => 5, "sat" => 6 );
4471 # assign locale parents
4473 foreach my $loc (@locales)
4475 next if $loc->{name} eq "";
4476 next if defined $loc->{parent};
4477 (my $unix_name = $loc->{name}) =~ s/-/_/g;
4478 my $parent = xml_query( $suppl, "/supplementalData/parentLocales[not(\@component)]/parentLocale[contains(concat(' ',\@locales,' '),' $unix_name ')]/\@parent" );
4479 if ($parent)
4481 $parent =~ s/_/-/g;
4482 $parent = "" if $parent eq "root";
4484 elsif ($loc->{name} =~ /(.*)-[0-9A-Za-z]+/) { $parent = $1; }
4485 $loc->{parent} = $parent || "";
4488 # load per-locale XML files
4490 foreach my $loc (@locales)
4492 next if defined $loc->{alias};
4493 (my $file = $loc->{file} || $loc->{name}) =~ s/-/_/g;
4494 $file = "$base/" . ($loc->{dir} || "common") . "/main/$file.xml";
4495 my $xml = load_xml_data_file( "cldr", $file );
4496 $loc->{xml} = $xml;
4497 $loc->{language} ||= xml_query( $xml, "/ldml/identity/language/\@type" );
4498 $loc->{territory} ||= xml_query( $xml, "/ldml/identity/territory/\@type" );
4499 $loc->{script} = xml_query( $xml, "/ldml/identity/script/\@type" );
4500 if (!defined($loc->{territory}) && $loc->{name} =~ /-([A-Z]{2}|[0-9]{3})$/) { $loc->{territory} = $1; }
4501 if (!defined($loc->{script}) && $loc->{name} =~ /-([A-Z][a-z]{3})(-[A-Z]{2})?$/) { $loc->{script} = $1; }
4504 # assign a default territory and sort locale
4506 foreach my $loc (@locales)
4508 next if defined $loc->{alias};
4509 next if defined $loc->{territory};
4510 my $id = $loc->{sortlocale};
4511 if (defined $id && ($id =~ /[-_]([A-Z0-9]+)$/))
4513 $loc->{territory} = $1;
4514 next;
4516 my @children = grep /^$loc->{name}-[A-Z0-9]+$/ && !defined $lcnames{$_}->{alias}, keys %lcnames;
4517 if (@children == 1)
4519 $id = $children[0];
4521 else
4523 my $name = $loc->{file} || $loc->{name};
4524 $name =~ s/-(Arab|Beng|Cyrl|Deva|Guru|Hans|Hant|Latn|Tfng|Vaii)$//;
4525 $name =~ s/-/_/g;
4526 $id = xml_query( $subtags, "/supplementalData/likelySubtags/likelySubtag[\@from='$name']/\@to" );
4527 $id =~ s/_/-/g if $id;
4529 if ($id =~ /[-_]([A-Z0-9]+)$/)
4531 $loc->{territory} = $1;
4532 next if defined $loc->{sortlocale};
4533 next unless $id =~ /^$loc->{name}/;
4534 while (defined $lcnames{$id} && defined $lcnames{$id}->{alias}) { $id = $lcnames{$id}->{alias}; }
4535 $loc->{sortlocale} = $id if defined $lcnames{$id};
4536 next;
4538 print STDERR "no territory found for $loc->{name}\n";
4541 # fill geoid table
4543 my %geotable;
4544 foreach my $geo (@geoids)
4546 my $name = $geo->{name};
4547 next unless defined $name;
4548 $geo->{alias} = $geotable{$name} if defined $geotable{$name};
4549 $geotable{$name} ||= $geo;
4551 foreach my $loc (@locales)
4553 next if defined $loc->{alias};
4554 my $territory = $loc->{territory};
4555 $geotable{$territory} ||= { name => $territory };
4557 foreach my $name (keys %geotable)
4559 my $geo = $geotable{$name};
4560 $geo->{dialcode} = xml_query( $phone, "(/supplementalData/telephoneCodeData/codesByTerritory[\@territory='$name']/telephoneCountryCode)[1]/\@code" );
4561 if ($name =~ /\d+/)
4563 $geo->{uncode} = $name;
4564 next;
4566 $geo->{iso2} = $name;
4567 $geo->{iso3} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@alpha3");
4568 $geo->{uncode} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@numeric");
4569 $geo->{sintlsymbol} ||= xml_query( $suppl, "(/supplementalData/currencyData/region[\@iso3166='$name']/currency[not(\@to)])[1]/\@iso4217") || "XXX";
4570 $geo->{sintlsymbol} =~ s/XXX/XDR/;
4572 foreach my $geo (@geoids)
4574 $geo->{parentid} = $geotable{$geo->{parent}}->{id} if defined $geo->{parent};
4575 next if defined $geo->{iso2};
4576 next if defined $geo->{alias};
4577 next unless defined $geo->{uncode};
4578 my @contains;
4579 my $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and not(\@status)]/\@contains");
4580 push @contains, split /\s+/, $list if defined $list;
4581 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and \@status='deprecated']/\@contains");
4582 push @contains, split /\s+/, $list if defined $list;
4583 while (@contains)
4585 my $territory = pop @contains;
4586 if (defined $geotable{$territory})
4588 $geotable{$territory}->{parentid} ||= $geo->{id};
4590 elsif ($territory =~ /\d+/)
4592 # expand region recursively
4593 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$territory' and not(\@status)]/\@contains" );
4594 push @contains, split /\s+/, $list if defined $list;
4599 # assign calendars to their locale
4601 foreach my $cal (@calendars)
4603 next unless defined $cal->{locale};
4604 my $loc = $lcnames{$cal->{locale}};
4605 $loc->{calendar} = [ ] unless defined $loc->{calendar};
4606 push @{$loc->{calendar}}, $cal;
4609 # assign default lcid to aliases
4611 foreach my $loc (@locales)
4613 next unless defined $loc->{alias};
4614 next if defined $loc->{lcid};
4615 my $alias = $loc->{alias};
4616 my $lcid = $lcnames{$alias}->{lcid} || 0x1000;
4617 $loc->{lcid} = $lcid | 0x80000000;
4620 # assign sort aliases to parent locale
4622 foreach my $loc (@locales)
4624 next unless $loc->{name} =~ /_/;
4625 next unless defined $loc->{alias};
4626 my $alias = $loc->{alias};
4627 my $parent = $lcnames{$alias};
4628 my $basename = $parent->{name};
4629 while (1)
4631 @{$parent->{sortnames}}[($loc->{lcid} >> 16) - 1] = $loc->{name};
4632 $alias = locale_parent( $alias );
4633 last unless $alias && defined $lcnames{$alias};
4634 $parent = $lcnames{$alias};
4635 last if defined $parent->{sortbase} && $parent->{sortbase} ne $basename;
4636 $parent->{sortbase} = $basename;
4640 # assign an array index to all locales
4642 my $idx = 0;
4643 foreach my $loc (@locales)
4645 next if defined $loc->{alias};
4646 $loc->{idx} = $idx++;
4648 foreach my $loc (@locales)
4650 my $alias = $loc->{alias};
4651 next unless defined $alias;
4652 while (defined $lcnames{$alias}->{alias}) { $alias = $lcnames{$alias}->{alias}; }
4653 $loc->{idx} = $lcnames{$alias}->{idx};
4656 # output lcids table
4658 my $lcid_data = "";
4659 foreach my $id (sort { $a <=> $b } keys %lcids)
4661 my $loc = $lcids{$id};
4662 $lcid_data .= pack "L<S<2", $id, $loc->{idx}, add_string($loc->{name});
4665 # output lcnames table
4667 my $lcname_data = "";
4668 foreach my $name (sort compare_locales keys %lcnames)
4670 my $loc = $lcnames{$name};
4671 $lcname_data .= pack "S<2L<", add_string($name), $loc->{idx}, $loc->{lcid} || 0x1000;
4674 # output locales array
4676 my $locale_data = "";
4677 my $default_lcid = 0x8001;
4678 foreach my $loc (@locales)
4680 next if defined $loc->{alias};
4681 my $sname = $loc->{name};
4682 my $language = $loc->{language};
4683 my $territory = $loc->{territory};
4684 my $script = $loc->{script};
4685 my $neutral = ($sname && $sname !~ /-$territory/);
4686 my $sparent = $loc->{sparent} || (($sname =~ /(.*)-[0-9A-Za-z]+/) ? $1 : $loc->{parent});
4687 my $unique_lcid = $loc->{lcid};
4688 unless (defined $unique_lcid) { $unique_lcid = $default_lcid++; }
4689 my $geo = $geotable{$territory};
4690 my $territory_match = "contains(concat(' ',normalize-space(\@territories),' '),' $territory ')";
4692 # languages and scripts
4694 my $ssortlocale = $loc->{sortlocale} || ($neutral ? "$sname-$territory" : $sname);
4695 my $idefaultlanguage = defined $lcnames{$ssortlocale} ? $lcnames{$ssortlocale}->{lcid} : undef;
4696 $idefaultlanguage = $lcnames{"en-US"}->{lcid} unless $ssortlocale;
4697 (my $siso639langname = $sname) =~ s/-.*$//;
4698 my $siso639langname2 = $iso639{$siso639langname} || $siso639langname;
4699 my $sopentypelang = sprintf "%-4s", locale_entry( $loc, "sopentypelang", uc $siso639langname2 );
4700 my $sabbrevlangname = defined $loc->{lcid} ? locale_entry( $loc, "sabbrevlangname", uc $siso639langname2 ) : "ZZZ";
4701 my $siso3166ctryname2 = $geo->{iso3} || $geo->{uncode};
4702 my $senglanguage = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" ) || "";
4703 my $sengcountry = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" ) || "";
4704 my $snativelangname = loc_query( $loc, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" );
4705 my $snativectryname = loc_query( $loc, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" );
4706 $sengcountry =~ s/South Korea/Korea/;
4707 $sengcountry =~ s/T\xfcrkiye/Turkey/;
4708 $snativelangname ||= $senglanguage;
4709 $snativectryname ||= $sengcountry;
4710 if ($script)
4712 my $engscript = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4713 my $nativescript = loc_query( $loc, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4714 $senglanguage .= " ($engscript)" if $engscript;
4715 $snativelangname .= " ($nativescript)" if $nativescript;
4717 my $sengdisplayname = $neutral ? $senglanguage : "$senglanguage ($sengcountry)";
4718 my $snativedisplayname = $neutral ? $snativelangname : "$snativelangname ($snativectryname)";
4719 $sengdisplayname =~ s/\) \(/, /;
4720 $snativedisplayname =~ s/\) \(/, /;
4721 my $sscripts = locale_entry( $loc, "sscripts", $script ) || xml_query( $suppl, "/supplementalData/languageData/language[\@type='$language' and not(\@alt)]/\@scripts" );
4722 $sscripts = (join ";", (sort split / /, ($sscripts || "Latn"))) . ";";
4723 my $ireadinglayout = locale_entry( $loc, "ireadinglayout", 0 );
4724 my $charlayout = loc_query( $loc, "/ldml/layout/orientation/characterOrder" );
4725 if ($charlayout eq "right-to-left")
4727 $ireadinglayout = 1;
4729 elsif ($charlayout eq "top-to-bottom")
4731 my $linelayout = loc_query( $loc, "/ldml/layout/orientation/lineOrder" );
4732 $ireadinglayout = $linelayout eq "right-to-left" ? 2 : 3;
4734 my $igeoid = $geo->{id} || 0;
4736 # numbers
4738 my $sdecimal = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/decimal" );
4739 my $slist = locale_entry( $loc, "slist", ";" );
4740 my $smondecimalsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyDecimal" ) || $sdecimal;
4741 my $sthousand = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/group" );
4742 $sthousand =~ s/\x{202f}/\x{00a0}/;
4743 my $smonthousandsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyGroup" ) || $sthousand;
4744 my $spositivesign = "";
4745 my $snegativesign = "-";
4746 my $spercent = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/percentSign" );
4747 my $snan = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/nan" );
4748 my $sposinfinity = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/infinity" );
4749 my $sneginfinity = $sposinfinity ? "-$sposinfinity" : "";
4750 my $sgrouping = format_to_grouping( loc_query( $loc, "/ldml/numbers/decimalFormats[\@numberSystem='latn']/decimalFormatLength[not(\@type)]/decimalFormat/pattern" ));
4751 my $percentformat = loc_query( $loc, "/ldml/numbers/percentFormats[\@numberSystem='latn']/percentFormatLength[not(\@type)]/percentFormat/pattern" );
4752 my $currencyformat = loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='accounting']/pattern[not(\@alt)]" ) ||
4753 loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='standard']/pattern[not(\@alt)]" );
4754 my $smongrouping = format_to_grouping( $currencyformat );
4755 my ($icurrency, $inegcurr) = parse_currency_format( $sname, $currencyformat );
4756 my ($ipospercent, $inegpercent) = parse_percent_format( $percentformat );
4757 my $native_numbering = loc_query( $loc, "/ldml/numbers/otherNumberingSystems/native" );
4758 my @snativedigits = split //, (locale_entry( $loc, "nativedigits", "" ) || xml_query( $numbers, "/supplementalData/numberingSystems/numberingSystem[\@id='$native_numbering']/\@digits" ));
4759 my $digitsubstitution = !(ord($snativedigits[0]) >= 0x600 && ord($snativedigits[0]) <= 0x6ff);
4760 my $measure = defined xml_query( $suppl, "/supplementalData/measurementData/measurementSystem[\@type='US' and $territory_match]" );
4761 my $papersize = defined xml_query( $suppl, "/supplementalData/measurementData/paperSize[\@type='US-Letter' and $territory_match]" );
4763 # currencies
4765 my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
4766 my $scurrency = $geo->{scurrency} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[\@alt='narrow']" );
4767 $scurrency ||= loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[not(\@alt)]" );
4768 $scurrency ||= $geo->{sintlsymbol};
4769 $geo->{scurrency} = $scurrency if $scurrency;
4770 my $sengcurrname = $loc->{sengcurrname} || loc_query( $lcnames{en}, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" );
4771 my $snativecurrname = $loc->{sengcurrname} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" ) || $sengcurrname;
4772 my $icurrdigits = xml_query( $suppl, "/supplementalData/currencyData/fractions/info[\@iso4217='$sintlsymbol']/\@digits" );
4773 $icurrdigits = 2 unless defined $icurrdigits;
4775 # calendars
4777 my $firstday = xml_query( $suppl, "/supplementalData/weekData/firstDay[not(\@alt) and $territory_match]/\@day" );
4778 my $ifirstdayofweek = $firstday ? $days{$firstday} : 1;
4779 my $firstweekofyear = (xml_query( $suppl, "/supplementalData/weekData/minDays[$territory_match]/\@count" ) || 0) == 4 ? 2 : 0;
4780 my $serastring = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/eras/eraAbbr/era[\@type='1' and not(\@alt)]" );
4781 my (@sdayname, @sabbrevdayname, @sshortestdayname);
4782 foreach my $d (sort { $days{$a} <=> $days{$b} } keys %days)
4784 my $n = $days{$d};
4785 my %name;
4786 foreach my $type (qw(wide abbreviated short))
4788 $name{$type} = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/days/dayContext[\@type='format']/dayWidth[\@type='$type']/day[\@type='$d' and not(\@alt)]" );
4790 push @sdayname, $name{wide};
4791 push @sabbrevdayname, $name{abbreviated} || $name{wide};
4792 push @sshortestdayname, $name{short} || $name{abbreviated} || $name{wide};
4794 my (@smonthname, @sabbrevmonthname, @sgenitivemonth, @sabbrevgenitivemonth);
4795 foreach my $n (1..13)
4797 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='wide']/month[\@type='$n']" );
4798 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4799 my $genitive = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n']" );
4800 my $abbrevgen = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4801 push @smonthname, $name || $genitive || "";
4802 push @sabbrevmonthname, $abbrev || $abbrevgen || $name || $genitive || "";
4803 push @sgenitivemonth, $genitive || "";
4804 push @sabbrevgenitivemonth, $abbrevgen || $genitive || "";
4806 @sgenitivemonth = () if join("|",@smonthname) eq join("|",@sgenitivemonth);
4807 @sabbrevgenitivemonth = () if join("|",@sabbrevmonthname) eq join("|",@sabbrevgenitivemonth);
4808 my %caltypes = ( "gregorian" => 1, "japanese" => 3, "chinese" => 4, "dangi" => 5, "islamic" => 6, "buddhist" => 7, "hebrew" => 8,
4809 "persian" => 22, "islamic-civil" => 23, "islamic-umalqura" => 23 );
4810 my $calpref = xml_query( $suppl, "/supplementalData/calendarPreferenceData/calendarPreference[$territory_match]/\@ordering" ) || "gregorian";
4811 my $icalendartype;
4812 my @scalnames;
4813 foreach my $c (split /\s+/, $calpref)
4815 next unless defined $caltypes{$c};
4816 $icalendartype .= chr($caltypes{$c});
4817 $scalnames[$caltypes{$c} - 1] = loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$c']" );
4820 # date/time formats
4822 my $s1159 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='am' and not(\@alt)]" );
4823 my $s2359 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='pm' and not (\@alt)]" );
4824 my $sshortestam = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='am' and not(\@alt)]" );
4825 my $sshortestpm = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='pm' and not (\@alt)]" );
4826 my @stimeformat = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='medium']/timeFormat/pattern[not(\@alt)]" ));
4827 push @stimeformat, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4828 pop @stimeformat if $stimeformat[0] eq $stimeformat[1];
4829 @stimeformat = map convert_time_format($_), @stimeformat;
4830 my @sshorttime = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='short']/timeFormat/pattern[not(\@alt)]" ));
4831 push @sshorttime, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hm' and not(\@alt)]" );
4832 pop @sshorttime if $sshorttime[0] eq $sshorttime[1];
4833 @sshorttime = map convert_time_format($_), @sshorttime;
4834 my @sshortdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
4835 push @sshortdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
4836 @sshortdate = map convert_date_format($_), @sshortdate;
4837 my @slongdate = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" ));
4838 push @slongdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
4839 @slongdate = map convert_date_format($_), @slongdate;
4840 my @smonthday = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMd' and not(\@alt)]" ));
4841 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Md' and not(\@alt)]" );
4842 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMd' and not(\@alt)]" );
4843 @smonthday = map convert_date_format($_), @smonthday;
4844 my @syearmonth = map convert_date_format($_), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMM' and not(\@alt)]" );
4845 my @sduration = map convert_time_format( lc $_ ), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4846 my $srelativelongdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMEd' and not(\@alt)]" ) ||
4847 loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMEd' and not(\@alt)]" );
4848 $srelativelongdate = convert_date_format( $srelativelongdate );
4850 if (defined $loc->{calendar})
4852 foreach my $cal (@{$loc->{calendar}})
4854 $cal->{sshortdate} = \@sshortdate;
4855 $cal->{syearmonth} = \@syearmonth;
4856 $cal->{slongdate} = \@slongdate;
4857 $cal->{serastring} = [ $serastring ];
4858 $cal->{sdayname} = \@sdayname;
4859 $cal->{sabbrevdayname} = \@sabbrevdayname;
4860 $cal->{smonthname} = \@smonthname;
4861 $cal->{sabbrevmonthname} = \@sabbrevmonthname;
4862 $cal->{scalname} = $scalnames[$cal->{id}];
4863 $cal->{smonthday} = \@smonthday;
4864 $cal->{sshortestdayname} = \@sshortestdayname;
4865 $cal->{sabbreverastring} = [ $serastring ];
4866 $cal->{sshortestdayname} = \@sshortestdayname;
4867 $cal->{srelativelongdate} = $srelativelongdate;
4871 # codepages
4873 my %ansicpmap = ( 437 => 1252, 720 => 1256, 737 => 1253, 775 => 1257, 850 => 1252,
4874 852 => 1250, 855 => 1251, 866 => 1251, 857 => 1254, 862 => 1255 );
4875 my %maccpmap = ( 437 => 10000, 720 => 10004, 737 => 10006, 775 => 10029, 850 => 10000,
4876 852 => 10029, 855 => 10007, 857 => 10081, 862 => 10005, 866 => 10007,
4877 874 => 10021, 932 => 10001, 936 => 10008, 949 => 10003, 950 => 10002,
4878 1258 => 10000 );
4879 my %ebcdiccpmap = ( 437 => 37, 720 => 20420, 737 => 20273, 866 => 20880, 932 => 20290 );
4880 my %codepagemasks = ( 874 => [ 0x01000000, 0x00000000, 0x00000000, 0, 0x00010000, 0x00000000, 0x00010000, 0x00000000 ],
4881 932 => [ 0x00000000, 0x28c70000, 0x00000010, 0, 0x00020000, 0x00000000, 0x00020000, 0x00000000 ],
4882 936 => [ 0x00000000, 0x28010000, 0x00000002, 0, 0x00040000, 0x00000000, 0x00040000, 0x00000000 ],
4883 949 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00080000, 0x00000000, 0x00080000, 0x00000000 ],
4884 950 => [ 0x00000000, 0x28c10000, 0x00000012, 0, 0x00100000, 0x00000000, 0x00100000, 0x00000000 ],
4885 1258 => [ 0x2000000f, 0x00000000, 0x00000000, 0, 0x00000100, 0x00008000, 0x00000100, 0x00008000 ],
4886 866 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x00020000, 0x00000004, 0x02020000 ],
4887 862 => [ 0x00000800, 0x40000000, 0x00000000, 0, 0x00000020, 0x00200000, 0x00000020, 0x00200000 ],
4888 857 => [ 0x0000001f, 0x00000000, 0x00000000, 0, 0x00000010, 0x01000000, 0x00000010, 0x01000000 ],
4889 855 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x02000000, 0x00000004, 0x02000000 ],
4890 852 => [ 0x00000027, 0x00000000, 0x00000000, 0, 0x00000002, 0x04000000, 0x00000002, 0x04000000 ],
4891 775 => [ 0x00000007, 0x00000000, 0x00000000, 0, 0x00000080, 0x08000000, 0x00000080, 0x08000000 ],
4892 737 => [ 0x00000080, 0x00000000, 0x00000000, 0, 0x00000008, 0x10000000, 0x00000008, 0x10010000 ],
4893 720 => [ 0x00002000, 0x00000000, 0x00000000, 0, 0x00000040, 0x20000000, 0x00000040, 0x20080000 ],
4894 850 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x40000000, 0x0000019f, 0xdfd70000 ],
4895 437 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x80000000, 0x0000019f, 0xdfd70000 ],
4896 65001 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00000000, 0x00000000, 0x0000019f, 0xdfd70000 ] );
4897 my $oemcp = locale_entry( $loc, "oemcp", 65001 );
4898 my $maccp = locale_entry( $loc, "maccp", undef ) || $maccpmap{$oemcp} || 65001;
4899 my $ebcdiccp = locale_entry( $loc, "ebcdiccp", undef ) || $ebcdiccpmap{$oemcp} || 500;
4900 $ebcdiccp = 500 if (defined $loc->{oemcp} && $loc->{oemcp} == 65001) || (defined $loc->{maccp} && $loc->{maccp} == 65001);
4901 my $ansicp = $ansicpmap{$oemcp} || $oemcp;
4902 my @fontsig = (0) x 8;
4903 my $sig = locale_entry( $loc, "fontsig", [] );
4904 foreach my $i (0..7) { $fontsig[$i] |= $codepagemasks{$oemcp}->[$i]; }
4905 foreach my $i (0..$#{$sig}) { $fontsig[$i] |= $sig->[$i]; }
4906 $fontsig[3] |= 1 << 31;
4907 $fontsig[3] |= 1 << 27 if $ireadinglayout == 1;
4908 $fontsig[3] |= 1 << 28 if $ireadinglayout == 3;
4910 # special cases for invariant locale
4912 unless ($loc->{name})
4914 $siso639langname = "iv";
4915 $siso639langname2 = "ivl";
4916 $senglanguage = $snativelangname = "Invariant Language";
4917 $sengcountry = $snativectryname = "Invariant Country";
4918 $sengdisplayname = "Invariant Language (Invariant Country)";
4919 $snativedisplayname = "Invariant Language (Invariant Region)";
4920 $sengcurrname = $snativecurrname = "International Monetary Fund";
4921 $scurrency = "\x{00a4}";
4922 $ifirstdayofweek = 0;
4923 $igeoid = $geotable{"US"}->{id};
4924 @stimeformat = ("HH:mm:ss");
4925 @sshortdate = ("MM/dd/yyyy", "yyyy-MM-dd");
4926 @slongdate = ("dddd, dd MMMM yyyy");
4927 @syearmonth = ("yyyy MMMM");
4928 @smonthday = ("MMMM dd", "MMMM d", "M/d", "MMM d");
4929 @sshorttime = ("HH:mm", "hh:mm tt", "H:mm", "h:mm tt");
4930 $srelativelongdate = "dddd, MMMM dd";
4931 $sposinfinity = "Infinity";
4932 $sneginfinity = "-Infinity";
4933 $spositivesign = "+";
4934 $ipospercent = $inegpercent = 0;
4937 # output data
4939 $locale_data .= pack "L<2",
4940 add_string( $sname ), # name
4941 add_string( $sopentypelang ); # LOCALE_SOPENTYPELANGUAGETAG
4943 $locale_data .= pack "S<14",
4944 $loc->{lcid} || 0x1000, # LOCALE_ILANGUAGE
4945 $unique_lcid, # unique_lcid
4946 locale_entry( $loc, "idigits", 2 ), # LOCALE_IDIGITS
4947 locale_entry( $loc, "inegnumber", 1 ), # LOCALE_INEGNUMBER
4948 $icurrdigits, # LOCALE_ICURRDIGITS
4949 $icurrency, # LOCALE_ICURRENCY
4950 $inegcurr, # LOCALE_INEGCURR
4951 locale_entry( $loc, "ilzero", 1 ), # LOCALE_ILZERO
4952 !$neutral, # LOCALE_INEUTRAL
4953 $ifirstdayofweek, # LOCALE_IFIRSTDAYOFWEEK
4954 $firstweekofyear, # LOCALE_IFIRSTWEEKOFYEAR
4955 $geo->{dialcode} || 1 , # LOCALE_ICOUNTRY,
4956 $measure, # LOCALE_IMEASURE
4957 $digitsubstitution; # LOCALE_IDIGITSUBSTITUTION
4959 $locale_data .= pack "L<18",
4960 add_string( $sgrouping ), # LOCALE_SGROUPING
4961 add_string( $smongrouping ), # LOCALE_SMONGROUPING
4962 add_string( $slist ), # LOCALE_SLIST
4963 add_string( $sdecimal ), # LOCALE_SDECIMAL
4964 add_string( $sthousand ), # LOCALE_STHOUSAND
4965 add_string( $scurrency ), # LOCALE_SCURRENCY
4966 add_string( $smondecimalsep ), # LOCALE_SMONDECIMALSEP
4967 add_string( $smonthousandsep ), # LOCALE_SMONTHOUSANDSEP
4968 add_string( $spositivesign ), # LOCALE_SPOSITIVESIGN
4969 add_string( $snegativesign ), # LOCALE_SNEGATIVESIGN
4970 add_string( $s1159 ), # LOCALE_S1159
4971 add_string( $s2359 ), # LOCALE_S2359
4972 add_strarray( @snativedigits ), # LOCALE_SNATIVEDIGITS
4973 add_strarray( @stimeformat ), # LOCALE_STIMEFORMAT
4974 add_strarray( @sshortdate ), # LOCALE_SSHORTDATE
4975 add_strarray( @slongdate ), # LOCALE_SLONGDATE
4976 add_strarray( @syearmonth ), # LOCALE_SYEARMONTH
4977 add_strarray( @sduration ); # LOCALE_SDURATION
4979 $locale_data .= pack "S<8",
4980 $idefaultlanguage || 0x1000, # LOCALE_IDEFAULTLANGUAGE
4981 $ansicp, # LOCALE_IDEFAULTANSICODEPAGE
4982 $oemcp, # LOCALE_IDEFAULTCODEPAGE
4983 $maccp, # LOCALE_IDEFAULTMACCODEPAGE
4984 $ebcdiccp, # LOCALE_IDEFAULTEBCDICCODEPAGE
4985 $igeoid < 65536 ? $igeoid : 39070, # old_geoid
4986 $papersize ? 1 : 9, # LOCALE_IPAPERSIZE
4987 0; # FIXME # islamic_cal
4989 $locale_data .= pack "L<24",
4990 add_string( $icalendartype ), # LOCALE_ICALENDARTYPE
4991 add_string( $sabbrevlangname ), # LOCALE_SABBREVLANGNAME
4992 add_string( $siso639langname ), # LOCALE_SISO639LANGNAME
4993 add_string( $senglanguage ), # LOCALE_SENGLANGUAGE
4994 add_string( $snativelangname ), # LOCALE_SNATIVELANGNAME
4995 add_string( $sengcountry ), # LOCALE_SENGCOUNTRY
4996 add_string( $snativectryname ), # LOCALE_SNATIVECTRYNAME
4997 add_string( $siso3166ctryname2 ), # LOCALE_SABBREVCTRYNAME
4998 add_string( $territory ), # LOCALE_SISO3166CTRYNAME
4999 add_string( $sintlsymbol ), # LOCALE_SINTLSYMBOL
5000 add_string( $sengcurrname ), # LOCALE_SENGCURRNAME
5001 add_string( $snativecurrname ), # LOCALE_SNATIVECURRNAME
5002 add_fontsig( @fontsig ), # LOCALE_FONTSIGNATURE
5003 add_string( $siso639langname2 ), # LOCALE_SISO639LANGNAME2
5004 add_string( $siso3166ctryname2 ), # LOCALE_SISO3166CTRYNAME2
5005 add_string( $sparent ), # LOCALE_SPARENT
5006 add_strarray( @sdayname ), # LOCALE_SDAYNAME
5007 add_strarray( @sabbrevdayname ), # LOCALE_SABBREVDAYNAME
5008 add_strarray( @smonthname ), # LOCALE_SMONTHNAME
5009 add_strarray( @sabbrevmonthname ), # LOCALE_SABBREVMONTHNAME
5010 add_strarray( @sgenitivemonth ), # LOCALE_SGENITIVEMONTH
5011 add_strarray( @sabbrevgenitivemonth ), # LOCALE_SABBREVGENITIVEMONTH
5012 add_strarray( @scalnames ), # LOCALE_SCALNAMES
5013 add_strarray( @{$loc->{sortnames}} ); # LOCALE_SSORTNAMES
5015 $locale_data .= pack "S<6",
5016 $inegpercent, # LOCALE_INEGATIVEPERCENT
5017 $ipospercent, # LOCALE_IPOSITIVEPERCENT
5018 0, # unknown
5019 $ireadinglayout, # LOCALE_IREADINGLAYOUT
5020 0x2a, # unknown
5021 0x2a; # unknown
5023 $locale_data .= pack "L<24",
5024 0, # unknown
5025 add_string( $sengdisplayname ), # LOCALE_SENGLISHDISPLAYNAME
5026 add_string( $snativedisplayname ), # LOCALE_SNATIVEDISPLAYNAME
5027 add_string( $spercent ), # LOCALE_SPERCENT
5028 add_string( $snan ), # LOCALE_SNAN
5029 add_string( $sposinfinity ), # LOCALE_SPOSINFINITY
5030 add_string( $sneginfinity ), # LOCALE_SNEGINFINITY
5031 0, # unknown
5032 add_string( $serastring ), # CAL_SERASTRING
5033 add_string( $serastring ), # CAL_SABBREVERASTRING
5034 0, # unknown
5035 add_string( $ssortlocale ), # LOCALE_SCONSOLEFALLBACKNAME
5036 add_strarray( @sshorttime ), # LOCALE_SSHORTTIME
5037 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
5038 0, # unknown
5039 add_string( $ssortlocale ), # LOCALE_SSORTLOCALE
5040 add_string( "0409:00000409" ), # FIXME # LOCALE_SKEYBOARDSTOINSTALL
5041 add_string( $sscripts ), # LOCALE_SSCRIPTS
5042 add_string( $srelativelongdate ), # LOCALE_SRELATIVELONGDATE
5043 $igeoid, # LOCALE_IGEOID
5044 add_string( $sshortestam || "a" ), # LOCALE_SSHORTESTAM
5045 add_string( $sshortestpm || "p" ), # LOCALE_SSHORTESTPM
5046 add_strarray( @smonthday ), # LOCALE_SMONTHDAY
5047 add_string( "k0-windows-us" ) # FIXME # keyboard_layout
5050 # output language groups
5052 my %groups;
5053 add_registry_key( $nlskey, "Locale", "00000409" );
5054 foreach my $loc (@locales)
5056 next unless defined $loc->{lcid};
5057 next if ($loc->{lcid} & 0x80000000);
5058 next if !defined($loc->{alias}) && $loc->{name} !~ /-$loc->{territory}/; # skip neutral locales
5059 my $group = locale_entry( $loc, "group", 1 );
5060 my $name = sprintf( "%08x", $loc->{lcid} );
5061 my $val = sprintf( "%x", $group );
5062 add_registry_string_value( $nlskey, "Locale", $name, $val ) unless ($loc->{lcid} & 0x000f0000);
5063 add_registry_string_value( $nlskey, "Locale\\Alternate Sorts", $name, $val ) if $loc->{name} =~ /_/;
5064 $groups{$val} = 1;
5066 foreach my $group (keys %groups) { add_registry_string_value( $nlskey, "Language Groups", $group, "1" ); }
5068 # output calendar data
5070 my $calendar_data = "";
5071 foreach my $cal (@calendars)
5073 my $scalname = $cal->{name};
5074 my $iyearoffsetrange = 0;
5075 my $itwodigityearmax = $cal->{itwodigityearmax};
5076 my @sshortdate;
5077 my @syearmonth;
5078 my @slongdate;
5079 my @serastring;
5080 my @sdayname;
5081 my @sabbrevdayname;
5082 my @smonthname;
5083 my @sabbrevmonthname;
5084 my @smonthday;
5085 my @sabbreverastring;
5086 my @sshortestdayname;
5088 my $type = $cal->{type};
5089 if (defined $cal->{locale} && defined $type)
5091 my $loc = $lcnames{$cal->{locale}};
5092 my $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
5093 push @sshortdate, $fmt if $fmt;
5094 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMd' and not(\@alt)]" );
5095 push @sshortdate, $fmt if $fmt;
5096 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
5097 push @sshortdate, $fmt if $fmt;
5098 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMMMd' and not(\@alt)]" );
5099 push @sshortdate, $fmt if $fmt;
5100 @sshortdate = map convert_date_format($_), @sshortdate;
5101 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" );
5102 push @slongdate, $fmt if $fmt;
5103 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
5104 push @slongdate, $fmt if $fmt;
5105 @slongdate = map convert_date_format($_), @slongdate;
5107 foreach my $n (1..13)
5109 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n' and not(\@yeartype)]" );
5110 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n' and not(\@yeartype)]" );
5111 push @smonthname, $name || "";
5112 push @sabbrevmonthname, $abbrev || $name || "";
5115 $scalname ||= loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$type']" );
5116 if (defined $cal->{eras})
5118 my @eras;
5119 my $idx = 1;
5120 foreach my $era (@{$cal->{eras}})
5122 my $start = xml_query( $suppl, "/supplementalData/calendarData/calendar[\@type='$type']/eras/era[\@type='$era']/\@start" );
5123 next unless $start =~ /^(-?\d+)-(\d+)-(\d+)/;
5124 my ($year, $mon, $day, $zero, $first) = ($1, $2, $3, $1 - 1, 1);
5125 if ($zero < 0)
5127 $first -= $zero;
5128 $year = 1;
5129 $itwodigityearmax = 2049 - $zero;
5131 unshift @eras, pack( "S<8", 6, $idx++, $year, $mon, $day, $zero, $first, 0 );
5132 push @serastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraAbbr/era[\@type='$era']" );
5133 push @sabbreverastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraNarrow/era[\@type='$era']" );
5135 $iyearoffsetrange = add_str_data( pack "S<L<*", scalar @eras, map { add_str_data($_); } @eras );
5139 @sshortdate = @{$cal->{sshortdate}} if defined $cal->{sshortdate} && !@sshortdate;
5140 @syearmonth = @{$cal->{syearmonth}} if defined $cal->{syearmonth};
5141 @slongdate = @{$cal->{slongdate}} if defined $cal->{slongdate} && !@slongdate;
5142 @serastring = @{$cal->{serastring}} if defined $cal->{serastring} && !@serastring;
5143 @sdayname = @{$cal->{sdayname}} if defined $cal->{sdayname};
5144 @sabbrevdayname = @{$cal->{sabbrevdayname}} if defined $cal->{sabbrevdayname};
5145 @smonthname = @{$cal->{smonthname}} if defined $cal->{smonthname} && !join("",@smonthname);
5146 @sabbrevmonthname = @{$cal->{sabbrevmonthname}} if defined $cal->{sabbrevmonthname} && !join("",@sabbrevmonthname);
5147 @smonthday = @{$cal->{smonthday}} if defined $cal->{smonthday};
5148 @sabbreverastring = @{$cal->{sabbreverastring}} if defined $cal->{sabbreverastring} && !@sabbreverastring;
5149 @sshortestdayname = @{$cal->{sshortestdayname}} if defined $cal->{sshortestdayname};
5150 my $srelativelongdate = $cal->{srelativelongdate};
5152 @serastring = ("A.D.") unless @serastring;
5153 @sabbreverastring = ("AD") unless @sabbreverastring;
5155 if ($cal->{id} != 1) # calendar 1 is a placeholder, information is fetched from locale instead
5157 @sshortdate = ("") unless @sshortdate;
5158 @syearmonth = ("") unless @syearmonth;
5159 @slongdate = ("") unless @slongdate;
5160 @sdayname = ("") x 7 unless @sdayname;
5161 @sabbrevdayname = ("") x 7 unless @sabbrevdayname;
5162 @sshortestdayname = ("") x 7 unless @sshortestdayname;
5163 @smonthname = ("") x 13 unless @smonthname;
5164 @sabbrevmonthname = ("") x 13 unless @sabbrevmonthname;
5165 @smonthday = ("") unless @smonthday;
5168 $calendar_data .= pack "S<2L<17",
5169 $cal->{id}, # CAL_ICALINTVALUE
5170 $itwodigityearmax || 99, # CAL_ITWODIGITYEARMAX
5171 add_strarray( @sshortdate ), # CAL_SSHORTDATE
5172 add_strarray( @syearmonth ), # CAL_SYEARMONTH
5173 add_strarray( @slongdate ), # CAL_SLONGDATE
5174 add_strarray( @serastring ), # CAL_SERASTRING
5175 $iyearoffsetrange, # CAL_IYEAROFFSETRANGE
5176 add_strarray( @sdayname ), # CAL_SDAYNAME
5177 add_strarray( @sabbrevdayname ), # CAL_SABBREVDAYNAME
5178 add_strarray( @smonthname ), # CAL_SMONTHNAME
5179 add_strarray( @sabbrevmonthname ), # CAL_SABBREVMONTHNAME
5180 add_string( $scalname ), # CAL_SCALNAME
5181 add_strarray( @smonthday ), # CAL_SMONTHDAY
5182 add_strarray( @sabbreverastring ), # CAL_SABBREVERASTRING
5183 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
5184 add_string( $srelativelongdate ); # CAL_SRELATIVELONGDATE
5187 # output locale header
5189 my $nb_lcids = scalar keys %lcids;
5190 my $nb_locales = scalar grep { !defined $_->{alias} } @locales;
5191 my $nb_lcnames = scalar keys %lcnames;
5192 my $locale_size = length($locale_data) / $nb_locales;
5193 my $nb_calendars = scalar @calendars;
5194 my $calendar_size = length($calendar_data) / $nb_calendars;
5195 my $lcids_offset = 19 * 4; # size of header
5196 my $lcnames_offset = $lcids_offset + length $lcid_data;
5197 my $locales_offset = $lcnames_offset + length $lcname_data;
5198 my $calendar_offset = $locales_offset + length $locale_data;
5199 my $strings_offset = $calendar_offset + length $calendar_data;
5201 my $locale_header = pack "L<7S<4L<S<2L<3S<2L<4",
5202 8, # offset
5204 7, # version
5205 0x5344534e, # magic
5206 0, 0, 0,
5208 $nb_lcids,
5209 $nb_locales,
5210 $locale_size,
5211 $locales_offset,
5212 $nb_lcnames,
5214 $lcids_offset,
5215 $lcnames_offset,
5217 $nb_calendars,
5218 $calendar_size,
5219 $calendar_offset,
5220 $strings_offset,
5221 0, 0;
5223 return align_string( 4, $locale_header . $lcid_data . $lcname_data . $locale_data . $calendar_data . $string_data );
################################################################
# build the charmaps table for locale.nls
#
# Adds a few extra entries to the global digit folding and half/full
# width tables, builds the kana conversion tables, and returns the
# concatenation of every table as serialized by dump_binary_case_table().
sub build_charmaps_data()
{
    # MAP_FOLDDIGITS: [ first code point, value of first digit, run length ]
    my @digit_runs =
    (
        [ 0x3007,  0,  1 ],  # Ideographic Zero
        [ 0x0c78,  0,  4 ],  # Telugu Fraction Digits
        [ 0x0c7c,  1,  3 ],  # Telugu Fraction Digits
        [ 0x3021,  1,  9 ],  # Hangzhou Numerals
        [ 0xa8e0,  0, 10 ],  # Combining Devanagari Digits
        [ 0x10107, 1,  9 ],  # Aegean Numbers
        [ 0x10320, 1,  1 ],  # Old Italic Numerals
        [ 0x10321, 5,  1 ],  # Old Italic Numerals
    );
    foreach my $run (@digit_runs)
    {
        my ($start, $digit, $count) = @{$run};
        foreach my $pos (0 .. $count - 1)
        {
            $digitmap_table[$start + $pos] = ord('0') + $digit + $pos;
        }
    }

    # LCMAP_HIRAGANA/KATAKANA: the two blocks are 0x60 code points apart
    my (@hiragana_table, @katakana_table);
    my @hiragana = (0x3041..0x3096, 0x309d..0x309e);
    my @katakana = map { $_ + 0x60 } @hiragana;
    @hiragana_table[@katakana] = @hiragana;
    @katakana_table[@hiragana] = @katakana;

    # LCMAP_HALFWIDTH/FULLWIDTH
    @halfwidth_table[0x2018, 0x2019, 0x201c, 0x201d, 0x309b, 0x309c] =
        (0x0027, 0x0027, 0x0022, 0x0022, 0xff9e, 0xff9f);
    @fullwidth_table[0x309b, 0x309c] = (0x3099, 0x309a);

    # tables are emitted in this fixed order
    my @tables =
    (
        \@digitmap_table,             # MAP_FOLDDIGITS
        \@cjk_compat_table,           # CJK compatibility map
        \@hiragana_table,             # LCMAP_HIRAGANA
        \@katakana_table,             # LCMAP_KATAKANA
        \@halfwidth_table,            # LCMAP_HALFWIDTH
        \@fullwidth_table,            # LCMAP_FULLWIDTH
        \@chinese_traditional_table,  # LCMAP_TRADITIONAL_CHINESE
        \@chinese_simplified_table,   # LCMAP_SIMPLIFIED_CHINESE
    );

    # FIXME: some more unknown tables here

    my $data = "";
    foreach my $table (@tables)
    {
        $data .= dump_binary_case_table( @{$table} );
    }
    return $data;
}
################################################################
# build the geoids table for locale.nls
#
# Returns a binary blob made of a 7-dword header, one fixed-size record
# per entry of @geoids, and a name index sorted by name.
# Header fields: [0],[1] the UTF-16LE string "geo", [2] total size,
# [3] header size, [4] number of records, [5] offset of the name index,
# [6] number of index entries.
sub build_geoids_data()
{
    my $data = "";
    my %index;
    my $idx = 0;
    my @geo_header = (0x00650067, 0x0000006f, 0, 4 * 7, scalar @geoids, 0, 0);

    foreach my $geo (@geoids)
    {
        # An aliased entry keeps its own id but takes every other field from
        # the alias target.  Use a separate variable for the target: $geo is
        # an alias to the @geoids element, so assigning to it would replace
        # the entry in the global array and break any later use of it.
        my $id = $geo->{id};
        my $info = defined $geo->{alias} ? $geo->{alias} : $geo;
        my $lat = "0.000";
        my $long = "0.000";
        my $iso2 = $info->{iso2} || "XX";
        my $iso3 = $info->{iso3} || "XX";
        my $isregion = $info->{region} || (defined $info->{uncode} && !defined $info->{iso2});
        my $sintlsymbol = $info->{sintlsymbol} || "XDR";
        my $scurrency = $info->{scurrency} || "\x{00a4}";

        $data .= pack( "L<", $id );
        $data .= pad_string( 24, encode( "UTF16LE", $lat ));
        $data .= pad_string( 24, encode( "UTF16LE", $long ));
        $data .= pack( "L<2", $isregion ? 14 : 16, $info->{parentid} || 39070 );
        $data .= pad_string( 8, encode( "UTF16LE", $iso2 ));
        $data .= pad_string( 8, encode( "UTF16LE", $iso3 ));
        $data .= pack( "S<2", $info->{uncode} || 0, $info->{dialcode} || 0 );
        $data .= pad_string( 8, encode( "UTF16LE", $sintlsymbol ));
        $data .= pad_string( 16, encode( "UTF16LE", $scurrency ));
        # the index maps the (target) name to the position of this record
        $index{$info->{name}} = $idx if $info->{name};
        $idx++;
    }
    $index{"XX"} = $index{"001"};

    $geo_header[5] = $geo_header[3] + length $data;
    $geo_header[6] = scalar keys %index;

    foreach my $name (sort keys %index)
    {
        $data .= pad_string( 8, encode( "UTF16LE", $name ));
        $data .= pack "L<", $index{$name};
    }

    $geo_header[2] = $geo_header[3] + length $data;
    return pack( "L<7", @geo_header ) . $data;
}
################################################################
# build a binary locale table
#
# $filename:  path of the file to generate
# $chartypes: already-built character type data, stored first in the file
#
# The file starts with an 8-dword header holding the offsets of the
# chartypes, locales, charmaps, geoids and scripts sections (entries 1-3
# are left at zero).  The data is written to "$filename.new" and then
# handed to save_file().  Dies if the output file cannot be written.
sub dump_locales($$)
{
    my ($filename, $chartypes) = @_;

    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";

    my $locale_data = build_locale_data();
    my $charmaps_data = build_charmaps_data();
    my $geoids_data = build_geoids_data();
    my $scripts_data = "";  # FIXME

    my @header = ( 0 ) x 8;
    $header[0] = 4 * scalar @header;                  # chartypes offset
    $header[4] = $header[0] + length $chartypes;      # locales offset
    $header[5] = $header[4] + length $locale_data;    # charmaps offset
    $header[6] = $header[5] + length $charmaps_data;  # geoids offset
    $header[7] = $header[6] + length $geoids_data;    # scripts offset

    open my $output, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    binmode $output;  # the content is raw binary data
    print {$output} pack( "L<*", @header );
    print {$output} $chartypes, $locale_data, $charmaps_data, $geoids_data, $scripts_data;
    # buffered write errors only show up when the handle is closed
    close $output or die "Cannot write $filename.new: $!";
    save_file($filename);
}
################################################################
# return the day of week of the first of the month
#
# $month is 1-based; the result is the gmtime() weekday (0 = Sunday).
sub month_first_dow($$)
{
    my ($year, $month) = @_;
    my $epoch = timegm_modern( 0, 0, 0, 1, $month - 1, $year );
    return (gmtime $epoch)[6];
}
################################################################
# compare system time values
#
# Both arguments are references to 7-element lists; the elements are
# compared numerically in order and the first difference decides.
# Returns -1, 0 or 1 like the <=> operator.
sub compare_systime($$)
{
    my ($left, $right) = @_;
    foreach my $field (0 .. 6)
    {
        my $order = $left->[$field] <=> $right->[$field];
        return $order if $order;
    }
    return 0;
}
################################################################
# compare the zone transition date with the rule date
#
# $zone is a reference to a date specification list (year, month, day
# rule, time) and $rule a reference to an already parsed date list.
# When $zone holds at most a year, only the years are compared and a
# missing year sorts after everything; ties count as "before" (-1).
# Otherwise the zone date is parsed and compared field by field.
sub compare_transition_date($$$$)
{
    my ($stdoff, $isdst, $zone, $rule) = @_;
    my ($year, $in, $on, $at) = @{$zone};

    if (scalar @{$zone} > 1)
    {
        my @date = parse_transition_date( $stdoff, $isdst, $year, $in, $on, $at || 0 );
        return compare_systime( \@date, $rule );
    }

    return 1 if !defined($year) || $year > $rule->[0];
    return -1;
}
################################################################
# get the Windows zone names from the CLDR data
#
# Scans windowsZones.xml line by line.  A "(UTC..." XML comment sets the
# display name for the entries that follow it; every mapZone element for
# territory "001" is recorded.  Returns a hash mapping the "other"
# attribute to [ display name, "type" attribute ].
sub load_windows_zones()
{
    my %zone_names;
    my $display;
    my $path = "cldr-release-$CLDRVERSION/common/supplemental/windowsZones.xml";
    my $xml = open_data_file( "cldr", $path );

    while (defined( my $line = <$xml> ))
    {
        $display = $1 if $line =~ /<!-- +(\(UTC.*) -->.*/;
        next unless $line =~ /<mapZone other="(.*)" territory="001" type="(.*)"\/>/;
        $zone_names{$1} = [ $display, $2 ];
    }
    close $xml;
    return %zone_names;
}
5419 ################################################################
5420 # parse a transition date specification from the tzdata files
# Convert a tzdata date specification into the list
# ($year, $mon, $week, $dow, $hour, $min, $sec).
#   $stdoff, $isdst: used to adjust times that carry a "u" or "s" suffix
#   $year, $in:      year and month name ("Jan".."Dec", defaults to month 1)
#   $on:             day rule: "lastDay", "Day>=N", "Day<=N" or a day number
#                    (defaults to "1")
#   $at:             time of day "H", "H:M" or "H:M:S" with an optional
#                    u/w/s suffix (defaults to "0")
# Dies when $on or $at matches none of these forms.
5421 sub parse_transition_date($$@)
# integer arithmetic for the whole sub, so the divisions below truncate
5423 use integer;
5424 my ($stdoff, $isdst, $year, $in, $on, $at) = @_;
5426 $on = "1" unless defined $on;
5427 $at = "0" unless defined $at;
5429 my %months = ( Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6,
5430 Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12 );
5431 my %days = ( Sun => 0, Mon => 1, Tue => 2, Wed => 3, Thu => 4, Fri => 5, Sat => 6 );
5433 my $mon = $in ? $months{$in} : 1;
# $time is in minutes; $flag is the u/w/s suffix of the time specification
5434 my ($week, $dow, $flag, $time, $sec);
# weekday of the first day of the month, 0 = Sunday
5435 my $first = month_first_dow( $year, $mon );
# "lastDay": week 5 is used for the last occurrence in the month
5437 if ($on =~ /^last(.*)$/)
5439 $week = 5;
5440 $dow = $days{$1};
# "Day>=N": a day number of 25 or more is treated like "last"
5442 elsif ($on =~ /^(.*)>=(\d+)$/)
5444 $dow = $days{$1};
5445 my $diff = ($first + 6 - $dow) % 7;
5446 $week = $2 >= 25 ? 5 : ($2 + 6 + $diff) / 7;
# "Day<=N": a result of week 0 moves to week 5 of the previous month
5448 elsif ($on =~ /^(.*)<=(\d+)$/)
5450 $dow = $days{$1};
5451 my $diff = ($first + $2 + 6 - $dow) % 7;
5452 $week = ($2 + 6 - $diff) / 7;
5453 if (!$week)
5455 $week = 5;
5456 if (!--$mon) { $mon = 12; $year--; }
# plain day number: derive the weekday from the first of the month
5459 elsif ($on =~ /^\d+$/)
5461 $dow = ($first + $on - 1) % 7;
5462 $week = $on >= 25 ? 5 : ($on + 6) / 7;
5464 else
5466 die "unsupported date specification $year $in $on $at";
# time of day: H:M:S, H:M or H, each with an optional u/w/s suffix
5469 if ($at =~ /^(\d+):(\d+):(\d+)([uws]?)$/)
5471 $time = $1 * 60 + $2;
5472 $sec = $3;
5473 $flag = $4;
5475 elsif ($at =~ /^(\d+):(\d+)([uws]?)$/)
5477 $time = $1 * 60 + $2;
5478 $flag = $3;
5480 elsif ($at =~ /^(\d+)([uws]?)$/)
5482 $time = $1 * 60;
5483 $flag = $2;
5485 else
5487 die "unsupported time specification $year $in $on $at";
# a missing suffix is handled like "w"
5490 $flag ||= "w";
# NOTE(review): presumably converts "u" (UTC) and "s" (standard) times to
# local wall-clock time; $stdoff comes from parse_tz_offset() - confirm
5491 $time -= $stdoff if $flag eq "u";
5492 $time += 60 if !$isdst && $flag ne "w";
# the adjustment moved the time before midnight: step back one day, which
# may also step back one week and one month
5494 if ($time < 0) # previous day
5496 $week-- if $week < 5 && $dow == month_first_dow( $year, $mon );
5497 $week-- if $week == 5 && $dow == month_first_dow( $year + ($mon == 12), $mon % 12 + 1 );
5498 if (!$week)
5500 $week = 5;
5501 if (!--$mon) { $mon = 12; $year--; }
5503 $dow = ($dow + 6) % 7;
5504 $time += 24 * 60;
# split the minutes into hours and minutes; seconds default to 0
5507 return ($year, $mon, $week, $dow, $time / 60, $time % 60, $sec || 0);
################################################################
# pack a system time value as a SYSTEMTIME structure
#
# Takes the list ($year, $mon, $week, $dow, $hour, $min, $sec) and
# returns eight little-endian 16-bit words.  The year is always stored
# as 0, the week goes into the day field, and an hour of 24 or more is
# stored as 23:59:59.999.
sub pack_systime(@)
{
    my (undef, $mon, $week, $dow, $hour, $min, $sec) = @_;
    my @clock = (23, 59, 59, 999);
    @clock = ($hour, $min, $sec, 0) if $hour < 24;
    return pack( "S<8", 0, $mon, $dow, $week, @clock );
}
################################################################
# parse a timezone offset from the tzdata files
#
# Takes an offset of the form "[-]H" or "[-]H:M" (any further ":"
# separated field is ignored) and returns it in minutes with the sign
# inverted, e.g. "5:30" gives -330 and "-5:30" gives 330.
sub parse_tz_offset($)
{
    my ($hour, $min) = split /:/, shift;
    $min ||= 0;
    # Take the sign from the text rather than from the numeric value of the
    # hour field: "-0" is not less than zero, so an offset such as "-0:30"
    # would otherwise be handled as positive.
    my $negative = ($hour =~ /^\s*-/);
    my $minutes = abs( $hour ) * 60 + $min;
    return $negative ? $minutes : -$minutes;  # invert sign
}
5530 ################################################################
5531 # build the timezone data
# Generate the timezone string table resource and registry values.
#   $filename: resource file to generate (written as "$filename.new" and
#              then handed to save_file())
#   remaining arguments: names of the tzdata files to read
# For every Windows zone returned by load_windows_zones() this computes
# one binary info record per year from $FIRST_YEAR to $LAST_YEAR, adds the
# zone's registry values under $zonekey and prints its three strings.
# Dies on a tzdata line that is not a Zone, Rule or Link line.
5532 sub dump_timezones($@)
5534 my $filename = shift;
5535 my $FIRST_YEAR = 2000;
5536 my $LAST_YEAR = 2030;
# Windows zone name => [ display name, tzdata zone name ]
5538 my %names = load_windows_zones();
# zone name => list of [ stdoff, rule name, until date fields... ]
5539 my %zones;
# rule name => list of [ save offset, from, to, in, on, at ]
5540 my %rules;
# link name => target zone name
5541 my %links;
# resource ids that are already taken
5542 my %res_indices;
5544 printf "Building $filename\n";
5546 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
5547 print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
5548 print OUTPUT "#include \"winresrc.h\"\n\n";
5549 print OUTPUT "#pragma makedep po\n\n";
5550 print OUTPUT "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n\n";
5551 print OUTPUT "STRINGTABLE\n{\n";
5553 # load tzdata files
5555 foreach my $filename (@_)
5557 my $FILE = open_data_file( "tzdata", $filename );
# name of the zone whose continuation lines are being read, if any
5558 my $zonename;
5559 while (<$FILE>)
5561 chomp;
# strip comments and skip lines that are empty afterwards
5562 s/\#.*$//;
5563 next if /^\s*$/;
5564 my @fields = split /\s+/;
# a Zone line, or a continuation line (empty first field) of the current zone
5565 if ($fields[0] eq "Zone" || ($zonename && $fields[0] eq ""))
5567 shift @fields;
5568 $zonename = shift @fields unless $zonename;
5569 my ($stdoff, $rules, $dummy, @date) = @fields;
5570 $zones{$zonename} ||= [ ];
5571 push @{$zones{$zonename}}, [ parse_tz_offset( $stdoff ), $rules, @date ];
5572 $zonename = undef unless @date; # last entry doesn't have an until date
5573 next;
5575 if ($fields[0] eq "Rule")
5577 shift @fields;
5578 my ($rulename, $from, $to, $dummy, $in, $on, $at, $save) = @fields;
# "only" means a single year; open-ended rules stop at $LAST_YEAR
5579 $to = $from if $to eq "only";
5580 $to = $LAST_YEAR if $to eq "max";
5581 push @{$rules{$rulename}}, [ parse_tz_offset( $save ), $from, $to, $in, $on, $at ];
5582 next;
5584 if ($fields[0] eq "Link")
5586 $links{$fields[2]} = $fields[1];
5587 next;
5589 die "unrecognized line $_";
5591 close $FILE;
# process the Windows zones in case-insensitive name order
5594 foreach my $name (sort { uc($a) cmp uc($b) } keys %names)
5596 my ($display, $zone) = @{$names{$name}};
5597 $zone = $links{$zone} if defined $links{$zone};
5599 # build list of transitions
# each transition is [ stdoff, dst offset, parsed date ]; the dst offset is
# -1 for a transition that comes from a zone line rather than from a rule
5601 my @transitions;
5602 my @from_date = ( 1 );
5603 my $last_stdoff = 0;
5604 for (my $i = 0; $i < scalar @{$zones{$zone}}; $i++)
5606 my ($stdoff, $rule, @until_date) = @{$zones{$zone}->[$i]};
5607 my $isdst = ($last_stdoff != $stdoff);
5608 $from_date[0] ||= $LAST_YEAR;
5609 my @systime = parse_transition_date( $stdoff, $isdst, @from_date );
5610 push @transitions, [ $stdoff, -1, \@systime ];
5612 if (defined $rules{$rule})
5614 foreach my $r (@{$rules{$rule}})
5616 my ($offset, $from, $to, $in, $on, $at) = @{$r};
5617 foreach my $year ($from..$to)
# only keep rule dates that fall inside this zone line's validity period
5619 next if $year < $from_date[0];
5620 next if $until_date[0] && $year > $until_date[0];
5621 my @systime = parse_transition_date( $stdoff, !!$offset, $year, $in, $on, $at );
5622 next if compare_transition_date( $stdoff, $isdst, \@until_date, \@systime ) <= 0;
5623 my $ret = compare_transition_date( $stdoff, $isdst, \@from_date, \@systime );
5624 next if $ret > 0;
5625 pop @transitions if !$ret; # remove transition if there's a dst change at the same time
5626 push @transitions, [ $stdoff, $offset, \@systime ];
# the next zone line starts where this one ends
5630 @from_date = @until_date;
5631 $last_stdoff = $stdoff;
5633 @transitions = sort { compare_systime( $a->[2], $b->[2] ) } @transitions;
5635 # build per-year dynamic info
# $info[year] holds the packed record of that year
5637 my @info;
5638 my $last_dstoff = 0;
# last year for which a std or dst date was found (0 if none yet)
5639 my $last_dst = 0;
5640 my $year = $FIRST_YEAR;
5641 while ($year <= $LAST_YEAR)
# consume transitions of earlier years, remembering only their offset
5643 if (@transitions && $transitions[0]->[2]->[0] < $year)
5645 $last_stdoff = $transitions[0]->[0];
5646 shift @transitions;
5647 next;
5649 my ($std, $dst, @trans);
5650 my $cur_stdoff = $last_stdoff;
# default daylight offset: -60 minutes, or 0 for zones whose name starts with UTC
5651 my $cur_dstoff = ($name =~ /^UTC/) ? 0 : -60;
# handle all the transitions of the current year
5652 while (@transitions && $transitions[0]->[2]->[0] == $year)
5654 my $t = shift @transitions;
5655 my ($stdoff, $dstoff, $systime) = @{$t};
5656 $systime = pack_systime( @{$systime} );
5657 if (!$dstoff) # std
5659 $cur_stdoff = $stdoff unless $std;
5660 $std = $systime;
5662 elsif ($dstoff != -1) # dst
5664 $cur_dstoff = $dstoff unless $dst;
5665 $dst ||= $systime;
5667 elsif ($stdoff != $last_stdoff) # rule transition
5669 # Handle a special case: Samoa moved to the other side of
5670 # the date line between 2011-12-03 and 2012-01-01,
5671 # entirely skipping the day 2011-12-31. We ignore this
5672 # change because it happens on a year boundary and more
5673 # importantly it would generate on offset of -25 hours,
5674 # which some programs (e.g., Mono) do not like. See
5675 # https://bugs.winehq.org/show_bug.cgi?id=51758
5677 if ($last_stdoff - $stdoff < 24 * 60)
5679 @trans = ($last_stdoff, $stdoff, $systime);
5680 $cur_stdoff = $stdoff;
5683 elsif ($dst) # rule transition with no stdoff change
5685 $std = $systime;
5687 $last_dstoff = ($dstoff == -1) ? 0 : $dstoff;
5689 $last_stdoff = $cur_stdoff;
5691 if ($cur_dstoff > 0) # swap std and dst to ensure that offset is negative
5693 ($std, $dst) = ($dst, $std);
5694 $cur_stdoff += $cur_dstoff;
5695 $cur_dstoff = -$cur_dstoff;
# a change of standard offset is stored as either the dst or the std date;
# @trans is (old stdoff, new stdoff, packed date)
5698 if (@trans)
5700 # heuristic to prefer switching dst
5701 if ($last_dst == $year - 1 || (!$last_dst && $trans[0] > $trans[1]))
5703 $dst ||= $trans[2];
5704 $cur_stdoff = $trans[0];
5705 $cur_dstoff = $trans[1] - $trans[0];
5707 else
5709 $std ||= $trans[2];
5710 $cur_stdoff = $trans[1];
5711 $cur_dstoff = $trans[0] - $trans[1];
# when only one of the two dates is known, the other defaults to January 1
5715 if ($std || $dst)
5717 $std ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
5718 $dst ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
5719 $last_dst = $year;
# no transition this year: both dates are all zeros
5721 else
5723 $std = pack "S<8", 0;
5724 $dst = pack "S<8", 0;
5725 $cur_stdoff += $last_dstoff;
# record layout: three signed dwords (stdoff, 0, dstoff), then the std and dst dates
5727 $info[$year++] = pack( "l<3", $cur_stdoff, 0, $cur_dstoff ) . $std . $dst;
5730 # output registry keys
5732 my $std_name = $name eq "UTC" ? "Coordinated Universal Time" : $name;
5733 my $dlt_name = $std_name =~ s/Standard Time/Daylight Time/r;
# resource id: last three hex digits of the SHA-1 of the name, times 16;
# on a collision move up in steps of 16 until an unused id is found
5734 my $res_idx = hex( substr( Digest::SHA::sha1_hex($name), -3, 3 )) << 4;
5735 $res_idx += 16 while exists $res_indices{$res_idx};
5736 $res_indices{$res_idx} = 1;
5738 add_registry_string_value( $zonekey, $name, "Display", $display );
5739 add_registry_string_value( $zonekey, $name, "Std", $std_name );
5740 add_registry_string_value( $zonekey, $name, "Dlt", $dlt_name );
5741 add_registry_string_value( $zonekey, $name, "MUI_Std", sprintf( "\@tzres.dll,-%u", $res_idx ));
5742 add_registry_string_value( $zonekey, $name, "MUI_Dlt", sprintf( "\@tzres.dll,-%u", $res_idx + 1 ));
5743 add_registry_string_value( $zonekey, $name, "MUI_Display", sprintf( "\@tzres.dll,-%u", $res_idx + 2 ));
# the TZI value is the record of the last year
5744 add_registry_binary_value( $zonekey, $name, "TZI", $info[$LAST_YEAR] );
5746 printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx, $std_name;
5747 printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx + 1, $dlt_name;
5748 printf OUTPUT "%7d \"%s\"\n", $res_idx + 2, $display;
# trim identical records from both ends of the year range; the
# "Dynamic DST" key is only created if more than one year remains
5750 my $first_year = $FIRST_YEAR;
5751 my $last_year = $LAST_YEAR;
5752 $last_year-- while $last_year > $FIRST_YEAR && $info[$last_year] eq $info[$last_year - 1];
5753 $first_year++ while $first_year < $last_year && $info[$first_year] eq $info[$last_year];
5755 next if $last_year <= $first_year;
5757 foreach my $i ($first_year..$last_year)
5759 add_registry_binary_value( $zonekey, "$name\\Dynamic DST", $i, $info[$i] );
5761 add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "FirstEntry", $first_year );
5762 add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "LastEntry", $last_year );
5765 print OUTPUT "}\n";
5766 close OUTPUT;
5767 save_file($filename);
################################################################
# build the script to create registry keys
#
# $filename: script file to generate (written as "$filename.new" and then
#            handed to save_file())
# %keys:     backslash-separated key path => [ default value, value lines... ]
#
# Keys are written as nested brace blocks below HKLM.  A path component
# starting with "-" is emitted with a "NoRemove" prefix, and a component
# containing white space is quoted.  Dies if the output cannot be written.
sub dump_registry_script($%)
{
    my ($filename, %keys) = @_;
    my $indent = 1;
    my @prev;  # components of the previous key that are still open

    printf "Building %s\n", $filename;
    open my $output, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    print {$output} "HKLM\n{\n";

    # compare case-insensitively, with the path separator sorting before
    # any other character
    foreach my $k (sort { ($a =~ tr/a-z\\/A-Z\001/r) cmp ($b =~ tr/a-z\\/A-Z\001/r) } keys %keys)
    {
        # skip the leading components shared with the previous key and
        # close the blocks of the remaining ones
        my @subkeys = split /\\/, $k;
        while (@prev && @subkeys && $prev[0] eq $subkeys[0]) { shift @prev; shift @subkeys; }
        while (@prev) { printf {$output} "%*s}\n", 4 * --$indent, ""; shift @prev; }

        my ($def, @vals) = @{$keys{$k}};
        foreach my $i (0 .. $#subkeys)
        {
            my $name = $subkeys[$i];
            my $prefix = "";
            if ($name =~ /^-/)
            {
                $name =~ s/^-//;
                $prefix = "NoRemove ";
            }
            if ($name =~ /\s/)
            {
                $name = "'$name'";
            }
            # only the last component carries the default value of the key
            my $default = ($i == $#subkeys && $def) ? " = s '$def'" : "";
            printf {$output} "%*s%s%s%s\n%*s{\n", 4 * $indent, "", $prefix, $name,
                             $default, 4 * $indent, "";
            $indent++;
        }
        # pass the value as an argument: it may contain "%" characters and
        # must not be interpreted as part of the format string
        foreach my $v (sort @vals) { printf {$output} "%*sval %s\n", 4 * $indent, "", $v; }
        @prev = split /\\/, $k;
    }
    while (@prev) { printf {$output} "%*s}\n", 4 * --$indent, ""; shift @prev; }
    print {$output} "}\n";
    # buffered write errors only show up when the handle is closed
    close $output or die "Cannot write $filename.new: $!";
    save_file($filename);
}
################################################################
# save a file if modified
#
# Compares "$file.new" with $file.  If $file exists and both have the same
# content, the new file is removed so that $file stays untouched;
# otherwise the new file replaces $file.  Dies if the rename fails.
sub save_file($)
{
    my $file = shift;
    my $new = "$file.new";

    # Compare in-process instead of running "cmp" through the shell, so that
    # file names with spaces or shell metacharacters are handled correctly.
    require File::Compare;
    if (-f $file && File::Compare::compare( $file, $new ) == 0)
    {
        unlink( $new ) or warn "Cannot remove $new: $!\n";
    }
    else
    {
        rename( $new, $file ) or die "Cannot rename $new to $file: $!";
    }
}
################################################################
# main routine
#
# Loads the source data and then regenerates every output file, in a
# fixed order.  Consecutive calls to the same dumper are grouped in loops.

chdir ".." if -f "./make_unicode";
load_data();

foreach my $target ("dlls/gdi32/uniscribe/direction.c", "dlls/dwrite/direction.c", "dlls/wineps.drv/direction.c")
{
    dump_bidi_dir_table( $target );
}
foreach my $target ("dlls/gdi32/uniscribe/mirror.c", "dlls/dwrite/mirror.c")
{
    dump_mirroring( $target );
}
foreach my $target ("dlls/gdi32/uniscribe/bracket.c", "dlls/dwrite/bracket.c")
{
    dump_bracket( $target );
}
dump_shaping( "dlls/gdi32/uniscribe/shaping.c" );
dump_arabic_shaping( "dlls/dwrite/shapers/arabic_table.c" );
foreach my $target ("dlls/gdi32/uniscribe/linebreak.c", "dlls/dwrite/linebreak.c")
{
    dump_linebreak( $target );
}
dump_scripts( "dlls/dwrite/scripts" );
dump_indic( "dlls/gdi32/uniscribe/indicsyllable.c" );
dump_vertical( "dlls/win32u/vertical.c", 1 );
dump_vertical( "dlls/wineps.drv/vertical.c", 0 );
dump_intl_nls("nls/l_intl.nls");
foreach my $target ("nls/normnfc.nls", "nls/normnfd.nls", "nls/normnfkc.nls", "nls/normnfkd.nls", "nls/normidna.nls")
{
    dump_norm_table( $target );
}

# the sort key dumper returns the character type data stored in locale.nls
my $chartypes = dump_sortkey_table( "nls/sortdefault.nls" );
dump_locales( "nls/locale.nls", $chartypes );

foreach my $file (@allfiles)
{
    dump_msdata_codepage( $file );
}
dump_eucjp_codepage();
dump_timezones( "dlls/tzres/tzres.rc", @timezone_files );
dump_registry_script( "dlls/kernelbase/kernelbase.rgs", %registry_keys );

exit 0;
5866 # Local Variables:
5867 # compile-command: "./make_unicode"
5868 # End: