msi: Make TransformView_Create static.
[wine.git] / tools / make_unicode
blob4dde6c010ee7e37820e91f8376cf9f9bd4c91376
1 #!/usr/bin/perl -w
3 # Generate code page .c files from ftp.unicode.org descriptions
5 # Copyright 2000 Alexandre Julliard
7 # This library is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU Lesser General Public
9 # License as published by the Free Software Foundation; either
10 # version 2.1 of the License, or (at your option) any later version.
12 # This library is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # Lesser General Public License for more details.
17 # You should have received a copy of the GNU Lesser General Public
18 # License along with this library; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 use strict;
23 use XML::LibXML;
24 use Digest::SHA;
25 use Encode;
26 use Time::Local qw(timegm_modern);
# Release identifiers of the upstream data sets.  They are interpolated
# into the download URLs (and some local file names) in %data_files.
my $UNIVERSION    = '15.0.0';    # used in the unicode.org UCD / Unihan / IDNA URLs
my $CLDRVERSION   = '43';        # used in the CLDR release tag URL
my $ISO639VERSION = '20230123';  # used in the ISO 639-3 code tables URL
my $TZVERSION     = '2023c';     # used in the IANA tzdata URL
# Upstream data files, keyed by a short identifier.  Every entry has the
# download URL ("url") and the expected checksum as a 64-digit hex string
# ("sha", presumably SHA-256 -- confirm against the code that verifies it).
# Some entries also carry an explicit "name".
my %data_files =
(
    ucd => {
        url  => "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip",
        name => "UCD-$UNIVERSION.zip",
        sha  => '5fbde400f3e687d25cc9b0a8d30d7619e76cb2f4c3e85ba9df8ec1312cb6718c',
    },
    unihan => {
        url  => "https://www.unicode.org/Public/$UNIVERSION/ucd/Unihan.zip",
        name => "Unihan-$UNIVERSION.zip",
        sha  => '24b154691fc97cb44267b925d62064297086b3f896b57a8181c7b6d42702a026',
    },
    idna => {
        url  => "https://www.unicode.org/Public/idna/$UNIVERSION/IdnaMappingTable.txt",
        name => "IdnaMappingTable-$UNIVERSION.txt",
        sha  => 'cc8522199541d60326a42a8f91f8748fd15630a42502dd2cf4878e81e2066ead',
    },
    cldr => {
        url => "https://github.com/unicode-org/cldr/archive/refs/tags/release-$CLDRVERSION.zip",
        sha => '132cdd24e479abb6e86db1429931cec3dada485fd41da39ece3c08e531c477df',
    },
    cldr33 => {
        url => 'https://www.unicode.org/Public/cldr/33/cldr-common-33.0.zip',
        sha => 'fa3490082c086d21257153609642f54fcf788fcfda4966fe67f3f6daca0d58b9',
    },
    sorting => {
        url => 'https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows 10 Sorting Weight Table.txt',
        sha => '81fcfa1e5ed3e3a94d329959ff7d97d522ddf9d653d2c4d6ddcccc5cd4df663f',
    },
    codepages => {
        url => 'https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows Supported Code Page Data Files.zip',
        sha => '5074e6dd253056ba61fc6c870c9a955467855129c6ad3a51761c386b301b125a',
    },
    iso639 => {
        url => "https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_$ISO639VERSION.zip",
        sha => '884faa6cc5ac5181ed7969eed75355c1bc665447614cf4c06c62e87b38fe6a97',
    },
    ksx1001 => {
        url => 'https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/KSX1001.TXT',
        sha => 'd8d2a35206ac0ea2865f5d801c9d6717f735bf46f263a658a64a960abe59e371',
    },
    jis0208 => {
        url => 'https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT',
        sha => '1c571870457f19c97720631fa83ee491549a96ba1436da1296786a67d8632e87',
    },
    jis0212 => {
        url => 'https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT',
        sha => '477820bb3055bbcc90880d788cd95607d221dc94457bae249231adecf13c12e6',
    },
    tzdata => {
        url => "https://data.iana.org/time-zones/releases/tzdata$TZVERSION.tar.gz",
        sha => '3f510b5d1b4ae9bb38e485aa302a776b317fb3637bdb6404c4adf7b6cadd965c',
    },
);
# Default char for undefined mappings
my $DEF_CHAR = ord('?');

# Last valid Unicode character
my $MAX_CHAR = 0x10FFFF;

# Backslash-separated key paths (presumably registry keys -- confirm at the
# point of use).  Single quotes: each \\ still yields one backslash.
my $nlskey  = '-SYSTEM\\-CurrentControlSet\\-Control\\-Nls';
my $zonekey = '-Software\\-Microsoft\\-Windows NT\\-CurrentVersion\\Time Zones';
# Code page description files, listed as "CodpageFiles/<id>.txt" in
# ascending code page order.  The directory name "CodpageFiles" is spelled
# exactly as in the original list.
my @allfiles = map { "CodpageFiles/$_.txt" } qw(
    037 437 500 708 720 737 775
    850 852 855 857 860 861 862 863 864 865 866 869 874 875
    932 936 949 950
    1026 1250 1251 1252 1253 1254 1255 1256 1257 1258 1361
    10000 10001 10002 10003 10004 10005 10006 10007 10008
    10010 10017 10021 10029 10079 10081 10082
    20127 20866 21866
    28591 28592 28593 28594 28595 28596 28597 28598 28599 28603 28605
);
# Names of the time zone source files to process, in this order.
my @timezone_files =
(
    "africa",
    "antarctica",
    "asia",
    "australasia",
    "europe",
    "northamerica",
    "southamerica",
    "etcetera",
    "backward",
);
# Character type flag bits, keyed by class name.  The values are written
# as bit shifts; each equals the hexadecimal mask noted alongside.
my %ctype =
(
    # CT_CTYPE1
    upper  => 1 << 0,                   # 0x0001
    lower  => 1 << 1,                   # 0x0002
    digit  => 1 << 2,                   # 0x0004
    space  => 1 << 3,                   # 0x0008
    punct  => 1 << 4,                   # 0x0010
    cntrl  => 1 << 5,                   # 0x0020
    blank  => 1 << 6,                   # 0x0040
    xdigit => 1 << 7,                   # 0x0080
    alpha  => (1 << 8) | (1 << 31),     # 0x0100 | 0x80000000
    defin  => 1 << 9,                   # 0x0200
    # CT_CTYPE3 in high 16 bits
    nonspacing    => 1 << 16,           # 0x00010000
    diacritic     => 1 << 17,           # 0x00020000
    vowelmark     => 1 << 18,           # 0x00040000
    symbol        => 1 << 19,           # 0x00080000
    katakana      => 1 << 20,           # 0x00100000
    hiragana      => 1 << 21,           # 0x00200000
    halfwidth     => 1 << 22,           # 0x00400000
    fullwidth     => 1 << 23,           # 0x00800000
    ideograph     => 1 << 24,           # 0x01000000
    kashida       => 1 << 25,           # 0x02000000
    lexical       => 1 << 26,           # 0x04000000
    highsurrogate => 1 << 27,           # 0x08000000
    lowsurrogate  => 1 << 28,           # 0x10000000
);
# Numeric codes for the bracket type letters "o" and "c".
my %bracket_types = ( o => 0, c => 1 );
# Numeric codes for the Indic category names.  The codes are consecutive,
# so each name's code is its position in the list below, starting at
# 0x0000 for "Other" and ending at 0x0023.
my %indic_types = do
{
    my @names = qw(
        Other
        Bindu
        Visarga
        Avagraha
        Nukta
        Virama
        Vowel_Independent
        Vowel_Dependent
        Vowel
        Consonant_Placeholder
        Consonant
        Consonant_Dead
        Consonant_Succeeding_Repha
        Consonant_Subjoined
        Consonant_Medial
        Consonant_Final
        Consonant_Head_Letter
        Modifying_Letter
        Tone_Letter
        Tone_Mark
        Register_Shifter
        Consonant_Preceding_Repha
        Pure_Killer
        Invisible_Stacker
        Gemination_Mark
        Cantillation_Mark
        Non_Joiner
        Joiner
        Number_Joiner
        Number
        Brahmi_Joining_Number
        Consonant_With_Stacker
        Consonant_Prefixed
        Syllable_Modifier
        Consonant_Killer
        Consonant_Initial_Postfixed
    );
    map { $names[$_] => $_ } 0 .. $#names;
};
# Numeric codes for the matra position names.  The codes are consecutive
# and start at 0x01, so each name's code is its list position plus one.
my %matra_types = do
{
    my @names = qw(
        Right
        Left
        Visual_Order_Left
        Left_And_Right
        Top
        Bottom
        Top_And_Bottom
        Top_And_Right
        Top_And_Left
        Top_And_Left_And_Right
        Bottom_And_Right
        Top_And_Bottom_And_Right
        Overstruck
        Invisible
        Bottom_And_Left
        Top_And_Bottom_And_Left
    );
    map { $names[$_] => $_ + 1 } 0 .. $#names;
};
# Numeric codes for the break class abbreviations.  The codes are
# consecutive and start at 0x0001 ("BK"), ending at 0x002b ("ZWJ"), so
# each code is the abbreviation's list position plus one.
my %break_types = do
{
    my @classes = qw(
        BK CR LF CM SG GL CB SP
        ZW NL WJ JL JV JT H2 H3
        XX OP CL CP QU NS EX SY
        IS PR PO NU AL ID IN HY
        BB BA SA AI B2 HL CJ RI
        EB EM ZWJ
    );
    map { $classes[$_] => $_ + 1 } 0 .. $#classes;
};
# Numeric codes for the vertical type abbreviations.
my %vertical_types = ( R => 0, U => 1, Tr => 2, Tu => 3 );
# Character type flags for each two-letter general category code.  Every
# category carries the "defin" flag; categories that share the same flag
# set are grouped together.
my %categories = do
{
    my ($defin, $alpha, $upper, $lower, $digit, $space, $cntrl, $punct, $nonspacing, $symbol) =
        @ctype{qw(defin alpha upper lower digit space cntrl punct nonspacing symbol)};

    my %flags_for =
    (
        Lu => $defin | $alpha | $upper,             # Letter, Uppercase
        Ll => $defin | $alpha | $lower,             # Letter, Lowercase
        Lt => $defin | $alpha | $upper | $lower,    # Letter, Titlecase
        Mn => $defin | $nonspacing,                 # Mark, Non-Spacing
        Nd => $defin | $digit,                      # Number, Decimal Digit
    );

    # Number, Letter / Letter, Modifier / Letter, Other
    $flags_for{$_} = $defin | $alpha for qw(Nl Lm Lo);

    # Mark, Spacing Combining / Mark, Enclosing / Number, Other /
    # Other, Surrogate / Other, Private Use / Other, Not Assigned
    $flags_for{$_} = $defin for qw(Mc Me No Cs Co Cn);

    # Separator: Space, Line, Paragraph
    $flags_for{$_} = $defin | $space for qw(Zs Zl Zp);

    # Other, Control / Other, Format
    $flags_for{$_} = $defin | $cntrl for qw(Cc Cf);

    # Punctuation: Connector, Dash, Open, Close, Initial quote, Final quote, Other
    $flags_for{$_} = $defin | $punct for qw(Pc Pd Ps Pe Pi Pf Po);

    # Symbol: Math, Currency, Modifier, Other
    $flags_for{$_} = $defin | $symbol for qw(Sm Sc Sk So);

    %flags_for;
};
# a few characters need additional categories that cannot be determined automatically
#
# Maps a character class name (the same names used as keys of %ctype) to
# the list of code points that get that class in addition to whatever is
# derived elsewhere.  Each list is a set of code points: an entry listed
# twice adds nothing, so the duplicated 0x30ad that used to appear in
# "fullwidth" has been dropped.
# NOTE(review): this assumes the consumer treats each list as a set; the
# code that reads this table is not visible here -- confirm.
my %special_categories =
(
    xdigit     => [ ord('0')..ord('9'), ord('A')..ord('F'), ord('a')..ord('f'),
                    0xff10..0xff19, 0xff21..0xff26, 0xff41..0xff46 ],
    space      => [ 0x09..0x0d, 0x85 ],
    blank      => [ 0x09, 0x20, 0xa0, 0x3000, 0xfeff ],
    cntrl      => [ 0x070f, 0x200c, 0x200d,
                    0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e,
                    0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
                    0xfff9, 0xfffa, 0xfffb ],
    punct      => [ 0x24, 0x2b, 0x3c..0x3e, 0x5e, 0x60, 0x7c, 0x7e, 0xa2..0xbe,
                    0xd7, 0xf7 ],
    digit      => [ 0xb2, 0xb3, 0xb9 ],
    lower      => [ 0xaa, 0xba, 0x2071, 0x207f ],
    nonspacing => [ 0xc0..0xc5, 0xc7..0xcf, 0xd1..0xd6, 0xd8..0xdd, 0xe0..0xe5, 0xe7..0xef,
                    0xf1..0xf6, 0xf8..0xfd, 0xff, 0x6de, 0x1929..0x192b, 0x302e..0x302f ],
    diacritic  => [ 0x5e, 0x60, 0xb7, 0xd8, 0xf8 ],
    symbol     => [ 0x09..0x0d, 0x20..0x23, 0x25, 0x26, 0x28..0x2a, 0x2c, 0x2e..0x2f, 0x3a..0x40,
                    0x5b..0x60, 0x7b..0x7e, 0xa0..0xa9, 0xab..0xb1, 0xb4..0xb8, 0xbb, 0xbf,
                    0x02b9..0x02ba, 0x02c6..0x02cf ],
    halfwidth  => [ 0x20..0x7e, 0xa2..0xa3, 0xa5..0xa6, 0xac, 0xaf, 0x20a9 ],
    fullwidth  => [ 0x2018..0x2019, 0x201c..0x201d, 0x3000..0x3002, 0x300c..0x300d, 0x309b..0x309c,
                    0x30a1..0x30ab, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9,
                    0x30bb, 0x30bd, 0x30bf, 0x30c1, 0x30c3, 0x30c4, 0x30c6, 0x30c8, 0x30ca..0x30cf,
                    0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de..0x30ed, 0x30ef, 0x30f2..0x30f3, 0x30fb,
                    0x3131..0x3164 ],
    ideograph  => [ 0x3006..0x3007 ],
    lexical    => [ 0x22, 0x24, 0x27, 0x2d, 0x2f, 0x3d, 0x40, 0x5c, 0x5e..0x60, 0x7e,
                    0xa8, 0xaa, 0xad, 0xaf, 0xb4, 0xb8, 0xba,
                    0x02b0..0x02b8, 0x02bc, 0x02c7, 0x02ca..0x02cb, 0x02cf, 0x02d8..0x02dd, 0x02e0..0x02e3,
                    0x037a, 0x0384..0x0385, 0x0387, 0x0559..0x055a, 0x0640, 0x1fbd..0x1fc1,
                    0x1fcd..0x1fcf, 0x1fdd..0x1fdf, 0x1fed..0x1fef, 0x1ffd..0x1ffe, 0x2010..0x2015,
                    0x2032..0x2034, 0x2038, 0x2043..0x2044, 0x207b..0x207c, 0x207f, 0x208b..0x208c,
                    0x2212, 0x2215..0x2216, 0x2500, 0x2504..0x2505, 0x2508..0x2509, 0x254c..0x254d,
                    0x3003, 0x301c, 0x3030..0x3035, 0x309b..0x309e, 0x30fd..0x30fe, 0xfe31..0xfe32,
                    0xfe58, 0xfe63, 0xfe66, 0xfe68..0xfe69, 0xfe6b, 0xff04, 0xff07, 0xff0d, 0xff0f,
                    0xff1d, 0xff20, 0xff3c, 0xff3e, 0xff40, 0xff5e ],
    kashida    => [ 0x0640 ],
);
# Numeric direction codes for the bidirectional class abbreviations,
# listed here in ascending code order.  All explicit embedding, override
# and isolate controls share the single code 15.
my %directions =
(
    L   => 1,     # Left-to-Right
    R   => 2,     # Right-to-Left
    EN  => 3,     # European Number
    ES  => 4,     # European Number Separator
    ET  => 5,     # European Number Terminator
    AN  => 6,     # Arabic Number
    CS  => 7,     # Common Number Separator
    B   => 8,     # Paragraph Separator
    S   => 9,     # Segment Separator
    WS  => 10,    # Whitespace
    ON  => 11,    # Other Neutrals
    AL  => 12,    # Right-to-Left Arabic
    NSM => 13,    # Non-Spacing Mark
    BN  => 14,    # Boundary Neutral
    # Left-to-Right Embedding / Override, Right-to-Left Embedding / Override,
    # Pop Directional Format, Left-to-Right / Right-to-Left / First Strong
    # Isolate, Pop Directional Isolate
    (map { $_ => 15 } qw(LRE LRO RLE RLO PDF LRI RLI FSI PDI)),
);
# C2_* type code for each bidirectional class abbreviation.  The trailing
# comment on each line names the C2_* constant the number stands for.
my %c2_types =
(
    BN  => 0,     # C2_NOTAPPLICABLE
    L   => 1,     # C2_LEFTTORIGHT
    R   => 2,     # C2_RIGHTTOLEFT
    AL  => 2,     # C2_RIGHTTOLEFT
    EN  => 3,     # C2_EUROPENUMBER
    ES  => 4,     # C2_EUROPESEPARATOR
    ET  => 5,     # C2_EUROPETERMINATOR
    AN  => 6,     # C2_ARABICNUMBER
    CS  => 7,     # C2_COMMONSEPARATOR
    B   => 8,     # C2_BLOCKSEPARATOR
    S   => 9,     # C2_SEGMENTSEPARATOR
    WS  => 10,    # C2_WHITESPACE
    # Everything else is C2_OTHERNEUTRAL
    (map { $_ => 11 } qw(NSM ON LRE LRO RLE RLO PDF LRI RLI FSI PDI)),
);
# Numeric codes for the bidirectional class abbreviations.  The codes are
# consecutive from 0, so each class's code is its position in this list.
my %bidi_types = do
{
    my @ordered =
    (
        "ON",     #  0: Other Neutrals
        "L",      #  1: Left-to-Right
        "R",      #  2: Right-to-Left
        "AN",     #  3: Arabic Number
        "EN",     #  4: European Number
        "AL",     #  5: Right-to-Left Arabic
        "NSM",    #  6: Non-Spacing Mark
        "CS",     #  7: Common Number Separator
        "ES",     #  8: European Number Separator
        "ET",     #  9: European Number Terminator
        "BN",     # 10: Boundary Neutral
        "S",      # 11: Segment Separator
        "WS",     # 12: Whitespace
        "B",      # 13: Paragraph Separator
        "RLO",    # 14: Right-to-Left Override
        "RLE",    # 15: Right-to-Left Embedding
        "LRO",    # 16: Left-to-Right Override
        "LRE",    # 17: Left-to-Right Embedding
        "PDF",    # 18: Pop Directional Format
        "LRI",    # 19: Left-to-Right Isolate
        "RLI",    # 20: Right-to-Left Isolate
        "FSI",    # 21: First Strong Isolate
        "PDI",    # 22: Pop Directional Isolate
    );
    map { $ordered[$_] => $_ } 0 .. $#ordered;
};
# Numeric codes for the joining type names.  "D" and "C" deliberately
# share the code 3, exactly as in the original table.
my %joining_types =
(
    U             => 0,    # Non_Joining
    L             => 1,    # Left_Joining
    R             => 2,    # Right_Joining
    D             => 3,    # Dual_Joining
    C             => 3,    # Join_Causing
    ALAPH         => 4,    # Syriac ALAPH
    'DALATH RISH' => 5,    # Syriac DALATH RISH group
    T             => 6,    # Transparent
);
461 my @locales =
463 { name => "", lcid => 0x0000007f, file => "root", territory => "IV", sabbrevlangname => "IVL", sopentypelang =>"dflt" },
464 { name => "aa", sopentypelang => "AFR" },
465 { name => "aa-DJ" },
466 { name => "aa-ER" },
467 { name => "aa-ET" },
468 { name => "af", lcid => 0x00000036, oemcp => 850, sabbrevlangname => "AFK", sopentypelang => "AFK" },
469 { name => "af-NA" },
470 { name => "af-ZA", lcid => 0x00000436 },
471 { name => "agq" },
472 { name => "agq-CM" },
473 { name => "ak", sopentypelang => "TWI" },
474 { name => "ak-GH" },
475 { name => "am", lcid => 0x0000005e, sabbrevlangname => "AMH" },
476 { name => "am-ET", lcid => 0x0000045e },
477 { name => "ar", lcid => 0x00000001, territory => "SA", oemcp => 720, group => 13 },
478 { name => "ar-001" },
479 { name => "ar-AE", lcid => 0x00003801, sabbrevlangname => "ARU" },
480 { name => "ar-BH", lcid => 0x00003c01, sabbrevlangname => "ARH" },
481 { name => "ar-DJ" },
482 { name => "ar-DZ", lcid => 0x00001401, sabbrevlangname => "ARG", nativedigits => "0123456789" },
483 { name => "ar-EG", lcid => 0x00000c01, sabbrevlangname => "ARE" },
484 { name => "ar-EH" },
485 { name => "ar-ER" },
486 { name => "ar-IL" },
487 { name => "ar-IQ", lcid => 0x00000801, sabbrevlangname => "ARI" },
488 { name => "ar-JO", lcid => 0x00002c01, sabbrevlangname => "ARJ" },
489 { name => "ar-KM" },
490 { name => "ar-KW", lcid => 0x00003401, sabbrevlangname => "ARK" },
491 { name => "ar-LB", lcid => 0x00003001, sabbrevlangname => "ARB" },
492 { name => "ar-LY", lcid => 0x00001001, sabbrevlangname => "ARL", nativedigits => "0123456789" },
493 { name => "ar-MA", lcid => 0x00001801, sabbrevlangname => "ARM", nativedigits => "0123456789" },
494 { name => "ar-MR" },
495 { name => "ar-OM", lcid => 0x00002001, sabbrevlangname => "ARO" },
496 { name => "ar-PS" },
497 { name => "ar-QA", lcid => 0x00004001, sabbrevlangname => "ARQ" },
498 { name => "ar-SA", lcid => 0x00000401, sabbrevlangname => "ARA" },
499 { name => "ar-SD" },
500 { name => "ar-SO" },
501 { name => "ar-SS" },
502 { name => "ar-SY", lcid => 0x00002801, sabbrevlangname => "ARS" },
503 { name => "ar-TD" },
504 { name => "ar-TN", lcid => 0x00001c01, sabbrevlangname => "ART", nativedigits => "0123456789" },
505 { name => "ar-YE", lcid => 0x00002401, sabbrevlangname => "ARY" },
506 { name => "arn", lcid => 0x0000007a, oemcp => 850, ebcdiccp => 20284, slist => ",", sabbrevlangname => "MPD", sopentypelang => "MAP" },
507 { name => "arn-CL", lcid => 0x0000047a },
508 { name => "arn-Latn", alias => "arn" },
509 { name => "arn-Latn-CL", alias => "arn-CL" },
510 { name => "as", lcid => 0x0000004d, slist => ",", group => 15 },
511 { name => "as-IN", lcid => 0x0000044d },
512 { name => "asa" },
513 { name => "asa-TZ" },
514 { name => "ast" },
515 { name => "ast-ES" },
516 { name => "az", lcid => 0x0000002c, oemcp => 857, ebcdiccp => 20905, group => 2 },
517 { name => "az-Cyrl", lcid => 0x0000742c, oemcp => 866, ebcdiccp => 20880, group => 5, sabbrevlangname => "AZC" },
518 { name => "az-Cyrl-AZ", lcid => 0x0000082c },
519 { name => "az-Latn", lcid => 0x0000782c },
520 { name => "az-Latn-AZ", lcid => 0x0000042c },
521 { name => "ba", lcid => 0x0000006d, oemcp => 866, group => 5, sabbrevlangname => "BAS", sopentypelang => "BSH" },
522 { name => "ba-Cyrl", alias => "ba" },
523 { name => "ba-Cyrl-RU", alias => "ba-RU" },
524 { name => "ba-RU", lcid => 0x0000046d },
525 { name => "bas" },
526 { name => "bas-CM" },
527 { name => "be", lcid => 0x00000023, oemcp => 866, ebcdiccp => 500, group => 5 },
528 { name => "be-BY", lcid => 0x00000423 },
529 { name => "bem" },
530 { name => "bem-ZM" },
531 { name => "bez" },
532 { name => "bez-TZ" },
533 { name => "bg", lcid => 0x00000002, oemcp => 866, ebcdiccp => 21025, group => 5, sabbrevlangname => "BGR", sopentypelang => "BGR" },
534 { name => "bg-BG", lcid => 0x00000402 },
535 { name => "bin", lcid => 0x00000066, oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "EDO" },
536 { name => "bin-NG", lcid => 0x00000466, file => "bin", dir => "exemplars" },
537 { name => "bm", sopentypelang => "BMB" },
538 { name => "bm-Latn", file => "bm" },
539 { name => "bm-Latn-ML", file => "bm_ML" },
540 { name => "bm-ML", alias => "bm-Latn-ML" },
541 { name => "bn", lcid => 0x00000045, slist => ",", group => 15, sabbrevlangname => "BNB" },
542 { name => "bn-BD", lcid => 0x00000845 },
543 { name => "bn-IN", lcid => 0x00000445, sabbrevlangname => "BNG" },
544 { name => "bo", lcid => 0x00000051, slist => ",", group => 15, sabbrevlangname => "BOB", sopentypelang => "TIB" },
545 { name => "bo-CN", lcid => 0x00000451 },
546 { name => "bo-IN", slist => "," },
547 { name => "bo-Tibt", alias => "bo" },
548 { name => "bo-Tibt-CN", alias => "bo-CN" },
549 { name => "bo-Tibt-IN", alias => "bo-IN" },
550 { name => "br", lcid => 0x0000007e, oemcp => 850, ebcdiccp => 20297 },
551 { name => "br-FR", lcid => 0x0000047e },
552 { name => "br-Latn", alias => "br" },
553 { name => "br-Latn-FR", alias => "br-FR" },
554 { name => "brx" },
555 { name => "brx-IN" },
556 { name => "bs", lcid => 0x0000781a, oemcp => 852, maccp => 10082, ebcdiccp => 870, group => 2, sabbrevlangname => "BSB" },
557 { name => "bs-Cyrl", lcid => 0x0000641a, oemcp => 855, group => 5, sabbrevlangname => "BSC" },
558 { name => "bs-Cyrl-BA", lcid => 0x0000201a },
559 { name => "bs-Latn", lcid => 0x0000681a },
560 { name => "bs-Latn-BA", lcid => 0x0000141a },
561 { name => "byn", sopentypelang => "BIL" },
562 { name => "byn-ER" },
563 { name => "ca", lcid => 0x00000003, oemcp => 850 },
564 { name => "ca-AD", maccp => 65001 },
565 { name => "ca-ES", lcid => 0x00000403 },
566 { name => "ca-ES-valencia", lcid => 0x00000803, file => "ca_ES_VALENCIA", sabbrevlangname => "VAL" },
567 { name => "ca-FR", maccp => 65001 },
568 { name => "ca-IT", maccp => 65001 },
569 { name => "ccp" },
570 { name => "ccp-BD", alias => "ccp-Cakm-BD" },
571 { name => "ccp-Cakm", file => "ccp" },
572 { name => "ccp-Cakm-BD", file => "ccp_BD" },
573 { name => "ccp-Cakm-IN", file => "ccp_IN" },
574 { name => "ccp-IN", alias => "ccp-Cakm-IN" },
575 { name => "ce" },
576 { name => "ce-RU" },
577 { name => "ceb" },
578 { name => "ceb-Latn", file => "ceb" },
579 { name => "ceb-Latn-PH", file => "ceb_PH" },
580 { name => "ceb-PH", alias => "ceb-Latn-PH" },
581 { name => "cgg" },
582 { name => "cgg-UG" },
583 { name => "chr", lcid => 0x0000005c, slist => ",", sabbrevlangname => "CRE" },
584 { name => "chr-Cher", lcid => 0x00007c5c, file => "chr" },
585 { name => "chr-Cher-US", lcid => 0x0000045c, file => "chr_US" },
586 { name => "chr-US", alias => "chr-Cher-US" },
587 { name => "ckb", alias => "ku" },
588 { name => "ckb-IQ", alias => "ku-Arab-IQ" },
589 { name => "ckb-IR", alias => "ku-Arab-IR" },
590 { name => "co", lcid => 0x00000083, oemcp => 850, ebcdiccp => 20297 },
591 { name => "co-FR", lcid => 0x00000483 },
592 { name => "co-Latn", alias => "co" },
593 { name => "co-Latn-FR", alias => "co-FR" },
594 { name => "cs", lcid => 0x00000005, oemcp => 852, group => 2, sabbrevlangname => "CSY", sopentypelang => "CSY" },
595 { name => "cs-CZ", lcid => 0x00000405 },
596 { name => "cu", sopentypelang => "CSL" },
597 { name => "cu-RU" },
598 { name => "cy", lcid => 0x00000052, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "CYM", sopentypelang => "WEL" },
599 { name => "cy-GB", lcid => 0x00000452 },
600 { name => "da", lcid => 0x00000006, oemcp => 850, ebcdiccp => 20277 },
601 { name => "da-DK", lcid => 0x00000406 },
602 { name => "da-GL", maccp => 65001 },
603 { name => "dav" },
604 { name => "dav-KE" },
605 { name => "de", lcid => 0x00000007, oemcp => 850, ebcdiccp => 20273 },
606 { name => "de-AT", lcid => 0x00000c07, sabbrevlangname => "DEA" },
607 { name => "de-BE" },
608 { name => "de-CH", lcid => 0x00000807, sabbrevlangname => "DES" },
609 { name => "de-DE", lcid => 0x00000407 },
610 { name => "de-DE_phoneb", lcid => 0x00010407, alias => "de-DE" },
611 { name => "de-DE-u-co-phonebk", alias => "de-DE_phoneb" },
612 { name => "de-IT", oemcp => 65001 },
613 { name => "de-LI", lcid => 0x00001407, sabbrevlangname => "DEC" },
614 { name => "de-LU", lcid => 0x00001007, sabbrevlangname => "DEL" },
615 { name => "dje", sopentypelang => "DJR" },
616 { name => "dje-NE" },
617 { name => "doi", sopentypelang => "DGR" },
618 { name => "doi-IN", alias => "doi-Deva-IN" },
619 { name => "doi-Deva", file => "doi" },
620 { name => "doi-Deva-IN", file => "doi_IN" },
621 { name => "dsb", lcid => 0x00007c2e, sparent => "hsb", oemcp => 850, ebcdiccp => 870, sabbrevlangname => "DSB", sopentypelang => "LSB" },
622 { name => "dsb-DE", lcid => 0x0000082e },
623 { name => "dua" },
624 { name => "dua-CM" },
625 { name => "dv", lcid => 0x00000065, slist => "\x{060c}", group => 13, nativedigits => "0123456789" },
626 { name => "dv-MV", lcid => 0x00000465 },
627 { name => "dyo" },
628 { name => "dyo-SN" },
629 { name => "dz", sopentypelang => "DZN" },
630 { name => "dz-BT", lcid => 0x00000c51, sabbrevlangname => "ZZZ" },
631 { name => "ebu" },
632 { name => "ebu-KE" },
633 { name => "ee" },
634 { name => "ee-GH" },
635 { name => "ee-TG" },
636 { name => "el", lcid => 0x00000008, oemcp => 737, group => 4 },
637 { name => "el-CY" },
638 { name => "el-GR", lcid => 0x00000408 },
639 { name => "en", lcid => 0x00000009, oemcp => 437, slist => ",", sabbrevlangname => "ENU" },
640 { name => "en-001", oemcp => 850 },
641 { name => "en-029", lcid => 0x00002409, file => "en", oemcp => 850, sabbrevlangname => "ENB" },
642 { name => "en-150", oemcp => 65001 },
643 { name => "en-AE", lcid => 0x00004c09, oemcp => 65001, sabbrevlangname => "ZZZ" },
644 { name => "en-AG", oemcp => 850 },
645 { name => "en-AI", oemcp => 850 },
646 { name => "en-AS", oemcp => 850 },
647 { name => "en-AT", oemcp => 65001 },
648 { name => "en-AU", lcid => 0x00000c09, oemcp => 850, sabbrevlangname => "ENA" },
649 { name => "en-BB", oemcp => 850 },
650 { name => "en-BE", oemcp => 850 },
651 { name => "en-BI", oemcp => 65001 },
652 { name => "en-BM", oemcp => 850 },
653 { name => "en-BS", oemcp => 850 },
654 { name => "en-BW", oemcp => 850 },
655 { name => "en-BZ", lcid => 0x00002809, oemcp => 850, sabbrevlangname => "ENL" },
656 { name => "en-CA", lcid => 0x00001009, oemcp => 850, ebcdiccp => 37, sabbrevlangname => "ENC" },
657 { name => "en-CC", oemcp => 850 },
658 { name => "en-CH", oemcp => 65001 },
659 { name => "en-CK", oemcp => 850 },
660 { name => "en-CM", oemcp => 850 },
661 { name => "en-CX", oemcp => 850 },
662 { name => "en-CY", oemcp => 65001 },
663 { name => "en-DE", oemcp => 65001 },
664 { name => "en-DG", oemcp => 850 },
665 { name => "en-DK", oemcp => 65001 },
666 { name => "en-DM", oemcp => 850 },
667 { name => "en-ER", oemcp => 850 },
668 { name => "en-FI", oemcp => 65001 },
669 { name => "en-FJ", oemcp => 850 },
670 { name => "en-FK", oemcp => 850 },
671 { name => "en-FM", oemcp => 850 },
672 { name => "en-GB", lcid => 0x00000809, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "ENG" },
673 { name => "en-GD", oemcp => 850 },
674 { name => "en-GG", oemcp => 850 },
675 { name => "en-GH", oemcp => 850 },
676 { name => "en-GI", oemcp => 850 },
677 { name => "en-GM", oemcp => 850 },
678 { name => "en-GU", oemcp => 850 },
679 { name => "en-GY", oemcp => 850 },
680 { name => "en-HK", lcid => 0x00003c09, oemcp => 850, sabbrevlangname => "ENH" },
681 { name => "en-ID", lcid => 0x00003809, file => "en", oemcp => 850, sabbrevlangname => "ZZZ" },
682 { name => "en-IE", lcid => 0x00001809, oemcp => 850, sabbrevlangname => "ENI" },
683 { name => "en-IL", oemcp => 65001 },
684 { name => "en-IM", oemcp => 850 },
685 { name => "en-IN", lcid => 0x00004009, sabbrevlangname => "ENN" },
686 { name => "en-IO", oemcp => 850 },
687 { name => "en-JE", oemcp => 850 },
688 { name => "en-JM", lcid => 0x00002009, oemcp => 850, sabbrevlangname => "ENJ" },
689 { name => "en-KE", oemcp => 850 },
690 { name => "en-KI", oemcp => 850 },
691 { name => "en-KN", oemcp => 850 },
692 { name => "en-KY", oemcp => 850 },
693 { name => "en-LC", oemcp => 850 },
694 { name => "en-LR", oemcp => 850 },
695 { name => "en-LS", oemcp => 850 },
696 { name => "en-MG", oemcp => 850 },
697 { name => "en-MH", oemcp => 850 },
698 { name => "en-MO", oemcp => 850 },
699 { name => "en-MP", oemcp => 850 },
700 { name => "en-MS", oemcp => 850 },
701 { name => "en-MT", oemcp => 850 },
702 { name => "en-MU", oemcp => 850 },
703 { name => "en-MW", oemcp => 850 },
704 { name => "en-MY", lcid => 0x00004409, sabbrevlangname => "ENM" },
705 { name => "en-NA", oemcp => 850 },
706 { name => "en-NF", oemcp => 850 },
707 { name => "en-NG", oemcp => 850 },
708 { name => "en-NL", oemcp => 65001 },
709 { name => "en-NR", oemcp => 850 },
710 { name => "en-NU", oemcp => 850 },
711 { name => "en-NZ", lcid => 0x00001409, oemcp => 850, sabbrevlangname => "ENZ" },
712 { name => "en-PG", oemcp => 850 },
713 { name => "en-PH", lcid => 0x00003409, ebcdiccp => 500, sabbrevlangname => "ENP" },
714 { name => "en-PK", oemcp => 850 },
715 { name => "en-PN", oemcp => 850 },
716 { name => "en-PR", oemcp => 850 },
717 { name => "en-PW", oemcp => 850 },
718 { name => "en-RW", oemcp => 850 },
719 { name => "en-SB", oemcp => 850 },
720 { name => "en-SC", oemcp => 850 },
721 { name => "en-SD", oemcp => 850 },
722 { name => "en-SE", oemcp => 65001 },
723 { name => "en-SG", lcid => 0x00004809, sabbrevlangname => "ENE" },
724 { name => "en-SH", oemcp => 850 },
725 { name => "en-SI", oemcp => 65001 },
726 { name => "en-SL", oemcp => 850 },
727 { name => "en-SS", oemcp => 850 },
728 { name => "en-SX", oemcp => 850 },
729 { name => "en-SZ", oemcp => 850 },
730 { name => "en-TC", oemcp => 850 },
731 { name => "en-TK", oemcp => 850 },
732 { name => "en-TO", oemcp => 850 },
733 { name => "en-TT", lcid => 0x00002c09, oemcp => 850, sabbrevlangname => "ENT" },
734 { name => "en-TV", oemcp => 850 },
735 { name => "en-TZ", oemcp => 850 },
736 { name => "en-UG", oemcp => 850 },
737 { name => "en-UM", oemcp => 850 },
738 { name => "en-US", lcid => 0x00000409 },
739 { name => "en-VC", oemcp => 850 },
740 { name => "en-VG", oemcp => 850 },
741 { name => "en-VI", oemcp => 850 },
742 { name => "en-VU", oemcp => 850 },
743 { name => "en-WS", oemcp => 850 },
744 { name => "en-ZA", lcid => 0x00001c09, ebcdiccp => 500, sabbrevlangname => "ENS" },
745 { name => "en-ZM", oemcp => 850 },
746 { name => "en-ZW", lcid => 0x00003009, ebcdiccp => 500, sabbrevlangname => "ENW" },
747 { name => "eo", sopentypelang => "NTO" },
748 { name => "eo-001" },
749 { name => "es", lcid => 0x0000000a, oemcp => 850, ebcdiccp => 20284, sabbrevlangname => "ESP", sopentypelang => "ESP" },
750 { name => "es-419", lcid => 0x0000580a, sabbrevlangname => "ESJ" },
751 { name => "es-AR", lcid => 0x00002c0a, sabbrevlangname => "ESS" },
752 { name => "es-BO", lcid => 0x0000400a, sabbrevlangname => "ESB" },
753 { name => "es-BR", oemcp => 65001 },
754 { name => "es-BZ", oemcp => 65001 },
755 { name => "es-CL", lcid => 0x0000340a, sabbrevlangname => "ESL" },
756 { name => "es-CO", lcid => 0x0000240a, sabbrevlangname => "ESO" },
757 { name => "es-CR", lcid => 0x0000140a, sabbrevlangname => "ESC" },
758 { name => "es-CU", lcid => 0x00005c0a, sabbrevlangname => "ESK" },
759 { name => "es-DO", lcid => 0x00001c0a, sabbrevlangname => "ESD" },
760 { name => "es-EA" },
761 { name => "es-EC", lcid => 0x0000300a, sabbrevlangname => "ESF" },
762 { name => "es-ES", lcid => 0x00000c0a, sabbrevlangname => "ESN" },
763 { name => "es-ES_tradnl", lcid => 0x0000040a, file => "es_ES" },
764 { name => "es-ES-u-co-trad", alias => "es-ES_tradnl" },
765 { name => "es-GQ" },
766 { name => "es-GT", lcid => 0x0000100a, sabbrevlangname => "ESG" },
767 { name => "es-HN", lcid => 0x0000480a, sabbrevlangname => "ESH" },
768 { name => "es-IC" },
769 { name => "es-MX", lcid => 0x0000080a, sabbrevlangname => "ESM" },
770 { name => "es-NI", lcid => 0x00004c0a, sabbrevlangname => "ESI" },
771 { name => "es-PA", lcid => 0x0000180a, sabbrevlangname => "ESA" },
772 { name => "es-PE", lcid => 0x0000280a, sabbrevlangname => "ESR" },
773 { name => "es-PH" },
774 { name => "es-PR", lcid => 0x0000500a, sabbrevlangname => "ESU" },
775 { name => "es-PY", lcid => 0x00003c0a, sabbrevlangname => "ESZ" },
776 { name => "es-SV", lcid => 0x0000440a, sabbrevlangname => "ESE" },
777 { name => "es-US", lcid => 0x0000540a, sabbrevlangname => "EST" },
778 { name => "es-UY", lcid => 0x0000380a, sabbrevlangname => "ESY" },
779 { name => "es-VE", lcid => 0x0000200a, sabbrevlangname => "ESV" },
780 { name => "et", lcid => 0x00000025, oemcp => 775, group => 3, sabbrevlangname => "ETI", sopentypelang => "ETI" },
781 { name => "et-EE", lcid => 0x00000425 },
782 { name => "eu", lcid => 0x0000002d, oemcp => 850, maccp => 65001, sabbrevlangname => "EUQ", sopentypelang => "EUQ" },
783 { name => "eu-ES", lcid => 0x0000042d },
784 { name => "ewo" },
785 { name => "ewo-CM" },
786 { name => "fa", lcid => 0x00000029, inegnumber => 3, oemcp => 720, slist => "\x{061b}", group => 13, sabbrevlangname => "FAR", sopentypelang => "FAR" },
787 { name => "fa-AF", alias => "prs-AF" },
788 { name => "fa-IR", lcid => 0x00000429 },
789 { name => "ff", lcid => 0x00000067, oemcp => 850, ebcdiccp => 20297 },
790 { name => "ff-CM", alias => "ff-Latn-CM" },
791 { name => "ff-GN", alias => "ff-Latn-GN" },
792 { name => "ff-MR", alias => "ff-Latn-MR" },
793 { name => "ff-NG", alias => "ff-Latn-NG" },
794 { name => "ff-SN", alias => "ff-Latn-SN" },
795 { name => "ff-Adlm", oemcp => 65001 },
796 { name => "ff-Adlm-BF" },
797 { name => "ff-Adlm-CM" },
798 { name => "ff-Adlm-GH" },
799 { name => "ff-Adlm-GM" },
800 { name => "ff-Adlm-GN" },
801 { name => "ff-Adlm-GW" },
802 { name => "ff-Adlm-LR" },
803 { name => "ff-Adlm-MR" },
804 { name => "ff-Adlm-NE" },
805 { name => "ff-Adlm-NG" },
806 { name => "ff-Adlm-SL" },
807 { name => "ff-Adlm-SN" },
808 { name => "ff-Latn", lcid => 0x00007c67 },
809 { name => "ff-Latn-BF", oemcp => 65001 },
810 { name => "ff-Latn-CM" },
811 { name => "ff-Latn-GH", oemcp => 65001 },
812 { name => "ff-Latn-GM", oemcp => 65001 },
813 { name => "ff-Latn-GN" },
814 { name => "ff-Latn-GW", oemcp => 65001 },
815 { name => "ff-Latn-LR", oemcp => 65001 },
816 { name => "ff-Latn-MR" },
817 { name => "ff-Latn-NE", oemcp => 65001 },
818 { name => "ff-Latn-NG", lcid => 0x00000467, sabbrevlangname => "ZZZ" },
819 { name => "ff-Latn-SL", oemcp => 65001 },
820 { name => "ff-Latn-SN", lcid => 0x00000867 },
821 { name => "fi", lcid => 0x0000000b, oemcp => 850, ebcdiccp => 20278 },
822 { name => "fi-FI", lcid => 0x0000040b },
823 { name => "fil", lcid => 0x00000064, oemcp => 437, ebcdiccp => 500, sabbrevlangname => "FPO", sopentypelang => "PIL" },
824 { name => "fil-PH", lcid => 0x00000464 },
825 { name => "fil-Latn", alias => "fil" },
826 { name => "fil-Latn-PH", alias => "fil-PH" },
827 { name => "fo", lcid => 0x00000038, oemcp => 850, maccp => 10079, ebcdiccp => 20277, sabbrevlangname => "FOS", sopentypelang => "FOS" },
828 { name => "fo-DK", oemcp => 65001, maccp => 65001 },
829 { name => "fo-FO", lcid => 0x00000438 },
830 { name => "fr", lcid => 0x0000000c, oemcp => 850, ebcdiccp => 20297 },
831 { name => "fr-029", lcid => 0x00001c0c, file => "fr", sabbrevlangname => "ZZZ" },
832 { name => "fr-BE", lcid => 0x0000080c, sabbrevlangname => "FRB" },
833 { name => "fr-BF" },
834 { name => "fr-BI" },
835 { name => "fr-BJ" },
836 { name => "fr-BL" },
837 { name => "fr-CA", lcid => 0x00000c0c, sabbrevlangname => "FRC" },
838 { name => "fr-CD", lcid => 0x0000240c, sabbrevlangname => "FRD" },
839 { name => "fr-CF" },
840 { name => "fr-CG" },
841 { name => "fr-CH", lcid => 0x0000100c, sabbrevlangname => "FRS" },
842 { name => "fr-CI", lcid => 0x0000300c, sabbrevlangname => "FRI" },
843 { name => "fr-CM", lcid => 0x00002c0c, sabbrevlangname => "FRE" },
844 { name => "fr-DJ" },
845 { name => "fr-DZ" },
846 { name => "fr-FR", lcid => 0x0000040c },
847 { name => "fr-GA" },
848 { name => "fr-GF" },
849 { name => "fr-GN" },
850 { name => "fr-GP" },
851 { name => "fr-GQ" },
852 { name => "fr-HT", lcid => 0x00003c0c, sabbrevlangname => "FRH" },
853 { name => "fr-KM" },
854 { name => "fr-LU", lcid => 0x0000140c, sabbrevlangname => "FRL" },
855 { name => "fr-MA", lcid => 0x0000380c, sabbrevlangname => "FRO" },
856 { name => "fr-MC", lcid => 0x0000180c, sabbrevlangname => "FRM" },
857 { name => "fr-MF" },
858 { name => "fr-MG" },
859 { name => "fr-ML", lcid => 0x0000340c, sabbrevlangname => "FRF" },
860 { name => "fr-MQ" },
861 { name => "fr-MR" },
862 { name => "fr-MU" },
863 { name => "fr-NC" },
864 { name => "fr-NE" },
865 { name => "fr-PF" },
866 { name => "fr-PM" },
867 { name => "fr-RE", lcid => 0x0000200c, sabbrevlangname => "FRR" },
868 { name => "fr-RW" },
869 { name => "fr-SC" },
870 { name => "fr-SN", lcid => 0x0000280c, sabbrevlangname => "FRN" },
871 { name => "fr-SY" },
872 { name => "fr-TD" },
873 { name => "fr-TG" },
874 { name => "fr-TN" },
875 { name => "fr-VU" },
876 { name => "fr-WF" },
877 { name => "fr-YT" },
878 { name => "fur", sopentypelang => "FRL" },
879 { name => "fur-IT" },
880 { name => "fuv-NG", alias => "ff-Latn-NG" },
881 { name => "fy", lcid => 0x00000062, oemcp => 850, sabbrevlangname => "FYN", sopentypelang => "FRI" },
882 { name => "fy-NL", lcid => 0x00000462 },
883 { name => "ga", lcid => 0x0000003c, oemcp => 850, sabbrevlangname => "IRE", sopentypelang => "IRI" },
884 { name => "ga-GB", oemcp => 65001 },
885 { name => "ga-IE", lcid => 0x0000083c },
886 { name => "gd", lcid => 0x00000091, oemcp => 850, ebcdiccp => 20285, sopentypelang => "GAE" },
887 { name => "gd-GB", lcid => 0x00000491 },
888 { name => "gd-Latn", alias => "gd" },
889 { name => "gl", lcid => 0x00000056, oemcp => 850, sabbrevlangname => "GLC", sopentypelang => "GAL" },
890 { name => "gl-ES", lcid => 0x00000456 },
891 { name => "gn", lcid => 0x00000074, oemcp => 850, ebcdiccp => 20284, slist => ",", sopentypelang => "GUA" },
892 { name => "gn-PY", lcid => 0x00000474 },
893 { name => "gsw", lcid => 0x00000084, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "ZZZ", sopentypelang => "ALS" },
894 { name => "gsw-CH" },
895 { name => "gsw-FR", lcid => 0x00000484, sabbrevlangname => "GSW" },
896 { name => "gsw-LI" },
897 { name => "gu", lcid => 0x00000047, slist => ",", group => 15 },
898 { name => "gu-IN", lcid => 0x00000447 },
899 { name => "guz" },
900 { name => "guz-KE" },
901 { name => "gv", sopentypelang => "MNX" },
902 { name => "gv-GB", file => "gv" },
903 { name => "gv-IM" },
904 { name => "ha", lcid => 0x00000068, oemcp => 437 },
905 { name => "ha-GH", alias => "ha-Latn-GH" },
906 { name => "ha-Latn", lcid => 0x00007c68, file => "ha" },
907 { name => "ha-Latn-GH", file => "ha_GH", ebcdiccp => 500 },
908 { name => "ha-Latn-NE", file => "ha_NE", ebcdiccp => 500 },
909 { name => "ha-Latn-NG", lcid => 0x00000468, file => "ha_NG" },
910 { name => "ha-NE", alias => "ha-Latn-NE" },
911 { name => "ha-NG", alias => "ha-Latn-NG" },
912 { name => "haw", lcid => 0x00000075, oemcp => 437 },
913 { name => "haw-Latn", alias => "haw" },
914 { name => "haw-Latn-US", alias => "haw-US" },
915 { name => "haw-US", lcid => 0x00000475 },
916 { name => "he", lcid => 0x0000000d, oemcp => 862, slist => ",", group => 12, sopentypelang => "IWR" },
917 { name => "he-IL", lcid => 0x0000040d },
918 { name => "hi", lcid => 0x00000039, slist => ",", group => 15 },
919 { name => "hi-IN", lcid => 0x00000439 },
920 { name => "hr", lcid => 0x0000001a, inegnumber => 2, oemcp => 852, maccp => 10082, group => 2 },
921 { name => "hr-BA", lcid => 0x0000101a, ebcdiccp => 870, inegnumber => 1, sabbrevlangname => "HRB" },
922 { name => "hr-HR", lcid => 0x0000041a },
923 { name => "hsb", lcid => 0x0000002e, oemcp => 850, ebcdiccp => 870, sopentypelang => "USB" },
924 { name => "hsb-DE", lcid => 0x0000042e },
925 { name => "hu", lcid => 0x0000000e, oemcp => 852, group => 2 },
926 { name => "hu-HU", lcid => 0x0000040e },
927 { name => "hu-HU_technl", lcid => 0x0001040e, alias => "hu-HU" },
928 { name => "hy", lcid => 0x0000002b, slist => ",", group => 17 },
929 { name => "hy-AM", lcid => 0x0000042b },
930 { name => "ia" },
931 { name => "ia-001" },
932 ## name => "ibb", lcid => 0x00000069 },
933 ## name => "ibb-NG", lcid => 0x00000469 },
934 { name => "id", lcid => 0x00000021, oemcp => 850 },
935 { name => "id-ID", lcid => 0x00000421 },
936 { name => "ig", lcid => 0x00000070, oemcp => 437 },
937 { name => "ig-Latn", alias => "ig" },
938 { name => "ig-Latn-NG", alias => "ig-NG" },
939 { name => "ig-NG", lcid => 0x00000470 },
940 { name => "ii", lcid => 0x00000078, group => 9, sopentypelang => "YIM" },
941 { name => "ii-CN", lcid => 0x00000478 },
942 { name => "ii-Yiii", alias => "ii" },
943 { name => "ii-Yiii-CN", alias => "ii-CN" },
944 { name => "is", lcid => 0x0000000f, oemcp => 850, maccp => 10079, ebcdiccp => 20871 },
945 { name => "is-IS", lcid => 0x0000040f },
946 { name => "it", lcid => 0x00000010, oemcp => 850, ebcdiccp => 20280 },
947 { name => "it-CH", lcid => 0x00000810, ebcdiccp => 500, sabbrevlangname => "ITS" },
948 { name => "it-IT", lcid => 0x00000410 },
949 { name => "it-SM" },
950 { name => "it-VA", oemcp => 65001 },
951 { name => "iu", lcid => 0x0000005d, oemcp => 437, slist => ",", sortlocale => "iu-Latn-CA", sabbrevlangname => "IUK", sopentypelang => "INU" },
952 { name => "iu-Cans", lcid => 0x0000785d, file => "iu", oemcp => 65001, sabbrevlangname => "IUS" },
953 { name => "iu-Cans-CA", lcid => 0x0000045d, file => "iu_CA" },
954 { name => "iu-Latn", lcid => 0x00007c5d },
955 { name => "iu-Latn-CA", lcid => 0x0000085d },
956 { name => "ja", lcid => 0x00000011, ireadinglayout => 2, oemcp => 932, slist => ",", sscripts => "Hani Hira Jpan Kana", group => 7, sopentypelang => "JAN" },
957 { name => "ja-JP", lcid => 0x00000411 },
958 { name => "ja-JP_radstr", lcid => 0x00040411, alias => "ja-JP" },
959 { name => "ja-JP-u-co-unihan", alias => "ja-JP_radstr" },
960 { name => "jgo" },
961 { name => "jgo-CM" },
962 { name => "jmc" },
963 { name => "jmc-TZ" },
964 { name => "jv", oemcp => 850, nativedigits => "0123456789" },
965 { name => "jv-ID", alias => "jv-Latn-ID" },
966 ## name => "jv-Java" },
967 ## name => "jv-Java-ID" },
968 { name => "jv-Latn", file => "jv" },
969 { name => "jv-Latn-ID", file => "jv_ID" },
970 { name => "ka", lcid => 0x00000037, group => 16 },
971 { name => "ka-GE", lcid => 0x00000437 },
972 { name => "ka-GE_modern", lcid => 0x00010437, alias => "ka-GE" },
973 { name => "kab", sopentypelang => "KAB0" },
974 { name => "kab-DZ" },
975 { name => "kam", sopentypelang => "KMB" },
976 { name => "kam-KE" },
977 { name => "kde" },
978 { name => "kde-TZ" },
979 { name => "kea" },
980 { name => "kea-CV" },
981 { name => "kgp" },
982 { name => "kgp-BR" },
983 { name => "khq" },
984 { name => "khq-ML" },
985 { name => "ki" },
986 { name => "ki-KE" },
987 { name => "kk", lcid => 0x0000003f, group => 5, sabbrevlangname => "KKZ" },
988 { name => "kk-Cyrl", alias => "kk" },
989 { name => "kk-Cyrl-KZ", alias => "kk-KZ" },
990 { name => "kk-KZ", lcid => 0x0000043f },
991 { name => "kkj" },
992 { name => "kkj-CM" },
993 { name => "kl", lcid => 0x0000006f, oemcp => 850, ebcdiccp => 20277, sopentypelang => "GRN" },
994 { name => "kl-GL", lcid => 0x0000046f },
995 { name => "kln", sopentypelang => "KAL" },
996 { name => "kln-KE" },
997 { name => "km", lcid => 0x00000053, inegnumber => 2, slist => ",", group => 15 },
998 { name => "km-KH", lcid => 0x00000453 },
999 { name => "kn", lcid => 0x0000004b, slist => ",", group => 15, sabbrevlangname => "KDI" },
1000 { name => "kn-IN", lcid => 0x0000044b },
1001 { name => "ko", lcid => 0x00000012, ireadinglayout => 2, slist => ",", oemcp => 949, ebcdiccp => 20833, sscripts => "Hang Hani Kore", group => 8 },
1002 { name => "ko-KP", oemcp => 65001 },
1003 { name => "ko-KR", lcid => 0x00000412 },
1004 { name => "kok", lcid => 0x00000057, slist => ",", group => 15, sabbrevlangname => "KNK" },
1005 { name => "kok-IN", lcid => 0x00000457 },
1006 { name => "kr", lcid => 0x00000071, sortlocale => "kr-Latn-NG", oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "KNR" },
1007 { name => "kr-Latn", file => "kr", dir => "exemplars" },
1008 { name => "kr-Latn-NG", lcid => 0x00000471, file => "kr", dir => "exemplars" },
1009 { name => "kr-NG", alias => "kr-Latn-NG" },
1010 { name => "ks", lcid => 0x00000060, group => 15, sabbrevlangname => "ZZZ", sopentypelang => "KSH" },
1011 { name => "ks-Arab", lcid => 0x00000460 },
1012 { name => "ks-Arab-IN" },
1013 { name => "ks-Deva", slist => "," },
1014 { name => "ks-Deva-IN", lcid => 0x00000860 },
1015 { name => "ks-IN", alias => "ks-Arab-IN" },
1016 { name => "ksb" },
1017 { name => "ksb-TZ" },
1018 { name => "ksf" },
1019 { name => "ksf-CM" },
1020 { name => "ksh", sopentypelang => "KSH0" },
1021 { name => "ksh-DE" },
1022 { name => "ku", lcid => 0x00000092, file => "ckb", slist => "\x{061b}", sortlocale => "ku-Arab-IQ", oemcp => 720 },
1023 { name => "ku-Arab", lcid => 0x00007c92, file => "ckb", group => 13 },
1024 { name => "ku-Arab-IQ", lcid => 0x00000492, file => "ckb_IQ" },
1025 { name => "ku-Arab-IR", file => "ckb_IR", oemcp => 65001 },
1026 { name => "kw" },
1027 { name => "kw-GB" },
1028 { name => "ky", lcid => 0x00000040, oemcp => 866, group => 5, sabbrevlangname => "KYR" },
1029 { name => "ky-Cyrl", alias => "ky" },
1030 { name => "ky-Cyrl-KG", alias => "ky-KG" },
1031 { name => "ky-KG", lcid => 0x00000440 },
1032 { name => "la", lcid => 0x00000076, oemcp => 437, slist => ",", sabbrevlangname => "ZZZ" },
1033 { name => "la-VA", lcid => 0x00000476 },
1034 { name => "la-001", alias => "la-VA" },
1035 { name => "lag" },
1036 { name => "lag-TZ" },
1037 { name => "lb", lcid => 0x0000006e, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "LBX" },
1038 { name => "lb-LU", lcid => 0x0000046e },
1039 { name => "lg" },
1040 { name => "lg-UG" },
1041 { name => "lkt" },
1042 { name => "lkt-US" },
1043 { name => "ln" },
1044 { name => "ln-AO" },
1045 { name => "ln-CD" },
1046 { name => "ln-CF" },
1047 { name => "ln-CG" },
1048 { name => "lo", lcid => 0x00000054, group => 15 },
1049 { name => "lo-LA", lcid => 0x00000454 },
1050 { name => "lrc" },
1051 { name => "lrc-IQ" },
1052 { name => "lrc-IR" },
1053 { name => "lt", lcid => 0x00000027, oemcp => 775, group => 3, sabbrevlangname => "LTH", sopentypelang => "LTH" },
1054 { name => "lt-LT", lcid => 0x00000427 },
1055 { name => "lu" },
1056 { name => "lu-CD" },
1057 { name => "luo" },
1058 { name => "luo-KE" },
1059 { name => "luy", sopentypelang => "LUH" },
1060 { name => "luy-KE" },
1061 { name => "lv", lcid => 0x00000026, oemcp => 775, group => 3, sabbrevlangname => "LVI", sopentypelang => "LVI" },
1062 { name => "lv-LV", lcid => 0x00000426 },
1063 { name => "mai" },
1064 { name => "mai-IN" },
1065 { name => "mas" },
1066 { name => "mas-KE" },
1067 { name => "mas-TZ" },
1068 { name => "mer" },
1069 { name => "mer-KE" },
1070 { name => "mfe" },
1071 { name => "mfe-MU" },
1072 { name => "mg" },
1073 { name => "mg-MG" },
1074 { name => "mgh" },
1075 { name => "mgh-MZ" },
1076 { name => "mgo" },
1077 { name => "mgo-CM" },
1078 { name => "mi", lcid => 0x00000081, slist => "," },
1079 { name => "mi-Latn", alias => "mi" },
1080 { name => "mi-Latn-NZ", alias => "mi-NZ" },
1081 { name => "mi-NZ", lcid => 0x00000481 },
1082 { name => "mk", lcid => 0x0000002f, oemcp => 866, ebcdiccp => 500, group => 5, sabbrevlangname => "MKI" },
1083 { name => "mk-MK", lcid => 0x0000042f },
1084 { name => "ml", lcid => 0x0000004c, group => 15, sabbrevlangname => "MYM", sopentypelang => "MLR" },
1085 { name => "ml-IN", lcid => 0x0000044c },
1086 { name => "mn", lcid => 0x00000050, oemcp => 866, sopentypelang => "MNG" },
1087 { name => "mn-Cyrl", lcid => 0x00007850, file => "mn", sabbrevlangname => "MNN" },
1088 { name => "mn-Cyrl-MN", alias => "mn-MN" },
1089 { name => "mn-MN", lcid => 0x00000450, sparent => "mn-Cyrl", group => 5 },
1090 { name => "mn-Mong", lcid => 0x00007c50, oemcp => 65001, slist => ",", group => 15, sabbrevlangname => "MNG", nativedigits => "0123456789" },
1091 { name => "mn-Mong-CN", lcid => 0x00000850 },
1092 { name => "mn-Mong-MN", lcid => 0x00000c50, sabbrevlangname => "MNM" },
1093 { name => "mni", lcid => 0x00000058, slist => ",", sabbrevlangname => "ZZZ" },
1094 { name => "mni-IN", lcid => 0x00000458, file => "mni_Beng_IN" },
1095 { name => "mni-Beng" },
1096 { name => "mni-Beng-IN", alias => "mni-IN" },
1097 { name => "moh", lcid => 0x0000007c, oemcp => 850, ebcdiccp => 37, slist => ",", sabbrevlangname => "MWK" },
1098 { name => "moh-CA", lcid => 0x0000047c },
1099 { name => "moh-Latn", alias => "moh" },
1100 { name => "moh-Latn-CA", alias => "moh-CA" },
1101 { name => "mr", lcid => 0x0000004e, slist => ",", group => 15 },
1102 { name => "mr-IN", lcid => 0x0000044e },
1103 { name => "ms", lcid => 0x0000003e, oemcp => 850, sabbrevlangname => "MSL", sopentypelang => "MLY" },
1104 { name => "ms-BN", lcid => 0x0000083e, sabbrevlangname => "MSB" },
1105 { name => "ms-ID" },
1106 { name => "ms-Latn", alias => "ms" },
1107 { name => "ms-Latn-BN", alias => "ms-BN" },
1108 { name => "ms-Latn-MY", alias => "ms-MY" },
1109 { name => "ms-Latn-SG", alias => "ms-SG" },
1110 { name => "ms-MY", lcid => 0x0000043e },
1111 { name => "ms-SG" },
1112 { name => "mt", lcid => 0x0000003a, sopentypelang => "MTS" },
1113 { name => "mt-MT", lcid => 0x0000043a },
1114 { name => "mua" },
1115 { name => "mua-CM" },
1116 { name => "my", lcid => 0x00000055, sopentypelang => "BRM" },
1117 { name => "my-MM", lcid => 0x00000455 },
1118 { name => "mzn" },
1119 { name => "mzn-IR" },
1120 { name => "naq" },
1121 { name => "naq-NA" },
1122 { name => "nb", lcid => 0x00007c14, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NOR", sopentypelang => "NOR" },
1123 { name => "nb-NO", lcid => 0x00000414 },
1124 { name => "nb-SJ" },
1125 { name => "nd", sopentypelang => "NDB" },
1126 { name => "nd-ZW" },
1127 { name => "nds" },
1128 { name => "nds-DE" },
1129 { name => "nds-NL" },
1130 { name => "ne", lcid => 0x00000061, slist => "," },
1131 { name => "ne-IN", lcid => 0x00000861, sabbrevlangname => "NEI" },
1132 { name => "ne-NP", lcid => 0x00000461, group => 15 },
1133 { name => "nl", lcid => 0x00000013, oemcp => 850 },
1134 { name => "nl-AW" },
1135 { name => "nl-BE", lcid => 0x00000813, sabbrevlangname => "NLB" },
1136 { name => "nl-BQ" },
1137 { name => "nl-CW" },
1138 { name => "nl-NL", lcid => 0x00000413 },
1139 { name => "nl-SR" },
1140 { name => "nl-SX" },
1141 { name => "nmg" },
1142 { name => "nmg-CM" },
1143 { name => "nn", lcid => 0x00007814, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NON", sopentypelang => "NYN" },
1144 { name => "nn-NO", lcid => 0x00000814 },
1145 { name => "nnh" },
1146 { name => "nnh-CM" },
1147 { name => "no", lcid => 0x00000014, oemcp => 850, ebcdiccp => 20277, sortlocale => "nb-NO" },
1148 { name => "nqo", idigits => 3, inegnumber => 3, slist => "\x{060c}", sopentypelang => "NKO" },
1149 { name => "nqo-GN" },
1150 { name => "nr", sopentypelang => "NDB" },
1151 { name => "nr-ZA" },
1152 { name => "nso", lcid => 0x0000006c, oemcp => 850, sopentypelang => "SOT" },
1153 { name => "nso-ZA", lcid => 0x0000046c },
1154 { name => "nus" },
1155 { name => "nus-SD", alias => "nus-SS" },
1156 { name => "nus-SS" },
1157 { name => "nyn", sopentypelang => "NKL" },
1158 { name => "nyn-UG" },
1159 { name => "oc", lcid => 0x00000082, oemcp => 850, ebcdiccp => 20297 },
1160 { name => "oc-FR", lcid => 0x00000482 },
1161 { name => "oc-Latn", alias => "oc" },
1162 { name => "oc-Latn-FR", alias => "oc-FR" },
1163 { name => "om", lcid => 0x00000072, sopentypelang => "ORO" },
1164 { name => "om-ET", lcid => 0x00000472 },
1165 { name => "om-KE" },
1166 { name => "or", lcid => 0x00000048, slist => ",", group => 15 },
1167 { name => "or-IN", lcid => 0x00000448 },
1168 { name => "os" },
1169 { name => "os-GE" },
1170 { name => "os-RU" },
1171 { name => "pa", lcid => 0x00000046, slist => "," },
1172 { name => "pa-Arab", lcid => 0x00007c46, slist => ";", inegnumber => 2, oemcp => 720, group => 13, sabbrevlangname => "PAP" },
1173 { name => "pa-Arab-PK", lcid => 0x00000846 },
1174 { name => "pa-Guru" },
1175 { name => "pa-Guru-IN", alias => "pa-IN" },
1176 { name => "pa-IN", lcid => 0x00000446, sparent => "pa-Guru", file => "pa_Guru_IN", group => 15 },
1177 { name => "pap", lcid => 0x00000079, oemcp => 850, sopentypelang => "PAP0" },
1178 ## name => "pap-029", lcid => 0x00000479 },
1179 { name => "pcm" },
1180 { name => "pcm-NG", alias => "pcm-Latn-NG" },
1181 { name => "pcm-Latn", file => "pcm" },
1182 { name => "pcm-Latn-NG", file => "pcm_NG" },
1183 { name => "pl", lcid => 0x00000015, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "PLK", sopentypelang => "PLK" },
1184 { name => "pl-PL", lcid => 0x00000415 },
1185 { name => "prg" },
1186 { name => "prg-001" },
1187 { name => "prs", lcid => 0x0000008c, file => "fa", inegnumber => 3, oemcp => 720, group => 13, sopentypelang => "DRI" },
1188 { name => "prs-AF", lcid => 0x0000048c, file => "fa_AF" },
1189 { name => "prs-Arab", alias => "prs" },
1190 { name => "prs-Arab-AF", alias => "prs-AF" },
1191 { name => "ps", lcid => 0x00000063, group => 13, sabbrevlangname => "PAS", sopentypelang => "PAS" },
1192 { name => "ps-AF", lcid => 0x00000463 },
1193 { name => "ps-PK" },
1194 { name => "pt", lcid => 0x00000016, oemcp => 850, sabbrevlangname => "PTB", sopentypelang => "PTG" },
1195 { name => "pt-AO" },
1196 { name => "pt-BR", lcid => 0x00000416 },
1197 { name => "pt-CH", oemcp => 65001 },
1198 { name => "pt-CV" },
1199 { name => "pt-GQ", oemcp => 65001 },
1200 { name => "pt-GW" },
1201 { name => "pt-LU", oemcp => 65001 },
1202 { name => "pt-MO" },
1203 { name => "pt-MZ" },
1204 { name => "pt-PT", lcid => 0x00000816, sabbrevlangname => "PTG" },
1205 { name => "pt-ST" },
1206 { name => "pt-TL" },
1207 ## name => "qps-Latn-x-sh", lcid => 0x80000901 },
1208 ## name => "qps-ploc", lcid => 0x80000501 },
1209 ## name => "qps-ploca", lcid => 0x800005fe },
1210 ## name => "qps-plocm", lcid => 0x800009ff },
1211 { name => "qu", alias => "quz" },
1212 { name => "qu-BO", alias => "quz-BO" },
1213 { name => "qu-EC", alias => "quz-EC" },
1214 { name => "qu-PE", alias => "quz-PE" },
1215 { name => "quc", lcid => 0x00000086, oemcp => 850, ebcdiccp => 20284, slist => "," },
1216 { name => "quc-Latn", lcid => 0x00007c86, file => "quc" },
1217 { name => "quc-Latn-GT", lcid => 0x00000486, file => "quc_GT" },
1218 { name => "qut", alias => "quc" },
1219 { name => "qut-GT", alias => "quc-Latn-GT" },
1220 { name => "quz", lcid => 0x0000006b, file => "qu", territory => "BO", oemcp => 850, ebcdiccp => 20284, slist => "," },
1221 { name => "quz-BO", lcid => 0x0000046b, file => "qu_BO" },
1222 { name => "quz-EC", lcid => 0x0000086b, file => "qu_EC" },
1223 { name => "quz-Latn", alias => "quz" },
1224 { name => "quz-Latn-BO", alias => "quz-BO" },
1225 { name => "quz-Latn-EC", alias => "quz-EC" },
1226 { name => "quz-Latn-PE", alias => "quz-PE" },
1227 { name => "quz-PE", lcid => 0x00000c6b, file => "qu_PE" },
1228 { name => "rm", lcid => 0x00000017, oemcp => 850, ebcdiccp => 20273, sabbrevlangname => "RMC", sopentypelang => "RMS" },
1229 { name => "rm-CH", lcid => 0x00000417 },
1230 { name => "rn" },
1231 { name => "rn-BI" },
1232 { name => "ro", lcid => 0x00000018, oemcp => 852, ebcdiccp => 20880, sabbrevlangname => "ROM", sopentypelang => "ROM" },
1233 { name => "ro-MD", lcid => 0x00000818, maccp => 65001, sabbrevlangname => "ROD" },
1234 { name => "ro-RO", lcid => 0x00000418, group => 2 },
1235 { name => "rof" },
1236 { name => "rof-TZ" },
1237 { name => "ru", lcid => 0x00000019, oemcp => 866 },
1238 { name => "ru-BY", maccp => 65001 },
1239 { name => "ru-KG", maccp => 65001 },
1240 { name => "ru-KZ", maccp => 65001 },
1241 { name => "ru-MD", lcid => 0x00000819, maccp => 65001, sabbrevlangname => "RUM" },
1242 { name => "ru-RU", lcid => 0x00000419, group => 5 },
1243 { name => "ru-UA", maccp => 65001 },
1244 { name => "rw", lcid => 0x00000087, oemcp => 437, sopentypelang => "RUA" },
1245 { name => "rw-RW", lcid => 0x00000487 },
1246 { name => "rwk" },
1247 { name => "rwk-TZ" },
1248 { name => "sa", lcid => 0x0000004f, slist => ",", group => 15 },
1249 { name => "sa-Deva", alias => "sa" },
1250 { name => "sa-Deva-IN", alias => "sa-IN" },
1251 { name => "sa-IN", lcid => 0x0000044f },
1252 { name => "sah", lcid => 0x00000085, oemcp => 866, group => 5, sopentypelang => "YAK" },
1253 { name => "sah-Cyrl", alias => "sah" },
1254 { name => "sah-Cyrl-RU", alias => "sah-RU" },
1255 { name => "sah-RU", lcid => 0x00000485 },
1256 { name => "saq" },
1257 { name => "saq-KE" },
1258 { name => "sat" },
1259 { name => "sat-Olck" },
1260 { name => "sat-Olck-IN" },
1261 { name => "sbp" },
1262 { name => "sbp-TZ" },
1263 { name => "sc" },
1264 { name => "sc-IT" },
1265 { name => "sd", lcid => 0x00000059, inegnumber => 3, oemcp => 720, sabbrevlangname => "SIP" },
1266 { name => "sd-Arab", lcid => 0x00007c59, group => 13 },
1267 { name => "sd-Arab-PK", lcid => 0x00000859 },
1268 { name => "sd-Deva", inegnumber => 1, slist => ",", oemcp => 65001, group => 15 },
1269 { name => "sd-Deva-IN", lcid => 0x00000459, sabbrevlangname => "ZZZ" },
1270 { name => "sd-PK", alias => "sd-Arab-PK" },
1271 { name => "se", lcid => 0x0000003b, oemcp => 850, ebcdiccp => 20277, sopentypelang => "NSM" },
1272 { name => "se-FI", lcid => 0x00000c3b, ebcdiccp => 20278, sabbrevlangname => "SMG" },
1273 { name => "se-NO", lcid => 0x0000043b },
1274 { name => "se-SE", lcid => 0x0000083b, ebcdiccp => 20278, sabbrevlangname => "SMF" },
1275 { name => "se-Latn", alias => "se" },
1276 { name => "se-Latn-FI", alias => "se-FI" },
1277 { name => "se-Latn-NO", alias => "se-NO" },
1278 { name => "se-Latn-SE", alias => "se-SE" },
1279 { name => "seh" },
1280 { name => "seh-MZ" },
1281 { name => "ses" },
1282 { name => "ses-ML" },
1283 { name => "sg", sopentypelang => "SGO" },
1284 { name => "sg-CF" },
1285 { name => "shi" },
1286 { name => "shi-Latn" },
1287 { name => "shi-Latn-MA" },
1288 { name => "shi-Tfng" },
1289 { name => "shi-Tfng-MA" },
1290 { name => "si", lcid => 0x0000005b, group => 15, sopentypelang => "SNH" },
1291 { name => "si-LK", lcid => 0x0000045b },
1292 { name => "sk", lcid => 0x0000001b, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "SKY", sopentypelang => "SKY" },
1293 { name => "sk-SK", lcid => 0x0000041b },
1294 { name => "sl", lcid => 0x00000024, oemcp => 852, ebcdiccp => 20880, group => 2 },
1295 { name => "sl-SI", lcid => 0x00000424 },
1296 { name => "sma", lcid => 0x0000783b, sparent => "se", ebcdiccp => 20278, sabbrevlangname => "SMB", sopentypelang => "SSM" },
1297 { name => "sma-Latn", alias => "sma" },
1298 { name => "sma-Latn-NO", alias => "sma-NO" },
1299 { name => "sma-Latn-SE", alias => "sma-SE" },
1300 { name => "sma-NO", lcid => 0x0000183b, ebcdiccp => 20277, sabbrevlangname => "SMA" },
1301 { name => "sma-SE", lcid => 0x00001c3b },
1302 { name => "smj", lcid => 0x00007c3b, sparent => "se", ebcdiccp => 20278, sabbrevlangname => "SMK", sopentypelang => "LSM" },
1303 { name => "smj-Latn", alias => "smj" },
1304 { name => "smj-Latn-NO", alias => "smj-NO" },
1305 { name => "smj-Latn-SE", alias => "smj-SE" },
1306 { name => "smj-NO", lcid => 0x0000103b, ebcdiccp => 20277, sabbrevlangname => "SMJ" },
1307 { name => "smj-SE", lcid => 0x0000143b },
1308 { name => "smn", lcid => 0x0000703b, sparent => "se", ebcdiccp => 20278, sopentypelang => "ISM" },
1309 { name => "smn-FI", lcid => 0x0000243b },
1310 { name => "smn-Latn", alias => "smn" },
1311 { name => "smn-Latn-FI", alias => "smn-FI" },
1312 { name => "sms", lcid => 0x0000743b, sparent => "se", ebcdiccp => 20278, sopentypelang => "SKS" },
1313 { name => "sms-FI", lcid => 0x0000203b },
1314 { name => "sms-Latn", alias => "sms" },
1315 { name => "sms-Latn-FI", alias => "sms-FI" },
1316 { name => "sn", sopentypelang => "SNA0" },
1317 { name => "sn-Latn", file => "sn" },
1318 { name => "sn-Latn-ZW", file => "sn_ZW" },
1319 { name => "sn-ZW", alias => "sn-Latn-ZW" },
1320 { name => "so", lcid => 0x00000077, sopentypelang => "SML" },
1321 { name => "so-DJ" },
1322 { name => "so-ET" },
1323 { name => "so-KE" },
1324 { name => "so-SO", lcid => 0x00000477 },
1325 { name => "sq", lcid => 0x0000001c, oemcp => 852, ebcdiccp => 20880, group => 2 },
1326 { name => "sq-AL", lcid => 0x0000041c },
1327 { name => "sq-MK" },
1328 { name => "sq-XK" },
1329 { name => "sr", lcid => 0x00007c1a, sortlocale => "sr-Latn-RS", oemcp => 852, group => 2, sabbrevlangname => "SRB", sopentypelang => "SRB" },
1330 { name => "sr-Cyrl", lcid => 0x00006c1a, oemcp => 855, ebcdiccp => 21025, group => 5, sabbrevlangname => "SRO" },
1331 { name => "sr-Cyrl-BA", lcid => 0x00001c1a, sabbrevlangname => "SRN" },
1332 { name => "sr-Cyrl-ME", lcid => 0x0000301a, sabbrevlangname => "SRQ" },
1333 { name => "sr-Cyrl-RS", lcid => 0x0000281a },
1334 { name => "sr-Cyrl-XK" },
1335 { name => "sr-Latn", lcid => 0x0000701a, sabbrevlangname => "SRM" },
1336 { name => "sr-Latn-BA", lcid => 0x0000181a, maccp => 10082, ebcdiccp => 870, sabbrevlangname => "SRS" },
1337 { name => "sr-Latn-ME", lcid => 0x00002c1a, sabbrevlangname => "SRP" },
1338 { name => "sr-Latn-RS", lcid => 0x0000241a, sabbrevlangname => "SRM" },
1339 { name => "sr-Latn-XK" },
1340 ## name => "sr-Cyrl-CS", lcid => 0x00000c1a },
1341 ## name => "sr-Latn-CS", lcid => 0x0000081a },
1342 { name => "ss", sopentypelang => "SWZ" },
1343 { name => "ss-SZ" },
1344 { name => "ss-ZA" },
1345 { name => "ssy" },
1346 { name => "ssy-ER" },
1347 { name => "st", lcid => 0x00000030 },
1348 { name => "st-LS" },
1349 { name => "st-ZA", lcid => 0x00000430 },
1350 { name => "su" },
1351 { name => "su-Latn" },
1352 { name => "su-Latn-ID" },
1353 { name => "sv", lcid => 0x0000001d, oemcp => 850, ebcdiccp => 20278, sabbrevlangname => "SVE", sopentypelang => "SVE" },
1354 { name => "sv-AX" },
1355 { name => "sv-FI", lcid => 0x0000081d, sabbrevlangname => "SVF" },
1356 { name => "sv-SE", lcid => 0x0000041d, sabbrevlangname => "SVE" },
1357 { name => "sw", lcid => 0x00000041, territory => "KE", oemcp => 437, ebcdiccp => 500, sabbrevlangname => "SWK", sopentypelang => "SWK" },
1358 { name => "sw-CD" },
1359 { name => "sw-KE", lcid => 0x00000441 },
1360 { name => "sw-TZ" },
1361 { name => "sw-UG" },
1362 { name => "swc-CD", alias => "sw-CD" },
1363 { name => "syr", lcid => 0x0000005a, slist => ",", group => 13 },
1364 { name => "syr-SY", lcid => 0x0000045a },
1365 { name => "syr-Syrc", alias => "syr" },
1366 { name => "syr-Syrc-SY", alias => "syr-SY" },
1367 { name => "ta", lcid => 0x00000049, slist => ",", group => 15, sabbrevlangname => "TAI" },
1368 { name => "ta-IN", lcid => 0x00000449 },
1369 { name => "ta-LK", lcid => 0x00000849, sabbrevlangname => "TAM" },
1370 { name => "ta-MY" },
1371 { name => "ta-SG" },
1372 { name => "te", lcid => 0x0000004a, group => 15 },
1373 { name => "te-IN", lcid => 0x0000044a },
1374 { name => "teo" },
1375 { name => "teo-KE" },
1376 { name => "teo-UG" },
1377 { name => "tg", lcid => 0x00000028, oemcp => 866, group => 5, sabbrevlangname => "TAJ", sopentypelang => "TAJ" },
1378 { name => "tg-Cyrl", lcid => 0x00007c28, file => "tg" },
1379 { name => "tg-Cyrl-TJ", lcid => 0x00000428, file => "tg_TJ" },
1380 { name => "tg-TJ", alias => "tg-Cyrl-TJ" },
1381 { name => "th", lcid => 0x0000001e, oemcp => 874, ebcdiccp => 20838, slist => ",", group => 11 },
1382 { name => "th-TH", lcid => 0x0000041e },
1383 { name => "ti", lcid => 0x00000073, territory => "ER", sopentypelang => "TGY" },
1384 { name => "ti-ER", lcid => 0x00000873 },
1385 { name => "ti-ET", lcid => 0x00000473, sabbrevlangname => "TIE" },
1386 { name => "tig", sopentypelang => "TGR" },
1387 { name => "tig-ER" },
1388 { name => "tig-Ethi-ER", alias => "tig-ER" },
1389 { name => "tk", lcid => 0x00000042, oemcp => 852, ebcdiccp => 20880, group => 2, sopentypelang => "TKM" },
1390 { name => "tk-Latn", alias => "tk" },
1391 { name => "tk-Latn-TM", alias => "tk-TM" },
1392 { name => "tk-TM", lcid => 0x00000442 },
1393 { name => "tn", lcid => 0x00000032, oemcp => 850, sopentypelang => "TNA" },
1394 { name => "tn-BW", lcid => 0x00000832, sabbrevlangname => "TSB" },
1395 { name => "tn-ZA", lcid => 0x00000432 },
1396 { name => "to", sopentypelang => "TGN" },
1397 { name => "to-TO" },
1398 { name => "tr", lcid => 0x0000001f, oemcp => 857, ebcdiccp => 20905, group => 6, sabbrevlangname => "TRK", sopentypelang => "TRK" },
1399 { name => "tr-CY" },
1400 { name => "tr-TR", lcid => 0x0000041f },
1401 { name => "ts", lcid => 0x00000031, sopentypelang => "TSG" },
1402 { name => "ts-ZA", lcid => 0x00000431 },
1403 { name => "tt", lcid => 0x00000044, oemcp => 866, group => 5, sabbrevlangname => "TTT" },
1404 { name => "tt-Cyrl", alias => "tt" },
1405 { name => "tt-Cyrl-RU", alias => "tt-RU" },
1406 { name => "tt-RU", lcid => 0x00000444 },
1407 { name => "twq" },
1408 { name => "twq-NE" },
1409 { name => "tzm", lcid => 0x0000005f, sortlocale => "tzm-Latn-DZ", oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "TZA" },
1410 { name => "tzm-Latn", lcid => 0x00007c5f, territory => "DZ", file => "tzm" },
1411 { name => "tzm-Latn-MA", file => "tzm_MA", oemcp => 65001 },
1412 { name => "tzm-Latn-DZ", lcid => 0x0000085f, file => "tzm" },
1413 { name => "tzm-MA", alias => "tzm-Latn-MA" },
1414 { name => "tzm-DZ", alias => "tzm-Latn-DZ" },
1415 ## name => "tzm-Arab", group => 13 },
1416 ## name => "tzm-Arab-MA", lcid => 0x0000045f },
1417 ## name => "tzm-Tfng", lcid => 0x0000785f },
1418 ## name => "tzm-Tfng-MA", lcid => 0x0000105f },
1419 { name => "ug", lcid => 0x00000080, oemcp => 720, slist => ",", group => 13, sopentypelang => "UYG", nativedigits => "0123456789" },
1420 { name => "ug-Arab", alias => "ug" },
1421 { name => "ug-Arab-CN", alias => "ug-CN" },
1422 { name => "ug-CN", lcid => 0x00000480 },
1423 { name => "uk", lcid => 0x00000022, oemcp => 866, maccp => 10017, ebcdiccp => 500, group => 5 },
1424 { name => "uk-UA", lcid => 0x00000422 },
1425 { name => "ur", lcid => 0x00000020, oemcp => 720 },
1426 { name => "ur-IN", lcid => 0x00000820, maccp => 65001, sabbrevlangname => "URI" },
1427 { name => "ur-PK", lcid => 0x00000420, group => 13 },
1428 { name => "uz", lcid => 0x00000043, oemcp => 857, maccp => 10029, group => 2 },
1429 { name => "uz-Arab", oemcp => 65001, maccp => 65001 },
1430 { name => "uz-Arab-AF" },
1431 { name => "uz-Cyrl", lcid => 0x00007843, oemcp => 866, maccp => 10007, group => 5, sabbrevlangname => "UZC" },
1432 { name => "uz-Cyrl-UZ", lcid => 0x00000843 },
1433 { name => "uz-Latn", lcid => 0x00007c43 },
1434 { name => "uz-Latn-UZ", lcid => 0x00000443 },
1435 { name => "vai" },
1436 { name => "vai-Latn" },
1437 { name => "vai-Latn-LR" },
1438 { name => "vai-Vaii" },
1439 { name => "vai-Vaii-LR" },
1440 { name => "ve", lcid => 0x00000033, sabbrevlangname => "ZZZ" },
1441 { name => "ve-ZA", lcid => 0x00000433 },
1442 { name => "vi", lcid => 0x0000002a, oemcp => 1258, slist => ",", group => 14, sabbrevlangname => "VIT", sopentypelang => "VIT" },
1443 { name => "vi-VN", lcid => 0x0000042a },
1444 { name => "vo" },
1445 { name => "vo-001" },
1446 { name => "vun" },
1447 { name => "vun-TZ" },
1448 { name => "wa", oemcp => 850 },
1449 { name => "wa-BE" },
1450 { name => "wae" },
1451 { name => "wae-CH" },
1452 { name => "wal" },
1453 { name => "wal-ET" },
1454 { name => "wo", lcid => 0x00000088, oemcp => 850, ebcdiccp => 20297, sopentypelang => "WLF" },
1455 { name => "wo-Latn", alias => "wo" },
1456 { name => "wo-Latn-SN", alias => "wo-SN" },
1457 { name => "wo-SN", lcid => 0x00000488 },
1458 { name => "x-IV_mathan", lcid => 0x0001007f, alias => "" },
1459 { name => "xh", lcid => 0x00000034, oemcp => 850, sopentypelang => "XHS" },
1460 { name => "xh-ZA", lcid => 0x00000434 },
1461 { name => "xog" },
1462 { name => "xog-UG" },
1463 { name => "yav" },
1464 { name => "yav-CM" },
1465 { name => "yi", lcid => 0x0000003d, sabbrevlangname => "ZZZ", sopentypelang => "JII" },
1466 { name => "yi-001", lcid => 0x0000043d },
1467 { name => "yo", lcid => 0x0000006a, oemcp => 437, sopentypelang => "YBA" },
1468 { name => "yo-BJ", ebcdiccp => 500 },
1469 { name => "yo-Latn", alias => "yo" },
1470 { name => "yo-Latn-NG", alias => "yo-NG" },
1471 { name => "yo-NG", lcid => 0x0000046a },
1472 { name => "yrl" },
1473 { name => "yrl-BR" },
1474 { name => "yrl-CO" },
1475 { name => "yrl-VE" },
1476 { name => "yue" },
1477 { name => "yue-Hans" },
1478 { name => "yue-Hans-CN" },
1479 { name => "yue-Hant" },
1480 { name => "yue-Hant-HK" },
1481 { name => "zgh" },
1482 { name => "zgh-MA", alias => "zgh-Tfng-MA" },
1483 { name => "zgh-Tfng", file => "zgh" },
1484 { name => "zgh-Tfng-MA", file => "zgh_MA" },
1485 { name => "zh", lcid => 0x00007804, ireadinglayout => 2, oemcp => 936, slist => ",", sscripts => "Hani Hans", sabbrevlangname => "CHS", sopentypelang => "ZHS", nativedigits => "0123456789" },
1486 { name => "zh-CN", lcid => 0x00000804, file => "zh_Hans_CN", sparent => "zh-Hans" },
1487 { name => "zh-CN_phoneb", lcid => 0x00050804, alias => "zh-CN" },
1488 { name => "zh-CN_stroke", lcid => 0x00020804, alias => "zh-CN" },
1489 { name => "zh-Hans", lcid => 0x00000004, group => 10 },
1490 { name => "zh-Hans-CN", alias => "zh-CN" },
1491 { name => "zh-Hans-CN-u-co-phonebk", alias => "zh-CN_phoneb" },
1492 { name => "zh-Hans-CN-u-co-stroke", alias => "zh-CN_stroke" },
1493 { name => "zh-Hans-HK", slist => ";", nativedigits => "" },
1494 { name => "zh-Hans-MO", slist => ";", nativedigits => "" },
1495 { name => "zh-Hans-SG", alias => "zh-SG" },
1496 { name => "zh-Hans-SG-u-co-phonebk", alias => "zh-SG_phoneb" },
1497 { name => "zh-Hans-SG-u-co-stroke", alias => "zh-SG_stroke" },
1498 { name => "zh-Hant", lcid => 0x00007c04, sortlocale => "zh-HK", ireadinglayout => 2, oemcp => 950, slist => ",", sscripts => "Hani Hant", group => 9, sabbrevlangname => "CHT", sopentypelang => "ZHH" },
1499 { name => "zh-Hant-HK", alias => "zh-HK" },
1500 { name => "zh-Hant-HK-u-co-unihan", alias => "zh-HK_radstr" },
1501 { name => "zh-Hant-MO", alias => "zh-MO" },
1502 { name => "zh-Hant-MO-u-co-stroke", alias => "zh-MO_stroke" },
1503 { name => "zh-Hant-MO-u-co-unihan", alias => "zh-MO_radstr" },
1504 { name => "zh-Hant-TW", alias => "zh-TW" },
1505 { name => "zh-Hant-TW-u-co-phonetic", alias => "zh-TW_pronun" },
1506 { name => "zh-Hant-TW-u-co-unihan", alias => "zh-TW_radstr" },
1507 { name => "zh-HK", lcid => 0x00000c04, file => "zh_Hant_HK", sparent => "zh-Hant", sabbrevlangname => "ZHH" },
1508 { name => "zh-HK_radstr", lcid => 0x00040c04, alias => "zh-HK" },
1509 { name => "zh-MO", lcid => 0x00001404, file => "zh_Hant_MO", sparent => "zh-Hant", sabbrevlangname => "ZHM", sopentypelang => "ZHT" },
1510 { name => "zh-MO_radstr", lcid => 0x00041404, alias => "zh-MO" },
1511 { name => "zh-MO_stroke", lcid => 0x00021404, alias => "zh-MO" },
1512 { name => "zh-SG", lcid => 0x00001004, file => "zh_Hans_SG", sparent => "zh-Hans", sabbrevlangname => "ZHI" },
1513 { name => "zh-SG_phoneb", lcid => 0x00051004, alias => "zh-SG" },
1514 { name => "zh-SG_stroke", lcid => 0x00021004, alias => "zh-SG" },
1515 { name => "zh-TW", lcid => 0x00000404, file => "zh_Hant_TW", sparent => "zh-Hant", sopentypelang => "ZHT" },
1516 { name => "zh-TW_pronun", lcid => 0x00030404, alias => "zh-TW" },
1517 { name => "zh-TW_radstr", lcid => 0x00040404, alias => "zh-TW" },
1518 { name => "zu", lcid => 0x00000035, oemcp => 850 },
1519 { name => "zu-ZA", lcid => 0x00000435 },
1522 my @calendars =
1524 { id => 1, name => "Gregorian", itwodigityearmax => 2049 },
1525 { id => 2, type => "gregorian", locale => "en-US", itwodigityearmax => 2049 },
1526 { id => 3, type => "japanese", locale => "ja-JP", eras => [ 232..236 ] },
1527 { id => 4, type => "roc", locale => "zh-TW", eras => [ 1 ] },
1528 { id => 5, type => "dangi", locale => "ko-KR", eras => [ 0 ] },
1529 { id => 6, type => "islamic", locale => "ar-SA", itwodigityearmax => 1451 },
1530 { id => 7, type => "buddhist", locale => "th-TH", eras => [ 0 ] },
1531 { id => 8, type => "hebrew", locale => "he-IL", itwodigityearmax => 5810 },
1532 { id => 9, type => "gregorian", locale => "fr-FR", itwodigityearmax => 2049 },
1533 { id => 10, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1534 { id => 11, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1535 { id => 12, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1536 { id => 13, name => "Julian", locale => "en-US", itwodigityearmax => 2049 },
1537 { id => 14, name => "Japanese Lunisolar" },
1538 { id => 15, name => "Chinese Lunisolar" },
1539 { id => 16, name => "Saka" },
1540 { id => 17, name => "Lunar ETO Chinese" },
1541 { id => 18, name => "Lunar ETO Korean" },
1542 { id => 19, name => "Lunar ETO Rokuyou" },
1543 { id => 20, name => "Korean Lunisolar" },
1544 { id => 21, name => "Taiwan Lunisolar" },
1545 { id => 22, type => "persian", locale => "prs-AF", itwodigityearmax => 1429 },
1546 { id => 23, type => "islamic-umalqura", locale => "ar-SA", itwodigityearmax => 1451 },
1549 my @geoids =
1551 { id => 2, name => "AG" }, # Antigua and Barbuda
1552 { id => 3, name => "AF" }, # Afghanistan
1553 { id => 4, name => "DZ" }, # Algeria
1554 { id => 5, name => "AZ" }, # Azerbaijan
1555 { id => 6, name => "AL" }, # Albania
1556 { id => 7, name => "AM" }, # Armenia
1557 { id => 8, name => "AD" }, # Andorra
1558 { id => 9, name => "AO" }, # Angola
1559 { id => 10, name => "AS" }, # American Samoa
1560 { id => 11, name => "AR" }, # Argentina
1561 { id => 12, name => "AU" }, # Australia
1562 { id => 14, name => "AT" }, # Austria
1563 { id => 17, name => "BH" }, # Bahrain
1564 { id => 18, name => "BB" }, # Barbados
1565 { id => 19, name => "BW" }, # Botswana
1566 { id => 20, name => "BM" }, # Bermuda
1567 { id => 21, name => "BE" }, # Belgium
1568 { id => 22, name => "BS" }, # Bahamas, The
1569 { id => 23, name => "BD" }, # Bangladesh
1570 { id => 24, name => "BZ" }, # Belize
1571 { id => 25, name => "BA" }, # Bosnia and Herzegovina
1572 { id => 26, name => "BO" }, # Bolivia
1573 { id => 27, name => "MM" }, # Myanmar
1574 { id => 28, name => "BJ" }, # Benin
1575 { id => 29, name => "BY" }, # Belarus
1576 { id => 30, name => "SB" }, # Solomon Islands
1577 { id => 32, name => "BR" }, # Brazil
1578 { id => 34, name => "BT" }, # Bhutan
1579 { id => 35, name => "BG" }, # Bulgaria
1580 { id => 37, name => "BN" }, # Brunei
1581 { id => 38, name => "BI" }, # Burundi
1582 { id => 39, name => "CA" }, # Canada
1583 { id => 40, name => "KH" }, # Cambodia
1584 { id => 41, name => "TD" }, # Chad
1585 { id => 42, name => "LK" }, # Sri Lanka
1586 { id => 43, name => "CG" }, # Congo
1587 { id => 44, name => "CD" }, # Congo (DRC)
1588 { id => 45, name => "CN" }, # China
1589 { id => 46, name => "CL" }, # Chile
1590 { id => 49, name => "CM" }, # Cameroon
1591 { id => 50, name => "KM" }, # Comoros
1592 { id => 51, name => "CO" }, # Colombia
1593 { id => 54, name => "CR" }, # Costa Rica
1594 { id => 55, name => "CF" }, # Central African Republic
1595 { id => 56, name => "CU" }, # Cuba
1596 { id => 57, name => "CV" }, # Cape Verde
1597 { id => 59, name => "CY" }, # Cyprus
1598 { id => 61, name => "DK" }, # Denmark
1599 { id => 62, name => "DJ" }, # Djibouti
1600 { id => 63, name => "DM" }, # Dominica
1601 { id => 65, name => "DO" }, # Dominican Republic
1602 { id => 66, name => "EC" }, # Ecuador
1603 { id => 67, name => "EG" }, # Egypt
1604 { id => 68, name => "IE" }, # Ireland
1605 { id => 69, name => "GQ" }, # Equatorial Guinea
1606 { id => 70, name => "EE" }, # Estonia
1607 { id => 71, name => "ER" }, # Eritrea
1608 { id => 72, name => "SV" }, # El Salvador
1609 { id => 73, name => "ET" }, # Ethiopia
1610 { id => 75, name => "CZ" }, # Czech Republic
1611 { id => 77, name => "FI" }, # Finland
1612 { id => 78, name => "FJ" }, # Fiji Islands
1613 { id => 80, name => "FM" }, # Micronesia
1614 { id => 81, name => "FO" }, # Faroe Islands
1615 { id => 84, name => "FR" }, # France
1616 { id => 86, name => "GM" }, # Gambia, The
1617 { id => 87, name => "GA" }, # Gabon
1618 { id => 88, name => "GE" }, # Georgia
1619 { id => 89, name => "GH" }, # Ghana
1620 { id => 90, name => "GI" }, # Gibraltar
1621 { id => 91, name => "GD" }, # Grenada
1622 { id => 93, name => "GL" }, # Greenland
1623 { id => 94, name => "DE" }, # Germany
1624 { id => 98, name => "GR" }, # Greece
1625 { id => 99, name => "GT" }, # Guatemala
1626 { id => 100, name => "GN" }, # Guinea
1627 { id => 101, name => "GY" }, # Guyana
1628 { id => 103, name => "HT" }, # Haiti
1629 { id => 104, name => "HK" }, # Hong Kong S.A.R.
1630 { id => 106, name => "HN" }, # Honduras
1631 { id => 108, name => "HR" }, # Croatia
1632 { id => 109, name => "HU" }, # Hungary
1633 { id => 110, name => "IS" }, # Iceland
1634 { id => 111, name => "ID" }, # Indonesia
1635 { id => 113, name => "IN" }, # India
1636 { id => 114, name => "IO" }, # British Indian Ocean Territory
1637 { id => 116, name => "IR" }, # Iran
1638 { id => 117, name => "IL" }, # Israel
1639 { id => 118, name => "IT" }, # Italy
1640 { id => 119, name => "CI" }, # Côte d'Ivoire
1641 { id => 121, name => "IQ" }, # Iraq
1642 { id => 122, name => "JP" }, # Japan
1643 { id => 124, name => "JM" }, # Jamaica
1644 { id => 125, name => "SJ" }, # Jan Mayen
1645 { id => 126, name => "JO" }, # Jordan
1646 { id => 127, parent => "UM" }, # Johnston Atoll
1647 { id => 129, name => "KE" }, # Kenya
1648 { id => 130, name => "KG" }, # Kyrgyzstan
1649 { id => 131, name => "KP" }, # North Korea
1650 { id => 133, name => "KI" }, # Kiribati
1651 { id => 134, name => "KR" }, # Korea
1652 { id => 136, name => "KW" }, # Kuwait
1653 { id => 137, name => "KZ" }, # Kazakhstan
1654 { id => 138, name => "LA" }, # Laos
1655 { id => 139, name => "LB" }, # Lebanon
1656 { id => 140, name => "LV" }, # Latvia
1657 { id => 141, name => "LT" }, # Lithuania
1658 { id => 142, name => "LR" }, # Liberia
1659 { id => 143, name => "SK" }, # Slovakia
1660 { id => 145, name => "LI" }, # Liechtenstein
1661 { id => 146, name => "LS" }, # Lesotho
1662 { id => 147, name => "LU" }, # Luxembourg
1663 { id => 148, name => "LY" }, # Libya
1664 { id => 149, name => "MG" }, # Madagascar
1665 { id => 151, name => "MO" }, # Macao S.A.R.
1666 { id => 152, name => "MD" }, # Moldova
1667 { id => 154, name => "MN" }, # Mongolia
1668 { id => 156, name => "MW" }, # Malawi
1669 { id => 157, name => "ML" }, # Mali
1670 { id => 158, name => "MC" }, # Monaco
1671 { id => 159, name => "MA" }, # Morocco
1672 { id => 160, name => "MU" }, # Mauritius
1673 { id => 162, name => "MR" }, # Mauritania
1674 { id => 163, name => "MT" }, # Malta
1675 { id => 164, name => "OM" }, # Oman
1676 { id => 165, name => "MV" }, # Maldives
1677 { id => 166, name => "MX" }, # Mexico
1678 { id => 167, name => "MY" }, # Malaysia
1679 { id => 168, name => "MZ" }, # Mozambique
1680 { id => 173, name => "NE" }, # Niger
1681 { id => 174, name => "VU" }, # Vanuatu
1682 { id => 175, name => "NG" }, # Nigeria
1683 { id => 176, name => "NL" }, # Netherlands
1684 { id => 177, name => "NO" }, # Norway
1685 { id => 178, name => "NP" }, # Nepal
1686 { id => 180, name => "NR" }, # Nauru
1687 { id => 181, name => "SR" }, # Suriname
1688 { id => 182, name => "NI" }, # Nicaragua
1689 { id => 183, name => "NZ" }, # New Zealand
1690 { id => 184, name => "PS" }, # Palestinian Authority
1691 { id => 185, name => "PY" }, # Paraguay
1692 { id => 187, name => "PE" }, # Peru
1693 { id => 190, name => "PK" }, # Pakistan
1694 { id => 191, name => "PL" }, # Poland
1695 { id => 192, name => "PA" }, # Panama
1696 { id => 193, name => "PT" }, # Portugal
1697 { id => 194, name => "PG" }, # Papua New Guinea
1698 { id => 195, name => "PW" }, # Palau
1699 { id => 196, name => "GW" }, # Guinea-Bissau
1700 { id => 197, name => "QA" }, # Qatar
1701 { id => 198, name => "RE" }, # Reunion
1702 { id => 199, name => "MH" }, # Marshall Islands
1703 { id => 200, name => "RO" }, # Romania
1704 { id => 201, name => "PH" }, # Philippines
1705 { id => 202, name => "PR" }, # Puerto Rico
1706 { id => 203, name => "RU" }, # Russia
1707 { id => 204, name => "RW" }, # Rwanda
1708 { id => 205, name => "SA" }, # Saudi Arabia
1709 { id => 206, name => "PM" }, # St. Pierre and Miquelon
1710 { id => 207, name => "KN" }, # St. Kitts and Nevis
1711 { id => 208, name => "SC" }, # Seychelles
1712 { id => 209, name => "ZA" }, # South Africa
1713 { id => 210, name => "SN" }, # Senegal
1714 { id => 212, name => "SI" }, # Slovenia
1715 { id => 213, name => "SL" }, # Sierra Leone
1716 { id => 214, name => "SM" }, # San Marino
1717 { id => 215, name => "SG" }, # Singapore
1718 { id => 216, name => "SO" }, # Somalia
1719 { id => 217, name => "ES" }, # Spain
1720 { id => 218, name => "LC" }, # St. Lucia
1721 { id => 219, name => "SD" }, # Sudan
1722 { id => 220, name => "SJ" }, # Svalbard
1723 { id => 221, name => "SE" }, # Sweden
1724 { id => 222, name => "SY" }, # Syria
1725 { id => 223, name => "CH" }, # Switzerland
1726 { id => 224, name => "AE" }, # United Arab Emirates
1727 { id => 225, name => "TT" }, # Trinidad and Tobago
1728 { id => 227, name => "TH" }, # Thailand
1729 { id => 228, name => "TJ" }, # Tajikistan
1730 { id => 231, name => "TO" }, # Tonga
1731 { id => 232, name => "TG" }, # Togo
1732 { id => 233, name => "ST" }, # São Tomé and Príncipe
1733 { id => 234, name => "TN" }, # Tunisia
1734 { id => 235, name => "TR" }, # Turkey
1735 { id => 236, name => "TV" }, # Tuvalu
1736 { id => 237, name => "TW" }, # Taiwan
1737 { id => 238, name => "TM" }, # Turkmenistan
1738 { id => 239, name => "TZ" }, # Tanzania
1739 { id => 240, name => "UG" }, # Uganda
1740 { id => 241, name => "UA" }, # Ukraine
1741 { id => 242, name => "GB" }, # United Kingdom
1742 { id => 244, name => "US" }, # United States
1743 { id => 245, name => "BF" }, # Burkina Faso
1744 { id => 246, name => "UY" }, # Uruguay
1745 { id => 247, name => "UZ" }, # Uzbekistan
1746 { id => 248, name => "VC" }, # St. Vincent and the Grenadines
1747 { id => 249, name => "VE" }, # Bolivarian Republic of Venezuela
1748 { id => 251, name => "VN" }, # Vietnam
1749 { id => 252, name => "VI" }, # Virgin Islands
1750 { id => 253, name => "VA" }, # Vatican City
1751 { id => 254, name => "NA" }, # Namibia
1752 { id => 257, name => "EH" }, # Western Sahara (disputed)
1753 { id => 258, parent => "UM" }, # Wake Island
1754 { id => 259, name => "WS" }, # Samoa
1755 { id => 260, name => "SZ" }, # Swaziland
1756 { id => 261, name => "YE" }, # Yemen
1757 { id => 263, name => "ZM" }, # Zambia
1758 { id => 264, name => "ZW" }, # Zimbabwe
1759 { id => 269, name => "CS" }, # Serbia and Montenegro (Former)
1760 { id => 270, name => "ME" }, # Montenegro
1761 { id => 271, name => "RS" }, # Serbia
1762 { id => 273, name => "CW" }, # Curaçao
1763 { id => 276, name => "SS" }, # South Sudan
1764 { id => 300, name => "AI" }, # Anguilla
1765 { id => 301, name => "AQ" }, # Antarctica
1766 { id => 302, name => "AW" }, # Aruba
1767 { id => 303, parent => "SH" }, # Ascension Island
1768 { id => 304, parent => "053" }, # Ashmore and Cartier Islands
1769 { id => 305, parent => "UM" }, # Baker Island
1770 { id => 306, name => "BV" }, # Bouvet Island
1771 { id => 307, name => "KY" }, # Cayman Islands
1772 { id => 308, name => "830", parent => "155" }, # Channel Islands
1773 { id => 309, name => "CX" }, # Christmas Island
1774 { id => 310, parent => "009" }, # Clipperton Island
1775 { id => 311, name => "CC" }, # Cocos (Keeling) Islands
1776 { id => 312, name => "CK" }, # Cook Islands
1777 { id => 313, parent => "053" }, # Coral Sea Islands
1778 { id => 314, parent => "IO" }, # Diego Garcia
1779 { id => 315, name => "FK" }, # Falkland Islands (Islas Malvinas)
1780 { id => 317, name => "GF" }, # French Guiana
1781 { id => 318, name => "PF" }, # French Polynesia
1782 { id => 319, name => "TF" }, # French Southern and Antarctic Lands
1783 { id => 321, name => "GP" }, # Guadeloupe
1784 { id => 322, name => "GU" }, # Guam
1785 { id => 323 }, # Guantanamo Bay
1786 { id => 324, name => "GG" }, # Guernsey
1787 { id => 325, name => "HM" }, # Heard Island and McDonald Islands
1788 { id => 326, parent => "UM" }, # Howland Island
1789 { id => 327, parent => "UM" }, # Jarvis Island
1790 { id => 328, name => "JE" }, # Jersey
1791 { id => 329, parent => "UM" }, # Kingman Reef
1792 { id => 330, name => "MQ" }, # Martinique
1793 { id => 331, name => "YT" }, # Mayotte
1794 { id => 332, name => "MS" }, # Montserrat
1795 { id => 333, name => "AN", region => 1 }, # Netherlands Antilles (Former)
1796 { id => 334, name => "NC" }, # New Caledonia
1797 { id => 335, name => "NU" }, # Niue
1798 { id => 336, name => "NF" }, # Norfolk Island
1799 { id => 337, name => "MP" }, # Northern Mariana Islands
1800 { id => 338, parent => "UM" }, # Palmyra Atoll
1801 { id => 339, name => "PN" }, # Pitcairn Islands
1802 { id => 340, parent => "MP" }, # Rota Island
1803 { id => 341, parent => "MP" }, # Saipan
1804 { id => 342, name => "GS" }, # South Georgia and the South Sandwich Islands
1805 { id => 343, name => "SH" }, # St. Helena
1806 { id => 346, parent => "MP" }, # Tinian Island
1807 { id => 347, name => "TK" }, # Tokelau
1808 { id => 348, parent => "SH" }, # Tristan da Cunha
1809 { id => 349, name => "TC" }, # Turks and Caicos Islands
1810 { id => 351, name => "VG" }, # Virgin Islands, British
1811 { id => 352, name => "WF" }, # Wallis and Futuna
1812 { id => 742, name => "002" }, # Africa
1813 { id => 2129, name => "142" }, # Asia
1814 { id => 10541, name => "150" }, # Europe
1815 { id => 15126, name => "IM" }, # Man, Isle of
1816 { id => 19618, name => "MK" }, # Macedonia, Former Yugoslav Republic of
1817 { id => 20900, name => "054" }, # Melanesia
1818 { id => 21206, name => "057" }, # Micronesia
1819 { id => 21242, parent => "UM" }, # Midway Islands
1820 { id => 23581, name => "021" }, # Northern America
1821 { id => 26286, name => "061" }, # Polynesia
1822 { id => 27082, name => "013" }, # Central America
1823 { id => 27114, name => "009" }, # Oceania
1824 { id => 30967, name => "SX" }, # Sint Maarten (Dutch part)
1825 { id => 31396, name => "005" }, # South America
1826 { id => 31706, name => "MF" }, # Saint Martin (French part)
1827 { id => 39070, name => "001" }, # World
1828 { id => 42483, name => "011" }, # Western Africa
1829 { id => 42484, name => "017" }, # Middle Africa
1830 { id => 42487, name => "015" }, # Northern Africa
1831 { id => 47590, name => "143" }, # Central Asia
1832 { id => 47599, name => "035" }, # South-Eastern Asia
1833 { id => 47600, name => "030" }, # Eastern Asia
1834 { id => 47603, name => "014" }, # Eastern Africa
1835 { id => 47609, name => "151" }, # Eastern Europe
1836 { id => 47610, name => "039" }, # Southern Europe
1837 { id => 47611, name => "145" }, # Middle East
1838 { id => 47614, name => "034" }, # Southern Asia
1839 { id => 7299303, name => "TL" }, # Democratic Republic of Timor-Leste
1840 { id => 9914689, name => "XK" }, # Kosovo
1841 { id => 10026358, name => "019" }, # Americas
1842 { id => 10028789, name => "AX" }, # Åland Islands
1843 { id => 10039880, name => "029", sintlsymbol => "XCD" }, # Caribbean
1844 { id => 10039882, name => "154" }, # Northern Europe
1845 { id => 10039883, name => "018" }, # Southern Africa
1846 { id => 10210824, name => "155" }, # Western Europe
1847 { id => 10210825, name => "053" }, # Australia and New Zealand
1848 { id => 161832015, name => "BL" }, # Saint Barthélemy
1849 { id => 161832256, name => "UM" }, # U.S. Minor Outlying Islands
1850 { id => 161832257, name => "419", parent => "019" }, # Latin America and the Caribbean
1851 { id => 161832258, name => "BQ" }, # Bonaire, Sint Eustatius and Saba
# file-scope tables and state shared by the subs below; all start out empty

# not populated in this part of the file
my (@cp2uni, @glyph2uni, @lead_bytes, @uni2cp);
my (@cjk_compat_table, @chinese_traditional_table, @chinese_simplified_table);
my (@comp_exclusions, @idna_decomp_table, @idna_disallowed);

# indexed by code point; load_data() fills these from UnicodeData.txt
my (@tolower_table, @toupper_table, @digitmap_table);
my (@halfwidth_table, @fullwidth_table);
my (@category_table, @initial_joining_table, @direction_table);
my (@decomp_table, @combining_class_table, @decomp_compat_table);

my %registry_keys;
my $default_char;
my $default_wchar;

# one table per joining form; load_data() stores the presentation form
# found for each base character, indexed by the base character
my %joining_forms = map { ($_ => []) } qw(isolated final initial medial);

# source ("url" or "url:member") of the file last opened by open_data_file()
my $current_data_file;
################################################################
# convert a list of code points into a list of UTF-16 code units
sub to_utf16(@)
{
    # values below 0x10000 are passed through untouched; anything else is
    # rebased by 0x10000 and split into a high/low surrogate pair
    my @units = map
    {
        my $offset = $_ - 0x10000;
        $offset < 0 ? $_ : (0xd800 | ($offset >> 10), 0xdc00 | ($offset & 0x3ff));
    } @_;
    return @units;
}
################################################################
# fetch a unicode.org file and open it
#
# Arguments:
#   $id   - key into %data_files selecting the download (url, optional
#           cache file name, expected sha256)
#   $name - optional member to extract when the download is a .zip or
#           .tar.gz archive
#
# The download is cached in $XDG_CACHE_HOME/wine (falling back to
# $HOME/.cache/wine) and its SHA-256 is checked on every call.
# Returns a glob holding a read handle on the data and records the source
# in $current_data_file. Dies if the id is unknown, the download fails,
# the checksum does not match or the data cannot be opened.
sub open_data_file($@)
{
    my ($id, $name) = @_;
    my $data = $data_files{$id} or die "unknown data file '$id'";
    my $cache = ($ENV{XDG_CACHE_HOME} || "$ENV{HOME}/.cache") . "/wine";
    local *FILE;

    my $url = $data->{url};
    # cache under the explicit name if there is one, else the last URL component
    my $filename = "$cache/" . ($data->{name} || ($url =~ s/.*\/([^\/]+)$/$1/r));
    unless (-f $filename)
    {
        print "Fetching $url...\n";
        system "mkdir", "-p", $cache;
        if (system "wget", "-q", "-O", $filename, $url)
        {
            # wget -O creates the output file before transferring anything;
            # remove the leftover so that the next run retries the download
            # instead of failing the checksum on a truncated file
            unlink $filename;
            die "cannot fetch $url";
        }
    }

    my $sha = Digest::SHA->new( "sha256" )->addfile( $filename )->hexdigest;
    die "invalid checksum $sha for $filename" unless $sha eq $data->{sha};

    # archives are read through a pipe from the extraction tool (list form,
    # no shell involved); anything else is opened directly
    if ($filename =~ /\.zip$/)
    {
        open FILE, "-|", "unzip", "-p", $filename, $name or die "cannot extract $name from $filename";
    }
    elsif ($filename =~ /\.tar\.gz$/)
    {
        open FILE, "-|", "tar", "-x", "-f", $filename, "-O", $name or die "cannot extract $name from $filename";
    }
    else
    {
        # three-argument open: the file name is never parsed for a mode
        open FILE, "<", $filename or die "cannot open $filename";
    }
    $current_data_file = $name ? "$url:$name" : $url;
    return *FILE;
}
################################################################
# load a unicode.org file as XML data
#
# Takes the same ($id, $name) arguments as open_data_file() and returns
# the document produced by XML::LibXML->load_xml for that data.
sub load_xml_data_file($@)
{
    my ($id, $name) = @_;
    my $FILE = open_data_file( $id, $name );
    my $xml = XML::LibXML->load_xml( IO => $FILE );
    # close the handle we were actually given: open_data_file() localizes
    # *FILE, so the bareword FILE does not refer to it any more at this point
    close $FILE;
    return $xml;
}
################################################################
# expand a character into its full decomposition
#
# $table is a reference to an array indexed by code point whose defined
# entries are array references listing the decomposition of that character.
# Characters without an entry are returned as is; every element of an
# entry is expanded in turn, left to right.
sub get_decomposition($$);
sub get_decomposition($$)
{
    my ($char, $table) = @_;

    my $entry = $table->[$char];
    return $char unless defined $entry;

    my @expanded = map { get_decomposition( $_, $table ) } @{$entry};
    return @expanded;
}
################################################################
# return the sequence that composes into a given character
#
# Returns the entry of @decomp_table for $ch, or an empty list when the
# character must not be generated by composition. $compat selects extra
# restrictions: 1 also consults @decomp_compat_table, 2 also consults
# @idna_decomp_table.
sub get_composition($$)
{
    my ($ch, $compat) = @_;

    my $entry = $decomp_table[$ch];
    return () unless defined $entry;                    # no decomposition
    my @parts = @{$entry};
    return () unless @parts >= 2;                       # singleton decomposition
    return () if $comp_exclusions[$ch];                 # composition exclusion
    return () if $combining_class_table[$ch];           # non-starter

    my ($first, $second) = @parts;
    return () if $combining_class_table[$first];        # first char is non-starter

    if ($compat == 1)
    {
        # first char has only a compat decomposition
        return () if !defined $decomp_table[$first] && defined $decomp_compat_table[$first];
    }
    if ($compat == 2)
    {
        my $first_idna = $idna_decomp_table[$first];
        if (defined $first_idna)
        {
            # first char has only an IDNA decomposition
            return () unless defined $decomp_table[$first];
            # first char's IDNA decomposition itself has an IDNA decomposition
            return () if defined $idna_decomp_table[$first_idna->[0]];
        }
        # second char has an IDNA decomposition
        return () if defined $idna_decomp_table[$second];
    }
    return @parts;
}
################################################################
# build the fully expanded decomposition table
#
# Takes a decomposition table as a list indexed by code point and returns
# a list with the same indices where every defined entry is replaced by a
# reference to its complete decomposition, converted to UTF-16.
sub build_decompositions(@)
{
    my @src = @_;
    my @dst;

    foreach my $code (grep { defined $src[$_] } 0 .. $#src)
    {
        $dst[$code] = [ to_utf16( get_decomposition( $code, \@src ) ) ];
    }
    return @dst;
}
################################################################
# compose Hangul sequences
#
# Scans the sequence left to right, merging a leading consonant followed
# by a vowel into an LV syllable, and an LV syllable followed by a trailing
# consonant into an LVT syllable. Everything else is copied unchanged.
sub compose_hangul(@)
{
    my ($s_base, $l_base, $v_base, $t_base) = (0xac00, 0x1100, 0x1161, 0x11a7);
    my ($l_count, $v_count, $t_count) = (19, 21, 28);
    my $s_count = $l_count * $v_count * $t_count;

    my @input = @_;
    my @output;
    my $pos = 0;

    while ($pos < @input)
    {
        # after this, $pos is the index of the following code point
        my $cur = $input[$pos++];

        # leading consonant + vowel
        if ($pos < @input && $cur >= $l_base && $cur < $l_base + $l_count)
        {
            my $v_index = $input[$pos] - $v_base;
            if ($v_index >= 0 && $v_index < $v_count)
            {
                $cur = $s_base + (($cur - $l_base) * $v_count + $v_index) * $t_count;
                $pos++;
            }
        }

        # syllable without trailing consonant + trailing consonant
        # ($t_base itself is not a consonant, hence the strict comparison)
        if ($pos < @input && $cur >= $s_base && $cur < $s_base + $s_count &&
            ($cur - $s_base) % $t_count == 0)
        {
            my $t_index = $input[$pos] - $t_base;
            if ($t_index > 0 && $t_index < $t_count)
            {
                $cur += $t_index;
                $pos++;
            }
        }

        push @output, $cur;
    }
    return @output;
}
################################################################
# remove linguistic-only mappings from the case table
#
# $upper and $lower are references to the uppercase and lowercase mapping
# arrays, indexed by code point. Both are modified in place: a mapping is
# reset to undef unless the other table maps its target straight back.
# The uppercase table is pruned first, and the lowercase table is then
# checked against the pruned uppercase table.
sub remove_linguistic_mappings($$)
{
    my ($upper, $lower) = @_;

    foreach my $pass ([ $upper, $lower ], [ $lower, $upper ])
    {
        my ($table, $reverse) = @{$pass};

        foreach my $code (0 .. $#{$table})
        {
            my $mapped = $table->[$code];
            next unless defined $mapped;

            # keep only the mappings that round-trip
            my $back = $reverse->[$mapped];
            $table->[$code] = undef unless defined $back && $back == $code;
        }
    }
}
################################################################
# read in the Unicode database files
#
# Takes no arguments.  Parses UnicodeData.txt, CompositionExclusions.txt,
# IdnaMappingTable.txt and Unihan_Variants.txt (all obtained through
# open_data_file) and fills the file-level tables: category, direction,
# joining type, case mapping, digit, combining class, width, decomposition,
# composition exclusion, IDNA and Chinese variant tables.
sub load_data()
{
    my $start;

    # name patterns that set an additional ctype flag on the character
    # NOTE(review): in the "diacritic" pattern the alternation splits the whole
    # expression, so "^" only anchors COMBINING and "\W" only follows
    # MODIFIER LETTER.  Kept unchanged since fixing it would alter the tables.
    my @name_flags =
    (
        [ "diacritic",     qr/^(COMBINING)|(MODIFIER LETTER)\W/ ],
        [ "halfwidth",     qr/^HALFWIDTH\s/ ],
        [ "fullwidth",     qr/^FULLWIDTH\s/ ],
        [ "hiragana",      qr/(HIRAGANA)|(\WKANA\W)/ ],
        [ "katakana",      qr/(KATAKANA)|(\WKANA\W)/ ],
        [ "ideograph",     qr/^<CJK Ideograph/ ],
        [ "ideograph",     qr/^CJK COMPATIBILITY IDEOGRAPH/ ],
        [ "ideograph",     qr/^HANGZHOU/ ],
        [ "highsurrogate", qr/High Surrogate/ ],
        [ "lowsurrogate",  qr/Low Surrogate/ ],
    );

    # build the mappings from the fields of the Unicode database

    my $UNICODE_DATA = open_data_file( "ucd", "UnicodeData.txt" );
    while (<$UNICODE_DATA>)
    {
        # one record is 15 semicolon-separated fields
        my ($code, $name, $cat, $comb, $bidi,
            $decomp, $dec, $dig, $num, $mirror,
            $oldname, $comment, $upper, $lower, $title) = split /;/;
        my $src = hex $code;

        die "unknown category $cat" unless defined $categories{$cat};
        die "unknown directionality $bidi" unless defined $directions{$bidi};

        $category_table[$src] = $categories{$cat};
        $direction_table[$src] = $bidi;

        # marks and format characters start out transparent, everything else non-joining
        my $joining = ($cat eq "Mn" || $cat eq "Me" || $cat eq "Cf") ? "T" : "U";
        $initial_joining_table[$src] = $joining_types{$joining};

        $tolower_table[$src] = hex $lower if $lower ne "";
        $toupper_table[$src] = hex $upper if $upper ne "";
        $category_table[$src] |= $ctype{"digit"} if $dec ne "";
        $digitmap_table[$src] = ord $dig if $dig ne "";
        $combining_class_table[$src] = ($cat eq "Co") ? 0x100 : $comb;  # 0x100 for Private Use

        $category_table[$src] |= $ctype{"nonspacing"} if $bidi eq "NSM";
        $category_table[$src] |= $ctype{"vowelmark"} if $name =~ /\sVOWEL/ || $oldname =~ /\sVOWEL/;
        foreach my $rule (@name_flags)
        {
            my ($flag, $pattern) = @$rule;
            $category_table[$src] |= $ctype{$flag} if $name =~ $pattern;
        }

        # copy the category and direction for everything between First/Last pairs
        $start = $src if $name =~ /, First>/;
        if ($name =~ /, Last>/)
        {
            while ($start < $src)
            {
                $category_table[$start] = $category_table[$src];
                $direction_table[$start] = $direction_table[$src];
                $combining_class_table[$start] = $combining_class_table[$src];
                $start++;
            }
        }

        next if $decomp eq "";  # no decomposition, skip it

        if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)/)
        {
            # tagged decomposition: keep every char after the tag as compatibility mapping
            my (undef, $chars) = split /\s+/, $decomp, 2;
            $decomp_compat_table[$src] = [ map { hex $_; } split /\s+/, $chars ];
        }

        if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
        {
            # decomposition of the form "<foo> 1234" -> use char if type is known
            my ($kind, $dst) = ($1, hex($2));
            if ($kind eq "narrow")
            {
                $halfwidth_table[$dst] = $src;
                $fullwidth_table[$src] = $dst;
            }
            elsif ($kind eq "wide")
            {
                next if $dst == 0x5c;  # don't remap backslash
                $fullwidth_table[$dst] = $src;
                $halfwidth_table[$src] = $dst;
            }
            elsif ($kind eq "font" || $kind eq "square" || $kind eq "circle")
            {
                $fullwidth_table[$src] = $dst if $src >= 0x10000;
            }
            elsif ($kind eq "isolated" || $kind eq "final" || $kind eq "initial" || $kind eq "medial")
            {
                ${joining_forms{$kind}}[$dst] = $src;
            }
        }
        elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
        {
            # decomposition "<compat> 0020 1234" -> combining accent, nothing to store
        }
        elsif ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
        {
            # canonical decomposition into two chars
            $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
        }
        elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
        {
            # canonical decomposition into a single char
            my $dst = hex $1;
            $decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ];
            if ($name =~ /^CJK COMPATIBILITY IDEOGRAPH/)
            {
                $cjk_compat_table[$src] = $dst;
                $fullwidth_table[$src] = $dst if $src >= 0x10000;
            }
        }
    }
    close $UNICODE_DATA;

    # patch the category of some special characters

    foreach my $i (0 .. $#decomp_table)
    {
        my $seq = $decomp_table[$i];
        next unless defined $seq;
        $category_table[$i] |= $category_table[$seq->[0]];
    }
    foreach my $cat (keys %special_categories)
    {
        my $flag = $ctype{$cat};
        foreach my $i (@{$special_categories{$cat}})
        {
            $category_table[$i] |= $flag;
        }
    }
    foreach my $i (0 .. $#decomp_compat_table)
    {
        my $seq = $decomp_compat_table[$i];
        next unless defined $seq;
        next unless @{$seq} == 2;
        # inherit the diacritic flag from the second char of a two-char mapping
        $category_table[$i] |= $category_table[$seq->[1]] & $ctype{"diacritic"};
    }

    # load the composition exclusions

    my $EXCL = open_data_file( "ucd", "CompositionExclusions.txt" );
    while (<$EXCL>)
    {
        s/\#.*//;  # remove comments
        if (/^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s*$/)
        {
            foreach my $i (hex($1) .. hex($2))
            {
                $comp_exclusions[$i] = 1;
            }
        }
        elsif (/^([0-9a-fA-F]+)\s*$/)
        {
            $comp_exclusions[hex $1] = 1;
        }
    }
    close $EXCL;

    # load the IDNA mappings, starting from the compatibility decompositions

    @idna_decomp_table = @decomp_compat_table;
    my $IDNA = open_data_file( "idna", "IdnaMappingTable.txt" );
    while (<$IDNA>)
    {
        s/\#.*//;  # remove comments
        next if /^\s*$/;
        my ($char, $type, $mapping) = split /;/;
        my ($ch1, $ch2);
        if ($char =~ /([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)/)
        {
            ($ch1, $ch2) = (hex($1), hex($2));
        }
        elsif ($char =~ /([0-9a-fA-F]+)/)
        {
            $ch1 = $ch2 = hex $1;
        }

        # the order of the tests matters: status names overlap as substrings
        if ($type =~ /mapped/ || $type =~ /deviation/)
        {
            $mapping =~ s/^\s*(([0-9a-fA-F]+\s+)+)\s*$/$1/;
            my @seq = map { hex $_; } split /\s+/, $mapping;
            my $target = @seq ? \@seq : [ 0 ];
            foreach my $i ($ch1 .. $ch2)
            {
                $idna_decomp_table[$i] = $target;
            }
        }
        elsif ($type =~ /valid/)
        {
            # keep the compatibility decomposition
        }
        elsif ($type =~ /ignored/)
        {
            foreach my $i ($ch1 .. $ch2)
            {
                $idna_decomp_table[$i] = [ 0 ];
            }
        }
        elsif ($type =~ /disallowed/)
        {
            foreach my $i ($ch1 .. $ch2)
            {
                $idna_decomp_table[$i] = undef;
                $idna_disallowed[$i] = 1;
            }
        }
    }
    close $IDNA;

    # load the Unihan mappings

    my $UNIHAN = open_data_file( "unihan", "Unihan_Variants.txt" );
    while (<$UNIHAN>)
    {
        s/\#.*//;  # remove comments
        next if /^\s*$/;
        if (/^U\+([0-9a-fA-F]{4})\s+kTraditionalVariant\s+U\+([0-9a-fA-F]{4})$/)
        {
            next if hex($1) < 0x4dc0;  # skip extension A
            $chinese_traditional_table[hex $1] = hex $2;
        }
        elsif (/^U\+([0-9a-fA-F]{4})\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]{4})$/)
        {
            next if hex($1) < 0x4dc0;  # skip extension A
            $chinese_simplified_table[hex $1] = hex $2;
        }
    }
    close $UNIHAN;

    # map CJK compatibility ideographs to their unified form, unless that
    # form already has a simplified variant of its own
    foreach my $i (0xf900 .. 0xfaff)
    {
        my $unified = $cjk_compat_table[$i];
        next unless defined $unified;
        next if defined $chinese_simplified_table[$unified];
        $chinese_simplified_table[$i] = $unified;
    }
}
################################################################
# add a new registry key
#
# The entry is stored in %registry_keys under "$base\$key" with $defval
# as its first element.  An entry that already exists is left untouched.
sub add_registry_key($$$)
{
    my ($base, $key, $defval) = @_;
    my $path = "$base\\$key";
    return if defined $registry_keys{$path};
    $registry_keys{$path} = [ $defval ];
}
################################################################
# add a new registry value with explicit type
#
# $value must already carry its type prefix; the key is created (without
# a default value) if it does not exist yet.
sub add_registry_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    add_registry_key( $base, $key, undef );
    my $values = $registry_keys{"$base\\$key"};
    push @{$values}, "'$name' = $value";
}
################################################################
# add a new registry string value
#
# Single quotes inside $value are doubled before the value is quoted.
sub add_registry_string_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    (my $quoted = $value) =~ s/\'/\'\'/g;
    add_registry_value( $base, $key, $name, "s '$quoted'" );
}
################################################################
# add a new registry dword value
#
# $value is interpolated as is after the "d" type prefix.
sub add_registry_dword_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my $typed = "d $value";
    add_registry_value( $base, $key, $name, $typed );
}
################################################################
# add a new registry binary value
#
# $value is a byte string; it is stored as two lowercase hex digits per
# byte after the "b" type prefix.
sub add_registry_binary_value($$$$)
{
    my ($base, $key, $name, $value) = @_;
    my @bytes = unpack( "C*", $value );
    my $hex = join "", map { sprintf "%02x", $_; } @bytes;
    add_registry_value( $base, $key, $name, "b " . $hex );
}
################################################################
# define a new lead byte
#
# Does nothing if the code page position already has a mapping; otherwise
# records the byte in @lead_bytes and maps it to 0 in @cp2uni.
sub add_lead_byte($)
{
    my ($byte) = @_;
    unless (defined $cp2uni[$byte])
    {
        push @lead_bytes, $byte;
        $cp2uni[$byte] = 0;
    }
}
################################################################
# define a new char mapping
#
# The first mapping registered for a code page char or for a Unicode char
# wins; later ones do not override it.  A two-byte code page char also
# registers its high byte as a lead byte.
sub add_mapping($$)
{
    my ($cp, $uni) = @_;
    $cp2uni[$cp] = $uni if !defined($cp2uni[$cp]);
    $uni2cp[$uni] = $cp if !defined($uni2cp[$uni]);
    add_lead_byte( $cp >> 8 ) if $cp > 0xff;
}
################################################################
# get a mapping including glyph chars for MB_USEGLYPHCHARS
#
# Returns a copy of the table passed in, with every position that has a
# defined entry in @glyph2uni replaced by that entry.
sub get_glyphs_mapping(@)
{
    my @table = @_;

    foreach my $idx (0 .. $#glyph2uni)
    {
        next unless defined $glyph2uni[$idx];
        $table[$idx] = $glyph2uni[$idx];
    }
    return @table;
}
################################################################
# build EUC-JP table from the JIS 0208/0212 files
#
# Resets the global mapping tables, fills them with the fixed EUC-JP
# mappings followed by the contents of the "jis0208" and "jis0212" data
# files, and writes the result as code page 20932.
sub dump_eucjp_codepage()
{
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = 0x30fb;

    # ASCII chars
    add_mapping( $_, $_ ) foreach 0x00 .. 0x7f;

    # lead bytes
    add_lead_byte( $_ ) foreach 0x8e, 0xa1 .. 0xfe;

    # JIS X 0201 right plane
    add_mapping( 0x8e00 + $_, 0xfec0 + $_ ) foreach 0xa1 .. 0xdf;

    # undefined chars
    $cp2uni[$_] = $_ foreach 0x80 .. 0x8d, 0x8f .. 0x9f;
    $cp2uni[0xa0] = 0xf8f0;
    $cp2uni[0xff] = 0xf8f3;

    # Fix backslash conversion
    add_mapping( 0xa1c0, 0xff3c );

    # Add private mappings for rows undefined in JIS 0208/0212:
    # first every row with the high trail byte range, then with the low one
    my $private = 0xe000;
    foreach my $trail ([ 0xa1, 0xfe ], [ 0x21, 0x7e ])
    {
        my ($lo_first, $lo_last) = @$trail;
        foreach my $hi (0xf5 .. 0xfe)
        {
            foreach my $lo ($lo_first .. $lo_last)
            {
                add_mapping( ($hi << 8) + $lo, $private++ );
            }
        }
    }

    my $INPUT = open_data_file( "jis0208" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^$/ || /\x1a/;
        # three columns; the second and third ones are used
        unless (/^0x[0-9a-fA-F]+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            die "Unrecognized line $_\n";
        }
        add_mapping( 0x8080 + hex $1, hex $2 );
    }
    close $INPUT;

    $INPUT = open_data_file( "jis0212" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^$/ || /\x1a/;
        unless (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            die "Unrecognized line $_\n";
        }
        add_mapping( 0x8000 + hex $1, hex $2 );
    }
    close $INPUT;

    output_codepage_file( 20932 );
}
################################################################
# build Korean Wansung table from the KSX1001 file
#
# The argument list is a Unicode -> code page 949 table, used as a source
# of extra mappings.  The result is written as code page 20949.
sub dump_krwansung_codepage(@)
{
    my @cp949 = @_;
    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = 0x3f;
    $default_wchar = 0x003f;

    # ASCII and undefined chars
    add_mapping( $_, $_ ) foreach 0x00 .. 0x9f;
    add_mapping( 0xa0, 0xf8e6 );
    add_mapping( 0xad, 0xf8e7 );
    add_mapping( 0xae, 0xf8e8 );
    add_mapping( 0xaf, 0xf8e9 );
    add_mapping( 0xfe, 0xf8ea );
    add_mapping( 0xff, 0xf8eb );

    my $INPUT = open_data_file( "ksx1001" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^$/ || /\x1a/;
        unless (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            die "Unrecognized line $_\n";
        }
        add_mapping( 0x8080 + hex $1, hex $2 );
    }
    close $INPUT;

    # get some extra mappings from cp 949

    # snapshot of the lead bytes known at this point; lead bytes added by
    # the loop below are deliberately not part of it
    my @defined_lb;
    $defined_lb[$_] = 1 foreach @lead_bytes;

    foreach my $uni (0x0000 .. 0xffff)
    {
        next if ($uni >= 0x1100 && $uni <= 0x11ff);  # range not used in 20949
        my $cp = $cp949[$uni];
        next unless defined $cp;
        if ($cp >= 0xff)
        {
            # only add chars for lead bytes that exist in 20949
            my $hi = $cp >> 8;
            my $lo = $cp & 0xff;
            next unless $defined_lb[$hi];
            next unless $lo >= 0xa1 && $lo <= 0xfe;
        }
        add_mapping( $cp, $uni );
    }

    output_codepage_file( 20949 );
}
################################################################
# dump an array of integers
#
# Returns the elements formatted as zero-padded hex constants of
# $bit_width bits, separated by commas, with a line break after every
# eighth one.  Undefined elements are printed as $default.  An empty
# array yields a single $default element.
sub dump_array($$@)
{
    my ($bit_width, $default, @array) = @_;
    my $format = sprintf "0x%%0%ux", $bit_width / 4;
    my $last = @array ? $#array : 0;
    my $ret = " ";
    foreach my $idx (0 .. $last)
    {
        $ret .= sprintf($format, $array[$idx] // $default);
        next if $idx == $last;  # no separator after the final element
        $ret .= ($idx % 8 == 7) ? ",\n " : ", ";
    }
    return $ret;
}
################################################################
# dump an SBCS mapping table in binary format
#
# Writes to the OUTPUT handle, in order: the header words, 12 zero bytes
# (no lead byte ranges), the offset of the Unicode -> code page table
# followed by the 256-entry code page -> Unicode table, the optional glyph
# table, two zero words, and one byte per Unicode char for the reverse
# mapping.
sub dump_binary_sbcs_table($)
{
    my $codepage = shift;

    my $has_glyphs = scalar @glyph2uni;
    my @header = ( 13, $codepage, 1, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );
    my $wc_offset = 256 + 3 + ($has_glyphs ? 256 : 0);
    my @cp_table = map { $_ || 0; } @cp2uni[0 .. 255];

    print OUTPUT pack "S<*", @header;
    print OUTPUT pack "C12", (0) x 12;
    print OUTPUT pack "S<*", $wc_offset, @cp_table;

    # glyph table: its size followed by the entries, or a lone 0 if there is none
    my @glyph_table = $has_glyphs ? (256, get_glyphs_mapping(@cp2uni[0 .. 255])) : (0);
    print OUTPUT pack "S<*", @glyph_table;

    print OUTPUT pack "S<*", 0, 0;

    print OUTPUT pack "C*", map { $_ // $default_char; } @uni2cp[0 .. 65535];
}
################################################################
# dump a DBCS mapping table in binary format
#
# Writes to the OUTPUT handle, in order: the header words, the 12-byte
# lead byte range field, the offset of the Unicode -> code page table
# followed by the 256-entry single byte table, the lead byte offsets, one
# 256-entry table per lead byte, and one word per Unicode char for the
# reverse mapping.  As a side effect every lead byte is mapped to 0 in
# @cp2uni.
sub dump_binary_dbcs_table($)
{
    my $codepage = shift;
    my @lb_ranges = get_lb_ranges();
    my @header = ( 13, $codepage, 2, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );

    # each lead byte gets its own 256-entry table, in @lead_bytes order
    my @offsets = (0) x 256;
    my $pos = 0;
    foreach my $i (@lead_bytes)
    {
        $offsets[$i] = ($pos += 256);
        $cp2uni[$i] = 0;
    }

    my $wc_offset = 256 + 3 + 256 * (1 + scalar @lead_bytes);

    print OUTPUT pack "S<*", @header;
    # pad the lead byte ranges with zeros up to 12 bytes; the parentheses
    # make "x" repeat a list instead of building the string "000000000000"
    print OUTPUT pack "C12", @lb_ranges, (0) x 12;
    print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];
    print OUTPUT pack "S<*", 0, scalar(@lb_ranges) / 2, @offsets;

    foreach my $i (@lead_bytes)
    {
        my $base = $i << 8;
        print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_wchar; } @cp2uni[$base .. $base + 255];
    }

    print OUTPUT pack "S<", 4;
    print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
}
################################################################
# get the list of defined lead byte ranges
#
# Returns a flat list of (first, last) pairs, one pair for each run of
# consecutive byte values present in @lead_bytes.
sub get_lb_ranges()
{
    my @is_lead = ();
    my @ranges = ();

    $is_lead[$_] = 1 foreach @lead_bytes;

    my $inside = 0;
    foreach my $byte (0 .. 255)
    {
        if ($inside && !defined $is_lead[$byte])
        {
            # the run ended on the previous byte
            push @ranges, $byte - 1;
            $inside = 0;
        }
        elsif (!$inside && $is_lead[$byte])
        {
            push @ranges, $byte;
            $inside = 1;
        }
    }
    push @ranges, 0xff if $inside;  # close a run that reaches the last byte
    return @ranges;
}
################################################################
# dump the Indic Syllabic Category table
#
# Each table entry holds the syllabic category in the low byte and the
# positional (matra) category shifted left by 8 bits.  The table is
# written to $filename as a two-level mapping.
sub dump_indic($)
{
    my $filename = shift;
    my @indic_table;

    my $INPUT = open_data_file( "ucd", "IndicSyllabicCategory.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown indic $type" unless defined $indic_types{$type};

        # entries that are not entirely inside the BMP are dropped
        next unless $first < 65536 and $last < 65536;
        foreach my $i ($first .. $last)
        {
            $indic_table[$i] = $indic_types{$type};
        }
    }
    close $INPUT;

    # remember the first file name, open_data_file is about to replace it
    my $prev_data_file = $current_data_file;
    $INPUT = open_data_file( "ucd", "IndicPositionalCategory.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown matra $type" unless defined $matra_types{$type};

        foreach my $i ($first .. $last)
        {
            $indic_table[$i] |= $matra_types{$type} << 8;
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Indic Syllabic Category */\n",
                 "/* generated from $prev_data_file */\n",
                 "/* and from $current_data_file */\n",
                 "/* DO NOT EDIT!! */\n\n",
                 "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "indic_syllabic_table", $indic_types{'Other'}, 16, @indic_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Line Break Properties table
#
# Reads LineBreak.txt, which assigns a two or three character class to a
# single char or to a range of chars, and writes the table to $filename
# as a three-level mapping.
sub dump_linebreak($)
{
    my $filename = shift;
    my @break_table;

    my $INPUT = open_data_file( "ucd", "LineBreak.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        # in both cases the three character class is tried before the two character one
        my ($first, $last, $type);
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/ ||
            /^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($1), $2);
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/ ||
               /^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
        {
            ($first, $last, $type) = (hex($1), hex($2), $3);
        }
        else
        {
            die "malformed line $_";
        }
        die "unknown breaktype $type" unless defined $break_types{$type};

        foreach my $i ($first .. $last)
        {
            $break_table[$i] = $break_types{$type};
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Line Break Properties */\n",
                 "/* generated from $current_data_file */\n",
                 "/* DO NOT EDIT!! */\n\n",
                 "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_linebreak_table", $break_types{'XX'}, 16, @break_table );

    close OUTPUT;
    save_file($filename);
}
# Script name -> script id.  The ids are consecutive and follow the order
# of the list below, so new scripts must only ever be appended.
my %scripts = do
{
    my @names =
    (
        # 0 .. 95
        qw(Unknown Common Inherited Arabic Armenian Avestan Balinese Bamum),
        qw(Batak Bengali Bopomofo Brahmi Braille Buginese Buhid Canadian_Aboriginal),
        qw(Carian Cham Cherokee Coptic Cuneiform Cypriot Cyrillic Deseret),
        qw(Devanagari Egyptian_Hieroglyphs Ethiopic Georgian Glagolitic Gothic Greek Gujarati),
        qw(Gurmukhi Han Hangul Hanunoo Hebrew Hiragana Imperial_Aramaic Inscriptional_Pahlavi),
        qw(Inscriptional_Parthian Javanese Kaithi Kannada Katakana Kayah_Li Kharoshthi Khmer),
        qw(Lao Latin Lepcha Limbu Linear_B Lisu Lycian Lydian),
        qw(Malayalam Mandaic Meetei_Mayek Mongolian Myanmar New_Tai_Lue Nko Ogham),
        qw(Ol_Chiki Old_Italic Old_Persian Old_South_Arabian Old_Turkic Oriya Osmanya Phags_Pa),
        qw(Phoenician Rejang Runic Samaritan Saurashtra Shavian Sinhala Sundanese),
        qw(Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tai_Tham Tai_Viet Tamil),
        qw(Telugu Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi),
        # Win8/Win8.1 (96 .. 102)
        qw(Chakma Meroitic_Cursive Meroitic_Hieroglyphs Miao Sharada Sora_Sompeng Takri),
        # Win10 (103 .. 125)
        qw(Bassa_Vah Caucasian_Albanian Duployan Elbasan Grantha Khojki Khudawadi Linear_A),
        qw(Mahajani Manichaean Mende_Kikakui Modi Mro Nabataean Old_North_Arabian Old_Permic),
        qw(Pahawh_Hmong Palmyrene Pau_Cin_Hau Psalter_Pahlavi Siddham Tirhuta Warang_Citi),
        # Win10 RS1 (126 .. 137)
        qw(Adlam Ahom Anatolian_Hieroglyphs Bhaiksuki Hatran Marchen),
        qw(Multani Newa Old_Hungarian Osage SignWriting Tangut),
        # Win10 RS4 (138 .. 141)
        qw(Masaram_Gondi Nushu Soyombo Zanabazar_Square),
        # Win10 1903 (142 .. 148)
        qw(Dogra Gunjala_Gondi Hanifi_Rohingya Makasar Medefaidrin Old_Sogdian Sogdian),
        # Win10 2004 (149 .. 152)
        qw(Elymaic Nyiakeng_Puachue_Hmong Nandinagari Wancho),
        # Win11 (153 .. 156)
        qw(Chorasmian Dives_Akuru Khitan_Small_Script Yezidi),
    );
    map { $names[$_] => $_ } 0 .. $#names;
};
################################################################
# dump Script IDs table
#
# $filename is the output name without extension.  Two files are written:
# "$filename.h" with the unicode_script_id enum generated from %scripts,
# and "$filename.c" with the char -> script id table as a three-level
# mapping.
sub dump_scripts($)
{
    my $filename = shift;
    my @scripts_table;

    my $INPUT = open_data_file( "ucd", "Scripts.txt" );
    # Fill the table
    # Unknown script id is always 0, so undefined scripts are automatically treated as such
    while (<$INPUT>)
    {
        my $type = "";

        next if /^\#/;  # skip comments
        next if /^\s*$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            $type = $2;
            if (defined $scripts{$type})
            {
                $scripts_table[hex $1] = $scripts{$type};
            }
            next;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            $type = $3;
            if (defined $scripts{$type})
            {
                foreach my $i (hex $1 .. hex $2)
                {
                    $scripts_table[$i] = $scripts{$type};
                }
            }
            next;
        }
        # lines in any other format are ignored
    }
    close $INPUT;

    my $header = "$filename.h";
    open OUTPUT,">$header.new" or die "Cannot create $header";
    print "Building $header\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";

    print OUTPUT "enum unicode_script_id {\n";
    foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts)
    {
        print OUTPUT " Script_$script = $scripts{$script},\n";
    }
    # the highest id equals the number of scripts minus one as long as the
    # ids in %scripts are consecutive and start at 0
    print OUTPUT " Script_LastId = ", (scalar keys %scripts) - 1, "\n";
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($header);

    $filename = "$filename.c";
    # report the file that actually failed, not the header written above
    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Script IDs */\n";
    print OUTPUT "/* generated from $current_data_file */\n";
    print OUTPUT "/* DO NOT EDIT!! */\n\n";
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "wine_scripts_table", 0, 16, @scripts_table );
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the BiDi mirroring table
#
# Reads the "char; mirrored char" pairs of BidiMirroring.txt and writes
# them to $filename as a two-level mapping.
sub dump_mirroring($)
{
    my $filename = shift;
    my @mirror_table = ();

    my $INPUT = open_data_file( "ucd", "BidiMirroring.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^$/ || /\x1a/;
        unless (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+)/)
        {
            die "malformed line $_";
        }
        $mirror_table[hex $1] = hex $2;
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode BiDi mirroring */\n",
                 "/* generated from $current_data_file */\n",
                 "/* DO NOT EDIT!! */\n\n",
                 "#include \"windef.h\"\n\n";
    dump_two_level_mapping( "wine_mirror_map", 0, 16, @mirror_table );
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Bidi Brackets
#
# Each table entry holds the distance from the bracket to its paired
# bracket in the low byte and the bracket type shifted left by 8 bits.
# The table is written to $filename as a two-level mapping.
sub dump_bracket($)
{
    my $filename = shift;
    my @bracket_table;

    my $INPUT = open_data_file( "ucd", "BidiBrackets.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;
        unless (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+);\s*([con])/)
        {
            die "malformed line $_";
        }
        my $type = $3;
        my $delta = hex($2) - hex($1);
        die "unknown bracket $type" unless defined $bracket_types{$type};
        die "characters too distant $1 and $2" if abs($delta) >= 128;
        # NOTE(review): "% 255" stores a negative distance d as 255 + d, which
        # is one less than the two's complement byte (256 + d).  Confirm how the
        # reader of this table decodes the low byte before changing it.
        $bracket_table[hex $1] = ($delta % 255) + ($bracket_types{$type} << 8);
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Bidirectional Bracket table */\n",
                 "/* generated from $current_data_file */\n",
                 "/* DO NOT EDIT!! */\n\n",
                 "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "bidi_bracket_table", 0, 16, @bracket_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Arabic shaping table
#
# Starts from the joining types derived from UnicodeData.txt, overrides
# them with the ones listed in ArabicShaping.txt, and writes to $filename
# both the joining type table and the presentation forms of the chars
# 0x600 .. 0x6ff.
sub dump_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;
        unless (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/)
        {
            die "malformed line $_";
        }
        my $type = $2;
        $joining_table[hex $1] = $joining_types{$type};
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n",
                 "/* generated from $current_data_file */\n",
                 "/* DO NOT EDIT!! */\n\n",
                 "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "wine_shaping_table", 0, 16, @joining_table );

    # one row per char: isolated, final, initial and medial form; a form
    # that is not known falls back to the char itself
    print OUTPUT "\nconst unsigned short DECLSPEC_HIDDEN wine_shaping_forms[256][4] =\n{\n";
    foreach my $i (0x600 .. 0x6ff)
    {
        my @forms = (${joining_forms{"isolated"}}[$i] || $i,
                     ${joining_forms{"final"}}[$i] || $i,
                     ${joining_forms{"initial"}}[$i] || $i,
                     ${joining_forms{"medial"}}[$i] || $i);
        printf OUTPUT " { 0x%04x, 0x%04x, 0x%04x, 0x%04x },\n", @forms;
    }
    print OUTPUT "};\n";

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Arabic shaping table
#
# Same input as dump_shaping, but the joining groups "ALAPH" and
# "DALATH RISH" get the type registered under the group name instead of
# the one-letter joining type, and the table is written to $filename as a
# three-level mapping.
sub dump_arabic_shaping($)
{
    my $filename = shift;
    my @joining_table = @initial_joining_table;

    my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;
        unless (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/)
        {
            die "malformed line $_";
        }
        my ($char, $type, $group) = (hex($1), $2, $3);

        my $key = ($group eq "ALAPH" || $group eq "DALATH RISH") ? $group : $type;
        $joining_table[$char] = $joining_types{$key};
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Arabic shaping */\n",
                 "/* generated from $current_data_file */\n",
                 "/* DO NOT EDIT!! */\n\n",
                 "#include \"windef.h\"\n\n";

    dump_three_level_mapping( "arabic_shaping_table", 0, 16, @joining_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the Vertical Orientation table
#
# Writes the table to $filename as a two-level mapping.  When $unix is
# true the file additionally starts with a "#pragma makedep unix" block.
sub dump_vertical($$)
{
    my ($filename, $unix) = @_;
    my @vertical_table;

    my $INPUT = open_data_file( "ucd", "VerticalOrientation.txt" );
    while (<$INPUT>)
    {
        # skip comments, empty lines and ^Z
        next if /^\#/ || /^\s*$/ || /\x1a/;

        if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
        {
            my ($char, $type) = (hex($1), $2);
            die "unknown vertical $type" unless defined $vertical_types{$type};
            # single chars outside the BMP are dropped
            $vertical_table[$char] = $vertical_types{$type} if $char < 65536;
        }
        elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*/)
        {
            my ($first, $last, $type) = (hex($1), hex($2), $3);
            die "unknown vertical $type" unless defined $vertical_types{$type};
            foreach my $i ($first .. $last)
            {
                $vertical_table[$i] = $vertical_types{$type};
            }
        }
        else
        {
            die "malformed line $_";
        }
    }
    close $INPUT;

    open OUTPUT,">$filename.new" or die "Cannot create $filename";
    print "Building $filename\n";
    print OUTPUT "/* Unicode Vertical Orientation */\n",
                 "/* generated from $current_data_file */\n",
                 "/* DO NOT EDIT!! */\n\n";
    if ($unix)
    {
        print OUTPUT "#if 0\n",
                     "#pragma makedep unix\n",
                     "#endif\n\n";
    }
    print OUTPUT "#include \"windef.h\"\n\n";

    dump_two_level_mapping( "vertical_orientation_table", $vertical_types{'R'}, 16, @vertical_table );

    close OUTPUT;
    save_file($filename);
}
################################################################
# compress a mapping table by removing identical rows
#
# The table passed after $rows and $def is split into $rows rows of equal
# length, with undefined cells replaced by $def.  Rows are appended to a
# shared data area unless they are already found in it, and a new row may
# overlap the end of the data.  Returns the $rows row offsets (each one
# biased by $rows) followed by the data area.
sub compress_array($$@)
{
    my ($rows, $def, @table) = @_;
    my $len = @table / $rows;
    my @offsets;
    my $pool = "";

    # try to merge table rows
    foreach my $row (0 .. $rows - 1)
    {
        my @cells = @table[($row * $len)..(($row + 1) * $len - 1)];
        # rows are handled as strings so that index() can search for them
        my $rowtxt = pack "U*", map { defined($_) ? $_ : $def; } @cells;
        my $pos = index $pool, $rowtxt;
        if ($pos == -1)
        {
            # check if the tail of the data can match the start of the new row
            my $first = substr( $rowtxt, 0, 1 );
            for (my $tail = length($pool) - 1; $tail > 0; $tail--)
            {
                # jump to the next place in the tail where the row could start
                my $skip = index( substr( $pool, -$tail ), $first );
                last if $skip == -1;
                $tail -= $skip;
                next unless substr( $pool, -$tail ) eq substr( $rowtxt, 0, $tail );
                # drop the overlapping tail, it comes back with the row
                substr( $pool, -$tail ) = "";
                last;
            }
            $pos = length $pool;
            $pool .= $rowtxt;
        }
        $offsets[$row] = $rows + $pos;
    }
    return @offsets, unpack "U*", $pool;
}
################################################################
# dump a char -> value mapping table using two-level tables
#
# Arguments: name of the C array, default value for undefined entries,
# element size in bits (16 gives "unsigned short", anything else
# "unsigned int"), then the table itself, of which the first 65536
# entries are used.  The C array is printed to the OUTPUT handle.
sub dump_two_level_mapping($$$@)
{
    my $name = shift;
    my $def = shift;
    my $size = shift;
    my $type = ($size == 16) ? "unsigned short" : "unsigned int";
    my (@array, @row_array, @data, @row_data);
    (@row_array[0..4095], @data) = compress_array( 4096, $def, @_[0..65535] );
    (@array[0..255], @row_data) = compress_array( 256, 0, @row_array );

    # the level 2 offsets were computed relative to a 4096-entry index;
    # rebase them on the final layout of the array
    my $rebase = @row_data + 256 - 4096;
    $_ += $rebase foreach @row_data;

    my $total = @array + @row_data + @data;
    printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%d] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @row_data );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @data );
}
################################################################
# dump a char -> value mapping table using three-level tables
#
# Arguments: name of the C array, default value for undefined entries,
# element size in bits (16 gives "unsigned short", anything else
# "unsigned int"), then the table itself, of which the entries
# 0 .. $MAX_CHAR are used.  The C array is printed to the OUTPUT handle.
sub dump_three_level_mapping($$@)
{
    my $name = shift;
    my $def = shift;
    my $size = shift;
    my $type = ($size == 16) ? "unsigned short" : "unsigned int";

    # number of index entries at each level, 16 entries per row
    my $level3 = ($MAX_CHAR + 1) / 16;
    my $level2 = $level3 / 16;
    my $level1 = $level2 / 16;

    # each result is the index for that level followed by its data
    my @array3 = compress_array( $level3, $def, @_[0..$MAX_CHAR] );
    my @array2 = compress_array( $level2, 0, @array3[0..$level3-1] );
    my @array1 = compress_array( $level1, 0, @array2[0..$level2-1] );

    # rebase the offsets stored in the data parts on the final layout
    my $rebase2 = @array1 + @array2 - $level2 - $level3;
    my $rebase1 = @array1 - $level2;
    $_ += $rebase2 foreach @array2[$level2 .. $#array2];
    $_ += $rebase1 foreach @array1[$level1 .. $#array1];

    my $total = @array1 + (@array2 - $level2) + (@array3 - $level3);
    printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%u] =\n{\n", $type, $name, $total;
    printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array1[0..$level1-1] );
    printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @array1[$level1..$#array1] );
    printf OUTPUT " /* level 3 offsets */\n%s,\n", dump_array( $size, 0, @array2[$level2..$#array2] );
    printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @array3[$level3..$#array3] );
}
################################################################
# dump a binary case mapping table in l_intl.nls format
#
# Takes a table of mapped code points indexed by source code point and
# returns the packed table as a string.  Tables longer than 0x10000
# entries get an extra set of tables for the characters from 0x10000 up
# to $MAX_CHAR, whose values are stored as 32-bit words.
sub dump_binary_case_table(@)
{
    my @table = @_;
    my @difftable;
    my @res;

    # store each mapping as the difference to its own code point
    foreach my $ch (grep { defined $table[$_] } 0 .. $#table)
    {
        $difftable[$ch] = ($table[$ch] - $ch) & 0xffffffff;
    }

    my @low_array2 = compress_array( 4096, 0, @difftable[0..65535] );
    my @low_data = splice( @low_array2, 4096 );
    my @low_array1 = compress_array( 256, 0, @low_array2 );
    my @low_row_data = splice( @low_array1, 256 );

    if (@table <= 0x10000)
    {
        push @res, @low_array1;
        my $base = @res + @low_row_data - 4096;
        push @res, map { $_ + $base } @low_row_data;
        push @res, @low_data;
        return pack "S<*", 1 + scalar @res, @res;
    }

    my @high_array2 = compress_array( 32768, 0, @difftable[65536..$MAX_CHAR] );
    my @high_data = splice( @high_array2, 32768 );
    my @high_array1 = compress_array( 1024, 0, @high_array2 );
    my @high_row_data = splice( @high_array1, 1024 );

    # each base below is the size of @res before the corresponding push
    push @res, map { $_ + 1024 } @low_array1;
    my $base = @res + @low_row_data + @low_data;
    push @res, map { $_ + $base } @high_array1;
    $base = @res + @low_row_data - 4096;
    push @res, map { $_ + $base } @low_row_data;
    push @res, @low_data;
    $base = @res + @high_row_data;
    push @res, map { 2 * ($_ - 32768) + $base } @high_row_data;

    my $size = 1 + @res + 2 * @high_data;
    return pack( "S<*", $size, @res ) . pack( "L<*", @high_data );
}
################################################################
# dump case mappings for l_intl.nls
#
# Writes a version word followed by the upper and lower case tables for
# code points 0..65535 (with the linguistic mappings removed) to
# "$filename.new", then hands the file over to save_file().
sub dump_intl_nls($)
{
    my $filename = shift;

    my @upper_table = @toupper_table;
    my @lower_table = @tolower_table;
    remove_linguistic_mappings( \@upper_table, \@lower_table );

    my $upper = dump_binary_case_table( @upper_table[0..65535] );
    my $lower = dump_binary_case_table( @lower_table[0..65535] );

    # three-argument open: the file name is never parsed for a mode
    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";

    binmode OUTPUT;
    print OUTPUT pack "S<", 1;  # version
    print OUTPUT $upper;
    print OUTPUT $lower;
    close OUTPUT;
    save_file($filename);
}
################################################################
# dump the bidi direction table
#
# Writes a C source file to "$filename.new" containing the three-level
# "bidi_direction_table", then hands the file over to save_file().
# Characters without a direction entry get the value of type "L".
sub dump_bidi_dir_table($)
{
    my $filename = shift;

    # three-argument open: the file name is never parsed for a mode
    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";
    printf OUTPUT "/* Unicode BiDi direction table */\n";
    printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    printf OUTPUT "#include \"windef.h\"\n\n";

    my @table;

    # translate the direction names into their numeric bidi types
    for (my $i = 0; $i < @direction_table; $i++)
    {
        $table[$i] = $bidi_types{$direction_table[$i]} if defined $direction_table[$i];
    }

    dump_three_level_mapping( "bidi_direction_table", $bidi_types{"L"}, 16, @table );

    close OUTPUT;
    save_file($filename);
}
# rotate an 8-bit value left by the given number of bits
sub rol($$)
{
    my ($byte, $count) = @_;
    my $high = $byte << $count;
    my $low = $byte >> (8 - $count);
    return ($high | $low) & 0xff;
}
################################################################
# compress the character properties table
#
# Arguments: the number of rows and the flat property table.  Returns one
# row id per row, followed by the contents of every newly created row in
# order of creation.  Undefined properties are stored as 0x7f.
sub compress_char_props_table($@)
{
    my ($rows, @table) = @_;
    my $len = @table / $rows;
    my $last_id = 0;
    my @row_ids = (0) x $rows;
    my @row_data;
    my %sequences;

    # add some predefined sequences: a row filled with rol(id,5) maps to
    # that id without needing any row data
    foreach my $id (0, 0xfb .. 0xff)
    {
        $sequences{pack "L*", (rol($id,5)) x $len} = $id;
    }

    # try to merge table rows
    foreach my $row (0 .. $rows - 1)
    {
        my @table_row = map { defined $_ ? $_ : 0x7f } @table[($row * $len)..(($row + 1) * $len - 1)];
        my $rowtxt = pack "L*", @table_row;

        unless (defined($sequences{$rowtxt}))
        {
            # create a new row
            $sequences{$rowtxt} = ++$last_id;
            push @row_data, @table_row;
        }
        $row_ids[$row] = $sequences{$rowtxt};
    }
    return (@row_ids, @row_data);
}
3446 ################################################################
3447 # dump a normalization table in binary format
#
# Writes "$filename.new" through the OUTPUT handle, passes the file name
# to save_file() and registers the file under the "Normalization"
# registry key. The form name is extracted from a file name of the
# shape .../norm<type>.nls and is used as the key into %forms and %decomp.
3448 sub dump_norm_table($)
3450 my $filename = shift;
3452 my %forms = ( "nfc" => 1, "nfd" => 2, "nfkc" => 5, "nfkd" => 6, "idna" => 13 );
3453 my %decomp = ( "nfc" => \@decomp_table,
3454 "nfd" => \@decomp_table,
3455 "nfkc" => \@decomp_compat_table,
3456 "nfkd" => \@decomp_compat_table ,
3457 "idna" => \@idna_decomp_table );
3459 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3460 print "Building $filename\n";
3462 my $type = $filename;
3463 $type =~ s!.*/norm(\w+)\.nls!$1!;
# $compose is the low bit of the form id (set for nfc, nfkc and idna);
# $compat is 1 for nfkc/nfkd (bit 2 of the form id) and 2 for idna
3465 my $compose = $forms{$type} & 1;
3466 my $compat = !!($forms{$type} & 4) + ($type eq "idna");
3468 my @version = split /\./, $UNIVERSION;
3470 # combining classes
3472 my @classes;
3473 my @class_values;
# mark every combining class value below 0x100 that is in use
3475 foreach my $c (grep defined, @combining_class_table)
3477 $classes[$c] = 1 if $c < 0x100;
# replace each mark by the index of that class in @class_values
3479 for (my $i = 0; $i < @classes; $i++)
3481 next unless defined $classes[$i];
3482 $classes[$i] = @class_values;
3483 push @class_values, $i;
# pad the list to an even number of entries
3485 push @class_values, 0 if (@class_values % 2);
3486 die "too many classes" if @class_values >= 0x40;
3488 # character properties
3490 my @char_props;
3491 my @decomposed;
3492 my @comp_hash_table;
3493 my $comp_hash_size = $compose ? 254 : 0;
# code points without a combining class entry are skipped
3495 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3497 next unless defined $combining_class_table[$i];
3498 if (defined $decomp{$type}->[$i])
3500 my @dec = get_decomposition( $i, $decomp{$type} );
3501 if ($compose && (my @comp = get_composition( $i, $compat )))
# composition entries are bucketed by a hash of the two source characters
3503 my $hash = ($comp[0] + 95 * $comp[1]) % $comp_hash_size;
3504 push @{$comp_hash_table[$hash]}, to_utf16( @comp, $i );
# $val ends up as the first non-zero combining class in the decomposition
3506 my $val = 0;
3507 foreach my $d (@dec)
3509 $val = $combining_class_table[$d];
3510 last if $val;
3512 $char_props[$i] = $classes[$val];
3514 else
3516 $char_props[$i] = 0xbf;
3518 @dec = compose_hangul( @dec ) if $compose;
3519 @dec = to_utf16( @dec );
# NOTE(review): sequences of 7 or more units get a trailing 0 and their
# stored length is capped at 7 further down; presumably the 0 acts as a
# terminator for the reader -- confirm
3520 push @dec, 0 if @dec >= 7;
3521 $decomposed[$i] = \@dec;
3523 else
# a combining class of 0x100 is stored as property 0x7f; other non-zero
# classes are stored as their @class_values index with bit 0x80 set
3525 if ($combining_class_table[$i] == 0x100)
3527 $char_props[$i] = 0x7f;
3529 elsif ($combining_class_table[$i])
3531 $char_props[$i] = $classes[$combining_class_table[$i]] | 0x80;
3533 elsif ($type eq "idna" && defined $idna_disallowed[$i])
3535 $char_props[$i] = 0xff;
3537 else
3539 $char_props[$i] = 0;
# second pass over all characters: adjust the 0x40/0x80 bits of both
# source characters of every composition, depending on whether the second
# one has a non-zero combining class
3544 if ($compose)
3546 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3548 my @comp = get_composition( $i, $compat );
3549 next unless @comp;
3550 if ($combining_class_table[$comp[1]])
3552 $char_props[$comp[0]] |= 0x40 unless $char_props[$comp[0]] & 0x80;
3553 $char_props[$comp[1]] |= 0x40;
3555 else
3557 $char_props[$comp[0]] = ($char_props[$comp[0]] & ~0x40) | 0x80;
3558 $char_props[$comp[1]] |= 0xc0;
# the fixed ranges below override whatever was computed above
3563 # surrogates
3564 foreach my $i (0xd800..0xdbff) { $char_props[$i] = 0xdf; }
3565 foreach my $i (0xdc00..0xdfff) { $char_props[$i] = 0x9f; }
3567 # Hangul
3568 if ($type eq "nfc") { foreach my $i (0x1100..0x117f) { $char_props[$i] = 0xff; } }
3569 elsif ($compose) { foreach my $i (0x1100..0x11ff) { $char_props[$i] = 0xff; } }
3570 foreach my $i (0xac00..0xd7ff) { $char_props[$i] = 0xff; }
3572 # invalid chars
3573 if ($type eq "idna") { foreach my $i (0x00..0x1f, 0x7f) { $char_props[$i] = 0xff; } }
3574 foreach my $i (0xfdd0..0xfdef) { $char_props[$i] = 0xff; }
# the last two code points of each of the 17 planes
3575 foreach my $i (0x00..0x10)
3577 $char_props[($i << 16) | 0xfffe] = 0xff;
3578 $char_props[($i << 16) | 0xffff] = 0xff;
3581 # decomposition hash table
3583 my @decomp_hash_table;
3584 my @decomp_hash_index;
3585 my @decomp_hash_data;
3586 my $decomp_hash_size = 944;
3588 # build string of character data, reusing substrings when possible
3589 my $decomp_char_data = "";
# longest sequences are added first so that shorter ones can be found
# inside them
3590 foreach my $i (sort { @{$b} <=> @{$a} } grep defined, @decomposed)
3592 my $str = pack "U*", @{$i};
3593 $decomp_char_data .= $str if index( $decomp_char_data, $str) == -1;
# each hash entry is [ code point, (length << 13) | position in the data ]
3595 for (my $i = 0; $i < @decomposed; $i++)
3597 next unless defined $decomposed[$i];
3598 my $pos = index( $decomp_char_data, pack( "U*", @{$decomposed[$i]} ));
3599 die "sequence not found" if $pos == -1;
3600 my $len = @{$decomposed[$i]};
3601 $len = 7 if $len > 7;
3602 my $hash = $i % $decomp_hash_size;
3603 push @{$decomp_hash_table[$hash]}, [ $i, ($len << 13) | $pos ];
# the index of a bucket is its position in @decomp_hash_data counted in
# pairs; a bucket holding a single character with property 0xbf stores
# that character's length/position word in the index instead
3605 for (my $i = 0; $i < $decomp_hash_size; $i++)
3607 $decomp_hash_index[$i] = @decomp_hash_data / 2;
3608 next unless defined $decomp_hash_table[$i];
3609 if (@{$decomp_hash_table[$i]} == 1)
3611 my $entry = $decomp_hash_table[$i]->[0];
3612 if ($char_props[$entry->[0]] == 0xbf)
3614 $decomp_hash_index[$i] = $entry->[1];
3615 next;
3618 foreach my $entry (@{$decomp_hash_table[$i]})
3620 push @decomp_hash_data, $entry->[0] & 0xffff, $entry->[1];
# final all-zero pair
3623 push @decomp_hash_data, 0, 0;
3625 # composition hash table
3627 my @comp_hash_index;
3628 my @comp_hash_data;
3629 if (@comp_hash_table)
3631 for (my $i = 0; $i < $comp_hash_size; $i++)
3633 $comp_hash_index[$i] = @comp_hash_data;
3634 push @comp_hash_data, @{$comp_hash_table[$i]} if defined $comp_hash_table[$i];
# one extra index entry marks the end of the last bucket
3636 $comp_hash_index[$comp_hash_size] = @comp_hash_data;
3637 push @comp_hash_data, 0, 0, 0;
# the properties are split into rows of 128 characters
3640 my $level1 = ($MAX_CHAR + 1) / 128;
3641 my @rows = compress_char_props_table( $level1, @char_props[0..$MAX_CHAR] );
3643 my @header = ( $version[0], $version[1], $version[2], 0, $forms{$type}, $compat ? 18 : 3,
3644 0, $decomp_hash_size, $comp_hash_size, 0 );
3645 my @tables = (0) x 8;
# table offsets; the tables written with "C*" below add half their element
# count, so the offsets appear to be in 16-bit units, with the 16 accounting
# for the name field written first
3647 $tables[0] = 16 + @header + @tables;
3648 $tables[1] = $tables[0] + @class_values / 2;
3649 $tables[2] = $tables[1] + $level1 / 2;
3650 $tables[3] = $tables[2] + (@rows - $level1) / 2;
3651 $tables[4] = $tables[3] + @decomp_hash_index;
3652 $tables[5] = $tables[4] + @decomp_hash_data;
3653 $tables[6] = $tables[5] + length $decomp_char_data;
3654 $tables[7] = $tables[6] + @comp_hash_index;
# the name field is always 16 words wide
3656 print OUTPUT pack "S<16", unpack "U*", "norm$type.nlp";
3657 print OUTPUT pack "S<*", @header;
3658 print OUTPUT pack "S<*", @tables;
3659 print OUTPUT pack "C*", @class_values;
3661 print OUTPUT pack "C*", @rows[0..$level1-1];
3662 print OUTPUT pack "C*", @rows[$level1..$#rows];
3663 print OUTPUT pack "S<*", @decomp_hash_index;
3664 print OUTPUT pack "S<*", @decomp_hash_data;
3665 print OUTPUT pack "S<*", unpack "U*", $decomp_char_data;
3666 print OUTPUT pack "S<*", @comp_hash_index;
3667 print OUTPUT pack "S<*", @comp_hash_data;
3669 close OUTPUT;
3670 save_file($filename);
3672 add_registry_string_value( $nlskey, "Normalization", sprintf( "%x", $forms{$type} ), "norm$type.nls" );
################################################################
# output a codepage definition file from the global tables
#
# Writes nls/c_<codepage>.nls (as a ".new" file passed on to save_file())
# using the DBCS dumper when lead bytes are defined and the SBCS dumper
# otherwise, then registers the file under the "Codepage" registry key.
sub output_codepage_file($)
{
    my ($codepage) = @_;
    my $output = sprintf "nls/c_%03d.nls", $codepage;

    open OUTPUT,">$output.new" or die "Cannot create $output";
    printf "Building %s\n", $output;

    if (@lead_bytes)
    {
        dump_binary_dbcs_table( $codepage );
    }
    else
    {
        dump_binary_sbcs_table( $codepage );
    }

    close OUTPUT;
    save_file($output);

    my $value_name = sprintf( "%d", $codepage );
    my $file_name = sprintf( "c_%03d.nls", $codepage );
    add_registry_string_value( $nlskey, "Codepage", $value_name, $file_name );
}
################################################################
# output a codepage table from a Microsoft-style mapping file
#
# Resets the global codepage tables, fills them from the given file of
# the "codepages" data set and writes the result with
# output_codepage_file().  Dies on a line it does not recognize, or when
# the file contains no CODEPAGE line.
sub dump_msdata_codepage($)
{
    my $filename = shift;

    my $state = "";
    my ($codepage, $width, $count);
    my ($lb_cur, $lb_end);

    @cp2uni = ();
    @glyph2uni = ();
    @lead_bytes = ();
    @uni2cp = ();
    $default_char = $DEF_CHAR;
    $default_wchar = $DEF_CHAR;

    my $INPUT = open_data_file( "codepages", $filename );

    # read into a lexical so that the caller's $_ is left alone
    while (my $line = <$INPUT>)
    {
        next if $line =~ /^;/;      # skip comments
        next if $line =~ /^\s*$/;   # skip empty lines
        next if $line =~ /\x1a/;    # skip ^Z
        last if $line =~ /^ENDCODEPAGE/;

        if ($line =~ /^CODEPAGE\s+(\d+)/)
        {
            $codepage = $1;
            next;
        }
        # both defaults are hex numbers (the first class used to read "A-f",
        # which also accepted non-hex characters)
        if ($line =~ /^CPINFO\s+(\d+)\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            $width = $1;  # parsed but not used in this function
            $default_char = hex $2;
            $default_wchar = hex $3;
            next;
        }
        if ($line =~ /^(MBTABLE|GLYPHTABLE|WCTABLE|DBCSRANGE|DBCSTABLE)\s+(\d+)/)
        {
            $state = $1;
            $count = $2;
            next;
        }
        if ($line =~ /^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
        {
            my ($first, $second) = (hex $1, hex $2);

            # in every table the first mapping seen for a value wins
            if ($state eq "MBTABLE")
            {
                $cp2uni[$first] = $second unless defined($cp2uni[$first]);
                next;
            }
            if ($state eq "GLYPHTABLE")
            {
                $glyph2uni[$first] = $second unless defined($glyph2uni[$first]);
                next;
            }
            if ($state eq "WCTABLE")
            {
                $uni2cp[$first] = $second unless defined($uni2cp[$first]);
                next;
            }
            if ($state eq "DBCSRANGE")
            {
                for (my $i = $first; $i <= $second; $i++) { add_lead_byte( $i ); }
                $lb_cur = $first;
                $lb_end = $second;
                next;
            }
            if ($state eq "DBCSTABLE")
            {
                my $cp = ($lb_cur << 8) | $first;
                $cp2uni[$cp] = $second unless defined($cp2uni[$cp]);
                # once the table of the current lead byte is complete, move
                # on to the next one; after the last one expect a new range
                if (!--$count)
                {
                    if (++$lb_cur > $lb_end) { $state = "DBCSRANGE"; }
                }
                next;
            }
        }
        die "$filename: Unrecognized line $line\n";
    }
    close $INPUT;

    die "$filename: missing CODEPAGE line\n" unless defined $codepage;

    output_codepage_file( $codepage );

    if ($codepage == 949) { dump_krwansung_codepage( @uni2cp ); }
}
################################################################
# align a string length
#
# Returns the string padded with zero bytes up to the next multiple of
# the alignment; a string that is already aligned is returned unchanged.
sub align_string($$)
{
    my ($align, $str) = @_;
    my $excess = length($str) % $align;
    return $str unless $excess;
    return $str . pack( "C*", (0) x ($align - $excess) );
}
################################################################
# pad a string with zeros
#
# Returns the string extended with zero bytes to at least the given
# length; longer strings are returned unchanged.
sub pad_string($$)
{
    my ($pad, $str) = @_;
    my $missing = $pad - length($str);
    return $str if $missing <= 0;
    return $str . pack( "C*", (0) x $missing );
}
################################################################
# pack a GUID string
#
# Takes a GUID in the usual 8-4-4-4-12 hex digit notation and returns its
# 16-byte binary form: one little-endian 32-bit value, two little-endian
# 16-bit values and eight single bytes.  Dies if the string does not
# contain a GUID.  The caller's $_ is left untouched.
sub pack_guid($)
{
    my $guid = shift;
    my $byte = '([0-9A-Fa-f]{2})';

    # a failed match yields an empty list, which makes the assignment false
    my @fields = $guid =~ /([0-9A-Fa-f]{8})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{4})-$byte$byte-$byte$byte$byte$byte$byte$byte/
        or die "invalid GUID '$guid'\n";

    return pack "L<S<2C8", map { hex $_ } @fields;
}
################################################################
# comparison function for compression sort
#
# Sort comparator working on $a and $b: shorter rows sort first, rows of
# equal length are ordered by their elements 4 to 12, compared
# numerically one after the other.
sub cmp_compression
{
    my $order = scalar @{$a} <=> scalar @{$b};
    return $order if $order;

    foreach my $idx (4 .. 12)
    {
        $order = $a->[$idx] <=> $b->[$idx];
        return $order if $order;
    }
    return $order;
}
################################################################
# build a binary sort keys table
#
# Parses the "sorting" data file, then writes "$filename.new" made of a
# four-entry offset header followed by the sort key table (with the
# per-GUID exception tables), the three case mapping tables, the
# character type table and the sort tables.  The sort locales are added
# under the "Sorting\Ids" registry key.
# Returns the packed character type table.
sub dump_sortkey_table($)
{
    my $filename = shift;
    my @keys;
    my ($part, $section, $subsection, $guid, $version, $ling_flag);
    my @multiple_weights;
    my @expansions;
    my @compressions;
    my %exceptions;
    my %guids;
    my %compr_flags;
    my %locales;
    my $default_guid = "00000001-57ee-1e5c-00b4-d0000bb1e11e";
    my $jamostr = "";

    my $re_hex = '0x[0-9A-Fa-f]+';
    my $re_key = '(\d+\s+\d+\s+\d+\s+\d+)';
    $guids{$default_guid} = { };

    my %flags = ( "HAS_3_BYTE_WEIGHTS" => 0x01, "REVERSEDIACRITICS" => 0x10, "DOUBLECOMPRESSION" => 0x20, "INVERSECASING" => 0x40 );

    my $KEYS = open_data_file( "sorting" );

    # plain print: the file name must not be interpreted as a printf format
    print "Building $filename\n";

    while (<$KEYS>)
    {
        s/\s*;.*$//;
        next if /^\s*$/;  # skip empty lines
        if (/^\s*(SORTKEY|SORTTABLES)/)
        {
            $part = $1;
            next;
        }
        if (/^\s*(ENDSORTKEY|ENDSORTTABLES)/)
        {
            $part = $section = "";
            next;
        }
        if (/^\s*(DEFAULT|RELEASE|REVERSEDIACRITICS|DOUBLECOMPRESSION|INVERSECASING|MULTIPLEWEIGHTS|EXPANSION|COMPATIBILITY|COMPRESSION|EXCEPTION|JAMOSORT)\s+/)
        {
            $section = $1;
            $guid = undef;
            next;
        }
        next unless $part;
        if ("$part.$section" eq "SORTKEY.DEFAULT")
        {
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $keys[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.RELEASE")
        {
            if (/^\s*NLSVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                $version = hex $1;
                next;
            }
            if (/^\s*DEFINEDVERSION\s+0x([0-9A-Fa-f]+)/)
            {
                # ignore for now
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.REVERSEDIACRITICS" ||
               "$part.$section" eq "SORTTABLES.DOUBLECOMPRESSION" ||
               "$part.$section" eq "SORTTABLES.INVERSECASING")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{$guid};
                $guids{$guid}->{flags} |= $flags{$section};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.MULTIPLEWEIGHTS")
        {
            if (/^\s*(\d+)\s+(\d+)/)
            {
                push @multiple_weights, $1, $2;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXPANSION")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                # the key of an expanded character holds the index of its
                # pair in the expansion table
                my $pos = scalar @expansions / 2;
                $keys[hex $1] = [ 2, 0, $pos & 0xff, $pos >> 8 ] unless defined $keys[hex $1];
                push @expansions, hex $2, hex $3;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPATIBILITY")
        {
            if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
            {
                $keys[hex $1] = $keys[hex $2];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.COMPRESSION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*([A-Z0-9_]+)?/)
            {
                if ($subsection || !$guid)  # start a new one
                {
                    $guid = lc $1;
                    $subsection = "";
                    $guids{$guid} = { } unless defined $guids{$guid};
                    $guids{$guid}->{flags} |= $flags{$2} if $2;
                    $guids{$guid}->{compr} = @compressions;
                    $exceptions{"$guid-"} = [ ] unless defined $exceptions{"$guid-"};
                    $compr_flags{$guid} = [ ] unless defined $compr_flags{$guid};
                    push @compressions, [ ];
                }
                else  # merge with current one
                {
                    $guids{lc $1} = { } unless defined $guids{lc $1};
                    $guids{lc $1}->{flags} |= $flags{$2} if $2;
                    $guids{lc $1}->{compr} = $guids{$guid}->{compr};
                    $compr_flags{lc $1} = $compr_flags{$guid};
                }
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*(TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT)/)
            {
                $subsection = $1;
                next;
            }
            if ($subsection && /^\s*(($re_hex\s+){2,8})$re_key/)
            {
                my @comp = map { hex $_; } split(/\s+/,$1);
                push @{$compressions[$#compressions]}, [ split(/\s+/,$3), @comp ];
                # add compression flags
                $compr_flags{$guid}->[$comp[0]] |= @comp >= 6 ? 0xc0 : @comp >= 4 ? 0x80 : 0x40;
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.EXCEPTION")
        {
            if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*(LINGUISTIC_CASING)?/)
            {
                $guid = lc $1;
                $guids{$guid} = { } unless defined $guids{lc $1};
                $ling_flag = ($2 ? "+" : "-");
                $exceptions{"$guid$ling_flag"} = [ ] unless defined $exceptions{"$guid$ling_flag"};
                next;
            }
            if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
            {
                $locales{$1} = $guid;
                next;
            }
            if (/^\s*($re_hex)\s+$re_key/)
            {
                $exceptions{"$guid$ling_flag"}->[hex $1] = [ split(/\s+/,$2) ];
                next;
            }
        }
        elsif ("$part.$section" eq "SORTTABLES.JAMOSORT")
        {
            if (/^\s*$re_hex\s+(($re_hex\s*){5})/)
            {
                $jamostr .= pack "C8", map { hex $_; } split /\s+/, $1;
                next;
            }
        }
        die "$current_data_file: $part.$section: unrecognized line $_\n";
    }
    close $KEYS;

    # Sortkey table

    my $table;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my @k = defined $keys[$i] ? @{$keys[$i]} : (0) x 4;
        $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
    }

    # exception tables: a 256-entry index followed by the 256-key pages
    # that contain at least one exception or compression flag
    foreach my $id (sort keys %exceptions)
    {
        my $pos = length($table) / 4;
        my @exc = @{$exceptions{$id}};
        my @filled;
        my $key = (substr( $id, -1 ) eq "+" ? "ling_except" : "except");
        my $guid = substr( $id, 0, -1 );
        $guids{$guid}->{$key} = $pos;
        $pos += 0x100;
        # unconditional declaration: "my ... if" has undefined behaviour
        my @flags = defined $compr_flags{$guid} ? @{$compr_flags{$guid}} : ();
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless defined $exc[$j] || defined $flags[$j];
            $filled[$j >> 8] = 1;
            $j |= 0xff;  # skip to the end of this page
        }
        for (my $j = 0; $j < 0x100; $j++)
        {
            $table .= pack "L<", $filled[$j] ? $pos : $j * 0x100;
            $pos += 0x100 if $filled[$j];
        }
        for (my $j = 0; $j < 0x10000; $j++)
        {
            next unless $filled[$j >> 8];
            my @k = defined $exc[$j] ? @{$exc[$j]} : defined $keys[$j] ? @{$keys[$j]} : (0) x 4;
            $k[3] |= $flags[$j] || 0;
            $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
        }
    }

    # Case mapping tables

    # standard table
    my @casemaps;
    my @upper = @toupper_table;
    my @lower = @tolower_table;
    remove_linguistic_mappings( \@upper, \@lower );
    $casemaps[0] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );

    # linguistic table
    $casemaps[1] = pack( "S<*", 1) . dump_binary_case_table( @toupper_table ) . dump_binary_case_table( @tolower_table );

    # Turkish table
    @upper = @toupper_table;
    @lower = @tolower_table;
    $upper[ord 'i'] = 0x130;  # LATIN CAPITAL LETTER I WITH DOT ABOVE
    $lower[ord 'I'] = 0x131;  # LATIN SMALL LETTER DOTLESS I
    $casemaps[2] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
    my $casemaps = align_string( 8, $casemaps[0] . $casemaps[1] . $casemaps[2] );

    # Char type table

    # every distinct triple of type words is stored once in $types and the
    # table maps each character to the index of its triple
    my @table;
    my $types = "";
    my %typestr;
    for (my $i = 0; $i < 0x10000; $i++)
    {
        my $str = pack "S<3",
                       ($category_table[$i] || 0) & 0xffff,
                       defined($direction_table[$i]) ? $c2_types{$direction_table[$i]} : 0,
                       ($category_table[$i] || 0) >> 16;

        if (!defined($typestr{$str}))
        {
            $typestr{$str} = length($types) / 6;
            $types .= $str;
        }
        $table[$i] = $typestr{$str};
    }

    my (@rows, @array, @data, @row_data);
    (@rows[0..4095], @data) = compress_array( 4096, 0, @table[0..65535] );
    (@array[0..255], @row_data) = compress_array( 256, 0, @rows );
    for (my $i = 0; $i < 256; $i++) { $array[$i] *= 2; }  # we need byte offsets
    for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += 2 * @row_data + 512 - 4096; }

    my $arraystr = pack("S<*", @array, @row_data) . pack("C*", @data);
    my $chartypes = pack "S<2", 4 + length($types) + length($arraystr), 2 + length($types);
    $chartypes = align_string( 8, $chartypes . $types . $arraystr );

    # Sort tables

    # guids
    # "scalar keys" is the number of guids on every Perl version, unlike
    # a hash in scalar context
    my $sorttables = pack "L<2", $version, scalar keys %guids;
    foreach my $id (sort keys %guids)
    {
        my %guid = %{$guids{$id}};
        my $flags = $guid{flags} || 0;
        my $map = length($casemaps[0]) + (defined $guid{ling_except} ? length($casemaps[1]) : 0);
        $sorttables .= pack_guid($id) . pack "L<5",
                                             $flags,
                                             defined($guid{compr}) ? $guid{compr} : 0xffffffff,
                                             $guid{except} || 0,
                                             $guid{ling_except} || 0,
                                             $map / 2;
    }

    # expansions
    $sorttables .= pack "L<S<*", scalar @expansions / 2, @expansions;

    # compressions
    $sorttables .= pack "L<", scalar @compressions;
    my $rowstr = "";
    foreach my $c (@compressions)
    {
        my $pos = length($rowstr) / 2;
        my $min = 0xffff;
        my $max = 0;
        my @lengths = (0) x 8;
        foreach my $r (sort cmp_compression @{$c})
        {
            my @row = @{$r};
            # a row is the 4 key values followed by 2 to 8 characters
            $lengths[scalar @row - 6]++;
            foreach my $val (@row[4..$#row])
            {
                $min = $val if $min > $val;
                $max = $val if $max < $val;
            }
            $rowstr .= align_string( 4, pack "S<*", @row[4..$#row] );
            $rowstr .= pack "C4", $row[1], $row[0], $row[2], $row[3];
        }
        $sorttables .= pack "L<S<10", $pos, $min, $max, @lengths;
    }
    $sorttables .= $rowstr;

    # multiple weights
    $sorttables .= align_string( 4, pack "L<C*", scalar @multiple_weights / 2, @multiple_weights );

    # jamo sort
    $sorttables .= pack("L<", length($jamostr) / 8) . $jamostr;

    # Locales

    add_registry_key( $nlskey, "Sorting\\Ids", "{$default_guid}" );
    foreach my $loc (sort keys %locales)
    {
        # skip specific locales that match more general ones
        my @parts = split /[-_]/, $loc;
        next if @parts > 1 && defined($locales{$parts[0]}) && $locales{$parts[0]} eq $locales{$loc};
        next if @parts > 2 && defined($locales{"$parts[0]-$parts[1]"}) && $locales{"$parts[0]-$parts[1]"} eq $locales{$loc};
        add_registry_string_value( $nlskey, "Sorting\\Ids", $loc, "\{$locales{$loc}\}" );
    }

    # File header

    my @header;
    $header[0] = 16;
    $header[1] = $header[0] + length $table;
    $header[2] = $header[1] + length $casemaps;
    $header[3] = $header[2] + length $chartypes;

    # three-argument open: the file name is never parsed for a mode
    open OUTPUT, ">", "$filename.new" or die "Cannot create $filename";
    print OUTPUT pack "L<*", @header;
    print OUTPUT $table, $casemaps, $chartypes, $sorttables;
    close OUTPUT;
    save_file($filename);
    return $chartypes;
}
# locale descriptions indexed by locale name
my %lcnames;

# return the name of the parent of a locale
#
# Yields undef for an empty name.  A known locale answers with its
# "sparent" field, or failing that its "parent" field; otherwise the name
# without its last "-xxx" component is returned, or "" if there is none.
sub locale_parent($)
{
    my ($loc) = @_;

    return undef unless $loc;

    my $info = $lcnames{$loc};
    if (defined $info)
    {
        foreach my $field (qw(sparent parent))
        {
            return $info->{$field} if defined $info->{$field};
        }
    }
    return $loc =~ /(.*)-[0-9A-Za-z]+/ ? $1 : "";
}
# sort comparator for locale names: ASCII case is ignored and "_" is
# treated like "-"
sub compare_locales
{
    my ($left, $right) = ($a, $b);
    tr/A-Z_/a-z-/ foreach $left, $right;
    return $left cmp $right;
}
# query an xml key
#
# Runs the query through the find() method of the document and returns the
# text content of the first result, or undef if the result is false.
# More than one result is reported on STDERR.
sub xml_query($$)
{
    my ($xml, $query) = @_;

    my $nodes = $xml->find( $query );
    return undef unless $nodes;

    if (@{$nodes} > 1)
    {
        printf STDERR "multiple entries for %s\n", $query;
    }
    return $nodes->[0]->textContent;
}
# query an xml key for a locale, with fallback to the parents
#
# Walks from the locale up through locale_parent() and returns the text
# of the first result that is not the "inherit" marker; returns undef
# when no locale in the chain has one.
sub loc_query($$)
{
    my ($loc, $query) = @_;

    # fallback to "en-US" for root locale
    $loc = $lcnames{"en-US"} unless $loc->{name};

    my $cur = $loc->{name};
    while (defined $cur)
    {
        if (defined $lcnames{$cur})
        {
            my $ret = $lcnames{$cur}->{xml}->find( $query );
            if ($ret)
            {
                printf STDERR "%s: multiple entries for %s\n", $cur, $query if (@{$ret} > 1);
                my $text = @{$ret}[0]->textContent;
                # "↑↑↑" means that the value lives in a parent locale
                return $text unless $text eq "\x{2191}\x{2191}\x{2191}";
            }
        }
        $cur = locale_parent( $cur );
    }
    return undef;
}
# retrieve a locale field entry by going up the parents tree
#
# The lookup order is: the locale itself, "en-US" if the locale has no
# name, the chain of aliases, then the parents (the "sparent" field when
# set, otherwise the name with its last "-xxx" component removed).
# Returns the default when nothing defines the field.
sub locale_entry($$$)
{
    my ($loc, $field, $def) = @_;

    return $loc->{$field} if defined $loc->{$field};

    if (!$loc->{name})  # fallback to "en-US" for root locale
    {
        $loc = $lcnames{"en-US"};
        return $loc->{$field} if defined $loc->{$field};
    }
    while (defined $loc->{alias})  # resolve aliases
    {
        $loc = $lcnames{$loc->{alias}};
        return $loc->{$field} if defined $loc->{$field};
    }

    for (my $cur = $loc->{name}; $cur; )
    {
        my $known = $lcnames{$cur};
        if (defined $known && defined $known->{sparent})
        {
            $cur = $known->{sparent};
        }
        elsif ($cur =~ /(.*)-[0-9A-Za-z]+/)
        {
            $cur = $1;
        }
        else
        {
            last;  # no parent left
        }
        my $parent = $lcnames{$cur};
        return $parent->{$field} if defined $parent && defined $parent->{$field};
    }
    return $def;
}
# binary string pool; offsets into it are handed out in 16-bit units
my $string_data;

# add a binary string to the string pool, reusing an existing copy
#
# Returns the offset of the data in 16-bit units.  An existing copy is
# only reused when it starts at an even byte offset, since an odd one
# cannot be expressed in 16-bit units.
sub add_str_data($)
{
    my $txt = shift;

    $string_data = "" unless defined $string_data;

    # skip matches that straddle two 16-bit units
    my $ret = index( $string_data, $txt );
    $ret = index( $string_data, $txt, $ret + 1 ) while $ret >= 0 && $ret % 2;

    if ($ret == -1)
    {
        $ret = length($string_data);
        $string_data .= $txt;
    }
    return $ret / 2;
}
# add a text string to the string pool and return its offset
#
# The string is stored as UTF-16LE, preceded by its length in 16-bit
# units and followed by a zero word.  Undefined and empty strings are not
# stored and yield offset 0.
sub add_string($)
{
    my ($str) = @_;

    return 0 if !defined($str) || $str eq "";

    my $utf = encode( "UTF16LE", $str );
    my $count = pack "S<", length($utf) / 2;
    my $terminator = pack "S", 0;
    return add_str_data( $count . $utf . $terminator );
}
# add a list of 32-bit values to the string pool, preceded by a 16-bit
# word holding twice the number of values, and return its offset
sub add_fontsig(@)
{
    my @values = @_;
    my $data = pack "S<L<*", scalar(@values) * 2, @values;
    return add_str_data( $data );
}
# add an array of strings to the string pool and return its offset
#
# Every string is added with add_string(); the array itself is a 16-bit
# count followed by the 32-bit offsets of the strings.  An empty list is
# not stored and yields offset 0.
sub add_strarray(@)
{
    my @strings = @_;

    return 0 unless @strings;

    my @offsets = map { add_string($_) } @strings;
    return add_str_data( pack "S<L<*", scalar @strings, @offsets );
}
# convert a number pattern into a grouping string
#
# Returns one character per digit group, rightmost group first, whose
# code is the size of the group.  Patterns without a recognized grouping
# yield a single group of 3.
sub format_to_grouping($)
{
    my ($format) = @_;

    if (my ($second, $first) = $format =~ /#,(#+),(#+0)/)
    {
        return chr(length($first)) . chr(length($second));
    }
    if (my ($group) = $format =~ /#,(#+0)/)
    {
        return chr(length($group));
    }
    # printf STDERR "unknown format %s\n", $format;
    return chr(3);
}
# convert a currency pattern into positive and negative format indexes
#
# The second argument is "positive" or "positive;negative".  Each part is
# matched against a list of layouts and the index of the first match is
# used, or 0 when none matches.  Without a negative part, the negative
# index is derived from the positive one.
# Returns the list (positive index, negative index).
sub parse_currency_format($$)
{
    my ($name, $format) = @_;
    my ($posfmt, $negfmt) = split /;/, $format;
    my @pospatterns = ( "\xa4[^\xa0]*#",  # $1.1
                        "00[^\xa0]*\xa4",  # 1.1$
                        "\xa4.*\xa0.*#",  # $ 1.1
                        "00.*\xa0.*\xa4" );  # 1.1 $
    my @negpatterns = ( "\\(\xa4[^\xa0]*#",  # ($1.1)
                        "-\xa4[^\xa0]*#",  # -$1.1
                        "\xa4[^\xa0]*-#",  # $-1.1
                        "\xa4[^\xa0]*#.*00-",  # $1.1-
                        "00[^\xa0]*\xa4\\)",  # (1.1$)
                        "-#.*00[^\xa0]*\xa4",  # -1.1$
                        "00-[^\xa0]*\xa4",  # 1.1-$
                        "00[^\xa0]*\xa4-",  # 1.1$-
                        "-#.*00.*\xa0.*\xa4",  # -1.1 $
                        "-\xa4.*\xa0.*#",  # -$ 1.1
                        "00.*\xa0.*\xa4-",  # 1.1 $-
                        "\xa4.*\xa0.*#.*00-",  # $ 1.1-
                        "\xa4.*\xa0.*-#",  # $ -1.1
                        "00-.*\xa0.*\xa4",  # 1.1- $
                        "\\(\xa4.*\xa0.*#",  # ($ 1.1)
                        "00.*\xa0.*\xa4\\)");  # (1.1 $)
    # negative layout implied by each positive layout
    my @implied_neg = ( 1, 5, 9, 8 );

    # index of the first pattern matching the text, 0 if there is none
    my $first_match = sub
    {
        my ($text, @patterns) = @_;
        foreach my $idx (0 .. $#patterns)
        {
            my $re = $patterns[$idx];
            return $idx if ($text =~ /$re/);
        }
        #printf STDERR "$name: unknown format '%s'\n", $text;
        return 0;
    };

    my $pos = $first_match->( $posfmt, @pospatterns );
    my $neg = defined $negfmt ? $first_match->( $negfmt, @negpatterns ) : $implied_neg[$pos];

    return ($pos, $neg);
}
# convert a percent pattern into positive and negative format indexes
#
# The positive index is the position of the first matching layout; an
# unknown layout is reported on STDERR and yields the number of layouts.
# The negative index equals the positive one, except that 3 maps to 7.
sub parse_percent_format($)
{
    my ($fmt) = @_;
    my @patterns = ( "0.+%",  # 1 %
                     "0%",  # 1%
                     "%#",  # %1
                     "%.+#" );  # % 1

    my $pos = 0;
    foreach my $re (@patterns)
    {
        last if ($fmt =~ /$re/);
        $pos++;
    }
    printf STDERR "unknown format '%s'\n", $fmt if ($pos == @patterns);

    my $neg = ($pos == 3) ? 7 : $pos;
    return ($pos, $neg);
}
# rewrite the fields of a date pattern: the era becomes "gg", stand-alone
# month names become "MMMM"/"MMM", day names become "dddd" and a single
# "y" becomes "yyyy".  Only the first occurrence of each is rewritten.
sub convert_date_format($)
{
    my ($fmt) = @_;

    foreach ($fmt)
    {
        s/G+/gg/;
        s/LLLL/MMMM/;
        s/LLL/MMM/;
        s/E+/dddd/;
        s/ccc+/dddd/;
        # a lone "y" in the middle, at the start or at the end
        s/([^gy])y([^y])/$1yyyy$2/;
        s/^y([^y])/yyyy$1/;
        s/([^gy])y$/$1yyyy/;
    }
    return $fmt;
}
# rewrite the fields of a time pattern: the first run of "a" and the
# first run of "B" become "tt", and the first U+202F becomes a plain space
sub convert_time_format($)
{
    my ($fmt) = @_;

    foreach ($fmt)
    {
        s/a+/tt/;
        s/B+/tt/;
        s/\x{202f}/ /;
    }
    return $fmt;
}
# load the ISO 639-3 code table
#
# Returns a hash that maps the two-letter code in the fourth column of
# the table to the three-letter code in the third column.  Lines that do
# not have both codes are ignored.
sub load_iso639()
{
    my %iso639;
    my $DATA = open_data_file( "iso639", "iso-639-3_Code_Tables_$ISO639VERSION/iso-639-3_$ISO639VERSION.tab" );

    while (<$DATA>)
    {
        my ($three, $two) = /^\s*[a-z]{3}\s+[a-z]{3}\s+([a-z]{3})\s+([a-z]{2})\s/ or next;
        $iso639{$two} = $three;
    }
    close $DATA;
    return %iso639;
}
4424 ################################################################
4425 # build the locale table for locale.nls
4426 sub build_locale_data()
4428 my $base = "cldr-release-$CLDRVERSION";
4429 my $suppl = load_xml_data_file( "cldr", "$base/common/supplemental/supplementalData.xml" );
4430 my $subtags = load_xml_data_file( "cldr", "$base/common/supplemental/likelySubtags.xml" );
4431 my $numbers = load_xml_data_file( "cldr", "$base/common/supplemental/numberingSystems.xml" );
4432 # obsolete phone data from CLDR version 33
4433 my $phone = load_xml_data_file( "cldr33", "common/supplemental/telephoneCodeData.xml" );
4434 my %iso639 = load_iso639();
4435 $string_data = pack "S2", 0, 0; # offset 0 == empty string
4437 %lcnames = map { $_->{name} => $_ } @locales;
4439 my %lcids;
4440 foreach my $loc (@locales) { $lcids{$loc->{lcid}} = $loc if defined $loc->{lcid}; }
4442 my %days = ( "sun" => 0, "mon" => 1, "tue" => 2, "wed" => 3, "thu" => 4, "fri" => 5, "sat" => 6 );
4444 # assign locale parents
4446 foreach my $loc (@locales)
4448 next if $loc->{name} eq "";
4449 next if defined $loc->{parent};
4450 (my $unix_name = $loc->{name}) =~ s/-/_/g;
4451 my $parent = xml_query( $suppl, "/supplementalData/parentLocales[not(\@component)]/parentLocale[contains(concat(' ',\@locales,' '),' $unix_name ')]/\@parent" );
4452 if ($parent)
4454 $parent =~ s/_/-/g;
4455 $parent = "" if $parent eq "root";
4457 elsif ($loc->{name} =~ /(.*)-[0-9A-Za-z]+/) { $parent = $1; }
4458 $loc->{parent} = $parent || "";
4461 # load per-locale XML files
4463 foreach my $loc (@locales)
4465 next if defined $loc->{alias};
4466 (my $file = $loc->{file} || $loc->{name}) =~ s/-/_/g;
4467 $file = "$base/" . ($loc->{dir} || "common") . "/main/$file.xml";
4468 my $xml = load_xml_data_file( "cldr", $file );
4469 $loc->{xml} = $xml;
4470 $loc->{language} ||= xml_query( $xml, "/ldml/identity/language/\@type" );
4471 $loc->{territory} ||= xml_query( $xml, "/ldml/identity/territory/\@type" );
4472 $loc->{script} = xml_query( $xml, "/ldml/identity/script/\@type" );
4473 if (!defined($loc->{territory}) && $loc->{name} =~ /-([A-Z]{2}|[0-9]{3})$/) { $loc->{territory} = $1; }
4474 if (!defined($loc->{script}) && $loc->{name} =~ /-([A-Z][a-z]{3})(-[A-Z]{2})?$/) { $loc->{script} = $1; }
4477 # assign a default territory and sort locale
4479 foreach my $loc (@locales)
4481 next if defined $loc->{alias};
4482 next if defined $loc->{territory};
4483 my $id = $loc->{sortlocale};
4484 if (defined $id && ($id =~ /[-_]([A-Z0-9]+)$/))
4486 $loc->{territory} = $1;
4487 next;
4489 my @children = grep /^$loc->{name}-[A-Z0-9]+$/ && !defined $lcnames{$_}->{alias}, keys %lcnames;
4490 if (@children == 1)
4492 $id = $children[0];
4494 else
4496 my $name = $loc->{file} || $loc->{name};
4497 $name =~ s/-(Arab|Beng|Cyrl|Deva|Guru|Hans|Hant|Latn|Tfng|Vaii)$//;
4498 $name =~ s/-/_/g;
4499 $id = xml_query( $subtags, "/supplementalData/likelySubtags/likelySubtag[\@from='$name']/\@to" );
4500 $id =~ s/_/-/g if $id;
4502 if ($id =~ /[-_]([A-Z0-9]+)$/)
4504 $loc->{territory} = $1;
4505 next if defined $loc->{sortlocale};
4506 next unless $id =~ /^$loc->{name}/;
4507 while (defined $lcnames{$id} && defined $lcnames{$id}->{alias}) { $id = $lcnames{$id}->{alias}; }
4508 $loc->{sortlocale} = $id if defined $lcnames{$id};
4509 next;
4511 print STDERR "no territory found for $loc->{name}\n";
4514 # fill geoid table
4516 my %geotable;
4517 foreach my $geo (@geoids)
4519 my $name = $geo->{name};
4520 next unless defined $name;
4521 $geo->{alias} = $geotable{$name} if defined $geotable{$name};
4522 $geotable{$name} ||= $geo;
4524 foreach my $loc (@locales)
4526 next if defined $loc->{alias};
4527 my $territory = $loc->{territory};
4528 $geotable{$territory} ||= { name => $territory };
4530 foreach my $name (keys %geotable)
4532 my $geo = $geotable{$name};
4533 $geo->{dialcode} = xml_query( $phone, "(/supplementalData/telephoneCodeData/codesByTerritory[\@territory='$name']/telephoneCountryCode)[1]/\@code" );
4534 if ($name =~ /\d+/)
4536 $geo->{uncode} = $name;
4537 next;
4539 $geo->{iso2} = $name;
4540 $geo->{iso3} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@alpha3");
4541 $geo->{uncode} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@numeric");
4542 $geo->{sintlsymbol} ||= xml_query( $suppl, "(/supplementalData/currencyData/region[\@iso3166='$name']/currency[not(\@to)])[1]/\@iso4217") || "XXX";
4543 $geo->{sintlsymbol} =~ s/XXX/XDR/;
4545 foreach my $geo (@geoids)
4547 $geo->{parentid} = $geotable{$geo->{parent}}->{id} if defined $geo->{parent};
4548 next if defined $geo->{iso2};
4549 next if defined $geo->{alias};
4550 next unless defined $geo->{uncode};
4551 my @contains;
4552 my $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and not(\@status)]/\@contains");
4553 push @contains, split /\s+/, $list if defined $list;
4554 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and \@status='deprecated']/\@contains");
4555 push @contains, split /\s+/, $list if defined $list;
4556 while (@contains)
4558 my $territory = pop @contains;
4559 if (defined $geotable{$territory})
4561 $geotable{$territory}->{parentid} ||= $geo->{id};
4563 elsif ($territory =~ /\d+/)
4565 # expand region recursively
4566 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$territory' and not(\@status)]/\@contains" );
4567 push @contains, split /\s+/, $list if defined $list;
4572 # assign calendars to their locale
4574 foreach my $cal (@calendars)
4576 next unless defined $cal->{locale};
4577 my $loc = $lcnames{$cal->{locale}};
4578 $loc->{calendar} = [ ] unless defined $loc->{calendar};
4579 push @{$loc->{calendar}}, $cal;
4582 # assign default lcid to aliases
4584 foreach my $loc (@locales)
4586 next unless defined $loc->{alias};
4587 next if defined $loc->{lcid};
4588 my $alias = $loc->{alias};
4589 my $lcid = $lcnames{$alias}->{lcid} || 0x1000;
4590 $loc->{lcid} = $lcid | 0x80000000;
4593 # assign sort aliases to parent locale
4595 foreach my $loc (@locales)
4597 next unless $loc->{name} =~ /_/;
4598 next unless defined $loc->{alias};
4599 my $alias = $loc->{alias};
4600 my $parent = $lcnames{$alias};
4601 my $basename = $parent->{name};
4602 while (1)
4604 @{$parent->{sortnames}}[($loc->{lcid} >> 16) - 1] = $loc->{name};
4605 $alias = locale_parent( $alias );
4606 last unless $alias && defined $lcnames{$alias};
4607 $parent = $lcnames{$alias};
4608 last if defined $parent->{sortbase} && $parent->{sortbase} ne $basename;
4609 $parent->{sortbase} = $basename;
4613 # assign an array index to all locales
4615 my $idx = 0;
4616 foreach my $loc (@locales)
4618 next if defined $loc->{alias};
4619 $loc->{idx} = $idx++;
4621 foreach my $loc (@locales)
4623 my $alias = $loc->{alias};
4624 next unless defined $alias;
4625 while (defined $lcnames{$alias}->{alias}) { $alias = $lcnames{$alias}->{alias}; }
4626 $loc->{idx} = $lcnames{$alias}->{idx};
4629 # output lcids table
4631 my $lcid_data = "";
4632 foreach my $id (sort { $a <=> $b } keys %lcids)
4634 my $loc = $lcids{$id};
4635 $lcid_data .= pack "L<S<2", $id, $loc->{idx}, add_string($loc->{name});
4638 # output lcnames table
4640 my $lcname_data = "";
4641 foreach my $name (sort compare_locales keys %lcnames)
4643 my $loc = $lcnames{$name};
4644 $lcname_data .= pack "S<2L<", add_string($name), $loc->{idx}, $loc->{lcid} || 0x1000;
4647 # output locales array
4649 my $locale_data = "";
4650 my $default_lcid = 0x8001;
4651 foreach my $loc (@locales)
4653 next if defined $loc->{alias};
4654 my $sname = $loc->{name};
4655 my $language = $loc->{language};
4656 my $territory = $loc->{territory};
4657 my $script = $loc->{script};
4658 my $neutral = ($sname && $sname !~ /-$territory/);
4659 my $sparent = $loc->{sparent} || (($sname =~ /(.*)-[0-9A-Za-z]+/) ? $1 : $loc->{parent});
4660 my $unique_lcid = $loc->{lcid};
4661 unless (defined $unique_lcid) { $unique_lcid = $default_lcid++; }
4662 my $geo = $geotable{$territory};
4663 my $territory_match = "contains(concat(' ',normalize-space(\@territories),' '),' $territory ')";
4665 # languages and scripts
4667 my $ssortlocale = $loc->{sortlocale} || ($neutral ? "$sname-$territory" : $sname);
4668 my $idefaultlanguage = defined $lcnames{$ssortlocale} ? $lcnames{$ssortlocale}->{lcid} : undef;
4669 $idefaultlanguage = $lcnames{"en-US"}->{lcid} unless $ssortlocale;
4670 (my $siso639langname = $sname) =~ s/-.*$//;
4671 my $siso639langname2 = $iso639{$siso639langname} || $siso639langname;
4672 my $sopentypelang = sprintf "%-4s", locale_entry( $loc, "sopentypelang", uc $siso639langname2 );
4673 my $sabbrevlangname = defined $loc->{lcid} ? locale_entry( $loc, "sabbrevlangname", uc $siso639langname2 ) : "ZZZ";
4674 my $siso3166ctryname2 = $geo->{iso3} || $geo->{uncode};
4675 my $senglanguage = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" ) || "";
4676 my $sengcountry = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" ) || "";
4677 my $snativelangname = loc_query( $loc, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" );
4678 my $snativectryname = loc_query( $loc, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" );
4679 $sengcountry =~ s/South Korea/Korea/;
4680 $snativelangname ||= $senglanguage;
4681 $snativectryname ||= $sengcountry;
4682 if ($script)
4684 my $engscript = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4685 my $nativescript = loc_query( $loc, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4686 $senglanguage .= " ($engscript)" if $engscript;
4687 $snativelangname .= " ($nativescript)" if $nativescript;
4689 my $sengdisplayname = $neutral ? $senglanguage : "$senglanguage ($sengcountry)";
4690 my $snativedisplayname = $neutral ? $snativelangname : "$snativelangname ($snativectryname)";
4691 $sengdisplayname =~ s/\) \(/, /;
4692 $snativedisplayname =~ s/\) \(/, /;
4693 my $sscripts = locale_entry( $loc, "sscripts", $script ) || xml_query( $suppl, "/supplementalData/languageData/language[\@type='$language' and not(\@alt)]/\@scripts" );
4694 $sscripts = (join ";", (sort split / /, ($sscripts || "Latn"))) . ";";
4695 my $ireadinglayout = locale_entry( $loc, "ireadinglayout", 0 );
4696 my $charlayout = loc_query( $loc, "/ldml/layout/orientation/characterOrder" );
4697 if ($charlayout eq "right-to-left")
4699 $ireadinglayout = 1;
4701 elsif ($charlayout eq "top-to-bottom")
4703 my $linelayout = loc_query( $loc, "/ldml/layout/orientation/lineOrder" );
4704 $ireadinglayout = $linelayout eq "right-to-left" ? 2 : 3;
4706 my $igeoid = $geo->{id} || 0;
4708 # numbers
4710 my $sdecimal = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/decimal" );
4711 my $slist = locale_entry( $loc, "slist", ";" );
4712 my $smondecimalsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyDecimal" ) || $sdecimal;
4713 my $sthousand = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/group" );
4714 $sthousand =~ s/\x{202f}/\x{00a0}/;
4715 my $smonthousandsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyGroup" ) || $sthousand;
4716 my $spositivesign = "";
4717 my $snegativesign = "-";
4718 my $spercent = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/percentSign" );
4719 my $snan = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/nan" );
4720 my $sposinfinity = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/infinity" );
4721 my $sneginfinity = $sposinfinity ? "-$sposinfinity" : "";
4722 my $sgrouping = format_to_grouping( loc_query( $loc, "/ldml/numbers/decimalFormats[\@numberSystem='latn']/decimalFormatLength[not(\@type)]/decimalFormat/pattern" ));
4723 my $percentformat = loc_query( $loc, "/ldml/numbers/percentFormats[\@numberSystem='latn']/percentFormatLength[not(\@type)]/percentFormat/pattern" );
4724 my $currencyformat = loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='accounting']/pattern[not(\@alt)]" ) ||
4725 loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='standard']/pattern[not(\@alt)]" );
4726 my $smongrouping = format_to_grouping( $currencyformat );
4727 my ($icurrency, $inegcurr) = parse_currency_format( $sname, $currencyformat );
4728 my ($ipospercent, $inegpercent) = parse_percent_format( $percentformat );
4729 my $native_numbering = loc_query( $loc, "/ldml/numbers/otherNumberingSystems/native" );
4730 my @snativedigits = split //, (locale_entry( $loc, "nativedigits", "" ) || xml_query( $numbers, "/supplementalData/numberingSystems/numberingSystem[\@id='$native_numbering']/\@digits" ));
4731 my $digitsubstitution = !(ord($snativedigits[0]) >= 0x600 && ord($snativedigits[0]) <= 0x6ff);
4732 my $measure = defined xml_query( $suppl, "/supplementalData/measurementData/measurementSystem[\@type='US' and $territory_match]" );
4733 my $papersize = defined xml_query( $suppl, "/supplementalData/measurementData/paperSize[\@type='US-Letter' and $territory_match]" );
4735 # currencies
4737 my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
4738 my $scurrency = $geo->{scurrency} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[\@alt='narrow']" );
4739 $scurrency ||= loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[not(\@alt)]" );
4740 $geo->{scurrency} = $scurrency if $scurrency;
4741 $scurrency ||= $sintlsymbol;
4742 my $sengcurrname = $loc->{sengcurrname} || loc_query( $lcnames{en}, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" );
4743 my $snativecurrname = $loc->{sengcurrname} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" ) || $sengcurrname;
4744 my $icurrdigits = xml_query( $suppl, "/supplementalData/currencyData/fractions/info[\@iso4217='$sintlsymbol']/\@digits" );
4745 $icurrdigits = 2 unless defined $icurrdigits;
4747 # calendars
4749 my $firstday = xml_query( $suppl, "/supplementalData/weekData/firstDay[not(\@alt) and $territory_match]/\@day" );
4750 my $ifirstdayofweek = $firstday ? $days{$firstday} : 1;
4751 my $firstweekofyear = (xml_query( $suppl, "/supplementalData/weekData/minDays[$territory_match]/\@count" ) || 0) == 4 ? 2 : 0;
4752 my $serastring = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/eras/eraAbbr/era[\@type='1' and not(\@alt)]" );
4753 my (@sdayname, @sabbrevdayname, @sshortestdayname);
4754 foreach my $d (sort { $days{$a} <=> $days{$b} } keys %days)
4756 my $n = $days{$d};
4757 my %name;
4758 foreach my $type (qw(wide abbreviated short))
4760 $name{$type} = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/days/dayContext[\@type='format']/dayWidth[\@type='$type']/day[\@type='$d' and not(\@alt)]" );
4762 push @sdayname, $name{wide};
4763 push @sabbrevdayname, $name{abbreviated} || $name{wide};
4764 push @sshortestdayname, $name{short} || $name{abbreviated} || $name{wide};
4766 my (@smonthname, @sabbrevmonthname, @sgenitivemonth, @sabbrevgenitivemonth);
4767 foreach my $n (1..13)
4769 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='wide']/month[\@type='$n']" );
4770 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4771 my $genitive = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n']" );
4772 my $abbrevgen = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4773 push @smonthname, $name || $genitive || "";
4774 push @sabbrevmonthname, $abbrev || $abbrevgen || $name || $genitive || "";
4775 push @sgenitivemonth, $genitive || "";
4776 push @sabbrevgenitivemonth, $abbrevgen || $genitive || "";
4778 @sgenitivemonth = () if join("|",@smonthname) eq join("|",@sgenitivemonth);
4779 @sabbrevgenitivemonth = () if join("|",@sabbrevmonthname) eq join("|",@sabbrevgenitivemonth);
4780 my %caltypes = ( "gregorian" => 1, "japanese" => 3, "chinese" => 4, "dangi" => 5, "islamic" => 6, "buddhist" => 7, "hebrew" => 8,
4781 "persian" => 22, "islamic-civil" => 23, "islamic-umalqura" => 23 );
4782 my $calpref = xml_query( $suppl, "/supplementalData/calendarPreferenceData/calendarPreference[$territory_match]/\@ordering" ) || "gregorian";
4783 my $icalendartype;
4784 my @scalnames;
4785 foreach my $c (split /\s+/, $calpref)
4787 next unless defined $caltypes{$c};
4788 $icalendartype .= chr($caltypes{$c});
4789 $scalnames[$caltypes{$c} - 1] = loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$c']" );
4792 # date/time formats
4794 my $s1159 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='am' and not(\@alt)]" );
4795 my $s2359 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='pm' and not (\@alt)]" );
4796 my $sshortestam = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='am' and not(\@alt)]" );
4797 my $sshortestpm = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='pm' and not (\@alt)]" );
4798 my @stimeformat = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='medium']/timeFormat/pattern[not(\@alt)]" ));
4799 push @stimeformat, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4800 pop @stimeformat if $stimeformat[0] eq $stimeformat[1];
4801 @stimeformat = map convert_time_format($_), @stimeformat;
4802 my @sshorttime = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='short']/timeFormat/pattern[not(\@alt)]" ));
4803 push @sshorttime, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hm' and not(\@alt)]" );
4804 pop @sshorttime if $sshorttime[0] eq $sshorttime[1];
4805 @sshorttime = map convert_time_format($_), @sshorttime;
4806 my @sshortdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
4807 push @sshortdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
4808 @sshortdate = map convert_date_format($_), @sshortdate;
4809 my @slongdate = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" ));
4810 push @slongdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
4811 @slongdate = map convert_date_format($_), @slongdate;
4812 my @smonthday = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMd' and not(\@alt)]" ));
4813 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Md' and not(\@alt)]" );
4814 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMd' and not(\@alt)]" );
4815 @smonthday = map convert_date_format($_), @smonthday;
4816 my @syearmonth = map convert_date_format($_), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMM' and not(\@alt)]" );
4817 my @sduration = map convert_time_format( lc $_ ), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4818 my $srelativelongdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMEd' and not(\@alt)]" ) ||
4819 loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMEd' and not(\@alt)]" );
4820 $srelativelongdate = convert_date_format( $srelativelongdate );
4822 if (defined $loc->{calendar})
4824 foreach my $cal (@{$loc->{calendar}})
4826 $cal->{sshortdate} = \@sshortdate;
4827 $cal->{syearmonth} = \@syearmonth;
4828 $cal->{slongdate} = \@slongdate;
4829 $cal->{serastring} = [ $serastring ];
4830 $cal->{sdayname} = \@sdayname;
4831 $cal->{sabbrevdayname} = \@sabbrevdayname;
4832 $cal->{smonthname} = \@smonthname;
4833 $cal->{sabbrevmonthname} = \@sabbrevmonthname;
4834 $cal->{scalname} = $scalnames[$cal->{id}];
4835 $cal->{smonthday} = \@smonthday;
4836 $cal->{sshortestdayname} = \@sshortestdayname;
4837 $cal->{sabbreverastring} = [ $serastring ];
4838 $cal->{sshortestdayname} = \@sshortestdayname;
4839 $cal->{srelativelongdate} = $srelativelongdate;
4843 # codepages
4845 my %ansicpmap = ( 437 => 1252, 720 => 1256, 737 => 1253, 775 => 1257, 850 => 1252,
4846 852 => 1250, 855 => 1251, 866 => 1251, 857 => 1254, 862 => 1255 );
4847 my %maccpmap = ( 437 => 10000, 720 => 10004, 737 => 10006, 775 => 10029, 850 => 10000,
4848 852 => 10029, 855 => 10007, 857 => 10081, 862 => 10005, 866 => 10007,
4849 874 => 10021, 932 => 10001, 936 => 10008, 949 => 10003, 950 => 10002,
4850 1258 => 10000 );
4851 my %ebcdiccpmap = ( 437 => 37, 720 => 20420, 737 => 20273, 866 => 20880, 932 => 20290 );
4852 my %codepagemasks = ( 874 => [ 0x01000000, 0x00000000, 0x00000000, 0, 0x00010000, 0x00000000, 0x00010000, 0x00000000 ],
4853 932 => [ 0x00000000, 0x28c70000, 0x00000010, 0, 0x00020000, 0x00000000, 0x00020000, 0x00000000 ],
4854 936 => [ 0x00000000, 0x28010000, 0x00000002, 0, 0x00040000, 0x00000000, 0x00040000, 0x00000000 ],
4855 949 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00080000, 0x00000000, 0x00080000, 0x00000000 ],
4856 950 => [ 0x00000000, 0x28c10000, 0x00000012, 0, 0x00100000, 0x00000000, 0x00100000, 0x00000000 ],
4857 1258 => [ 0x2000000f, 0x00000000, 0x00000000, 0, 0x00000100, 0x00008000, 0x00000100, 0x00008000 ],
4858 866 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x00020000, 0x00000004, 0x02020000 ],
4859 862 => [ 0x00000800, 0x40000000, 0x00000000, 0, 0x00000020, 0x00200000, 0x00000020, 0x00200000 ],
4860 857 => [ 0x0000001f, 0x00000000, 0x00000000, 0, 0x00000010, 0x01000000, 0x00000010, 0x01000000 ],
4861 855 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x02000000, 0x00000004, 0x02000000 ],
4862 852 => [ 0x00000027, 0x00000000, 0x00000000, 0, 0x00000002, 0x04000000, 0x00000002, 0x04000000 ],
4863 775 => [ 0x00000007, 0x00000000, 0x00000000, 0, 0x00000080, 0x08000000, 0x00000080, 0x08000000 ],
4864 737 => [ 0x00000080, 0x00000000, 0x00000000, 0, 0x00000008, 0x10000000, 0x00000008, 0x10010000 ],
4865 720 => [ 0x00002000, 0x00000000, 0x00000000, 0, 0x00000040, 0x20000000, 0x00000040, 0x20080000 ],
4866 850 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x40000000, 0x0000019f, 0xdfd70000 ],
4867 437 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x80000000, 0x0000019f, 0xdfd70000 ],
4868 65001 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00000000, 0x00000000, 0x0000019f, 0xdfd70000 ] );
4869 my $oemcp = locale_entry( $loc, "oemcp", 65001 );
4870 my $maccp = locale_entry( $loc, "maccp", undef ) || $maccpmap{$oemcp} || 65001;
4871 my $ebcdiccp = locale_entry( $loc, "ebcdiccp", undef ) || $ebcdiccpmap{$oemcp} || 500;
4872 $ebcdiccp = 500 if (defined $loc->{oemcp} && $loc->{oemcp} == 65001) || (defined $loc->{maccp} && $loc->{maccp} == 65001);
4873 my $ansicp = $ansicpmap{$oemcp} || $oemcp;
4874 my @fontsig = (0) x 8;
4875 my $sig = locale_entry( $loc, "fontsig", [] );
4876 foreach my $i (0..7) { $fontsig[$i] |= $codepagemasks{$oemcp}->[$i]; }
4877 foreach my $i (0..$#{$sig}) { $fontsig[$i] |= $sig->[$i]; }
4878 $fontsig[3] |= 1 << 31;
4879 $fontsig[3] |= 1 << 27 if $ireadinglayout == 1;
4880 $fontsig[3] |= 1 << 28 if $ireadinglayout == 3;
4882 # special cases for invariant locale
4884 unless ($loc->{name})
4886 $siso639langname = "iv";
4887 $siso639langname2 = "ivl";
4888 $senglanguage = $snativelangname = "Invariant Language";
4889 $sengcountry = $snativectryname = "Invariant Country";
4890 $sengdisplayname = "Invariant Language (Invariant Country)";
4891 $snativedisplayname = "Invariant Language (Invariant Region)";
4892 $sengcurrname = $snativecurrname = "International Monetary Fund";
4893 $scurrency = "\x{00a4}";
4894 $ifirstdayofweek = 0;
4895 $igeoid = $geotable{"US"}->{id};
4896 @stimeformat = ("HH:mm:ss");
4897 @sshortdate = ("MM/dd/yyyy", "yyyy-MM-dd");
4898 @slongdate = ("dddd, dd MMMM yyyy");
4899 @syearmonth = ("yyyy MMMM");
4900 @smonthday = ("MMMM dd", "MMMM d", "M/d", "MMM d");
4901 @sshorttime = ("HH:mm", "hh:mm tt", "H:mm", "h:mm tt");
4902 $srelativelongdate = "dddd, MMMM dd";
4903 $sposinfinity = "Infinity";
4904 $sneginfinity = "-Infinity";
4905 $spositivesign = "+";
4906 $ipospercent = $inegpercent = 0;
4909 # output data
4911 $locale_data .= pack "L<2",
4912 add_string( $sname ), # name
4913 add_string( $sopentypelang ); # LOCALE_SOPENTYPELANGUAGETAG
4915 $locale_data .= pack "S<14",
4916 $loc->{lcid} || 0x1000, # LOCALE_ILANGUAGE
4917 $unique_lcid, # unique_lcid
4918 locale_entry( $loc, "idigits", 2 ), # LOCALE_IDIGITS
4919 locale_entry( $loc, "inegnumber", 1 ), # LOCALE_INEGNUMBER
4920 $icurrdigits, # LOCALE_ICURRDIGITS
4921 $icurrency, # LOCALE_ICURRENCY
4922 $inegcurr, # LOCALE_INEGCURR
4923 locale_entry( $loc, "ilzero", 1 ), # LOCALE_ILZERO
4924 !$neutral, # LOCALE_INEUTRAL
4925 $ifirstdayofweek, # LOCALE_IFIRSTDAYOFWEEK
4926 $firstweekofyear, # LOCALE_IFIRSTWEEKOFYEAR
4927 $geo->{dialcode} || 1 , # LOCALE_ICOUNTRY,
4928 $measure, # LOCALE_IMEASURE
4929 $digitsubstitution; # LOCALE_IDIGITSUBSTITUTION
4931 $locale_data .= pack "L<18",
4932 add_string( $sgrouping ), # LOCALE_SGROUPING
4933 add_string( $smongrouping ), # LOCALE_SMONGROUPING
4934 add_string( $slist ), # LOCALE_SLIST
4935 add_string( $sdecimal ), # LOCALE_SDECIMAL
4936 add_string( $sthousand ), # LOCALE_STHOUSAND
4937 add_string( $scurrency ), # LOCALE_SCURRENCY
4938 add_string( $smondecimalsep ), # LOCALE_SMONDECIMALSEP
4939 add_string( $smonthousandsep ), # LOCALE_SMONTHOUSANDSEP
4940 add_string( $spositivesign ), # LOCALE_SPOSITIVESIGN
4941 add_string( $snegativesign ), # LOCALE_SNEGATIVESIGN
4942 add_string( $s1159 ), # LOCALE_S1159
4943 add_string( $s2359 ), # LOCALE_S2359
4944 add_strarray( @snativedigits ), # LOCALE_SNATIVEDIGITS
4945 add_strarray( @stimeformat ), # LOCALE_STIMEFORMAT
4946 add_strarray( @sshortdate ), # LOCALE_SSHORTDATE
4947 add_strarray( @slongdate ), # LOCALE_SLONGDATE
4948 add_strarray( @syearmonth ), # LOCALE_SYEARMONTH
4949 add_strarray( @sduration ); # LOCALE_SDURATION
4951 $locale_data .= pack "S<8",
4952 $idefaultlanguage || 0x1000, # LOCALE_IDEFAULTLANGUAGE
4953 $ansicp, # LOCALE_IDEFAULTANSICODEPAGE
4954 $oemcp, # LOCALE_IDEFAULTCODEPAGE
4955 $maccp, # LOCALE_IDEFAULTMACCODEPAGE
4956 $ebcdiccp, # LOCALE_IDEFAULTEBCDICCODEPAGE
4957 $igeoid < 65536 ? $igeoid : 39070, # old_geoid
4958 $papersize ? 1 : 9, # LOCALE_IPAPERSIZE
4959 0; # FIXME # islamic_cal
4961 $locale_data .= pack "L<24",
4962 add_string( $icalendartype ), # LOCALE_ICALENDARTYPE
4963 add_string( $sabbrevlangname ), # LOCALE_SABBREVLANGNAME
4964 add_string( $siso639langname ), # LOCALE_SISO639LANGNAME
4965 add_string( $senglanguage ), # LOCALE_SENGLANGUAGE
4966 add_string( $snativelangname ), # LOCALE_SNATIVELANGNAME
4967 add_string( $sengcountry ), # LOCALE_SENGCOUNTRY
4968 add_string( $snativectryname ), # LOCALE_SNATIVECTRYNAME
4969 add_string( $siso3166ctryname2 ), # LOCALE_SABBREVCTRYNAME
4970 add_string( $territory ), # LOCALE_SISO3166CTRYNAME
4971 add_string( $sintlsymbol ), # LOCALE_SINTLSYMBOL
4972 add_string( $sengcurrname ), # LOCALE_SENGCURRNAME
4973 add_string( $snativecurrname ), # LOCALE_SNATIVECURRNAME
4974 add_fontsig( @fontsig ), # LOCALE_FONTSIGNATURE
4975 add_string( $siso639langname2 ), # LOCALE_SISO639LANGNAME2
4976 add_string( $siso3166ctryname2 ), # LOCALE_SISO3166CTRYNAME2
4977 add_string( $sparent ), # LOCALE_SPARENT
4978 add_strarray( @sdayname ), # LOCALE_SDAYNAME
4979 add_strarray( @sabbrevdayname ), # LOCALE_SABBREVDAYNAME
4980 add_strarray( @smonthname ), # LOCALE_SMONTHNAME
4981 add_strarray( @sabbrevmonthname ), # LOCALE_SABBREVMONTHNAME
4982 add_strarray( @sgenitivemonth ), # LOCALE_SGENITIVEMONTH
4983 add_strarray( @sabbrevgenitivemonth ), # LOCALE_SABBREVGENITIVEMONTH
4984 add_strarray( @scalnames ), # LOCALE_SCALNAMES
4985 add_strarray( @{$loc->{sortnames}} ); # LOCALE_SSORTNAMES
4987 $locale_data .= pack "S<6",
4988 $inegpercent, # LOCALE_INEGATIVEPERCENT
4989 $ipospercent, # LOCALE_IPOSITIVEPERCENT
4990 0, # unknown
4991 $ireadinglayout, # LOCALE_IREADINGLAYOUT
4992 0x2a, # unknown
4993 0x2a; # unknown
4995 $locale_data .= pack "L<24",
4996 0, # unknown
4997 add_string( $sengdisplayname ), # LOCALE_SENGLISHDISPLAYNAME
4998 add_string( $snativedisplayname ), # LOCALE_SNATIVEDISPLAYNAME
4999 add_string( $spercent ), # LOCALE_SPERCENT
5000 add_string( $snan ), # LOCALE_SNAN
5001 add_string( $sposinfinity ), # LOCALE_SPOSINFINITY
5002 add_string( $sneginfinity ), # LOCALE_SNEGINFINITY
5003 0, # unknown
5004 add_string( $serastring ), # CAL_SERASTRING
5005 add_string( $serastring ), # CAL_SABBREVERASTRING
5006 0, # unknown
5007 add_string( $ssortlocale ), # LOCALE_SCONSOLEFALLBACKNAME
5008 add_strarray( @sshorttime ), # LOCALE_SSHORTTIME
5009 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
5010 0, # unknown
5011 add_string( $ssortlocale ), # LOCALE_SSORTLOCALE
5012 add_string( "0409:00000409" ), # FIXME # LOCALE_SKEYBOARDSTOINSTALL
5013 add_string( $sscripts ), # LOCALE_SSCRIPTS
5014 add_string( $srelativelongdate ), # LOCALE_SRELATIVELONGDATE
5015 $igeoid, # LOCALE_IGEOID
5016 add_string( $sshortestam || "a" ), # LOCALE_SSHORTESTAM
5017 add_string( $sshortestpm || "p" ), # LOCALE_SSHORTESTPM
5018 add_strarray( @smonthday ), # LOCALE_SMONTHDAY
5019 add_string( "k0-windows-us" ) # FIXME # keyboard_layout
5022 # output language groups
5024 my %groups;
5025 add_registry_key( $nlskey, "Locale", "00000409" );
5026 foreach my $loc (@locales)
5028 next unless defined $loc->{lcid};
5029 next if ($loc->{lcid} & 0x80000000);
5030 next if !defined($loc->{alias}) && $loc->{name} !~ /-$loc->{territory}/; # skip neutral locales
5031 my $group = locale_entry( $loc, "group", 1 );
5032 my $name = sprintf( "%08x", $loc->{lcid} );
5033 my $val = sprintf( "%x", $group );
5034 add_registry_string_value( $nlskey, "Locale", $name, $val ) unless ($loc->{lcid} & 0x000f0000);
5035 add_registry_string_value( $nlskey, "Locale\\Alternate Sorts", $name, $val ) if $loc->{name} =~ /_/;
5036 $groups{$val} = 1;
5038 foreach my $group (keys %groups) { add_registry_string_value( $nlskey, "Language Groups", $group, "1" ); }
5040 # output calendar data
5042 my $calendar_data = "";
5043 foreach my $cal (@calendars)
5045 my $scalname = $cal->{name};
5046 my $iyearoffsetrange = 0;
5047 my $itwodigityearmax = $cal->{itwodigityearmax};
5048 my @sshortdate;
5049 my @syearmonth;
5050 my @slongdate;
5051 my @serastring;
5052 my @sdayname;
5053 my @sabbrevdayname;
5054 my @smonthname;
5055 my @sabbrevmonthname;
5056 my @smonthday;
5057 my @sabbreverastring;
5058 my @sshortestdayname;
5060 my $type = $cal->{type};
5061 if (defined $cal->{locale} && defined $type)
5063 my $loc = $lcnames{$cal->{locale}};
5064 my $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
5065 push @sshortdate, $fmt if $fmt;
5066 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMd' and not(\@alt)]" );
5067 push @sshortdate, $fmt if $fmt;
5068 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
5069 push @sshortdate, $fmt if $fmt;
5070 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMMMd' and not(\@alt)]" );
5071 push @sshortdate, $fmt if $fmt;
5072 @sshortdate = map convert_date_format($_), @sshortdate;
5073 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" );
5074 push @slongdate, $fmt if $fmt;
5075 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
5076 push @slongdate, $fmt if $fmt;
5077 @slongdate = map convert_date_format($_), @slongdate;
5079 foreach my $n (1..13)
5081 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n' and not(\@yeartype)]" );
5082 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n' and not(\@yeartype)]" );
5083 push @smonthname, $name || "";
5084 push @sabbrevmonthname, $abbrev || $name || "";
5087 $scalname ||= loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$type']" );
5088 if (defined $cal->{eras})
5090 my @eras;
5091 my $idx = 1;
5092 foreach my $era (@{$cal->{eras}})
5094 my $start = xml_query( $suppl, "/supplementalData/calendarData/calendar[\@type='$type']/eras/era[\@type='$era']/\@start" );
5095 next unless $start =~ /^(-?\d+)-(\d+)-(\d+)/;
5096 my ($year, $mon, $day, $zero, $first) = ($1, $2, $3, $1 - 1, 1);
5097 if ($zero < 0)
5099 $first -= $zero;
5100 $year = 1;
5101 $itwodigityearmax = 2049 - $zero;
5103 unshift @eras, pack( "S<8", 6, $idx++, $year, $mon, $day, $zero, $first, 0 );
5104 push @serastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraAbbr/era[\@type='$era']" );
5105 push @sabbreverastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraNarrow/era[\@type='$era']" );
5107 $iyearoffsetrange = add_str_data( pack "S<L<*", scalar @eras, map { add_str_data($_); } @eras );
5111 @sshortdate = @{$cal->{sshortdate}} if defined $cal->{sshortdate} && !@sshortdate;
5112 @syearmonth = @{$cal->{syearmonth}} if defined $cal->{syearmonth};
5113 @slongdate = @{$cal->{slongdate}} if defined $cal->{slongdate} && !@slongdate;
5114 @serastring = @{$cal->{serastring}} if defined $cal->{serastring} && !@serastring;
5115 @sdayname = @{$cal->{sdayname}} if defined $cal->{sdayname};
5116 @sabbrevdayname = @{$cal->{sabbrevdayname}} if defined $cal->{sabbrevdayname};
5117 @smonthname = @{$cal->{smonthname}} if defined $cal->{smonthname} && !join("",@smonthname);
5118 @sabbrevmonthname = @{$cal->{sabbrevmonthname}} if defined $cal->{sabbrevmonthname} && !join("",@sabbrevmonthname);
5119 @smonthday = @{$cal->{smonthday}} if defined $cal->{smonthday};
5120 @sabbreverastring = @{$cal->{sabbreverastring}} if defined $cal->{sabbreverastring} && !@sabbreverastring;
5121 @sshortestdayname = @{$cal->{sshortestdayname}} if defined $cal->{sshortestdayname};
5122 my $srelativelongdate = $cal->{srelativelongdate};
5124 @serastring = ("A.D.") unless @serastring;
5125 @sabbreverastring = ("AD") unless @sabbreverastring;
5127 if ($cal->{id} != 1) # calendar 1 is a placeholder, information is fetched from locale instead
5129 @sshortdate = ("") unless @sshortdate;
5130 @syearmonth = ("") unless @syearmonth;
5131 @slongdate = ("") unless @slongdate;
5132 @sdayname = ("") x 7 unless @sdayname;
5133 @sabbrevdayname = ("") x 7 unless @sabbrevdayname;
5134 @sshortestdayname = ("") x 7 unless @sshortestdayname;
5135 @smonthname = ("") x 13 unless @smonthname;
5136 @sabbrevmonthname = ("") x 13 unless @sabbrevmonthname;
5137 @smonthday = ("") unless @smonthday;
5140 $calendar_data .= pack "S<2L<17",
5141 $cal->{id}, # CAL_ICALINTVALUE
5142 $itwodigityearmax || 99, # CAL_ITWODIGITYEARMAX
5143 add_strarray( @sshortdate ), # CAL_SSHORTDATE
5144 add_strarray( @syearmonth ), # CAL_SYEARMONTH
5145 add_strarray( @slongdate ), # CAL_SLONGDATE
5146 add_strarray( @serastring ), # CAL_SERASTRING
5147 $iyearoffsetrange, # CAL_IYEAROFFSETRANGE
5148 add_strarray( @sdayname ), # CAL_SDAYNAME
5149 add_strarray( @sabbrevdayname ), # CAL_SABBREVDAYNAME
5150 add_strarray( @smonthname ), # CAL_SMONTHNAME
5151 add_strarray( @sabbrevmonthname ), # CAL_SABBREVMONTHNAME
5152 add_string( $scalname ), # CAL_SCALNAME
5153 add_strarray( @smonthday ), # CAL_SMONTHDAY
5154 add_strarray( @sabbreverastring ), # CAL_SABBREVERASTRING
5155 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
5156 add_string( $srelativelongdate ); # CAL_SRELATIVELONGDATE
5159 # output locale header
5161 my $nb_lcids = scalar keys %lcids;
5162 my $nb_locales = scalar grep { !defined $_->{alias} } @locales;
5163 my $nb_lcnames = scalar keys %lcnames;
5164 my $locale_size = length($locale_data) / $nb_locales;
5165 my $nb_calendars = scalar @calendars;
5166 my $calendar_size = length($calendar_data) / $nb_calendars;
5167 my $lcids_offset = 19 * 4; # size of header
5168 my $lcnames_offset = $lcids_offset + length $lcid_data;
5169 my $locales_offset = $lcnames_offset + length $lcname_data;
5170 my $calendar_offset = $locales_offset + length $locale_data;
5171 my $strings_offset = $calendar_offset + length $calendar_data;
5173 my $locale_header = pack "L<7S<4L<S<2L<3S<2L<4",
5174 8, # offset
5176 7, # version
5177 0x5344534e, # magic
5178 0, 0, 0,
5180 $nb_lcids,
5181 $nb_locales,
5182 $locale_size,
5183 $locales_offset,
5184 $nb_lcnames,
5186 $lcids_offset,
5187 $lcnames_offset,
5189 $nb_calendars,
5190 $calendar_size,
5191 $calendar_offset,
5192 $strings_offset,
5193 0, 0;
5195 return align_string( 4, $locale_header . $lcid_data . $lcname_data . $locale_data . $calendar_data . $string_data );
################################################################
# build the charmaps table for locale.nls
#
# Returns one string made of the binary case tables, in this order:
# digit folding, CJK compatibility, hiragana, katakana, halfwidth,
# fullwidth, traditional Chinese, simplified Chinese.
# Side effect: extra entries are patched into the file-level
# @digitmap_table, @halfwidth_table and @fullwidth_table before dumping.
sub build_charmaps_data()
{
    my $data = "";

    # MAP_FOLDDIGITS
    # each run is [ first code point, digit value of first entry, length ]
    my $ascii_zero = ord('0');
    my @digit_runs =
    (
        [ 0x3007,  0,  1 ],  # Ideographic Zero
        [ 0x0c78,  0,  4 ],  # Telugu Fraction Digits
        [ 0x0c7c,  1,  3 ],  # Telugu Fraction Digits
        [ 0x3021,  1,  9 ],  # Hangzhou Numerals
        [ 0xa8e0,  0, 10 ],  # Combining Devanagari Digits
        [ 0x10107, 1,  9 ],  # Aegean Numbers
        [ 0x10320, 1,  1 ],  # Old Italic Numerals
        [ 0x10321, 5,  1 ],  # Old Italic Numerals
    );
    foreach my $run (@digit_runs)
    {
        my ($start, $digit, $count) = @{$run};
        foreach my $pos (0 .. $count - 1)
        {
            $digitmap_table[$start + $pos] = $ascii_zero + $digit + $pos;
        }
    }
    $data .= dump_binary_case_table( @digitmap_table );

    # CJK compatibility map
    $data .= dump_binary_case_table( @cjk_compat_table );

    # LCMAP_HIRAGANA/KATAKANA
    # the katakana block sits 0x60 code points above the hiragana block
    my (@hiragana_table, @katakana_table);
    foreach my $hira (0x3041..0x3096, 0x309d..0x309e)
    {
        my $kata = $hira + 0x60;
        $hiragana_table[$kata] = $hira;
        $katakana_table[$hira] = $kata;
    }
    $data .= dump_binary_case_table( @hiragana_table );
    $data .= dump_binary_case_table( @katakana_table );

    # LCMAP_HALFWIDTH/FULLWIDTH
    @halfwidth_table[0x2018, 0x2019] = (0x0027) x 2;
    @halfwidth_table[0x201c, 0x201d] = (0x0022) x 2;
    @halfwidth_table[0x309b, 0x309c] = (0xff9e, 0xff9f);
    @fullwidth_table[0x309b, 0x309c] = (0x3099, 0x309a);
    $data .= dump_binary_case_table( @halfwidth_table );
    $data .= dump_binary_case_table( @fullwidth_table );

    # LCMAP_TRADITIONAL/SIMPLIFIED_CHINESE
    $data .= dump_binary_case_table( @chinese_traditional_table );
    $data .= dump_binary_case_table( @chinese_simplified_table );

    # FIXME: some more unknown tables here

    return $data;
}
################################################################
# build the geoids table for locale.nls
#
# Layout of the returned string:
#   - a header of 7 little-endian dwords: "geo" in UTF-16LE (2 dwords),
#     total size, header size, number of geoid entries, offset of the
#     name index, number of name index entries
#   - one fixed-size record per entry of @geoids
#   - the name index: (8-byte UTF-16LE name, dword record index) pairs,
#     sorted by name
sub build_geoids_data()
{
    my $data = "";
    my %index;
    my $idx = 0;
    my @geo_header = (0x00650067, 0x0000006f, 0, 4 * 7, scalar @geoids, 0, 0);

    foreach my $entry (@geoids)
    {
        my $id = $entry->{id};
        # An alias keeps its own id but takes every other field from its
        # target.  Use a separate variable: the foreach variable is an alias
        # of the array element, so assigning to it would overwrite @geoids.
        my $geo = defined $entry->{alias} ? $entry->{alias} : $entry;
        my $lat = "0.000";   # coordinates are not filled in
        my $long = "0.000";
        my $iso2 = $geo->{iso2} || "XX";
        my $iso3 = $geo->{iso3} || "XX";
        my $isregion = $geo->{region} || (defined $geo->{uncode} && !defined $geo->{iso2});
        my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
        my $scurrency = $geo->{scurrency} || "\x{00a4}";

        $data .= pack( "L<", $id );
        $data .= pad_string( 24, encode( "UTF16LE", $lat ));
        $data .= pad_string( 24, encode( "UTF16LE", $long ));
        # NOTE(review): 14/16 presumably are GEOCLASS_REGION/GEOCLASS_NATION,
        # and 39070 presumably is the geo id of the world entry -- confirm
        $data .= pack( "L<2", $isregion ? 14 : 16, $geo->{parentid} || 39070 );
        $data .= pad_string( 8, encode( "UTF16LE", $iso2 ));
        $data .= pad_string( 8, encode( "UTF16LE", $iso3 ));
        $data .= pack( "S<2", $geo->{uncode} || 0, $geo->{dialcode} || 0 );
        $data .= pad_string( 8, encode( "UTF16LE", $sintlsymbol ));
        $data .= pad_string( 16, encode( "UTF16LE", $scurrency ));
        $index{$geo->{name}} = $idx if $geo->{name};
        $idx++;
    }
    $index{"XX"} = $index{"001"};

    $geo_header[5] = $geo_header[3] + length $data;   # name index starts after the records
    $geo_header[6] = scalar keys %index;

    foreach my $name (sort keys %index)
    {
        $data .= pad_string( 8, encode( "UTF16LE", $name ));
        $data .= pack "L<", $index{$name};
    }

    $geo_header[2] = $geo_header[3] + length $data;   # total size, header included
    return pack( "L<7", @geo_header ) . $data;
}
################################################################
# build a binary locale table
#
# Arguments: output file name, and the already built chartypes data.
# Writes "$filename.new" and hands it over to save_file().
# The file starts with 8 little-endian dwords; entry 0 is the chartypes
# offset and entries 4..7 are the locales, charmaps, geoids and scripts
# offsets.  Entries 1..3 are left at 0.
sub dump_locales($$)
{
    my ($filename, $chartypes) = @_;

    printf "Building %s\n", $filename;

    my $locale_data = build_locale_data();
    my $charmaps_data = build_charmaps_data();
    my $geoids_data = build_geoids_data();
    my $scripts_data = "";  # FIXME

    my @header = ( 0 ) x 8;
    $header[0] = 4 * scalar @header;                    # chartypes offset
    $header[4] = $header[0] + length $chartypes;        # locales offset
    $header[5] = $header[4] + length $locale_data;      # charmaps offset
    $header[6] = $header[5] + length $charmaps_data;    # geoids offset
    $header[7] = $header[6] + length $geoids_data;      # scripts offset

    open my $out, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    binmode $out;  # packed binary data, no newline translation wanted
    print {$out} pack( "L<*", @header );
    print {$out} $chartypes, $locale_data, $charmaps_data, $geoids_data, $scripts_data;
    # buffered write errors only show up at close time; do not install a truncated file
    close $out or die "Cannot write $filename.new: $!";
    save_file($filename);
}
################################################################
# return the day of week of the first of the month
#
# $month is 1-based; the result is the wday field of gmtime (0 = Sunday).
sub month_first_dow($$)
{
    my ($year, $month) = @_;
    my $midnight = timegm_modern( 0, 0, 0, 1, $month - 1, $year );
    return (gmtime( $midnight ))[6];
}
################################################################
# compare system time values
#
# Both arguments are references to 7-element arrays; the fields are
# compared numerically from index 0 to 6 and the first difference
# decides.  Returns -1, 0 or 1.
sub compare_systime($$)
{
    my ($left, $right) = @_;
    foreach my $field (0 .. 6)
    {
        my $order = $left->[$field] <=> $right->[$field];
        return $order if $order;
    }
    return 0;
}
################################################################
# compare the zone transition date with the rule date
#
# $zone is a reference to a (year, month, day spec, time spec) list that
# may be empty or hold only a year; $rule is a reference to an already
# parsed date as returned by parse_transition_date().
sub compare_transition_date($$$$)
{
    my ($stdoff, $isdst, $zone, $rule) = @_;

    if (@{$zone} > 1)
    {
        # full date: parse it and compare field by field
        my @date = parse_transition_date( $stdoff, $isdst, $zone->[0], $zone->[1], $zone->[2], $zone->[3] || 0 );
        return compare_systime( \@date, $rule );
    }

    # at most a year is available: a missing year counts as later than the rule
    my $zone_year = $zone->[0];
    return 1 unless defined $zone_year;
    return $zone_year > $rule->[0] ? 1 : -1;
}
################################################################
# get the Windows zone names from the CLDR data
#
# Returns a hash mapping each Windows zone name (the "other" attribute
# of the territory="001" mapZone elements) to a two-element array:
# the text of the most recent "(UTC..." XML comment, and the "type"
# attribute of the mapZone element.
sub load_windows_zones()
{
    my %names;
    my $current_name;
    my $path = "cldr-release-$CLDRVERSION/common/supplemental/windowsZones.xml";
    my $INPUT = open_data_file( "cldr", $path );
    while (<$INPUT>)
    {
        # remember the latest display name comment
        $current_name = $1 if /<!-- +(\(UTC.*) -->.*/;
        # and attach it to the default-territory mapping
        $names{$1} = [ $current_name, $2 ] if /<mapZone other="(.*)" territory="001" type="(.*)"\/>/;
    }
    close $INPUT;
    return %names;
}
################################################################
# parse a transition date specification from the tzdata files
#
# Arguments: standard offset, DST flag, then the year and the optional
# month name, day specification and time specification.
# Returns (year, month, week, day of week, hour, minute, second); the
# week is counted within the month and 5 is used for "last".
sub parse_transition_date($$@)
{
    use integer;
    my ($stdoff, $isdst, $year, $in, $on, $at) = @_;

    $on = "1" unless defined $on;
    $at = "0" unless defined $at;

    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    my @day_names = qw(Sun Mon Tue Wed Thu Fri Sat);
    my %months = map { ($month_names[$_], $_ + 1) } 0 .. $#month_names;
    my %days = map { ($day_names[$_], $_) } 0 .. $#day_names;

    my $mon = $in ? $months{$in} : 1;
    my $first = month_first_dow( $year, $mon );
    my ($week, $dow);

    # day specification: "lastSun", "Sun>=8", "Sun<=25" or a plain day number
    if (my ($last_day) = $on =~ /^last(.*)$/)
    {
        $week = 5;
        $dow = $days{$last_day};
    }
    elsif (my ($day_from, $min_mday) = $on =~ /^(.*)>=(\d+)$/)
    {
        $dow = $days{$day_from};
        my $diff = ($first + 6 - $dow) % 7;
        $week = $min_mday >= 25 ? 5 : ($min_mday + 6 + $diff) / 7;
    }
    elsif (my ($day_until, $max_mday) = $on =~ /^(.*)<=(\d+)$/)
    {
        $dow = $days{$day_until};
        my $diff = ($first + $max_mday + 6 - $dow) % 7;
        $week = ($max_mday + 6 - $diff) / 7;
        if (!$week)
        {
            # falls into the previous month
            $week = 5;
            if (!--$mon) { $mon = 12; $year--; }
        }
    }
    elsif ($on =~ /^\d+$/)
    {
        $dow = ($first + $on - 1) % 7;
        $week = $on >= 25 ? 5 : ($on + 6) / 7;
    }
    else
    {
        die "unsupported date specification $year $in $on $at";
    }

    # time specification: H, H:MM or H:MM:SS with an optional u/w/s suffix
    my ($hours, $minutes, $seconds, $suffix) = $at =~ /^(\d+)(?::(\d+)(?::(\d+))?)?([uws]?)$/
        or die "unsupported time specification $year $in $on $at";

    my $time = $hours * 60 + ($minutes // 0);
    my $flag = $suffix || "w";
    $time -= $stdoff if $flag eq "u";
    $time += 60 if !$isdst && $flag ne "w";

    if ($time < 0) # previous day
    {
        $week-- if $week < 5 && $dow == month_first_dow( $year, $mon );
        $week-- if $week == 5 && $dow == month_first_dow( $year + ($mon == 12), $mon % 12 + 1 );
        if (!$week)
        {
            $week = 5;
            if (!--$mon) { $mon = 12; $year--; }
        }
        $dow = ($dow + 6) % 7;
        $time += 24 * 60;
    }

    return ($year, $mon, $week, $dow, $time / 60, $time % 60, $seconds || 0);
}
################################################################
# pack a system time value as a SYSTEMTIME structure
#
# Takes the list returned by parse_transition_date().  The year is
# always stored as 0, and an hour of 24 or more is clamped to
# 23:59:59.999.
sub pack_systime(@)
{
    my (undef, $mon, $week, $dow, $hour, $min, $sec) = @_;
    my @clock = $hour < 24 ? ($hour, $min, $sec, 0) : (23, 59, 59, 999);
    return pack( "S<8", 0, $mon, $dow, $week, @clock );
}
################################################################
# parse a timezone offset from the tzdata files
#
# Takes "[-]H[:MM[:SS]]" and returns the offset in minutes with the
# sign inverted; seconds are ignored.
sub parse_tz_offset($)
{
    my ($hour, $min) = split /:/, shift;
    $min ||= 0;
    # take the sign from the text: "-0" is numerically 0 and would lose it
    my $negative = $hour =~ /^\s*-/;
    my $minutes = abs( $hour ) * 60 + $min;
    return $negative ? $minutes : 0 - $minutes;  # invert sign
}
################################################################
# build the timezone data
#
# Arguments: the output resource file name, followed by the names of the
# tzdata files to load through open_data_file().
# For every Windows zone returned by load_windows_zones() this adds the
# registry values (names, TZI, Dynamic DST) under $zonekey and writes the
# matching string resources to "$filename.new", which is then handed over
# to save_file().  Per-year data is computed for $FIRST_YEAR..$LAST_YEAR.
sub dump_timezones($@)
{
    my $filename = shift;
    my $FIRST_YEAR = 2000;
    my $LAST_YEAR = 2030;

    my %names = load_windows_zones();
    my %zones;
    my %rules;
    my %links;
    my %res_indices;

    printf "Building %s\n", $filename;

    open my $out, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    print {$out} "/* Automatically generated; DO NOT EDIT!! */\n\n";
    print {$out} "#include \"winresrc.h\"\n\n";
    print {$out} "#pragma makedep po\n\n";
    print {$out} "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n\n";
    print {$out} "STRINGTABLE\n{\n";

    # load tzdata files

    foreach my $tzfile (@_)
    {
        my $FILE = open_data_file( "tzdata", $tzfile );
        my $zonename;
        while (<$FILE>)
        {
            chomp;
            s/\#.*$//;
            next if /^\s*$/;
            my @fields = split /\s+/;
            # a Zone line, or a continuation line (leading whitespace) of the current zone
            if ($fields[0] eq "Zone" || ($zonename && $fields[0] eq ""))
            {
                shift @fields;
                $zonename = shift @fields unless $zonename;
                my ($stdoff, $rules, $dummy, @date) = @fields;
                $zones{$zonename} ||= [ ];
                push @{$zones{$zonename}}, [ parse_tz_offset( $stdoff ), $rules, @date ];
                $zonename = undef unless @date;  # last entry doesn't have an until date
                next;
            }
            if ($fields[0] eq "Rule")
            {
                shift @fields;
                my ($rulename, $from, $to, $dummy, $in, $on, $at, $save) = @fields;
                $to = $from if $to eq "only";
                $to = $LAST_YEAR if $to eq "max";
                push @{$rules{$rulename}}, [ parse_tz_offset( $save ), $from, $to, $in, $on, $at ];
                next;
            }
            if ($fields[0] eq "Link")
            {
                $links{$fields[2]} = $fields[1];
                next;
            }
            die "unrecognized line $_";
        }
        close $FILE;
    }

    foreach my $name (sort { uc($a) cmp uc($b) } keys %names)
    {
        my ($display, $zone) = @{$names{$name}};
        $zone = $links{$zone} if defined $links{$zone};

        # build list of transitions
        # each entry is [ standard offset, dst offset (-1 for a zone entry), parsed date ]

        my @transitions;
        my @from_date = ( 1 );
        my $last_stdoff = 0;
        for (my $i = 0; $i < scalar @{$zones{$zone}}; $i++)
        {
            my ($stdoff, $rule, @until_date) = @{$zones{$zone}->[$i]};
            my $isdst = ($last_stdoff != $stdoff);
            $from_date[0] ||= $LAST_YEAR;
            my @systime = parse_transition_date( $stdoff, $isdst, @from_date );
            push @transitions, [ $stdoff, -1, \@systime ];

            if (defined $rules{$rule})
            {
                foreach my $r (@{$rules{$rule}})
                {
                    my ($offset, $from, $to, $in, $on, $at) = @{$r};
                    foreach my $year ($from..$to)
                    {
                        next if $year < $from_date[0];
                        next if $until_date[0] && $year > $until_date[0];
                        my @systime = parse_transition_date( $stdoff, !!$offset, $year, $in, $on, $at );
                        next if compare_transition_date( $stdoff, $isdst, \@until_date, \@systime ) <= 0;
                        my $ret = compare_transition_date( $stdoff, $isdst, \@from_date, \@systime );
                        next if $ret > 0;
                        pop @transitions if !$ret;  # remove transition if there's a dst change at the same time
                        push @transitions, [ $stdoff, $offset, \@systime ];
                    }
                }
            }
            @from_date = @until_date;
            $last_stdoff = $stdoff;
        }
        @transitions = sort { compare_systime( $a->[2], $b->[2] ) } @transitions;

        # build per-year dynamic info

        my @info;
        my $last_dstoff = 0;
        my $last_dst = 0;
        my $year = $FIRST_YEAR;
        while ($year <= $LAST_YEAR)
        {
            # skip transitions of earlier years, only remembering their offset
            if (@transitions && $transitions[0]->[2]->[0] < $year)
            {
                $last_stdoff = $transitions[0]->[0];
                shift @transitions;
                next;
            }
            my ($std, $dst, @trans);
            my $cur_stdoff = $last_stdoff;
            my $cur_dstoff = ($name =~ /^UTC/) ? 0 : -60;
            while (@transitions && $transitions[0]->[2]->[0] == $year)
            {
                my $t = shift @transitions;
                my ($stdoff, $dstoff, $systime) = @{$t};
                $systime = pack_systime( @{$systime} );
                if (!$dstoff)  # std
                {
                    $cur_stdoff = $stdoff unless $std;
                    $std = $systime;
                }
                elsif ($dstoff != -1)  # dst
                {
                    $cur_dstoff = $dstoff unless $dst;
                    $dst ||= $systime;
                }
                elsif ($stdoff != $last_stdoff)  # rule transition
                {
                    # Handle a special case: Samoa moved to the other side of
                    # the date line between 2011-12-03 and 2012-01-01,
                    # entirely skipping the day 2011-12-31.  We ignore this
                    # change because it happens on a year boundary and more
                    # importantly it would generate an offset of -25 hours,
                    # which some programs (e.g., Mono) do not like.  See
                    # https://bugs.winehq.org/show_bug.cgi?id=51758

                    if ($last_stdoff - $stdoff < 24 * 60)
                    {
                        @trans = ($last_stdoff, $stdoff, $systime);
                        $cur_stdoff = $stdoff;
                    }
                }
                elsif ($dst)  # rule transition with no stdoff change
                {
                    $std = $systime;
                }
                $last_dstoff = ($dstoff == -1) ? 0 : $dstoff;
            }
            $last_stdoff = $cur_stdoff;

            if ($cur_dstoff > 0)  # swap std and dst to ensure that offset is negative
            {
                ($std, $dst) = ($dst, $std);
                $cur_stdoff += $cur_dstoff;
                $cur_dstoff = -$cur_dstoff;
            }

            if (@trans)
            {
                # heuristic to prefer switching dst
                if ($last_dst == $year - 1 || (!$last_dst && $trans[0] > $trans[1]))
                {
                    $dst ||= $trans[2];
                    $cur_stdoff = $trans[0];
                    $cur_dstoff = $trans[1] - $trans[0];
                }
                else
                {
                    $std ||= $trans[2];
                    $cur_stdoff = $trans[1];
                    $cur_dstoff = $trans[0] - $trans[1];
                }
            }

            if ($std || $dst)
            {
                # a missing side of the pair defaults to January 1st
                $std ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
                $dst ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
                $last_dst = $year;
            }
            else
            {
                $std = pack "S<8", 0;
                $dst = pack "S<8", 0;
                $cur_stdoff += $last_dstoff;
            }
            $info[$year++] = pack( "l<3", $cur_stdoff, 0, $cur_dstoff ) . $std . $dst;
        }

        # output registry keys

        my $std_name = $name eq "UTC" ? "Coordinated Universal Time" : $name;
        my $dlt_name = $std_name =~ s/Standard Time/Daylight Time/r;
        # resource ids come from the name hash, in steps of 16; collisions move to the next free slot
        my $res_idx = hex( substr( Digest::SHA::sha1_hex($name), -3, 3 )) << 4;
        $res_idx += 16 while exists $res_indices{$res_idx};
        $res_indices{$res_idx} = 1;

        add_registry_string_value( $zonekey, $name, "Display", $display );
        add_registry_string_value( $zonekey, $name, "Std", $std_name );
        add_registry_string_value( $zonekey, $name, "Dlt", $dlt_name );
        add_registry_string_value( $zonekey, $name, "MUI_Std", sprintf( "\@tzres.dll,-%u", $res_idx ));
        add_registry_string_value( $zonekey, $name, "MUI_Dlt", sprintf( "\@tzres.dll,-%u", $res_idx + 1 ));
        add_registry_string_value( $zonekey, $name, "MUI_Display", sprintf( "\@tzres.dll,-%u", $res_idx + 2 ));
        add_registry_binary_value( $zonekey, $name, "TZI", $info[$LAST_YEAR] );

        printf {$out} "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx, $std_name;
        printf {$out} "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx + 1, $dlt_name;
        printf {$out} "%7d \"%s\"\n", $res_idx + 2, $display;

        # trim identical years at both ends of the range
        my $first_year = $FIRST_YEAR;
        my $last_year = $LAST_YEAR;
        $last_year-- while $last_year > $FIRST_YEAR && $info[$last_year] eq $info[$last_year - 1];
        $first_year++ while $first_year < $last_year && $info[$first_year] eq $info[$last_year];

        next if $last_year <= $first_year;

        foreach my $i ($first_year..$last_year)
        {
            add_registry_binary_value( $zonekey, "$name\\Dynamic DST", $i, $info[$i] );
        }
        add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "FirstEntry", $first_year );
        add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "LastEntry", $last_year );
    }

    print {$out} "}\n";
    # buffered write errors only show up at close time; do not install a truncated file
    close $out or die "Cannot write $filename.new: $!";
    save_file($filename);
}
################################################################
# build the script to create registry keys
#
# Arguments: the output file name, followed by a hash mapping a
# backslash-separated key path to a list made of the key's default
# value and its "val" lines.
# A subkey name starting with "-" is emitted with a "NoRemove " prefix,
# and a name containing whitespace is quoted.  The result is written to
# "$filename.new" and handed over to save_file().
sub dump_registry_script($%)
{
    my ($filename, %keys) = @_;
    my $indent = 1;
    my @prev;

    printf "Building %s\n", $filename;
    open my $out, ">", "$filename.new" or die "Cannot create $filename.new: $!";
    print {$out} "HKLM\n{\n";
    # case-insensitive order; backslash is mapped to \001 so that subkeys
    # directly follow their parent key
    foreach my $k (sort { ($a =~ tr/a-z\\/A-Z\001/r) cmp ($b =~ tr/a-z\\/A-Z\001/r) } keys %keys)
    {
        my @subkeys = split /\\/, $k;
        # keep the part of the path shared with the previous key open, close the rest
        while (@prev && @subkeys && $prev[0] eq $subkeys[0]) { shift @prev; shift @subkeys; }
        while (@prev) { printf {$out} "%*s}\n", 4 * --$indent, ""; shift @prev; }
        my ($def, @vals) = @{$keys{$k}};
        for (my $i = 0; $i < @subkeys; $i++)
        {
            my $name = $subkeys[$i];
            my $prefix = "";
            if ($name =~ /^-/)
            {
                $name =~ s/^-//;
                $prefix = "NoRemove ";
            }
            if ($name =~ /\s/)
            {
                $name = "'$name'";
            }
            # only the innermost key carries the default value
            printf {$out} "%*s%s%s%s\n%*s{\n", 4 * $indent, "", $prefix, $name,
                          $i == $#subkeys && $def ? " = s '$def'" : "", 4 * $indent, "";
            $indent++;
        }
        # pass the value text as an argument: it must not be parsed as a format
        foreach my $v (sort @vals) { printf {$out} "%*sval %s\n", 4 * $indent, "", $v; }
        @prev = split /\\/, $k;
    }
    while (@prev) { printf {$out} "%*s}\n", 4 * --$indent, ""; shift @prev; }
    print {$out} "}\n";
    # buffered write errors only show up at close time; do not install a truncated file
    close $out or die "Cannot write $filename.new: $!";
    save_file($filename);
}
################################################################
# save a file if modified
#
# Expects the new contents in "$file.new".  If $file already exists with
# identical contents the new copy is deleted, so that the original keeps
# its timestamp; otherwise the new copy replaces $file.
sub save_file($)
{
    my $file = shift;
    # compare in-process: no shell is involved, so any file name is safe
    require File::Compare;
    if (-f $file && File::Compare::compare( $file, "$file.new" ) == 0)
    {
        unlink "$file.new";
    }
    else
    {
        rename "$file.new", $file or die "Cannot rename $file.new to $file: $!";
    }
}
################################################################
# main routine
#
# Loads the source data, then regenerates every output file.  Paths are
# relative to the top of the tree, hence the chdir when started from
# the tools directory.

chdir ".." if -f "./make_unicode";
load_data();

foreach my $path ("dlls/gdi32/uniscribe/direction.c", "dlls/dwrite/direction.c", "dlls/wineps.drv/direction.c")
{
    dump_bidi_dir_table( $path );
}
foreach my $path ("dlls/gdi32/uniscribe/mirror.c", "dlls/dwrite/mirror.c")
{
    dump_mirroring( $path );
}
foreach my $path ("dlls/gdi32/uniscribe/bracket.c", "dlls/dwrite/bracket.c")
{
    dump_bracket( $path );
}
dump_shaping( "dlls/gdi32/uniscribe/shaping.c" );
dump_arabic_shaping( "dlls/dwrite/shapers/arabic_table.c" );
foreach my $path ("dlls/gdi32/uniscribe/linebreak.c", "dlls/dwrite/linebreak.c")
{
    dump_linebreak( $path );
}
dump_scripts( "dlls/dwrite/scripts" );
dump_indic( "dlls/gdi32/uniscribe/indicsyllable.c" );
dump_vertical( "dlls/win32u/vertical.c", 1 );
dump_vertical( "dlls/wineps.drv/vertical.c", 0 );
dump_intl_nls("nls/l_intl.nls");
foreach my $path ("nls/normnfc.nls", "nls/normnfd.nls", "nls/normnfkc.nls", "nls/normnfkd.nls", "nls/normidna.nls")
{
    dump_norm_table( $path );
}

# the sortkey dump returns the chartypes data embedded in locale.nls
my $chartypes = dump_sortkey_table( "nls/sortdefault.nls" );
dump_locales( "nls/locale.nls", $chartypes );

foreach my $file (@allfiles)
{
    dump_msdata_codepage( $file );
}
dump_eucjp_codepage();

# the timezone dump adds registry values, so the registry script comes last
dump_timezones( "dlls/tzres/tzres.rc", @timezone_files );
dump_registry_script( "dlls/kernelbase/kernelbase.rgs", %registry_keys );

exit 0;
5838 # Local Variables:
5839 # compile-command: "./make_unicode"
5840 # End: