README: Mention Gitlab.
[wine.git] / tools / make_unicode
blobdc2197f2e8e9cf2ec9ae64c3888b4ef39dbc4162
1 #!/usr/bin/perl -w
3 # Generate code page .c files from ftp.unicode.org descriptions
5 # Copyright 2000 Alexandre Julliard
7 # This library is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU Lesser General Public
9 # License as published by the Free Software Foundation; either
10 # version 2.1 of the License, or (at your option) any later version.
12 # This library is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 # Lesser General Public License for more details.
17 # You should have received a copy of the GNU Lesser General Public
18 # License along with this library; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 use strict;
23 use XML::LibXML;
24 use Digest::SHA;
25 use Encode;
26 use Time::Local qw(timegm_modern);
28 my $UNIVERSION = "14.0.0";
29 my $CLDRVERSION = "41";
30 my $ISO639VERSION = "20220120";
31 my $TZVERSION = "2022a";
33 my %data_files =
35 ucd => { url => "https://www.unicode.org/Public/$UNIVERSION/ucd/UCD.zip", name => "UCD-$UNIVERSION.zip",
36 sha => "033a5276b5d7af8844589f8e3482f3977a8385e71d107d375055465178c23600" },
37 unihan => { url => "https://www.unicode.org/Public/$UNIVERSION/ucd/Unihan.zip", name => "Unihan-$UNIVERSION.zip",
38 sha => "2ae4519b2b82cd4d15379c17e57bfb12c33c0f54da4977de03b2b04bcf11852d" },
39 idna => { url => "https://www.unicode.org/Public/idna/$UNIVERSION/IdnaMappingTable.txt", name => "IdnaMappingTable-$UNIVERSION.txt",
40 sha => "d43d9ca367af27b0e4c9dc645cadc23690bdecaf7ec2687f37f01180022d4dfa" },
41 cldr => { url => "https://github.com/unicode-org/cldr/archive/refs/tags/release-$CLDRVERSION.zip",
42 sha => "a2b7aee281ad2f497d47995808cf5e8f24123b0814ca47f7a824556aec8a0d91" },
43 cldr33 => { url => "https://www.unicode.org/Public/cldr/33/cldr-common-33.0.zip",
44 sha => "fa3490082c086d21257153609642f54fcf788fcfda4966fe67f3f6daca0d58b9" },
45 sorting => { url => "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows 10 Sorting Weight Table.txt",
46 sha => "81fcfa1e5ed3e3a94d329959ff7d97d522ddf9d653d2c4d6ddcccc5cd4df663f" },
47 codepages => { url => "https://download.microsoft.com/download/C/F/7/CF713A5E-9FBC-4FD6-9246-275F65C0E498/Windows Supported Code Page Data Files.zip",
48 sha => "5074e6dd253056ba61fc6c870c9a955467855129c6ad3a51761c386b301b125a" },
49 iso639 => { url => "https://iso639-3.sil.org/sites/iso639-3/files/downloads/iso-639-3_Code_Tables_$ISO639VERSION.zip",
50 sha => "d912749d10c344835f052a9f31d13f13d5ffc99bc589e1eb88f2b4663e990881" },
51 ksx1001 => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/KSC/KSX1001.TXT",
52 sha => "d8d2a35206ac0ea2865f5d801c9d6717f735bf46f263a658a64a960abe59e371" },
53 jis0208 => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT",
54 sha => "1c571870457f19c97720631fa83ee491549a96ba1436da1296786a67d8632e87" },
55 jis0212 => { url => "https://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT",
56 sha => "477820bb3055bbcc90880d788cd95607d221dc94457bae249231adecf13c12e6" },
57 tzdata => { url => "https://data.iana.org/time-zones/releases/tzdata$TZVERSION.tar.gz",
58 sha => "ef7fffd9f4f50f4f58328b35022a32a5a056b245c5cb3d6791dddb342f871664" },
62 # Default char for undefined mappings
63 my $DEF_CHAR = ord '?';
65 # Last valid Unicode character
66 my $MAX_CHAR = 0x10ffff;
68 my $nlskey = "-SYSTEM\\-CurrentControlSet\\-Control\\-Nls";
69 my $zonekey = "-Software\\-Microsoft\\-Windows NT\\-CurrentVersion\\Time Zones";
71 my @allfiles =
73 "CodpageFiles/037.txt",
74 "CodpageFiles/437.txt",
75 "CodpageFiles/500.txt",
76 "CodpageFiles/708.txt",
77 "CodpageFiles/720.txt",
78 "CodpageFiles/737.txt",
79 "CodpageFiles/775.txt",
80 "CodpageFiles/850.txt",
81 "CodpageFiles/852.txt",
82 "CodpageFiles/855.txt",
83 "CodpageFiles/857.txt",
84 "CodpageFiles/860.txt",
85 "CodpageFiles/861.txt",
86 "CodpageFiles/862.txt",
87 "CodpageFiles/863.txt",
88 "CodpageFiles/864.txt",
89 "CodpageFiles/865.txt",
90 "CodpageFiles/866.txt",
91 "CodpageFiles/869.txt",
92 "CodpageFiles/874.txt",
93 "CodpageFiles/875.txt",
94 "CodpageFiles/932.txt",
95 "CodpageFiles/936.txt",
96 "CodpageFiles/949.txt",
97 "CodpageFiles/950.txt",
98 "CodpageFiles/1026.txt",
99 "CodpageFiles/1250.txt",
100 "CodpageFiles/1251.txt",
101 "CodpageFiles/1252.txt",
102 "CodpageFiles/1253.txt",
103 "CodpageFiles/1254.txt",
104 "CodpageFiles/1255.txt",
105 "CodpageFiles/1256.txt",
106 "CodpageFiles/1257.txt",
107 "CodpageFiles/1258.txt",
108 "CodpageFiles/1361.txt",
109 "CodpageFiles/10000.txt",
110 "CodpageFiles/10001.txt",
111 "CodpageFiles/10002.txt",
112 "CodpageFiles/10003.txt",
113 "CodpageFiles/10004.txt",
114 "CodpageFiles/10005.txt",
115 "CodpageFiles/10006.txt",
116 "CodpageFiles/10007.txt",
117 "CodpageFiles/10008.txt",
118 "CodpageFiles/10010.txt",
119 "CodpageFiles/10017.txt",
120 "CodpageFiles/10021.txt",
121 "CodpageFiles/10029.txt",
122 "CodpageFiles/10079.txt",
123 "CodpageFiles/10081.txt",
124 "CodpageFiles/10082.txt",
125 "CodpageFiles/20127.txt",
126 "CodpageFiles/20866.txt",
127 "CodpageFiles/21866.txt",
128 "CodpageFiles/28591.txt",
129 "CodpageFiles/28592.txt",
130 "CodpageFiles/28593.txt",
131 "CodpageFiles/28594.txt",
132 "CodpageFiles/28595.txt",
133 "CodpageFiles/28596.txt",
134 "CodpageFiles/28597.txt",
135 "CodpageFiles/28598.txt",
136 "CodpageFiles/28599.txt",
137 "CodpageFiles/28603.txt",
138 "CodpageFiles/28605.txt",
141 my @timezone_files = qw(africa antarctica asia australasia europe northamerica southamerica etcetera backward);
143 my %ctype =
145 # CT_CTYPE1
146 "upper" => 0x0001,
147 "lower" => 0x0002,
148 "digit" => 0x0004,
149 "space" => 0x0008,
150 "punct" => 0x0010,
151 "cntrl" => 0x0020,
152 "blank" => 0x0040,
153 "xdigit" => 0x0080,
154 "alpha" => 0x0100 | 0x80000000,
155 "defin" => 0x0200,
156 # CT_CTYPE3 in high 16 bits
157 "nonspacing" => 0x00010000,
158 "diacritic" => 0x00020000,
159 "vowelmark" => 0x00040000,
160 "symbol" => 0x00080000,
161 "katakana" => 0x00100000,
162 "hiragana" => 0x00200000,
163 "halfwidth" => 0x00400000,
164 "fullwidth" => 0x00800000,
165 "ideograph" => 0x01000000,
166 "kashida" => 0x02000000,
167 "lexical" => 0x04000000,
168 "highsurrogate" => 0x08000000,
169 "lowsurrogate" => 0x10000000,
172 my %bracket_types =
174 "o" => 0x0000,
175 "c" => 0x0001,
178 my %indic_types =
180 "Other" => 0x0000,
181 "Bindu" => 0x0001,
182 "Visarga" => 0x0002,
183 "Avagraha" => 0x0003,
184 "Nukta" => 0x0004,
185 "Virama" => 0x0005,
186 "Vowel_Independent" => 0x0006,
187 "Vowel_Dependent" => 0x0007,
188 "Vowel" => 0x0008,
189 "Consonant_Placeholder" => 0x0009,
190 "Consonant" => 0x000a,
191 "Consonant_Dead" => 0x000b,
192 "Consonant_Succeeding_Repha" => 0x000c,
193 "Consonant_Subjoined" => 0x000d,
194 "Consonant_Medial" => 0x000e,
195 "Consonant_Final" => 0x000f,
196 "Consonant_Head_Letter" => 0x0010,
197 "Modifying_Letter" => 0x0011,
198 "Tone_Letter" => 0x0012,
199 "Tone_Mark" => 0x0013,
200 "Register_Shifter" => 0x0014,
201 "Consonant_Preceding_Repha" => 0x0015,
202 "Pure_Killer" => 0x0016,
203 "Invisible_Stacker" => 0x0017,
204 "Gemination_Mark" => 0x0018,
205 "Cantillation_Mark" => 0x0019,
206 "Non_Joiner" => 0x001a,
207 "Joiner" => 0x001b,
208 "Number_Joiner" => 0x001c,
209 "Number" => 0x001d,
210 "Brahmi_Joining_Number" => 0x001e,
211 "Consonant_With_Stacker" => 0x001f,
212 "Consonant_Prefixed" => 0x0020,
213 "Syllable_Modifier" => 0x0021,
214 "Consonant_Killer" => 0x0022,
215 "Consonant_Initial_Postfixed" => 0x0023,
218 my %matra_types =
220 "Right" => 0x01,
221 "Left" => 0x02,
222 "Visual_Order_Left" => 0x03,
223 "Left_And_Right" => 0x04,
224 "Top" => 0x05,
225 "Bottom" => 0x06,
226 "Top_And_Bottom" => 0x07,
227 "Top_And_Right" => 0x08,
228 "Top_And_Left" => 0x09,
229 "Top_And_Left_And_Right" => 0x0a,
230 "Bottom_And_Right" => 0x0b,
231 "Top_And_Bottom_And_Right" => 0x0c,
232 "Overstruck" => 0x0d,
233 "Invisible" => 0x0e,
234 "Bottom_And_Left" => 0x0f,
235 "Top_And_Bottom_And_Left" => 0x10,
238 my %break_types =
240 "BK" => 0x0001,
241 "CR" => 0x0002,
242 "LF" => 0x0003,
243 "CM" => 0x0004,
244 "SG" => 0x0005,
245 "GL" => 0x0006,
246 "CB" => 0x0007,
247 "SP" => 0x0008,
248 "ZW" => 0x0009,
249 "NL" => 0x000a,
250 "WJ" => 0x000b,
251 "JL" => 0x000c,
252 "JV" => 0x000d,
253 "JT" => 0x000e,
254 "H2" => 0x000f,
255 "H3" => 0x0010,
256 "XX" => 0x0011,
257 "OP" => 0x0012,
258 "CL" => 0x0013,
259 "CP" => 0x0014,
260 "QU" => 0x0015,
261 "NS" => 0x0016,
262 "EX" => 0x0017,
263 "SY" => 0x0018,
264 "IS" => 0x0019,
265 "PR" => 0x001a,
266 "PO" => 0x001b,
267 "NU" => 0x001c,
268 "AL" => 0x001d,
269 "ID" => 0x001e,
270 "IN" => 0x001f,
271 "HY" => 0x0020,
272 "BB" => 0x0021,
273 "BA" => 0x0022,
274 "SA" => 0x0023,
275 "AI" => 0x0024,
276 "B2" => 0x0025,
277 "HL" => 0x0026,
278 "CJ" => 0x0027,
279 "RI" => 0x0028,
280 "EB" => 0x0029,
281 "EM" => 0x002a,
282 "ZWJ" => 0x002b,
285 my %vertical_types =
287 "R" => 0x0000,
288 "U" => 0x0001,
289 "Tr" => 0x0002,
290 "Tu" => 0x0003,
293 my %categories =
295 "Lu" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"upper"}, # Letter, Uppercase
296 "Ll" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"lower"}, # Letter, Lowercase
297 "Lt" => $ctype{"defin"}|$ctype{"alpha"}|$ctype{"upper"}|$ctype{"lower"}, # Letter, Titlecase
298 "Mn" => $ctype{"defin"}|$ctype{"nonspacing"}, # Mark, Non-Spacing
299 "Mc" => $ctype{"defin"}, # Mark, Spacing Combining
300 "Me" => $ctype{"defin"}, # Mark, Enclosing
301 "Nd" => $ctype{"defin"}|$ctype{"digit"}, # Number, Decimal Digit
302 "Nl" => $ctype{"defin"}|$ctype{"alpha"}, # Number, Letter
303 "No" => $ctype{"defin"}, # Number, Other
304 "Zs" => $ctype{"defin"}|$ctype{"space"}, # Separator, Space
305 "Zl" => $ctype{"defin"}|$ctype{"space"}, # Separator, Line
306 "Zp" => $ctype{"defin"}|$ctype{"space"}, # Separator, Paragraph
307 "Cc" => $ctype{"defin"}|$ctype{"cntrl"}, # Other, Control
308 "Cf" => $ctype{"defin"}|$ctype{"cntrl"}, # Other, Format
309 "Cs" => $ctype{"defin"}, # Other, Surrogate
310 "Co" => $ctype{"defin"}, # Other, Private Use
311 "Cn" => $ctype{"defin"}, # Other, Not Assigned
312 "Lm" => $ctype{"defin"}|$ctype{"alpha"}, # Letter, Modifier
313 "Lo" => $ctype{"defin"}|$ctype{"alpha"}, # Letter, Other
314 "Pc" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Connector
315 "Pd" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Dash
316 "Ps" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Open
317 "Pe" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Close
318 "Pi" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Initial quote
319 "Pf" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Final quote
320 "Po" => $ctype{"defin"}|$ctype{"punct"}, # Punctuation, Other
321 "Sm" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Math
322 "Sc" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Currency
323 "Sk" => $ctype{"defin"}|$ctype{"symbol"}, # Symbol, Modifier
324 "So" => $ctype{"defin"}|$ctype{"symbol"} # Symbol, Other
327 # a few characters need additional categories that cannot be determined automatically
328 my %special_categories =
330 "xdigit" => [ ord('0')..ord('9'),ord('A')..ord('F'),ord('a')..ord('f'),
331 0xff10..0xff19, 0xff21..0xff26, 0xff41..0xff46 ],
332 "space" => [ 0x09..0x0d, 0x85 ],
333 "blank" => [ 0x09, 0x20, 0xa0, 0x3000, 0xfeff ],
334 "cntrl" => [ 0x070f, 0x200c, 0x200d,
335 0x200e, 0x200f, 0x202a, 0x202b, 0x202c, 0x202d, 0x202e,
336 0x206a, 0x206b, 0x206c, 0x206d, 0x206e, 0x206f, 0xfeff,
337 0xfff9, 0xfffa, 0xfffb ],
338 "punct" => [ 0x24, 0x2b, 0x3c..0x3e, 0x5e, 0x60, 0x7c, 0x7e, 0xa2..0xbe,
339 0xd7, 0xf7 ],
340 "digit" => [ 0xb2, 0xb3, 0xb9 ],
341 "lower" => [ 0xaa, 0xba, 0x2071, 0x207f ],
342 "nonspacing" => [ 0xc0..0xc5, 0xc7..0xcf, 0xd1..0xd6, 0xd8..0xdd, 0xe0..0xe5, 0xe7..0xef,
343 0xf1..0xf6, 0xf8..0xfd, 0xff, 0x6de, 0x1929..0x192b, 0x302e..0x302f ],
344 "diacritic" => [ 0x5e, 0x60, 0xb7, 0xd8, 0xf8 ],
345 "symbol" => [ 0x09..0x0d, 0x20..0x23, 0x25, 0x26, 0x28..0x2a, 0x2c, 0x2e..0x2f, 0x3a..0x40,
346 0x5b..0x60, 0x7b..0x7e, 0xa0..0xa9, 0xab..0xb1, 0xb4..0xb8, 0xbb, 0xbf,
347 0x02b9..0x02ba, 0x02c6..0x02cf ],
348 "halfwidth" => [ 0x20..0x7e, 0xa2..0xa3, 0xa5..0xa6, 0xac, 0xaf, 0x20a9 ],
349 "fullwidth" => [ 0x2018..0x2019, 0x201c..0x201d, 0x3000..0x3002, 0x300c..0x300d, 0x309b..0x309c,
350 0x30a1..0x30ab, 0x30ad, 0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9,
351 0x30bb, 0x30bd, 0x30bf, 0x30c1, 0x30c3, 0x30c4, 0x30c6, 0x30c8, 0x30ca..0x30cf,
352 0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de..0x30ed, 0x30ef, 0x30f2..0x30f3, 0x30fb,
353 0x3131..0x3164 ],
354 "ideograph" => [ 0x3006..0x3007 ],
355 "lexical" => [ 0x22, 0x24, 0x27, 0x2d, 0x2f, 0x3d, 0x40, 0x5c, 0x5e..0x60, 0x7e,
356 0xa8, 0xaa, 0xad, 0xaf, 0xb4, 0xb8, 0xba,
357 0x02b0..0x02b8, 0x02bc, 0x02c7, 0x02ca..0x02cb, 0x02cf, 0x02d8..0x02dd, 0x02e0..0x02e3,
358 0x037a, 0x0384..0x0385, 0x0387, 0x0559..0x055a, 0x0640, 0x1fbd..0x1fc1,
359 0x1fcd..0x1fcf, 0x1fdd..0x1fdf, 0x1fed..0x1fef, 0x1ffd..0x1ffe, 0x2010..0x2015,
360 0x2032..0x2034, 0x2038, 0x2043..0x2044, 0x207b..0x207c, 0x207f, 0x208b..0x208c,
361 0x2212, 0x2215..0x2216, 0x2500, 0x2504..0x2505, 0x2508..0x2509, 0x254c..0x254d,
362 0x3003, 0x301c, 0x3030..0x3035, 0x309b..0x309e, 0x30fd..0x30fe, 0xfe31..0xfe32,
363 0xfe58, 0xfe63, 0xfe66, 0xfe68..0xfe69, 0xfe6b, 0xff04, 0xff07, 0xff0d, 0xff0f,
364 0xff1d, 0xff20, 0xff3c, 0xff3e, 0xff40, 0xff5e ],
365 "kashida" => [ 0x0640 ],
368 my %directions =
370 "L" => 1, # Left-to-Right
371 "R" => 2, # Right-to-Left
372 "AL" => 12, # Right-to-Left Arabic
373 "EN" => 3, # European Number
374 "ES" => 4, # European Number Separator
375 "ET" => 5, # European Number Terminator
376 "AN" => 6, # Arabic Number
377 "CS" => 7, # Common Number Separator
378 "NSM" => 13, # Non-Spacing Mark
379 "BN" => 14, # Boundary Neutral
380 "B" => 8, # Paragraph Separator
381 "S" => 9, # Segment Separator
382 "WS" => 10, # Whitespace
383 "ON" => 11, # Other Neutrals
384 "LRE" => 15, # Left-to-Right Embedding
385 "LRO" => 15, # Left-to-Right Override
386 "RLE" => 15, # Right-to-Left Embedding
387 "RLO" => 15, # Right-to-Left Override
388 "PDF" => 15, # Pop Directional Format
389 "LRI" => 15, # Left-to-Right Isolate
390 "RLI" => 15, # Right-to-Left Isolate
391 "FSI" => 15, # First Strong Isolate
392 "PDI" => 15 # Pop Directional Isolate
395 my %c2_types =
397 "L" => 1, # C2_LEFTTORIGHT
398 "R" => 2, # C2_RIGHTTOLEFT
399 "AL" => 2, # C2_RIGHTTOLEFT
400 "EN" => 3, # C2_EUROPENUMBER
401 "ES" => 4, # C2_EUROPESEPARATOR
402 "ET" => 5, # C2_EUROPETERMINATOR
403 "AN" => 6, # C2_ARABICNUMBER
404 "CS" => 7, # C2_COMMONSEPARATOR
405 "NSM" => 11, # C2_OTHERNEUTRAL
406 "BN" => 0, # C2_NOTAPPLICABLE
407 "B" => 8, # C2_BLOCKSEPARATOR
408 "S" => 9, # C2_SEGMENTSEPARATOR
409 "WS" => 10, # C2_WHITESPACE
410 "ON" => 11, # C2_OTHERNEUTRAL
411 "LRE" => 11, # C2_OTHERNEUTRAL
412 "LRO" => 11, # C2_OTHERNEUTRAL
413 "RLE" => 11, # C2_OTHERNEUTRAL
414 "RLO" => 11, # C2_OTHERNEUTRAL
415 "PDF" => 11, # C2_OTHERNEUTRAL
416 "LRI" => 11, # C2_OTHERNEUTRAL
417 "RLI" => 11, # C2_OTHERNEUTRAL
418 "FSI" => 11, # C2_OTHERNEUTRAL
419 "PDI" => 11 # C2_OTHERNEUTRAL
422 my %bidi_types =
424 "ON" => 0, # Other Neutrals
425 "L" => 1, # Left-to-Right
426 "R" => 2, # Right-to-Left
427 "AN" => 3, # Arabic Number
428 "EN" => 4, # European Number
429 "AL" => 5, # Right-to-Left Arabic
430 "NSM" => 6, # Non-Spacing Mark
431 "CS" => 7, # Common Number Separator
432 "ES" => 8, # European Number Separator
433 "ET" => 9, # European Number Terminator
434 "BN" => 10, # Boundary Neutral
435 "S" => 11, # Segment Separator
436 "WS" => 12, # Whitespace
437 "B" => 13, # Paragraph Separator
438 "RLO" => 14, # Right-to-Left Override
439 "RLE" => 15, # Right-to-Left Embedding
440 "LRO" => 16, # Left-to-Right Override
441 "LRE" => 17, # Left-to-Right Embedding
442 "PDF" => 18, # Pop Directional Format
443 "LRI" => 19, # Left-to-Right Isolate
444 "RLI" => 20, # Right-to-Left Isolate
445 "FSI" => 21, # First Strong Isolate
446 "PDI" => 22 # Pop Directional Isolate
449 my %joining_types =
451 "U" => 0, # Non_Joining
452 "L" => 1, # Left_Joining
453 "R" => 2, # Right_Joining
454 "D" => 3, # Dual_Joining
455 "C" => 3, # Join_Causing
456 "ALAPH" => 4, # Syriac ALAPH
457 "DALATH RISH" => 5, # Syriac DALATH RISH group
458 "T" => 6, # Transparent
461 my @locales =
463 { name => "", lcid => 0x0000007f, file => "root", territory => "IV", sabbrevlangname => "IVL", sopentypelang =>"dflt" },
464 { name => "aa", dir => "seed", sopentypelang => "AFR" },
465 { name => "aa-DJ", dir => "seed" },
466 { name => "aa-ER", dir => "seed" },
467 { name => "aa-ET", dir => "seed" },
468 { name => "af", lcid => 0x00000036, oemcp => 850, sabbrevlangname => "AFK", sopentypelang => "AFK" },
469 { name => "af-NA" },
470 { name => "af-ZA", lcid => 0x00000436 },
471 { name => "agq" },
472 { name => "agq-CM" },
473 { name => "ak", sopentypelang => "TWI" },
474 { name => "ak-GH" },
475 { name => "am", lcid => 0x0000005e, sabbrevlangname => "AMH" },
476 { name => "am-ET", lcid => 0x0000045e },
477 { name => "ar", lcid => 0x00000001, territory => "SA", oemcp => 720, group => 13 },
478 { name => "ar-001" },
479 { name => "ar-AE", lcid => 0x00003801, sabbrevlangname => "ARU" },
480 { name => "ar-BH", lcid => 0x00003c01, sabbrevlangname => "ARH" },
481 { name => "ar-DJ" },
482 { name => "ar-DZ", lcid => 0x00001401, sabbrevlangname => "ARG" },
483 { name => "ar-EG", lcid => 0x00000c01, sabbrevlangname => "ARE" },
484 { name => "ar-EH" },
485 { name => "ar-ER" },
486 { name => "ar-IL" },
487 { name => "ar-IQ", lcid => 0x00000801, sabbrevlangname => "ARI" },
488 { name => "ar-JO", lcid => 0x00002c01, sabbrevlangname => "ARJ" },
489 { name => "ar-KM" },
490 { name => "ar-KW", lcid => 0x00003401, sabbrevlangname => "ARK" },
491 { name => "ar-LB", lcid => 0x00003001, sabbrevlangname => "ARB" },
492 { name => "ar-LY", lcid => 0x00001001, sabbrevlangname => "ARL" },
493 { name => "ar-MA", lcid => 0x00001801, sabbrevlangname => "ARM" },
494 { name => "ar-MR" },
495 { name => "ar-OM", lcid => 0x00002001, sabbrevlangname => "ARO" },
496 { name => "ar-PS" },
497 { name => "ar-QA", lcid => 0x00004001, sabbrevlangname => "ARQ" },
498 { name => "ar-SA", lcid => 0x00000401, sabbrevlangname => "ARA" },
499 { name => "ar-SD" },
500 { name => "ar-SO" },
501 { name => "ar-SS" },
502 { name => "ar-SY", lcid => 0x00002801, sabbrevlangname => "ARS" },
503 { name => "ar-TD" },
504 { name => "ar-TN", lcid => 0x00001c01, sabbrevlangname => "ART" },
505 { name => "ar-YE", lcid => 0x00002401, sabbrevlangname => "ARY" },
506 { name => "arn", lcid => 0x0000007a, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed", sabbrevlangname => "MPD", sopentypelang => "MAP" },
507 { name => "arn-CL", lcid => 0x0000047a, dir => "seed" },
508 { name => "arn-Latn", alias => "arn" },
509 { name => "arn-Latn-CL", alias => "arn-CL" },
510 { name => "as", lcid => 0x0000004d, slist => ",", group => 15 },
511 { name => "as-IN", lcid => 0x0000044d },
512 { name => "asa" },
513 { name => "asa-TZ" },
514 { name => "ast" },
515 { name => "ast-ES" },
516 { name => "az", lcid => 0x0000002c, oemcp => 857, ebcdiccp => 20905, group => 2 },
517 { name => "az-Cyrl", lcid => 0x0000742c, oemcp => 866, ebcdiccp => 20880, group => 5, sabbrevlangname => "AZC" },
518 { name => "az-Cyrl-AZ", lcid => 0x0000082c },
519 { name => "az-Latn", lcid => 0x0000782c },
520 { name => "az-Latn-AZ", lcid => 0x0000042c },
521 { name => "ba", lcid => 0x0000006d, oemcp => 866, group => 5, dir => "seed", sabbrevlangname => "BAS", sopentypelang => "BSH" },
522 { name => "ba-Cyrl", alias => "ba" },
523 { name => "ba-Cyrl-RU", alias => "ba-RU" },
524 { name => "ba-RU", lcid => 0x0000046d, dir => "seed" },
525 { name => "bas" },
526 { name => "bas-CM" },
527 { name => "be", lcid => 0x00000023, oemcp => 866, ebcdiccp => 500, group => 5 },
528 { name => "be-BY", lcid => 0x00000423 },
529 { name => "bem" },
530 { name => "bem-ZM" },
531 { name => "bez" },
532 { name => "bez-TZ" },
533 { name => "bg", lcid => 0x00000002, oemcp => 866, ebcdiccp => 21025, group => 5, sabbrevlangname => "BGR", sopentypelang => "BGR" },
534 { name => "bg-BG", lcid => 0x00000402 },
535 { name => "bin", lcid => 0x00000066, oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "EDO" },
536 { name => "bin-NG", lcid => 0x00000466, file => "bin", dir => "exemplars" },
537 { name => "bm", sopentypelang => "BMB" },
538 { name => "bm-Latn", file => "bm" },
539 { name => "bm-Latn-ML", file => "bm_ML" },
540 { name => "bm-ML", alias => "bm-Latn-ML" },
541 { name => "bn", lcid => 0x00000045, slist => ",", group => 15, sabbrevlangname => "BNB" },
542 { name => "bn-BD", lcid => 0x00000845 },
543 { name => "bn-IN", lcid => 0x00000445, sabbrevlangname => "BNG" },
544 { name => "bo", lcid => 0x00000051, slist => ",", group => 15, sabbrevlangname => "BOB", sopentypelang => "TIB" },
545 { name => "bo-CN", lcid => 0x00000451 },
546 { name => "bo-IN", slist => "," },
547 { name => "bo-Tibt", alias => "bo" },
548 { name => "bo-Tibt-CN", alias => "bo-CN" },
549 { name => "bo-Tibt-IN", alias => "bo-IN" },
550 { name => "br", lcid => 0x0000007e, oemcp => 850, ebcdiccp => 20297 },
551 { name => "br-FR", lcid => 0x0000047e },
552 { name => "br-Latn", alias => "br" },
553 { name => "br-Latn-FR", alias => "br-FR" },
554 { name => "brx" },
555 { name => "brx-IN" },
556 { name => "bs", lcid => 0x0000781a, oemcp => 852, maccp => 10082, ebcdiccp => 870, group => 2, sabbrevlangname => "BSB" },
557 { name => "bs-Cyrl", lcid => 0x0000641a, oemcp => 855, group => 5, sabbrevlangname => "BSC" },
558 { name => "bs-Cyrl-BA", lcid => 0x0000201a },
559 { name => "bs-Latn", lcid => 0x0000681a },
560 { name => "bs-Latn-BA", lcid => 0x0000141a },
561 { name => "byn", dir => "seed", sopentypelang => "BIL" },
562 { name => "byn-ER", dir => "seed" },
563 { name => "ca", lcid => 0x00000003, oemcp => 850 },
564 { name => "ca-AD", maccp => 65001 },
565 { name => "ca-ES", lcid => 0x00000403 },
566 { name => "ca-ES-valencia", lcid => 0x00000803, file => "ca_ES_VALENCIA", sabbrevlangname => "VAL" },
567 { name => "ca-FR", maccp => 65001 },
568 { name => "ca-IT", maccp => 65001 },
569 { name => "ccp" },
570 { name => "ccp-BD", alias => "ccp-Cakm-BD" },
571 { name => "ccp-Cakm", file => "ccp" },
572 { name => "ccp-Cakm-BD", file => "ccp_BD" },
573 { name => "ccp-Cakm-IN", file => "ccp_IN" },
574 { name => "ccp-IN", alias => "ccp-Cakm-IN" },
575 { name => "ce" },
576 { name => "ce-RU" },
577 { name => "ceb" },
578 { name => "ceb-Latn", file => "ceb" },
579 { name => "ceb-Latn-PH", file => "ceb_PH" },
580 { name => "ceb-PH", alias => "ceb-Latn-PH" },
581 { name => "cgg" },
582 { name => "cgg-UG" },
583 { name => "chr", lcid => 0x0000005c, slist => ",", sabbrevlangname => "CRE" },
584 { name => "chr-Cher", lcid => 0x00007c5c, file => "chr" },
585 { name => "chr-Cher-US", lcid => 0x0000045c, file => "chr_US" },
586 { name => "chr-US", alias => "chr-Cher-US" },
587 { name => "ckb", alias => "ku" },
588 { name => "ckb-IQ", alias => "ku-Arab-IQ" },
589 { name => "ckb-IR", alias => "ku-Arab-IR" },
590 { name => "co", lcid => 0x00000083, oemcp => 850, ebcdiccp => 20297, dir => "seed" },
591 { name => "co-FR", lcid => 0x00000483, dir => "seed" },
592 { name => "co-Latn", alias => "co" },
593 { name => "co-Latn-FR", alias => "co-FR" },
594 { name => "cs", lcid => 0x00000005, oemcp => 852, group => 2, sabbrevlangname => "CSY", sopentypelang => "CSY" },
595 { name => "cs-CZ", lcid => 0x00000405 },
596 { name => "cu", dir => "seed", sopentypelang => "CSL" },
597 { name => "cu-RU", dir => "seed" },
598 { name => "cy", lcid => 0x00000052, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "CYM", sopentypelang => "WEL" },
599 { name => "cy-GB", lcid => 0x00000452 },
600 { name => "da", lcid => 0x00000006, oemcp => 850, ebcdiccp => 20277 },
601 { name => "da-DK", lcid => 0x00000406 },
602 { name => "da-GL", maccp => 65001 },
603 { name => "dav" },
604 { name => "dav-KE" },
605 { name => "de", lcid => 0x00000007, oemcp => 850, ebcdiccp => 20273 },
606 { name => "de-AT", lcid => 0x00000c07, sabbrevlangname => "DEA" },
607 { name => "de-BE" },
608 { name => "de-CH", lcid => 0x00000807, sabbrevlangname => "DES" },
609 { name => "de-DE", lcid => 0x00000407 },
610 { name => "de-DE_phoneb", lcid => 0x00010407, alias => "de-DE" },
611 { name => "de-DE-u-co-phonebk", alias => "de-DE_phoneb" },
612 { name => "de-IT", oemcp => 65001 },
613 { name => "de-LI", lcid => 0x00001407, sabbrevlangname => "DEC" },
614 { name => "de-LU", lcid => 0x00001007, sabbrevlangname => "DEL" },
615 { name => "dje", sopentypelang => "DJR" },
616 { name => "dje-NE" },
617 { name => "doi" },
618 { name => "doi-IN" },
619 { name => "dsb", lcid => 0x00007c2e, sparent => "hsb", oemcp => 850, ebcdiccp => 870, sabbrevlangname => "DSB", sopentypelang => "LSB" },
620 { name => "dsb-DE", lcid => 0x0000082e },
621 { name => "dua" },
622 { name => "dua-CM" },
623 { name => "dv", lcid => 0x00000065, slist => "\x{060c}", group => 13, dir => "seed" },
624 { name => "dv-MV", lcid => 0x00000465, dir => "seed" },
625 { name => "dyo" },
626 { name => "dyo-SN" },
627 { name => "dz", sopentypelang => "DZN" },
628 { name => "dz-BT", lcid => 0x00000c51, sabbrevlangname => "ZZZ" },
629 { name => "ebu" },
630 { name => "ebu-KE" },
631 { name => "ee" },
632 { name => "ee-GH" },
633 { name => "ee-TG" },
634 { name => "el", lcid => 0x00000008, oemcp => 737, group => 4 },
635 { name => "el-CY" },
636 { name => "el-GR", lcid => 0x00000408 },
637 { name => "en", lcid => 0x00000009, oemcp => 437, slist => ",", sabbrevlangname => "ENU" },
638 { name => "en-001", oemcp => 850 },
639 { name => "en-029", lcid => 0x00002409, file => "en", oemcp => 850, sabbrevlangname => "ENB" },
640 { name => "en-150", oemcp => 65001 },
641 { name => "en-AE", lcid => 0x00004c09, oemcp => 65001, sabbrevlangname => "ZZZ" },
642 { name => "en-AG", oemcp => 850 },
643 { name => "en-AI", oemcp => 850 },
644 { name => "en-AS", oemcp => 850 },
645 { name => "en-AT", oemcp => 65001 },
646 { name => "en-AU", lcid => 0x00000c09, oemcp => 850, sabbrevlangname => "ENA" },
647 { name => "en-BB", oemcp => 850 },
648 { name => "en-BE", oemcp => 850 },
649 { name => "en-BI", oemcp => 65001 },
650 { name => "en-BM", oemcp => 850 },
651 { name => "en-BS", oemcp => 850 },
652 { name => "en-BW", oemcp => 850 },
653 { name => "en-BZ", lcid => 0x00002809, oemcp => 850, sabbrevlangname => "ENL" },
654 { name => "en-CA", lcid => 0x00001009, oemcp => 850, ebcdiccp => 37, sabbrevlangname => "ENC" },
655 { name => "en-CC", oemcp => 850 },
656 { name => "en-CH", oemcp => 65001 },
657 { name => "en-CK", oemcp => 850 },
658 { name => "en-CM", oemcp => 850 },
659 { name => "en-CX", oemcp => 850 },
660 { name => "en-CY", oemcp => 65001 },
661 { name => "en-DE", oemcp => 65001 },
662 { name => "en-DG", oemcp => 850 },
663 { name => "en-DK", oemcp => 65001 },
664 { name => "en-DM", oemcp => 850 },
665 { name => "en-ER", oemcp => 850 },
666 { name => "en-FI", oemcp => 65001 },
667 { name => "en-FJ", oemcp => 850 },
668 { name => "en-FK", oemcp => 850 },
669 { name => "en-FM", oemcp => 850 },
670 { name => "en-GB", lcid => 0x00000809, oemcp => 850, ebcdiccp => 20285, sabbrevlangname => "ENG" },
671 { name => "en-GD", oemcp => 850 },
672 { name => "en-GG", oemcp => 850 },
673 { name => "en-GH", oemcp => 850 },
674 { name => "en-GI", oemcp => 850 },
675 { name => "en-GM", oemcp => 850 },
676 { name => "en-GU", oemcp => 850 },
677 { name => "en-GY", oemcp => 850 },
678 { name => "en-HK", lcid => 0x00003c09, oemcp => 850, sabbrevlangname => "ENH" },
679 { name => "en-ID", lcid => 0x00003809, file => "en", oemcp => 850, sabbrevlangname => "ZZZ" },
680 { name => "en-IE", lcid => 0x00001809, oemcp => 850, sabbrevlangname => "ENI" },
681 { name => "en-IL", oemcp => 65001 },
682 { name => "en-IM", oemcp => 850 },
683 { name => "en-IN", lcid => 0x00004009, sabbrevlangname => "ENN" },
684 { name => "en-IO", oemcp => 850 },
685 { name => "en-JE", oemcp => 850 },
686 { name => "en-JM", lcid => 0x00002009, oemcp => 850, sabbrevlangname => "ENJ" },
687 { name => "en-KE", oemcp => 850 },
688 { name => "en-KI", oemcp => 850 },
689 { name => "en-KN", oemcp => 850 },
690 { name => "en-KY", oemcp => 850 },
691 { name => "en-LC", oemcp => 850 },
692 { name => "en-LR", oemcp => 850 },
693 { name => "en-LS", oemcp => 850 },
694 { name => "en-MG", oemcp => 850 },
695 { name => "en-MH", oemcp => 850 },
696 { name => "en-MO", oemcp => 850 },
697 { name => "en-MP", oemcp => 850 },
698 { name => "en-MS", oemcp => 850 },
699 { name => "en-MT", oemcp => 850 },
700 { name => "en-MU", oemcp => 850 },
701 { name => "en-MW", oemcp => 850 },
702 { name => "en-MY", lcid => 0x00004409, sabbrevlangname => "ENM" },
703 { name => "en-NA", oemcp => 850 },
704 { name => "en-NF", oemcp => 850 },
705 { name => "en-NG", oemcp => 850 },
706 { name => "en-NL", oemcp => 65001 },
707 { name => "en-NR", oemcp => 850 },
708 { name => "en-NU", oemcp => 850 },
709 { name => "en-NZ", lcid => 0x00001409, oemcp => 850, sabbrevlangname => "ENZ" },
710 { name => "en-PG", oemcp => 850 },
711 { name => "en-PH", lcid => 0x00003409, ebcdiccp => 500, sabbrevlangname => "ENP" },
712 { name => "en-PK", oemcp => 850 },
713 { name => "en-PN", oemcp => 850 },
714 { name => "en-PR", oemcp => 850 },
715 { name => "en-PW", oemcp => 850 },
716 { name => "en-RW", oemcp => 850 },
717 { name => "en-SB", oemcp => 850 },
718 { name => "en-SC", oemcp => 850 },
719 { name => "en-SD", oemcp => 850 },
720 { name => "en-SE", oemcp => 65001 },
721 { name => "en-SG", lcid => 0x00004809, sabbrevlangname => "ENE" },
722 { name => "en-SH", oemcp => 850 },
723 { name => "en-SI", oemcp => 65001 },
724 { name => "en-SL", oemcp => 850 },
725 { name => "en-SS", oemcp => 850 },
726 { name => "en-SX", oemcp => 850 },
727 { name => "en-SZ", oemcp => 850 },
728 { name => "en-TC", oemcp => 850 },
729 { name => "en-TK", oemcp => 850 },
730 { name => "en-TO", oemcp => 850 },
731 { name => "en-TT", lcid => 0x00002c09, oemcp => 850, sabbrevlangname => "ENT" },
732 { name => "en-TV", oemcp => 850 },
733 { name => "en-TZ", oemcp => 850 },
734 { name => "en-UG", oemcp => 850 },
735 { name => "en-UM", oemcp => 850 },
736 { name => "en-US", lcid => 0x00000409 },
737 { name => "en-VC", oemcp => 850 },
738 { name => "en-VG", oemcp => 850 },
739 { name => "en-VI", oemcp => 850 },
740 { name => "en-VU", oemcp => 850 },
741 { name => "en-WS", oemcp => 850 },
742 { name => "en-ZA", lcid => 0x00001c09, ebcdiccp => 500, sabbrevlangname => "ENS" },
743 { name => "en-ZM", oemcp => 850 },
744 { name => "en-ZW", lcid => 0x00003009, ebcdiccp => 500, sabbrevlangname => "ENW" },
745 { name => "eo", sopentypelang => "NTO" },
746 { name => "eo-001" },
747 { name => "es", lcid => 0x0000000a, oemcp => 850, ebcdiccp => 20284, sabbrevlangname => "ESP", sopentypelang => "ESP" },
748 { name => "es-419", lcid => 0x0000580a, sabbrevlangname => "ESJ" },
749 { name => "es-AR", lcid => 0x00002c0a, sabbrevlangname => "ESS" },
750 { name => "es-BO", lcid => 0x0000400a, sabbrevlangname => "ESB" },
751 { name => "es-BR", oemcp => 65001 },
752 { name => "es-BZ", oemcp => 65001 },
753 { name => "es-CL", lcid => 0x0000340a, sabbrevlangname => "ESL" },
754 { name => "es-CO", lcid => 0x0000240a, sabbrevlangname => "ESO" },
755 { name => "es-CR", lcid => 0x0000140a, sabbrevlangname => "ESC" },
756 { name => "es-CU", lcid => 0x00005c0a, sabbrevlangname => "ESK" },
757 { name => "es-DO", lcid => 0x00001c0a, sabbrevlangname => "ESD" },
758 { name => "es-EA" },
759 { name => "es-EC", lcid => 0x0000300a, sabbrevlangname => "ESF" },
760 { name => "es-ES", lcid => 0x00000c0a, sabbrevlangname => "ESN" },
761 { name => "es-ES_tradnl", lcid => 0x0000040a, file => "es_ES" },
762 { name => "es-ES-u-co-trad", alias => "es-ES_tradnl" },
763 { name => "es-GQ" },
764 { name => "es-GT", lcid => 0x0000100a, sabbrevlangname => "ESG" },
765 { name => "es-HN", lcid => 0x0000480a, sabbrevlangname => "ESH" },
766 { name => "es-IC" },
767 { name => "es-MX", lcid => 0x0000080a, sabbrevlangname => "ESM" },
768 { name => "es-NI", lcid => 0x00004c0a, sabbrevlangname => "ESI" },
769 { name => "es-PA", lcid => 0x0000180a, sabbrevlangname => "ESA" },
770 { name => "es-PE", lcid => 0x0000280a, sabbrevlangname => "ESR" },
771 { name => "es-PH" },
772 { name => "es-PR", lcid => 0x0000500a, sabbrevlangname => "ESU" },
773 { name => "es-PY", lcid => 0x00003c0a, sabbrevlangname => "ESZ" },
774 { name => "es-SV", lcid => 0x0000440a, sabbrevlangname => "ESE" },
775 { name => "es-US", lcid => 0x0000540a, sabbrevlangname => "EST" },
776 { name => "es-UY", lcid => 0x0000380a, sabbrevlangname => "ESY" },
777 { name => "es-VE", lcid => 0x0000200a, sabbrevlangname => "ESV" },
778 { name => "et", lcid => 0x00000025, oemcp => 775, group => 3, sabbrevlangname => "ETI", sopentypelang => "ETI" },
779 { name => "et-EE", lcid => 0x00000425 },
780 { name => "eu", lcid => 0x0000002d, oemcp => 850, maccp => 65001, sabbrevlangname => "EUQ", sopentypelang => "EUQ" },
781 { name => "eu-ES", lcid => 0x0000042d },
782 { name => "ewo" },
783 { name => "ewo-CM" },
784 { name => "fa", lcid => 0x00000029, inegnumber => 3, oemcp => 720, slist => "\x{061b}", group => 13, sabbrevlangname => "FAR", sopentypelang => "FAR" },
785 { name => "fa-AF", alias => "prs-AF" },
786 { name => "fa-IR", lcid => 0x00000429 },
787 { name => "ff", lcid => 0x00000067, oemcp => 850, ebcdiccp => 20297 },
788 { name => "ff-CM", alias => "ff-Latn-CM" },
789 { name => "ff-GN", alias => "ff-Latn-GN" },
790 { name => "ff-MR", alias => "ff-Latn-MR" },
791 { name => "ff-NG", alias => "ff-Latn-NG" },
792 { name => "ff-SN", alias => "ff-Latn-SN" },
793 { name => "ff-Adlm" },
794 { name => "ff-Adlm-BF" },
795 { name => "ff-Adlm-CM" },
796 { name => "ff-Adlm-GH" },
797 { name => "ff-Adlm-GM" },
798 { name => "ff-Adlm-GN" },
799 { name => "ff-Adlm-GW" },
800 { name => "ff-Adlm-LR" },
801 { name => "ff-Adlm-MR" },
802 { name => "ff-Adlm-NE" },
803 { name => "ff-Adlm-NG" },
804 { name => "ff-Adlm-SL" },
805 { name => "ff-Adlm-SN" },
806 { name => "ff-Latn", lcid => 0x00007c67 },
807 { name => "ff-Latn-BF", oemcp => 65001 },
808 { name => "ff-Latn-CM" },
809 { name => "ff-Latn-GH", oemcp => 65001 },
810 { name => "ff-Latn-GM", oemcp => 65001 },
811 { name => "ff-Latn-GN" },
812 { name => "ff-Latn-GW", oemcp => 65001 },
813 { name => "ff-Latn-LR", oemcp => 65001 },
814 { name => "ff-Latn-MR" },
815 { name => "ff-Latn-NE", oemcp => 65001 },
816 { name => "ff-Latn-NG", lcid => 0x00000467, sabbrevlangname => "ZZZ" },
817 { name => "ff-Latn-SL", oemcp => 65001 },
818 { name => "ff-Latn-SN", lcid => 0x00000867 },
819 { name => "fi", lcid => 0x0000000b, oemcp => 850, ebcdiccp => 20278 },
820 { name => "fi-FI", lcid => 0x0000040b },
821 { name => "fil", lcid => 0x00000064, oemcp => 437, ebcdiccp => 500, sabbrevlangname => "FPO", sopentypelang => "PIL" },
822 { name => "fil-PH", lcid => 0x00000464 },
823 { name => "fil-Latn", alias => "fil" },
824 { name => "fil-Latn-PH", alias => "fil-PH" },
825 { name => "fo", lcid => 0x00000038, oemcp => 850, maccp => 10079, ebcdiccp => 20277, sabbrevlangname => "FOS", sopentypelang => "FOS" },
826 { name => "fo-DK", oemcp => 65001, maccp => 65001 },
827 { name => "fo-FO", lcid => 0x00000438 },
828 { name => "fr", lcid => 0x0000000c, oemcp => 850, ebcdiccp => 20297 },
829 { name => "fr-029", lcid => 0x00001c0c, file => "fr", sabbrevlangname => "ZZZ" },
830 { name => "fr-BE", lcid => 0x0000080c, sabbrevlangname => "FRB" },
831 { name => "fr-BF" },
832 { name => "fr-BI" },
833 { name => "fr-BJ" },
834 { name => "fr-BL" },
835 { name => "fr-CA", lcid => 0x00000c0c, sabbrevlangname => "FRC" },
836 { name => "fr-CD", lcid => 0x0000240c, sabbrevlangname => "FRD" },
837 { name => "fr-CF" },
838 { name => "fr-CG" },
839 { name => "fr-CH", lcid => 0x0000100c, sabbrevlangname => "FRS" },
840 { name => "fr-CI", lcid => 0x0000300c, sabbrevlangname => "FRI" },
841 { name => "fr-CM", lcid => 0x00002c0c, sabbrevlangname => "FRE" },
842 { name => "fr-DJ" },
843 { name => "fr-DZ" },
844 { name => "fr-FR", lcid => 0x0000040c },
845 { name => "fr-GA" },
846 { name => "fr-GF" },
847 { name => "fr-GN" },
848 { name => "fr-GP" },
849 { name => "fr-GQ" },
850 { name => "fr-HT", lcid => 0x00003c0c, sabbrevlangname => "FRH" },
851 { name => "fr-KM" },
852 { name => "fr-LU", lcid => 0x0000140c, sabbrevlangname => "FRL" },
853 { name => "fr-MA", lcid => 0x0000380c, sabbrevlangname => "FRO" },
854 { name => "fr-MC", lcid => 0x0000180c, sabbrevlangname => "FRM" },
855 { name => "fr-MF" },
856 { name => "fr-MG" },
857 { name => "fr-ML", lcid => 0x0000340c, sabbrevlangname => "FRF" },
858 { name => "fr-MQ" },
859 { name => "fr-MR" },
860 { name => "fr-MU" },
861 { name => "fr-NC" },
862 { name => "fr-NE" },
863 { name => "fr-PF" },
864 { name => "fr-PM" },
865 { name => "fr-RE", lcid => 0x0000200c, sabbrevlangname => "FRR" },
866 { name => "fr-RW" },
867 { name => "fr-SC" },
868 { name => "fr-SN", lcid => 0x0000280c, sabbrevlangname => "FRN" },
869 { name => "fr-SY" },
870 { name => "fr-TD" },
871 { name => "fr-TG" },
872 { name => "fr-TN" },
873 { name => "fr-VU" },
874 { name => "fr-WF" },
875 { name => "fr-YT" },
876 { name => "fur", sopentypelang => "FRL" },
877 { name => "fur-IT" },
878 { name => "fy", lcid => 0x00000062, oemcp => 850, sabbrevlangname => "FYN", sopentypelang => "FRI" },
879 { name => "fy-NL", lcid => 0x00000462 },
880 { name => "ga", lcid => 0x0000003c, oemcp => 850, sabbrevlangname => "IRE", sopentypelang => "IRI" },
881 { name => "ga-GB" },
882 { name => "ga-IE", lcid => 0x0000083c },
883 { name => "gd", lcid => 0x00000091, oemcp => 850, ebcdiccp => 20285, sopentypelang => "GAE" },
884 { name => "gd-GB", lcid => 0x00000491 },
885 { name => "gd-Latn", alias => "gd" },
886 { name => "gl", lcid => 0x00000056, oemcp => 850, sabbrevlangname => "GLC", sopentypelang => "GAL" },
887 { name => "gl-ES", lcid => 0x00000456 },
888 { name => "gn", lcid => 0x00000074, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed", sopentypelang => "GUA" },
889 { name => "gn-PY", lcid => 0x00000474, dir => "seed" },
890 { name => "gsw", lcid => 0x00000084, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "ZZZ", sopentypelang => "ALS" },
891 { name => "gsw-CH" },
892 { name => "gsw-FR", lcid => 0x00000484, sabbrevlangname => "GSW" },
893 { name => "gsw-LI" },
894 { name => "gu", lcid => 0x00000047, slist => ",", group => 15 },
895 { name => "gu-IN", lcid => 0x00000447 },
896 { name => "guz" },
897 { name => "guz-KE" },
898 { name => "gv", sopentypelang => "MNX" },
899 { name => "gv-IM" },
900 { name => "ha", lcid => 0x00000068, oemcp => 437 },
901 { name => "ha-GH", alias => "ha-Latn-GH" },
902 { name => "ha-Latn", lcid => 0x00007c68, file => "ha" },
903 { name => "ha-Latn-GH", file => "ha_GH", ebcdiccp => 500 },
904 { name => "ha-Latn-NE", file => "ha_NE", ebcdiccp => 500 },
905 { name => "ha-Latn-NG", lcid => 0x00000468, file => "ha_NG" },
906 { name => "ha-NE", alias => "ha-Latn-NE" },
907 { name => "ha-NG", alias => "ha-Latn-NG" },
908 { name => "haw", lcid => 0x00000075, oemcp => 437 },
909 { name => "haw-Latn", alias => "haw" },
910 { name => "haw-Latn-US", alias => "haw-US" },
911 { name => "haw-US", lcid => 0x00000475 },
912 { name => "he", lcid => 0x0000000d, oemcp => 862, slist => ",", group => 12, sopentypelang => "IWR" },
913 { name => "he-IL", lcid => 0x0000040d },
914 { name => "hi", lcid => 0x00000039, slist => ",", group => 15 },
915 { name => "hi-IN", lcid => 0x00000439 },
916 { name => "hr", lcid => 0x0000001a, inegnumber => 2, oemcp => 852, maccp => 10082, group => 2 },
917 { name => "hr-BA", lcid => 0x0000101a, ebcdiccp => 870, inegnumber => 1, sabbrevlangname => "HRB" },
918 { name => "hr-HR", lcid => 0x0000041a },
919 { name => "hsb", lcid => 0x0000002e, oemcp => 850, ebcdiccp => 870, sopentypelang => "USB" },
920 { name => "hsb-DE", lcid => 0x0000042e },
921 { name => "hu", lcid => 0x0000000e, oemcp => 852, group => 2 },
922 { name => "hu-HU", lcid => 0x0000040e },
923 { name => "hu-HU_technl", lcid => 0x0001040e, alias => "hu-HU" },
924 { name => "hy", lcid => 0x0000002b, slist => ",", group => 17 },
925 { name => "hy-AM", lcid => 0x0000042b },
926 { name => "ia" },
927 { name => "ia-001" },
928 ## name => "ibb", lcid => 0x00000069 },
929 ## name => "ibb-NG", lcid => 0x00000469 },
930 { name => "id", lcid => 0x00000021, oemcp => 850 },
931 { name => "id-ID", lcid => 0x00000421 },
932 { name => "ig", lcid => 0x00000070, oemcp => 437 },
933 { name => "ig-Latn", alias => "ig" },
934 { name => "ig-Latn-NG", alias => "ig-NG" },
935 { name => "ig-NG", lcid => 0x00000470 },
936 { name => "ii", lcid => 0x00000078, group => 9, sopentypelang => "YIM" },
937 { name => "ii-CN", lcid => 0x00000478 },
938 { name => "ii-Yiii", alias => "ii" },
939 { name => "ii-Yiii-CN", alias => "ii-CN" },
940 { name => "is", lcid => 0x0000000f, oemcp => 850, maccp => 10079, ebcdiccp => 20871 },
941 { name => "is-IS", lcid => 0x0000040f },
942 { name => "it", lcid => 0x00000010, oemcp => 850, ebcdiccp => 20280 },
943 { name => "it-CH", lcid => 0x00000810, ebcdiccp => 500, sabbrevlangname => "ITS" },
944 { name => "it-IT", lcid => 0x00000410 },
945 { name => "it-SM" },
946 { name => "it-VA", oemcp => 65001 },
947 { name => "iu", lcid => 0x0000005d, oemcp => 437, slist => ",", sortlocale => "iu-Latn-CA", dir => "seed", sabbrevlangname => "IUK", sopentypelang => "INU" },
948 { name => "iu-Cans", lcid => 0x0000785d, file => "iu", oemcp => 65001, dir => "seed", sabbrevlangname => "IUS" },
949 { name => "iu-Cans-CA", lcid => 0x0000045d, file => "iu_CA", dir => "seed" },
950 { name => "iu-Latn", lcid => 0x00007c5d, dir => "seed" },
951 { name => "iu-Latn-CA", lcid => 0x0000085d, dir => "seed" },
952 { name => "ja", lcid => 0x00000011, ireadinglayout => 2, oemcp => 932, slist => ",", sscripts => "Hani Hira Jpan Kana", group => 7, sopentypelang => "JAN" },
953 { name => "ja-JP", lcid => 0x00000411 },
954 { name => "ja-JP_radstr", lcid => 0x00040411, alias => "ja-JP" },
955 { name => "ja-JP-u-co-unihan", alias => "ja-JP_radstr" },
956 { name => "jgo" },
957 { name => "jgo-CM" },
958 { name => "jmc" },
959 { name => "jmc-TZ" },
960 { name => "jv", oemcp => 850 },
961 { name => "jv-ID", alias => "jv-Latn-ID" },
962 ## name => "jv-Java" },
963 ## name => "jv-Java-ID" },
964 { name => "jv-Latn", file => "jv" },
965 { name => "jv-Latn-ID", file => "jv_ID" },
966 { name => "ka", lcid => 0x00000037, group => 16 },
967 { name => "ka-GE", lcid => 0x00000437 },
968 { name => "ka-GE_modern", lcid => 0x00010437, alias => "ka-GE" },
969 { name => "kab", sopentypelang => "KAB0" },
970 { name => "kab-DZ" },
971 { name => "kam", sopentypelang => "KMB" },
972 { name => "kam-KE" },
973 { name => "kde" },
974 { name => "kde-TZ" },
975 { name => "kea" },
976 { name => "kea-CV" },
977 { name => "kgp" },
978 { name => "kgp-BR" },
979 { name => "khq" },
980 { name => "khq-ML" },
981 { name => "ki" },
982 { name => "ki-KE" },
983 { name => "kk", lcid => 0x0000003f, group => 5, sabbrevlangname => "KKZ" },
984 { name => "kk-Cyrl", alias => "kk" },
985 { name => "kk-Cyrl-KZ", alias => "kk-KZ" },
986 { name => "kk-KZ", lcid => 0x0000043f },
987 { name => "kkj" },
988 { name => "kkj-CM" },
989 { name => "kl", lcid => 0x0000006f, oemcp => 850, ebcdiccp => 20277, sopentypelang => "GRN" },
990 { name => "kl-GL", lcid => 0x0000046f },
991 { name => "kln", sopentypelang => "KAL" },
992 { name => "kln-KE" },
993 { name => "km", lcid => 0x00000053, inegnumber => 2, slist => ",", group => 15 },
994 { name => "km-KH", lcid => 0x00000453 },
995 { name => "kn", lcid => 0x0000004b, slist => ",", group => 15, sabbrevlangname => "KDI" },
996 { name => "kn-IN", lcid => 0x0000044b },
997 { name => "ko", lcid => 0x00000012, ireadinglayout => 2, slist => ",", oemcp => 949, ebcdiccp => 20833, sscripts => "Hang Hani Kore", group => 8 },
998 { name => "ko-KP", oemcp => 65001 },
999 { name => "ko-KR", lcid => 0x00000412 },
1000 { name => "kok", lcid => 0x00000057, slist => ",", group => 15, sabbrevlangname => "KNK" },
1001 { name => "kok-IN", lcid => 0x00000457 },
1002 { name => "kr", lcid => 0x00000071, sortlocale => "kr-Latn-NG", oemcp => 850, dir => "exemplars", sabbrevlangname => "ZZZ", sopentypelang => "KNR" },
1003 { name => "kr-Latn", file => "kr", dir => "exemplars" },
1004 { name => "kr-Latn-NG", lcid => 0x00000471, file => "kr", dir => "exemplars" },
1005 { name => "kr-NG", alias => "kr-Latn-NG" },
1006 { name => "ks", lcid => 0x00000060, group => 15, sabbrevlangname => "ZZZ", sopentypelang => "KSH" },
1007 { name => "ks-Arab", lcid => 0x00000460 },
1008 { name => "ks-Arab-IN" },
1009 { name => "ks-Deva", slist => "," },
1010 { name => "ks-Deva-IN", lcid => 0x00000860 },
1011 { name => "ks-IN", alias => "ks-Arab-IN" },
1012 { name => "ksb" },
1013 { name => "ksb-TZ" },
1014 { name => "ksf" },
1015 { name => "ksf-CM" },
1016 { name => "ksh", sopentypelang => "KSH0" },
1017 { name => "ksh-DE" },
1018 { name => "ku", lcid => 0x00000092, file => "ckb", slist => "\x{061b}", sortlocale => "ku-Arab-IQ", oemcp => 720 },
1019 { name => "ku-Arab", lcid => 0x00007c92, file => "ckb", group => 13 },
1020 { name => "ku-Arab-IQ", lcid => 0x00000492, file => "ckb_IQ" },
1021 { name => "ku-Arab-IR", file => "ckb_IR", oemcp => 65001 },
1022 { name => "kw" },
1023 { name => "kw-GB" },
1024 { name => "ky", lcid => 0x00000040, oemcp => 866, group => 5, sabbrevlangname => "KYR" },
1025 { name => "ky-Cyrl", alias => "ky" },
1026 { name => "ky-Cyrl-KG", alias => "ky-KG" },
1027 { name => "ky-KG", lcid => 0x00000440 },
1028 { name => "la", lcid => 0x00000076, oemcp => 437, slist => ",", dir => "seed", sabbrevlangname => "ZZZ" },
1029 { name => "la-001", lcid => 0x00000476, file => "la", dir => "seed" },
1030 { name => "lag" },
1031 { name => "lag-TZ" },
1032 { name => "lb", lcid => 0x0000006e, oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "LBX" },
1033 { name => "lb-LU", lcid => 0x0000046e },
1034 { name => "lg" },
1035 { name => "lg-UG" },
1036 { name => "lkt" },
1037 { name => "lkt-US" },
1038 { name => "ln" },
1039 { name => "ln-AO" },
1040 { name => "ln-CD" },
1041 { name => "ln-CF" },
1042 { name => "ln-CG" },
1043 { name => "lo", lcid => 0x00000054, group => 15 },
1044 { name => "lo-LA", lcid => 0x00000454 },
1045 { name => "lrc" },
1046 { name => "lrc-IQ" },
1047 { name => "lrc-IR" },
1048 { name => "lt", lcid => 0x00000027, oemcp => 775, group => 3, sabbrevlangname => "LTH", sopentypelang => "LTH" },
1049 { name => "lt-LT", lcid => 0x00000427 },
1050 { name => "lu" },
1051 { name => "lu-CD" },
1052 { name => "luo" },
1053 { name => "luo-KE" },
1054 { name => "luy", sopentypelang => "LUH" },
1055 { name => "luy-KE" },
1056 { name => "lv", lcid => 0x00000026, oemcp => 775, group => 3, sabbrevlangname => "LVI", sopentypelang => "LVI" },
1057 { name => "lv-LV", lcid => 0x00000426 },
1058 { name => "mai" },
1059 { name => "mai-IN" },
1060 { name => "mas" },
1061 { name => "mas-KE" },
1062 { name => "mas-TZ" },
1063 { name => "mer" },
1064 { name => "mer-KE" },
1065 { name => "mfe" },
1066 { name => "mfe-MU" },
1067 { name => "mg" },
1068 { name => "mg-MG" },
1069 { name => "mgh" },
1070 { name => "mgh-MZ" },
1071 { name => "mgo" },
1072 { name => "mgo-CM" },
1073 { name => "mi", lcid => 0x00000081, slist => "," },
1074 { name => "mi-Latn", alias => "mi" },
1075 { name => "mi-Latn-NZ", alias => "mi-NZ" },
1076 { name => "mi-NZ", lcid => 0x00000481 },
1077 { name => "mk", lcid => 0x0000002f, oemcp => 866, ebcdiccp => 500, group => 5, sabbrevlangname => "MKI" },
1078 { name => "mk-MK", lcid => 0x0000042f },
1079 { name => "ml", lcid => 0x0000004c, group => 15, sabbrevlangname => "MYM", sopentypelang => "MLR" },
1080 { name => "ml-IN", lcid => 0x0000044c },
1081 { name => "mn", lcid => 0x00000050, oemcp => 866, sopentypelang => "MNG" },
1082 { name => "mn-Cyrl", lcid => 0x00007850, file => "mn", sabbrevlangname => "MNN" },
1083 { name => "mn-Cyrl-MN", alias => "mn-MN" },
1084 { name => "mn-MN", lcid => 0x00000450, sparent => "mn-Cyrl", group => 5 },
1085 { name => "mn-Mong", lcid => 0x00007c50, oemcp => 65001, slist => ",", group => 15, dir => "seed", sabbrevlangname => "MNG" },
1086 { name => "mn-Mong-CN", lcid => 0x00000850, dir => "seed" },
1087 { name => "mn-Mong-MN", lcid => 0x00000c50, dir => "seed", sabbrevlangname => "MNM" },
1088 { name => "mni", lcid => 0x00000058, slist => ",", sabbrevlangname => "ZZZ" },
1089 { name => "mni-IN", lcid => 0x00000458, file => "mni_Beng_IN" },
1090 { name => "moh", lcid => 0x0000007c, oemcp => 850, ebcdiccp => 37, slist => ",", dir => "seed", sabbrevlangname => "MWK" },
1091 { name => "moh-CA", lcid => 0x0000047c, dir => "seed" },
1092 { name => "moh-Latn", alias => "moh" },
1093 { name => "moh-Latn-CA", alias => "moh-CA" },
1094 { name => "mr", lcid => 0x0000004e, slist => ",", group => 15 },
1095 { name => "mr-IN", lcid => 0x0000044e },
1096 { name => "ms", lcid => 0x0000003e, oemcp => 850, sabbrevlangname => "MSL", sopentypelang => "MLY" },
1097 { name => "ms-BN", lcid => 0x0000083e, sabbrevlangname => "MSB" },
1098 { name => "ms-ID" },
1099 { name => "ms-Latn", alias => "ms" },
1100 { name => "ms-Latn-BN", alias => "ms-BN" },
1101 { name => "ms-Latn-MY", alias => "ms-MY" },
1102 { name => "ms-Latn-SG", alias => "ms-SG" },
1103 { name => "ms-MY", lcid => 0x0000043e },
1104 { name => "ms-SG" },
1105 { name => "mt", lcid => 0x0000003a, sopentypelang => "MTS" },
1106 { name => "mt-MT", lcid => 0x0000043a },
1107 { name => "mua" },
1108 { name => "mua-CM" },
1109 { name => "my", lcid => 0x00000055, sopentypelang => "BRM" },
1110 { name => "my-MM", lcid => 0x00000455 },
1111 { name => "mzn" },
1112 { name => "mzn-IR" },
1113 { name => "naq" },
1114 { name => "naq-NA" },
1115 { name => "nb", lcid => 0x00007c14, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NOR", sopentypelang => "NOR" },
1116 { name => "nb-NO", lcid => 0x00000414 },
1117 { name => "nb-SJ" },
1118 { name => "nd", sopentypelang => "NDB" },
1119 { name => "nd-ZW" },
1120 { name => "nds" },
1121 { name => "nds-DE" },
1122 { name => "nds-NL" },
1123 { name => "ne", lcid => 0x00000061, slist => "," },
1124 { name => "ne-IN", lcid => 0x00000861, sabbrevlangname => "NEI" },
1125 { name => "ne-NP", lcid => 0x00000461, group => 15 },
1126 { name => "nl", lcid => 0x00000013, oemcp => 850 },
1127 { name => "nl-AW" },
1128 { name => "nl-BE", lcid => 0x00000813, sabbrevlangname => "NLB" },
1129 { name => "nl-BQ" },
1130 { name => "nl-CW" },
1131 { name => "nl-NL", lcid => 0x00000413 },
1132 { name => "nl-SR" },
1133 { name => "nl-SX" },
1134 { name => "nmg" },
1135 { name => "nmg-CM" },
1136 { name => "nn", lcid => 0x00007814, oemcp => 850, ebcdiccp => 20277, sabbrevlangname => "NON", sopentypelang => "NYN" },
1137 { name => "nn-NO", lcid => 0x00000814 },
1138 { name => "nnh" },
1139 { name => "nnh-CM" },
1140 { name => "no", lcid => 0x00000014, oemcp => 850, ebcdiccp => 20277, sortlocale => "nb-NO" },
1141 { name => "nqo", idigits => 3, inegnumber => 3, slist => "\x{060c}", dir => "seed", sopentypelang => "NKO" },
1142 { name => "nqo-GN", dir => "seed" },
1143 { name => "nr", dir => "seed", sopentypelang => "NDB" },
1144 { name => "nr-ZA", dir => "seed" },
1145 { name => "nso", lcid => 0x0000006c, oemcp => 850, dir => "seed", sopentypelang => "SOT" },
1146 { name => "nso-ZA", lcid => 0x0000046c, dir => "seed" },
1147 { name => "nus" },
1148 { name => "nus-SD", alias => "nus-SS" },
1149 { name => "nus-SS" },
1150 { name => "nyn", sopentypelang => "NKL" },
1151 { name => "nyn-UG" },
1152 { name => "oc", lcid => 0x00000082, oemcp => 850, ebcdiccp => 20297, dir => "seed" },
1153 { name => "oc-FR", lcid => 0x00000482, dir => "seed" },
1154 { name => "oc-Latn", alias => "oc" },
1155 { name => "oc-Latn-FR", alias => "oc-FR" },
1156 { name => "om", lcid => 0x00000072, sopentypelang => "ORO" },
1157 { name => "om-ET", lcid => 0x00000472 },
1158 { name => "om-KE" },
1159 { name => "or", lcid => 0x00000048, slist => ",", group => 15 },
1160 { name => "or-IN", lcid => 0x00000448 },
1161 { name => "os" },
1162 { name => "os-GE" },
1163 { name => "os-RU" },
1164 { name => "pa", lcid => 0x00000046, slist => "," },
1165 { name => "pa-Arab", lcid => 0x00007c46, slist => ";", inegnumber => 2, oemcp => 720, group => 13, sabbrevlangname => "PAP" },
1166 { name => "pa-Arab-PK", lcid => 0x00000846 },
1167 { name => "pa-Guru" },
1168 { name => "pa-Guru-IN", alias => "pa-IN" },
1169 { name => "pa-IN", lcid => 0x00000446, sparent => "pa-Guru", file => "pa_Guru_IN", group => 15 },
1170 ## name => "pap", lcid => 0x00000079 },
1171 ## name => "pap-029", lcid => 0x00000479 },
1172 { name => "pcm" },
1173 { name => "pcm-NG" },
1174 { name => "pl", lcid => 0x00000015, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "PLK", sopentypelang => "PLK" },
1175 { name => "pl-PL", lcid => 0x00000415 },
1176 ## name => "prg" },
1177 ## name => "prg-001" },
1178 { name => "prs", lcid => 0x0000008c, file => "fa", inegnumber => 3, oemcp => 720, group => 13, sopentypelang => "DRI" },
1179 { name => "prs-AF", lcid => 0x0000048c, file => "fa_AF" },
1180 { name => "prs-Arab", alias => "prs" },
1181 { name => "prs-Arab-AF", alias => "prs-AF" },
1182 { name => "ps", lcid => 0x00000063, group => 13, sabbrevlangname => "PAS", sopentypelang => "PAS" },
1183 { name => "ps-AF", lcid => 0x00000463 },
1184 { name => "ps-PK" },
1185 { name => "pt", lcid => 0x00000016, oemcp => 850, sabbrevlangname => "PTB", sopentypelang => "PTG" },
1186 { name => "pt-AO" },
1187 { name => "pt-BR", lcid => 0x00000416 },
1188 { name => "pt-CH", oemcp => 65001 },
1189 { name => "pt-CV" },
1190 { name => "pt-GQ", oemcp => 65001 },
1191 { name => "pt-GW" },
1192 { name => "pt-LU", oemcp => 65001 },
1193 { name => "pt-MO" },
1194 { name => "pt-MZ" },
1195 { name => "pt-PT", lcid => 0x00000816, sabbrevlangname => "PTG" },
1196 { name => "pt-ST" },
1197 { name => "pt-TL" },
1198 ## name => qps-Latn-x-sh", lcid => 0x80000901 },
1199 ## name => qps-ploc", lcid => 0x80000501 },
1200 ## name => qps-ploca", lcid => 0x800005fe },
1201 ## name => qps-plocm", lcid => 0x800009ff },
1202 { name => "qu", alias => "quz" },
1203 { name => "qu-BO", alias => "quz-BO" },
1204 { name => "qu-EC", alias => "quz-EC" },
1205 { name => "qu-PE", alias => "quz-PE" },
1206 { name => "quc", lcid => 0x00000086, oemcp => 850, ebcdiccp => 20284, slist => ",", dir => "seed" },
1207 { name => "quc-Latn", lcid => 0x00007c86, file => "quc", dir => "seed" },
1208 { name => "quc-Latn-GT", lcid => 0x00000486, file => "quc_GT", dir => "seed" },
1209 { name => "qut", alias => "quc" },
1210 { name => "qut-GT", alias => "quc-Latn-GT" },
1211 { name => "quz", lcid => 0x0000006b, file => "qu", territory => "BO", oemcp => 850, ebcdiccp => 20284, slist => "," },
1212 { name => "quz-BO", lcid => 0x0000046b, file => "qu_BO" },
1213 { name => "quz-EC", lcid => 0x0000086b, file => "qu_EC" },
1214 { name => "quz-Latn", alias => "quz" },
1215 { name => "quz-Latn-BO", alias => "quz-BO" },
1216 { name => "quz-Latn-EC", alias => "quz-EC" },
1217 { name => "quz-Latn-PE", alias => "quz-PE" },
1218 { name => "quz-PE", lcid => 0x00000c6b, file => "qu_PE" },
1219 { name => "rm", lcid => 0x00000017, oemcp => 850, ebcdiccp => 20273, sabbrevlangname => "RMC", sopentypelang => "RMS" },
1220 { name => "rm-CH", lcid => 0x00000417 },
1221 { name => "rn" },
1222 { name => "rn-BI" },
1223 { name => "ro", lcid => 0x00000018, oemcp => 852, ebcdiccp => 20880, sabbrevlangname => "ROM", sopentypelang => "ROM" },
1224 { name => "ro-MD", lcid => 0x00000818, maccp => 65001, sabbrevlangname => "ROD" },
1225 { name => "ro-RO", lcid => 0x00000418, group => 2 },
1226 { name => "rof" },
1227 { name => "rof-TZ" },
1228 { name => "ru", lcid => 0x00000019, oemcp => 866 },
1229 { name => "ru-BY", maccp => 65001 },
1230 { name => "ru-KG", maccp => 65001 },
1231 { name => "ru-KZ", maccp => 65001 },
1232 { name => "ru-MD", lcid => 0x00000819, maccp => 65001, sabbrevlangname => "RUM" },
1233 { name => "ru-RU", lcid => 0x00000419, group => 5 },
1234 { name => "ru-UA", maccp => 65001 },
1235 { name => "rw", lcid => 0x00000087, oemcp => 437, sopentypelang => "RUA" },
1236 { name => "rw-RW", lcid => 0x00000487 },
1237 { name => "rwk" },
1238 { name => "rwk-TZ" },
1239 { name => "sa", lcid => 0x0000004f, slist => ",", group => 15 },
1240 { name => "sa-Deva", alias => "sa" },
1241 { name => "sa-Deva-IN", alias => "sa-IN" },
1242 { name => "sa-IN", lcid => 0x0000044f },
1243 { name => "sah", lcid => 0x00000085, oemcp => 866, group => 5, sopentypelang => "YAK" },
1244 { name => "sah-Cyrl", alias => "sah" },
1245 { name => "sah-Cyrl-RU", alias => "sah-RU" },
1246 { name => "sah-RU", lcid => 0x00000485 },
1247 { name => "saq" },
1248 { name => "saq-KE" },
1249 { name => "sat" },
1250 { name => "sat-Olck" },
1251 { name => "sat-Olck-IN" },
1252 { name => "sbp" },
1253 { name => "sbp-TZ" },
1254 { name => "sc" },
1255 { name => "sc-IT" },
1256 { name => "sd", lcid => 0x00000059, inegnumber => 3, oemcp => 720, sabbrevlangname => "SIP" },
1257 { name => "sd-Arab", lcid => 0x00007c59, group => 13 },
1258 { name => "sd-Arab-PK", lcid => 0x00000859 },
1259 { name => "sd-Deva", inegnumber => 1, slist => ",", oemcp => 65001, group => 15 },
1260 { name => "sd-Deva-IN", lcid => 0x00000459, sabbrevlangname => "ZZZ" },
1261 { name => "sd-PK", alias => "sd-Arab-PK" },
1262 { name => "se", lcid => 0x0000003b, oemcp => 850, ebcdiccp => 20277, sopentypelang => "NSM" },
1263 { name => "se-FI", lcid => 0x00000c3b, ebcdiccp => 20278, sabbrevlangname => "SMG" },
1264 { name => "se-NO", lcid => 0x0000043b },
1265 { name => "se-SE", lcid => 0x0000083b, ebcdiccp => 20278, sabbrevlangname => "SMF" },
1266 { name => "se-Latn", alias => "se" },
1267 { name => "se-Latn-FI", alias => "se-FI" },
1268 { name => "se-Latn-NO", alias => "se-NO" },
1269 { name => "se-Latn-SE", alias => "se-SE" },
1270 { name => "seh" },
1271 { name => "seh-MZ" },
1272 { name => "ses" },
1273 { name => "ses-ML" },
1274 { name => "sg", sopentypelang => "SGO" },
1275 { name => "sg-CF" },
1276 { name => "shi" },
1277 { name => "shi-Latn" },
1278 { name => "shi-Latn-MA" },
1279 { name => "shi-Tfng" },
1280 { name => "shi-Tfng-MA" },
1281 { name => "si", lcid => 0x0000005b, group => 15, sopentypelang => "SNH" },
1282 { name => "si-LK", lcid => 0x0000045b },
1283 { name => "sk", lcid => 0x0000001b, oemcp => 852, ebcdiccp => 20880, group => 2, sabbrevlangname => "SKY", sopentypelang => "SKY" },
1284 { name => "sk-SK", lcid => 0x0000041b },
1285 { name => "sl", lcid => 0x00000024, oemcp => 852, ebcdiccp => 20880, group => 2 },
1286 { name => "sl-SI", lcid => 0x00000424 },
1287 { name => "sma", lcid => 0x0000783b, sparent => "se", ebcdiccp => 20278, dir => "seed", sabbrevlangname => "SMB", sopentypelang => "SSM" },
1288 { name => "sma-Latn", alias => "sma" },
1289 { name => "sma-Latn-NO", alias => "sma-NO" },
1290 { name => "sma-Latn-SE", alias => "sma-SE" },
1291 { name => "sma-NO", lcid => 0x0000183b, ebcdiccp => 20277, dir => "seed", sabbrevlangname => "SMA" },
1292 { name => "sma-SE", lcid => 0x00001c3b, dir => "seed" },
1293 { name => "smj", lcid => 0x00007c3b, sparent => "se", ebcdiccp => 20278, dir => "seed", sabbrevlangname => "SMK", sopentypelang => "LSM" },
1294 { name => "smj-Latn", alias => "smj" },
1295 { name => "smj-Latn-NO", alias => "smj-NO" },
1296 { name => "smj-Latn-SE", alias => "smj-SE" },
1297 { name => "smj-NO", lcid => 0x0000103b, ebcdiccp => 20277, dir => "seed", sabbrevlangname => "SMJ" },
1298 { name => "smj-SE", lcid => 0x0000143b, dir => "seed" },
1299 { name => "smn", lcid => 0x0000703b, sparent => "se", ebcdiccp => 20278, sopentypelang => "ISM" },
1300 { name => "smn-FI", lcid => 0x0000243b },
1301 { name => "smn-Latn", alias => "smn" },
1302 { name => "smn-Latn-FI", alias => "smn-FI" },
1303 { name => "sms", lcid => 0x0000743b, sparent => "se", ebcdiccp => 20278, dir => "seed", sopentypelang => "SKS" },
1304 { name => "sms-FI", lcid => 0x0000203b, dir => "seed" },
1305 { name => "sms-Latn", alias => "sms" },
1306 { name => "sms-Latn-FI", alias => "sms-FI" },
1307 { name => "sn", sopentypelang => "SNA0" },
1308 { name => "sn-Latn", file => "sn" },
1309 { name => "sn-Latn-ZW", file => "sn_ZW" },
1310 { name => "sn-ZW", alias => "sn-Latn-ZW" },
1311 { name => "so", lcid => 0x00000077, sopentypelang => "SML" },
1312 { name => "so-DJ" },
1313 { name => "so-ET" },
1314 { name => "so-KE" },
1315 { name => "so-SO", lcid => 0x00000477 },
1316 { name => "sq", lcid => 0x0000001c, oemcp => 852, ebcdiccp => 20880, group => 2 },
1317 { name => "sq-AL", lcid => 0x0000041c },
1318 { name => "sq-MK" },
1319 { name => "sq-XK" },
1320 { name => "sr", lcid => 0x00007c1a, sortlocale => "sr-Latn-RS", oemcp => 852, group => 2, sabbrevlangname => "SRB", sopentypelang => "SRB" },
1321 { name => "sr-Cyrl", lcid => 0x00006c1a, oemcp => 855, ebcdiccp => 21025, group => 5, sabbrevlangname => "SRO" },
1322 { name => "sr-Cyrl-BA", lcid => 0x00001c1a, sabbrevlangname => "SRN" },
1323 { name => "sr-Cyrl-ME", lcid => 0x0000301a, sabbrevlangname => "SRQ" },
1324 { name => "sr-Cyrl-RS", lcid => 0x0000281a },
1325 { name => "sr-Cyrl-XK" },
1326 { name => "sr-Latn", lcid => 0x0000701a, sabbrevlangname => "SRM" },
1327 { name => "sr-Latn-BA", lcid => 0x0000181a, maccp => 10082, ebcdiccp => 870, sabbrevlangname => "SRS" },
1328 { name => "sr-Latn-ME", lcid => 0x00002c1a, sabbrevlangname => "SRP" },
1329 { name => "sr-Latn-RS", lcid => 0x0000241a, sabbrevlangname => "SRM" },
1330 { name => "sr-Latn-XK" },
1331 ## name => "sr-Cyrl-CS", lcid => 0x00000c1a },
1332 ## name => "sr-Latn-CS", lcid => 0x0000081a },
1333 { name => "ss", dir => "seed", sopentypelang => "SWZ" },
1334 { name => "ss-SZ", dir => "seed" },
1335 { name => "ss-ZA", dir => "seed" },
1336 { name => "ssy", dir => "seed" },
1337 { name => "ssy-ER", dir => "seed" },
1338 { name => "st", lcid => 0x00000030, dir => "seed" },
1339 { name => "st-LS", dir => "seed" },
1340 { name => "st-ZA", lcid => 0x00000430, dir => "seed" },
1341 { name => "su" },
1342 { name => "su-Latn" },
1343 { name => "su-Latn-ID" },
1344 { name => "sv", lcid => 0x0000001d, oemcp => 850, ebcdiccp => 20278, sabbrevlangname => "SVE", sopentypelang => "SVE" },
1345 { name => "sv-AX" },
1346 { name => "sv-FI", lcid => 0x0000081d, sabbrevlangname => "SVF" },
1347 { name => "sv-SE", lcid => 0x0000041d, sabbrevlangname => "SVE" },
1348 { name => "sw", lcid => 0x00000041, territory => "KE", oemcp => 437, ebcdiccp => 500, sabbrevlangname => "SWK", sopentypelang => "SWK" },
1349 { name => "sw-CD" },
1350 { name => "sw-KE", lcid => 0x00000441 },
1351 { name => "sw-TZ" },
1352 { name => "sw-UG" },
1353 { name => "swc-CD", alias => "sw-CD" },
1354 { name => "syr", lcid => 0x0000005a, slist => ",", group => 13, dir => "seed" },
1355 { name => "syr-SY", lcid => 0x0000045a, dir => "seed" },
1356 { name => "syr-Syrc", alias => "syr" },
1357 { name => "syr-Syrc-SY", alias => "syr-SY" },
1358 { name => "ta", lcid => 0x00000049, slist => ",", group => 15, sabbrevlangname => "TAI" },
1359 { name => "ta-IN", lcid => 0x00000449 },
1360 { name => "ta-LK", lcid => 0x00000849, sabbrevlangname => "TAM" },
1361 { name => "ta-MY" },
1362 { name => "ta-SG" },
1363 { name => "te", lcid => 0x0000004a, group => 15 },
1364 { name => "te-IN", lcid => 0x0000044a },
1365 { name => "teo" },
1366 { name => "teo-KE" },
1367 { name => "teo-UG" },
1368 { name => "tg", lcid => 0x00000028, oemcp => 866, group => 5, sabbrevlangname => "TAJ", sopentypelang => "TAJ" },
1369 { name => "tg-Cyrl", lcid => 0x00007c28, file => "tg" },
1370 { name => "tg-Cyrl-TJ", lcid => 0x00000428, file => "tg_TJ" },
1371 { name => "tg-TJ", alias => "tg-Cyrl-TJ" },
1372 { name => "th", lcid => 0x0000001e, oemcp => 874, ebcdiccp => 20838, slist => ",", group => 11 },
1373 { name => "th-TH", lcid => 0x0000041e },
1374 { name => "ti", lcid => 0x00000073, territory => "ER", sopentypelang => "TGY" },
1375 { name => "ti-ER", lcid => 0x00000873 },
1376 { name => "ti-ET", lcid => 0x00000473, sabbrevlangname => "TIE" },
1377 { name => "tig", dir => "seed", sopentypelang => "TGR" },
1378 { name => "tig-ER", dir => "seed" },
1379 { name => "tk", lcid => 0x00000042, oemcp => 852, ebcdiccp => 20880, group => 2, sopentypelang => "TKM" },
1380 { name => "tk-Latn", alias => "tk" },
1381 { name => "tk-Latn-TM", alias => "tk-TM" },
1382 { name => "tk-TM", lcid => 0x00000442 },
1383 { name => "tn", lcid => 0x00000032, oemcp => 850, dir => "seed", sopentypelang => "TNA" },
1384 { name => "tn-BW", lcid => 0x00000832, dir => "seed", sabbrevlangname => "TSB" },
1385 { name => "tn-ZA", lcid => 0x00000432, dir => "seed" },
1386 { name => "to", sopentypelang => "TGN" },
1387 { name => "to-TO" },
1388 { name => "tr", lcid => 0x0000001f, oemcp => 857, ebcdiccp => 20905, group => 6, sabbrevlangname => "TRK", sopentypelang => "TRK" },
1389 { name => "tr-CY" },
1390 { name => "tr-TR", lcid => 0x0000041f },
1391 { name => "ts", lcid => 0x00000031, dir => "seed", sopentypelang => "TSG" },
1392 { name => "ts-ZA", lcid => 0x00000431, dir => "seed" },
1393 { name => "tt", lcid => 0x00000044, oemcp => 866, group => 5, sabbrevlangname => "TTT" },
1394 { name => "tt-Cyrl", alias => "tt" },
1395 { name => "tt-Cyrl-RU", alias => "tt-RU" },
1396 { name => "tt-RU", lcid => 0x00000444 },
1397 { name => "twq" },
1398 { name => "twq-NE" },
1399 { name => "tzm", lcid => 0x0000005f, sortlocale => "tzm-Latn-DZ", oemcp => 850, ebcdiccp => 20297, sabbrevlangname => "TZA" },
1400 { name => "tzm-Latn", lcid => 0x00007c5f, territory => "DZ", file => "tzm" },
1401 { name => "tzm-Latn-MA", file => "tzm_MA", oemcp => 65001 },
1402 { name => "tzm-Latn-DZ", lcid => 0x0000085f, file => "tzm" },
1403 { name => "tzm-MA", alias => "tzm-Latn-MA" },
1404 ## name => "tzm-Arab", group => 13 },
1405 ## name => "tzm-Arab-MA", lcid => 0x0000045f },
1406 ## name => "tzm-Tfng", lcid => 0x0000785f },
1407 ## name => "tzm-Tfng-MA", lcid => 0x0000105f },
1408 { name => "ug", lcid => 0x00000080, oemcp => 720, slist => ",", group => 13, sopentypelang => "UYG" },
1409 { name => "ug-Arab", alias => "ug" },
1410 { name => "ug-Arab-CN", alias => "ug-CN" },
1411 { name => "ug-CN", lcid => 0x00000480 },
1412 { name => "uk", lcid => 0x00000022, oemcp => 866, maccp => 10017, ebcdiccp => 500, group => 5 },
1413 { name => "uk-UA", lcid => 0x00000422 },
1414 { name => "ur", lcid => 0x00000020, oemcp => 720 },
1415 { name => "ur-IN", lcid => 0x00000820, maccp => 65001, sabbrevlangname => "URI" },
1416 { name => "ur-PK", lcid => 0x00000420, group => 13 },
1417 { name => "uz", lcid => 0x00000043, oemcp => 857, maccp => 10029, group => 2 },
1418 { name => "uz-Arab", oemcp => 65001, maccp => 65001 },
1419 { name => "uz-Arab-AF" },
1420 { name => "uz-Cyrl", lcid => 0x00007843, oemcp => 866, maccp => 10007, group => 5, sabbrevlangname => "UZC" },
1421 { name => "uz-Cyrl-UZ", lcid => 0x00000843 },
1422 { name => "uz-Latn", lcid => 0x00007c43 },
1423 { name => "uz-Latn-UZ", lcid => 0x00000443 },
1424 { name => "vai" },
1425 { name => "vai-Latn" },
1426 { name => "vai-Latn-LR" },
1427 { name => "vai-Vaii" },
1428 { name => "vai-Vaii-LR" },
1429 { name => "ve", lcid => 0x00000033, dir => "seed", sabbrevlangname => "ZZZ" },
1430 { name => "ve-ZA", lcid => 0x00000433, dir => "seed" },
1431 { name => "vi", lcid => 0x0000002a, oemcp => 1258, slist => ",", group => 14, sabbrevlangname => "VIT", sopentypelang => "VIT" },
1432 { name => "vi-VN", lcid => 0x0000042a },
1433 { name => "vo", dir => "seed" },
1434 { name => "vo-001", dir => "seed" },
1435 { name => "vun" },
1436 { name => "vun-TZ" },
1437 { name => "wae" },
1438 { name => "wae-CH" },
1439 { name => "wal", dir => "seed" },
1440 { name => "wal-ET", dir => "seed" },
1441 { name => "wo", lcid => 0x00000088, oemcp => 850, ebcdiccp => 20297, sopentypelang => "WLF" },
1442 { name => "wo-Latn", alias => "wo" },
1443 { name => "wo-Latn-SN", alias => "wo-SN" },
1444 { name => "wo-SN", lcid => 0x00000488 },
1445 { name => "x-IV_mathan", lcid => 0x0001007f, alias => "" },
1446 { name => "xh", lcid => 0x00000034, oemcp => 850, sopentypelang => "XHS" },
1447 { name => "xh-ZA", lcid => 0x00000434 },
1448 { name => "xog" },
1449 { name => "xog-UG" },
1450 { name => "yav" },
1451 { name => "yav-CM" },
1452 { name => "yi", lcid => 0x0000003d, sabbrevlangname => "ZZZ", sopentypelang => "JII" },
1453 { name => "yi-001", lcid => 0x0000043d },
1454 { name => "yo", lcid => 0x0000006a, oemcp => 437, sopentypelang => "YBA" },
1455 { name => "yo-BJ", ebcdiccp => 500 },
1456 { name => "yo-Latn", alias => "yo" },
1457 { name => "yo-Latn-NG", alias => "yo-NG" },
1458 { name => "yo-NG", lcid => 0x0000046a },
1459 { name => "yrl" },
1460 { name => "yrl-BR" },
1461 { name => "yrl-CO" },
1462 { name => "yrl-VE" },
1463 { name => "yue" },
1464 { name => "yue-Hans" },
1465 { name => "yue-Hans-CN" },
1466 { name => "yue-Hant" },
1467 { name => "yue-Hant-HK" },
1468 { name => "zgh" },
1469 { name => "zgh-MA", alias => "zgh-Tfng-MA" },
1470 { name => "zgh-Tfng", file => "zgh" },
1471 { name => "zgh-Tfng-MA", file => "zgh_MA" },
1472 { name => "zh", lcid => 0x00007804, ireadinglayout => 2, oemcp => 936, slist => ",", sscripts => "Hani Hans", sabbrevlangname => "CHS", sopentypelang => "ZHS" },
1473 { name => "zh-CN", lcid => 0x00000804, file => "zh_Hans_CN", sparent => "zh-Hans" },
1474 { name => "zh-CN_phoneb", lcid => 0x00050804, alias => "zh-CN" },
1475 { name => "zh-CN_stroke", lcid => 0x00020804, alias => "zh-CN" },
1476 { name => "zh-Hans", lcid => 0x00000004, group => 10 },
1477 { name => "zh-Hans-CN", alias => "zh-CN" },
1478 { name => "zh-Hans-CN-u-co-phonebk", alias => "zh-CN_phoneb" },
1479 { name => "zh-Hans-CN-u-co-stroke", alias => "zh-CN_stroke" },
1480 { name => "zh-Hans-HK", slist => ";" },
1481 { name => "zh-Hans-MO", slist => ";" },
1482 { name => "zh-Hans-SG", alias => "zh-SG" },
1483 { name => "zh-Hans-SG-u-co-phonebk", alias => "zh-SG_phoneb" },
1484 { name => "zh-Hans-SG-u-co-stroke", alias => "zh-SG_stroke" },
1485 { name => "zh-Hant", lcid => 0x00007c04, sortlocale => "zh-HK", ireadinglayout => 2, oemcp => 950, slist => ",", sscripts => "Hani Hant", group => 9, sabbrevlangname => "CHT", sopentypelang => "ZHH" },
1486 { name => "zh-Hant-HK", alias => "zh-HK" },
1487 { name => "zh-Hant-HK-u-co-unihan", alias => "zh-HK_radstr" },
1488 { name => "zh-Hant-MO", alias => "zh-MO" },
1489 { name => "zh-Hant-MO-u-co-stroke", alias => "zh-MO_stroke" },
1490 { name => "zh-Hant-MO-u-co-unihan", alias => "zh-MO_radstr" },
1491 { name => "zh-Hant-TW", alias => "zh-TW" },
1492 { name => "zh-Hant-TW-u-co-phonetic", alias => "zh-TW_pronun" },
1493 { name => "zh-Hant-TW-u-co-unihan", alias => "zh-TW_radstr" },
1494 { name => "zh-HK", lcid => 0x00000c04, file => "zh_Hant_HK", sparent => "zh-Hant", sabbrevlangname => "ZHH" },
1495 { name => "zh-HK_radstr", lcid => 0x00040c04, alias => "zh-HK" },
1496 { name => "zh-MO", lcid => 0x00001404, file => "zh_Hant_MO", sparent => "zh-Hant", sabbrevlangname => "ZHM", sopentypelang => "ZHT" },
1497 { name => "zh-MO_radstr", lcid => 0x00041404, alias => "zh-MO" },
1498 { name => "zh-MO_stroke", lcid => 0x00021404, alias => "zh-MO" },
1499 { name => "zh-SG", lcid => 0x00001004, file => "zh_Hans_SG", sparent => "zh-Hans", sabbrevlangname => "ZHI" },
1500 { name => "zh-SG_phoneb", lcid => 0x00051004, alias => "zh-SG" },
1501 { name => "zh-SG_stroke", lcid => 0x00021004, alias => "zh-SG" },
1502 { name => "zh-TW", lcid => 0x00000404, file => "zh_Hant_TW", sparent => "zh-Hant", sopentypelang => "ZHT" },
1503 { name => "zh-TW_pronun", lcid => 0x00030404, alias => "zh-TW" },
1504 { name => "zh-TW_radstr", lcid => 0x00040404, alias => "zh-TW" },
1505 { name => "zu", lcid => 0x00000035, oemcp => 850 },
1506 { name => "zu-ZA", lcid => 0x00000435 },
1509 my @calendars =
1511 { id => 1, name => "Gregorian", itwodigityearmax => 2049 },
1512 { id => 2, type => "gregorian", locale => "en-US", itwodigityearmax => 2049 },
1513 { id => 3, type => "japanese", locale => "ja-JP", eras => [ 232..236 ] },
1514 { id => 4, type => "roc", locale => "zh-TW", eras => [ 1 ] },
1515 { id => 5, type => "dangi", locale => "ko-KR", eras => [ 0 ] },
1516 { id => 6, type => "islamic", locale => "ar-SA", itwodigityearmax => 1451 },
1517 { id => 7, type => "buddhist", locale => "th-TH", eras => [ 0 ] },
1518 { id => 8, type => "hebrew", locale => "he-IL", itwodigityearmax => 5810 },
1519 { id => 9, type => "gregorian", locale => "fr-FR", itwodigityearmax => 2049 },
1520 { id => 10, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1521 { id => 11, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1522 { id => 12, type => "gregorian", locale => "ar-SA", itwodigityearmax => 2049 },
1523 { id => 13, name => "Julian", locale => "en-US", itwodigityearmax => 2049 },
1524 { id => 14, name => "Japanese Lunisolar" },
1525 { id => 15, name => "Chinese Lunisolar" },
1526 { id => 16, name => "Saka" },
1527 { id => 17, name => "Lunar ETO Chinese" },
1528 { id => 18, name => "Lunar ETO Korean" },
1529 { id => 19, name => "Lunar ETO Rokuyou" },
1530 { id => 20, name => "Korean Lunisolar" },
1531 { id => 21, name => "Taiwan Lunisolar" },
1532 { id => 22, type => "persian", locale => "prs-AF", itwodigityearmax => 1429 },
1533 { id => 23, type => "islamic-umalqura", locale => "ar-SA", itwodigityearmax => 1451 },
1536 my @geoids =
1538 { id => 2, name => "AG" }, # Antigua and Barbuda
1539 { id => 3, name => "AF" }, # Afghanistan
1540 { id => 4, name => "DZ" }, # Algeria
1541 { id => 5, name => "AZ" }, # Azerbaijan
1542 { id => 6, name => "AL" }, # Albania
1543 { id => 7, name => "AM" }, # Armenia
1544 { id => 8, name => "AD" }, # Andorra
1545 { id => 9, name => "AO" }, # Angola
1546 { id => 10, name => "AS" }, # American Samoa
1547 { id => 11, name => "AR" }, # Argentina
1548 { id => 12, name => "AU" }, # Australia
1549 { id => 14, name => "AT" }, # Austria
1550 { id => 17, name => "BH" }, # Bahrain
1551 { id => 18, name => "BB" }, # Barbados
1552 { id => 19, name => "BW" }, # Botswana
1553 { id => 20, name => "BM" }, # Bermuda
1554 { id => 21, name => "BE" }, # Belgium
1555 { id => 22, name => "BS" }, # Bahamas, The
1556 { id => 23, name => "BD" }, # Bangladesh
1557 { id => 24, name => "BZ" }, # Belize
1558 { id => 25, name => "BA" }, # Bosnia and Herzegovina
1559 { id => 26, name => "BO" }, # Bolivia
1560 { id => 27, name => "MM" }, # Myanmar
1561 { id => 28, name => "BJ" }, # Benin
1562 { id => 29, name => "BY" }, # Belarus
1563 { id => 30, name => "SB" }, # Solomon Islands
1564 { id => 32, name => "BR" }, # Brazil
1565 { id => 34, name => "BT" }, # Bhutan
1566 { id => 35, name => "BG" }, # Bulgaria
1567 { id => 37, name => "BN" }, # Brunei
1568 { id => 38, name => "BI" }, # Burundi
1569 { id => 39, name => "CA" }, # Canada
1570 { id => 40, name => "KH" }, # Cambodia
1571 { id => 41, name => "TD" }, # Chad
1572 { id => 42, name => "LK" }, # Sri Lanka
1573 { id => 43, name => "CG" }, # Congo
1574 { id => 44, name => "CD" }, # Congo (DRC)
1575 { id => 45, name => "CN" }, # China
1576 { id => 46, name => "CL" }, # Chile
1577 { id => 49, name => "CM" }, # Cameroon
1578 { id => 50, name => "KM" }, # Comoros
1579 { id => 51, name => "CO" }, # Colombia
1580 { id => 54, name => "CR" }, # Costa Rica
1581 { id => 55, name => "CF" }, # Central African Republic
1582 { id => 56, name => "CU" }, # Cuba
1583 { id => 57, name => "CV" }, # Cape Verde
1584 { id => 59, name => "CY" }, # Cyprus
1585 { id => 61, name => "DK" }, # Denmark
1586 { id => 62, name => "DJ" }, # Djibouti
1587 { id => 63, name => "DM" }, # Dominica
1588 { id => 65, name => "DO" }, # Dominican Republic
1589 { id => 66, name => "EC" }, # Ecuador
1590 { id => 67, name => "EG" }, # Egypt
1591 { id => 68, name => "IE" }, # Ireland
1592 { id => 69, name => "GQ" }, # Equatorial Guinea
1593 { id => 70, name => "EE" }, # Estonia
1594 { id => 71, name => "ER" }, # Eritrea
1595 { id => 72, name => "SV" }, # El Salvador
1596 { id => 73, name => "ET" }, # Ethiopia
1597 { id => 75, name => "CZ" }, # Czech Republic
1598 { id => 77, name => "FI" }, # Finland
1599 { id => 78, name => "FJ" }, # Fiji Islands
1600 { id => 80, name => "FM" }, # Micronesia
1601 { id => 81, name => "FO" }, # Faroe Islands
1602 { id => 84, name => "FR" }, # France
1603 { id => 86, name => "GM" }, # Gambia, The
1604 { id => 87, name => "GA" }, # Gabon
1605 { id => 88, name => "GE" }, # Georgia
1606 { id => 89, name => "GH" }, # Ghana
1607 { id => 90, name => "GI" }, # Gibraltar
1608 { id => 91, name => "GD" }, # Grenada
1609 { id => 93, name => "GL" }, # Greenland
1610 { id => 94, name => "DE" }, # Germany
1611 { id => 98, name => "GR" }, # Greece
1612 { id => 99, name => "GT" }, # Guatemala
1613 { id => 100, name => "GN" }, # Guinea
1614 { id => 101, name => "GY" }, # Guyana
1615 { id => 103, name => "HT" }, # Haiti
1616 { id => 104, name => "HK" }, # Hong Kong S.A.R.
1617 { id => 106, name => "HN" }, # Honduras
1618 { id => 108, name => "HR" }, # Croatia
1619 { id => 109, name => "HU" }, # Hungary
1620 { id => 110, name => "IS" }, # Iceland
1621 { id => 111, name => "ID" }, # Indonesia
1622 { id => 113, name => "IN" }, # India
1623 { id => 114, name => "IO" }, # British Indian Ocean Territory
1624 { id => 116, name => "IR" }, # Iran
1625 { id => 117, name => "IL" }, # Israel
1626 { id => 118, name => "IT" }, # Italy
1627 { id => 119, name => "CI" }, # Côte d'Ivoire
1628 { id => 121, name => "IQ" }, # Iraq
1629 { id => 122, name => "JP" }, # Japan
1630 { id => 124, name => "JM" }, # Jamaica
1631 { id => 125, name => "SJ" }, # Jan Mayen
1632 { id => 126, name => "JO" }, # Jordan
1633 { id => 127, parent => "UM" }, # Johnston Atoll
1634 { id => 129, name => "KE" }, # Kenya
1635 { id => 130, name => "KG" }, # Kyrgyzstan
1636 { id => 131, name => "KP" }, # North Korea
1637 { id => 133, name => "KI" }, # Kiribati
1638 { id => 134, name => "KR" }, # Korea
1639 { id => 136, name => "KW" }, # Kuwait
1640 { id => 137, name => "KZ" }, # Kazakhstan
1641 { id => 138, name => "LA" }, # Laos
1642 { id => 139, name => "LB" }, # Lebanon
1643 { id => 140, name => "LV" }, # Latvia
1644 { id => 141, name => "LT" }, # Lithuania
1645 { id => 142, name => "LR" }, # Liberia
1646 { id => 143, name => "SK" }, # Slovakia
1647 { id => 145, name => "LI" }, # Liechtenstein
1648 { id => 146, name => "LS" }, # Lesotho
1649 { id => 147, name => "LU" }, # Luxembourg
1650 { id => 148, name => "LY" }, # Libya
1651 { id => 149, name => "MG" }, # Madagascar
1652 { id => 151, name => "MO" }, # Macao S.A.R.
1653 { id => 152, name => "MD" }, # Moldova
1654 { id => 154, name => "MN" }, # Mongolia
1655 { id => 156, name => "MW" }, # Malawi
1656 { id => 157, name => "ML" }, # Mali
1657 { id => 158, name => "MC" }, # Monaco
1658 { id => 159, name => "MA" }, # Morocco
1659 { id => 160, name => "MU" }, # Mauritius
1660 { id => 162, name => "MR" }, # Mauritania
1661 { id => 163, name => "MT" }, # Malta
1662 { id => 164, name => "OM" }, # Oman
1663 { id => 165, name => "MV" }, # Maldives
1664 { id => 166, name => "MX" }, # Mexico
1665 { id => 167, name => "MY" }, # Malaysia
1666 { id => 168, name => "MZ" }, # Mozambique
1667 { id => 173, name => "NE" }, # Niger
1668 { id => 174, name => "VU" }, # Vanuatu
1669 { id => 175, name => "NG" }, # Nigeria
1670 { id => 176, name => "NL" }, # Netherlands
1671 { id => 177, name => "NO" }, # Norway
1672 { id => 178, name => "NP" }, # Nepal
1673 { id => 180, name => "NR" }, # Nauru
1674 { id => 181, name => "SR" }, # Suriname
1675 { id => 182, name => "NI" }, # Nicaragua
1676 { id => 183, name => "NZ" }, # New Zealand
1677 { id => 184, name => "PS" }, # Palestinian Authority
1678 { id => 185, name => "PY" }, # Paraguay
1679 { id => 187, name => "PE" }, # Peru
1680 { id => 190, name => "PK" }, # Pakistan
1681 { id => 191, name => "PL" }, # Poland
1682 { id => 192, name => "PA" }, # Panama
1683 { id => 193, name => "PT" }, # Portugal
1684 { id => 194, name => "PG" }, # Papua New Guinea
1685 { id => 195, name => "PW" }, # Palau
1686 { id => 196, name => "GW" }, # Guinea-Bissau
1687 { id => 197, name => "QA" }, # Qatar
1688 { id => 198, name => "RE" }, # Reunion
1689 { id => 199, name => "MH" }, # Marshall Islands
1690 { id => 200, name => "RO" }, # Romania
1691 { id => 201, name => "PH" }, # Philippines
1692 { id => 202, name => "PR" }, # Puerto Rico
1693 { id => 203, name => "RU" }, # Russia
1694 { id => 204, name => "RW" }, # Rwanda
1695 { id => 205, name => "SA" }, # Saudi Arabia
1696 { id => 206, name => "PM" }, # St. Pierre and Miquelon
1697 { id => 207, name => "KN" }, # St. Kitts and Nevis
1698 { id => 208, name => "SC" }, # Seychelles
1699 { id => 209, name => "ZA" }, # South Africa
1700 { id => 210, name => "SN" }, # Senegal
1701 { id => 212, name => "SI" }, # Slovenia
1702 { id => 213, name => "SL" }, # Sierra Leone
1703 { id => 214, name => "SM" }, # San Marino
1704 { id => 215, name => "SG" }, # Singapore
1705 { id => 216, name => "SO" }, # Somalia
1706 { id => 217, name => "ES" }, # Spain
1707 { id => 218, name => "LC" }, # St. Lucia
1708 { id => 219, name => "SD" }, # Sudan
1709 { id => 220, name => "SJ" }, # Svalbard
1710 { id => 221, name => "SE" }, # Sweden
1711 { id => 222, name => "SY" }, # Syria
1712 { id => 223, name => "CH" }, # Switzerland
1713 { id => 224, name => "AE" }, # United Arab Emirates
1714 { id => 225, name => "TT" }, # Trinidad and Tobago
1715 { id => 227, name => "TH" }, # Thailand
1716 { id => 228, name => "TJ" }, # Tajikistan
1717 { id => 231, name => "TO" }, # Tonga
1718 { id => 232, name => "TG" }, # Togo
1719 { id => 233, name => "ST" }, # São Tomé and Príncipe
1720 { id => 234, name => "TN" }, # Tunisia
1721 { id => 235, name => "TR" }, # Turkey
1722 { id => 236, name => "TV" }, # Tuvalu
1723 { id => 237, name => "TW" }, # Taiwan
1724 { id => 238, name => "TM" }, # Turkmenistan
1725 { id => 239, name => "TZ" }, # Tanzania
1726 { id => 240, name => "UG" }, # Uganda
1727 { id => 241, name => "UA" }, # Ukraine
1728 { id => 242, name => "GB" }, # United Kingdom
1729 { id => 244, name => "US" }, # United States
1730 { id => 245, name => "BF" }, # Burkina Faso
1731 { id => 246, name => "UY" }, # Uruguay
1732 { id => 247, name => "UZ" }, # Uzbekistan
1733 { id => 248, name => "VC" }, # St. Vincent and the Grenadines
1734 { id => 249, name => "VE" }, # Bolivarian Republic of Venezuela
1735 { id => 251, name => "VN" }, # Vietnam
1736 { id => 252, name => "VI" }, # Virgin Islands
1737 { id => 253, name => "VA" }, # Vatican City
1738 { id => 254, name => "NA" }, # Namibia
1739 { id => 257, name => "EH" }, # Western Sahara (disputed)
1740 { id => 258, parent => "UM" }, # Wake Island
1741 { id => 259, name => "WS" }, # Samoa
1742 { id => 260, name => "SZ" }, # Swaziland
1743 { id => 261, name => "YE" }, # Yemen
1744 { id => 263, name => "ZM" }, # Zambia
1745 { id => 264, name => "ZW" }, # Zimbabwe
1746 { id => 269, name => "CS" }, # Serbia and Montenegro (Former)
1747 { id => 270, name => "ME" }, # Montenegro
1748 { id => 271, name => "RS" }, # Serbia
1749 { id => 273, name => "CW" }, # Curaçao
1750 { id => 276, name => "SS" }, # South Sudan
1751 { id => 300, name => "AI" }, # Anguilla
1752 { id => 301, name => "AQ" }, # Antarctica
1753 { id => 302, name => "AW" }, # Aruba
1754 { id => 303, parent => "SH" }, # Ascension Island
1755 { id => 304, parent => "053" }, # Ashmore and Cartier Islands
1756 { id => 305, parent => "UM" }, # Baker Island
1757 { id => 306, name => "BV" }, # Bouvet Island
1758 { id => 307, name => "KY" }, # Cayman Islands
1759 { id => 308, name => "830", parent => "155" }, # Channel Islands
1760 { id => 309, name => "CX" }, # Christmas Island
1761 { id => 310, parent => "009" }, # Clipperton Island
1762 { id => 311, name => "CC" }, # Cocos (Keeling) Islands
1763 { id => 312, name => "CK" }, # Cook Islands
1764 { id => 313, parent => "053" }, # Coral Sea Islands
1765 { id => 314, parent => "IO" }, # Diego Garcia
1766 { id => 315, name => "FK" }, # Falkland Islands (Islas Malvinas)
1767 { id => 317, name => "GF" }, # French Guiana
1768 { id => 318, name => "PF" }, # French Polynesia
1769 { id => 319, name => "TF" }, # French Southern and Antarctic Lands
1770 { id => 321, name => "GP" }, # Guadeloupe
1771 { id => 322, name => "GU" }, # Guam
1772 { id => 323 }, # Guantanamo Bay
1773 { id => 324, name => "GG" }, # Guernsey
1774 { id => 325, name => "HM" }, # Heard Island and McDonald Islands
1775 { id => 326, parent => "UM" }, # Howland Island
1776 { id => 327, parent => "UM" }, # Jarvis Island
1777 { id => 328, name => "JE" }, # Jersey
1778 { id => 329, parent => "UM" }, # Kingman Reef
1779 { id => 330, name => "MQ" }, # Martinique
1780 { id => 331, name => "YT" }, # Mayotte
1781 { id => 332, name => "MS" }, # Montserrat
1782 { id => 333, name => "AN", region => 1 }, # Netherlands Antilles (Former)
1783 { id => 334, name => "NC" }, # New Caledonia
1784 { id => 335, name => "NU" }, # Niue
1785 { id => 336, name => "NF" }, # Norfolk Island
1786 { id => 337, name => "MP" }, # Northern Mariana Islands
1787 { id => 338, parent => "UM" }, # Palmyra Atoll
1788 { id => 339, name => "PN" }, # Pitcairn Islands
1789 { id => 340, parent => "MP" }, # Rota Island
1790 { id => 341, parent => "MP" }, # Saipan
1791 { id => 342, name => "GS" }, # South Georgia and the South Sandwich Islands
1792 { id => 343, name => "SH" }, # St. Helena
1793 { id => 346, parent => "MP" }, # Tinian Island
1794 { id => 347, name => "TK" }, # Tokelau
1795 { id => 348, parent => "SH" }, # Tristan da Cunha
1796 { id => 349, name => "TC" }, # Turks and Caicos Islands
1797 { id => 351, name => "VG" }, # Virgin Islands, British
1798 { id => 352, name => "WF" }, # Wallis and Futuna
1799 { id => 742, name => "002" }, # Africa
1800 { id => 2129, name => "142" }, # Asia
1801 { id => 10541, name => "150" }, # Europe
1802 { id => 15126, name => "IM" }, # Man, Isle of
1803 { id => 19618, name => "MK" }, # Macedonia, Former Yugoslav Republic of
1804 { id => 20900, name => "054" }, # Melanesia
1805 { id => 21206, name => "057" }, # Micronesia
1806 { id => 21242, parent => "UM" }, # Midway Islands
1807 { id => 23581, name => "021" }, # Northern America
1808 { id => 26286, name => "061" }, # Polynesia
1809 { id => 27082, name => "013" }, # Central America
1810 { id => 27114, name => "009" }, # Oceania
1811 { id => 30967, name => "SX" }, # Sint Maarten (Dutch part)
1812 { id => 31396, name => "005" }, # South America
1813 { id => 31706, name => "MF" }, # Saint Martin (French part)
1814 { id => 39070, name => "001" }, # World
1815 { id => 42483, name => "011" }, # Western Africa
1816 { id => 42484, name => "017" }, # Middle Africa
1817 { id => 42487, name => "015" }, # Northern Africa
1818 { id => 47590, name => "143" }, # Central Asia
1819 { id => 47599, name => "035" }, # South-Eastern Asia
1820 { id => 47600, name => "030" }, # Eastern Asia
1821 { id => 47603, name => "014" }, # Eastern Africa
1822 { id => 47609, name => "151" }, # Eastern Europe
1823 { id => 47610, name => "039" }, # Southern Europe
1824 { id => 47611, name => "145" }, # Middle East
1825 { id => 47614, name => "034" }, # Southern Asia
1826 { id => 7299303, name => "TL" }, # Democratic Republic of Timor-Leste
1827 { id => 9914689, name => "XK" }, # Kosovo
1828 { id => 10026358, name => "019" }, # Americas
1829 { id => 10028789, name => "AX" }, # Ã…land Islands
1830 { id => 10039880, name => "029", sintlsymbol => "XCD" }, # Caribbean
1831 { id => 10039882, name => "154" }, # Northern Europe
1832 { id => 10039883, name => "018" }, # Southern Africa
1833 { id => 10210824, name => "155" }, # Western Europe
1834 { id => 10210825, name => "053" }, # Australia and New Zealand
1835 { id => 161832015, name => "BL" }, # Saint Barthélemy
1836 { id => 161832256, name => "UM" }, # U.S. Minor Outlying Islands
1837 { id => 161832257, name => "419", parent => "019" }, # Latin America and the Caribbean
1838 { id => 161832258, name => "BQ" }, # Bonaire, Sint Eustatius and Saba
1841 my @cp2uni = ();
1842 my @glyph2uni = ();
1843 my @lead_bytes = ();
1844 my @uni2cp = ();
1845 my @tolower_table = ();
1846 my @toupper_table = ();
1847 my @digitmap_table = ();
1848 my @halfwidth_table = ();
1849 my @fullwidth_table = ();
1850 my @cjk_compat_table = ();
1851 my @chinese_traditional_table = ();
1852 my @chinese_simplified_table = ();
1853 my @category_table = ();
1854 my @initial_joining_table = ();
1855 my @direction_table = ();
1856 my @decomp_table = ();
1857 my @combining_class_table = ();
1858 my @decomp_compat_table = ();
1859 my @comp_exclusions = ();
1860 my @idna_decomp_table = ();
1861 my @idna_disallowed = ();
1862 my %registry_keys;
1863 my $default_char;
1864 my $default_wchar;
1866 my %joining_forms =
1868 "isolated" => [],
1869 "final" => [],
1870 "initial" => [],
1871 "medial" => []
1874 my $current_data_file;
1876 sub to_utf16(@)
1878 my @ret;
1879 foreach my $ch (@_)
1881 if ($ch < 0x10000)
1883 push @ret, $ch;
1885 else
1887 my $val = $ch - 0x10000;
1888 push @ret, 0xd800 | ($val >> 10), 0xdc00 | ($val & 0x3ff);
1891 return @ret;
1894 ################################################################
1895 # fetch a unicode.org file and open it
1896 sub open_data_file($@)
1898 my ($id, $name) = @_;
1899 my $data = $data_files{$id};
1900 my $cache = ($ENV{XDG_CACHE_HOME} || "$ENV{HOME}/.cache") . "/wine";
1901 local *FILE;
1903 my $url = $data->{url};
1904 my $filename = "$cache/" . ($data->{name} || ($url =~ s/.*\/([^\/]+)$/$1/r));
1905 unless (-f $filename)
1907 print "Fetching $url...\n";
1908 system "mkdir", "-p", $cache;
1909 !system "wget", "-q", "-O", $filename, $url or die "cannot fetch $url";
1912 my $sha = Digest::SHA->new( "sha256" )->addfile( $filename )->hexdigest;
1913 die "invalid checksum $sha for $filename" unless $sha eq $data->{sha};
1915 if ($filename =~ /\.zip$/)
1917 open FILE, "-|", "unzip", "-p", $filename, $name or die "cannot extract $name from $filename";
1919 elsif ($filename =~ /\.tar\.gz$/)
1921 open FILE, "-|", "tar", "-x", "-f", $filename, "-O", $name or die "cannot extract $name from $filename";
1923 else
1925 open FILE, "<$filename" or die "cannot open $filename";
1927 $current_data_file = $name ? "$url:$name" : $url;
1928 return *FILE;
1931 ################################################################
1932 # load a unicode.org file as XML data
1933 sub load_xml_data_file($@)
1935 my ($id, $name) = @_;
1936 my $FILE = open_data_file( $id, $name );
1937 my $xml = XML::LibXML->load_xml( IO => $FILE );
1938 close FILE;
1939 return $xml;
1942 ################################################################
1943 # recursively get the decomposition for a character
1944 sub get_decomposition($$);
1945 sub get_decomposition($$)
1947 my ($char, $table) = @_;
1948 my @ret;
1950 return $char unless defined $table->[$char];
1951 foreach my $ch (@{$table->[$char]})
1953 push @ret, get_decomposition( $ch, $table );
1955 return @ret;
1958 ################################################################
1959 # get the composition that results in a given character
1960 sub get_composition($$)
1962 my ($ch, $compat) = @_;
1963 return () unless defined $decomp_table[$ch]; # no decomposition
1964 my @ret = @{$decomp_table[$ch]};
1965 return () if @ret < 2; # singleton decomposition
1966 return () if $comp_exclusions[$ch]; # composition exclusion
1967 return () if $combining_class_table[$ch]; # non-starter
1968 return () if $combining_class_table[$ret[0]]; # first char is non-starter
1969 return () if $compat == 1 && !defined $decomp_table[$ret[0]] &&
1970 defined $decomp_compat_table[$ret[0]]; # first char has compat decomposition
1971 return () if $compat == 2 && !defined $decomp_table[$ret[0]] &&
1972 defined $idna_decomp_table[$ret[0]]; # first char has IDNA decomposition
1973 return () if $compat == 2 && defined $idna_decomp_table[$ret[0]] &&
1974 defined $idna_decomp_table[$idna_decomp_table[$ret[0]]->[0]]; # first char's decomposition has IDNA decomposition
1975 return () if $compat == 2 && defined $idna_decomp_table[$ret[1]]; # second char has IDNA decomposition
1976 return @ret;
1979 ################################################################
1980 # recursively build decompositions
1981 sub build_decompositions(@)
1983 my @src = @_;
1984 my @dst;
1986 for (my $i = 0; $i < @src; $i++)
1988 next unless defined $src[$i];
1989 my @decomp = to_utf16( get_decomposition( $i, \@src ));
1990 $dst[$i] = \@decomp;
1992 return @dst;
1995 ################################################################
1996 # compose Hangul sequences
1997 sub compose_hangul(@)
1999 my $SBASE = 0xac00;
2000 my $LBASE = 0x1100;
2001 my $VBASE = 0x1161;
2002 my $TBASE = 0x11a7;
2003 my $LCOUNT = 19;
2004 my $VCOUNT = 21;
2005 my $TCOUNT = 28;
2006 my $NCOUNT = $VCOUNT * $TCOUNT;
2007 my $SCOUNT = $LCOUNT * $NCOUNT;
2009 my @seq = @_;
2010 my @ret;
2011 my $i;
2013 for ($i = 0; $i < @seq; $i++)
2015 my $ch = $seq[$i];
2016 if ($ch >= $LBASE && $ch < $LBASE + $LCOUNT && $i < @seq - 1 &&
2017 $seq[$i+1] >= $VBASE && $seq[$i+1] < $VBASE + $VCOUNT)
2019 $ch = $SBASE + (($seq[$i] - $LBASE) * $VCOUNT + ($seq[$i+1] - $VBASE)) * $TCOUNT;
2020 $i++;
2022 if ($ch >= $SBASE && $ch < $SBASE + $SCOUNT && !(($ch - $SBASE) % $TCOUNT) && $i < @seq - 1 &&
2023 $seq[$i+1] > $TBASE && $seq[$i+1] < $TBASE + $TCOUNT)
2025 $ch += $seq[$i+1] - $TBASE;
2026 $i++;
2028 push @ret, $ch;
2030 return @ret;
2033 ################################################################
2034 # remove linguistic-only mappings from the case table
2035 sub remove_linguistic_mappings($$)
2037 my ($upper, $lower) = @_;
2039 # remove case mappings that don't round-trip
2041 for (my $i = 0; $i < @{$upper}; $i++)
2043 next unless defined ${$upper}[$i];
2044 my $ch = ${$upper}[$i];
2045 ${$upper}[$i] = undef unless defined ${$lower}[$ch] && ${$lower}[$ch] == $i;
2047 for (my $i = 0; $i < @{$lower}; $i++)
2049 next unless defined ${$lower}[$i];
2050 my $ch = ${$lower}[$i];
2051 ${$lower}[$i] = undef unless defined ${$upper}[$ch] && ${$upper}[$ch] == $i;
2055 ################################################################
2056 # read in the Unicode database files
2057 sub load_data()
2059 my $start;
2061 # now build mappings from the decomposition field of the Unicode database
2063 my $UNICODE_DATA = open_data_file( "ucd", "UnicodeData.txt" );
2064 while (<$UNICODE_DATA>)
2066 # Decode the fields ...
2067 my ($code, $name, $cat, $comb, $bidi,
2068 $decomp, $dec, $dig, $num, $mirror,
2069 $oldname, $comment, $upper, $lower, $title) = split /;/;
2070 my $src = hex $code;
2072 die "unknown category $cat" unless defined $categories{$cat};
2073 die "unknown directionality $bidi" unless defined $directions{$bidi};
2075 $category_table[$src] = $categories{$cat};
2076 $direction_table[$src] = $bidi;
2077 if ($cat eq "Mn" || $cat eq "Me" || $cat eq "Cf")
2079 $initial_joining_table[$src] = $joining_types{"T"};
2081 else
2083 $initial_joining_table[$src] = $joining_types{"U"};
2086 if ($lower ne "")
2088 $tolower_table[$src] = hex $lower;
2090 if ($upper ne "")
2092 $toupper_table[$src] = hex $upper;
2094 if ($dec ne "")
2096 $category_table[$src] |= $ctype{"digit"};
2098 if ($dig ne "")
2100 $digitmap_table[$src] = ord $dig;
2102 $combining_class_table[$src] = ($cat ne "Co") ? $comb : 0x100; # Private Use
2104 $category_table[$src] |= $ctype{"nonspacing"} if $bidi eq "NSM";
2105 $category_table[$src] |= $ctype{"diacritic"} if $name =~ /^(COMBINING)|(MODIFIER LETTER)\W/;
2106 $category_table[$src] |= $ctype{"vowelmark"} if $name =~ /\sVOWEL/ || $oldname =~ /\sVOWEL/;
2107 $category_table[$src] |= $ctype{"halfwidth"} if $name =~ /^HALFWIDTH\s/;
2108 $category_table[$src] |= $ctype{"fullwidth"} if $name =~ /^FULLWIDTH\s/;
2109 $category_table[$src] |= $ctype{"hiragana"} if $name =~ /(HIRAGANA)|(\WKANA\W)/;
2110 $category_table[$src] |= $ctype{"katakana"} if $name =~ /(KATAKANA)|(\WKANA\W)/;
2111 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^<CJK Ideograph/;
2112 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^CJK COMPATIBILITY IDEOGRAPH/;
2113 $category_table[$src] |= $ctype{"ideograph"} if $name =~ /^HANGZHOU/;
2114 $category_table[$src] |= $ctype{"highsurrogate"} if $name =~ /High Surrogate/;
2115 $category_table[$src] |= $ctype{"lowsurrogate"} if $name =~ /Low Surrogate/;
2117 # copy the category and direction for everything between First/Last pairs
2118 if ($name =~ /, First>/) { $start = $src; }
2119 if ($name =~ /, Last>/)
2121 while ($start < $src)
2123 $category_table[$start] = $category_table[$src];
2124 $direction_table[$start] = $direction_table[$src];
2125 $combining_class_table[$start] = $combining_class_table[$src];
2126 $start++;
2130 next if $decomp eq ""; # no decomposition, skip it
2132 if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)/)
2134 my @seq = map { hex $_; } (split /\s+/, (split /\s+/, $decomp, 2)[1]);
2135 $decomp_compat_table[$src] = \@seq;
2138 if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
2140 # decomposition of the form "<foo> 1234" -> use char if type is known
2141 my $dst = hex $2;
2142 if ($1 eq "narrow")
2144 $halfwidth_table[$dst] = $src;
2145 $fullwidth_table[$src] = $dst;
2147 elsif ($1 eq "wide")
2149 next if $dst == 0x5c; # don't remap backslash
2150 $fullwidth_table[$dst] = $src;
2151 $halfwidth_table[$src] = $dst;
2153 elsif ($1 eq "font" || $1 eq "square" || $1 eq "circle")
2155 $fullwidth_table[$src] = $dst if $src >= 0x10000;
2157 elsif ($1 eq "isolated" || $1 eq "final" || $1 eq "initial" || $1 eq "medial")
2159 ${joining_forms{$1}}[$dst] = $src;
2162 elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
2164 # decomposition "<compat> 0020 1234" -> combining accent
2166 elsif ($decomp =~ /^([0-9a-fA-F]+)/)
2168 # store decomposition
2169 if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
2171 $decomp_table[$src] = $decomp_compat_table[$src] = [ hex $1, hex $2 ];
2173 elsif ($decomp =~ /^([0-9a-fA-F]+)$/)
2175 my $dst = hex $1;
2176 # Single char decomposition
2177 $decomp_table[$src] = $decomp_compat_table[$src] = [ $dst ];
2178 if ($name =~ /^CJK COMPATIBILITY IDEOGRAPH/)
2180 $cjk_compat_table[$src] = $dst;
2181 $fullwidth_table[$src] = $dst if $src >= 0x10000;
2186 close $UNICODE_DATA;
2188 # patch the category of some special characters
2190 for (my $i = 0; $i < @decomp_table; $i++)
2192 next unless defined $decomp_table[$i];
2193 $category_table[$i] |= $category_table[$decomp_table[$i]->[0]];
2195 foreach my $cat (keys %special_categories)
2197 my $flag = $ctype{$cat};
2198 foreach my $i (@{$special_categories{$cat}}) { $category_table[$i] |= $flag; }
2200 for (my $i = 0; $i < @decomp_compat_table; $i++)
2202 next unless defined $decomp_compat_table[$i];
2203 next unless @{$decomp_compat_table[$i]} == 2;
2204 $category_table[$i] |= $category_table[$decomp_compat_table[$i]->[1]] & $ctype{"diacritic"};
2207 # load the composition exclusions
2209 my $EXCL = open_data_file( "ucd", "CompositionExclusions.txt" );
2210 while (<$EXCL>)
2212 s/\#.*//; # remove comments
2213 if (/^([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)\s*$/)
2215 foreach my $i (hex $1 .. hex $2) { $comp_exclusions[$i] = 1; }
2217 elsif (/^([0-9a-fA-F]+)\s*$/)
2219 $comp_exclusions[hex $1] = 1;
2222 close $EXCL;
2224 # load the IDNA mappings
2226 @idna_decomp_table = @decomp_compat_table;
2227 my $IDNA = open_data_file( "idna", "IdnaMappingTable.txt" );
2228 while (<$IDNA>)
2230 s/\#.*//; # remove comments
2231 next if /^\s*$/;
2232 my ($char, $type, $mapping) = split /;/;
2233 my ($ch1, $ch2);
2234 if ($char =~ /([0-9a-fA-F]+)\.\.([0-9a-fA-F]+)/)
2236 $ch1 = hex $1;
2237 $ch2 = hex $2;
2239 elsif ($char =~ /([0-9a-fA-F]+)/)
2241 $ch1 = $ch2 = hex $1;
2244 if ($type =~ /mapped/ || $type =~ /deviation/)
2246 $mapping =~ s/^\s*(([0-9a-fA-F]+\s+)+)\s*$/$1/;
2247 my @seq = map { hex $_; } split /\s+/, $mapping;
2248 foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = @seq ? \@seq : [ 0 ]; }
2250 elsif ($type =~ /valid/)
2253 elsif ($type =~ /ignored/)
2255 foreach my $i ($ch1 .. $ch2) { $idna_decomp_table[$i] = [ 0 ]; }
2257 elsif ($type =~ /disallowed/)
2259 foreach my $i ($ch1 .. $ch2)
2261 $idna_decomp_table[$i] = undef;
2262 $idna_disallowed[$i] = 1;
2266 close $IDNA;
2268 # load the Unihan mappings
2270 my $UNIHAN = open_data_file( "unihan", "Unihan_Variants.txt" );
2271 while (<$UNIHAN>)
2273 s/\#.*//; # remove comments
2274 next if /^\s*$/;
2275 if (/^U\+([0-9a-fA-F]{4})\s+kTraditionalVariant\s+U\+([0-9a-fA-F]{4})$/)
2277 next if hex $1 < 0x4dc0; # skip extension A
2278 $chinese_traditional_table[hex $1] = hex $2;
2280 elsif (/^U\+([0-9a-fA-F]{4})\s+kSimplifiedVariant\s+U\+([0-9a-fA-F]{4})$/)
2282 next if hex $1 < 0x4dc0; # skip extension A
2283 $chinese_simplified_table[hex $1] = hex $2;
2286 close $UNIHAN;
2287 foreach my $i (0xf900..0xfaff)
2289 next unless defined $cjk_compat_table[$i];
2290 next if defined $chinese_simplified_table[$cjk_compat_table[$i]];
2291 $chinese_simplified_table[$i] = $cjk_compat_table[$i];
2296 ################################################################
2297 # add a new registry key
2298 sub add_registry_key($$$)
2300 my ($base, $key, $defval) = @_;
2301 $registry_keys{"$base\\$key"} = [ $defval ] unless defined $registry_keys{"$base\\$key"};
2304 ################################################################
2305 # add a new registry value with explicit type
2306 sub add_registry_value($$$$)
2308 my ($base, $key, $name, $value) = @_;
2309 add_registry_key( $base, $key, undef );
2310 push @{$registry_keys{"$base\\$key"}}, "'$name' = $value";
2313 ################################################################
2314 # add a new registry string value
2315 sub add_registry_string_value($$$$)
2317 my ($base, $key, $name, $value) = @_;
2318 $value =~ s/\'/\'\'/g;
2319 add_registry_value( $base, $key, $name, "s '$value'" );
2322 ################################################################
2323 # add a new registry dword value
2324 sub add_registry_dword_value($$$$)
2326 my ($base, $key, $name, $value) = @_;
2327 add_registry_value( $base, $key, $name, "d $value" );
2330 ################################################################
2331 # add a new registry binary value
2332 sub add_registry_binary_value($$$$)
2334 my ($base, $key, $name, $value) = @_;
2335 add_registry_value( $base, $key, $name, "b " . join "", map { sprintf "%02x", $_; } unpack( "C*", $value ));
2338 ################################################################
2339 # define a new lead byte
2340 sub add_lead_byte($)
2342 my $ch = shift;
2343 return if defined $cp2uni[$ch];
2344 push @lead_bytes, $ch;
2345 $cp2uni[$ch] = 0;
2348 ################################################################
2349 # define a new char mapping
2350 sub add_mapping($$)
2352 my ($cp, $uni) = @_;
2353 $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
2354 $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
2355 if ($cp > 0xff) { add_lead_byte( $cp >> 8 ); }
2358 ################################################################
2359 # get a mapping including glyph chars for MB_USEGLYPHCHARS
2360 sub get_glyphs_mapping(@)
2362 my @table = @_;
2364 for (my $i = 0; $i < @glyph2uni; $i++)
2366 $table[$i] = $glyph2uni[$i] if defined $glyph2uni[$i];
2368 return @table;
2371 ################################################################
2372 # build EUC-JP table from the JIS 0208/0212 files
2373 sub dump_eucjp_codepage()
2375 @cp2uni = ();
2376 @glyph2uni = ();
2377 @lead_bytes = ();
2378 @uni2cp = ();
2379 $default_char = $DEF_CHAR;
2380 $default_wchar = 0x30fb;
2382 # ASCII chars
2383 foreach my $i (0x00 .. 0x7f) { add_mapping( $i, $i ); }
2385 # lead bytes
2386 foreach my $i (0x8e, 0xa1 .. 0xfe) { add_lead_byte($i); }
2388 # JIS X 0201 right plane
2389 foreach my $i (0xa1 .. 0xdf) { add_mapping( 0x8e00 + $i, 0xfec0 + $i ); }
2391 # undefined chars
2392 foreach my $i (0x80 .. 0x8d, 0x8f .. 0x9f) { $cp2uni[$i] = $i; }
2393 $cp2uni[0xa0] = 0xf8f0;
2394 $cp2uni[0xff] = 0xf8f3;
2396 # Fix backslash conversion
2397 add_mapping( 0xa1c0, 0xff3c );
2399 # Add private mappings for rows undefined in JIS 0208/0212
2400 my $private = 0xe000;
2401 foreach my $hi (0xf5 .. 0xfe)
2403 foreach my $lo (0xa1 .. 0xfe)
2405 add_mapping( ($hi << 8) + $lo, $private++ );
2408 foreach my $hi (0xf5 .. 0xfe)
2410 foreach my $lo (0x21 .. 0x7e)
2412 add_mapping( ($hi << 8) + $lo, $private++ );
2416 my $INPUT = open_data_file( "jis0208" );
2417 while (<$INPUT>)
2419 next if /^\#/; # skip comments
2420 next if /^$/; # skip empty lines
2421 next if /\x1a/; # skip ^Z
2422 if (/^0x[0-9a-fA-F]+\s+0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
2424 add_mapping( 0x8080 + hex $1, hex $2 );
2425 next;
2427 die "Unrecognized line $_\n";
2429 close $INPUT;
2431 $INPUT = open_data_file( "jis0212" );
2432 while (<$INPUT>)
2434 next if /^\#/; # skip comments
2435 next if /^$/; # skip empty lines
2436 next if /\x1a/; # skip ^Z
2437 if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
2439 add_mapping( 0x8000 + hex $1, hex $2 );
2440 next;
2442 die "Unrecognized line $_\n";
2444 close $INPUT;
2446 output_codepage_file( 20932 );
2449 ################################################################
2450 # build Korean Wansung table from the KSX1001 file
2451 sub dump_krwansung_codepage(@)
2453 my @cp949 = @_;
2454 @cp2uni = ();
2455 @glyph2uni = ();
2456 @lead_bytes = ();
2457 @uni2cp = ();
2458 $default_char = 0x3f;
2459 $default_wchar = 0x003f;
2461 # ASCII and undefined chars
2462 foreach my $i (0x00 .. 0x9f) { add_mapping( $i, $i ); }
2463 add_mapping( 0xa0, 0xf8e6 );
2464 add_mapping( 0xad, 0xf8e7 );
2465 add_mapping( 0xae, 0xf8e8 );
2466 add_mapping( 0xaf, 0xf8e9 );
2467 add_mapping( 0xfe, 0xf8ea );
2468 add_mapping( 0xff, 0xf8eb );
2470 my $INPUT = open_data_file( "ksx1001" );
2471 while (<$INPUT>)
2473 next if /^\#/; # skip comments
2474 next if /^$/; # skip empty lines
2475 next if /\x1a/; # skip ^Z
2476 if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
2478 add_mapping( 0x8080 + hex $1, hex $2 );
2479 next;
2481 die "Unrecognized line $_\n";
2483 close $INPUT;
2485 # get some extra mappings from cp 949
2486 my @defined_lb;
2487 map { $defined_lb[$_] = 1; } @lead_bytes;
2488 foreach my $i (0x0000 .. 0xffff)
2490 next if ($i >= 0x1100 && $i <= 0x11ff); # range not used in 20949
2491 next unless defined $cp949[$i];
2492 if ($cp949[$i] >= 0xff)
2494 # only add chars for lead bytes that exist in 20949
2495 my $hi = $cp949[$i] >> 8;
2496 my $lo = $cp949[$i] & 0xff;
2497 next unless $defined_lb[$hi];
2498 next unless $lo >= 0xa1 && $lo <= 0xfe;
2500 add_mapping( $cp949[$i], $i );
2503 output_codepage_file( 20949 );
2507 ################################################################
2508 # dump an array of integers
2509 sub dump_array($$@)
2511 my ($bit_width, $default, @array) = @_;
2512 my $format = sprintf "0x%%0%ux", $bit_width / 4;
2513 my $i;
2514 my $ret = " ";
2515 for ($i = 0; $i < $#array; $i++)
2517 $ret .= sprintf($format, defined $array[$i] ? $array[$i] : $default);
2518 $ret .= (($i % 8) != 7) ? ", " : ",\n ";
2520 $ret .= sprintf($format, defined $array[$i] ? $array[$i] : $default);
2521 return $ret;
2525 ################################################################
2526 # dump an SBCS mapping table in binary format
2527 sub dump_binary_sbcs_table($)
2529 my $codepage = shift;
2531 my @header = ( 13, $codepage, 1, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );
2532 my $wc_offset = 256 + 3 + (@glyph2uni ? 256 : 0);
2534 print OUTPUT pack "S<*", @header;
2535 print OUTPUT pack "C12", (0) x 12;
2536 print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];
2538 if (@glyph2uni)
2540 print OUTPUT pack "S<*", 256, get_glyphs_mapping(@cp2uni[0 .. 255]);
2542 else
2544 print OUTPUT pack "S<*", 0;
2547 print OUTPUT pack "S<*", 0, 0;
2549 print OUTPUT pack "C*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
2553 ################################################################
2554 # dump a DBCS mapping table in binary format
2555 sub dump_binary_dbcs_table($)
2557 my $codepage = shift;
2558 my @lb_ranges = get_lb_ranges();
2559 my @header = ( 13, $codepage, 2, $default_char, $default_wchar, $cp2uni[$default_char], $uni2cp[$default_wchar] );
2561 my @offsets = (0) x 256;
2562 my $pos = 0;
2563 foreach my $i (@lead_bytes)
2565 $offsets[$i] = ($pos += 256);
2566 $cp2uni[$i] = 0;
2569 my $wc_offset = 256 + 3 + 256 * (1 + scalar @lead_bytes);
2571 print OUTPUT pack "S<*", @header;
2572 print OUTPUT pack "C12", @lb_ranges, 0 x 12;
2573 print OUTPUT pack "S<*", $wc_offset, map { $_ || 0; } @cp2uni[0 .. 255];
2574 print OUTPUT pack "S<*", 0, scalar @lb_ranges / 2, @offsets;
2576 foreach my $i (@lead_bytes)
2578 my $base = $i << 8;
2579 print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_wchar; } @cp2uni[$base .. $base + 255];
2582 print OUTPUT pack "S<", 4;
2583 print OUTPUT pack "S<*", map { defined $_ ? $_ : $default_char; } @uni2cp[0 .. 65535];
2587 ################################################################
2588 # get the list of defined lead byte ranges
2589 sub get_lb_ranges()
2591 my @list = ();
2592 my @ranges = ();
2594 foreach my $i (@lead_bytes) { $list[$i] = 1; }
2595 my $on = 0;
2596 for (my $i = 0; $i < 256; $i++)
2598 if ($on)
2600 if (!defined $list[$i]) { push @ranges, $i-1; $on = 0; }
2602 else
2604 if ($list[$i]) { push @ranges, $i; $on = 1; }
2607 if ($on) { push @ranges, 0xff; }
2608 return @ranges;
2611 ################################################################
2612 # dump the Indic Syllabic Category table
2613 sub dump_indic($)
2615 my $filename = shift;
2616 my @indic_table;
2618 my $INPUT = open_data_file( "ucd", "IndicSyllabicCategory.txt" );
2619 while (<$INPUT>)
2621 next if /^\#/; # skip comments
2622 next if /^\s*$/; # skip empty lines
2623 next if /\x1a/; # skip ^Z
2624 if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
2626 my $type = $2;
2627 die "unknown indic $type" unless defined $indic_types{$type};
2628 if (hex $1 < 65536)
2630 $indic_table[hex $1] = $indic_types{$type};
2632 next;
2634 elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
2636 my $type = $3;
2637 die "unknown indic $type" unless defined $indic_types{$type};
2638 if (hex $1 < 65536 and hex $2 < 65536)
2640 foreach my $i (hex $1 .. hex $2)
2642 $indic_table[$i] = $indic_types{$type};
2645 next;
2647 die "malformed line $_";
2649 close $INPUT;
2651 my $prev_data_file = $current_data_file;
2652 $INPUT = open_data_file( "ucd", "IndicPositionalCategory.txt" );
2653 while (<$INPUT>)
2655 next if /^\#/; # skip comments
2656 next if /^\s*$/; # skip empty lines
2657 next if /\x1a/; # skip ^Z
2658 if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*#/)
2660 my $type = $2;
2661 die "unknown matra $type" unless defined $matra_types{$type};
2662 $indic_table[hex $1] |= $matra_types{$type} << 8;
2663 next;
2665 elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*#/)
2667 my $type = $3;
2668 die "unknown matra $type" unless defined $matra_types{$type};
2669 foreach my $i (hex $1 .. hex $2)
2671 $indic_table[$i] |= $matra_types{$type} << 8;
2673 next;
2675 die "malformed line $_";
2677 close $INPUT;
2679 open OUTPUT,">$filename.new" or die "Cannot create $filename";
2680 print "Building $filename\n";
2681 print OUTPUT "/* Unicode Indic Syllabic Category */\n";
2682 print OUTPUT "/* generated from $prev_data_file */\n";
2683 print OUTPUT "/* and from $current_data_file */\n";
2684 print OUTPUT "/* DO NOT EDIT!! */\n\n";
2685 print OUTPUT "#include \"windef.h\"\n\n";
2687 dump_two_level_mapping( "indic_syllabic_table", $indic_types{'Other'}, 16, @indic_table );
2689 close OUTPUT;
2690 save_file($filename);
2693 ################################################################
2694 # dump the Line Break Properties table
2695 sub dump_linebreak($)
2697 my $filename = shift;
2698 my @break_table;
2700 my $INPUT = open_data_file( "ucd", "LineBreak.txt" );
2701 while (<$INPUT>)
2703 next if /^\#/; # skip comments
2704 next if /^\s*$/; # skip empty lines
2705 next if /\x1a/; # skip ^Z
2706 if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
2708 my $type = $2;
2709 die "unknown breaktype $type" unless defined $break_types{$type};
2710 $break_table[hex $1] = $break_types{$type};
2711 next;
2713 elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z][0-9A-Z])+\s*/)
2715 my $type = $3;
2716 die "unknown breaktype $type" unless defined $break_types{$type};
2717 foreach my $i (hex $1 .. hex $2)
2719 $break_table[$i] = $break_types{$type};
2721 next;
2723 elsif (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
2725 my $type = $2;
2726 die "unknown breaktype $type" unless defined $break_types{$type};
2727 $break_table[hex $1] = $break_types{$type};
2728 next;
2730 elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([0-9A-Z][0-9A-Z])+\s*/)
2732 my $type = $3;
2733 die "unknown breaktype $type" unless defined $break_types{$type};
2734 foreach my $i (hex $1 .. hex $2)
2736 $break_table[$i] = $break_types{$type};
2738 next;
2740 die "malformed line $_";
2742 close $INPUT;
2744 open OUTPUT,">$filename.new" or die "Cannot create $filename";
2745 print "Building $filename\n";
2746 print OUTPUT "/* Unicode Line Break Properties */\n";
2747 print OUTPUT "/* generated from $current_data_file */\n";
2748 print OUTPUT "/* DO NOT EDIT!! */\n\n";
2749 print OUTPUT "#include \"windef.h\"\n\n";
2751 dump_two_level_mapping( "wine_linebreak_table", $break_types{'XX'}, 16, @break_table );
2753 close OUTPUT;
2754 save_file($filename);
2757 my %scripts =
2759 "Unknown" => 0,
2760 "Common" => 1,
2761 "Inherited" => 2,
2762 "Arabic" => 3,
2763 "Armenian" => 4,
2764 "Avestan" => 5,
2765 "Balinese" => 6,
2766 "Bamum" => 7,
2767 "Batak" => 8,
2768 "Bengali" => 9,
2769 "Bopomofo" => 10,
2770 "Brahmi" => 11,
2771 "Braille" => 12,
2772 "Buginese" => 13,
2773 "Buhid" => 14,
2774 "Canadian_Aboriginal" => 15,
2775 "Carian" => 16,
2776 "Cham" => 17,
2777 "Cherokee" => 18,
2778 "Coptic" => 19,
2779 "Cuneiform" => 20,
2780 "Cypriot" => 21,
2781 "Cyrillic" => 22,
2782 "Deseret" => 23,
2783 "Devanagari" => 24,
2784 "Egyptian_Hieroglyphs" => 25,
2785 "Ethiopic" => 26,
2786 "Georgian" => 27,
2787 "Glagolitic" => 28,
2788 "Gothic" => 29,
2789 "Greek" => 30,
2790 "Gujarati" => 31,
2791 "Gurmukhi" => 32,
2792 "Han" => 33,
2793 "Hangul" => 34,
2794 "Hanunoo" => 35,
2795 "Hebrew" => 36,
2796 "Hiragana" => 37,
2797 "Imperial_Aramaic" => 38,
2798 "Inscriptional_Pahlavi" => 39,
2799 "Inscriptional_Parthian" => 40,
2800 "Javanese" => 41,
2801 "Kaithi" => 42,
2802 "Kannada" => 43,
2803 "Katakana" => 44,
2804 "Kayah_Li" => 45,
2805 "Kharoshthi" => 46,
2806 "Khmer" => 47,
2807 "Lao" => 48,
2808 "Latin" => 49,
2809 "Lepcha" => 50,
2810 "Limbu" => 51,
2811 "Linear_B" => 52,
2812 "Lisu" => 53,
2813 "Lycian" => 54,
2814 "Lydian" => 55,
2815 "Malayalam" => 56,
2816 "Mandaic" => 57,
2817 "Meetei_Mayek" => 58,
2818 "Mongolian" => 59,
2819 "Myanmar" => 60,
2820 "New_Tai_Lue" => 61,
2821 "Nko" => 62,
2822 "Ogham" => 63,
2823 "Ol_Chiki" => 64,
2824 "Old_Italic" => 65,
2825 "Old_Persian" => 66,
2826 "Old_South_Arabian" => 67,
2827 "Old_Turkic" => 68,
2828 "Oriya" => 69,
2829 "Osmanya" => 70,
2830 "Phags_Pa" => 71,
2831 "Phoenician" => 72,
2832 "Rejang" => 73,
2833 "Runic" => 74,
2834 "Samaritan" => 75,
2835 "Saurashtra" => 76,
2836 "Shavian" => 77,
2837 "Sinhala" => 78,
2838 "Sundanese" => 79,
2839 "Syloti_Nagri" => 80,
2840 "Syriac" => 81,
2841 "Tagalog" => 82,
2842 "Tagbanwa" => 83,
2843 "Tai_Le" => 84,
2844 "Tai_Tham" => 85,
2845 "Tai_Viet" => 86,
2846 "Tamil" => 87,
2847 "Telugu" => 88,
2848 "Thaana" => 89,
2849 "Thai" => 90,
2850 "Tibetan" => 91,
2851 "Tifinagh" => 92,
2852 "Ugaritic" => 93,
2853 "Vai" => 94,
2854 "Yi" => 95,
2855 # Win8/Win8.1
2856 "Chakma" => 96,
2857 "Meroitic_Cursive" => 97,
2858 "Meroitic_Hieroglyphs" => 98,
2859 "Miao" => 99,
2860 "Sharada" => 100,
2861 "Sora_Sompeng" => 101,
2862 "Takri" => 102,
2863 # Win10
2864 "Bassa_Vah" => 103,
2865 "Caucasian_Albanian" => 104,
2866 "Duployan" => 105,
2867 "Elbasan" => 106,
2868 "Grantha" => 107,
2869 "Khojki" => 108,
2870 "Khudawadi" => 109,
2871 "Linear_A" => 110,
2872 "Mahajani" => 111,
2873 "Manichaean" => 112,
2874 "Mende_Kikakui" => 113,
2875 "Modi" => 114,
2876 "Mro" => 115,
2877 "Nabataean" => 116,
2878 "Old_North_Arabian" => 117,
2879 "Old_Permic" => 118,
2880 "Pahawh_Hmong" => 119,
2881 "Palmyrene" => 120,
2882 "Pau_Cin_Hau" => 121,
2883 "Psalter_Pahlavi" => 122,
2884 "Siddham" => 123,
2885 "Tirhuta" => 124,
2886 "Warang_Citi" => 125,
2887 # Win10 RS1
2888 "Adlam" => 126,
2889 "Ahom" => 127,
2890 "Anatolian_Hieroglyphs" => 128,
2891 "Bhaiksuki" => 129,
2892 "Hatran" => 130,
2893 "Marchen" => 131,
2894 "Multani" => 132,
2895 "Newa" => 133,
2896 "Old_Hungarian" => 134,
2897 "Osage" => 135,
2898 "SignWriting" => 136,
2899 "Tangut" => 137,
2900 # Win10 RS4
2901 "Masaram_Gondi" => 138,
2902 "Nushu" => 139,
2903 "Soyombo" => 140,
2904 "Zanabazar_Square" => 141,
2905 # Win10 1903
2906 "Dogra" => 142,
2907 "Gunjala_Gondi" => 143,
2908 "Hanifi_Rohingya" => 144,
2909 "Makasar" => 145,
2910 "Medefaidrin" => 146,
2911 "Old_Sogdian" => 147,
2912 "Sogdian" => 148,
2913 # Win10 2004
2914 "Elymaic" => 149,
2915 "Nyiakeng_Puachue_Hmong" => 150,
2916 "Nandinagari" => 151,
2917 "Wancho" => 152,
2918 # Win11
2919 "Chorasmian" => 153,
2920 "Dives_Akuru" => 154,
2921 "Khitan_Small_Script" => 155,
2922 "Yezidi" => 156,
2925 ################################################################
2926 # dump Script IDs table
2927 sub dump_scripts($)
2929 my $filename = shift;
2930 my $header = $filename;
2931 my @scripts_table;
2932 my $script_index;
2933 my $i;
2935 my $INPUT = open_data_file( "ucd", "Scripts.txt" );
2936 # Fill the table
2937 # Unknown script id is always 0, so undefined scripts are automatically treated as such
2938 while (<$INPUT>)
2940 my $type = "";
2942 next if /^\#/; # skip comments
2943 next if /^\s*$/; # skip empty lines
2944 next if /\x1a/; # skip ^Z
2945 if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
2947 $type = $2;
2948 if (defined $scripts{$type})
2950 $scripts_table[hex $1] = $scripts{$type};
2952 next;
2954 elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
2956 $type = $3;
2957 if (defined $scripts{$type})
2959 foreach my $i (hex $1 .. hex $2)
2961 $scripts_table[$i] = $scripts{$type};
2964 next;
2968 close $INPUT;
2970 $header = "$filename.h";
2971 open OUTPUT,">$header.new" or die "Cannot create $header";
2972 print "Building $header\n";
2973 print OUTPUT "/* Unicode Script IDs */\n";
2974 print OUTPUT "/* generated from $current_data_file */\n";
2975 print OUTPUT "/* DO NOT EDIT!! */\n\n";
2977 print OUTPUT "enum unicode_script_id {\n";
2978 foreach my $script (sort { $scripts{$a} <=> $scripts{$b} } keys %scripts)
2980 print OUTPUT " Script_$script = $scripts{$script},\n";
2982 print OUTPUT " Script_LastId = ", (scalar keys %scripts) - 1, "\n";
2983 print OUTPUT "};\n";
2985 close OUTPUT;
2986 save_file($header);
2988 $filename = "$filename.c";
2989 open OUTPUT,">$filename.new" or die "Cannot create $header";
2990 print "Building $filename\n";
2991 print OUTPUT "/* Unicode Script IDs */\n";
2992 print OUTPUT "/* generated from $current_data_file */\n";
2993 print OUTPUT "/* DO NOT EDIT!! */\n\n";
2994 print OUTPUT "#include \"windef.h\"\n\n";
2996 dump_two_level_mapping( "wine_scripts_table", 0, 16, @scripts_table );
2997 close OUTPUT;
2998 save_file($filename);
3001 ################################################################
3002 # dump the BiDi mirroring table
3003 sub dump_mirroring($)
3005 my $filename = shift;
3006 my @mirror_table = ();
3008 my $INPUT = open_data_file( "ucd", "BidiMirroring.txt" );
3009 while (<$INPUT>)
3011 next if /^\#/; # skip comments
3012 next if /^$/; # skip empty lines
3013 next if /\x1a/; # skip ^Z
3014 if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+)/)
3016 $mirror_table[hex $1] = hex $2;
3017 next;
3019 die "malformed line $_";
3021 close $INPUT;
3023 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3024 print "Building $filename\n";
3025 print OUTPUT "/* Unicode BiDi mirroring */\n";
3026 print OUTPUT "/* generated from $current_data_file */\n";
3027 print OUTPUT "/* DO NOT EDIT!! */\n\n";
3028 print OUTPUT "#include \"windef.h\"\n\n";
3029 dump_two_level_mapping( "wine_mirror_map", 0, 16, @mirror_table );
3030 close OUTPUT;
3031 save_file($filename);
3034 ################################################################
3035 # dump the Bidi Brackets
3036 sub dump_bracket($)
3038 my $filename = shift;
3039 my @bracket_table;
3041 my $INPUT = open_data_file( "ucd", "BidiBrackets.txt" );
3042 while (<$INPUT>)
3044 next if /^\#/; # skip comments
3045 next if /^\s*$/; # skip empty lines
3046 next if /\x1a/; # skip ^Z
3047 if (/^\s*([0-9a-fA-F]+)\s*;\s*([0-9a-fA-F]+);\s*([con])/)
3049 my $type = $3;
3050 die "unknown bracket $type" unless defined $bracket_types{$type};
3051 die "characters too distant $1 and $2" if abs(hex($2) - hex($1)) >= 128;
3052 $bracket_table[hex $1] = (hex($2) - hex($1)) % 255;
3053 $bracket_table[hex $1] += $bracket_types{$type} << 8;
3054 next;
3056 die "malformed line $_";
3058 close $INPUT;
3060 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3061 print "Building $filename\n";
3062 print OUTPUT "/* Unicode Bidirectional Bracket table */\n";
3063 print OUTPUT "/* generated from $current_data_file */\n";
3064 print OUTPUT "/* DO NOT EDIT!! */\n\n";
3065 print OUTPUT "#include \"windef.h\"\n\n";
3067 dump_two_level_mapping( "bidi_bracket_table", 0, 16, @bracket_table );
3069 close OUTPUT;
3070 save_file($filename);
3073 ################################################################
3074 # dump the Arabic shaping table
3075 sub dump_shaping($)
3077 my $filename = shift;
3078 my @joining_table = @initial_joining_table;
3080 my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
3081 while (<$INPUT>)
3083 next if /^\#/; # skip comments
3084 next if /^\s*$/; # skip empty lines
3085 next if /\x1a/; # skip ^Z
3086 if (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/)
3088 my $type = $2;
3089 $joining_table[hex $1] = $joining_types{$type};
3090 next;
3092 die "malformed line $_";
3094 close $INPUT;
3096 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3097 print "Building $filename\n";
3098 print OUTPUT "/* Unicode Arabic shaping */\n";
3099 print OUTPUT "/* generated from $current_data_file */\n";
3100 print OUTPUT "/* DO NOT EDIT!! */\n\n";
3101 print OUTPUT "#include \"windef.h\"\n\n";
3103 dump_two_level_mapping( "wine_shaping_table", 0, 16, @joining_table );
3105 print OUTPUT "\nconst unsigned short DECLSPEC_HIDDEN wine_shaping_forms[256][4] =\n{\n";
3106 for (my $i = 0x600; $i <= 0x6ff; $i++)
3108 printf OUTPUT " { 0x%04x, 0x%04x, 0x%04x, 0x%04x },\n",
3109 ${joining_forms{"isolated"}}[$i] || $i,
3110 ${joining_forms{"final"}}[$i] || $i,
3111 ${joining_forms{"initial"}}[$i] || $i,
3112 ${joining_forms{"medial"}}[$i] || $i;
3114 print OUTPUT "};\n";
3116 close OUTPUT;
3117 save_file($filename);
3120 ################################################################
3121 # dump the Arabic shaping table
3122 sub dump_arabic_shaping($)
3124 my $filename = shift;
3125 my @joining_table = @initial_joining_table;
3127 my $INPUT = open_data_file( "ucd", "ArabicShaping.txt" );
3128 while (<$INPUT>)
3130 next if /^\#/; # skip comments
3131 next if /^\s*$/; # skip empty lines
3132 next if /\x1a/; # skip ^Z
3133 if (/^\s*([0-9a-fA-F]+)\s*;.*;\s*([RLDCUT])\s*;\s*(\w+)/)
3135 my $type = $2;
3136 my $group = $3;
3138 if ($group eq "ALAPH" || $group eq "DALATH RISH")
3140 $joining_table[hex $1] = $joining_types{$group};
3142 else
3144 $joining_table[hex $1] = $joining_types{$type};
3147 next;
3149 die "malformed line $_";
3151 close $INPUT;
3153 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3154 print "Building $filename\n";
3155 print OUTPUT "/* Unicode Arabic shaping */\n";
3156 print OUTPUT "/* generated from $current_data_file */\n";
3157 print OUTPUT "/* DO NOT EDIT!! */\n\n";
3158 print OUTPUT "#include \"windef.h\"\n\n";
3160 dump_two_level_mapping( "arabic_shaping_table", 0, 16, @joining_table );
3162 close OUTPUT;
3163 save_file($filename);
3166 ################################################################
3167 # dump the Vertical Orientation table
3168 sub dump_vertical($$)
3170 my ($filename, $unix) = @_;
3171 my @vertical_table;
3173 my $INPUT = open_data_file( "ucd", "VerticalOrientation.txt" );
3174 while (<$INPUT>)
3176 next if /^\#/; # skip comments
3177 next if /^\s*$/; # skip empty lines
3178 next if /\x1a/; # skip ^Z
3179 if (/^\s*([0-9a-fA-F]+)\s*;\s*([a-zA-Z_]+)\s*/)
3181 my $type = $2;
3182 die "unknown vertical $type" unless defined $vertical_types{$type};
3183 if (hex $1 < 65536)
3185 $vertical_table[hex $1] = $vertical_types{$type};
3187 next;
3189 elsif (/^\s*([0-9a-fA-F]+)\.\.\s*([0-9a-fA-F]+)\s*;\s*([A-Za-z_]+)\s*/)
3191 my $type = $3;
3192 die "unknown vertical $type" unless defined $vertical_types{$type};
3193 foreach my $i (hex $1 .. hex $2)
3195 $vertical_table[$i] = $vertical_types{$type};
3197 next;
3199 die "malformed line $_";
3201 close $INPUT;
3203 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3204 print "Building $filename\n";
3205 print OUTPUT "/* Unicode Vertical Orientation */\n";
3206 print OUTPUT "/* generated from $current_data_file */\n";
3207 print OUTPUT "/* DO NOT EDIT!! */\n\n";
3208 if ($unix)
3210 print OUTPUT "#if 0\n";
3211 print OUTPUT "#pragma makedep unix\n";
3212 print OUTPUT "#endif\n\n";
3214 print OUTPUT "#include \"windef.h\"\n\n";
3216 dump_two_level_mapping( "vertical_orientation_table", $vertical_types{'R'}, 16, @vertical_table );
3218 close OUTPUT;
3219 save_file($filename);
3222 ################################################################
3223 # compress a mapping table by removing identical rows
3224 sub compress_array($$@)
3226 my $rows = shift;
3227 my $def = shift;
3228 my @table = @_;
3229 my $len = @table / $rows;
3230 my @array;
3231 my $data = "";
3233 # try to merge table rows
3234 for (my $row = 0; $row < $rows; $row++)
3236 my $rowtxt = pack "U*", map { defined($_) ? $_ : $def; } @table[($row * $len)..(($row + 1) * $len - 1)];
3237 my $pos = index $data, $rowtxt;
3238 if ($pos == -1)
3240 # check if the tail of the data can match the start of the new row
3241 my $first = substr( $rowtxt, 0, 1 );
3242 for (my $i = length($data) - 1; $i > 0; $i--)
3244 $pos = index( substr( $data, -$i ), $first );
3245 last if $pos == -1;
3246 $i -= $pos;
3247 next unless substr( $data, -$i ) eq substr( $rowtxt, 0, $i );
3248 substr( $data, -$i ) = "";
3249 last;
3251 $pos = length $data;
3252 $data .= $rowtxt;
3254 $array[$row] = $rows + $pos;
3256 return @array, unpack "U*", $data;
3259 ################################################################
3260 # dump a char -> 16-bit value mapping table using two-level tables
3261 sub dump_two_level_mapping($$$@)
3263 my $name = shift;
3264 my $def = shift;
3265 my $size = shift;
3266 my $type = $size == 16 ? "unsigned short" : "unsigned int";
3267 my (@array, @row_array, @data, @row_data);
3268 (@row_array[0..4095], @data) = compress_array( 4096, $def, @_[0..65535] );
3269 (@array[0..255], @row_data) = compress_array( 256, 0, @row_array );
3271 for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += @row_data + 256 - 4096; }
3273 printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%d] =\n{\n", $type, $name, @array + @row_data + @data;
3274 printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array );
3275 printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @row_data );
3276 printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @data );
3279 ################################################################
3280 # dump a char -> value mapping table using three-level tables
3281 sub dump_three_level_mapping($$@)
3283 my $name = shift;
3284 my $def = shift;
3285 my $size = shift;
3286 my $type = $size == 16 ? "unsigned short" : "unsigned int";
3287 my $level3 = ($MAX_CHAR + 1) / 16;
3288 my $level2 = $level3 / 16;
3289 my $level1 = $level2 / 16;
3290 my @array3 = compress_array( $level3, $def, @_[0..$MAX_CHAR] );
3291 my @array2 = compress_array( $level2, 0, @array3[0..$level3-1] );
3292 my @array1 = compress_array( $level1, 0, @array2[0..$level2-1] );
3294 for (my $i = $level2; $i < @array2; $i++) { $array2[$i] += @array1 + @array2 - $level2 - $level3; }
3295 for (my $i = $level1; $i < @array1; $i++) { $array1[$i] += @array1 - $level2; }
3297 printf OUTPUT "const %s DECLSPEC_HIDDEN %s[%u] =\n{\n", $type, $name, @array1 + (@array2 - $level2) + (@array3 - $level3);
3298 printf OUTPUT " /* level 1 offsets */\n%s,\n", dump_array( $size, 0, @array1[0..$level1-1] );
3299 printf OUTPUT " /* level 2 offsets */\n%s,\n", dump_array( $size, 0, @array1[$level1..$#array1] );
3300 printf OUTPUT " /* level 3 offsets */\n%s,\n", dump_array( $size, 0, @array2[$level2..$#array2] );
3301 printf OUTPUT " /* values */\n%s\n};\n", dump_array( $size, 0, @array3[$level3..$#array3] );
3304 ################################################################
3305 # dump a binary case mapping table in l_intl.nls format
3306 sub dump_binary_case_table(@)
3308 my (@table) = @_;
3309 my @difftable;
3310 my @res;
3312 for (my $i = 0; $i < @table; $i++)
3314 next unless defined $table[$i];
3315 $difftable[$i] = ($table[$i] - $i) & 0xffffffff;
3318 my (@low_array1, @low_array2, @low_data, @low_row_data);
3319 (@low_array2[0..4095], @low_data) = compress_array( 4096, 0, @difftable[0..65535] );
3320 (@low_array1[0..255], @low_row_data) = compress_array( 256, 0, @low_array2 );
3322 if (scalar @table > 0x10000)
3324 my (@high_array1, @high_array2, @high_data, @high_row_data);
3325 (@high_array2[0..32767], @high_data) = compress_array( 32768, 0, @difftable[65536..$MAX_CHAR] );
3326 (@high_array1[0..1023], @high_row_data) = compress_array( 1024, 0, @high_array2 );
3328 push @res, map { $_ + 1024; } @low_array1;
3329 push @res, map { $_ + @res + @low_row_data + @low_data; } @high_array1;
3330 push @res, map { $_ + @res + @low_row_data - 4096; } @low_row_data;
3331 push @res, @low_data;
3332 push @res, map { 2 * ($_ - 32768) + @res + @high_row_data; } @high_row_data;
3333 return pack( "S<*", 1 + scalar @res + 2 * scalar @high_data, @res ) . pack( "L<*", @high_data );
3335 else
3337 push @res, @low_array1;
3338 push @res, map { $_ + @res + @low_row_data - 4096; } @low_row_data;
3339 push @res, @low_data;
3340 return pack "S<*", 1 + scalar @res, @res;
3344 ################################################################
3345 # dump case mappings for l_intl.nls
3346 sub dump_intl_nls($)
3348 my @upper_table = @toupper_table;
3349 my @lower_table = @tolower_table;
3350 remove_linguistic_mappings( \@upper_table, \@lower_table );
3352 my $upper = dump_binary_case_table( @upper_table[0..65535] );
3353 my $lower = dump_binary_case_table( @lower_table[0..65535] );
3355 my $filename = shift;
3356 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3357 printf "Building $filename\n";
3359 binmode OUTPUT;
3360 print OUTPUT pack "S<", 1; # version
3361 print OUTPUT $upper;
3362 print OUTPUT $lower;
3363 close OUTPUT;
3364 save_file($filename);
3368 ################################################################
3369 # dump the bidi direction table
3370 sub dump_bidi_dir_table($)
3372 my $filename = shift;
3373 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3374 printf "Building $filename\n";
3375 printf OUTPUT "/* Unicode BiDi direction table */\n";
3376 printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
3377 printf OUTPUT "#include \"windef.h\"\n\n";
3379 my @table;
3381 for (my $i = 0; $i < 65536; $i++)
3383 $table[$i] = $bidi_types{$direction_table[$i]} if defined $direction_table[$i];
3386 dump_two_level_mapping( "bidi_direction_table", $bidi_types{"L"}, 16, @table );
3388 close OUTPUT;
3389 save_file($filename);
3393 sub rol($$)
3395 my ($byte, $count) = @_;
3396 return (($byte << $count) | ($byte >> (8 - $count))) & 0xff;
3399 ################################################################
3400 # compress the character properties table
3401 sub compress_char_props_table($@)
3403 my $rows = shift;
3404 my @table = @_;
3405 my $len = @table / $rows;
3406 my $pos = 0;
3407 my @array = (0) x $rows;
3408 my %sequences;
3410 # add some predefined sequences
3411 foreach my $i (0, 0xfb .. 0xff) { $sequences{pack "L*", (rol($i,5)) x $len} = $i; }
3413 # try to merge table rows
3414 for (my $row = 0; $row < $rows; $row++)
3416 my @table_row = map { defined $_ ? $_ : 0x7f; } @table[($row * $len)..(($row + 1) * $len - 1)];
3417 my $rowtxt = pack "L*", @table_row;
3418 if (defined($sequences{$rowtxt}))
3420 # reuse an existing row
3421 $array[$row] = $sequences{$rowtxt};
3423 else
3425 # create a new row
3426 $sequences{$rowtxt} = $array[$row] = ++$pos;
3427 push @array, @table_row;
3430 return @array;
3433 ################################################################
3434 # dump a normalization table in binary format
3435 sub dump_norm_table($)
3437 my $filename = shift;
3439 my %forms = ( "nfc" => 1, "nfd" => 2, "nfkc" => 5, "nfkd" => 6, "idna" => 13 );
3440 my %decomp = ( "nfc" => \@decomp_table,
3441 "nfd" => \@decomp_table,
3442 "nfkc" => \@decomp_compat_table,
3443 "nfkd" => \@decomp_compat_table ,
3444 "idna" => \@idna_decomp_table );
3446 open OUTPUT,">$filename.new" or die "Cannot create $filename";
3447 print "Building $filename\n";
3449 my $type = $filename;
3450 $type =~ s!.*/norm(\w+)\.nls!$1!;
3452 my $compose = $forms{$type} & 1;
3453 my $compat = !!($forms{$type} & 4) + ($type eq "idna");
3455 my @version = split /\./, $UNIVERSION;
3457 # combining classes
3459 my @classes;
3460 my @class_values;
3462 foreach my $c (grep defined, @combining_class_table)
3464 $classes[$c] = 1 if $c < 0x100;
3466 for (my $i = 0; $i < @classes; $i++)
3468 next unless defined $classes[$i];
3469 $classes[$i] = @class_values;
3470 push @class_values, $i;
3472 push @class_values, 0 if (@class_values % 2);
3473 die "too many classes" if @class_values >= 0x40;
3475 # character properties
3477 my @char_props;
3478 my @decomposed;
3479 my @comp_hash_table;
3480 my $comp_hash_size = $compose ? 254 : 0;
3482 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3484 next unless defined $combining_class_table[$i];
3485 if (defined $decomp{$type}->[$i])
3487 my @dec = get_decomposition( $i, $decomp{$type} );
3488 if ($compose && (my @comp = get_composition( $i, $compat )))
3490 my $hash = ($comp[0] + 95 * $comp[1]) % $comp_hash_size;
3491 push @{$comp_hash_table[$hash]}, to_utf16( @comp, $i );
3493 my $val = 0;
3494 foreach my $d (@dec)
3496 $val = $combining_class_table[$d];
3497 last if $val;
3499 $char_props[$i] = $classes[$val];
3501 else
3503 $char_props[$i] = 0xbf;
3505 @dec = compose_hangul( @dec ) if $compose;
3506 @dec = to_utf16( @dec );
3507 push @dec, 0 if @dec >= 7;
3508 $decomposed[$i] = \@dec;
3510 else
3512 if ($combining_class_table[$i] == 0x100)
3514 $char_props[$i] = 0x7f;
3516 elsif ($combining_class_table[$i])
3518 $char_props[$i] = $classes[$combining_class_table[$i]] | 0x80;
3520 elsif ($type eq "idna" && defined $idna_disallowed[$i])
3522 $char_props[$i] = 0xff;
3524 else
3526 $char_props[$i] = 0;
3531 if ($compose)
3533 for (my $i = 0; $i <= $MAX_CHAR; $i++)
3535 my @comp = get_composition( $i, $compat );
3536 next unless @comp;
3537 if ($combining_class_table[$comp[1]])
3539 $char_props[$comp[0]] |= 0x40 unless $char_props[$comp[0]] & 0x80;
3540 $char_props[$comp[1]] |= 0x40;
3542 else
3544 $char_props[$comp[0]] = ($char_props[$comp[0]] & ~0x40) | 0x80;
3545 $char_props[$comp[1]] |= 0xc0;
3550 # surrogates
3551 foreach my $i (0xd800..0xdbff) { $char_props[$i] = 0xdf; }
3552 foreach my $i (0xdc00..0xdfff) { $char_props[$i] = 0x9f; }
3554 # Hangul
3555 if ($type eq "nfc") { foreach my $i (0x1100..0x117f) { $char_props[$i] = 0xff; } }
3556 elsif ($compose) { foreach my $i (0x1100..0x11ff) { $char_props[$i] = 0xff; } }
3557 foreach my $i (0xac00..0xd7ff) { $char_props[$i] = 0xff; }
3559 # invalid chars
3560 if ($type eq "idna") { foreach my $i (0x00..0x1f, 0x7f) { $char_props[$i] = 0xff; } }
3561 foreach my $i (0xfdd0..0xfdef) { $char_props[$i] = 0xff; }
3562 foreach my $i (0x00..0x10)
3564 $char_props[($i << 16) | 0xfffe] = 0xff;
3565 $char_props[($i << 16) | 0xffff] = 0xff;
3568 # decomposition hash table
3570 my @decomp_hash_table;
3571 my @decomp_hash_index;
3572 my @decomp_hash_data;
3573 my $decomp_hash_size = 944;
3575 # build string of character data, reusing substrings when possible
3576 my $decomp_char_data = "";
3577 foreach my $i (sort { @{$b} <=> @{$a} } grep defined, @decomposed)
3579 my $str = pack "U*", @{$i};
3580 $decomp_char_data .= $str if index( $decomp_char_data, $str) == -1;
3582 for (my $i = 0; $i < @decomposed; $i++)
3584 next unless defined $decomposed[$i];
3585 my $pos = index( $decomp_char_data, pack( "U*", @{$decomposed[$i]} ));
3586 die "sequence not found" if $pos == -1;
3587 my $len = @{$decomposed[$i]};
3588 $len = 7 if $len > 7;
3589 my $hash = $i % $decomp_hash_size;
3590 push @{$decomp_hash_table[$hash]}, [ $i, ($len << 13) | $pos ];
3592 for (my $i = 0; $i < $decomp_hash_size; $i++)
3594 $decomp_hash_index[$i] = @decomp_hash_data / 2;
3595 next unless defined $decomp_hash_table[$i];
3596 if (@{$decomp_hash_table[$i]} == 1)
3598 my $entry = $decomp_hash_table[$i]->[0];
3599 if ($char_props[$entry->[0]] == 0xbf)
3601 $decomp_hash_index[$i] = $entry->[1];
3602 next;
3605 foreach my $entry (@{$decomp_hash_table[$i]})
3607 push @decomp_hash_data, $entry->[0] & 0xffff, $entry->[1];
3610 push @decomp_hash_data, 0, 0;
3612 # composition hash table
3614 my @comp_hash_index;
3615 my @comp_hash_data;
3616 if (@comp_hash_table)
3618 for (my $i = 0; $i < $comp_hash_size; $i++)
3620 $comp_hash_index[$i] = @comp_hash_data;
3621 push @comp_hash_data, @{$comp_hash_table[$i]} if defined $comp_hash_table[$i];
3623 $comp_hash_index[$comp_hash_size] = @comp_hash_data;
3624 push @comp_hash_data, 0, 0, 0;
3627 my $level1 = ($MAX_CHAR + 1) / 128;
3628 my @rows = compress_char_props_table( $level1, @char_props[0..$MAX_CHAR] );
3630 my @header = ( $version[0], $version[1], $version[2], 0, $forms{$type}, $compat ? 18 : 3,
3631 0, $decomp_hash_size, $comp_hash_size, 0 );
3632 my @tables = (0) x 8;
3634 $tables[0] = 16 + @header + @tables;
3635 $tables[1] = $tables[0] + @class_values / 2;
3636 $tables[2] = $tables[1] + $level1 / 2;
3637 $tables[3] = $tables[2] + (@rows - $level1) / 2;
3638 $tables[4] = $tables[3] + @decomp_hash_index;
3639 $tables[5] = $tables[4] + @decomp_hash_data;
3640 $tables[6] = $tables[5] + length $decomp_char_data;
3641 $tables[7] = $tables[6] + @comp_hash_index;
3643 print OUTPUT pack "S<16", unpack "U*", "norm$type.nlp";
3644 print OUTPUT pack "S<*", @header;
3645 print OUTPUT pack "S<*", @tables;
3646 print OUTPUT pack "C*", @class_values;
3648 print OUTPUT pack "C*", @rows[0..$level1-1];
3649 print OUTPUT pack "C*", @rows[$level1..$#rows];
3650 print OUTPUT pack "S<*", @decomp_hash_index;
3651 print OUTPUT pack "S<*", @decomp_hash_data;
3652 print OUTPUT pack "S<*", unpack "U*", $decomp_char_data;
3653 print OUTPUT pack "S<*", @comp_hash_index;
3654 print OUTPUT pack "S<*", @comp_hash_data;
3656 close OUTPUT;
3657 save_file($filename);
3659 add_registry_string_value( $nlskey, "Normalization", sprintf( "%x", $forms{$type} ), "norm$type.nls" );
3663 ################################################################
3664 # output a codepage definition file from the global tables
3665 sub output_codepage_file($)
3667 my $codepage = shift;
3669 my $output = sprintf "nls/c_%03d.nls", $codepage;
3670 open OUTPUT,">$output.new" or die "Cannot create $output";
3672 printf "Building %s\n", $output;
3673 if (!@lead_bytes) { dump_binary_sbcs_table( $codepage ); }
3674 else { dump_binary_dbcs_table( $codepage ); }
3676 close OUTPUT;
3677 save_file($output);
3679 add_registry_string_value( $nlskey, "Codepage", sprintf( "%d", $codepage ), sprintf( "c_%03d.nls", $codepage ));
3682 ################################################################
3683 # output a codepage table from a Microsoft-style mapping file
3684 sub dump_msdata_codepage($)
3686 my $filename = shift;
3688 my $state = "";
3689 my ($codepage, $width, $count);
3690 my ($lb_cur, $lb_end);
3692 @cp2uni = ();
3693 @glyph2uni = ();
3694 @lead_bytes = ();
3695 @uni2cp = ();
3696 $default_char = $DEF_CHAR;
3697 $default_wchar = $DEF_CHAR;
3699 my $INPUT = open_data_file( "codepages", $filename );
3701 while (<$INPUT>)
3703 next if /^;/; # skip comments
3704 next if /^\s*$/; # skip empty lines
3705 next if /\x1a/; # skip ^Z
3706 last if /^ENDCODEPAGE/;
3708 if (/^CODEPAGE\s+(\d+)/)
3710 $codepage = $1;
3711 next;
3713 if (/^CPINFO\s+(\d+)\s+0x([0-9a-fA-f]+)\s+0x([0-9a-fA-F]+)/)
3715 $width = $1;
3716 $default_char = hex $2;
3717 $default_wchar = hex $3;
3718 next;
3720 if (/^(MBTABLE|GLYPHTABLE|WCTABLE|DBCSRANGE|DBCSTABLE)\s+(\d+)/)
3722 $state = $1;
3723 $count = $2;
3724 next;
3726 if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)/)
3728 if ($state eq "MBTABLE")
3730 my $cp = hex $1;
3731 my $uni = hex $2;
3732 $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
3733 next;
3735 if ($state eq "GLYPHTABLE")
3737 my $cp = hex $1;
3738 my $uni = hex $2;
3739 $glyph2uni[$cp] = $uni unless defined($glyph2uni[$cp]);
3740 next;
3742 if ($state eq "WCTABLE")
3744 my $uni = hex $1;
3745 my $cp = hex $2;
3746 $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
3747 next;
3749 if ($state eq "DBCSRANGE")
3751 my $start = hex $1;
3752 my $end = hex $2;
3753 for (my $i = $start; $i <= $end; $i++) { add_lead_byte( $i ); }
3754 $lb_cur = $start;
3755 $lb_end = $end;
3756 next;
3758 if ($state eq "DBCSTABLE")
3760 my $mb = hex $1;
3761 my $uni = hex $2;
3762 my $cp = ($lb_cur << 8) | $mb;
3763 $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
3764 if (!--$count)
3766 if (++$lb_cur > $lb_end) { $state = "DBCSRANGE"; }
3768 next;
3771 die "$filename: Unrecognized line $_\n";
3773 close $INPUT;
3775 output_codepage_file( $codepage );
3777 if ($codepage == 949) { dump_krwansung_codepage( @uni2cp ); }
3780 ################################################################
3781 # align a string length
3782 sub align_string($$)
3784 my ($align, $str) = @_;
3785 $str .= pack "C*", (0) x ($align - length($str) % $align) if length($str) % $align;
3786 return $str;
3789 ################################################################
3790 # pad a string with zeros
3791 sub pad_string($$)
3793 my ($pad, $str) = @_;
3794 $str .= pack "C*", (0) x ($pad - length($str)) if length($str) < $pad;
3795 return $str;
3798 ################################################################
3799 # pack a GUID string
3800 sub pack_guid($)
3802 $_ = shift;
3803 /([0-9A-Fa-f]{8})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{4})-([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})-([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})([0-9A-Fa-f]{2})/;
3804 return pack "L<S<2C8", hex $1, hex $2, hex $3, hex $4, hex $5, hex $6, hex $7, hex $8, hex $9, hex $10, hex $11;
3807 ################################################################
3808 # comparison function for compression sort
3809 sub cmp_compression
3811 return scalar @{$a} <=> scalar @{$b} ||
3812 $a->[4] <=> $b->[4] ||
3813 $a->[5] <=> $b->[5] ||
3814 $a->[6] <=> $b->[6] ||
3815 $a->[7] <=> $b->[7] ||
3816 $a->[8] <=> $b->[8] ||
3817 $a->[9] <=> $b->[9] ||
3818 $a->[10] <=> $b->[10] ||
3819 $a->[11] <=> $b->[11] ||
3820 $a->[12] <=> $b->[12];
3823 ################################################################
3824 # build a binary sort keys table
3825 sub dump_sortkey_table($)
3827 my $filename = shift;
3828 my @keys;
3829 my ($part, $section, $subsection, $guid, $version, $ling_flag);
3830 my @multiple_weights;
3831 my @expansions;
3832 my @compressions;
3833 my %exceptions;
3834 my %guids;
3835 my %compr_flags;
3836 my %locales;
3837 my $default_guid = "00000001-57ee-1e5c-00b4-d0000bb1e11e";
3838 my $jamostr = "";
3840 my $re_hex = '0x[0-9A-Fa-f]+';
3841 my $re_key = '(\d+\s+\d+\s+\d+\s+\d+)';
3842 $guids{$default_guid} = { };
3844 my %flags = ( "HAS_3_BYTE_WEIGHTS" => 0x01, "REVERSEDIACRITICS" => 0x10, "DOUBLECOMPRESSION" => 0x20, "INVERSECASING" => 0x40 );
3846 my $KEYS = open_data_file( "sorting" );
3848 printf "Building $filename\n";
3850 while (<$KEYS>)
3852 s/\s*;.*$//;
3853 next if /^\s*$/; # skip empty lines
3854 if (/^\s*(SORTKEY|SORTTABLES)/)
3856 $part = $1;
3857 next;
3859 if (/^\s*(ENDSORTKEY|ENDSORTTABLES)/)
3861 $part = $section = "";
3862 next;
3864 if (/^\s*(DEFAULT|RELEASE|REVERSEDIACRITICS|DOUBLECOMPRESSION|INVERSECASING|MULTIPLEWEIGHTS|EXPANSION|COMPATIBILITY|COMPRESSION|EXCEPTION|JAMOSORT)\s+/)
3866 $section = $1;
3867 $guid = undef;
3868 next;
3870 next unless $part;
3871 if ("$part.$section" eq "SORTKEY.DEFAULT")
3873 if (/^\s*($re_hex)\s+$re_key/)
3875 $keys[hex $1] = [ split(/\s+/,$2) ];
3876 next;
3879 elsif ("$part.$section" eq "SORTTABLES.RELEASE")
3881 if (/^\s*NLSVERSION\s+0x([0-9A-Fa-f]+)/)
3883 $version = hex $1;
3884 next;
3886 if (/^\s*DEFINEDVERSION\s+0x([0-9A-Fa-f]+)/)
3888 # ignore for now
3889 next;
3892 elsif ("$part.$section" eq "SORTTABLES.REVERSEDIACRITICS" ||
3893 "$part.$section" eq "SORTTABLES.DOUBLECOMPRESSION" ||
3894 "$part.$section" eq "SORTTABLES.INVERSECASING")
3896 if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)/)
3898 $guid = lc $1;
3899 $guids{$guid} = { } unless defined $guids{$guid};
3900 $guids{$guid}->{flags} |= $flags{$section};
3901 next;
3903 if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
3905 $locales{$1} = $guid;
3906 next;
3909 elsif ("$part.$section" eq "SORTTABLES.MULTIPLEWEIGHTS")
3911 if (/^\s*(\d+)\s+(\d+)/)
3913 push @multiple_weights, $1, $2;
3914 next;
3917 elsif ("$part.$section" eq "SORTTABLES.EXPANSION")
3919 if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
3921 my $pos = scalar @expansions / 2;
3922 $keys[hex $1] = [ 2, 0, $pos & 0xff, $pos >> 8 ] unless defined $keys[hex $1];
3923 push @expansions, hex $2, hex $3;
3924 next;
3927 elsif ("$part.$section" eq "SORTTABLES.COMPATIBILITY")
3929 if (/^\s*0x([0-9A-Fa-f]+)\s+0x([0-9A-Fa-f]+)/)
3931 $keys[hex $1] = $keys[hex $2];
3932 next;
3935 elsif ("$part.$section" eq "SORTTABLES.COMPRESSION")
3937 if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*([A-Z0-9_]+)?/)
3939 if ($subsection || !$guid) # start a new one
3941 $guid = lc $1;
3942 $subsection = "";
3943 $guids{$guid} = { } unless defined $guids{$guid};
3944 $guids{$guid}->{flags} |= $flags{$2} if $2;
3945 $guids{$guid}->{compr} = @compressions;
3946 $exceptions{"$guid-"} = [ ] unless defined $exceptions{"$guid-"};
3947 $compr_flags{$guid} = [ ] unless defined $compr_flags{$guid};
3948 push @compressions, [ ];
3950 else # merge with current one
3952 $guids{lc $1} = { } unless defined $guids{lc $1};
3953 $guids{lc $1}->{flags} |= $flags{$2} if $2;
3954 $guids{lc $1}->{compr} = $guids{$guid}->{compr};
3955 $compr_flags{lc $1} = $compr_flags{$guid};
3957 next;
3959 if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
3961 $locales{$1} = $guid;
3962 next;
3964 if (/^\s*(TWO|THREE|FOUR|FIVE|SIX|SEVEN|EIGHT)/)
3966 $subsection = $1;
3967 next;
3969 if ($subsection && /^\s*(($re_hex\s+){2,8})$re_key/)
3971 my @comp = map { hex $_; } split(/\s+/,$1);
3972 push @{$compressions[$#compressions]}, [ split(/\s+/,$3), @comp ];
3973 # add compression flags
3974 $compr_flags{$guid}->[$comp[0]] |= @comp >= 6 ? 0xc0 : @comp >= 4 ? 0x80 : 0x40;
3975 next;
3978 elsif ("$part.$section" eq "SORTTABLES.EXCEPTION")
3980 if (/^\s*SORTGUID\s+([-0-9A-Fa-f]+)\s+\d*\s*(LINGUISTIC_CASING)?/)
3982 $guid = lc $1;
3983 $guids{$guid} = { } unless defined $guids{lc $1};
3984 $ling_flag = ($2 ? "+" : "-");
3985 $exceptions{"$guid$ling_flag"} = [ ] unless defined $exceptions{"$guid$ling_flag"};
3986 next;
3988 if (/^\s*LOCALENAME\s+([A-Za-z0-9-_]+)/)
3990 $locales{$1} = $guid;
3991 next;
3993 if (/^\s*($re_hex)\s+$re_key/)
3995 $exceptions{"$guid$ling_flag"}->[hex $1] = [ split(/\s+/,$2) ];
3996 next;
3999 elsif ("$part.$section" eq "SORTTABLES.JAMOSORT")
4001 if (/^\s*$re_hex\s+(($re_hex\s*){5})/)
4003 $jamostr .= pack "C8", map { hex $_; } split /\s+/, $1;
4004 next;
4007 die "$current_data_file: $part.$section: unrecognized line $_\n";
4009 close $KEYS;
4011 # Sortkey table
4013 my $table;
4014 for (my $i = 0; $i < 0x10000; $i++)
4016 my @k = defined $keys[$i] ? @{$keys[$i]} : (0) x 4;
4017 $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
4020 foreach my $id (sort keys %exceptions)
4022 my $pos = length($table) / 4;
4023 my @exc = @{$exceptions{$id}};
4024 my @filled;
4025 my $key = (substr( $id, -1 ) eq "+" ? "ling_except" : "except");
4026 my $guid = substr( $id, 0, -1 );
4027 $guids{$guid}->{$key} = $pos;
4028 $pos += 0x100;
4029 my @flags = @{$compr_flags{$guid}} if defined $compr_flags{$guid};
4030 for (my $j = 0; $j < 0x10000; $j++)
4032 next unless defined $exc[$j] || defined $flags[$j];
4033 $filled[$j >> 8] = 1;
4034 $j |= 0xff;
4036 for (my $j = 0; $j < 0x100; $j++)
4038 $table .= pack "L<", $filled[$j] ? $pos : $j * 0x100;
4039 $pos += 0x100 if $filled[$j];
4041 for (my $j = 0; $j < 0x10000; $j++)
4043 next unless $filled[$j >> 8];
4044 my @k = defined $exc[$j] ? @{$exc[$j]} : defined $keys[$j] ? @{$keys[$j]} : (0) x 4;
4045 $k[3] |= $flags[$j] || 0;
4046 $table .= pack "C4", $k[1], $k[0], $k[2], $k[3];
4050 # Case mapping tables
4052 # standard table
4053 my @casemaps;
4054 my @upper = @toupper_table;
4055 my @lower = @tolower_table;
4056 remove_linguistic_mappings( \@upper, \@lower );
4057 $casemaps[0] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
4059 # linguistic table
4060 $casemaps[1] = pack( "S<*", 1) . dump_binary_case_table( @toupper_table ) . dump_binary_case_table( @tolower_table );
4062 # Turkish table
4063 @upper = @toupper_table;
4064 @lower = @tolower_table;
4065 $upper[ord 'i'] = 0x130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
4066 $lower[ord 'I'] = 0x131; # LATIN SMALL LETTER DOTLESS I
4067 $casemaps[2] = pack( "S<*", 1) . dump_binary_case_table( @upper ) . dump_binary_case_table( @lower );
4068 my $casemaps = align_string( 8, $casemaps[0] . $casemaps[1] . $casemaps[2] );
4070 # Char type table
4072 my @table;
4073 my $types = "";
4074 my %typestr;
4075 for (my $i = 0; $i < 0x10000; $i++)
4077 my $str = pack "S<3",
4078 ($category_table[$i] || 0) & 0xffff,
4079 defined($direction_table[$i]) ? $c2_types{$direction_table[$i]} : 0,
4080 ($category_table[$i] || 0) >> 16;
4082 if (!defined($typestr{$str}))
4084 $typestr{$str} = length($types) / 6;
4085 $types .= $str;
4087 $table[$i] = $typestr{$str};
4090 my (@rows, @array, @data, @row_data);
4091 (@rows[0..4095], @data) = compress_array( 4096, 0, @table[0..65535] );
4092 (@array[0..255], @row_data) = compress_array( 256, 0, @rows );
4093 for (my $i = 0; $i < 256; $i++) { $array[$i] *= 2; } # we need byte offsets
4094 for (my $i = 0; $i < @row_data; $i++) { $row_data[$i] += 2 * @row_data + 512 - 4096; }
4096 my $arraystr = pack("S<*", @array, @row_data) . pack("C*", @data);
4097 my $chartypes = pack "S<2", 4 + length($types) + length($arraystr), 2 + length($types);
4098 $chartypes = align_string( 8, $chartypes . $types . $arraystr );
4100 # Sort tables
4102 # guids
4103 my $sorttables = pack "L<2", $version, scalar %guids;
4104 foreach my $id (sort keys %guids)
4106 my %guid = %{$guids{$id}};
4107 my $flags = $guid{flags} || 0;
4108 my $map = length($casemaps[0]) + (defined $guid{ling_except} ? length($casemaps[1]) : 0);
4109 $sorttables .= pack_guid($id) . pack "L<5",
4110 $flags,
4111 defined($guid{compr}) ? $guid{compr} : 0xffffffff,
4112 $guid{except} || 0,
4113 $guid{ling_except} || 0,
4114 $map / 2;
4117 # expansions
4118 $sorttables .= pack "L<S<*", scalar @expansions / 2, @expansions;
4120 # compressions
4121 $sorttables .= pack "L<", scalar @compressions;
4122 my $rowstr = "";
4123 foreach my $c (@compressions)
4125 my $pos = length($rowstr) / 2;
4126 my $min = 0xffff;
4127 my $max = 0;
4128 my @lengths = (0) x 8;
4129 foreach my $r (sort cmp_compression @{$c})
4131 my @row = @{$r};
4132 $lengths[scalar @row - 6]++;
4133 foreach my $val (@row[4..$#row])
4135 $min = $val if $min > $val;
4136 $max = $val if $max < $val;
4138 $rowstr .= align_string( 4, pack "S<*", @row[4..$#row] );
4139 $rowstr .= pack "C4", $row[1], $row[0], $row[2], $row[3];
4141 $sorttables .= pack "L<S<10", $pos, $min, $max, @lengths;
4143 $sorttables .= $rowstr;
4145 # multiple weights
4146 $sorttables .= align_string( 4, pack "L<C*", scalar @multiple_weights / 2, @multiple_weights );
4148 # jamo sort
4149 $sorttables .= pack("L<", length($jamostr) / 8) . $jamostr;
4151 # Locales
4153 add_registry_key( $nlskey, "Sorting\\Ids", "{$default_guid}" );
4154 foreach my $loc (sort keys %locales)
4156 # skip specific locales that match more general ones
4157 my @parts = split /[-_]/, $loc;
4158 next if @parts > 1 && defined($locales{$parts[0]}) && $locales{$parts[0]} eq $locales{$loc};
4159 next if @parts > 2 && defined($locales{"$parts[0]-$parts[1]"}) && $locales{"$parts[0]-$parts[1]"} eq $locales{$loc};
4160 add_registry_string_value( $nlskey, "Sorting\\Ids", $loc, "\{$locales{$loc}\}" );
4163 # File header
4165 my @header;
4166 $header[0] = 16;
4167 $header[1] = $header[0] + length $table;
4168 $header[2] = $header[1] + length $casemaps;
4169 $header[3] = $header[2] + length $chartypes;
4171 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
4172 print OUTPUT pack "L<*", @header;
4173 print OUTPUT $table, $casemaps, $chartypes, $sorttables;
4174 close OUTPUT;
4175 save_file($filename);
4176 return $chartypes;
4180 my %lcnames;
4182 sub locale_parent($)
4184 my $loc = shift;
4186 return undef unless $loc;
4187 return $lcnames{$loc}->{sparent} if defined $lcnames{$loc} && defined $lcnames{$loc}->{sparent};
4188 return $lcnames{$loc}->{parent} if defined $lcnames{$loc} && defined $lcnames{$loc}->{parent};
4189 if ($loc =~ /(.*)-[0-9A-Za-z]+/) { return $1; }
4190 return "";
4193 sub compare_locales
4195 (my $n1 = $a) =~ tr/A-Z_/a-z-/;
4196 (my $n2 = $b) =~ tr/A-Z_/a-z-/;
4197 return $n1 cmp $n2;
4200 # query an xml key
4201 sub xml_query($$)
4203 my ($xml, $query) = @_;
4204 my $ret = $xml->find( $query );
4205 return undef unless $ret;
4206 printf STDERR "multiple entries for %s\n", $query if (@{$ret} > 1);
4207 return @{$ret}[0]->textContent;
4210 # query an xml key for a locale, with fallback to the parents
4211 sub loc_query($$)
4213 my ($loc, $query) = @_;
4215 $loc = $lcnames{"en-US"} unless $loc->{name}; # fallback to "en-US" for root locale
4217 for (my $cur = $loc->{name}; defined $cur; $cur = locale_parent( $cur ))
4219 next unless defined $lcnames{$cur};
4220 my $xml = $lcnames{$cur}->{xml};
4221 my $ret = $xml->find( $query );
4222 next unless $ret;
4223 printf STDERR "%s: multiple entries for %s\n", $cur, $query if (@{$ret} > 1);
4224 next if @{$ret}[0]->textContent eq "\x{2191}\x{2191}\x{2191}"; # "↑↑↑"
4225 return @{$ret}[0]->textContent;
4227 return undef;
4230 # retrieve a locale field entry by going up the parents tree
4231 sub locale_entry($$$)
4233 my ($loc, $field, $def) = @_;
4235 return $loc->{$field} if defined $loc->{$field};
4237 unless ($loc->{name}) # fallback to "en-US" for root locale
4239 $loc = $lcnames{"en-US"};
4240 return $loc->{$field} if defined $loc->{$field};
4242 while (defined $loc->{alias}) # resolve aliases
4244 $loc = $lcnames{$loc->{alias}};
4245 return $loc->{$field} if defined $loc->{$field};
4247 my $cur = $loc->{name};
4248 while ($cur)
4250 if (defined $lcnames{$cur} && defined $lcnames{$cur}->{sparent})
4252 $cur = $lcnames{$cur}->{sparent};
4254 elsif ($cur =~ /(.*)-[0-9A-Za-z]+/)
4256 $cur = $1;
4258 else
4260 return $def;
4262 return $lcnames{$cur}->{$field} if defined $lcnames{$cur} && defined $lcnames{$cur}->{$field};
4264 return $def;
4267 my $string_data;
4269 sub add_str_data($)
4271 my $txt = shift;
4272 my $ret = index( $string_data, $txt );
4273 if ($ret == -1)
4275 $ret = length($string_data);
4276 $string_data .= $txt
4278 return $ret / 2;
4281 sub add_string($)
4283 my $str = shift;
4284 return 0 unless defined($str) && $str ne "";
4285 my $utf = encode( "UTF16LE", $str );
4286 return add_str_data( (pack "S<", length($utf) / 2) . $utf . (pack "S", 0) );
4289 sub add_fontsig(@)
4291 return add_str_data( pack "S<L<*", scalar(@_) * 2, @_ );
4294 sub add_strarray(@)
4296 return 0 unless @_;
4297 return add_str_data( pack "S<L<*", scalar @_, map { add_string($_) } @_);
4300 sub format_to_grouping($)
4302 my $format = shift;
4303 if ($format =~ /#,(#+),(#+0)/) { return chr(length($2)) . chr(length($1)); }
4304 if ($format =~ /#,(#+0)/) { return chr(length($1)); }
4305 # printf STDERR "unknown format %s\n", $format;
4306 return chr(3);
4309 sub parse_currency_format($$)
4311 my $name = shift;
4312 my ($posfmt, $negfmt) = split /;/, shift;
4313 my @pospatterns = ( "\xa4[^\xa0]*#", # $1.1
4314 "00[^\xa0]*\xa4", # 1.1$
4315 "\xa4.*\xa0.*#", # $ 1.1
4316 "00.*\xa0.*\xa4" ); # 1.1 $
4317 my @negpatterns = ( "\\(\xa4[^\xa0]*#", # ($1.1)
4318 "-\xa4[^\xa0]*#", # -$1.1
4319 "\xa4[^\xa0]*-#", # $-1.1
4320 "\xa4[^\xa0]*#.*00-", # $1.1-
4321 "00[^\xa0]*\xa4\\)", # (1.1$)
4322 "-#.*00[^\xa0]*\xa4", # -1.1$
4323 "00-[^\xa0]*\xa4", # 1.1-$
4324 "00[^\xa0]*\xa4-", # 1.1$-
4325 "-#.*00.*\xa0.*\xa4", # -1.1 $
4326 "-\xa4.*\xa0.*#", # -$ 1.1
4327 "00.*\xa0.*\xa4-", # 1.1 $-
4328 "\xa4.*\xa0.*#.*00-", # $ 1.1-
4329 "\xa4.*\xa0.*-#", # $ -1.1
4330 "00-.*\xa0.*\xa4", # 1.1- $
4331 "\\(\xa4.*\xa0.*#", # ($ 1.1)
4332 "00.*\xa0.*\xa4\\)"); # (1.1 $)
4333 my ($pos, $neg);
4335 for ($pos = 0; $pos < @pospatterns; $pos++)
4337 last if ($posfmt =~ /$pospatterns[$pos]/);
4339 #printf STDERR "$name: unknown format '%s'\n", $posfmt if ($pos == @pospatterns);
4340 $pos = 0 if ($pos == @pospatterns);
4342 if (defined $negfmt)
4344 for ($neg = 0; $neg < @negpatterns; $neg++)
4346 last if ($negfmt =~ /$negpatterns[$neg]/);
4348 #printf STDERR "$name: unknown format '%s'\n", $negfmt if ($neg == @negpatterns);
4349 $neg = 0 if ($neg == @negpatterns);
4351 elsif ($pos == 0) { $neg = 1; }
4352 elsif ($pos == 1) { $neg = 5; }
4353 elsif ($pos == 2) { $neg = 9; }
4354 elsif ($pos == 3) { $neg = 8; }
4356 return ($pos, $neg);
4359 sub parse_percent_format($)
4361 my $fmt = shift;
4362 my @patterns = ( "0.+%", # 1 %
4363 "0%", # 1%
4364 "%#", # %1
4365 "%.+#" ); # % 1
4366 my $pos;
4367 for ($pos = 0; $pos < @patterns; $pos++)
4369 last if ($fmt =~ /$patterns[$pos]/);
4371 printf STDERR "unknown format '%s'\n", $fmt if ($pos == @patterns);
4372 return ($pos, ($pos == 3) ? 7 : $pos);
4375 sub convert_date_format($)
4377 my $fmt = shift;
4378 $fmt =~ s/G+/gg/;
4379 $fmt =~ s/LLLL/MMMM/;
4380 $fmt =~ s/LLL/MMM/;
4381 $fmt =~ s/E+/dddd/;
4382 $fmt =~ s/ccc+/dddd/;
4383 $fmt =~ s/([^gy])y([^y])/$1yyyy$2/;
4384 $fmt =~ s/^y([^y])/yyyy$1/;
4385 $fmt =~ s/([^gy])y$/$1yyyy/;
4386 return $fmt;
4389 sub convert_time_format($)
4391 my $fmt = shift;
4392 $fmt =~ s/a+/tt/;
4393 $fmt =~ s/B+/tt/;
4394 return $fmt;
4397 sub load_iso639()
4399 my %iso639;
4400 my $DATA = open_data_file( "iso639", "iso-639-3_Code_Tables_$ISO639VERSION/iso-639-3.tab" );
4401 while (<$DATA>)
4403 if (/^\s*[a-z]{3}\s+[a-z]{3}\s+([a-z]{3})\s+([a-z]{2})\s/) { $iso639{$2} = $1; }
4405 close $DATA;
4406 return %iso639;
4410 ################################################################
4411 # build the locale table for locale.nls
4412 sub build_locale_data()
4414 my $base = "cldr-release-$CLDRVERSION";
4415 my $suppl = load_xml_data_file( "cldr", "$base/common/supplemental/supplementalData.xml" );
4416 my $subtags = load_xml_data_file( "cldr", "$base/common/supplemental/likelySubtags.xml" );
4417 my $numbers = load_xml_data_file( "cldr", "$base/common/supplemental/numberingSystems.xml" );
4418 # obsolete phone data from CLDR version 33
4419 my $phone = load_xml_data_file( "cldr33", "common/supplemental/telephoneCodeData.xml" );
4420 my %iso639 = load_iso639();
4421 $string_data = pack "S2", 0, 0; # offset 0 == empty string
4423 %lcnames = map { $_->{name} => $_ } @locales;
4425 my %lcids;
4426 foreach my $loc (@locales) { $lcids{$loc->{lcid}} = $loc if defined $loc->{lcid}; }
4428 my %days = ( "sun" => 0, "mon" => 1, "tue" => 2, "wed" => 3, "thu" => 4, "fri" => 5, "sat" => 6 );
4430 # assign locale parents
4432 foreach my $loc (@locales)
4434 next if $loc->{name} eq "";
4435 next if defined $loc->{parent};
4436 (my $unix_name = $loc->{name}) =~ s/-/_/g;
4437 my $parent = xml_query( $suppl, "/supplementalData/parentLocales/parentLocale[contains(concat(' ',\@locales,' '),' $unix_name ')]/\@parent" );
4438 if ($parent)
4440 $parent =~ s/_/-/g;
4441 $parent = "" if $parent eq "root";
4443 elsif ($loc->{name} =~ /(.*)-[0-9A-Za-z]+/) { $parent = $1; }
4444 $loc->{parent} = $parent || "";
4447 # load per-locale XML files
4449 foreach my $loc (@locales)
4451 next if defined $loc->{alias};
4452 (my $file = $loc->{file} || $loc->{name}) =~ s/-/_/g;
4453 $file = "$base/" . ($loc->{dir} || "common") . "/main/$file.xml";
4454 my $xml = load_xml_data_file( "cldr", $file );
4455 $loc->{xml} = $xml;
4456 $loc->{language} ||= xml_query( $xml, "/ldml/identity/language/\@type" );
4457 $loc->{territory} ||= xml_query( $xml, "/ldml/identity/territory/\@type" );
4458 $loc->{script} = xml_query( $xml, "/ldml/identity/script/\@type" );
4459 if (!defined($loc->{territory}) && $loc->{name} =~ /-([A-Z]{2}|[0-9]{3})$/) { $loc->{territory} = $1; }
4460 if (!defined($loc->{script}) && $loc->{name} =~ /-([A-Z][a-z]{3})(-[A-Z]{2})?$/) { $loc->{script} = $1; }
4463 # assign a default territory and sort locale
4465 foreach my $loc (@locales)
4467 next if defined $loc->{alias};
4468 next if defined $loc->{territory};
4469 my $id = $loc->{sortlocale};
4470 if (defined $id && ($id =~ /[-_]([A-Z0-9]+)$/))
4472 $loc->{territory} = $1;
4473 next;
4475 my @children = grep /^$loc->{name}-[A-Z0-9]+$/ && !defined $lcnames{$_}->{alias}, keys %lcnames;
4476 if (@children == 1)
4478 $id = $children[0];
4480 else
4482 my $name = $loc->{file} || $loc->{name};
4483 $name =~ s/-(Arab|Cyrl|Deva|Guru|Hans|Hant|Latn|Tfng|Vaii)$//;
4484 $name =~ s/-/_/g;
4485 $id = xml_query( $subtags, "/supplementalData/likelySubtags/likelySubtag[\@from='$name']/\@to" );
4486 $id =~ s/_/-/g if $id;
4488 if ($id =~ /[-_]([A-Z0-9]+)$/)
4490 $loc->{territory} = $1;
4491 next if defined $loc->{sortlocale};
4492 next unless $id =~ /^$loc->{name}/;
4493 while (defined $lcnames{$id} && defined $lcnames{$id}->{alias}) { $id = $lcnames{$id}->{alias}; }
4494 $loc->{sortlocale} = $id if defined $lcnames{$id};
4495 next;
4497 print STDERR "no territory found for $loc->{name}\n";
4500 # fill geoid table
4502 my %geotable;
4503 foreach my $geo (@geoids)
4505 my $name = $geo->{name};
4506 next unless defined $name;
4507 $geo->{alias} = $geotable{$name} if defined $geotable{$name};
4508 $geotable{$name} ||= $geo;
4510 foreach my $loc (@locales)
4512 next if defined $loc->{alias};
4513 my $territory = $loc->{territory};
4514 $geotable{$territory} ||= { name => $territory };
4516 foreach my $name (keys %geotable)
4518 my $geo = $geotable{$name};
4519 $geo->{dialcode} = xml_query( $phone, "(/supplementalData/telephoneCodeData/codesByTerritory[\@territory='$name']/telephoneCountryCode)[1]/\@code" );
4520 if ($name =~ /\d+/)
4522 $geo->{uncode} = $name;
4523 next;
4525 $geo->{iso2} = $name;
4526 $geo->{iso3} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@alpha3");
4527 $geo->{uncode} = xml_query( $suppl, "/supplementalData/codeMappings/territoryCodes[\@type='$name']/\@numeric");
4528 $geo->{sintlsymbol} ||= xml_query( $suppl, "(/supplementalData/currencyData/region[\@iso3166='$name']/currency[not(\@to)])[1]/\@iso4217") || "XXX";
4529 $geo->{sintlsymbol} =~ s/XXX/XDR/;
4531 foreach my $geo (@geoids)
4533 $geo->{parentid} = $geotable{$geo->{parent}}->{id} if defined $geo->{parent};
4534 next if defined $geo->{iso2};
4535 next if defined $geo->{alias};
4536 next unless defined $geo->{uncode};
4537 my @contains;
4538 my $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and not(\@status)]/\@contains");
4539 push @contains, split /\s+/, $list if defined $list;
4540 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$geo->{uncode}' and \@status='deprecated']/\@contains");
4541 push @contains, split /\s+/, $list if defined $list;
4542 while (@contains)
4544 my $territory = pop @contains;
4545 if (defined $geotable{$territory})
4547 $geotable{$territory}->{parentid} ||= $geo->{id};
4549 elsif ($territory =~ /\d+/)
4551 # expand region recursively
4552 $list = xml_query( $suppl, "/supplementalData/territoryContainment/group[\@type='$territory' and not(\@status)]/\@contains" );
4553 push @contains, split /\s+/, $list if defined $list;
4558 # assign calendars to their locale
4560 foreach my $cal (@calendars)
4562 next unless defined $cal->{locale};
4563 my $loc = $lcnames{$cal->{locale}};
4564 $loc->{calendar} = [ ] unless defined $loc->{calendar};
4565 push @{$loc->{calendar}}, $cal;
4568 # assign default lcid to aliases
4570 foreach my $loc (@locales)
4572 next unless defined $loc->{alias};
4573 next if defined $loc->{lcid};
4574 my $alias = $loc->{alias};
4575 my $lcid = $lcnames{$alias}->{lcid} || 0x1000;
4576 $loc->{lcid} = $lcid | 0x80000000;
4579 # assign sort aliases to parent locale
4581 foreach my $loc (@locales)
4583 next unless $loc->{name} =~ /_/;
4584 next unless defined $loc->{alias};
4585 my $alias = $loc->{alias};
4586 my $parent = $lcnames{$alias};
4587 my $basename = $parent->{name};
4588 while (1)
4590 @{$parent->{sortnames}}[($loc->{lcid} >> 16) - 1] = $loc->{name};
4591 $alias = locale_parent( $alias );
4592 last unless $alias && defined $lcnames{$alias};
4593 $parent = $lcnames{$alias};
4594 last if defined $parent->{sortbase} && $parent->{sortbase} ne $basename;
4595 $parent->{sortbase} = $basename;
4599 # assign an array index to all locales
4601 my $idx = 0;
4602 foreach my $loc (@locales)
4604 next if defined $loc->{alias};
4605 $loc->{idx} = $idx++;
4607 foreach my $loc (@locales)
4609 my $alias = $loc->{alias};
4610 next unless defined $alias;
4611 while (defined $lcnames{$alias}->{alias}) { $alias = $lcnames{$alias}->{alias}; }
4612 $loc->{idx} = $lcnames{$alias}->{idx};
4615 # output lcids table
4617 my $lcid_data = "";
4618 foreach my $id (sort { $a <=> $b } keys %lcids)
4620 my $loc = $lcids{$id};
4621 $lcid_data .= pack "L<S<2", $id, $loc->{idx}, add_string($loc->{name});
4624 # output lcnames table
4626 my $lcname_data = "";
4627 foreach my $name (sort compare_locales keys %lcnames)
4629 my $loc = $lcnames{$name};
4630 $lcname_data .= pack "S<2L<", add_string($name), $loc->{idx}, $loc->{lcid} || 0x1000;
4633 # output locales array
4635 my $locale_data = "";
4636 my $default_lcid = 0x8001;
4637 foreach my $loc (@locales)
4639 next if defined $loc->{alias};
4640 my $sname = $loc->{name};
4641 my $language = $loc->{language};
4642 my $territory = $loc->{territory};
4643 my $script = $loc->{script};
4644 my $neutral = ($sname && $sname !~ /-$territory/);
4645 my $sparent = $loc->{sparent} || (($sname =~ /(.*)-[0-9A-Za-z]+/) ? $1 : $loc->{parent});
4646 my $unique_lcid = $loc->{lcid};
4647 unless (defined $unique_lcid) { $unique_lcid = $default_lcid++; }
4648 my $geo = $geotable{$territory};
4649 my $territory_match = "contains(concat(' ',normalize-space(\@territories),' '),' $territory ')";
4651 # languages and scripts
4653 my $ssortlocale = $loc->{sortlocale} || ($neutral ? "$sname-$territory" : $sname);
4654 my $idefaultlanguage = defined $lcnames{$ssortlocale} ? $lcnames{$ssortlocale}->{lcid} : undef;
4655 $idefaultlanguage = $lcnames{"en-US"}->{lcid} unless $ssortlocale;
4656 (my $siso639langname = $sname) =~ s/-.*$//;
4657 my $siso639langname2 = $iso639{$siso639langname} || $siso639langname;
4658 my $sopentypelang = sprintf "%-4s", locale_entry( $loc, "sopentypelang", uc $siso639langname2 );
4659 my $sabbrevlangname = defined $loc->{lcid} ? locale_entry( $loc, "sabbrevlangname", uc $siso639langname2 ) : "ZZZ";
4660 my $siso3166ctryname2 = $geo->{iso3} || $geo->{uncode};
4661 my $senglanguage = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" ) || "";
4662 my $sengcountry = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" ) || "";
4663 my $snativelangname = loc_query( $loc, "/ldml/localeDisplayNames/languages/language[\@type='$language' and not(\@alt)]" );
4664 my $snativectryname = loc_query( $loc, "/ldml/localeDisplayNames/territories/territory[\@type='$territory' and not(\@alt)]" );
4665 $sengcountry =~ s/South Korea/Korea/;
4666 $snativelangname ||= $senglanguage;
4667 $snativectryname ||= $sengcountry;
4668 if ($script)
4670 my $engscript = loc_query( $lcnames{en}, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4671 my $nativescript = loc_query( $loc, "/ldml/localeDisplayNames/scripts/script[\@type='$script' and not(\@alt)]" );
4672 $senglanguage .= " ($engscript)" if $engscript;
4673 $snativelangname .= " ($nativescript)" if $nativescript;
4675 my $sengdisplayname = $neutral ? $senglanguage : "$senglanguage ($sengcountry)";
4676 my $snativedisplayname = $neutral ? $snativelangname : "$snativelangname ($snativectryname)";
4677 $sengdisplayname =~ s/\) \(/, /;
4678 $snativedisplayname =~ s/\) \(/, /;
4679 my $sscripts = locale_entry( $loc, "sscripts", $script ) || xml_query( $suppl, "/supplementalData/languageData/language[\@type='$language' and not(\@alt)]/\@scripts" );
4680 $sscripts = (join ";", (sort split / /, ($sscripts || "Latn"))) . ";";
4681 my $ireadinglayout = locale_entry( $loc, "ireadinglayout", 0 );
4682 my $charlayout = loc_query( $loc, "/ldml/layout/orientation/characterOrder" );
4683 if ($charlayout eq "right-to-left")
4685 $ireadinglayout = 1;
4687 elsif ($charlayout eq "top-to-bottom")
4689 my $linelayout = loc_query( $loc, "/ldml/layout/orientation/lineOrder" );
4690 $ireadinglayout = $linelayout eq "right-to-left" ? 2 : 3;
4692 my $igeoid = $geo->{id} || 0;
4694 # numbers
4696 my $sdecimal = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/decimal" );
4697 my $slist = locale_entry( $loc, "slist", ";" );
4698 my $smondecimalsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyDecimal" ) || $sdecimal;
4699 my $sthousand = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/group" );
4700 $sthousand =~ s/\x{202f}/\x{00a0}/;
4701 my $smonthousandsep = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/currencyGroup" ) || $sthousand;
4702 my $spositivesign = "";
4703 my $snegativesign = "-";
4704 my $spercent = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/percentSign" );
4705 my $snan = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/nan" );
4706 my $sposinfinity = loc_query( $loc, "/ldml/numbers/symbols[\@numberSystem='latn']/infinity" );
4707 my $sneginfinity = $sposinfinity ? "-$sposinfinity" : "";
4708 my $sgrouping = format_to_grouping( loc_query( $loc, "/ldml/numbers/decimalFormats[\@numberSystem='latn']/decimalFormatLength[not(\@type)]/decimalFormat/pattern" ));
4709 my $percentformat = loc_query( $loc, "/ldml/numbers/percentFormats[\@numberSystem='latn']/percentFormatLength[not(\@type)]/percentFormat/pattern" );
4710 my $currencyformat = loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='accounting']/pattern" ) ||
4711 loc_query( $loc, "/ldml/numbers/currencyFormats[\@numberSystem='latn']/currencyFormatLength[not(\@type)]/currencyFormat[\@type='standard']/pattern" );
4712 my $smongrouping = format_to_grouping( $currencyformat );
4713 my ($icurrency, $inegcurr) = parse_currency_format( $sname, $currencyformat );
4714 my ($ipospercent, $inegpercent) = parse_percent_format( $percentformat );
4715 my $native_numbering = loc_query( $loc, "/ldml/numbers/otherNumberingSystems/native" );
4716 my @snativedigits = split //, xml_query( $numbers, "/supplementalData/numberingSystems/numberingSystem[\@id='$native_numbering']/\@digits" );
4717 my $digitsubstitution = !(ord($snativedigits[0]) >= 0x600 && ord($snativedigits[0]) <= 0x6ff);
4718 my $measure = defined xml_query( $suppl, "/supplementalData/measurementData/measurementSystem[\@type='US' and $territory_match]" );
4719 my $papersize = defined xml_query( $suppl, "/supplementalData/measurementData/paperSize[\@type='US-Letter' and $territory_match]" );
4721 # currencies
4723 my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
4724 my $scurrency = $geo->{scurrency} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[\@alt='narrow']" );
4725 $scurrency ||= loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/symbol[not(\@alt)]" );
4726 $geo->{scurrency} = $scurrency if $scurrency;
4727 $scurrency ||= $sintlsymbol;
4728 my $sengcurrname = $loc->{sengcurrname} || loc_query( $lcnames{en}, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" );
4729 my $snativecurrname = $loc->{sengcurrname} || loc_query( $loc, "/ldml/numbers/currencies/currency[\@type='$sintlsymbol']/displayName[not(\@count)]" ) || $sengcurrname;
4730 my $icurrdigits = xml_query( $suppl, "/supplementalData/currencyData/fractions/info[\@iso4217='$sintlsymbol']/\@digits" );
4731 $icurrdigits = 2 unless defined $icurrdigits;
4733 # calendars
4735 my $firstday = xml_query( $suppl, "/supplementalData/weekData/firstDay[not(\@alt) and $territory_match]/\@day" );
4736 my $ifirstdayofweek = $firstday ? $days{$firstday} : 1;
4737 my $firstweekofyear = (xml_query( $suppl, "/supplementalData/weekData/minDays[$territory_match]/\@count" ) || 0) == 4 ? 2 : 0;
4738 my $serastring = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/eras/eraAbbr/era[\@type='1' and not(\@alt)]" );
4739 my (@sdayname, @sabbrevdayname, @sshortestdayname);
4740 foreach my $d (sort { $days{$a} <=> $days{$b} } keys %days)
4742 my $n = $days{$d};
4743 my %name;
4744 foreach my $type (qw(wide abbreviated short))
4746 $name{$type} = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/days/dayContext[\@type='format']/dayWidth[\@type='$type']/day[\@type='$d' and not(\@alt)]" );
4748 push @sdayname, $name{wide};
4749 push @sabbrevdayname, $name{abbreviated} || $name{wide};
4750 push @sshortestdayname, $name{short} || $name{abbreviated} || $name{wide};
4752 my (@smonthname, @sabbrevmonthname, @sgenitivemonth, @sabbrevgenitivemonth);
4753 foreach my $n (1..13)
4755 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='wide']/month[\@type='$n']" );
4756 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='stand-alone']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4757 my $genitive = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n']" );
4758 my $abbrevgen = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n']" );
4759 push @smonthname, $name || $genitive || "";
4760 push @sabbrevmonthname, $abbrev || $abbrevgen || $name || $genitive || "";
4761 push @sgenitivemonth, $genitive || "";
4762 push @sabbrevgenitivemonth, $abbrevgen || $genitive || "";
4764 @sgenitivemonth = () if join("|",@smonthname) eq join("|",@sgenitivemonth);
4765 @sabbrevgenitivemonth = () if join("|",@sabbrevmonthname) eq join("|",@sabbrevgenitivemonth);
4766 my %caltypes = ( "gregorian" => 1, "japanese" => 3, "chinese" => 4, "dangi" => 5, "islamic" => 6, "buddhist" => 7, "hebrew" => 8,
4767 "persian" => 22, "islamic-civil" => 23, "islamic-umalqura" => 23 );
4768 my $calpref = xml_query( $suppl, "/supplementalData/calendarPreferenceData/calendarPreference[$territory_match]/\@ordering" ) || "gregorian";
4769 my $icalendartype;
4770 my @scalnames;
4771 foreach my $c (split /\s+/, $calpref)
4773 next unless defined $caltypes{$c};
4774 $icalendartype .= chr($caltypes{$c});
4775 $scalnames[$caltypes{$c} - 1] = loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$c']" );
4778 # date/time formats
4780 my $s1159 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='am' and not(\@alt)]" );
4781 my $s2359 = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='abbreviated']/dayPeriod[\@type='pm' and not (\@alt)]" );
4782 my $sshortestam = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='am' and not(\@alt)]" );
4783 my $sshortestpm = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dayPeriods/dayPeriodContext[\@type='format']/dayPeriodWidth[\@type='narrow']/dayPeriod[\@type='pm' and not (\@alt)]" );
4784 my @stimeformat = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='medium']/timeFormat/pattern[not(\@alt)]" ));
4785 push @stimeformat, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4786 pop @stimeformat if $stimeformat[0] eq $stimeformat[1];
4787 @stimeformat = map convert_time_format($_), @stimeformat;
4788 my @sshorttime = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/timeFormats/timeFormatLength[\@type='short']/timeFormat/pattern[not(\@alt)]" ));
4789 push @sshorttime, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hm' and not(\@alt)]" );
4790 pop @sshorttime if $sshorttime[0] eq $sshorttime[1];
4791 @sshorttime = map convert_time_format($_), @sshorttime;
4792 my @sshortdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
4793 push @sshortdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
4794 @sshortdate = map convert_date_format($_), @sshortdate;
4795 my @slongdate = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" ));
4796 push @slongdate, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
4797 @slongdate = map convert_date_format($_), @slongdate;
4798 my @smonthday = (loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMd' and not(\@alt)]" ));
4799 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Md' and not(\@alt)]" );
4800 push @smonthday, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMd' and not(\@alt)]" );
4801 @smonthday = map convert_date_format($_), @smonthday;
4802 my @syearmonth = map convert_date_format($_), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMM' and not(\@alt)]" );
4803 my @sduration = map convert_time_format( lc $_ ), loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='Hms' and not(\@alt)]" );
4804 my $srelativelongdate = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMMEd' and not(\@alt)]" ) ||
4805 loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[\@id='MMMEd' and not(\@alt)]" );
4806 $srelativelongdate = convert_date_format( $srelativelongdate );
4808 if (defined $loc->{calendar})
4810 foreach my $cal (@{$loc->{calendar}})
4812 $cal->{sshortdate} = \@sshortdate;
4813 $cal->{syearmonth} = \@syearmonth;
4814 $cal->{slongdate} = \@slongdate;
4815 $cal->{serastring} = [ $serastring ];
4816 $cal->{sdayname} = \@sdayname;
4817 $cal->{sabbrevdayname} = \@sabbrevdayname;
4818 $cal->{smonthname} = \@smonthname;
4819 $cal->{sabbrevmonthname} = \@sabbrevmonthname;
4820 $cal->{scalname} = $scalnames[$cal->{id}];
4821 $cal->{smonthday} = \@smonthday;
4822 $cal->{sshortestdayname} = \@sshortestdayname;
4823 $cal->{sabbreverastring} = [ $serastring ];
4824 $cal->{sshortestdayname} = \@sshortestdayname;
4825 $cal->{srelativelongdate} = $srelativelongdate;
4829 # codepages
4831 my %ansicpmap = ( 437 => 1252, 720 => 1256, 737 => 1253, 775 => 1257, 850 => 1252,
4832 852 => 1250, 855 => 1251, 866 => 1251, 857 => 1254, 862 => 1255 );
4833 my %maccpmap = ( 437 => 10000, 720 => 10004, 737 => 10006, 775 => 10029, 850 => 10000,
4834 852 => 10029, 855 => 10007, 857 => 10081, 862 => 10005, 866 => 10007,
4835 874 => 10021, 932 => 10001, 936 => 10008, 949 => 10003, 950 => 10002,
4836 1258 => 10000 );
4837 my %ebcdiccpmap = ( 437 => 37, 720 => 20420, 737 => 20273, 866 => 20880, 932 => 20290 );
4838 my %codepagemasks = ( 874 => [ 0x01000000, 0x00000000, 0x00000000, 0, 0x00010000, 0x00000000, 0x00010000, 0x00000000 ],
4839 932 => [ 0x00000000, 0x28c70000, 0x00000010, 0, 0x00020000, 0x00000000, 0x00020000, 0x00000000 ],
4840 936 => [ 0x00000000, 0x28010000, 0x00000002, 0, 0x00040000, 0x00000000, 0x00040000, 0x00000000 ],
4841 949 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00080000, 0x00000000, 0x00080000, 0x00000000 ],
4842 950 => [ 0x00000000, 0x28c10000, 0x00000012, 0, 0x00100000, 0x00000000, 0x00100000, 0x00000000 ],
4843 1258 => [ 0x2000000f, 0x00000000, 0x00000000, 0, 0x00000100, 0x00008000, 0x00000100, 0x00008000 ],
4844 866 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x00020000, 0x00000004, 0x02020000 ],
4845 862 => [ 0x00000800, 0x40000000, 0x00000000, 0, 0x00000020, 0x00200000, 0x00000020, 0x00200000 ],
4846 857 => [ 0x0000001f, 0x00000000, 0x00000000, 0, 0x00000010, 0x01000000, 0x00000010, 0x01000000 ],
4847 855 => [ 0x00000200, 0x00000000, 0x00000000, 0, 0x00000004, 0x02000000, 0x00000004, 0x02000000 ],
4848 852 => [ 0x00000027, 0x00000000, 0x00000000, 0, 0x00000002, 0x04000000, 0x00000002, 0x04000000 ],
4849 775 => [ 0x00000007, 0x00000000, 0x00000000, 0, 0x00000080, 0x08000000, 0x00000080, 0x08000000 ],
4850 737 => [ 0x00000080, 0x00000000, 0x00000000, 0, 0x00000008, 0x10000000, 0x00000008, 0x10010000 ],
4851 720 => [ 0x00002000, 0x00000000, 0x00000000, 0, 0x00000040, 0x20000000, 0x00000040, 0x20080000 ],
4852 850 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x40000000, 0x0000019f, 0xdfd70000 ],
4853 437 => [ 0x00000003, 0x00000000, 0x00000000, 0, 0x00000001, 0x80000000, 0x0000019f, 0xdfd70000 ],
4854 65001 => [ 0x00000000, 0x00000000, 0x00000000, 0, 0x00000000, 0x00000000, 0x0000019f, 0xdfd70000 ] );
4855 my $oemcp = locale_entry( $loc, "oemcp", 65001 );
4856 my $maccp = locale_entry( $loc, "maccp", undef ) || $maccpmap{$oemcp} || 65001;
4857 my $ebcdiccp = locale_entry( $loc, "ebcdiccp", undef ) || $ebcdiccpmap{$oemcp} || 500;
4858 $ebcdiccp = 500 if (defined $loc->{oemcp} && $loc->{oemcp} == 65001) || (defined $loc->{maccp} && $loc->{maccp} == 65001);
4859 my $ansicp = $ansicpmap{$oemcp} || $oemcp;
4860 my @fontsig = (0) x 8;
4861 my $sig = locale_entry( $loc, "fontsig", [] );
4862 foreach my $i (0..7) { $fontsig[$i] |= $codepagemasks{$oemcp}->[$i]; }
4863 foreach my $i (0..$#{$sig}) { $fontsig[$i] |= $sig->[$i]; }
4864 $fontsig[3] |= 1 << 31;
4865 $fontsig[3] |= 1 << 27 if $ireadinglayout == 1;
4866 $fontsig[3] |= 1 << 28 if $ireadinglayout == 3;
4868 # special cases for invariant locale
4870 unless ($loc->{name})
4872 $siso639langname = "iv";
4873 $siso639langname2 = "ivl";
4874 $senglanguage = $snativelangname = "Invariant Language";
4875 $sengcountry = $snativectryname = "Invariant Country";
4876 $sengdisplayname = "Invariant Language (Invariant Country)";
4877 $snativedisplayname = "Invariant Language (Invariant Region)";
4878 $sengcurrname = $snativecurrname = "International Monetary Fund";
4879 $scurrency = "\x{00a4}";
4880 $ifirstdayofweek = 0;
4881 $igeoid = $geotable{"US"}->{id};
4882 @stimeformat = ("HH:mm:ss");
4883 @sshortdate = ("MM/dd/yyyy", "yyyy-MM-dd");
4884 @slongdate = ("dddd, dd MMMM yyyy");
4885 @syearmonth = ("yyyy MMMM");
4886 @smonthday = ("MMMM dd", "MMMM d", "M/d", "MMM d");
4887 @sshorttime = ("HH:mm", "hh:mm tt", "H:mm", "h:mm tt");
4888 $srelativelongdate = "dddd, MMMM dd";
4889 $sposinfinity = "Infinity";
4890 $sneginfinity = "-Infinity";
4891 $spositivesign = "+";
4892 $ipospercent = $inegpercent = 0;
4895 # output data
4897 $locale_data .= pack "L<2",
4898 add_string( $sname ), # name
4899 add_string( $sopentypelang ); # LOCALE_SOPENTYPELANGUAGETAG
4901 $locale_data .= pack "S<14",
4902 $loc->{lcid} || 0x1000, # LOCALE_ILANGUAGE
4903 $unique_lcid, # unique_lcid
4904 locale_entry( $loc, "idigits", 2 ), # LOCALE_IDIGITS
4905 locale_entry( $loc, "inegnumber", 1 ), # LOCALE_INEGNUMBER
4906 $icurrdigits, # LOCALE_ICURRDIGITS
4907 $icurrency, # LOCALE_ICURRENCY
4908 $inegcurr, # LOCALE_INEGCURR
4909 locale_entry( $loc, "ilzero", 1 ), # LOCALE_ILZERO
4910 !$neutral, # LOCALE_INEUTRAL
4911 $ifirstdayofweek, # LOCALE_IFIRSTDAYOFWEEK
4912 $firstweekofyear, # LOCALE_IFIRSTWEEKOFYEAR
4913 $geo->{dialcode} || 1 , # LOCALE_ICOUNTRY,
4914 $measure, # LOCALE_IMEASURE
4915 $digitsubstitution; # LOCALE_IDIGITSUBSTITUTION
4917 $locale_data .= pack "L<18",
4918 add_string( $sgrouping ), # LOCALE_SGROUPING
4919 add_string( $smongrouping ), # LOCALE_SMONGROUPING
4920 add_string( $slist ), # LOCALE_SLIST
4921 add_string( $sdecimal ), # LOCALE_SDECIMAL
4922 add_string( $sthousand ), # LOCALE_STHOUSAND
4923 add_string( $scurrency ), # LOCALE_SCURRENCY
4924 add_string( $smondecimalsep ), # LOCALE_SMONDECIMALSEP
4925 add_string( $smonthousandsep ), # LOCALE_SMONTHOUSANDSEP
4926 add_string( $spositivesign ), # LOCALE_SPOSITIVESIGN
4927 add_string( $snegativesign ), # LOCALE_SNEGATIVESIGN
4928 add_string( $s1159 ), # LOCALE_S1159
4929 add_string( $s2359 ), # LOCALE_S2359
4930 add_strarray( @snativedigits ), # LOCALE_SNATIVEDIGITS
4931 add_strarray( @stimeformat ), # LOCALE_STIMEFORMAT
4932 add_strarray( @sshortdate ), # LOCALE_SSHORTDATE
4933 add_strarray( @slongdate ), # LOCALE_SLONGDATE
4934 add_strarray( @syearmonth ), # LOCALE_SYEARMONTH
4935 add_strarray( @sduration ); # LOCALE_SDURATION
4937 $locale_data .= pack "S<8",
4938 $idefaultlanguage || 0x1000, # LOCALE_IDEFAULTLANGUAGE
4939 $ansicp, # LOCALE_IDEFAULTANSICODEPAGE
4940 $oemcp, # LOCALE_IDEFAULTCODEPAGE
4941 $maccp, # LOCALE_IDEFAULTMACCODEPAGE
4942 $ebcdiccp, # LOCALE_IDEFAULTEBCDICCODEPAGE
4943 $igeoid < 65536 ? $igeoid : 39070, # old_geoid
4944 $papersize ? 1 : 9, # LOCALE_IPAPERSIZE
4945 0; # FIXME # islamic_cal
4947 $locale_data .= pack "L<24",
4948 add_string( $icalendartype ), # LOCALE_ICALENDARTYPE
4949 add_string( $sabbrevlangname ), # LOCALE_SABBREVLANGNAME
4950 add_string( $siso639langname ), # LOCALE_SISO639LANGNAME
4951 add_string( $senglanguage ), # LOCALE_SENGLANGUAGE
4952 add_string( $snativelangname ), # LOCALE_SNATIVELANGNAME
4953 add_string( $sengcountry ), # LOCALE_SENGCOUNTRY
4954 add_string( $snativectryname ), # LOCALE_SNATIVECTRYNAME
4955 add_string( $siso3166ctryname2 ), # LOCALE_SABBREVCTRYNAME
4956 add_string( $territory ), # LOCALE_SISO3166CTRYNAME
4957 add_string( $sintlsymbol ), # LOCALE_SINTLSYMBOL
4958 add_string( $sengcurrname ), # LOCALE_SENGCURRNAME
4959 add_string( $snativecurrname ), # LOCALE_SNATIVECURRNAME
4960 add_fontsig( @fontsig ), # LOCALE_FONTSIGNATURE
4961 add_string( $siso639langname2 ), # LOCALE_SISO639LANGNAME2
4962 add_string( $siso3166ctryname2 ), # LOCALE_SISO3166CTRYNAME2
4963 add_string( $sparent ), # LOCALE_SPARENT
4964 add_strarray( @sdayname ), # LOCALE_SDAYNAME
4965 add_strarray( @sabbrevdayname ), # LOCALE_SABBREVDAYNAME
4966 add_strarray( @smonthname ), # LOCALE_SMONTHNAME
4967 add_strarray( @sabbrevmonthname ), # LOCALE_SABBREVMONTHNAME
4968 add_strarray( @sgenitivemonth ), # LOCALE_SGENITIVEMONTH
4969 add_strarray( @sabbrevgenitivemonth ), # LOCALE_SABBREVGENITIVEMONTH
4970 add_strarray( @scalnames ), # LOCALE_SCALNAMES
4971 add_strarray( @{$loc->{sortnames}} ); # LOCALE_SSORTNAMES
4973 $locale_data .= pack "S<6",
4974 $inegpercent, # LOCALE_INEGATIVEPERCENT
4975 $ipospercent, # LOCALE_IPOSITIVEPERCENT
4976 0, # unknown
4977 $ireadinglayout, # LOCALE_IREADINGLAYOUT
4978 0x2a, # unknown
4979 0x2a; # unknown
4981 $locale_data .= pack "L<24",
4982 0, # unknown
4983 add_string( $sengdisplayname ), # LOCALE_SENGLISHDISPLAYNAME
4984 add_string( $snativedisplayname ), # LOCALE_SNATIVEDISPLAYNAME
4985 add_string( $spercent ), # LOCALE_SPERCENT
4986 add_string( $snan ), # LOCALE_SNAN
4987 add_string( $sposinfinity ), # LOCALE_SPOSINFINITY
4988 add_string( $sneginfinity ), # LOCALE_SNEGINFINITY
4989 0, # unknown
4990 add_string( $serastring ), # CAL_SERASTRING
4991 add_string( $serastring ), # CAL_SABBREVERASTRING
4992 0, # unknown
4993 add_string( $ssortlocale ), # LOCALE_SCONSOLEFALLBACKNAME
4994 add_strarray( @sshorttime ), # LOCALE_SSHORTTIME
4995 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
4996 0, # unknown
4997 add_string( $ssortlocale ), # LOCALE_SSORTLOCALE
4998 add_string( "0409:00000409" ), # FIXME # LOCALE_SKEYBOARDSTOINSTALL
4999 add_string( $sscripts ), # LOCALE_SSCRIPTS
5000 add_string( $srelativelongdate ), # LOCALE_SRELATIVELONGDATE
5001 $igeoid, # LOCALE_IGEOID
5002 add_string( $sshortestam || "a" ), # LOCALE_SSHORTESTAM
5003 add_string( $sshortestpm || "p" ), # LOCALE_SSHORTESTPM
5004 add_strarray( @smonthday ), # LOCALE_SMONTHDAY
5005 add_string( "k0-windows-us" ) # FIXME # keyboard_layout
5008 # output language groups
5010 my %groups;
5011 add_registry_key( $nlskey, "Locale", "00000409" );
5012 foreach my $loc (@locales)
5014 next unless defined $loc->{lcid};
5015 next if ($loc->{lcid} & 0x80000000);
5016 next if !defined($loc->{alias}) && $loc->{name} !~ /-$loc->{territory}/; # skip neutral locales
5017 my $group = locale_entry( $loc, "group", 1 );
5018 my $name = sprintf( "%08x", $loc->{lcid} );
5019 my $val = sprintf( "%x", $group );
5020 add_registry_string_value( $nlskey, "Locale", $name, $val ) unless ($loc->{lcid} & 0x000f0000);
5021 add_registry_string_value( $nlskey, "Locale\\Alternate Sorts", $name, $val ) if $loc->{name} =~ /_/;
5022 $groups{$val} = 1;
5024 foreach my $group (keys %groups) { add_registry_string_value( $nlskey, "Language Groups", $group, "1" ); }
5026 # output calendar data
5028 my $calendar_data = "";
5029 foreach my $cal (@calendars)
5031 my $scalname = $cal->{name};
5032 my $iyearoffsetrange = 0;
5033 my $itwodigityearmax = $cal->{itwodigityearmax};
5034 my @sshortdate;
5035 my @syearmonth;
5036 my @slongdate;
5037 my @serastring;
5038 my @sdayname;
5039 my @sabbrevdayname;
5040 my @smonthname;
5041 my @sabbrevmonthname;
5042 my @smonthday;
5043 my @sabbreverastring;
5044 my @sshortestdayname;
5046 my $type = $cal->{type};
5047 if (defined $cal->{locale} && defined $type)
5049 my $loc = $lcnames{$cal->{locale}};
5050 my $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMd' and not(\@alt)]" );
5051 push @sshortdate, $fmt if $fmt;
5052 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMd' and not(\@alt)]" );
5053 push @sshortdate, $fmt if $fmt;
5054 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yMMMd' and not(\@alt)]" );
5055 push @sshortdate, $fmt if $fmt;
5056 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateTimeFormats/availableFormats/dateFormatItem[\@id='yyyyMMMd' and not(\@alt)]" );
5057 push @sshortdate, $fmt if $fmt;
5058 @sshortdate = map convert_date_format($_), @sshortdate;
5059 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='full']/dateFormat/pattern[not(\@alt)]" );
5060 push @slongdate, $fmt if $fmt;
5061 $fmt = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/dateFormats/dateFormatLength[\@type='long']/dateFormat/pattern[not(\@alt)]" );
5062 push @slongdate, $fmt if $fmt;
5063 @slongdate = map convert_date_format($_), @slongdate;
5065 foreach my $n (1..13)
5067 my $name = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='wide']/month[\@type='$n' and not(\@yeartype)]" );
5068 my $abbrev = loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/months/monthContext[\@type='format']/monthWidth[\@type='abbreviated']/month[\@type='$n' and not(\@yeartype)]" );
5069 push @smonthname, $name || "";
5070 push @sabbrevmonthname, $abbrev || $name || "";
5073 $scalname ||= loc_query( $loc, "/ldml/localeDisplayNames/types/type[\@key='calendar' and \@type='$type']" );
5074 if (defined $cal->{eras})
5076 my @eras;
5077 my $idx = 1;
5078 foreach my $era (@{$cal->{eras}})
5080 my $start = xml_query( $suppl, "/supplementalData/calendarData/calendar[\@type='$type']/eras/era[\@type='$era']/\@start" );
5081 next unless $start =~ /^(-?\d+)-(\d+)-(\d+)/;
5082 my ($year, $mon, $day, $zero, $first) = ($1, $2, $3, $1 - 1, 1);
5083 if ($zero < 0)
5085 $first -= $zero;
5086 $year = 1;
5087 $itwodigityearmax = 2049 - $zero;
5089 unshift @eras, pack( "S<8", 6, $idx++, $year, $mon, $day, $zero, $first, 0 );
5090 push @serastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraAbbr/era[\@type='$era']" );
5091 push @sabbreverastring, loc_query( $loc, "/ldml/dates/calendars/calendar[\@type='$type']/eras/eraNarrow/era[\@type='$era']" );
5093 $iyearoffsetrange = add_str_data( pack "S<L<*", scalar @eras, map { add_str_data($_); } @eras );
5097 @sshortdate = @{$cal->{sshortdate}} if defined $cal->{sshortdate} && !@sshortdate;
5098 @syearmonth = @{$cal->{syearmonth}} if defined $cal->{syearmonth};
5099 @slongdate = @{$cal->{slongdate}} if defined $cal->{slongdate} && !@slongdate;
5100 @serastring = @{$cal->{serastring}} if defined $cal->{serastring} && !@serastring;
5101 @sdayname = @{$cal->{sdayname}} if defined $cal->{sdayname};
5102 @sabbrevdayname = @{$cal->{sabbrevdayname}} if defined $cal->{sabbrevdayname};
5103 @smonthname = @{$cal->{smonthname}} if defined $cal->{smonthname} && !join("",@smonthname);
5104 @sabbrevmonthname = @{$cal->{sabbrevmonthname}} if defined $cal->{sabbrevmonthname} && !join("",@sabbrevmonthname);
5105 @smonthday = @{$cal->{smonthday}} if defined $cal->{smonthday};
5106 @sabbreverastring = @{$cal->{sabbreverastring}} if defined $cal->{sabbreverastring} && !@sabbreverastring;
5107 @sshortestdayname = @{$cal->{sshortestdayname}} if defined $cal->{sshortestdayname};
5108 my $srelativelongdate = $cal->{srelativelongdate};
5110 @serastring = ("A.D.") unless @serastring;
5111 @sabbreverastring = ("AD") unless @sabbreverastring;
5113 if ($cal->{id} != 1) # calendar 1 is a placeholder, information is fetched from locale instead
5115 @sshortdate = ("") unless @sshortdate;
5116 @syearmonth = ("") unless @syearmonth;
5117 @slongdate = ("") unless @slongdate;
5118 @sdayname = ("") x 7 unless @sdayname;
5119 @sabbrevdayname = ("") x 7 unless @sabbrevdayname;
5120 @sshortestdayname = ("") x 7 unless @sshortestdayname;
5121 @smonthname = ("") x 13 unless @smonthname;
5122 @sabbrevmonthname = ("") x 13 unless @sabbrevmonthname;
5123 @smonthday = ("") unless @smonthday;
5126 $calendar_data .= pack "S<2L<17",
5127 $cal->{id}, # CAL_ICALINTVALUE
5128 $itwodigityearmax || 99, # CAL_ITWODIGITYEARMAX
5129 add_strarray( @sshortdate ), # CAL_SSHORTDATE
5130 add_strarray( @syearmonth ), # CAL_SYEARMONTH
5131 add_strarray( @slongdate ), # CAL_SLONGDATE
5132 add_strarray( @serastring ), # CAL_SERASTRING
5133 $iyearoffsetrange, # CAL_IYEAROFFSETRANGE
5134 add_strarray( @sdayname ), # CAL_SDAYNAME
5135 add_strarray( @sabbrevdayname ), # CAL_SABBREVDAYNAME
5136 add_strarray( @smonthname ), # CAL_SMONTHNAME
5137 add_strarray( @sabbrevmonthname ), # CAL_SABBREVMONTHNAME
5138 add_string( $scalname ), # CAL_SCALNAME
5139 add_strarray( @smonthday ), # CAL_SMONTHDAY
5140 add_strarray( @sabbreverastring ), # CAL_SABBREVERASTRING
5141 add_strarray( @sshortestdayname ), # CAL_SSHORTESTDAYNAME
5142 add_string( $srelativelongdate ); # CAL_SRELATIVELONGDATE
5145 # output locale header
5147 my $nb_lcids = scalar keys %lcids;
5148 my $nb_locales = scalar grep { !defined $_->{alias} } @locales;
5149 my $nb_lcnames = scalar keys %lcnames;
5150 my $locale_size = length($locale_data) / $nb_locales;
5151 my $nb_calendars = scalar @calendars;
5152 my $calendar_size = length($calendar_data) / $nb_calendars;
5153 my $lcids_offset = 19 * 4; # size of header
5154 my $lcnames_offset = $lcids_offset + length $lcid_data;
5155 my $locales_offset = $lcnames_offset + length $lcname_data;
5156 my $calendar_offset = $locales_offset + length $locale_data;
5157 my $strings_offset = $calendar_offset + length $calendar_data;
5159 my $locale_header = pack "L<7S<4L<S<2L<3S<2L<4",
5160 8, # offset
5162 7, # version
5163 0x5344534e, # magic
5164 0, 0, 0,
5166 $nb_lcids,
5167 $nb_locales,
5168 $locale_size,
5169 $locales_offset,
5170 $nb_lcnames,
5172 $lcids_offset,
5173 $lcnames_offset,
5175 $nb_calendars,
5176 $calendar_size,
5177 $calendar_offset,
5178 $strings_offset,
5179 0, 0;
5181 return align_string( 4, $locale_header . $lcid_data . $lcname_data . $locale_data . $calendar_data . $string_data );
5185 ################################################################
5186 # build the charmaps table for locale.nls
5187 sub build_charmaps_data()
5189 my $data = "";
5191 # MAP_FOLDDIGITS
5192 my @digits = (ord('0') .. ord('9'));
5193 $digitmap_table[0x3007] = $digits[0]; # Ideographic Zero
5194 @digitmap_table[0x0c78..0x0c7b] = @digits[0..3]; # Telugu Fraction Digits
5195 @digitmap_table[0x0c7c..0x0c7e] = @digits[1..3]; # Telugu Fraction Digits
5196 @digitmap_table[0x3021..0x3029] = @digits[1..9]; # Hangzhou Numerals
5197 @digitmap_table[0xa8e0..0xa8e9] = @digits; # Combining Devanagari Digits
5198 @digitmap_table[0x10107..0x1010f] = @digits[1..9]; # Aegean Numbers
5199 $digitmap_table[0x10320] = $digits[1]; # Old Italic Numerals
5200 $digitmap_table[0x10321] = $digits[5]; # Old Italic Numerals
5201 $data .= dump_binary_case_table( @digitmap_table );
5203 # CJK compatibility map
5204 $data .= dump_binary_case_table( @cjk_compat_table );
5206 # LCMAP_HIRAGANA/KATAKANA
5207 my (@hiragana_table, @katakana_table);
5208 foreach my $ch (0x3041..0x3096, 0x309d..0x309e)
5210 $hiragana_table[$ch + 0x60] = $ch;
5211 $katakana_table[$ch] = $ch + 0x60;
5213 $data .= dump_binary_case_table( @hiragana_table ) . dump_binary_case_table( @katakana_table );
5215 # LCMAP_HALFWIDTH/FULLWIDTH
5216 $halfwidth_table[0x2018] = 0x0027;
5217 $halfwidth_table[0x2019] = 0x0027;
5218 $halfwidth_table[0x201c] = 0x0022;
5219 $halfwidth_table[0x201d] = 0x0022;
5220 $halfwidth_table[0x309b] = 0xff9e;
5221 $halfwidth_table[0x309c] = 0xff9f;
5222 $fullwidth_table[0x309b] = 0x3099;
5223 $fullwidth_table[0x309c] = 0x309a;
5224 $data .= dump_binary_case_table( @halfwidth_table ) . dump_binary_case_table( @fullwidth_table );
5226 # LCMAP_TRADITIONAL/SIMPLIFIED_CHINESE
5227 $data .= dump_binary_case_table( @chinese_traditional_table ) . dump_binary_case_table( @chinese_simplified_table );
5229 # FIXME: some more unknown tables here
5231 return $data;
5235 ################################################################
5236 # build the geoids table for locale.nls
5237 sub build_geoids_data()
5239 my $data = "";
5240 my %index;
5241 my $idx = 0;
5242 my @geo_header = (0x00650067, 0x0000006f, 0, 4 * 7, scalar @geoids, 0, 0);
5244 foreach my $geo (@geoids)
5246 my $id = $geo->{id};
5247 $geo = $geo->{alias} if defined $geo->{alias};
5248 my $lat = "0.000";
5249 my $long = "0.000";
5250 my $iso2 = $geo->{iso2} || "XX";
5251 my $iso3 = $geo->{iso3} || "XX";
5252 my $isregion = $geo->{region} || (defined $geo->{uncode} && !defined $geo->{iso2});
5253 my $sintlsymbol = $geo->{sintlsymbol} || "XDR";
5254 my $scurrency = $geo->{scurrency} || "\x{00a4}";
5256 $data .= pack( "L<", $id );
5257 $data .= pad_string( 24, encode( "UTF16LE", $lat ));
5258 $data .= pad_string( 24, encode( "UTF16LE", $long ));
5259 $data .= pack( "L<2", $isregion ? 14 : 16, $geo->{parentid} || 39070 );
5260 $data .= pad_string( 8, encode( "UTF16LE", $iso2 ));
5261 $data .= pad_string( 8, encode( "UTF16LE", $iso3 ));
5262 $data .= pack( "S<2", $geo->{uncode} || 0, $geo->{dialcode} || 0 );
5263 $data .= pad_string( 8, encode( "UTF16LE", $sintlsymbol ));
5264 $data .= pad_string( 16, encode( "UTF16LE", $scurrency ));
5265 $index{$geo->{name}} = $idx if $geo->{name};
5266 $idx++;
5268 $index{"XX"} = $index{"001"};
5270 $geo_header[5] = $geo_header[3] + length $data;
5271 $geo_header[6] = scalar keys %index;
5273 foreach my $name (sort keys %index)
5275 $data .= pad_string( 8, encode( "UTF16LE", $name ));
5276 $data .= pack "L<", $index{$name};
5279 $geo_header[2] = $geo_header[3] + length $data;
5280 return pack( "L<7", @geo_header ) . $data;
5284 ################################################################
5285 # build a binary locale table
5286 sub dump_locales($$)
5288 my ($filename, $chartypes) = @_;
5290 printf "Building $filename\n";
5292 my $locale_data = build_locale_data();
5293 my $charmaps_data = build_charmaps_data();
5294 my $geoids_data = build_geoids_data();
5295 my $scripts_data = ""; # FIXME
5297 my @header = ( 0 ) x 8;
5298 $header[0] = 4 * scalar @header; # chartypes offset
5299 $header[4] = $header[0] + length $chartypes; # locales offset
5300 $header[5] = $header[4] + length $locale_data; # charmaps offset
5301 $header[6] = $header[5] + length $charmaps_data; # geoids offset
5302 $header[7] = $header[6] + length $geoids_data; # scripts offset
5304 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
5305 print OUTPUT pack "L<*", @header;
5306 print OUTPUT $chartypes, $locale_data, $charmaps_data, $geoids_data, $scripts_data;
5307 close OUTPUT;
5308 save_file($filename);
5312 ################################################################
5313 # return the day of week of the first of the month
5314 sub month_first_dow($$)
5316 my ($year, $month) = @_;
5317 my @time = gmtime( timegm_modern( 0, 0, 0, 1, $month - 1, $year ));
5318 return $time[6];
5322 ################################################################
5323 # compare system time values
5324 sub compare_systime($$)
5326 my ($a, $b) = @_;
5327 return $a->[0] <=> $b->[0] ||
5328 $a->[1] <=> $b->[1] ||
5329 $a->[2] <=> $b->[2] ||
5330 $a->[3] <=> $b->[3] ||
5331 $a->[4] <=> $b->[4] ||
5332 $a->[5] <=> $b->[5] ||
5333 $a->[6] <=> $b->[6];
5337 ################################################################
5338 # compare the zone transition date with the rule date
5339 sub compare_transition_date($$$$)
5341 my ($stdoff, $isdst, $zone, $rule) = @_;
5343 if (scalar @{$zone} <= 1)
5345 return (!defined($zone->[0]) || $zone->[0] > $rule->[0]) ? 1 : -1;
5348 my @date = parse_transition_date( $stdoff, $isdst, $zone->[0], $zone->[1], $zone->[2], $zone->[3] || 0 );
5349 return compare_systime( \@date, $rule );
5353 ################################################################
5354 # get the Windows zone names from the CLDR data
5355 sub load_windows_zones()
5357 my $current_name;
5358 my %names;
5359 my $base = "cldr-release-$CLDRVERSION";
5360 my $INPUT = open_data_file( "cldr", "$base/common/supplemental/windowsZones.xml" );
5361 while (<$INPUT>)
5363 if (/<!-- +(\(UTC.*) -->.*/)
5365 $current_name = $1;
5367 if (/<mapZone other="(.*)" territory="001" type="(.*)"\/>/)
5369 $names{$1} = [ $current_name, $2 ];
5372 close $INPUT;
5373 return %names;
5377 ################################################################
5378 # parse a transition date specification from the tzdata files
5379 sub parse_transition_date($$@)
5381 use integer;
5382 my ($stdoff, $isdst, $year, $in, $on, $at) = @_;
5384 $on = "1" unless defined $on;
5385 $at = "0" unless defined $at;
5387 my %months = ( Jan => 1, Feb => 2, Mar => 3, Apr => 4, May => 5, Jun => 6,
5388 Jul => 7, Aug => 8, Sep => 9, Oct => 10, Nov => 11, Dec => 12 );
5389 my %days = ( Sun => 0, Mon => 1, Tue => 2, Wed => 3, Thu => 4, Fri => 5, Sat => 6 );
5391 my $mon = $in ? $months{$in} : 1;
5392 my ($week, $dow, $flag, $time, $sec);
5393 my $first = month_first_dow( $year, $mon );
5395 if ($on =~ /^last(.*)$/)
5397 $week = 5;
5398 $dow = $days{$1};
5400 elsif ($on =~ /^(.*)>=(\d+)$/)
5402 $dow = $days{$1};
5403 my $diff = ($first + 6 - $dow) % 7;
5404 $week = $2 >= 25 ? 5 : ($2 + 6 + $diff) / 7;
5406 elsif ($on =~ /^(.*)<=(\d+)$/)
5408 $dow = $days{$1};
5409 my $diff = ($first + $2 + 6 - $dow) % 7;
5410 $week = ($2 + 6 - $diff) / 7;
5411 if (!$week)
5413 $week = 5;
5414 if (!--$mon) { $mon = 12; $year--; }
5417 elsif ($on =~ /^\d+$/)
5419 $dow = ($first + $on - 1) % 7;
5420 $week = $on >= 25 ? 5 : ($on + 6) / 7;
5422 else
5424 die "unsupported date specification $year $in $on $at";
5427 if ($at =~ /^(\d+):(\d+):(\d+)([uws]?)$/)
5429 $time = $1 * 60 + $2;
5430 $sec = $3;
5431 $flag = $4;
5433 elsif ($at =~ /^(\d+):(\d+)([uws]?)$/)
5435 $time = $1 * 60 + $2;
5436 $flag = $3;
5438 elsif ($at =~ /^(\d+)([uws]?)$/)
5440 $time = $1 * 60;
5441 $flag = $2;
5443 else
5445 die "unsupported time specification $year $in $on $at";
5448 $flag ||= "w";
5449 $time -= $stdoff if $flag eq "u";
5450 $time += 60 if !$isdst && $flag ne "w";
5452 if ($time < 0) # previous day
5454 $week-- if $week < 5 && $dow == month_first_dow( $year, $mon );
5455 $week-- if $week == 5 && $dow == month_first_dow( $year + ($mon == 12), $mon % 12 + 1 );
5456 if (!$week)
5458 $week = 5;
5459 if (!--$mon) { $mon = 12; $year--; }
5461 $dow = ($dow + 6) % 7;
5462 $time += 24 * 60;
5465 return ($year, $mon, $week, $dow, $time / 60, $time % 60, $sec || 0);
5469 ################################################################
5470 # parse a system time value as a SYSTEMTIME structure
5471 sub pack_systime(@)
5473 my ($year, $mon, $week, $dow, $hour, $min, $sec) = @_;
5474 return pack "S<8", 0, $mon, $dow, $week, $hour < 24 ? ($hour, $min, $sec, 0) : (23, 59, 59, 999);
5478 ################################################################
5479 # parse a timezone offset from the tzdata files
5480 sub parse_tz_offset($)
5482 my ($hour, $min) = split /:/, shift;
5483 $min ||= 0;
5484 return $hour < 0 ? -$hour * 60 + $min : -$hour * 60 - $min; # invert sign
5488 ################################################################
5489 # build the timezone data
5490 sub dump_timezones($@)
5492 my $filename = shift;
5493 my $FIRST_YEAR = 2000;
5494 my $LAST_YEAR = 2030;
5496 my %names = load_windows_zones();
5497 my %zones;
5498 my %rules;
5499 my %links;
5500 my %res_indices;
5502 printf "Building $filename\n";
5504 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
5505 print OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
5506 print OUTPUT "#include \"winresrc.h\"\n\n";
5507 print OUTPUT "#pragma makedep po\n\n";
5508 print OUTPUT "LANGUAGE LANG_ENGLISH, SUBLANG_DEFAULT\n\n";
5509 print OUTPUT "STRINGTABLE\n{\n";
5511 # load tzdata files
5513 foreach my $filename (@_)
5515 my $FILE = open_data_file( "tzdata", $filename );
5516 my $zonename;
5517 while (<$FILE>)
5519 chomp;
5520 s/\#.*$//;
5521 next if /^\s*$/;
5522 my @fields = split /\s+/;
5523 if ($fields[0] eq "Zone" || ($zonename && $fields[0] eq ""))
5525 shift @fields;
5526 $zonename = shift @fields unless $zonename;
5527 my ($stdoff, $rules, $dummy, @date) = @fields;
5528 $zones{$zonename} ||= [ ];
5529 push @{$zones{$zonename}}, [ parse_tz_offset( $stdoff ), $rules, @date ];
5530 $zonename = undef unless @date; # last entry doesn't have an until date
5531 next;
5533 if ($fields[0] eq "Rule")
5535 shift @fields;
5536 my ($rulename, $from, $to, $dummy, $in, $on, $at, $save) = @fields;
5537 $to = $from if $to eq "only";
5538 $to = $LAST_YEAR if $to eq "max";
5539 push @{$rules{$rulename}}, [ parse_tz_offset( $save ), $from, $to, $in, $on, $at ];
5540 next;
5542 if ($fields[0] eq "Link")
5544 $links{$fields[2]} = $fields[1];
5545 next;
5547 die "unrecognized line $_";
5549 close $FILE;
5552 foreach my $name (sort { uc($a) cmp uc($b) } keys %names)
5554 my ($display, $zone) = @{$names{$name}};
5555 $zone = $links{$zone} if defined $links{$zone};
5557 # build list of transitions
5559 my @transitions;
5560 my @from_date = ( 1 );
5561 my $last_stdoff = 0;
5562 for (my $i = 0; $i < scalar @{$zones{$zone}}; $i++)
5564 my ($stdoff, $rule, @until_date) = @{$zones{$zone}->[$i]};
5565 my $isdst = ($last_stdoff != $stdoff);
5566 $from_date[0] ||= $LAST_YEAR;
5567 my @systime = parse_transition_date( $stdoff, $isdst, @from_date );
5568 push @transitions, [ $stdoff, -1, \@systime ];
5570 if (defined $rules{$rule})
5572 foreach my $r (@{$rules{$rule}})
5574 my ($offset, $from, $to, $in, $on, $at) = @{$r};
5575 foreach my $year ($from..$to)
5577 next if $year < $from_date[0];
5578 next if $until_date[0] && $year > $until_date[0];
5579 my @systime = parse_transition_date( $stdoff, !!$offset, $year, $in, $on, $at );
5580 next if compare_transition_date( $stdoff, $isdst, \@until_date, \@systime ) <= 0;
5581 my $ret = compare_transition_date( $stdoff, $isdst, \@from_date, \@systime );
5582 next if $ret > 0;
5583 pop @transitions if !$ret; # remove transition if there's a dst change at the same time
5584 push @transitions, [ $stdoff, $offset, \@systime ];
5588 @from_date = @until_date;
5589 $last_stdoff = $stdoff;
5591 @transitions = sort { compare_systime( $a->[2], $b->[2] ) } @transitions;
5593 # build per-year dynamic info
5595 my @info;
5596 my $last_dstoff = 0;
5597 my $last_dst = 0;
5598 my $year = $FIRST_YEAR;
5599 while ($year <= $LAST_YEAR)
5601 if (@transitions && $transitions[0]->[2]->[0] < $year)
5603 $last_stdoff = $transitions[0]->[0];
5604 shift @transitions;
5605 next;
5607 my ($std, $dst, @trans);
5608 my $cur_stdoff = $last_stdoff;
5609 my $cur_dstoff = ($name =~ /^UTC/) ? 0 : -60;
5610 while (@transitions && $transitions[0]->[2]->[0] == $year)
5612 my $t = shift @transitions;
5613 my ($stdoff, $dstoff, $systime) = @{$t};
5614 $systime = pack_systime( @{$systime} );
5615 if (!$dstoff) # std
5617 $cur_stdoff = $stdoff unless $std;
5618 $std = $systime;
5620 elsif ($dstoff != -1) # dst
5622 $cur_dstoff = $dstoff unless $dst;
5623 $dst ||= $systime;
5625 elsif ($stdoff != $last_stdoff) # rule transition
5627 # Handle a special case: Samoa moved to the other side of
5628 # the date line between 2011-12-03 and 2012-01-01,
5629 # entirely skipping the day 2011-12-31. We ignore this
5630 # change because it happens on a year boundary and more
5631 # importantly it would generate on offset of -25 hours,
5632 # which some programs (e.g., Mono) do not like. See
5633 # https://bugs.winehq.org/show_bug.cgi?id=51758
5635 if ($last_stdoff - $stdoff < 24 * 60)
5637 @trans = ($last_stdoff, $stdoff, $systime);
5638 $cur_stdoff = $stdoff;
5641 elsif ($dst) # rule transition with no stdoff change
5643 $std = $systime;
5645 $last_dstoff = ($dstoff == -1) ? 0 : $dstoff;
5647 $last_stdoff = $cur_stdoff;
5649 if ($cur_dstoff > 0) # swap std and dst to ensure that offset is negative
5651 ($std, $dst) = ($dst, $std);
5652 $cur_stdoff += $cur_dstoff;
5653 $cur_dstoff = -$cur_dstoff;
5656 if (@trans)
5658 # heuristic to prefer switching dst
5659 if ($last_dst == $year - 1 || (!$last_dst && $trans[0] > $trans[1]))
5661 $dst ||= $trans[2];
5662 $cur_stdoff = $trans[0];
5663 $cur_dstoff = $trans[1] - $trans[0];
5665 else
5667 $std ||= $trans[2];
5668 $cur_stdoff = $trans[1];
5669 $cur_dstoff = $trans[0] - $trans[1];
5673 if ($std || $dst)
5675 $std ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
5676 $dst ||= pack_systime( parse_transition_date( 0, 0, $year, "Jan", 1 ));
5677 $last_dst = $year;
5679 else
5681 $std = pack "S<8", 0;
5682 $dst = pack "S<8", 0;
5683 $cur_stdoff += $last_dstoff;
5685 $info[$year++] = pack( "l<3", $cur_stdoff, 0, $cur_dstoff ) . $std . $dst;
5688 # output registry keys
5690 my $std_name = $name eq "UTC" ? "Coordinated Universal Time" : $name;
5691 my $dlt_name = $std_name =~ s/Standard Time/Daylight Time/r;
5692 my $res_idx = hex( substr( Digest::SHA::sha1_hex($name), -3, 3 )) << 4;
5693 $res_idx += 16 while exists $res_indices{$res_idx};
5694 $res_indices{$res_idx} = 1;
5696 add_registry_string_value( $zonekey, $name, "Display", $display );
5697 add_registry_string_value( $zonekey, $name, "Std", $std_name );
5698 add_registry_string_value( $zonekey, $name, "Dlt", $dlt_name );
5699 add_registry_string_value( $zonekey, $name, "MUI_Std", sprintf( "\@tzres.dll,-%u", $res_idx ));
5700 add_registry_string_value( $zonekey, $name, "MUI_Dlt", sprintf( "\@tzres.dll,-%u", $res_idx + 1 ));
5701 add_registry_string_value( $zonekey, $name, "MUI_Display", sprintf( "\@tzres.dll,-%u", $res_idx + 2 ));
5702 add_registry_binary_value( $zonekey, $name, "TZI", $info[$LAST_YEAR] );
5704 printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx, $std_name;
5705 printf OUTPUT "%7d \"#msgctxt#maximum 31 characters#%s\"\n", $res_idx + 1, $dlt_name;
5706 printf OUTPUT "%7d \"%s\"\n", $res_idx + 2, $display;
5708 my $first_year = $FIRST_YEAR;
5709 my $last_year = $LAST_YEAR;
5710 $last_year-- while $last_year > $FIRST_YEAR && $info[$last_year] eq $info[$last_year - 1];
5711 $first_year++ while $first_year < $last_year && $info[$first_year] eq $info[$last_year];
5713 next if $last_year <= $first_year;
5715 foreach my $i ($first_year..$last_year)
5717 add_registry_binary_value( $zonekey, "$name\\Dynamic DST", $i, $info[$i] );
5719 add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "FirstEntry", $first_year );
5720 add_registry_dword_value( $zonekey, "$name\\Dynamic DST", "LastEntry", $last_year );
5723 print OUTPUT "}\n";
5724 close OUTPUT;
5725 save_file($filename);
5729 ################################################################
5730 # build the script to create registry keys
5731 sub dump_registry_script($%)
5733 my ($filename, %keys) = @_;
5734 my $indent = 1;
5735 my @prev;
5737 printf "Building %s\n", $filename;
5738 open OUTPUT, ">$filename.new" or die "Cannot create $filename";
5739 print OUTPUT "HKLM\n{\n";
5740 foreach my $k (sort { ($a =~ tr/a-z\\/A-Z\001/r) cmp ($b =~ tr/a-z\\/A-Z\001/r) } keys %keys)
5742 my @subkeys = split /\\/, $k;
5743 while (@prev && @subkeys && $prev[0] eq $subkeys[0]) { shift @prev; shift @subkeys; }
5744 while (@prev) { printf OUTPUT "%*s}\n", 4 * --$indent, ""; shift @prev; }
5745 my ($def, @vals) = @{$keys{$k}};
5746 for (my $i = 0; $i < @subkeys; $i++)
5748 my $name = $subkeys[$i];
5749 my $prefix = "";
5750 if ($name =~ /^-/)
5752 $name =~ s/^-//;
5753 $prefix = "NoRemove ";
5755 if ($name =~ /\s/)
5757 $name = "'$name'";
5759 printf OUTPUT "%*s%s%s%s\n%*s{\n", 4 * $indent, "", $prefix, $name,
5760 $i == $#subkeys && $def ? " = s '$def'" : "", 4 * $indent, "";
5761 $indent++;
5763 foreach my $v (sort @vals) { printf OUTPUT "%*sval $v\n", 4 * $indent, ""; }
5764 @prev = split /\\/, $k;
5766 while (@prev) { printf OUTPUT "%*s}\n", 4 * --$indent, ""; shift @prev; }
5767 printf OUTPUT "}\n";
5768 close OUTPUT;
5769 save_file($filename);
5773 ################################################################
5774 # save a file if modified
5775 sub save_file($)
5777 my $file = shift;
5778 if (-f $file && !system "cmp $file $file.new >/dev/null")
5780 unlink "$file.new";
5782 else
5784 rename "$file.new", "$file";
5789 ################################################################
5790 # main routine
5792 chdir ".." if -f "./make_unicode";
5793 load_data();
5794 dump_bidi_dir_table( "dlls/gdi32/uniscribe/direction.c" );
5795 dump_bidi_dir_table( "dlls/dwrite/direction.c" );
5796 dump_mirroring( "dlls/gdi32/uniscribe/mirror.c" );
5797 dump_mirroring( "dlls/dwrite/mirror.c" );
5798 dump_bracket( "dlls/gdi32/uniscribe/bracket.c" );
5799 dump_bracket( "dlls/dwrite/bracket.c" );
5800 dump_shaping( "dlls/gdi32/uniscribe/shaping.c" );
5801 dump_arabic_shaping( "dlls/dwrite/shapers/arabic_table.c" );
5802 dump_linebreak( "dlls/gdi32/uniscribe/linebreak.c" );
5803 dump_linebreak( "dlls/dwrite/linebreak.c" );
5804 dump_scripts( "dlls/dwrite/scripts" );
5805 dump_indic( "dlls/gdi32/uniscribe/indicsyllable.c" );
5806 dump_vertical( "dlls/win32u/vertical.c", 1 );
5807 dump_vertical( "dlls/wineps.drv/vertical.c", 0 );
5808 dump_intl_nls("nls/l_intl.nls");
5809 dump_norm_table( "nls/normnfc.nls" );
5810 dump_norm_table( "nls/normnfd.nls" );
5811 dump_norm_table( "nls/normnfkc.nls" );
5812 dump_norm_table( "nls/normnfkd.nls" );
5813 dump_norm_table( "nls/normidna.nls" );
5814 my $chartypes = dump_sortkey_table( "nls/sortdefault.nls" );
5815 dump_locales( "nls/locale.nls", $chartypes );
5816 foreach my $file (@allfiles) { dump_msdata_codepage( $file ); }
5817 dump_eucjp_codepage();
5818 dump_timezones( "dlls/tzres/tzres.rc", @timezone_files );
5819 dump_registry_script( "dlls/kernelbase/kernelbase.rgs", %registry_keys );
5821 exit 0;
5823 # Local Variables:
5824 # compile-command: "./make_unicode"
5825 # End: