Use AC_LIBTOOL_WIN32_DLL.
[libidn.git] / lib / pr29.c
bloba069af2e5aeb830db03699c0d95dd6f71941b0d9
/* pr29.c --- Detect strings that are non-idempotent under NFKC in Unicode 3.2.
 * Copyright (C) 2004, 2005 Simon Josefsson.
 *
 * This file is part of GNU Libidn.
 *
 * GNU Libidn is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * GNU Libidn is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GNU Libidn; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
 */
22 #include "pr29.h"
24 /* Get stringprep_utf8_to_ucs4. */
25 #include <stringprep.h>
/*
 * The tables used in this file were extracted by Simon Josefsson from
 * pr-29.html and DerivedCombiningClass-3.2.0.txt, as published by
 * Unicode Inc., for the GNU Libidn project.
 */
/* Code points with a non-zero combining class, extracted from
   DerivedCombiningClass-3.2.0.txt.  Each group comment reads
   "<combining class> # <general category> [<count>] <name or name range>".
   Entries are grouped by combining class rather than sorted by code
   point, and the list carries no terminating sentinel element.  */
static const uint32_t nzcc[] = {
  /* 1 # Mn [5] COMBINING TILDE OVERLAY..
   *          ..COMBINING LONG SOLIDUS OVERLAY */
  0x0334, 0x0335, 0x0336, 0x0337, 0x0338,
  /* 1 # Mn [2] COMBINING LONG VERTICAL LINE OVERLAY..
   *          ..COMBINING SHORT VERTICAL LINE OVERLAY */
  0x20D2, 0x20D3,
  /* 1 # Mn [3] COMBINING RING OVERLAY..
   *          ..COMBINING ANTICLOCKWISE RING OVERLAY */
  0x20D8, 0x20D9, 0x20DA,
  /* 1 # Mn [2] COMBINING REVERSE SOLIDUS OVERLAY..
   *          ..COMBINING DOUBLE VERTICAL STROKE OVERLAY */
  0x20E5, 0x20E6,
  /* 1 # Mn COMBINING LEFTWARDS ARROW OVERLAY */
  0x20EA,
  /* 1 # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..
   *          ..MUSICAL SYMBOL COMBINING TREMOLO-3 */
  0x1D167, 0x1D168, 0x1D169,
  /* 7 # Mn DEVANAGARI SIGN NUKTA */
  0x093C,
  /* 7 # Mn BENGALI SIGN NUKTA */
  0x09BC,
  /* 7 # Mn GURMUKHI SIGN NUKTA */
  0x0A3C,
  /* 7 # Mn GUJARATI SIGN NUKTA */
  0x0ABC,
  /* 7 # Mn ORIYA SIGN NUKTA */
  0x0B3C,
  /* 7 # Mn MYANMAR SIGN DOT BELOW */
  0x1037,
  /* 8 # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..
   *          ..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK */
  0x3099, 0x309A,
  /* 9 # Mn DEVANAGARI SIGN VIRAMA */
  0x094D,
  /* 9 # Mn BENGALI SIGN VIRAMA */
  0x09CD,
  /* 9 # Mn GURMUKHI SIGN VIRAMA */
  0x0A4D,
  /* 9 # Mn GUJARATI SIGN VIRAMA */
  0x0ACD,
  /* 9 # Mn ORIYA SIGN VIRAMA */
  0x0B4D,
  /* 9 # Mn TAMIL SIGN VIRAMA */
  0x0BCD,
  /* 9 # Mn TELUGU SIGN VIRAMA */
  0x0C4D,
  /* 9 # Mn KANNADA SIGN VIRAMA */
  0x0CCD,
  /* 9 # Mn MALAYALAM SIGN VIRAMA */
  0x0D4D,
  /* 9 # Mn SINHALA SIGN AL-LAKUNA */
  0x0DCA,
  /* 9 # Mn THAI CHARACTER PHINTHU */
  0x0E3A,
  /* 9 # Mn TIBETAN MARK HALANTA */
  0x0F84,
  /* 9 # Mn MYANMAR SIGN VIRAMA */
  0x1039,
  /* 9 # Mn TAGALOG SIGN VIRAMA */
  0x1714,
  /* 9 # Mn HANUNOO SIGN PAMUDPOD */
  0x1734,
  /* 9 # Mn KHMER SIGN COENG */
  0x17D2,
  /* 10 # Mn HEBREW POINT SHEVA */
  0x05B0,
  /* 11 # Mn HEBREW POINT HATAF SEGOL */
  0x05B1,
  /* 12 # Mn HEBREW POINT HATAF PATAH */
  0x05B2,
  /* 13 # Mn HEBREW POINT HATAF QAMATS */
  0x05B3,
  /* 14 # Mn HEBREW POINT HIRIQ */
  0x05B4,
  /* 15 # Mn HEBREW POINT TSERE */
  0x05B5,
  /* 16 # Mn HEBREW POINT SEGOL */
  0x05B6,
  /* 17 # Mn HEBREW POINT PATAH */
  0x05B7,
  /* 18 # Mn HEBREW POINT QAMATS */
  0x05B8,
  /* 19 # Mn HEBREW POINT HOLAM */
  0x05B9,
  /* 20 # Mn HEBREW POINT QUBUTS */
  0x05BB,
  /* 21 # Mn HEBREW POINT DAGESH OR MAPIQ */
  0x05BC,
  /* 22 # Mn HEBREW POINT METEG */
  0x05BD,
  /* 23 # Mn HEBREW POINT RAFE */
  0x05BF,
  /* 24 # Mn HEBREW POINT SHIN DOT */
  0x05C1,
  /* 25 # Mn HEBREW POINT SIN DOT */
  0x05C2,
  /* 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA */
  0xFB1E,
  /* 27 # Mn ARABIC FATHATAN */
  0x064B,
  /* 28 # Mn ARABIC DAMMATAN */
  0x064C,
  /* 29 # Mn ARABIC KASRATAN */
  0x064D,
  /* 30 # Mn ARABIC FATHA */
  0x064E,
  /* 31 # Mn ARABIC DAMMA */
  0x064F,
  /* 32 # Mn ARABIC KASRA */
  0x0650,
  /* 33 # Mn ARABIC SHADDA */
  0x0651,
  /* 34 # Mn ARABIC SUKUN */
  0x0652,
  /* 35 # Mn ARABIC LETTER SUPERSCRIPT ALEF */
  0x0670,
  /* 36 # Mn SYRIAC LETTER SUPERSCRIPT ALAPH */
  0x0711,
  /* 84 # Mn TELUGU LENGTH MARK */
  0x0C55,
  /* 91 # Mn TELUGU AI LENGTH MARK */
  0x0C56,
  /* 103 # Mn [2] THAI CHARACTER SARA U..
   *            ..THAI CHARACTER SARA UU */
  0x0E38, 0x0E39,
  /* 107 # Mn [4] THAI CHARACTER MAI EK..
   *            ..THAI CHARACTER MAI CHATTAWA */
  0x0E48, 0x0E49, 0x0E4A, 0x0E4B,
  /* 118 # Mn [2] LAO VOWEL SIGN U..
   *            ..LAO VOWEL SIGN UU */
  0x0EB8, 0x0EB9,
  /* 122 # Mn [4] LAO TONE MAI EK..
   *            ..LAO TONE MAI CATAWA */
  0x0EC8, 0x0EC9, 0x0ECA, 0x0ECB,
  /* 129 # Mn TIBETAN VOWEL SIGN AA */
  0x0F71,
  /* 130 # Mn TIBETAN VOWEL SIGN I */
  0x0F72,
  /* 130 # Mn [4] TIBETAN VOWEL SIGN E..
   *            ..TIBETAN VOWEL SIGN OO */
  0x0F7A, 0x0F7B, 0x0F7C, 0x0F7D,
  /* 130 # Mn TIBETAN VOWEL SIGN REVERSED I */
  0x0F80,
  /* 132 # Mn TIBETAN VOWEL SIGN U */
  0x0F74,
  /* 202 # Mn [2] COMBINING PALATALIZED HOOK BELOW..
   *            ..COMBINING RETROFLEX HOOK BELOW */
  0x0321, 0x0322,
  /* 202 # Mn [2] COMBINING CEDILLA..
   *            ..COMBINING OGONEK */
  0x0327, 0x0328,
  /* 216 # Mn COMBINING HORN */
  0x031B,
  /* 216 # Mn TIBETAN MARK TSA -PHRU */
  0x0F39,
  /* 216 # Mc [2] MUSICAL SYMBOL COMBINING STEM..
   *            ..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM */
  0x1D165, 0x1D166,
  /* 216 # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..
   *            ..MUSICAL SYMBOL COMBINING FLAG-5 */
  0x1D16E, 0x1D16F, 0x1D170, 0x1D171, 0x1D172,
  /* 218 # Mn IDEOGRAPHIC LEVEL TONE MARK */
  0x302A,
  /* 220 # Mn [4] COMBINING GRAVE ACCENT BELOW..
   *            ..COMBINING RIGHT TACK BELOW */
  0x0316, 0x0317, 0x0318, 0x0319,
  /* 220 # Mn [5] COMBINING LEFT HALF RING BELOW..
   *            ..COMBINING MINUS SIGN BELOW */
  0x031C, 0x031D, 0x031E, 0x031F, 0x0320,
  /* 220 # Mn [4] COMBINING DOT BELOW..
   *            ..COMBINING COMMA BELOW */
  0x0323, 0x0324, 0x0325, 0x0326,
  /* 220 # Mn [11] COMBINING VERTICAL LINE BELOW..
   *             ..COMBINING DOUBLE LOW LINE */
  0x0329, 0x032A, 0x032B, 0x032C, 0x032D, 0x032E,
  0x032F, 0x0330, 0x0331, 0x0332, 0x0333,
  /* 220 # Mn [4] COMBINING RIGHT HALF RING BELOW..
   *            ..COMBINING SEAGULL BELOW */
  0x0339, 0x033A, 0x033B, 0x033C,
  /* 220 # Mn [3] COMBINING EQUALS SIGN BELOW..
   *            ..COMBINING LEFT ANGLE BELOW */
  0x0347, 0x0348, 0x0349,
  /* 220 # Mn [2] COMBINING LEFT RIGHT ARROW BELOW..
   *            ..COMBINING UPWARDS ARROW BELOW */
  0x034D, 0x034E,
  /* 220 # Mn HEBREW ACCENT ETNAHTA */
  0x0591,
  /* 220 # Mn HEBREW ACCENT TIPEHA */
  0x0596,
  /* 220 # Mn HEBREW ACCENT TEVIR */
  0x059B,
  /* 220 # Mn [5] HEBREW ACCENT MUNAH..
   *            ..HEBREW ACCENT DARGA */
  0x05A3, 0x05A4, 0x05A5, 0x05A6, 0x05A7,
  /* 220 # Mn HEBREW ACCENT YERAH BEN YOMO */
  0x05AA,
  /* 220 # Mn ARABIC HAMZA BELOW */
  0x0655,
  /* 220 # Mn ARABIC SMALL LOW SEEN */
  0x06E3,
  /* 220 # Mn ARABIC EMPTY CENTRE LOW STOP */
  0x06EA,
  /* 220 # Mn ARABIC SMALL LOW MEEM */
  0x06ED,
  /* 220 # Mn SYRIAC PTHAHA BELOW */
  0x0731,
  /* 220 # Mn SYRIAC ZQAPHA BELOW */
  0x0734,
  /* 220 # Mn [3] SYRIAC RBASA BELOW..
   *            ..SYRIAC DOTTED ZLAMA ANGULAR */
  0x0737, 0x0738, 0x0739,
  /* 220 # Mn [2] SYRIAC HBASA BELOW..
   *            ..SYRIAC HBASA-ESASA DOTTED */
  0x073B, 0x073C,
  /* 220 # Mn SYRIAC ESASA BELOW */
  0x073E,
  /* 220 # Mn SYRIAC RUKKAKHA */
  0x0742,
  /* 220 # Mn SYRIAC TWO VERTICAL DOTS BELOW */
  0x0744,
  /* 220 # Mn SYRIAC THREE DOTS BELOW */
  0x0746,
  /* 220 # Mn SYRIAC OBLIQUE LINE BELOW */
  0x0748,
  /* 220 # Mn DEVANAGARI STRESS SIGN ANUDATTA */
  0x0952,
  /* 220 # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..
   *            ..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS */
  0x0F18, 0x0F19,
  /* 220 # Mn TIBETAN MARK NGAS BZUNG NYI ZLA */
  0x0F35,
  /* 220 # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS */
  0x0F37,
  /* 220 # Mn TIBETAN SYMBOL PADMA GDAN */
  0x0FC6,
  /* 220 # Mn COMBINING TRIPLE UNDERDOT */
  0x20E8,
  /* 220 # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..
   *            ..MUSICAL SYMBOL COMBINING LOURE */
  0x1D17B, 0x1D17C, 0x1D17D, 0x1D17E,
  0x1D17F, 0x1D180, 0x1D181, 0x1D182,
  /* 220 # Mn [2] MUSICAL SYMBOL COMBINING DOUBLE TONGUE..
   *            ..MUSICAL SYMBOL COMBINING TRIPLE TONGUE */
  0x1D18A, 0x1D18B,
  /* 222 # Mn HEBREW ACCENT YETIV */
  0x059A,
  /* 222 # Mn HEBREW ACCENT DEHI */
  0x05AD,
  /* 222 # Mn IDEOGRAPHIC ENTERING TONE MARK */
  0x302D,
  /* 224 # Mn [2] HANGUL SINGLE DOT TONE MARK..
   *            ..HANGUL DOUBLE DOT TONE MARK */
  0x302E, 0x302F,
  /* 226 # Mc MUSICAL SYMBOL COMBINING AUGMENTATION DOT */
  0x1D16D,
  /* 228 # Mn HEBREW ACCENT ZINOR */
  0x05AE,
  /* 228 # Mn MONGOLIAN LETTER ALI GALI DAGALGA */
  0x18A9,
  /* 228 # Mn IDEOGRAPHIC RISING TONE MARK */
  0x302B,
  /* 230 # Mn [21] COMBINING GRAVE ACCENT..
   *             ..COMBINING REVERSED COMMA ABOVE */
  0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0305, 0x0306,
  0x0307, 0x0308, 0x0309, 0x030A, 0x030B, 0x030C, 0x030D,
  0x030E, 0x030F, 0x0310, 0x0311, 0x0312, 0x0313, 0x0314,
  /* 230 # Mn [8] COMBINING X ABOVE..
   *            ..COMBINING GREEK DIALYTIKA TONOS */
  0x033D, 0x033E, 0x033F, 0x0340,
  0x0341, 0x0342, 0x0343, 0x0344,
  /* 230 # Mn COMBINING BRIDGE ABOVE */
  0x0346,
  /* 230 # Mn [3] COMBINING NOT TILDE ABOVE..
   *            ..COMBINING ALMOST EQUAL TO ABOVE */
  0x034A, 0x034B, 0x034C,
  /* 230 # Mn [13] COMBINING LATIN SMALL LETTER A..
   *             ..COMBINING LATIN SMALL LETTER X */
  0x0363, 0x0364, 0x0365, 0x0366, 0x0367, 0x0368, 0x0369,
  0x036A, 0x036B, 0x036C, 0x036D, 0x036E, 0x036F,
  /* 230 # Mn [4] COMBINING CYRILLIC TITLO..
   *            ..COMBINING CYRILLIC PSILI PNEUMATA */
  0x0483, 0x0484, 0x0485, 0x0486,
  /* 230 # Mn [4] HEBREW ACCENT SEGOL..
   *            ..HEBREW ACCENT ZAQEF GADOL */
  0x0592, 0x0593, 0x0594, 0x0595,
  /* 230 # Mn [3] HEBREW ACCENT REVIA..
   *            ..HEBREW ACCENT PASHTA */
  0x0597, 0x0598, 0x0599,
  /* 230 # Mn [6] HEBREW ACCENT GERESH..
   *            ..HEBREW ACCENT PAZER */
  0x059C, 0x059D, 0x059E, 0x059F, 0x05A0, 0x05A1,
  /* 230 # Mn [2] HEBREW ACCENT QADMA..
   *            ..HEBREW ACCENT TELISHA QETANA */
  0x05A8, 0x05A9,
  /* 230 # Mn [2] HEBREW ACCENT OLE..
   *            ..HEBREW ACCENT ILUY */
  0x05AB, 0x05AC,
  /* 230 # Mn HEBREW MARK MASORA CIRCLE */
  0x05AF,
  /* 230 # Mn HEBREW MARK UPPER DOT */
  0x05C4,
  /* 230 # Mn [2] ARABIC MADDAH ABOVE..
   *            ..ARABIC HAMZA ABOVE */
  0x0653, 0x0654,
  /* 230 # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..
   *            ..ARABIC SMALL HIGH SEEN */
  0x06D6, 0x06D7, 0x06D8, 0x06D9, 0x06DA, 0x06DB, 0x06DC,
  /* 230 # Mn [4] ARABIC SMALL HIGH ROUNDED ZERO..
   *            ..ARABIC SMALL HIGH MEEM ISOLATED FORM */
  0x06DF, 0x06E0, 0x06E1, 0x06E2,
  /* 230 # Mn ARABIC SMALL HIGH MADDA */
  0x06E4,
  /* 230 # Mn [2] ARABIC SMALL HIGH YEH..
   *            ..ARABIC SMALL HIGH NOON */
  0x06E7, 0x06E8,
  /* 230 # Mn [2] ARABIC EMPTY CENTRE HIGH STOP..
   *            ..ARABIC ROUNDED HIGH STOP WITH FILLED CENTRE */
  0x06EB, 0x06EC,
  /* 230 # Mn SYRIAC PTHAHA ABOVE */
  0x0730,
  /* 230 # Mn [2] SYRIAC PTHAHA DOTTED..
   *            ..SYRIAC ZQAPHA ABOVE */
  0x0732, 0x0733,
  /* 230 # Mn [2] SYRIAC ZQAPHA DOTTED..
   *            ..SYRIAC RBASA ABOVE */
  0x0735, 0x0736,
  /* 230 # Mn SYRIAC HBASA ABOVE */
  0x073A,
  /* 230 # Mn SYRIAC ESASA ABOVE */
  0x073D,
  /* 230 # Mn [3] SYRIAC RWAHA..
   *            ..SYRIAC QUSHSHAYA */
  0x073F, 0x0740, 0x0741,
  /* 230 # Mn SYRIAC TWO VERTICAL DOTS ABOVE */
  0x0743,
  /* 230 # Mn SYRIAC THREE DOTS ABOVE */
  0x0745,
  /* 230 # Mn SYRIAC OBLIQUE LINE ABOVE */
  0x0747,
  /* 230 # Mn [2] SYRIAC MUSIC..
   *            ..SYRIAC BARREKH */
  0x0749, 0x074A,
  /* 230 # Mn DEVANAGARI STRESS SIGN UDATTA */
  0x0951,
  /* 230 # Mn [2] DEVANAGARI GRAVE ACCENT..
   *            ..DEVANAGARI ACUTE ACCENT */
  0x0953, 0x0954,
  /* 230 # Mn [2] TIBETAN SIGN NYI ZLA NAA DA..
   *            ..TIBETAN SIGN SNA LDAN */
  0x0F82, 0x0F83,
  /* 230 # Mn [2] TIBETAN SIGN LCI RTAGS..
   *            ..TIBETAN SIGN YANG RTAGS */
  0x0F86, 0x0F87,
  /* 230 # Mn [2] COMBINING LEFT HARPOON ABOVE..
   *            ..COMBINING RIGHT HARPOON ABOVE */
  0x20D0, 0x20D1,
  /* 230 # Mn [4] COMBINING ANTICLOCKWISE ARROW ABOVE..
   *            ..COMBINING RIGHT ARROW ABOVE */
  0x20D4, 0x20D5, 0x20D6, 0x20D7,
  /* 230 # Mn [2] COMBINING THREE DOTS ABOVE..
   *            ..COMBINING FOUR DOTS ABOVE */
  0x20DB, 0x20DC,
  /* 230 # Mn COMBINING LEFT RIGHT ARROW ABOVE */
  0x20E1,
  /* 230 # Mn COMBINING ANNUITY SYMBOL */
  0x20E7,
  /* 230 # Mn COMBINING WIDE BRIDGE ABOVE */
  0x20E9,
  /* 230 # Mn [4] COMBINING LIGATURE LEFT HALF..
   *            ..COMBINING DOUBLE TILDE RIGHT HALF */
  0xFE20, 0xFE21, 0xFE22, 0xFE23,
  /* 230 # Mn [5] MUSICAL SYMBOL COMBINING DOIT..
   *            ..MUSICAL SYMBOL COMBINING BEND */
  0x1D185, 0x1D186, 0x1D187, 0x1D188, 0x1D189,
  /* 230 # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..
   *            ..MUSICAL SYMBOL COMBINING SNAP PIZZICATO */
  0x1D1AA, 0x1D1AB, 0x1D1AC, 0x1D1AD,
  /* 232 # Mn COMBINING COMMA ABOVE RIGHT */
  0x0315,
  /* 232 # Mn COMBINING LEFT ANGLE ABOVE */
  0x031A,
  /* 232 # Mn IDEOGRAPHIC DEPARTING TONE MARK */
  0x302C,
  /* 233 # Mn COMBINING DOUBLE RIGHTWARDS ARROW BELOW */
  0x0362,
  /* 234 # Mn [2] COMBINING DOUBLE TILDE..
   *            ..COMBINING DOUBLE INVERTED BREVE */
  0x0360, 0x0361,
  /* 240 # Mn COMBINING GREEK YPOGEGRAMMENI */
  0x0345
};
/*
 * pr-29.html table, row 1:
 *   first column:  09C7 BENGALI VOWEL SIGN E
 *   second column: 09BE BENGALI VOWEL SIGN AA or
 *                  09D7 BENGALI AU LENGTH MARK
 *
 * pr29_1_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_1_1[] = {
  0x09C7,                       /* BENGALI VOWEL SIGN E */
  0
};
/* Second column of pr-29.html table row 1; the list ends with a 0 element.  */
static const uint32_t pr29_1_2[] = {
  0x09BE,                       /* BENGALI VOWEL SIGN AA */
  0x09D7,                       /* BENGALI AU LENGTH MARK */
  0
};
/*
 * pr-29.html table, row 2:
 *   first column:  0B47 ORIYA VOWEL SIGN E
 *   second column: 0B3E ORIYA VOWEL SIGN AA or
 *                  0B56 ORIYA AI LENGTH MARK or
 *                  0B57 ORIYA AU LENGTH MARK
 *
 * pr29_2_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_2_1[] = {
  0x0B47,                       /* ORIYA VOWEL SIGN E */
  0
};
/* Second column of pr-29.html table row 2; the list ends with a 0 element.  */
static const uint32_t pr29_2_2[] = {
  0x0B3E,                       /* ORIYA VOWEL SIGN AA */
  0x0B56,                       /* ORIYA AI LENGTH MARK */
  0x0B57,                       /* ORIYA AU LENGTH MARK */
  0
};
/*
 * pr-29.html table, row 3:
 *   first column:  0BC6 TAMIL VOWEL SIGN E
 *   second column: 0BBE TAMIL VOWEL SIGN AA or
 *                  0BD7 TAMIL AU LENGTH MARK
 *
 * pr29_3_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_3_1[] = {
  0x0BC6,                       /* TAMIL VOWEL SIGN E */
  0
};
/* Second column of pr-29.html table row 3; the list ends with a 0 element.  */
static const uint32_t pr29_3_2[] = {
  0x0BBE,                       /* TAMIL VOWEL SIGN AA */
  0x0BD7,                       /* TAMIL AU LENGTH MARK */
  0
};
/*
 * pr-29.html table, row 4:
 *   first column:  0BC7 TAMIL VOWEL SIGN EE
 *   second column: 0BBE TAMIL VOWEL SIGN AA
 *
 * pr29_4_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_4_1[] = {
  0x0BC7,                       /* TAMIL VOWEL SIGN EE */
  0
};
/* Second column of pr-29.html table row 4; the list ends with a 0 element.  */
static const uint32_t pr29_4_2[] = {
  0x0BBE,                       /* TAMIL VOWEL SIGN AA */
  0
};
/*
 * pr-29.html table, row 5:
 *   first column:  0B92 TAMIL LETTER O
 *   second column: 0BD7 TAMIL AU LENGTH MARK
 *
 * pr29_5_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_5_1[] = {
  0x0B92,                       /* TAMIL LETTER O */
  0
};
/* Second column of pr-29.html table row 5; the list ends with a 0 element.  */
static const uint32_t pr29_5_2[] = {
  0x0BD7,                       /* TAMIL AU LENGTH MARK */
  0
};
/*
 * pr-29.html table, row 6:
 *   first column:  0CC6 KANNADA VOWEL SIGN E
 *   second column: 0CC2 KANNADA VOWEL SIGN UU or
 *                  0CD5 KANNADA LENGTH MARK or
 *                  0CD6 KANNADA AI LENGTH MARK
 *
 * pr29_6_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_6_1[] = {
  0x0CC6,                       /* KANNADA VOWEL SIGN E */
  0
};
/* Second column of pr-29.html table row 6; the list ends with a 0 element.  */
static const uint32_t pr29_6_2[] = {
  0x0CC2,                       /* KANNADA VOWEL SIGN UU */
  0x0CD5,                       /* KANNADA LENGTH MARK */
  0x0CD6,                       /* KANNADA AI LENGTH MARK */
  0
};
/*
 * pr-29.html table, row 7:
 *   first column:  0CBF KANNADA VOWEL SIGN I or
 *                  0CCA KANNADA VOWEL SIGN O
 *   second column: 0CD5 KANNADA LENGTH MARK
 *
 * pr29_7_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_7_1[] = {
  0x0CBF,                       /* KANNADA VOWEL SIGN I */
  0x0CCA,                       /* KANNADA VOWEL SIGN O */
  0
};
/* Second column of pr-29.html table row 7; the list ends with a 0 element.  */
static const uint32_t pr29_7_2[] = {
  0x0CD5,                       /* KANNADA LENGTH MARK */
  0
};
/*
 * pr-29.html table, row 8:
 *   first column:  0D47 MALAYALAM VOWEL SIGN EE
 *   second column: 0D3E MALAYALAM VOWEL SIGN AA
 *
 * pr29_8_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_8_1[] = {
  0x0D47,                       /* MALAYALAM VOWEL SIGN EE */
  0
};
/* Second column of pr-29.html table row 8; the list ends with a 0 element.  */
static const uint32_t pr29_8_2[] = {
  0x0D3E,                       /* MALAYALAM VOWEL SIGN AA */
  0
};
/*
 * pr-29.html table, row 9:
 *   first column:  0D46 MALAYALAM VOWEL SIGN E
 *   second column: 0D3E MALAYALAM VOWEL SIGN AA or
 *                  0D57 MALAYALAM AU LENGTH MARK
 *
 * pr29_9_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_9_1[] = {
  0x0D46,                       /* MALAYALAM VOWEL SIGN E */
  0
};
/* Second column of pr-29.html table row 9; the list ends with a 0 element.  */
static const uint32_t pr29_9_2[] = {
  0x0D3E,                       /* MALAYALAM VOWEL SIGN AA */
  0x0D57,                       /* MALAYALAM AU LENGTH MARK */
  0
};
/*
 * pr-29.html table, row 10:
 *   first column:  1025 MYANMAR LETTER U
 *   second column: 102E MYANMAR VOWEL SIGN II
 *
 * pr29_10_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_10_1[] = {
  0x1025,                       /* MYANMAR LETTER U */
  0
};
/* Second column of pr-29.html table row 10; the list ends with a 0 element.  */
static const uint32_t pr29_10_2[] = {
  0x102E,                       /* MYANMAR VOWEL SIGN II */
  0
};
/*
 * pr-29.html table, row 11:
 *   first column:  0DD9 SINHALA VOWEL SIGN KOMBUVA
 *   second column: 0DCF SINHALA VOWEL SIGN AELA-PILLA or
 *                  0DDF SINHALA VOWEL SIGN GAYANUKITTA
 *
 * pr29_11_1 holds the first column (presumably the first character of the
 * problem sequence); the list ends with a 0 element.
 */
static const uint32_t pr29_11_1[] = {
  0x0DD9,                       /* SINHALA VOWEL SIGN KOMBUVA */
  0
};
/* Second column of pr-29.html table row 11; the list ends with a 0 element.  */
static const uint32_t pr29_11_2[] = {
  0x0DCF,                       /* SINHALA VOWEL SIGN AELA-PILLA */
  0x0DDF,                       /* SINHALA VOWEL SIGN GAYANUKITTA */
  0
};
/*
 * pr-29.html table, row 12:
 *   first column:  1100..1112 HANGUL CHOSEONG KIYEOK..HIEUH [19 instances]
 *   second column: 1161..1175 HANGUL JUNGSEONG A..I [21 instances]
 *
 * pr29_12_1 holds the first column (presumably the first character of the
 * problem sequence): the 19 consecutive code points 0x1100..0x1112,
 * followed by a 0 element.
 */
static const uint32_t pr29_12_1[] = {
  0x1100, 0x1101, 0x1102, 0x1103,
  0x1104, 0x1105, 0x1106, 0x1107,
  0x1108, 0x1109, 0x110A, 0x110B,
  0x110C, 0x110D, 0x110E, 0x110F,
  0x1110, 0x1111, 0x1112,
  0
};
/* Second column of pr-29.html table row 12: the 21 consecutive code points
   0x1161..0x1175 (HANGUL JUNGSEONG A..I), followed by a 0 element.  */
static const uint32_t pr29_12_2[] = {
  0x1161, 0x1162, 0x1163, 0x1164,
  0x1165, 0x1166, 0x1167, 0x1168,
  0x1169, 0x116A, 0x116B, 0x116C,
  0x116D, 0x116E, 0x116F, 0x1170,
  0x1171, 0x1172, 0x1173, 0x1174,
  0x1175,
  0
};
748 * [:HangulSyllableType=LV:]
749 * 11A8..11C2 HANGUL JONGSEONG KIYEOK..HIEUH [27 instances]
752 static const uint32_t pr29_13_1[] = {
753 0xAC00, /* LV # Lo HANGUL SYLLABLE GA */
754 0xAC1C, /* LV # Lo HANGUL SYLLABLE GAE */
755 0xAC38, /* LV # Lo HANGUL SYLLABLE GYA */
756 0xAC54, /* LV # Lo HANGUL SYLLABLE GYAE */
757 0xAC70, /* LV # Lo HANGUL SYLLABLE GEO */
758 0xAC8C, /* LV # Lo HANGUL SYLLABLE GE */
759 0xACA8, /* LV # Lo HANGUL SYLLABLE GYEO */
760 0xACC4, /* LV # Lo HANGUL SYLLABLE GYE */
761 0xACE0, /* LV # Lo HANGUL SYLLABLE GO */
762 0xACFC, /* LV # Lo HANGUL SYLLABLE GWA */
763 0xAD18, /* LV # Lo HANGUL SYLLABLE GWAE */
764 0xAD34, /* LV # Lo HANGUL SYLLABLE GOE */
765 0xAD50, /* LV # Lo HANGUL SYLLABLE GYO */
766 0xAD6C, /* LV # Lo HANGUL SYLLABLE GU */
767 0xAD88, /* LV # Lo HANGUL SYLLABLE GWEO */
768 0xADA4, /* LV # Lo HANGUL SYLLABLE GWE */
769 0xADC0, /* LV # Lo HANGUL SYLLABLE GWI */
770 0xADDC, /* LV # Lo HANGUL SYLLABLE GYU */
771 0xADF8, /* LV # Lo HANGUL SYLLABLE GEU */
772 0xAE14, /* LV # Lo HANGUL SYLLABLE GYI */
773 0xAE30, /* LV # Lo HANGUL SYLLABLE GI */
774 0xAE4C, /* LV # Lo HANGUL SYLLABLE GGA */
775 0xAE68, /* LV # Lo HANGUL SYLLABLE GGAE */
776 0xAE84, /* LV # Lo HANGUL SYLLABLE GGYA */
777 0xAEA0, /* LV # Lo HANGUL SYLLABLE GGYAE */
778 0xAEBC, /* LV # Lo HANGUL SYLLABLE GGEO */
779 0xAED8, /* LV # Lo HANGUL SYLLABLE GGE */
780 0xAEF4, /* LV # Lo HANGUL SYLLABLE GGYEO */
781 0xAF10, /* LV # Lo HANGUL SYLLABLE GGYE */
782 0xAF2C, /* LV # Lo HANGUL SYLLABLE GGO */
783 0xAF48, /* LV # Lo HANGUL SYLLABLE GGWA */
784 0xAF64, /* LV # Lo HANGUL SYLLABLE GGWAE */
785 0xAF80, /* LV # Lo HANGUL SYLLABLE GGOE */
786 0xAF9C, /* LV # Lo HANGUL SYLLABLE GGYO */
787 0xAFB8, /* LV # Lo HANGUL SYLLABLE GGU */
788 0xAFD4, /* LV # Lo HANGUL SYLLABLE GGWEO */
789 0xAFF0, /* LV # Lo HANGUL SYLLABLE GGWE */
790 0xB00C, /* LV # Lo HANGUL SYLLABLE GGWI */
791 0xB028, /* LV # Lo HANGUL SYLLABLE GGYU */
792 0xB044, /* LV # Lo HANGUL SYLLABLE GGEU */
793 0xB060, /* LV # Lo HANGUL SYLLABLE GGYI */
794 0xB07C, /* LV # Lo HANGUL SYLLABLE GGI */
795 0xB098, /* LV # Lo HANGUL SYLLABLE NA */
796 0xB0B4, /* LV # Lo HANGUL SYLLABLE NAE */
797 0xB0D0, /* LV # Lo HANGUL SYLLABLE NYA */
798 0xB0EC, /* LV # Lo HANGUL SYLLABLE NYAE */
799 0xB108, /* LV # Lo HANGUL SYLLABLE NEO */
800 0xB124, /* LV # Lo HANGUL SYLLABLE NE */
801 0xB140, /* LV # Lo HANGUL SYLLABLE NYEO */
802 0xB15C, /* LV # Lo HANGUL SYLLABLE NYE */
803 0xB178, /* LV # Lo HANGUL SYLLABLE NO */
804 0xB194, /* LV # Lo HANGUL SYLLABLE NWA */
805 0xB1B0, /* LV # Lo HANGUL SYLLABLE NWAE */
806 0xB1CC, /* LV # Lo HANGUL SYLLABLE NOE */
807 0xB1E8, /* LV # Lo HANGUL SYLLABLE NYO */
808 0xB204, /* LV # Lo HANGUL SYLLABLE NU */
809 0xB220, /* LV # Lo HANGUL SYLLABLE NWEO */
810 0xB23C, /* LV # Lo HANGUL SYLLABLE NWE */
811 0xB258, /* LV # Lo HANGUL SYLLABLE NWI */
812 0xB274, /* LV # Lo HANGUL SYLLABLE NYU */
813 0xB290, /* LV # Lo HANGUL SYLLABLE NEU */
814 0xB2AC, /* LV # Lo HANGUL SYLLABLE NYI */
815 0xB2C8, /* LV # Lo HANGUL SYLLABLE NI */
816 0xB2E4, /* LV # Lo HANGUL SYLLABLE DA */
817 0xB300, /* LV # Lo HANGUL SYLLABLE DAE */
818 0xB31C, /* LV # Lo HANGUL SYLLABLE DYA */
819 0xB338, /* LV # Lo HANGUL SYLLABLE DYAE */
820 0xB354, /* LV # Lo HANGUL SYLLABLE DEO */
821 0xB370, /* LV # Lo HANGUL SYLLABLE DE */
822 0xB38C, /* LV # Lo HANGUL SYLLABLE DYEO */
823 0xB3A8, /* LV # Lo HANGUL SYLLABLE DYE */
824 0xB3C4, /* LV # Lo HANGUL SYLLABLE DO */
825 0xB3E0, /* LV # Lo HANGUL SYLLABLE DWA */
826 0xB3FC, /* LV # Lo HANGUL SYLLABLE DWAE */
827 0xB418, /* LV # Lo HANGUL SYLLABLE DOE */
828 0xB434, /* LV # Lo HANGUL SYLLABLE DYO */
829 0xB450, /* LV # Lo HANGUL SYLLABLE DU */
830 0xB46C, /* LV # Lo HANGUL SYLLABLE DWEO */
831 0xB488, /* LV # Lo HANGUL SYLLABLE DWE */
832 0xB4A4, /* LV # Lo HANGUL SYLLABLE DWI */
833 0xB4C0, /* LV # Lo HANGUL SYLLABLE DYU */
834 0xB4DC, /* LV # Lo HANGUL SYLLABLE DEU */
835 0xB4F8, /* LV # Lo HANGUL SYLLABLE DYI */
836 0xB514, /* LV # Lo HANGUL SYLLABLE DI */
837 0xB530, /* LV # Lo HANGUL SYLLABLE DDA */
838 0xB54C, /* LV # Lo HANGUL SYLLABLE DDAE */
839 0xB568, /* LV # Lo HANGUL SYLLABLE DDYA */
840 0xB584, /* LV # Lo HANGUL SYLLABLE DDYAE */
841 0xB5A0, /* LV # Lo HANGUL SYLLABLE DDEO */
842 0xB5BC, /* LV # Lo HANGUL SYLLABLE DDE */
843 0xB5D8, /* LV # Lo HANGUL SYLLABLE DDYEO */
844 0xB5F4, /* LV # Lo HANGUL SYLLABLE DDYE */
845 0xB610, /* LV # Lo HANGUL SYLLABLE DDO */
846 0xB62C, /* LV # Lo HANGUL SYLLABLE DDWA */
847 0xB648, /* LV # Lo HANGUL SYLLABLE DDWAE */
848 0xB664, /* LV # Lo HANGUL SYLLABLE DDOE */
849 0xB680, /* LV # Lo HANGUL SYLLABLE DDYO */
850 0xB69C, /* LV # Lo HANGUL SYLLABLE DDU */
851 0xB6B8, /* LV # Lo HANGUL SYLLABLE DDWEO */
852 0xB6D4, /* LV # Lo HANGUL SYLLABLE DDWE */
853 0xB6F0, /* LV # Lo HANGUL SYLLABLE DDWI */
854 0xB70C, /* LV # Lo HANGUL SYLLABLE DDYU */
855 0xB728, /* LV # Lo HANGUL SYLLABLE DDEU */
856 0xB744, /* LV # Lo HANGUL SYLLABLE DDYI */
857 0xB760, /* LV # Lo HANGUL SYLLABLE DDI */
858 0xB77C, /* LV # Lo HANGUL SYLLABLE RA */
859 0xB798, /* LV # Lo HANGUL SYLLABLE RAE */
860 0xB7B4, /* LV # Lo HANGUL SYLLABLE RYA */
861 0xB7D0, /* LV # Lo HANGUL SYLLABLE RYAE */
862 0xB7EC, /* LV # Lo HANGUL SYLLABLE REO */
863 0xB808, /* LV # Lo HANGUL SYLLABLE RE */
864 0xB824, /* LV # Lo HANGUL SYLLABLE RYEO */
865 0xB840, /* LV # Lo HANGUL SYLLABLE RYE */
866 0xB85C, /* LV # Lo HANGUL SYLLABLE RO */
867 0xB878, /* LV # Lo HANGUL SYLLABLE RWA */
868 0xB894, /* LV # Lo HANGUL SYLLABLE RWAE */
869 0xB8B0, /* LV # Lo HANGUL SYLLABLE ROE */
870 0xB8CC, /* LV # Lo HANGUL SYLLABLE RYO */
871 0xB8E8, /* LV # Lo HANGUL SYLLABLE RU */
872 0xB904, /* LV # Lo HANGUL SYLLABLE RWEO */
873 0xB920, /* LV # Lo HANGUL SYLLABLE RWE */
874 0xB93C, /* LV # Lo HANGUL SYLLABLE RWI */
875 0xB958, /* LV # Lo HANGUL SYLLABLE RYU */
876 0xB974, /* LV # Lo HANGUL SYLLABLE REU */
877 0xB990, /* LV # Lo HANGUL SYLLABLE RYI */
878 0xB9AC, /* LV # Lo HANGUL SYLLABLE RI */
879 0xB9C8, /* LV # Lo HANGUL SYLLABLE MA */
880 0xB9E4, /* LV # Lo HANGUL SYLLABLE MAE */
881 0xBA00, /* LV # Lo HANGUL SYLLABLE MYA */
882 0xBA1C, /* LV # Lo HANGUL SYLLABLE MYAE */
883 0xBA38, /* LV # Lo HANGUL SYLLABLE MEO */
884 0xBA54, /* LV # Lo HANGUL SYLLABLE ME */
885 0xBA70, /* LV # Lo HANGUL SYLLABLE MYEO */
886 0xBA8C, /* LV # Lo HANGUL SYLLABLE MYE */
887 0xBAA8, /* LV # Lo HANGUL SYLLABLE MO */
888 0xBAC4, /* LV # Lo HANGUL SYLLABLE MWA */
889 0xBAE0, /* LV # Lo HANGUL SYLLABLE MWAE */
890 0xBAFC, /* LV # Lo HANGUL SYLLABLE MOE */
891 0xBB18, /* LV # Lo HANGUL SYLLABLE MYO */
892 0xBB34, /* LV # Lo HANGUL SYLLABLE MU */
893 0xBB50, /* LV # Lo HANGUL SYLLABLE MWEO */
894 0xBB6C, /* LV # Lo HANGUL SYLLABLE MWE */
895 0xBB88, /* LV # Lo HANGUL SYLLABLE MWI */
896 0xBBA4, /* LV # Lo HANGUL SYLLABLE MYU */
897 0xBBC0, /* LV # Lo HANGUL SYLLABLE MEU */
898 0xBBDC, /* LV # Lo HANGUL SYLLABLE MYI */
899 0xBBF8, /* LV # Lo HANGUL SYLLABLE MI */
900 0xBC14, /* LV # Lo HANGUL SYLLABLE BA */
901 0xBC30, /* LV # Lo HANGUL SYLLABLE BAE */
902 0xBC4C, /* LV # Lo HANGUL SYLLABLE BYA */
903 0xBC68, /* LV # Lo HANGUL SYLLABLE BYAE */
904 0xBC84, /* LV # Lo HANGUL SYLLABLE BEO */
905 0xBCA0, /* LV # Lo HANGUL SYLLABLE BE */
906 0xBCBC, /* LV # Lo HANGUL SYLLABLE BYEO */
907 0xBCD8, /* LV # Lo HANGUL SYLLABLE BYE */
908 0xBCF4, /* LV # Lo HANGUL SYLLABLE BO */
909 0xBD10, /* LV # Lo HANGUL SYLLABLE BWA */
910 0xBD2C, /* LV # Lo HANGUL SYLLABLE BWAE */
911 0xBD48, /* LV # Lo HANGUL SYLLABLE BOE */
912 0xBD64, /* LV # Lo HANGUL SYLLABLE BYO */
913 0xBD80, /* LV # Lo HANGUL SYLLABLE BU */
914 0xBD9C, /* LV # Lo HANGUL SYLLABLE BWEO */
915 0xBDB8, /* LV # Lo HANGUL SYLLABLE BWE */
916 0xBDD4, /* LV # Lo HANGUL SYLLABLE BWI */
917 0xBDF0, /* LV # Lo HANGUL SYLLABLE BYU */
918 0xBE0C, /* LV # Lo HANGUL SYLLABLE BEU */
919 0xBE28, /* LV # Lo HANGUL SYLLABLE BYI */
920 0xBE44, /* LV # Lo HANGUL SYLLABLE BI */
921 0xBE60, /* LV # Lo HANGUL SYLLABLE BBA */
922 0xBE7C, /* LV # Lo HANGUL SYLLABLE BBAE */
923 0xBE98, /* LV # Lo HANGUL SYLLABLE BBYA */
924 0xBEB4, /* LV # Lo HANGUL SYLLABLE BBYAE */
925 0xBED0, /* LV # Lo HANGUL SYLLABLE BBEO */
926 0xBEEC, /* LV # Lo HANGUL SYLLABLE BBE */
927 0xBF08, /* LV # Lo HANGUL SYLLABLE BBYEO */
928 0xBF24, /* LV # Lo HANGUL SYLLABLE BBYE */
929 0xBF40, /* LV # Lo HANGUL SYLLABLE BBO */
930 0xBF5C, /* LV # Lo HANGUL SYLLABLE BBWA */
931 0xBF78, /* LV # Lo HANGUL SYLLABLE BBWAE */
932 0xBF94, /* LV # Lo HANGUL SYLLABLE BBOE */
933 0xBFB0, /* LV # Lo HANGUL SYLLABLE BBYO */
934 0xBFCC, /* LV # Lo HANGUL SYLLABLE BBU */
935 0xBFE8, /* LV # Lo HANGUL SYLLABLE BBWEO */
936 0xC004, /* LV # Lo HANGUL SYLLABLE BBWE */
937 0xC020, /* LV # Lo HANGUL SYLLABLE BBWI */
938 0xC03C, /* LV # Lo HANGUL SYLLABLE BBYU */
939 0xC058, /* LV # Lo HANGUL SYLLABLE BBEU */
940 0xC074, /* LV # Lo HANGUL SYLLABLE BBYI */
941 0xC090, /* LV # Lo HANGUL SYLLABLE BBI */
942 0xC0AC, /* LV # Lo HANGUL SYLLABLE SA */
943 0xC0C8, /* LV # Lo HANGUL SYLLABLE SAE */
944 0xC0E4, /* LV # Lo HANGUL SYLLABLE SYA */
945 0xC100, /* LV # Lo HANGUL SYLLABLE SYAE */
946 0xC11C, /* LV # Lo HANGUL SYLLABLE SEO */
947 0xC138, /* LV # Lo HANGUL SYLLABLE SE */
948 0xC154, /* LV # Lo HANGUL SYLLABLE SYEO */
949 0xC170, /* LV # Lo HANGUL SYLLABLE SYE */
950 0xC18C, /* LV # Lo HANGUL SYLLABLE SO */
951 0xC1A8, /* LV # Lo HANGUL SYLLABLE SWA */
952 0xC1C4, /* LV # Lo HANGUL SYLLABLE SWAE */
953 0xC1E0, /* LV # Lo HANGUL SYLLABLE SOE */
954 0xC1FC, /* LV # Lo HANGUL SYLLABLE SYO */
955 0xC218, /* LV # Lo HANGUL SYLLABLE SU */
956 0xC234, /* LV # Lo HANGUL SYLLABLE SWEO */
957 0xC250, /* LV # Lo HANGUL SYLLABLE SWE */
958 0xC26C, /* LV # Lo HANGUL SYLLABLE SWI */
959 0xC288, /* LV # Lo HANGUL SYLLABLE SYU */
960 0xC2A4, /* LV # Lo HANGUL SYLLABLE SEU */
961 0xC2C0, /* LV # Lo HANGUL SYLLABLE SYI */
962 0xC2DC, /* LV # Lo HANGUL SYLLABLE SI */
963 0xC2F8, /* LV # Lo HANGUL SYLLABLE SSA */
964 0xC314, /* LV # Lo HANGUL SYLLABLE SSAE */
965 0xC330, /* LV # Lo HANGUL SYLLABLE SSYA */
966 0xC34C, /* LV # Lo HANGUL SYLLABLE SSYAE */
967 0xC368, /* LV # Lo HANGUL SYLLABLE SSEO */
968 0xC384, /* LV # Lo HANGUL SYLLABLE SSE */
969 0xC3A0, /* LV # Lo HANGUL SYLLABLE SSYEO */
970 0xC3BC, /* LV # Lo HANGUL SYLLABLE SSYE */
971 0xC3D8, /* LV # Lo HANGUL SYLLABLE SSO */
972 0xC3F4, /* LV # Lo HANGUL SYLLABLE SSWA */
973 0xC410, /* LV # Lo HANGUL SYLLABLE SSWAE */
974 0xC42C, /* LV # Lo HANGUL SYLLABLE SSOE */
975 0xC448, /* LV # Lo HANGUL SYLLABLE SSYO */
976 0xC464, /* LV # Lo HANGUL SYLLABLE SSU */
977 0xC480, /* LV # Lo HANGUL SYLLABLE SSWEO */
978 0xC49C, /* LV # Lo HANGUL SYLLABLE SSWE */
979 0xC4B8, /* LV # Lo HANGUL SYLLABLE SSWI */
980 0xC4D4, /* LV # Lo HANGUL SYLLABLE SSYU */
981 0xC4F0, /* LV # Lo HANGUL SYLLABLE SSEU */
982 0xC50C, /* LV # Lo HANGUL SYLLABLE SSYI */
983 0xC528, /* LV # Lo HANGUL SYLLABLE SSI */
984 0xC544, /* LV # Lo HANGUL SYLLABLE A */
985 0xC560, /* LV # Lo HANGUL SYLLABLE AE */
986 0xC57C, /* LV # Lo HANGUL SYLLABLE YA */
987 0xC598, /* LV # Lo HANGUL SYLLABLE YAE */
988 0xC5B4, /* LV # Lo HANGUL SYLLABLE EO */
989 0xC5D0, /* LV # Lo HANGUL SYLLABLE E */
990 0xC5EC, /* LV # Lo HANGUL SYLLABLE YEO */
991 0xC608, /* LV # Lo HANGUL SYLLABLE YE */
992 0xC624, /* LV # Lo HANGUL SYLLABLE O */
993 0xC640, /* LV # Lo HANGUL SYLLABLE WA */
994 0xC65C, /* LV # Lo HANGUL SYLLABLE WAE */
995 0xC678, /* LV # Lo HANGUL SYLLABLE OE */
996 0xC694, /* LV # Lo HANGUL SYLLABLE YO */
997 0xC6B0, /* LV # Lo HANGUL SYLLABLE U */
998 0xC6CC, /* LV # Lo HANGUL SYLLABLE WEO */
999 0xC6E8, /* LV # Lo HANGUL SYLLABLE WE */
1000 0xC704, /* LV # Lo HANGUL SYLLABLE WI */
1001 0xC720, /* LV # Lo HANGUL SYLLABLE YU */
1002 0xC73C, /* LV # Lo HANGUL SYLLABLE EU */
1003 0xC758, /* LV # Lo HANGUL SYLLABLE YI */
1004 0xC774, /* LV # Lo HANGUL SYLLABLE I */
1005 0xC790, /* LV # Lo HANGUL SYLLABLE JA */
1006 0xC7AC, /* LV # Lo HANGUL SYLLABLE JAE */
1007 0xC7C8, /* LV # Lo HANGUL SYLLABLE JYA */
1008 0xC7E4, /* LV # Lo HANGUL SYLLABLE JYAE */
1009 0xC800, /* LV # Lo HANGUL SYLLABLE JEO */
1010 0xC81C, /* LV # Lo HANGUL SYLLABLE JE */
1011 0xC838, /* LV # Lo HANGUL SYLLABLE JYEO */
1012 0xC854, /* LV # Lo HANGUL SYLLABLE JYE */
1013 0xC870, /* LV # Lo HANGUL SYLLABLE JO */
1014 0xC88C, /* LV # Lo HANGUL SYLLABLE JWA */
1015 0xC8A8, /* LV # Lo HANGUL SYLLABLE JWAE */
1016 0xC8C4, /* LV # Lo HANGUL SYLLABLE JOE */
1017 0xC8E0, /* LV # Lo HANGUL SYLLABLE JYO */
1018 0xC8FC, /* LV # Lo HANGUL SYLLABLE JU */
1019 0xC918, /* LV # Lo HANGUL SYLLABLE JWEO */
1020 0xC934, /* LV # Lo HANGUL SYLLABLE JWE */
1021 0xC950, /* LV # Lo HANGUL SYLLABLE JWI */
1022 0xC96C, /* LV # Lo HANGUL SYLLABLE JYU */
1023 0xC988, /* LV # Lo HANGUL SYLLABLE JEU */
1024 0xC9A4, /* LV # Lo HANGUL SYLLABLE JYI */
1025 0xC9C0, /* LV # Lo HANGUL SYLLABLE JI */
1026 0xC9DC, /* LV # Lo HANGUL SYLLABLE JJA */
1027 0xC9F8, /* LV # Lo HANGUL SYLLABLE JJAE */
1028 0xCA14, /* LV # Lo HANGUL SYLLABLE JJYA */
1029 0xCA30, /* LV # Lo HANGUL SYLLABLE JJYAE */
1030 0xCA4C, /* LV # Lo HANGUL SYLLABLE JJEO */
1031 0xCA68, /* LV # Lo HANGUL SYLLABLE JJE */
1032 0xCA84, /* LV # Lo HANGUL SYLLABLE JJYEO */
1033 0xCAA0, /* LV # Lo HANGUL SYLLABLE JJYE */
1034 0xCABC, /* LV # Lo HANGUL SYLLABLE JJO */
1035 0xCAD8, /* LV # Lo HANGUL SYLLABLE JJWA */
1036 0xCAF4, /* LV # Lo HANGUL SYLLABLE JJWAE */
1037 0xCB10, /* LV # Lo HANGUL SYLLABLE JJOE */
1038 0xCB2C, /* LV # Lo HANGUL SYLLABLE JJYO */
1039 0xCB48, /* LV # Lo HANGUL SYLLABLE JJU */
1040 0xCB64, /* LV # Lo HANGUL SYLLABLE JJWEO */
1041 0xCB80, /* LV # Lo HANGUL SYLLABLE JJWE */
1042 0xCB9C, /* LV # Lo HANGUL SYLLABLE JJWI */
1043 0xCBB8, /* LV # Lo HANGUL SYLLABLE JJYU */
1044 0xCBD4, /* LV # Lo HANGUL SYLLABLE JJEU */
1045 0xCBF0, /* LV # Lo HANGUL SYLLABLE JJYI */
1046 0xCC0C, /* LV # Lo HANGUL SYLLABLE JJI */
1047 0xCC28, /* LV # Lo HANGUL SYLLABLE CA */
1048 0xCC44, /* LV # Lo HANGUL SYLLABLE CAE */
1049 0xCC60, /* LV # Lo HANGUL SYLLABLE CYA */
1050 0xCC7C, /* LV # Lo HANGUL SYLLABLE CYAE */
1051 0xCC98, /* LV # Lo HANGUL SYLLABLE CEO */
1052 0xCCB4, /* LV # Lo HANGUL SYLLABLE CE */
1053 0xCCD0, /* LV # Lo HANGUL SYLLABLE CYEO */
1054 0xCCEC, /* LV # Lo HANGUL SYLLABLE CYE */
1055 0xCD08, /* LV # Lo HANGUL SYLLABLE CO */
1056 0xCD24, /* LV # Lo HANGUL SYLLABLE CWA */
1057 0xCD40, /* LV # Lo HANGUL SYLLABLE CWAE */
1058 0xCD5C, /* LV # Lo HANGUL SYLLABLE COE */
1059 0xCD78, /* LV # Lo HANGUL SYLLABLE CYO */
1060 0xCD94, /* LV # Lo HANGUL SYLLABLE CU */
1061 0xCDB0, /* LV # Lo HANGUL SYLLABLE CWEO */
1062 0xCDCC, /* LV # Lo HANGUL SYLLABLE CWE */
1063 0xCDE8, /* LV # Lo HANGUL SYLLABLE CWI */
1064 0xCE04, /* LV # Lo HANGUL SYLLABLE CYU */
1065 0xCE20, /* LV # Lo HANGUL SYLLABLE CEU */
1066 0xCE3C, /* LV # Lo HANGUL SYLLABLE CYI */
1067 0xCE58, /* LV # Lo HANGUL SYLLABLE CI */
1068 0xCE74, /* LV # Lo HANGUL SYLLABLE KA */
1069 0xCE90, /* LV # Lo HANGUL SYLLABLE KAE */
1070 0xCEAC, /* LV # Lo HANGUL SYLLABLE KYA */
1071 0xCEC8, /* LV # Lo HANGUL SYLLABLE KYAE */
1072 0xCEE4, /* LV # Lo HANGUL SYLLABLE KEO */
1073 0xCF00, /* LV # Lo HANGUL SYLLABLE KE */
1074 0xCF1C, /* LV # Lo HANGUL SYLLABLE KYEO */
1075 0xCF38, /* LV # Lo HANGUL SYLLABLE KYE */
1076 0xCF54, /* LV # Lo HANGUL SYLLABLE KO */
1077 0xCF70, /* LV # Lo HANGUL SYLLABLE KWA */
1078 0xCF8C, /* LV # Lo HANGUL SYLLABLE KWAE */
1079 0xCFA8, /* LV # Lo HANGUL SYLLABLE KOE */
1080 0xCFC4, /* LV # Lo HANGUL SYLLABLE KYO */
1081 0xCFE0, /* LV # Lo HANGUL SYLLABLE KU */
1082 0xCFFC, /* LV # Lo HANGUL SYLLABLE KWEO */
1083 0xD018, /* LV # Lo HANGUL SYLLABLE KWE */
1084 0xD034, /* LV # Lo HANGUL SYLLABLE KWI */
1085 0xD050, /* LV # Lo HANGUL SYLLABLE KYU */
1086 0xD06C, /* LV # Lo HANGUL SYLLABLE KEU */
1087 0xD088, /* LV # Lo HANGUL SYLLABLE KYI */
1088 0xD0A4, /* LV # Lo HANGUL SYLLABLE KI */
1089 0xD0C0, /* LV # Lo HANGUL SYLLABLE TA */
1090 0xD0DC, /* LV # Lo HANGUL SYLLABLE TAE */
1091 0xD0F8, /* LV # Lo HANGUL SYLLABLE TYA */
1092 0xD114, /* LV # Lo HANGUL SYLLABLE TYAE */
1093 0xD130, /* LV # Lo HANGUL SYLLABLE TEO */
1094 0xD14C, /* LV # Lo HANGUL SYLLABLE TE */
1095 0xD168, /* LV # Lo HANGUL SYLLABLE TYEO */
1096 0xD184, /* LV # Lo HANGUL SYLLABLE TYE */
1097 0xD1A0, /* LV # Lo HANGUL SYLLABLE TO */
1098 0xD1BC, /* LV # Lo HANGUL SYLLABLE TWA */
1099 0xD1D8, /* LV # Lo HANGUL SYLLABLE TWAE */
1100 0xD1F4, /* LV # Lo HANGUL SYLLABLE TOE */
1101 0xD210, /* LV # Lo HANGUL SYLLABLE TYO */
1102 0xD22C, /* LV # Lo HANGUL SYLLABLE TU */
1103 0xD248, /* LV # Lo HANGUL SYLLABLE TWEO */
1104 0xD264, /* LV # Lo HANGUL SYLLABLE TWE */
1105 0xD280, /* LV # Lo HANGUL SYLLABLE TWI */
1106 0xD29C, /* LV # Lo HANGUL SYLLABLE TYU */
1107 0xD2B8, /* LV # Lo HANGUL SYLLABLE TEU */
1108 0xD2D4, /* LV # Lo HANGUL SYLLABLE TYI */
1109 0xD2F0, /* LV # Lo HANGUL SYLLABLE TI */
1110 0xD30C, /* LV # Lo HANGUL SYLLABLE PA */
1111 0xD328, /* LV # Lo HANGUL SYLLABLE PAE */
1112 0xD344, /* LV # Lo HANGUL SYLLABLE PYA */
1113 0xD360, /* LV # Lo HANGUL SYLLABLE PYAE */
1114 0xD37C, /* LV # Lo HANGUL SYLLABLE PEO */
1115 0xD398, /* LV # Lo HANGUL SYLLABLE PE */
1116 0xD3B4, /* LV # Lo HANGUL SYLLABLE PYEO */
1117 0xD3D0, /* LV # Lo HANGUL SYLLABLE PYE */
1118 0xD3EC, /* LV # Lo HANGUL SYLLABLE PO */
1119 0xD408, /* LV # Lo HANGUL SYLLABLE PWA */
1120 0xD424, /* LV # Lo HANGUL SYLLABLE PWAE */
1121 0xD440, /* LV # Lo HANGUL SYLLABLE POE */
1122 0xD45C, /* LV # Lo HANGUL SYLLABLE PYO */
1123 0xD478, /* LV # Lo HANGUL SYLLABLE PU */
1124 0xD494, /* LV # Lo HANGUL SYLLABLE PWEO */
1125 0xD4B0, /* LV # Lo HANGUL SYLLABLE PWE */
1126 0xD4CC, /* LV # Lo HANGUL SYLLABLE PWI */
1127 0xD4E8, /* LV # Lo HANGUL SYLLABLE PYU */
1128 0xD504, /* LV # Lo HANGUL SYLLABLE PEU */
1129 0xD520, /* LV # Lo HANGUL SYLLABLE PYI */
1130 0xD53C, /* LV # Lo HANGUL SYLLABLE PI */
1131 0xD558, /* LV # Lo HANGUL SYLLABLE HA */
1132 0xD574, /* LV # Lo HANGUL SYLLABLE HAE */
1133 0xD590, /* LV # Lo HANGUL SYLLABLE HYA */
1134 0xD5AC, /* LV # Lo HANGUL SYLLABLE HYAE */
1135 0xD5C8, /* LV # Lo HANGUL SYLLABLE HEO */
1136 0xD5E4, /* LV # Lo HANGUL SYLLABLE HE */
1137 0xD600, /* LV # Lo HANGUL SYLLABLE HYEO */
1138 0xD61C, /* LV # Lo HANGUL SYLLABLE HYE */
1139 0xD638, /* LV # Lo HANGUL SYLLABLE HO */
1140 0xD654, /* LV # Lo HANGUL SYLLABLE HWA */
1141 0xD670, /* LV # Lo HANGUL SYLLABLE HWAE */
1142 0xD68C, /* LV # Lo HANGUL SYLLABLE HOE */
1143 0xD6A8, /* LV # Lo HANGUL SYLLABLE HYO */
1144 0xD6C4, /* LV # Lo HANGUL SYLLABLE HU */
1145 0xD6E0, /* LV # Lo HANGUL SYLLABLE HWEO */
1146 0xD6FC, /* LV # Lo HANGUL SYLLABLE HWE */
1147 0xD718, /* LV # Lo HANGUL SYLLABLE HWI */
1148 0xD734, /* LV # Lo HANGUL SYLLABLE HYU */
1149 0xD750, /* LV # Lo HANGUL SYLLABLE HEU */
1150 0xD76C, /* LV # Lo HANGUL SYLLABLE HYI */
1151 0xD788, /* LV # Lo HANGUL SYLLABLE HI */
/* Last-column code points for row 13 of the pr29 table: the contiguous
   range U+11A8..U+11C2 (HANGUL JONGSEONG KIYEOK..HIEUH).  The trailing
   zero is the end-of-list sentinel that the lookup loops stop on. */
static const uint32_t pr29_13_2[] = {
  0x11A8, 0x11A9, 0x11AA, 0x11AB,
  0x11AC, 0x11AD, 0x11AE, 0x11AF,
  0x11B0, 0x11B1, 0x11B2, 0x11B3,
  0x11B4, 0x11B5, 0x11B6, 0x11B7,
  0x11B8, 0x11B9, 0x11BA, 0x11BB,
  0x11BC, 0x11BD, 0x11BE, 0x11BF,
  0x11C0, 0x11C1, 0x11C2,
  0
};
/* One row of the problem-sequence table.  Both members point at
   zero-terminated lists of code points. */
typedef struct
{
  /* Code points that may start a problem sequence for this row. */
  const uint32_t *first;
  /* Code points that may end a problem sequence for this row. */
  const uint32_t *last;
} Pr29;
1168 static const Pr29 pr29[] = {
1169 {&pr29_1_1[0], &pr29_1_2[0]},
1170 {&pr29_2_1[0], &pr29_2_2[0]},
1171 {&pr29_3_1[0], &pr29_3_2[0]},
1172 {&pr29_4_1[0], &pr29_4_2[0]},
1173 {&pr29_5_1[0], &pr29_5_2[0]},
1174 {&pr29_6_1[0], &pr29_6_2[0]},
1175 {&pr29_7_1[0], &pr29_7_2[0]},
1176 {&pr29_8_1[0], &pr29_8_2[0]},
1177 {&pr29_9_1[0], &pr29_9_2[0]},
1178 {&pr29_10_1[0], &pr29_10_2[0]},
1179 {&pr29_11_1[0], &pr29_11_2[0]},
1180 {&pr29_12_1[0], &pr29_12_2[0]},
1181 {&pr29_13_1[0], &pr29_13_2[0]},
1182 {NULL, NULL}
1185 static size_t
1186 first_column (uint32_t c)
1188 size_t i, j;
1190 for (i = 0; pr29[i].first; i++)
1191 for (j = 0; pr29[i].first[j]; j++)
1192 if (c == pr29[i].first[j])
1193 return i + 1;
1195 return 0;
1198 static int
1199 in_last_column_row (uint32_t c, size_t row)
1201 size_t i;
1203 for (i = 0; pr29[row - 1].last[i]; i++)
1204 if (c == pr29[row - 1].last[i])
1205 return 1;
1207 return 0;
1210 static size_t
1211 combinationclass (uint32_t c)
1213 size_t i;
1215 for (i = 0; nzcc[i]; i++)
1216 if (c == nzcc[i])
1217 return i + 1;
1219 return 0;
1223 * pr29_4 - check if input trigger Unicode normalization bugs
1224 * @in: input array with unicode code points.
1225 * @len: length of input array with unicode code points.
1227 * Check the input to see if it may be normalized into different
1228 * strings by different NFKC implementations, due to an anomaly in the
1229 * NFKC specifications.
1231 * Return value: Returns the #Pr29_rc value %PR29_SUCCESS on success,
1232 * and %PR29_PROBLEM if the input sequence is a "problem sequence"
1233 * (i.e., may be normalized into different strings by different
1234 * implementations).
1237 pr29_4 (const uint32_t * in, size_t len)
1239 size_t i, j, k, row;
1242 * The problem sequence are of the form:
1244 * first_character intervening_character+ last_character
1246 * where the first_character and last_character come from the same
1247 * row in the following table, and there is at least one
1248 * intervening_character with non-zero Canonical Combining
1249 * Class. (The '+' above means one or more occurrences.)
1253 for (i = 0; i < len; i++)
1254 if ((row = first_column (in[i])) > 0)
1255 for (j = i + 1; j < len; j++)
1256 if (combinationclass (in[j]))
1257 for (k = j + 1; k < len; j++)
1258 if (in_last_column_row (in[k], row))
1259 return PR29_PROBLEM;
1261 return PR29_SUCCESS;
/**
 * pr29_4z - check if input triggers Unicode normalization bugs
 * @in: zero terminated array of Unicode code points.
 *
 * Zero-terminated convenience wrapper around pr29_4(): the number of
 * code points before the terminating zero is counted and the array is
 * then checked for sequences that different NFKC implementations may
 * normalize into different strings.
 *
 * Return value: Returns the #Pr29_rc value %PR29_SUCCESS on success,
 *   and %PR29_PROBLEM if the input sequence is a "problem sequence"
 *   (i.e., may be normalized into different strings by different
 *   implementations).
 */
int
pr29_4z (const uint32_t * in)
{
  const uint32_t *end = in;

  /* Find the terminating zero code point. */
  while (*end != 0)
    end++;

  return pr29_4 (in, (size_t) (end - in));
}
1289 * pr29_8z - check if input trigger Unicode normalization bugs
1290 * @in: zero terminated input UTF-8 string.
1292 * Check the input to see if it may be normalized into different
1293 * strings by different NFKC implementations, due to an anomaly in the
1294 * NFKC specifications.
1296 * Return value: Returns the #Pr29_rc value %PR29_SUCCESS on success,
1297 * and %PR29_PROBLEM if the input sequence is a "problem sequence"
1298 * (i.e., may be normalized into different strings by different
1299 * implementations), or %PR29_STRINGPREP_ERROR if there was a
1300 * problem converting the string from UTF-8 to UCS-4.
1303 pr29_8z (const char *in)
1305 uint32_t *p;
1306 int rc;
1308 p = stringprep_utf8_to_ucs4 (in, -1, NULL);
1309 if (!p)
1310 return PR29_STRINGPREP_ERROR;
1312 rc = pr29_4z (p);
1314 free (p);
1316 return rc;
1320 * Pr29_rc:
1321 * @PR29_SUCCESS: Successful operation. This value is guaranteed to
1322 * always be zero, the remaining ones are only guaranteed to hold
1323 * non-zero values, for logical comparison purposes.
1324 * @PR29_PROBLEM: A problem sequence was encountered.
1325 * @PR29_STRINGPREP_ERROR: The character set conversion failed (only
1326 * for pr29_8z()).
1328 * Enumerated return codes for pr29_4(), pr29_4z(), pr29_8z(). The
1329 * value 0 is guaranteed to always correspond to success.