usleep tests: Avoid failure due to known Cygwin 3.5.3 bug.
[gnulib.git] / lib / unilbrk / lbrktables.h
blob5166d430302060e65872c201f7016ea9ae721e71
1 /* Line breaking auxiliary tables.
2 Copyright (C) 2001-2003, 2006-2024 Free Software Foundation, Inc.
3 Written by Bruno Haible <bruno@clisp.org>, 2001.
5 This file is free software.
6 It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
7 You can redistribute it and/or modify it under either
8 - the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation, either version 3, or (at your
10 option) any later version, or
11 - the terms of the GNU General Public License as published by the
12 Free Software Foundation; either version 2, or (at your option)
13 any later version, or
14 - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
16 This file is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 Lesser General Public License and the GNU General Public License
20 for more details.
22 You should have received a copy of the GNU Lesser General Public
23 License and of the GNU General Public License along with this
24 program. If not, see <https://www.gnu.org/licenses/>. */
26 #include "unitypes.h"
28 #ifdef __cplusplus
29 extern "C" {
30 #endif
/* Line breaking classification.
   The values 0..39 form a contiguous range; the values >= 40 are
   resolved at run time.  LBP_HL_BA is an artificial value that exists
   only at run time and never appears in the tables.  */

enum
{
  /* Values >= 40 are resolved at run time. */
  LBP_BK = 40, /* mandatory break */
  LBP_CR = 41, /* carriage return */
  LBP_LF = 42, /* line feed */
  LBP_CM = 43, /* attached characters and combining marks */
/*LBP_NL,         next line - not used here because it's equivalent to LBP_BK */
/*LBP_SG,         surrogates - not used here because they are not characters */
  LBP_WJ =  0, /* word joiner */
  LBP_ZW = 44, /* zero width space */
  LBP_GL =  1, /* non-breaking (glue) */
  LBP_SP = 45, /* space */
  LBP_B2 =  2, /* break opportunity before and after */
  LBP_BA =  3, /* break opportunity after */
  LBP_BB =  4, /* break opportunity before */
  LBP_HY =  5, /* hyphen */
  LBP_CB = 46, /* contingent break opportunity */
  LBP_CL =  6, /* closing punctuation */
  LBP_CP1 = 7, /* closing parenthesis, non-EastAsian character */
  LBP_CP2 = 8, /* closing parenthesis, EastAsian character */
  LBP_EX =  9, /* exclamation/interrogation */
  LBP_IN = 10, /* inseparable */
  LBP_NS = 11, /* non starter */
  LBP_OP1 = 12, /* opening punctuation, non-EastAsian character */
  LBP_OP2 = 13, /* opening punctuation, EastAsian character */
  LBP_QU1 = 14, /* ambiguous quotation, neither initial nor final punctuation */
  LBP_QU2 = 15, /* ambiguous quotation, initial punctuation */
  LBP_QU3 = 16, /* ambiguous quotation, final punctuation */
  LBP_IS = 17, /* infix separator (numeric) */
  LBP_NU = 18, /* numeric */
  LBP_PO = 19, /* postfix (numeric) */
  LBP_PR = 20, /* prefix (numeric) */
  LBP_SY = 21, /* symbols allowing breaks */
  LBP_AI = 47, /* ambiguous (alphabetic or ideograph) */
  LBP_AL = 22, /* ordinary alphabetic and symbol characters */
/*LBP_CJ,         conditional Japanese starter, resolved to NS */
  LBP_H2 = 23, /* Hangul LV syllable */
  LBP_H3 = 24, /* Hangul LVT syllable */
  LBP_HL = 30, /* Hebrew letter */
  LBP_ID1 = 25, /* ideographic */
  LBP_ID2 = 26, /* ideographic and potential future emoji */
  LBP_JL = 27, /* Hangul L Jamo */
  LBP_JV = 28, /* Hangul V Jamo */
  LBP_JT = 29, /* Hangul T Jamo */
  LBP_AP = 31, /* Brahmic scripts: pre-base repha */
  LBP_AK = 32, /* Brahmic scripts: consonants */
  LBP_AS = 33, /* Brahmic scripts: independent vowels */
  LBP_VI = 34, /* Brahmic scripts: conjoining viramas */
  LBP_VF = 35, /* Brahmic scripts: viramas for final consonants */
  LBP_RI = 36, /* regional indicator */
  LBP_SA = 48, /* complex context (South East Asian) */
  LBP_ZWJ = 37, /* zero width joiner */
  LBP_EB = 38, /* emoji base */
  LBP_EM = 39, /* emoji modifier */
  LBP_XX = 49, /* unknown */
  /* Artificial values that exist only at runtime, not in the tables. */
  LBP_HL_BA = 100
};
95 #include "lbrkprop1.h"
97 static inline unsigned char
98 unilbrkprop_lookup (ucs4_t uc)
100 unsigned int index1 = uc >> lbrkprop_header_0;
101 if (index1 < lbrkprop_header_1)
103 int lookup1 = unilbrkprop.level1[index1];
104 if (lookup1 >= 0)
106 unsigned int index2 = (uc >> lbrkprop_header_2) & lbrkprop_header_3;
107 int lookup2 = unilbrkprop.level2[lookup1 + index2];
108 if (lookup2 >= 0)
110 unsigned int index3 = uc & lbrkprop_header_4;
111 return unilbrkprop.level3[lookup2 + index3];
115 return LBP_XX;
/* Table indexed by two line breaking classifications.
   Both dimensions are 40, which matches the classification values 0..39
   of the enum above; the values >= 40 are resolved at run time.
   The macros D, I, P presumably name the values stored in the table
   entries (to be confirmed against the table's definition).  */
#define D 1  /* direct break opportunity, empty in table 7.3 of UTR #14 */
#define I 2  /* indirect break opportunity, '%' in table 7.3 of UTR #14 */
#define P 3  /* prohibited break,           '^' in table 7.3 of UTR #14 */

extern const unsigned char unilbrk_table[40][40];
125 /* We don't support line breaking of complex-context dependent characters
126 (Thai, Lao, Myanmar, Khmer) yet, because it requires dictionary lookup. */
129 #ifdef __cplusplus
131 #endif