Bump github/codeql-action from 3.25.12 to 3.25.13
[ruby.git] / enc / emacs_mule.c
blobf92eb183cf788de310ee5da32cf1f463c3c33575
1 /**********************************************************************
2 emacs_mule.c - Oniguruma (regular expression library)
3 **********************************************************************/
4 /*-
5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
30 #include "regenc.h"
/* Non-zero when byte `c`, viewed as a UChar, is below 0x9e.
 * left_adjust_char_head() uses it to decide where to stop when stepping
 * backwards to the first byte of a character. */
33 #define emacsmule_islead(c) ((UChar )(c) < 0x9e)
36 CHARACTER := ASCII_CHAR | MULTIBYTE_CHAR
37 MULTIBYTE_CHAR := PRIMARY_CHAR_1 | PRIMARY_CHAR_2
38 | SECONDARY_CHAR_1 | SECONDARY_CHAR_2
39 PRIMARY_CHAR_1 := LEADING_CODE_PRI C1
40 PRIMARY_CHAR_2 := LEADING_CODE_PRI C1 C2
41 SECONDARY_CHAR_1 := LEADING_CODE_SEC LEADING_CODE_EXT C1
42 SECONDARY_CHAR_2 := LEADING_CODE_SEC LEADING_CODE_EXT C1 C2
43 ASCII_CHAR := 0 | 1 | ... | 127
44 LEADING_CODE_PRI := 129 | 130 | ... | 153
45 LEADING_CODE_SEC := 154 | 155 | 156 | 157
46 C1, C2, LEADING_CODE_EXT := 160 | 161 | ... | 255
/* Character length in bytes, indexed by the value of the first byte
 * (16 entries per row, rows 0x0_ through 0xf_).
 * In this file it is read only by mbc_enc_len(), to compute how many more
 * bytes to ask for when the input ends in the middle of a character.
 * NOTE(review): the embedded numbering jumps from 65 to 68 after the last
 * row, so the closing of the initializer is not visible here -- confirm
 * against the upstream file. */
49 static const int EncLen_EmacsMule[] = {
50 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
59 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
/*
 * Validation automaton for Emacs-Mule byte sequences.
 *
 * trans[state][byte] yields the next state, ACCEPT when `byte` completes a
 * character, or FAILURE when `byte` is not allowed in that state.
 *
 *   S0  first byte: 0x00-0x7f accept; 0x81-0x8f -> S1; 0x90-0x99 -> S2;
 *       0x9a -> S3; 0x9b -> S4; 0x9c -> S5; 0x9d -> S6; anything else fails
 *   S1  one trailing byte (0xa0-0xff) left
 *   S2  two trailing bytes (0xa0-0xff) left
 *   S3  extension byte 0xa0-0xdf, then one trailing byte
 *   S4  extension byte 0xe0-0xef, then one trailing byte
 *   S5  extension byte 0xf0-0xf4, then two trailing bytes
 *   S6  extension byte 0xf5-0xfe, then two trailing bytes
 *
 * Each of the four secondary leading bytes 0x9a-0x9d must enter its own
 * extension-byte state: routing 0x9a to S4 as well would leave S3
 * unreachable and reject every 0x9a character whose extension byte lies in
 * 0xa0-0xdf.
 */
typedef enum { FAILURE = -2, ACCEPT = -1, S0 = 0, S1, S2, S3, S4, S5, S6 } state_t;
#define A ACCEPT
#define F FAILURE
static const signed char trans[][0x100] = {
  { /* S0   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 1 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 2 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 3 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 4 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 5 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 6 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 7 */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* 8 */ F, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 9 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, F, F,
    /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
  },
  { /* S1   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* b */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* c */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* d */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* e */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,
    /* f */ A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A
  },
  { /* S2   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
  },
  { /* S3   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* b */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* c */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* d */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
  },
  { /* S4   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* e */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* f */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F
  },
  { /* S5   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* f */ 2, 2, 2, 2, 2, F, F, F, F, F, F, F, F, F, F, F
  },
  { /* S6   0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f */
    /* 0 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 1 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 2 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 3 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 4 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 5 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 6 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 7 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 8 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* 9 */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* a */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* b */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* c */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* d */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* e */ F, F, F, F, F, F, F, F, F, F, F, F, F, F, F, F,
    /* f */ F, F, F, F, F, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, F
  }
};
#undef A
#undef F
203 static int
204 mbc_enc_len(const UChar* p, const UChar* e, OnigEncoding enc ARG_UNUSED)
206 int firstbyte = *p++;
207 state_t s;
208 s = trans[0][firstbyte];
209 if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(1) :
210 ONIGENC_CONSTRUCT_MBCLEN_INVALID();
211 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EmacsMule[firstbyte]-1);
212 s = trans[s][*p++];
213 if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(2) :
214 ONIGENC_CONSTRUCT_MBCLEN_INVALID();
215 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EmacsMule[firstbyte]-2);
216 s = trans[s][*p++];
217 if (s < 0) return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(3) :
218 ONIGENC_CONSTRUCT_MBCLEN_INVALID();
219 if (p == e) return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(EncLen_EmacsMule[firstbyte]-3);
220 s = trans[s][*p++];
221 return s == ACCEPT ? ONIGENC_CONSTRUCT_MBCLEN_CHARFOUND(4) :
222 ONIGENC_CONSTRUCT_MBCLEN_INVALID();
225 static OnigCodePoint
226 mbc_to_code(const UChar* p, const UChar* end, OnigEncoding enc)
228 int c, i, len;
229 OnigCodePoint n;
231 len = enclen(enc, p, end);
232 n = (OnigCodePoint )*p++;
233 if (len == 1) return n;
235 for (i = 1; i < len; i++) {
236 if (p >= end) break;
237 c = *p++;
238 n <<= 8; n += c;
240 return n;
243 static int
244 code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED)
246 if (ONIGENC_IS_CODE_ASCII(code)) return 1;
247 else if (code > 0xffffffff) return 0;
248 else if ((code & 0xff000000) >= 0x80000000) return 4;
249 else if ((code & 0xff0000) >= 0x800000) return 3;
250 else if ((code & 0xff00) >= 0x8000) return 2;
251 else
252 return ONIGERR_INVALID_CODE_POINT_VALUE;
255 static int
256 code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc)
258 UChar *p = buf;
260 if ((code & 0xff000000) != 0) *p++ = (UChar )(((code >> 24) & 0xff));
261 if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
262 if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
263 *p++ = (UChar )(code & 0xff);
265 if (enclen(enc, buf, p) != (p - buf))
266 return ONIGERR_INVALID_CODE_POINT_VALUE;
267 return (int)(p - buf);
270 static int
271 mbc_case_fold(OnigCaseFoldType flag,
272 const UChar** pp, const UChar* end, UChar* lower,
273 OnigEncoding enc)
275 int len;
276 const UChar* p = *pp;
278 if (ONIGENC_IS_MBC_ASCII(p)) {
279 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(*p);
280 (*pp)++;
281 return 1;
283 else {
284 int i;
286 len = mbc_enc_len(p, end, enc);
287 for (i = 0; i < len; i++) {
288 *lower++ = *p++;
290 (*pp) += len;
291 return len; /* return byte length of converted char to lower */
295 static UChar*
296 left_adjust_char_head(const UChar* start, const UChar* s, const UChar* end, OnigEncoding enc)
298 const UChar *p;
300 if (s <= start) return (UChar* )s;
301 p = s;
303 while (!emacsmule_islead(*p) && p > start) p--;
304 return (UChar* )p;
307 static int
308 is_code_ctype(OnigCodePoint code, unsigned int ctype, OnigEncoding enc ARG_UNUSED)
310 if (code < 128)
311 return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
312 else
313 return (code_to_mbclen(code, enc) > 1 ? TRUE : FALSE);
317 * Name: Emacs-Mule
318 * Link: http://www.m17n.org/mule/pricai96/mule.en.html
/* Encoding descriptor for Emacs-Mule: wires the static functions defined in
 * this file together with shared onigenc_* helpers.
 * NOTE(review): entries are positional, so their order presumably has to
 * match the encoding structure declared by the project headers -- confirm
 * there before reordering or inserting anything. */
320 OnigEncodingDefine(emacs_mule, Emacs_Mule) = {
321 mbc_enc_len,
322 "Emacs-Mule", /* name */
323 4, /* max enc length */
324 1, /* min enc length */
325 onigenc_is_mbc_newline_0x0a,
326 mbc_to_code,
327 code_to_mbclen,
328 code_to_mbc,
329 mbc_case_fold,
330 onigenc_ascii_apply_all_case_fold,
331 onigenc_ascii_get_case_fold_codes_by_str,
332 onigenc_minimum_property_name_to_ctype,
333 is_code_ctype,
334 onigenc_not_support_get_ctype_code_range,
335 left_adjust_char_head,
336 onigenc_always_true_is_allowed_reverse_match,
337 onigenc_ascii_only_case_map,
/* NOTE(review): the embedded numbering skips 338 at this point, so one
 * initializer entry appears to be missing from this rendering -- verify
 * against the upstream file. */
339 ONIGENC_FLAG_NONE,
/* Passes the name "stateless-ISO-2022-JP" together with "Emacs-Mule" to
 * ENC_REPLICATE; presumably this registers the former as a replica of the
 * latter -- confirm against the macro's definition. */
342 ENC_REPLICATE("stateless-ISO-2022-JP", "Emacs-Mule")