/* Mapping tables for JOHAB handling.
   Copyright (C) 1998-2022 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
/* The table for Bit pattern to Hangul Jamo
   5 bits each are used to encode
   leading consonants(19 + 1 filler), medial vowels(21 + 1 filler)
   and trailing consonants(27 + 1 filler).

   KS C 5601-1992 Annex 3 Table 2
   0 : Filler, -1: invalid, >= 1 : valid
*/

/* Leading-consonant field (bits 14..10 of the two-byte code).
   Code 1 is the filler, codes 2..20 are the 19 leading consonants.
   All 32 slots are spelled out so that no unused code can be mistaken
   for the filler through implicit zero-initialisation.  */
static const int init[32] =
{
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
  19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/* Medial-vowel field (bits 9..5 of the two-byte code).
   0 marks the filler, 1..21 the vowels and -1 an invalid code; the first
   two codes of every group of eight are unused.  */
static const int mid[32] =
{
  -1, -1, 0, 1, 2, 3, 4, 5,
  -1, -1, 6, 7, 8, 9, 10, 11,
  -1, -1, 12, 13, 14, 15, 16, 17,
  -1, -1, 18, 19, 20, 21, -1, -1
};
/* Trailing-consonant field (bits 4..0 of the two-byte code).
   0 marks the filler, 1..27 the trailing consonants and -1 an invalid
   code (0x00, 0x12, 0x1e and 0x1f are unused).  */
static const int final[32] =
{
  -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
  -1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, -1, -1
};
/*
  Hangul Jamo in Johab to Unicode 2.0 : Unicode 2.0
  defines 51 Hangul Compatibility Jamos in the block [0x3131,0x3163]

  It's to be considered later which Jamo block to use, Compatibility
  block [0x3131,0x3163] or Hangul Conjoining Jamo block, [0x1100,0x11ff]
*/

/* Compatibility Jamo code point for each of the 19 leading consonants,
   indexed by the value from init[] minus one.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};
/* Compatibility Jamo code point for a trailing consonant that appears
   alone, indexed by the value from final[] minus one.  Only the compound
   consonants that have no entry in init_to_ucs[] get a code point here;
   every other slot is 0, which the decoder rejects as an illegal
   character.  */
static const uint32_t final_to_ucs[31] =
{
  L'\0', L'\0', 0x3133, L'\0', 0x3135, 0x3136, L'\0', L'\0',
  0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
  0x3140, L'\0', L'\0', 0x3144, L'\0', L'\0', L'\0', L'\0',
  L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'
};
/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd7a3]
   to Jamo bit patterns for Johab encoding

   cf. : KS C 5601-1992, Annex3 Table 2

   Arrays are used to speed up things although it's possible
   to get the same result arithmetically.
*/

/* Bit pattern of each of the 19 leading consonants: the high bit plus
   the 5-bit consonant code (2..20) shifted into bits 14..10, i.e.
   0x8800 + 0x400 * index.  The table must hold all 19 entries; the last
   one (0xd000) is the lead part of the 0xd041 entry in
   jamo_from_ucs_table.  */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,
  0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00,
  0xd000
};
/* Bit pattern of each of the 21 medial vowels: the 5-bit vowel code
   shifted into bits 9..5.  The gaps between the rows match the codes
   marked invalid in mid[].  */
static const int mid_to_bit[21] =
{
  0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};
/* 5-bit code of each trailing consonant, indexed by the Unicode
   trailing-consonant index (0 = no trailing consonant, encoded as the
   filler code 1).  Code 0x12 is skipped because final[] marks it
   invalid.  */
static const int final_to_bit[28] =
{
  1, 2, 3, 4, 5, 6, 7, 8, 9, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
  0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};
/* The conversion table from
   UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
   to Johab

   cf. 1. KS C 5601-1992 Annex 3 Table 2
   2. Unicode 2.0 manual
*/

/* Indexed by (code point - 0x3131); one entry per code point, 51 in all.
   A consonant that can lead a syllable is encoded as
   init_to_bit[] + 0x41 (filler vowel, filler final); a compound
   consonant that can only trail is encoded as 0x8440 + its final code;
   a vowel is encoded as 0x8401 + mid_to_bit[].  */
static const uint16_t jamo_from_ucs_table[51] =
{
  0x8841, 0x8c41,			/* U+3131, U+3132 */
  0x8444,				/* U+3133 */
  0x9041,				/* U+3134 */
  0x8446, 0x8447,			/* U+3135, U+3136 */
  0x9441, 0x9841, 0x9c41,		/* U+3137 .. U+3139 */
  0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  0xa041, 0xa441, 0xa841,		/* U+3141 .. U+3143 */
  0x8454,				/* U+3144 */
  0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  0x8741, 0x8761, 0x8781, 0x87a1
};
133 johab_sym_hanja_to_ucs (uint32_t idx
, uint32_t c1
, uint32_t c2
)
136 return (uint32_t) __ksc5601_sym_to_ucs
[(c1
- 0xd9) * 188 + c2
137 - (c2
> 0x90 ? 0x43 : 0x31)];
139 return (uint32_t) __ksc5601_hanja_to_ucs
[(c1
- 0xe0) * 188 + c2
140 - (c2
> 0x90 ? 0x43 : 0x31)];
/* Definitions used in the body of the `gconv' function.  */
#define CHARSET_NAME		"JOHAB//"
#define FROM_LOOP		from_johab
#define TO_LOOP			to_johab
#define DEFINE_INIT		1
#define DEFINE_FINI		1
/* A JOHAB character takes one or two bytes, a UCS4 character four.  */
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
#define ONE_DIRECTION		0


/* First define the conversion function from JOHAB to UCS4.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Conversion step from JOHAB to UCS4 (LOOPFCT is FROM_LOOP at this point).

   What the visible statements do:
     - read one input byte into CH; in the ISO646-KR branch CH is
       replaced by 0x20a9 (the guarding condition is not visible here);
     - reject lead bytes above 0xf9, equal to 0xdf, in 0x7f..0x83 or in
       0xd4..0xd8 with STANDARD_FROM_LOOP_ERR_HANDLER (1);
     - set RESULT to __GCONV_INCOMPLETE_INPUT when the trail byte is not
       available yet;
     - for lead bytes up to 0xd3 (Hangul) split IDX = CH * 256 + CH2 into
       three 5-bit fields, look them up in init[], mid[] and final[], and
       produce a precomposed syllable (leading consonant and vowel both
       present), a single compatibility Jamo, or an error;
     - otherwise validate the trail byte and call
       johab_sym_hanja_to_ucs ();
     - treat a resulting code point of 0 as an illegal character and
       store the code point with put32.

   NOTE(review): this text appears to have lost several structural lines
   (the `#define BODY' header, braces, `else' keywords, the declarations
   of ch2, idx, i, m and f, the read of the trail byte, the pointer
   updates and the end of the commented-out lookup).  Restore them from
   the upstream file before compiling.  */
161 uint32_t ch = *inptr; \
165 /* Plain ISO646-KR. */ \
167 ch = 0x20a9; /* half-width Korean Currency WON sign */ \
170 /* Johab : 1. Hangul \
171 1st byte : 0x84-0xd3 \
172 2nd byte : 0x41-0x7e, 0x81-0xfe \
173 2. Hanja & Symbol : \
174 1st byte : 0xd8-0xde, 0xe0-0xf9 \
175 2nd byte : 0x31-0x7e, 0x91-0xfe \
176 0xd831-0xd87e and 0xd891-0xd8fe are user-defined area */ \
179 if (__builtin_expect (ch > 0xf9, 0) \
180 || __builtin_expect (ch == 0xdf, 0) \
181 || (__builtin_expect (ch > 0x7e, 0) && ch < 0x84) \
182 || (__builtin_expect (ch > 0xd3, 0) && ch < 0xd9)) \
184 /* These are illegal. */ \
185 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
189 /* Two-byte character. First test whether the next \
190 character is also available. */ \
194 if (__glibc_unlikely (inptr + 1 >= inend)) \
196 /* The second character is not available. Store the \
197 intermediate result. */ \
198 result = __GCONV_INCOMPLETE_INPUT; \
203 idx = ch * 256 + ch2; \
204 if (__glibc_likely (ch <= 0xd3)) \
209 i = init[(idx & 0x7c00) >> 10]; \
210 m = mid[(idx & 0x03e0) >> 5]; \
211 f = final[idx & 0x001f]; \
213 if (__builtin_expect (i == -1, 0) \
214 || __builtin_expect (m == -1, 0) \
215 || __builtin_expect (f == -1, 0)) \
217 /* This is illegal. */ \
218 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
220 else if (i > 0 && m > 0) \
221 ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00; \
222 else if (i > 0 && m == 0 && f == 0) \
223 ch = init_to_ucs[i - 1]; \
224 else if (i == 0 && m > 0 && f == 0) \
225 ch = 0x314e + m; /* 0x314f + m - 1 */ \
226 else if (__builtin_expect ((i | m) == 0, 1) \
227 && __builtin_expect (f > 0, 1)) \
228 ch = final_to_ucs[f - 1]; /* round trip?? */ \
231 /* This is illegal. */ \
232 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
237 if (__builtin_expect (ch2 < 0x31, 0) \
238 || (__builtin_expect (ch2 > 0x7e, 0) && ch2 < 0x91) \
239 || __builtin_expect (ch2, 0) == 0xff \
240 || (__builtin_expect (ch, 0) == 0xd9 && ch2 > 0xe8) \
241 || (__builtin_expect (ch, 0) == 0xda \
242 && ch2 > 0xa0 && ch2 < 0xd4) \
243 || (__builtin_expect (ch, 0) == 0xde && ch2 > 0xf1)) \
245 /* This is illegal. */ \
246 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
250 ch = johab_sym_hanja_to_ucs (idx, ch, ch2); \
251 /* if (idx <= 0xdefe) \
252 ch = __ksc5601_sym_to_ucs[(ch - 0xd9) * 192 \
253 + ch2 - (ch2 > 0x90 \
256 ch = __ksc5601_hanja_to_ucs[(ch - 0xe0) *192 \
257 + ch2 - (ch2 > 0x90 \
264 if (__glibc_unlikely (ch == 0)) \
266 /* This is an illegal character. */ \
267 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
273 put32 (outptr, ch); \
/* ONEBYTE_BODY handles a single input byte C: the visible statement
   returns 0x20a9 for byte 0x5c and C itself otherwise.
   NOTE(review): the guard on C and any alternative return value appear
   to be missing from this text -- confirm against the upstream file.
   <iconv/loop.c> is then included with the definitions made so far;
   presumably it generates the FROM_LOOP function from them.  */
276 #define LOOP_NEED_FLAGS
277 #define ONEBYTE_BODY \
280 return (c == 0x5c ? 0x20a9 : c); \
284 #include <iconv/loop.c>
/* Next, define the other direction: UCS4 input (fixed width, so only a
   minimum is given) and JOHAB output of one or two bytes.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
/* Conversion step from UCS4 to JOHAB (LOOPFCT is TO_LOOP at this point).

   What the visible statements do:
     - read one UCS4 character with get32;
     - the lines using from_ucs4_lat1, from_ucs4_greek, from_ucs4_cjk and
       cp look like remnants of disabled code (NOTE(review): the comment
       delimiters are not visible here -- confirm);
     - test for ASCII other than 0x5c (the action taken is not visible);
     - Hangul syllables 0xac00..0xd7a3 are composed from init_to_bit[],
       mid_to_bit[] and final_to_bit[] and written as two bytes
       (NOTE(review): the table indices only make sense for a
       syllable index relative to 0xac00; the subtraction is not visible
       here -- confirm);
     - compatibility Jamo 0x3131..0x3163 go through jamo_from_ucs_table;
     - Hanja 0x4e00..0x9fa5 and 0xf900..0xfa0b go through
       ucs4_to_ksc5601_hanja and are repacked into 188-cell rows starting
       at lead byte 0xe0;
     - 0x20a9 is tested for separately (the action is not visible);
     - everything else goes through ucs4_to_ksc5601_sym, and a result
       with outptr[0] == 0x22 and outptr[1] > 0x68 is rejected as well.
   RESULT is set to __GCONV_FULL_OUTPUT when fewer than two output bytes
   are left or a helper returns 0; unknown characters are reported with
   STANDARD_TO_LOOP_ERR_HANDLER (4).

   NOTE(review): this text appears to have lost several structural lines
   (the `#define BODY' header, braces, `else' keywords, `break'
   statements, the declarations of written and temp, and the pointer
   updates).  Restore them from the upstream file before compiling.  */
294 uint32_t ch = get32 (inptr); \
296 if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0]))) \
298 if (ch >= 0x0391 && ch <= 0x0451) \
299 cp = from_ucs4_greek[ch - 0x391]; \
300 else if (ch >= 0x2010 && ch <= 0x9fa0) \
301 cp = from_ucs4_cjk[ch - 0x02010]; \
306 cp = from_ucs4_lat1[ch]; \
309 if (ch <= 0x7f && ch != 0x5c) \
313 if (ch >= 0xac00 && ch <= 0xd7a3) \
315 if (__glibc_unlikely (outptr + 2 > outend)) \
317 result = __GCONV_FULL_OUTPUT; \
323 ch = (init_to_bit[ch / 588] /* 21 * 28 = 588 */ \
324 + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */ \
325 + final_to_bit[ch % 28]); /* (ch % (21 * 28)) % 28 */ \
327 *outptr++ = ch / 256; \
328 *outptr++ = ch % 256; \
330 /* KS C 5601-1992 Annex 3 regards 0xA4DA(Hangul Filler : U3164) \
332 else if (ch >= 0x3131 && ch <= 0x3163) \
334 ch = jamo_from_ucs_table[ch - 0x3131]; \
336 if (__glibc_unlikely (outptr + 2 > outend)) \
338 result = __GCONV_FULL_OUTPUT; \
342 *outptr++ = ch / 256; \
343 *outptr++ = ch % 256; \
345 else if ((ch >= 0x4e00 && ch <= 0x9fa5) \
346 || (ch >= 0xf900 && ch <= 0xfa0b)) \
351 written = ucs4_to_ksc5601_hanja (ch, outptr, outend - outptr); \
352 if (__builtin_expect (written, 1) == 0) \
354 result = __GCONV_FULL_OUTPUT; \
357 if (__glibc_unlikely (written == __UNKNOWN_10646_CHAR)) \
359 STANDARD_TO_LOOP_ERR_HANDLER (4); \
365 temp = outptr[0] * 94 + outptr[1]; \
367 outptr[0] = 0xe0 + temp / 188; \
368 outptr[1] = temp % 188; \
369 outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31; \
373 else if (ch == 0x20a9) \
380 written = ucs4_to_ksc5601_sym (ch, outptr, outend - outptr); \
381 if (__builtin_expect (written, 1) == 0) \
383 result = __GCONV_FULL_OUTPUT; \
386 if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0) \
387 || (outptr[0] == 0x22 && outptr[1] > 0x68)) \
389 UNICODE_TAG_HANDLER (ch, 4); \
390 STANDARD_TO_LOOP_ERR_HANDLER (4); \
393 temp = (outptr[0] < 0x4a ? outptr[0] + 0x191 : outptr[0] + 0x176);\
394 outptr[1] += (temp % 2 ? 0x5e : 0); \
395 outptr[1] += (outptr[1] < 0x6f ? 0x10 : 0x22); \
396 outptr[0] = temp / 2; \
/* Second inclusion of <iconv/loop.c>, this time with LOOPFCT defined as
   TO_LOOP; presumably it generates the UCS4 -> JOHAB loop function from
   the definitions in effect.  <iconv/skeleton.c> is included last.  */
404 #define LOOP_NEED_FLAGS
405 #include <iconv/loop.c>
408 /* Now define the toplevel functions. */
409 #include <iconv/skeleton.c>