Update copyright notices with scripts/update-copyrights
[glibc.git] / iconvdata / johab.c
blobfb280a368fa815162b1b4b4c26af9c4724609023
1 /* Mapping tables for JOHAB handling.
2 Copyright (C) 1998-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
5 and Ulrich Drepper <drepper@cygnus.com>, 1998.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, see
19 <http://www.gnu.org/licenses/>. */
21 #include <dlfcn.h>
22 #include <stdint.h>
23 #include <ksc5601.h>
/* The tables for bit pattern to Hangul Jamo.

   5 bits each are used to encode
   leading consonants (19 + 1 filler), medial vowels (21 + 1 filler)
   and trailing consonants (27 + 1 filler).

   KS C 5601-1992 Annex 3 Table 2
   0 : Filler, -1: invalid, >= 1 : valid
 */

/* 5-bit leading-consonant field -> consonant index.  All 32 slots are
   spelled out so that no slot is zero-initialised by accident: an
   implicit 0 would read as "filler" instead of "invalid".  */
static const int init[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/* 5-bit medial-vowel field -> vowel index.
   0 : Filler, -1: invalid, >= 1 : valid (1..21).
   The first two patterns of every group of eight are invalid.  */
static const int mid[32] =
{
  -1, -1,  0,  1,  2,  3,  4,  5,
  -1, -1,  6,  7,  8,  9, 10, 11,
  -1, -1, 12, 13, 14, 15, 16, 17,
  -1, -1, 18, 19, 20, 21, -1, -1
};
/* 5-bit trailing-consonant field -> consonant index.
   0 : Filler, -1: invalid, >= 1 : valid (1..27).
   Pattern 0x12 is a hole in the middle of the valid range.  */
static const int final[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, -1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, -1, -1
};
/*
  Hangul Jamo in Johab to Unicode 2.0 : the 51 Hangul Compatibility
  Jamos handled here lie in [0x3131,0x3163]; the consonants occupy
  [0x3131,0x314e] and the vowels [0x314f,0x3163].

  It's to be considered later which Jamo block to use, Compatibility
  block [0x3131,0x3163] or Hangul Conjoining Jamo block, [0x1100,0x11ff]
 */

/* Leading-consonant index - 1 -> Hangul Compatibility Jamo code point.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};
/* Trailing-consonant index - 1 -> Hangul Compatibility Jamo code point.
   Only the consonants that init_to_ucs does not already cover have an
   entry; 0 means "no mapping" and makes the decoder report the input as
   illegal.  The decoder only indexes slots 0..26; the declared size of 31
   is kept unchanged.  */
static const uint32_t final_to_ucs[31] =
{
  0,      0,      0x3133, 0,      0x3135, 0x3136, 0,      0,
  0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f, 0x3140, 0,
  0,      0x3144, 0,      0,      0,      0,      0,      0,
  0,      0,      0,      0,      0,      0,      0
};
/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd7a3]
   to Jamo bit patterns for Johab encoding

   cf. : KS C 5601-1992, Annex3 Table 2

   Arrays are used to speed up things although it's possible
   to get the same result arithmetically.
 */

/* Leading-consonant number (0..18) -> bits contributed to the 16-bit
   Johab code.  Every entry already has the top bit (0x8000) set.  */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,
  0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00,
  0xd000
};
/* Medial-vowel number (0..20) -> bits contributed to the 16-bit Johab
   code.  The jumps between rows (0x00e0 -> 0x0140, ...) skip the two
   invalid patterns at the start of every group of eight.  */
static const int mid_to_bit[21] =
{
          0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};
/* Trailing-consonant number (0 = none, 1..27) -> bits contributed to the
   16-bit Johab code.  Entry 0 is the filler pattern 1; pattern 0x12 is
   skipped because it is not a valid trailing-consonant pattern.  */
static const int final_to_bit[28] =
{
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
  0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
  0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};
/* The conversion table from
   UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
   to Johab

   cf. 1. KS C 5601-1992 Annex 3 Table 2
       2. Unicode 2.0 manual

   Indexed by (code point - 0x3131).  The first 30 entries are the
   consonants U+3131..U+314E, the last 21 the vowels U+314F..U+3163.
 */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* U+3131..U+314E: consonants.  */
  0x8841, 0x8c41,
  0x8444,
  0x9041,
  0x8446, 0x8447,
  0x9441, 0x9841, 0x9c41,
  0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  0xa041, 0xa441, 0xa841,
  0x8454,
  0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* U+314F..U+3163: vowels.  */
  0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  0x8741, 0x8761, 0x8781, 0x87a1
};
134 static uint32_t
135 johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
137 if (idx <= 0xdefe)
138 return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139 - (c2 > 0x90 ? 0x43 : 0x31)];
140 else
141 return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142 - (c2 > 0x90 ? 0x43 : 0x31)];
/* Definitions used in the body of the `gconv' function.  */
#define CHARSET_NAME		"JOHAB//"
#define FROM_LOOP		from_johab
#define TO_LOOP			to_johab
/* NOTE(review): presumably these ask the included skeleton to provide
   the default init and fini functions -- confirm against
   iconv/skeleton.c.  */
#define DEFINE_INIT		1
#define DEFINE_FINI		1
/* A JOHAB character occupies one or two bytes.  */
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		2
/* Every character on the UCS4 side occupies four bytes.  */
#define MIN_NEEDED_TO		4
155 /* First define the conversion function from JOHAB to UCS4. */
156 #define MIN_NEEDED_INPUT MIN_NEEDED_FROM
157 #define MAX_NEEDED_INPUT MAX_NEEDED_FROM
158 #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
159 #define LOOPFCT FROM_LOOP
160 #define BODY \
162 uint32_t ch = *inptr; \
164 if (ch <= 0x7f) \
166 /* Plain ISO646-KR. */ \
167 if (ch == 0x5c) \
168 ch = 0x20a9; /* half-width Korean Currency WON sign */ \
169 ++inptr; \
171 /* Johab : 1. Hangul \
172 1st byte : 0x84-0xd3 \
173 2nd byte : 0x41-0x7e, 0x81-0xfe \
174 2. Hanja & Symbol : \
175 1st byte : 0xd8-0xde, 0xe0-0xf9 \
176 2nd byte : 0x31-0x7e, 0x91-0xfe \
177 0xd831-0xd87e and 0xd891-0xd8fe are user-defined area */ \
178 else \
180 if (__builtin_expect (ch > 0xf9, 0) \
181 || __builtin_expect (ch == 0xdf, 0) \
182 || (__builtin_expect (ch > 0x7e, 0) && ch < 0x84) \
183 || (__builtin_expect (ch > 0xd3, 0) && ch < 0xd9)) \
185 /* These are illegal. */ \
186 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
188 else \
190 /* Two-byte character. First test whether the next \
191 character is also available. */ \
192 uint32_t ch2; \
193 uint_fast32_t idx; \
195 if (__builtin_expect (inptr + 1 >= inend, 0)) \
197 /* The second character is not available. Store the \
198 intermediate result. */ \
199 result = __GCONV_INCOMPLETE_INPUT; \
200 break; \
203 ch2 = inptr[1]; \
204 idx = ch * 256 + ch2; \
205 if (__builtin_expect (ch <= 0xd3, 1)) \
207 /* Hangul */ \
208 int_fast32_t i, m, f; \
210 i = init[(idx & 0x7c00) >> 10]; \
211 m = mid[(idx & 0x03e0) >> 5]; \
212 f = final[idx & 0x001f]; \
214 if (__builtin_expect (i == -1, 0) \
215 || __builtin_expect (m == -1, 0) \
216 || __builtin_expect (f == -1, 0)) \
218 /* This is illegal. */ \
219 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
221 else if (i > 0 && m > 0) \
222 ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00; \
223 else if (i > 0 && m == 0 && f == 0) \
224 ch = init_to_ucs[i - 1]; \
225 else if (i == 0 && m > 0 && f == 0) \
226 ch = 0x314e + m; /* 0x314f + m - 1 */ \
227 else if (__builtin_expect ((i | m) == 0, 1) \
228 && __builtin_expect (f > 0, 1)) \
229 ch = final_to_ucs[f - 1]; /* round trip?? */ \
230 else \
232 /* This is illegal. */ \
233 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
236 else \
238 if (__builtin_expect (ch2 < 0x31, 0) \
239 || (__builtin_expect (ch2 > 0x7e, 0) && ch2 < 0x91) \
240 || __builtin_expect (ch2, 0) == 0xff \
241 || (__builtin_expect (ch, 0) == 0xd9 && ch2 > 0xe8) \
242 || (__builtin_expect (ch, 0) == 0xda \
243 && ch2 > 0xa0 && ch2 < 0xd4) \
244 || (__builtin_expect (ch, 0) == 0xde && ch2 > 0xf1)) \
246 /* This is illegal. */ \
247 STANDARD_FROM_LOOP_ERR_HANDLER (1); \
249 else \
251 ch = johab_sym_hanja_to_ucs (idx, ch, ch2); \
252 /* if (idx <= 0xdefe) \
253 ch = __ksc5601_sym_to_ucs[(ch - 0xd9) * 192 \
254 + ch2 - (ch2 > 0x90 \
255 ? 0x43 : 0x31)]; \
256 else \
257 ch = __ksc5601_hanja_to_ucs[(ch - 0xe0) *192 \
258 + ch2 - (ch2 > 0x90 \
259 ?0x43 : 0x31)];\
260 */ \
265 if (__builtin_expect (ch == 0, 0)) \
267 /* This is an illegal character. */ \
268 STANDARD_FROM_LOOP_ERR_HANDLER (2); \
271 inptr += 2; \
274 put32 (outptr, ch); \
275 outptr += 4; \
277 #define LOOP_NEED_FLAGS
278 #define ONEBYTE_BODY \
280 if (c <= 0x7f) \
281 return (c == 0x5c ? 0x20a9 : c); \
282 else \
283 return WEOF; \
285 #include <iconv/loop.c>
288 /* Next, define the other direction. */
289 #define MIN_NEEDED_INPUT MIN_NEEDED_TO
290 #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
291 #define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
292 #define LOOPFCT TO_LOOP
293 #define BODY \
295 uint32_t ch = get32 (inptr); \
296 /* \
297 if (ch >= (sizeof (from_ucs4_lat1) / sizeof (from_ucs4_lat1[0]))) \
299 if (ch >= 0x0391 && ch <= 0x0451) \
300 cp = from_ucs4_greek[ch - 0x391]; \
301 else if (ch >= 0x2010 && ch <= 0x9fa0) \
302 cp = from_ucs4_cjk[ch - 0x02010]; \
303 else \
304 break; \
306 else \
307 cp = from_ucs4_lat1[ch]; \
308 */ \
310 if (ch <= 0x7f && ch != 0x5c) \
311 *outptr++ = ch; \
312 else \
314 if (ch >= 0xac00 && ch <= 0xd7a3) \
316 if (__builtin_expect (outptr + 2 > outend, 0)) \
318 result = __GCONV_FULL_OUTPUT; \
319 break; \
322 ch -= 0xac00; \
324 ch = (init_to_bit[ch / 588] /* 21 * 28 = 588 */ \
325 + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */ \
326 + final_to_bit[ch % 28]); /* (ch % (21 * 28)) % 28 */ \
328 *outptr++ = ch / 256; \
329 *outptr++ = ch % 256; \
331 /* KS C 5601-1992 Annex 3 regards 0xA4DA(Hangul Filler : U3164) \
332 as symbol */ \
333 else if (ch >= 0x3131 && ch <= 0x3163) \
335 ch = jamo_from_ucs_table[ch - 0x3131]; \
337 if (__builtin_expect (outptr + 2 > outend, 0)) \
339 result = __GCONV_FULL_OUTPUT; \
340 break; \
343 *outptr++ = ch / 256; \
344 *outptr++ = ch % 256; \
346 else if ((ch >= 0x4e00 && ch <= 0x9fa5) \
347 || (ch >= 0xf900 && ch <= 0xfa0b)) \
349 size_t written; \
350 uint32_t temp; \
352 written = ucs4_to_ksc5601_hanja (ch, outptr, outend - outptr); \
353 if (__builtin_expect (written, 1) == 0) \
355 result = __GCONV_FULL_OUTPUT; \
356 break; \
358 if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0)) \
360 STANDARD_TO_LOOP_ERR_HANDLER (4); \
363 outptr[0] -= 0x4a; \
364 outptr[1] -= 0x21; \
366 temp = outptr[0] * 94 + outptr[1]; \
368 outptr[0] = 0xe0 + temp / 188; \
369 outptr[1] = temp % 188; \
370 outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31; \
372 outptr += 2; \
374 else if (ch == 0x20a9) \
375 *outptr++ = 0x5c; \
376 else \
378 size_t written; \
379 uint32_t temp; \
381 written = ucs4_to_ksc5601_sym (ch, outptr, outend - outptr); \
382 if (__builtin_expect (written, 1) == 0) \
384 result = __GCONV_FULL_OUTPUT; \
385 break; \
387 if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0) \
388 || (outptr[0] == 0x22 && outptr[1] > 0x68)) \
390 UNICODE_TAG_HANDLER (ch, 4); \
391 STANDARD_TO_LOOP_ERR_HANDLER (4); \
394 temp = (outptr[0] < 0x4a ? outptr[0] + 0x191 : outptr[0] + 0x176);\
395 outptr[1] += (temp % 2 ? 0x5e : 0); \
396 outptr[1] += (outptr[1] < 0x6f ? 0x10 : 0x22); \
397 outptr[0] = temp / 2; \
399 outptr += 2; \
403 inptr += 4; \
405 #define LOOP_NEED_FLAGS
406 #include <iconv/loop.c>
409 /* Now define the toplevel functions. */
410 #include <iconv/skeleton.c>