Update.
[glibc.git] / iconvdata / johab.c
blob6a5d8be68fba5ec84059975a07d565e17b306bcc
1 /* Mapping tables for JOHAB handling.
2 Copyright (C) 1998 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
5 and Ulrich Drepper <drepper@cygnus.com>, 1998.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Library General Public License for more details.
17 You should have received a copy of the GNU Library General Public
18 License along with the GNU C Library; see the file COPYING.LIB. If not,
19 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 #include <stdint.h>
23 #include <ksc5601.h>
/* The table for Bit pattern to Hangul Jamo
   5 bits each are used to encode
   leading consonants(19 + 1 filler), medial vowels(21 + 1 filler)
   and trailing consonants(27 + 1 filler).

   KS C 5601-1992 Annex 3 Table 2
   0 : Filler, -1: invalid, >= 1 : valid
 */
/* Decoding table for the 5-bit initial-consonant field of a JOHAB
   Hangul code, indexed by the raw field value.
   -1 = invalid bit pattern, 0 = filler, 1..19 = consonant number.
   All 32 slots are spelled out so that no slot is implicitly
   zero-initialised (which would read as "filler").  */
static const int init[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,
   7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1
};
/* Decoding table for the 5-bit medial-vowel field of a JOHAB Hangul
   code, indexed by the raw field value.
   -1 = invalid bit pattern, 0 = filler, 1..21 = vowel number.  */
static const int mid[32] =
{
  /* 0x00 */ -1, -1,  0,  1,  2,  3,  4,  5,
  /* 0x08 */ -1, -1,  6,  7,  8,  9, 10, 11,
  /* 0x10 */ -1, -1, 12, 13, 14, 15, 16, 17,
  /* 0x18 */ -1, -1, 18, 19, 20, 21, -1, -1
};
/* Decoding table for the 5-bit trailing-consonant field of a JOHAB
   Hangul code, indexed by the raw field value.
   -1 = invalid bit pattern, 0 = filler, 1..27 = consonant number.
   Note the hole at raw value 0x12.  */
static const int final[32] =
{
  /* 0x00 */ -1,  0,  1,  2,  3,  4,  5,  6,
  /* 0x08 */  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x10 */ 15, 16, -1, 17, 18, 19, 20, 21,
  /* 0x18 */ 22, 23, 24, 25, 26, 27, -1, -1
};
/*
   Hangul Jamo in Johab to Unicode 2.0 : Unicode 2.0
   defines 51 Hangul Compatibility Jamos in the block [0x3131,0x3163]

   It's to be considered later which Jamo block to use, Compatibility
   block [0x3131,0x3163] or Hangul Conjoining Jamo block, [0x1100,0x11ff]
 */
/* UCS4 Hangul Compatibility Jamo for each initial consonant,
   indexed by consonant number minus one (0..18).  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137,
  0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147,
  0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};
/* UCS4 Hangul Compatibility Jamo for each trailing consonant, indexed
   by consonant number minus one (0..26).  A zero entry means the table
   provides no code point for that trailing consonant; the decoder
   treats a zero result as an illegal character.

   Entry 10 is U+313C; the neighbouring entries run 0x313a..0x313f
   and 0x314c already belongs to an initial consonant.  */
static const uint32_t final_to_ucs[27] =
{
  /*  0 */ 0,      0,      0x3133, 0,      0x3135, 0x3136, 0,      0,
  /*  8 */ 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
  /* 14 */ 0x3140, 0,      0,      0x3144, 0,      0,      0,
  /* 21 */ 0,      0,      0,      0,      0,      0
};
/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd7a3]
   to Jamo bit patterns for Johab encoding

   cf. : KS C 5601-1992, Annex3 Table 2

   Arrays are used to speed up things although it's possible
   to get the same result arithmetically.
 */
/* JOHAB bit pattern contributed by the initial consonant of a
   precomposed syllable, indexed by (syllable - 0xac00) / 588.
   Each entry already carries the 0x8000 marker bit.  */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400,
  0x9800, 0x9c00, 0xa000, 0xa400,
  0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400,
  0xc800, 0xcc00, 0xd000
};
/* JOHAB bit pattern contributed by the medial vowel of a precomposed
   syllable, indexed by ((syllable - 0xac00) / 28) % 21.  */
static const int mid_to_bit[21] =
{
  0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};
/* JOHAB bit pattern contributed by the trailing consonant of a
   precomposed syllable, indexed by (syllable - 0xac00) % 28.
   Index 0 (no trailing consonant) maps to 1; the value 0x12 is
   skipped.  */
static const int final_to_bit[28] =
{
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
  0x0f, 0x10, 0x11, 0x13, 0x14, 0x15, 0x16,
  0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};
/* The conversion table from
   UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
   to Johab

   cf. 1. KS C 5601-1992 Annex 3 Table 2
       2. Unicode 2.0 manual
 */
/* JOHAB code for each UCS4 Hangul Compatibility Jamo, indexed by
   (ucs - 0x3131) for ucs in [0x3131,0x3163].

   Consonants that can start a syllable use the initial-consonant
   field (0x??41); the remaining consonant clusters use the
   trailing-consonant field (0x84??); vowels use the medial field.
   Entries 13 and 14 (U+313E, U+313F) are trailing-only clusters and
   therefore 0x844e/0x844f; entry 28 (U+314D) is 0xcc41.  */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* U+3131 */ 0x8841, 0x8c41,
  /* U+3133 */ 0x8444,
  /* U+3134 */ 0x9041,
  /* U+3135 */ 0x8446, 0x8447,
  /* U+3137 */ 0x9441, 0x9841, 0x9c41,
  /* U+313A */ 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  /* U+3141 */ 0xa041, 0xa441, 0xa841,
  /* U+3144 */ 0x8454,
  /* U+3145 */ 0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  /* U+314A */ 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* U+314F */ 0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  /* U+3154 */ 0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  /* U+315A */ 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  /* U+3160 */ 0x8741, 0x8761, 0x8781, 0x87a1
};
134 static inline uint32_t
135 johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
137 if (idx <= 0xdefe)
138 return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139 - (c2 > 0x90 ? 0x43 : 0x31)];
140 else
141 return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142 - (c2 > 0x90 ? 0x43 : 0x31)];
/* Definitions used in the body of the `gconv' function.  */
#define CHARSET_NAME		"JOHAB//"
#define FROM_LOOP		from_johab
#define TO_LOOP			to_johab
#define DEFINE_INIT		1
#define DEFINE_FINI		1
/* A JOHAB character occupies one or two bytes; the other side of the
   conversion uses four bytes per character.  */
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		2
#define MIN_NEEDED_TO		4
/* First define the conversion function from JOHAB to UCS4.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Convert one JOHAB character at INPTR into one UCS4 value at OUTPTR.

   The body uses names it does not define itself (inptr, inend, outptr,
   result, NEED_LENGTH_TEST, GCONV_*) and leaves via `break' on error;
   presumably these are supplied by the loop template included right
   after this macro -- confirm against <iconv/loop.c>.

   Johab : 1. Hangul
	      1st byte : 0x84-0xd3
	      2nd byte : 0x41-0x7e, 0x81-0xfe
	   2. Hanja & Symbol :
	      1st byte : 0xd8-0xde, 0xe0-0xf9
	      2nd byte : 0x31-0x7e, 0x91-0xfe
	      0xd831-0xd87e and 0xd891-0xd8fe are user-defined area  */
#define BODY \
  { \
    uint32_t ch = *inptr; \
 \
    if (ch <= 0x7f) \
      /* Single-byte character, passed through unchanged.  0x5c is \
	 not mapped to U+20A9 WON SIGN.  */ \
      ++inptr; \
    else \
      { \
	if (ch > 0xf9 || ch == 0xdf || (ch > 0x7e && ch < 0x84) \
	    || (ch > 0xd3 && ch < 0xd9)) \
	  { \
	    /* These lead bytes are illegal.  */ \
	    result = GCONV_ILLEGAL_INPUT; \
	    break; \
	  } \
	else \
	  { \
	    /* Two-byte character.  First test whether the next \
	       character is also available.  */ \
	    uint32_t ch2; \
	    uint_fast32_t idx; \
 \
	    if (NEED_LENGTH_TEST && inptr + 1 >= inend) \
	      { \
		/* The second character is not available.  Store the \
		   intermediate result.  */ \
		result = GCONV_INCOMPLETE_INPUT; \
		break; \
	      } \
 \
	    ch2 = inptr[1]; \
	    idx = ch * 256 + ch2; \
	    if (ch <= 0xd3) \
	      { \
		/* Hangul: split the code into its three 5-bit jamo \
		   fields.  The jamo numbers are signed because the \
		   tables use -1 to flag an invalid bit pattern.  */ \
		int_fast32_t i, m, f; \
 \
		i = init[(idx & 0x7c00) >> 10]; \
		m = mid[(idx & 0x03e0) >> 5]; \
		f = final[idx & 0x001f]; \
 \
		if (i == -1 || m == -1 || f == -1) \
		  { \
		    /* This is illegal.  */ \
		    result = GCONV_ILLEGAL_INPUT; \
		    break; \
		  } \
		else if (i > 0 && m > 0) \
		  /* Full syllable, with or without trailing consonant.  */ \
		  ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00; \
		else if (i > 0 && m == 0 && f == 0) \
		  /* Lone initial consonant.  */ \
		  ch = init_to_ucs[i - 1]; \
		else if (i == 0 && m > 0 && f == 0) \
		  /* Lone vowel: 0x314f + m - 1.  */ \
		  ch = 0x314e + m; \
		else if (i == 0 && m == 0 && f > 0) \
		  /* Lone trailing consonant; yields 0 (rejected below) \
		     when the table has no code point for it.  */ \
		  ch = final_to_ucs[f - 1]; \
		else \
		  { \
		    /* This is illegal.  */ \
		    result = GCONV_ILLEGAL_INPUT; \
		    break; \
		  } \
	      } \
	    else \
	      { \
		if (ch2 < 0x31 || (ch2 > 0x7e && ch2 < 0x91) || ch2 == 0xff) \
		  { \
		    /* This is illegal.  */ \
		    result = GCONV_ILLEGAL_INPUT; \
		    break; \
		  } \
		else if (ch == 0xda && ch2 > 0xa0 && ch2 < 0xd4) \
		  { \
		    /* This is illegal.  Modern Hangul Jaso is defined \
		       elsewhere in Johab.  */ \
		    result = GCONV_ILLEGAL_INPUT; \
		    break; \
		  } \
		else \
		  ch = johab_sym_hanja_to_ucs (idx, ch, ch2); \
	      } \
 \
	    if (ch == 0) \
	      { \
		/* This is an illegal character.  */ \
		result = GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
 \
	    inptr += 2; \
	  } \
      } \
 \
    /* Store the result.  The result of a cast is not an lvalue in \
       ISO C, so the pointer is advanced in a separate statement.  */ \
    *((uint32_t *) outptr) = ch; \
    outptr += 4; \
  }
279 #include <iconv/loop.c>
/* Next, define the other direction.  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
/* Convert one UCS4 value at INPTR into one JOHAB character at OUTPTR.

   The body uses names it does not define itself (inptr, outptr, outend,
   result, NEED_LENGTH_TEST, GCONV_*, UNKNOWN_10646_CHAR) and leaves via
   `break' on error; presumably these come from the loop template
   included right after this macro and from <ksc5601.h> -- confirm.

   The four character classes are mutually exclusive, hence a single
   if / else-if chain: once CH has been replaced by its JOHAB code it
   must not be examined again as if it were still a UCS4 value.  */
#define BODY \
  { \
    uint32_t ch = *((uint32_t *) inptr); \
 \
    if (ch <= 0x7f) \
      /* Single-byte character, passed through unchanged.  */ \
      *outptr++ = ch; \
    else \
      { \
	if (ch >= 0xac00 && ch <= 0xd7a3) \
	  { \
	    /* Precomposed Hangul syllable: combine the bit patterns of \
	       its three jamo.  */ \
	    ch -= 0xac00; \
 \
	    ch = (init_to_bit[ch / 588]	      /* 21 * 28 = 588 */ \
		  + mid_to_bit[(ch / 28) % 21] /* (ch % (21 * 28)) / 28 */ \
		  + final_to_bit[ch % 28]);    /* (ch % (21 * 28)) % 28 */ \
 \
	    if (NEED_LENGTH_TEST && outptr + 2 > outend) \
	      { \
		result = GCONV_FULL_OUTPUT; \
		break; \
	      } \
 \
	    *outptr++ = ch / 256; \
	    *outptr++ = ch % 256; \
	  } \
	/* KS C 5601-1992 Annex 3 regards 0xA4DA (Hangul Filler : U3164) \
	   as symbol, so it is not part of the Jamo range below.  */ \
	else if (ch >= 0x3131 && ch <= 0x3163) \
	  { \
	    ch = jamo_from_ucs_table[ch - 0x3131]; \
 \
	    if (NEED_LENGTH_TEST && outptr + 2 > outend) \
	      { \
		result = GCONV_FULL_OUTPUT; \
		break; \
	      } \
 \
	    *outptr++ = ch / 256; \
	    *outptr++ = ch % 256; \
	  } \
	else if ((ch >= 0x4e00 && ch <= 0x9fa5) \
		 || (ch >= 0xf900 && ch <= 0xfa0b)) \
	  { \
	    size_t written; \
	    uint32_t temp; \
 \
	    written = ucs4_to_ksc5601_hanja (ch, outptr, \
					     (NEED_LENGTH_TEST \
					      ? outend - outptr : 2)); \
	    if (NEED_LENGTH_TEST && written == 0) \
	      { \
		result = GCONV_FULL_OUTPUT; \
		break; \
	      } \
	    if (written == UNKNOWN_10646_CHAR) \
	      { \
		result = GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
 \
	    /* Exact inverse of johab_sym_hanja_to_ucs: linear table \
	       index, then 188 codes per JOHAB lead byte starting at \
	       0xe0, trail bytes 0x31-0x7e followed by 0x91-0xfe. \
	       NOTE(review): assumes the helper stored a 7-bit KS C 5601 \
	       pair with Hanja rows starting at 0x4a and columns at \
	       0x21 -- confirm against <ksc5601.h>.  */ \
	    temp = (outptr[0] - 0x4a) * 94 + (outptr[1] - 0x21); \
	    outptr[0] = 0xe0 + temp / 188; \
	    temp %= 188; \
	    outptr[1] = temp < 78 ? temp + 0x31 : temp + 0x43; \
 \
	    outptr += 2; \
	  } \
	else \
	  { \
	    size_t written; \
	    uint32_t temp; \
 \
	    written = ucs4_to_ksc5601_sym (ch, outptr, \
					   (NEED_LENGTH_TEST \
					    ? outend - outptr : 2)); \
	    if (NEED_LENGTH_TEST && written == 0) \
	      { \
		result = GCONV_FULL_OUTPUT; \
		break; \
	      } \
	    if (written == UNKNOWN_10646_CHAR) \
	      { \
		result = GCONV_ILLEGAL_INPUT; \
		break; \
	      } \
 \
	    /* Same mapping as for Hanja, but symbol lead bytes start \
	       at 0xd9.  NOTE(review): assumes the helper stored a \
	       7-bit KS C 5601 pair with rows and columns starting at \
	       0x21 -- confirm against <ksc5601.h>.  */ \
	    temp = (outptr[0] - 0x21) * 94 + (outptr[1] - 0x21); \
	    outptr[0] = 0xd9 + temp / 188; \
	    temp %= 188; \
	    outptr[1] = temp < 78 ? temp + 0x31 : temp + 0x43; \
 \
	    outptr += 2; \
	  } \
      } \
 \
    inptr += 4; \
  }
402 #include <iconv/loop.c>
405 /* Now define the toplevel functions. */
406 #include <iconv/skeleton.c>