here, have a Thumb back-end
[mozilla-central.git] / intl / uconv / util / ugen.c
blobb327be6abf5ee09b1d810aa8a45f12741ad95d5d
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
23 * jeroen.dobbelaere@acunia.com
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
38 #include "unicpriv.h"
39 /*=================================================================================
41 =================================================================================*/
42 typedef PRBool (*uSubGeneratorFunc) (PRUint16 in, unsigned char* out);
43 /*=================================================================================
45 =================================================================================*/
47 typedef PRBool (*uGeneratorFunc) (
48 PRInt32* state,
49 PRUint16 in,
50 unsigned char* out,
51 PRUint32 outbuflen,
52 PRUint32* outlen
55 MODULE_PRIVATE PRBool uGenerate(
56 uScanClassID scanClass,
57 PRInt32* state,
58 PRUint16 in,
59 unsigned char* out,
60 PRUint32 outbuflen,
61 PRUint32* outlen
/* Invoke entry `sub` of m_subgenerator on (in, out). */
#define uSubGenerator(sub, in, out) ((m_subgenerator[(sub)])((in), (out)))
66 PRIVATE PRBool uCheckAndGenAlways1Byte(
67 PRInt32* state,
68 PRUint16 in,
69 unsigned char* out,
70 PRUint32 outbuflen,
71 PRUint32* outlen
73 PRIVATE PRBool uCheckAndGenAlways2Byte(
74 PRInt32* state,
75 PRUint16 in,
76 unsigned char* out,
77 PRUint32 outbuflen,
78 PRUint32* outlen
80 PRIVATE PRBool uCheckAndGenAlways2ByteShiftGR(
81 PRInt32* state,
82 PRUint16 in,
83 unsigned char* out,
84 PRUint32 outbuflen,
85 PRUint32* outlen
87 MODULE_PRIVATE PRBool uGenerateShift(
88 uShiftOutTable *shift,
89 PRInt32* state,
90 PRUint16 in,
91 unsigned char* out,
92 PRUint32 outbuflen,
93 PRUint32* outlen
95 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8F(
96 PRInt32* state,
97 PRUint16 in,
98 unsigned char* out,
99 PRUint32 outbuflen,
100 PRUint32* outlen
102 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA2(
103 PRInt32* state,
104 PRUint16 in,
105 unsigned char* out,
106 PRUint32 outbuflen,
107 PRUint32* outlen
110 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA3(
111 PRInt32* state,
112 PRUint16 in,
113 unsigned char* out,
114 PRUint32 outbuflen,
115 PRUint32* outlen
118 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA4(
119 PRInt32* state,
120 PRUint16 in,
121 unsigned char* out,
122 PRUint32 outbuflen,
123 PRUint32* outlen
126 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA5(
127 PRInt32* state,
128 PRUint16 in,
129 unsigned char* out,
130 PRUint32 outbuflen,
131 PRUint32* outlen
134 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA6(
135 PRInt32* state,
136 PRUint16 in,
137 unsigned char* out,
138 PRUint32 outbuflen,
139 PRUint32* outlen
142 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA7(
143 PRInt32* state,
144 PRUint16 in,
145 unsigned char* out,
146 PRUint32 outbuflen,
147 PRUint32* outlen
149 PRIVATE PRBool uCnGAlways8BytesDecomposedHangul(
150 PRInt32* state,
151 PRUint16 in,
152 unsigned char* out,
153 PRUint32 outbuflen,
154 PRUint32* outlen
157 PRIVATE PRBool uCheckAndGenJohabHangul(
158 PRInt32* state,
159 PRUint16 in,
160 unsigned char* out,
161 PRUint32 outbuflen,
162 PRUint32* outlen
165 PRIVATE PRBool uCheckAndGenJohabSymbol(
166 PRInt32* state,
167 PRUint16 in,
168 unsigned char* out,
169 PRUint32 outbuflen,
170 PRUint32* outlen
174 PRIVATE PRBool uCheckAndGen4BytesGB18030(
175 PRInt32* state,
176 PRUint16 in,
177 unsigned char* out,
178 PRUint32 outbuflen,
179 PRUint32* outlen
182 PRIVATE PRBool uGenAlways2Byte(
183 PRUint16 in,
184 unsigned char* out
186 PRIVATE PRBool uGenAlways2ByteShiftGR(
187 PRUint16 in,
188 unsigned char* out
190 PRIVATE PRBool uGenAlways1Byte(
191 PRUint16 in,
192 unsigned char* out
194 PRIVATE PRBool uGenAlways1BytePrefix8E(
195 PRUint16 in,
196 unsigned char* out
198 /*=================================================================================
200 =================================================================================*/
201 PRIVATE const uGeneratorFunc m_generator[uNumOfCharsetType] =
203 uCheckAndGenAlways1Byte,
204 uCheckAndGenAlways2Byte,
205 uCheckAndGenAlways2ByteShiftGR,
206 uCheckAndGen2ByteGRPrefix8F,
207 uCheckAndGen2ByteGRPrefix8EA2,
208 uCheckAndGen2ByteGRPrefix8EA3,
209 uCheckAndGen2ByteGRPrefix8EA4,
210 uCheckAndGen2ByteGRPrefix8EA5,
211 uCheckAndGen2ByteGRPrefix8EA6,
212 uCheckAndGen2ByteGRPrefix8EA7,
213 uCnGAlways8BytesDecomposedHangul,
214 uCheckAndGenJohabHangul,
215 uCheckAndGenJohabSymbol,
216 uCheckAndGen4BytesGB18030,
217 uCheckAndGenAlways2Byte /* place-holder for GR128 */
220 /*=================================================================================
222 =================================================================================*/
224 PRIVATE const uSubGeneratorFunc m_subgenerator[uNumOfCharType] =
226 uGenAlways1Byte,
227 uGenAlways2Byte,
228 uGenAlways2ByteShiftGR,
229 uGenAlways1BytePrefix8E
232 /*=================================================================================
234 =================================================================================*/
235 MODULE_PRIVATE PRBool uGenerate(
236 uScanClassID scanClass,
237 PRInt32* state,
238 PRUint16 in,
239 unsigned char* out,
240 PRUint32 outbuflen,
241 PRUint32* outlen
244 return (* m_generator[scanClass]) (state,in,out,outbuflen,outlen);
246 /*=================================================================================
248 =================================================================================*/
249 PRIVATE PRBool uGenAlways1Byte(
250 PRUint16 in,
251 unsigned char* out
254 out[0] = (unsigned char)in;
255 return PR_TRUE;
258 /*=================================================================================
260 =================================================================================*/
261 PRIVATE PRBool uGenAlways2Byte(
262 PRUint16 in,
263 unsigned char* out
266 out[0] = (unsigned char)((in >> 8) & 0xff);
267 out[1] = (unsigned char)(in & 0xff);
268 return PR_TRUE;
270 /*=================================================================================
272 =================================================================================*/
273 PRIVATE PRBool uGenAlways2ByteShiftGR(
274 PRUint16 in,
275 unsigned char* out
278 out[0] = (unsigned char)(((in >> 8) & 0xff) | 0x80);
279 out[1] = (unsigned char)((in & 0xff) | 0x80);
280 return PR_TRUE;
282 /*=================================================================================
284 =================================================================================*/
285 PRIVATE PRBool uGenAlways1BytePrefix8E(
286 PRUint16 in,
287 unsigned char* out
290 out[0] = 0x8E;
291 out[1] = (unsigned char)(in & 0xff);
292 return PR_TRUE;
294 /*=================================================================================
296 =================================================================================*/
297 PRIVATE PRBool uCheckAndGenAlways1Byte(
298 PRInt32* state,
299 PRUint16 in,
300 unsigned char* out,
301 PRUint32 outbuflen,
302 PRUint32* outlen
305 /* Don't check inlen. The caller should ensure it is larger than 0 */
306 /* Oops, I don't agree. Code changed to check every time. [CATA] */
307 if(outbuflen < 1)
308 return PR_FALSE;
309 else
311 *outlen = 1;
312 out[0] = in & 0xff;
313 return PR_TRUE;
317 /*=================================================================================
319 =================================================================================*/
320 PRIVATE PRBool uCheckAndGenAlways2Byte(
321 PRInt32* state,
322 PRUint16 in,
323 unsigned char* out,
324 PRUint32 outbuflen,
325 PRUint32* outlen
328 if(outbuflen < 2)
329 return PR_FALSE;
330 else
332 *outlen = 2;
333 out[0] = ((in >> 8 ) & 0xff);
334 out[1] = in & 0xff;
335 return PR_TRUE;
338 /*=================================================================================
340 =================================================================================*/
341 PRIVATE PRBool uCheckAndGenAlways2ByteShiftGR(
342 PRInt32* state,
343 PRUint16 in,
344 unsigned char* out,
345 PRUint32 outbuflen,
346 PRUint32* outlen
349 if(outbuflen < 2)
350 return PR_FALSE;
351 else
353 *outlen = 2;
354 out[0] = ((in >> 8 ) & 0xff) | 0x80;
355 out[1] = (in & 0xff) | 0x80;
356 return PR_TRUE;
359 /*=================================================================================
361 =================================================================================*/
362 MODULE_PRIVATE PRBool uGenerateShift(
363 uShiftOutTable *shift,
364 PRInt32* state,
365 PRUint16 in,
366 unsigned char* out,
367 PRUint32 outbuflen,
368 PRUint32* outlen
371 PRInt16 i;
372 const uShiftOutCell* cell = &(shift->shiftcell[0]);
373 PRInt16 itemnum = shift->numOfItem;
374 unsigned char inH, inL;
375 inH = (in >> 8) & 0xff;
376 inL = (in & 0xff );
377 for(i=0;i<itemnum;i++)
379 if( ( inL >= cell[i].shiftout_MinLB) &&
380 ( inL <= cell[i].shiftout_MaxLB) &&
381 ( inH >= cell[i].shiftout_MinHB) &&
382 ( inH <= cell[i].shiftout_MaxHB) )
384 if(outbuflen < cell[i].reserveLen)
386 return PR_FALSE;
388 else
390 *outlen = cell[i].reserveLen;
391 return (uSubGenerator(cell[i].classID,in,out));
395 return PR_FALSE;
397 /*=================================================================================
399 =================================================================================*/
400 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8F( PRInt32* state,
401 PRUint16 in,
402 unsigned char* out,
403 PRUint32 outbuflen,
404 PRUint32* outlen
407 if(outbuflen < 3)
408 return PR_FALSE;
409 else
411 *outlen = 3;
412 out[0] = 0x8F;
413 out[1] = ((in >> 8 ) & 0xff) | 0x80;
414 out[2] = (in & 0xff) | 0x80;
415 return PR_TRUE;
418 /*=================================================================================
420 =================================================================================*/
421 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA2( PRInt32* state,
422 PRUint16 in,
423 unsigned char* out,
424 PRUint32 outbuflen,
425 PRUint32* outlen
428 if(outbuflen < 4)
429 return PR_FALSE;
430 else
432 *outlen = 4;
433 out[0] = 0x8E;
434 out[1] = 0xA2;
435 out[2] = ((in >> 8 ) & 0xff) | 0x80;
436 out[3] = (in & 0xff) | 0x80;
437 return PR_TRUE;
442 /*=================================================================================
444 =================================================================================*/
445 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA3( PRInt32* state,
446 PRUint16 in,
447 unsigned char* out,
448 PRUint32 outbuflen,
449 PRUint32* outlen
452 if(outbuflen < 4)
453 return PR_FALSE;
454 else
456 *outlen = 4;
457 out[0] = 0x8E;
458 out[1] = 0xA3;
459 out[2] = ((in >> 8 ) & 0xff) | 0x80;
460 out[3] = (in & 0xff) | 0x80;
461 return PR_TRUE;
464 /*=================================================================================
466 =================================================================================*/
467 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA4( PRInt32* state,
468 PRUint16 in,
469 unsigned char* out,
470 PRUint32 outbuflen,
471 PRUint32* outlen
474 if(outbuflen < 4)
475 return PR_FALSE;
476 else
478 *outlen = 4;
479 out[0] = 0x8E;
480 out[1] = 0xA4;
481 out[2] = ((in >> 8 ) & 0xff) | 0x80;
482 out[3] = (in & 0xff) | 0x80;
483 return PR_TRUE;
486 /*=================================================================================
488 =================================================================================*/
489 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA5( PRInt32* state,
490 PRUint16 in,
491 unsigned char* out,
492 PRUint32 outbuflen,
493 PRUint32* outlen
496 if(outbuflen < 4)
497 return PR_FALSE;
498 else
500 *outlen = 4;
501 out[0] = 0x8E;
502 out[1] = 0xA5;
503 out[2] = ((in >> 8 ) & 0xff) | 0x80;
504 out[3] = (in & 0xff) | 0x80;
505 return PR_TRUE;
508 /*=================================================================================
510 =================================================================================*/
511 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA6( PRInt32* state,
512 PRUint16 in,
513 unsigned char* out,
514 PRUint32 outbuflen,
515 PRUint32* outlen
518 if(outbuflen < 4)
519 return PR_FALSE;
520 else
522 *outlen = 4;
523 out[0] = 0x8E;
524 out[1] = 0xA6;
525 out[2] = ((in >> 8 ) & 0xff) | 0x80;
526 out[3] = (in & 0xff) | 0x80;
527 return PR_TRUE;
530 /*=================================================================================
532 =================================================================================*/
533 PRIVATE PRBool uCheckAndGen2ByteGRPrefix8EA7( PRInt32* state,
534 PRUint16 in,
535 unsigned char* out,
536 PRUint32 outbuflen,
537 PRUint32* outlen
540 if(outbuflen < 4)
541 return PR_FALSE;
542 else
544 *outlen = 4;
545 out[0] = 0x8E;
546 out[1] = 0xA7;
547 out[2] = ((in >> 8 ) & 0xff) | 0x80;
548 out[3] = (in & 0xff) | 0x80;
549 return PR_TRUE;
552 /*=================================================================================
554 =================================================================================*/
/* Constants for arithmetic decomposition of precomposed Hangul syllables. */
#define SBase   0xAC00              /* code of the first Hangul syllable    */
#define LCount  19                  /* number of leading consonants         */
#define VCount  21                  /* number of vowels                     */
#define TCount  28                  /* trailing consonants, incl. "none"    */
#define NCount  (VCount * TCount)   /* syllables per leading consonant      */
560 /*=================================================================================
562 =================================================================================*/
563 PRIVATE PRBool uCnGAlways8BytesDecomposedHangul(
564 PRInt32* state,
565 PRUint16 in,
566 unsigned char* out,
567 PRUint32 outbuflen,
568 PRUint32* outlen
571 static const PRUint8 lMap[LCount] = {
572 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
573 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
576 static const PRUint8 tMap[TCount] = {
577 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa,
578 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5,
579 0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
582 PRUint16 SIndex, LIndex, VIndex, TIndex;
584 if(outbuflen < 8)
585 return PR_FALSE;
587 /* the following line are copy from Unicode 2.0 page 3-13 */
588 /* item 1 of Hangul Syllabel Decomposition */
589 SIndex = in - SBase;
591 /* the following lines are copy from Unicode 2.0 page 3-14 */
592 /* item 2 of Hangul Syllabel Decomposition w/ modification */
593 LIndex = SIndex / NCount;
594 VIndex = (SIndex % NCount) / TCount;
595 TIndex = SIndex % TCount;
598 * A Hangul syllable not enumerated in KS X 1001 is represented
599 * by a sequence of 8 bytes beginning with Hangul-filler
600 * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three
601 * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making
602 * up the syllable. ref. KS X 1001:1998 Annex 3
604 *outlen = 8;
605 out[0] = out[2] = out[4] = out[6] = 0xa4;
606 out[1] = 0xd4;
607 out[3] = lMap[LIndex] ;
608 out[5] = (VIndex + 0xbf);
609 out[7] = tMap[TIndex];
611 return PR_TRUE;
614 PRIVATE PRBool uCheckAndGenJohabHangul(
615 PRInt32* state,
616 PRUint16 in,
617 unsigned char* out,
618 PRUint32 outbuflen,
619 PRUint32* outlen
622 if(outbuflen < 2)
623 return PR_FALSE;
624 else
627 See Table 4-45 (page 183) of CJKV Information Processing
628 for detail explanation of the following table.
631 static const PRUint8 lMap[LCount] = {
632 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
634 Therefore lMap[i] == i+2;
637 static const PRUint8 vMap[VCount] = {
638 /* no 0,1,2 */
639 3,4,5,6,7, /* no 8,9 */
640 10,11,12,13,14,15, /* no 16,17 */
641 18,19,20,21,22,23, /* no 24,25 */
642 26,27,28,29
644 static const PRUint8 tMap[TCount] = {
645 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, /* no 18 */
646 19,20,21,22,23,24,25,26,27,28,29
648 PRUint16 SIndex, LIndex, VIndex, TIndex, ch;
649 /* the following line are copy from Unicode 2.0 page 3-13 */
650 /* item 1 of Hangul Syllabel Decomposition */
651 SIndex = in - SBase;
653 /* the following lines are copy from Unicode 2.0 page 3-14 */
654 /* item 2 of Hangul Syllabel Decomposition w/ modification */
655 LIndex = SIndex / NCount;
656 VIndex = (SIndex % NCount) / TCount;
657 TIndex = SIndex % TCount;
659 *outlen = 2;
660 ch = 0x8000 |
661 ((LIndex+2)<<10) |
662 (vMap[VIndex]<<5)|
663 tMap[TIndex];
664 out[0] = (ch >> 8);
665 out[1] = ch & 0x00FF;
666 #if 0
667 printf("Johab Hangul %x %x in=%x L=%d V=%d T=%d\n", out[0], out[1], in, LIndex, VIndex, TIndex);
668 #endif
669 return PR_TRUE;
672 PRIVATE PRBool uCheckAndGenJohabSymbol(
673 PRInt32* state,
674 PRUint16 in,
675 unsigned char* out,
676 PRUint32 outbuflen,
677 PRUint32* outlen
680 if(outbuflen < 2)
681 return PR_FALSE;
682 else
684 /* The following code are based on the Perl code listed under
685 * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013)
686 * in the book "CJKV Information Processing" by
687 * Ken Lunde <lunde@adobe.com>
689 * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab
690 * my @euc = unpack("C*", $_[0]);
691 * my ($fe_off, $hi_off, $lo_off) = (0,0,1);
692 * my @out = ();
693 * while(($hi, $lo) = splice(@euc, 0, 2)) {
694 * $hi &= 127; $lo &= 127;
695 * $fe_off = 21 if $hi == 73;
696 * $fe_off = 34 if $hi == 126;
697 * ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125);
698 * push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off),
699 * $lo + ((($hi+$lo_off) & 1) ? ($lo > 110 ? 34:16):128));
701 * return pack("C*", @out);
704 unsigned char fe_off = 0;
705 unsigned char hi_off = 0;
706 unsigned char lo_off = 1;
707 unsigned char hi = (in >> 8) & 0x7F;
708 unsigned char lo = in & 0x7F;
709 if(73 == hi)
710 fe_off = 21;
711 if(126 == hi)
712 fe_off = 34;
713 if( (hi < 74) || ( hi > 125) )
715 hi_off = 1;
716 lo_off = 0;
718 *outlen = 2;
719 out[0] = ((hi+hi_off) >> 1) + ((hi<74) ? 200 : 187 ) - fe_off;
720 out[1] = lo + (((hi+lo_off) & 1) ? ((lo > 110) ? 34 : 16) :
721 128);
722 #if 0
723 printf("Johab Symbol %x %x in=%x\n", out[0], out[1], in);
724 #endif
725 return PR_TRUE;
728 PRIVATE PRBool uCheckAndGen4BytesGB18030(
729 PRInt32* state,
730 PRUint16 in,
731 unsigned char* out,
732 PRUint32 outbuflen,
733 PRUint32* outlen
736 if(outbuflen < 4)
737 return PR_FALSE;
738 out[0] = (in / (10*126*10)) + 0x81;
739 in %= (10*126*10);
740 out[1] = (in / (10*126)) + 0x30;
741 in %= (10*126);
742 out[2] = (in / (10)) + 0x81;
743 out[3] = (in % 10) + 0x30;
744 *outlen = 4;
745 return PR_TRUE;