1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
23 * jeroen.dobbelaere@acunia.com
25 * Alternatively, the contents of this file may be used under the terms of
26 * either of the GNU General Public License Version 2 or later (the "GPL"),
27 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
28 * in which case the provisions of the GPL or the LGPL are applicable instead
29 * of those above. If you wish to allow use of your version of this file only
30 * under the terms of either the GPL or the LGPL, and not to allow others to
31 * use your version of this file under the terms of the MPL, indicate your
32 * decision by deleting the provisions above and replace them with the notice
33 * and other provisions required by the GPL or the LGPL. If you do not delete
34 * the provisions above, a recipient may use your version of this file under
35 * the terms of any one of the MPL, the GPL or the LGPL.
37 * ***** END LICENSE BLOCK ***** */
39 /*=================================================================================
41 =================================================================================*/
/*
 * Pointer to a sub-generator function: takes the PRUint16 value `in` and a
 * byte buffer `out`, and returns a PRBool. The signature carries no buffer
 * length. The m_subgenerator table holds functions of this type, and they are
 * invoked through the uSubGenerator() macro.
 */
42 typedef PRBool (*uSubGeneratorFunc
) (PRUint16 in
, unsigned char* out
);
43 /*=================================================================================
45 =================================================================================*/
47 typedef PRBool (*uGeneratorFunc
) (
55 MODULE_PRIVATE PRBool
uGenerate(
56 uScanClassID scanClass
,
/*
 * Call entry `sub` of the m_subgenerator table with (in, out) and yield the
 * callee's result. `in` and `out` are parenthesized in the expansion; `sub` is
 * used only as the array index and is not range-checked here.
 */
64 #define uSubGenerator(sub,in,out) (* m_subgenerator[sub])((in),(out))
66 PRIVATE PRBool
uCheckAndGenAlways1Byte(
73 PRIVATE PRBool
uCheckAndGenAlways2Byte(
80 PRIVATE PRBool
uCheckAndGenAlways2ByteShiftGR(
87 MODULE_PRIVATE PRBool
uGenerateShift(
88 uShiftOutTable
*shift
,
95 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8F(
102 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA2(
110 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA3(
118 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA4(
126 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA5(
134 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA6(
142 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA7(
149 PRIVATE PRBool
uCnGAlways8BytesDecomposedHangul(
157 PRIVATE PRBool
uCheckAndGenJohabHangul(
165 PRIVATE PRBool
uCheckAndGenJohabSymbol(
174 PRIVATE PRBool
uCheckAndGen4BytesGB18030(
182 PRIVATE PRBool
uGenAlways2Byte(
186 PRIVATE PRBool
uGenAlways2ByteShiftGR(
190 PRIVATE PRBool
uGenAlways1Byte(
194 PRIVATE PRBool
uGenAlways1BytePrefix8E(
198 /*=================================================================================
200 =================================================================================*/
201 PRIVATE
const uGeneratorFunc m_generator
[uNumOfCharsetType
] =
203 uCheckAndGenAlways1Byte
,
204 uCheckAndGenAlways2Byte
,
205 uCheckAndGenAlways2ByteShiftGR
,
206 uCheckAndGen2ByteGRPrefix8F
,
207 uCheckAndGen2ByteGRPrefix8EA2
,
208 uCheckAndGen2ByteGRPrefix8EA3
,
209 uCheckAndGen2ByteGRPrefix8EA4
,
210 uCheckAndGen2ByteGRPrefix8EA5
,
211 uCheckAndGen2ByteGRPrefix8EA6
,
212 uCheckAndGen2ByteGRPrefix8EA7
,
213 uCnGAlways8BytesDecomposedHangul
,
214 uCheckAndGenJohabHangul
,
215 uCheckAndGenJohabSymbol
,
216 uCheckAndGen4BytesGB18030
,
217 uCheckAndGenAlways2Byte
/* place-holder for GR128 */
220 /*=================================================================================
222 =================================================================================*/
224 PRIVATE
const uSubGeneratorFunc m_subgenerator
[uNumOfCharType
] =
228 uGenAlways2ByteShiftGR
,
229 uGenAlways1BytePrefix8E
232 /*=================================================================================
234 =================================================================================*/
235 MODULE_PRIVATE PRBool
uGenerate(
236 uScanClassID scanClass
,
244 return (* m_generator
[scanClass
]) (state
,in
,out
,outbuflen
,outlen
);
246 /*=================================================================================
248 =================================================================================*/
249 PRIVATE PRBool
uGenAlways1Byte(
254 out
[0] = (unsigned char)in
;
258 /*=================================================================================
260 =================================================================================*/
261 PRIVATE PRBool
uGenAlways2Byte(
266 out
[0] = (unsigned char)((in
>> 8) & 0xff);
267 out
[1] = (unsigned char)(in
& 0xff);
270 /*=================================================================================
272 =================================================================================*/
273 PRIVATE PRBool
uGenAlways2ByteShiftGR(
278 out
[0] = (unsigned char)(((in
>> 8) & 0xff) | 0x80);
279 out
[1] = (unsigned char)((in
& 0xff) | 0x80);
282 /*=================================================================================
284 =================================================================================*/
285 PRIVATE PRBool
uGenAlways1BytePrefix8E(
291 out
[1] = (unsigned char)(in
& 0xff);
294 /*=================================================================================
296 =================================================================================*/
297 PRIVATE PRBool
uCheckAndGenAlways1Byte(
305 /* Don't check inlen. The caller should ensure it is larger than 0 */
306 /* Oops, I don't agree. Code changed to check every time. [CATA] */
317 /*=================================================================================
319 =================================================================================*/
320 PRIVATE PRBool
uCheckAndGenAlways2Byte(
333 out
[0] = ((in
>> 8 ) & 0xff);
338 /*=================================================================================
340 =================================================================================*/
341 PRIVATE PRBool
uCheckAndGenAlways2ByteShiftGR(
354 out
[0] = ((in
>> 8 ) & 0xff) | 0x80;
355 out
[1] = (in
& 0xff) | 0x80;
359 /*=================================================================================
361 =================================================================================*/
362 MODULE_PRIVATE PRBool
uGenerateShift(
363 uShiftOutTable
*shift
,
372 const uShiftOutCell
* cell
= &(shift
->shiftcell
[0]);
373 PRInt16 itemnum
= shift
->numOfItem
;
374 unsigned char inH
, inL
;
375 inH
= (in
>> 8) & 0xff;
377 for(i
=0;i
<itemnum
;i
++)
379 if( ( inL
>= cell
[i
].shiftout_MinLB
) &&
380 ( inL
<= cell
[i
].shiftout_MaxLB
) &&
381 ( inH
>= cell
[i
].shiftout_MinHB
) &&
382 ( inH
<= cell
[i
].shiftout_MaxHB
) )
384 if(outbuflen
< cell
[i
].reserveLen
)
390 *outlen
= cell
[i
].reserveLen
;
391 return (uSubGenerator(cell
[i
].classID
,in
,out
));
397 /*=================================================================================
399 =================================================================================*/
400 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8F( PRInt32
* state
,
413 out
[1] = ((in
>> 8 ) & 0xff) | 0x80;
414 out
[2] = (in
& 0xff) | 0x80;
418 /*=================================================================================
420 =================================================================================*/
421 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA2( PRInt32
* state
,
435 out
[2] = ((in
>> 8 ) & 0xff) | 0x80;
436 out
[3] = (in
& 0xff) | 0x80;
442 /*=================================================================================
444 =================================================================================*/
445 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA3( PRInt32
* state
,
459 out
[2] = ((in
>> 8 ) & 0xff) | 0x80;
460 out
[3] = (in
& 0xff) | 0x80;
464 /*=================================================================================
466 =================================================================================*/
467 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA4( PRInt32
* state
,
481 out
[2] = ((in
>> 8 ) & 0xff) | 0x80;
482 out
[3] = (in
& 0xff) | 0x80;
486 /*=================================================================================
488 =================================================================================*/
489 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA5( PRInt32
* state
,
503 out
[2] = ((in
>> 8 ) & 0xff) | 0x80;
504 out
[3] = (in
& 0xff) | 0x80;
508 /*=================================================================================
510 =================================================================================*/
511 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA6( PRInt32
* state
,
525 out
[2] = ((in
>> 8 ) & 0xff) | 0x80;
526 out
[3] = (in
& 0xff) | 0x80;
530 /*=================================================================================
532 =================================================================================*/
533 PRIVATE PRBool
uCheckAndGen2ByteGRPrefix8EA7( PRInt32
* state
,
547 out
[2] = ((in
>> 8 ) & 0xff) | 0x80;
548 out
[3] = (in
& 0xff) | 0x80;
552 /*=================================================================================
554 =================================================================================*/
/*
 * Product of VCount and TCount. The Hangul generators use it to split SIndex:
 * LIndex = SIndex / NCount and VIndex = (SIndex % NCount) / TCount.
 * NOTE(review): presumably the number of syllables sharing one leading
 * consonant, per the Unicode Hangul decomposition cited in this file --
 * confirm against the VCount/TCount definitions.
 */
559 #define NCount (VCount * TCount)
560 /*=================================================================================
562 =================================================================================*/
563 PRIVATE PRBool
uCnGAlways8BytesDecomposedHangul(
571 static const PRUint8 lMap
[LCount
] = {
572 0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2, 0xb3, 0xb5,
573 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
576 static const PRUint8 tMap
[TCount
] = {
577 0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa9, 0xaa,
578 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb4, 0xb5,
579 0xb6, 0xb7, 0xb8, 0xba, 0xbb, 0xbc, 0xbd, 0xbe
582 PRUint16 SIndex
, LIndex
, VIndex
, TIndex
;
587 /* the following line is copied from Unicode 2.0, page 3-13 */
588 /* item 1 of Hangul Syllable Decomposition */
591 /* the following lines are copied from Unicode 2.0, page 3-14 */
592 /* item 2 of Hangul Syllable Decomposition, with modification */
593 LIndex
= SIndex
/ NCount
;
594 VIndex
= (SIndex
% NCount
) / TCount
;
595 TIndex
= SIndex
% TCount
;
598 * A Hangul syllable not enumerated in KS X 1001 is represented
599 * by a sequence of 8 bytes beginning with Hangul-filler
600 * (0xA4D4 in EUC-KR and 0x2454 in ISO-2022-KR) followed by three
601 * Jamos (2 bytes each the first of which is 0xA4 in EUC-KR) making
602 * up the syllable. ref. KS X 1001:1998 Annex 3
605 out
[0] = out
[2] = out
[4] = out
[6] = 0xa4;
607 out
[3] = lMap
[LIndex
] ;
608 out
[5] = (VIndex
+ 0xbf);
609 out
[7] = tMap
[TIndex
];
614 PRIVATE PRBool
uCheckAndGenJohabHangul(
627 See Table 4-45 (page 183) of CJKV Information Processing
628 for a detailed explanation of the following table.
631 static const PRUint8 lMap[LCount] = {
632 2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
634 Therefore lMap[i] == i+2;
637 static const PRUint8 vMap
[VCount
] = {
639 3,4,5,6,7, /* no 8,9 */
640 10,11,12,13,14,15, /* no 16,17 */
641 18,19,20,21,22,23, /* no 24,25 */
644 static const PRUint8 tMap
[TCount
] = {
645 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17, /* no 18 */
646 19,20,21,22,23,24,25,26,27,28,29
648 PRUint16 SIndex
, LIndex
, VIndex
, TIndex
, ch
;
649 /* the following line is copied from Unicode 2.0, page 3-13 */
650 /* item 1 of Hangul Syllable Decomposition */
653 /* the following lines are copied from Unicode 2.0, page 3-14 */
654 /* item 2 of Hangul Syllable Decomposition, with modification */
655 LIndex
= SIndex
/ NCount
;
656 VIndex
= (SIndex
% NCount
) / TCount
;
657 TIndex
= SIndex
% TCount
;
665 out
[1] = ch
& 0x00FF;
667 printf("Johab Hangul %x %x in=%x L=%d V=%d T=%d\n", out
[0], out
[1], in
, LIndex
, VIndex
, TIndex
);
672 PRIVATE PRBool
uCheckAndGenJohabSymbol(
684 /* The following code is based on the Perl code listed under
685 * "ISO-2022-KR or EUC-KR to Johab Conversion" (page 1013)
686 * in the book "CJKV Information Processing" by
687 * Ken Lunde <lunde@adobe.com>
689 * sub convert2johab($) { # Convert ISO-2022-KR or EUC-KR to Johab
690 * my @euc = unpack("C*", $_[0]);
691 * my ($fe_off, $hi_off, $lo_off) = (0,0,1);
693 * while(($hi, $lo) = splice(@euc, 0, 2)) {
694 * $hi &= 127; $lo &= 127;
695 * $fe_off = 21 if $hi == 73;
696 * $fe_off = 34 if $hi == 126;
697 * ($hi_off, $lo_off) = ($lo_off, $hi_off) if ($hi <74 or $hi >125);
698 * push(@out, ((($hi+$hi_off) >> 1)+ ($hi <74 ? 200:187)- $fe_off),
699 * $lo + ((($hi+$lo_off) & 1) ? ($lo > 110 ? 34:16):128));
701 * return pack("C*", @out);
704 unsigned char fe_off
= 0;
705 unsigned char hi_off
= 0;
706 unsigned char lo_off
= 1;
707 unsigned char hi
= (in
>> 8) & 0x7F;
708 unsigned char lo
= in
& 0x7F;
713 if( (hi
< 74) || ( hi
> 125) )
719 out
[0] = ((hi
+hi_off
) >> 1) + ((hi
<74) ? 200 : 187 ) - fe_off
;
720 out
[1] = lo
+ (((hi
+lo_off
) & 1) ? ((lo
> 110) ? 34 : 16) :
723 printf("Johab Symbol %x %x in=%x\n", out
[0], out
[1], in
);
728 PRIVATE PRBool
uCheckAndGen4BytesGB18030(
738 out
[0] = (in
/ (10*126*10)) + 0x81;
740 out
[1] = (in
/ (10*126)) + 0x30;
742 out
[2] = (in
/ (10)) + 0x81;
743 out
[3] = (in
% 10) + 0x30;