here, have a Thumb back-end
[mozilla-central.git] / intl / uconv / ucvcn / nsGBKConvUtil.cpp
blob6b64fe55841c1cc6bd7b978a6d7cee2392d903cf
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is Mozilla Communicator client code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1998
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 #include "nsGBKConvUtil.h"
39 #include "gbku.h"
40 #include "nsCRT.h"
41 #include "nsICharRepresentable.h"
42 #define MAX_GBK_LENGTH 24066 /* (0xfe-0x80)*(0xfe-0x3f) */
43 //--------------------------------------------------------------------
44 // nsGBKConvUtil
45 //--------------------------------------------------------------------
47 static PRBool gInitToGBKTable = PR_FALSE;
48 static const PRUnichar gGBKToUnicodeTable[MAX_GBK_LENGTH] = {
49 #include "cp936map.h"
51 static PRUint16 gUnicodeToGBKTable[0xA000-0x4e00];
53 PRBool nsGBKConvUtil::UnicodeToGBKChar(
54 PRUnichar aChar, PRBool aToGL, char*
55 aOutByte1, char* aOutByte2)
57 NS_ASSERTION(gInitToGBKTable, "gGBKToUnicodeTable is not init yet. need to call InitToGBKTable first");
58 PRBool found=PR_FALSE;
59 *aOutByte1 = *aOutByte2 = 0;
60 if(UNICHAR_IN_RANGE(0xd800, aChar, 0xdfff))
62 // surrogate is not in here
63 return PR_FALSE;
65 if(UNICHAR_IN_RANGE(0x4e00, aChar, 0x9FFF))
67 PRUint16 item = gUnicodeToGBKTable[aChar - 0x4e00];
68 if(item != 0)
70 *aOutByte1 = item >> 8;
71 *aOutByte2 = item & 0x00FF;
72 found = PR_TRUE;
73 } else {
74 return PR_FALSE;
76 } else {
77 // ugly linear search
78 for( PRInt32 i = 0; i < MAX_GBK_LENGTH; i++ )
80 if( aChar == gGBKToUnicodeTable[i])
82 *aOutByte1 = (i / 0x00BF + 0x0081) ;
83 *aOutByte2 = (i % 0x00BF + 0x0040) ;
84 found = PR_TRUE;
85 break;
89 if(! found)
90 return PR_FALSE;
92 if(aToGL) {
93 // to GL, we only return if it is in the range
94 if(UINT8_IN_RANGE(0xA1, *aOutByte1, 0xFE) &&
95 UINT8_IN_RANGE(0xA1, *aOutByte2, 0xFE))
97 // mask them to GL
98 *aOutByte1 &= 0x7F;
99 *aOutByte2 &= 0x7F;
100 } else {
101 // if it does not fit into 0xa1-0xfe 0xa1-0xfe range that mean
102 // it is not a GB2312 character, we cannot map to GL
103 *aOutByte1 = 0x00;
104 *aOutByte2 = 0x00;
105 return PR_FALSE;
108 return PR_TRUE;
110 PRUnichar nsGBKConvUtil::GBKCharToUnicode(char aByte1, char aByte2)
112 NS_ASSERTION(UINT8_IN_RANGE(0x81,aByte1, 0xFE), "first byte out of range");
113 NS_ASSERTION(UINT8_IN_RANGE(0x40,aByte2, 0xFE), "second byte out of range");
115 PRUint8 i1 = (PRUint8)aByte1;
116 PRUint8 i2 = (PRUint8)aByte2;
117 PRUint16 idx = (i1 - 0x0081) * 0x00bf + i2 - 0x0040 ;
119 NS_ASSERTION(idx < MAX_GBK_LENGTH, "ARB");
120 // play it safe- add if statement here ot protect ARB
121 // probably not necessary
122 if(idx < MAX_GBK_LENGTH)
123 return gGBKToUnicodeTable[ idx ];
124 else
125 return UCS2_NO_MAPPING;
127 void nsGBKConvUtil::InitToGBKTable()
129 if ( gInitToGBKTable )
130 return;
132 PRUnichar unicode;
133 PRUnichar i;
134 // zap it to zero first
135 memset(gUnicodeToGBKTable,0, sizeof(gUnicodeToGBKTable));
137 for ( i=0; i<MAX_GBK_LENGTH; i++ )
139 unicode = gGBKToUnicodeTable[i];
140 // to reduce size of gUnicodeToGBKTable, we only do direct unicode to GB
141 // table mapping between unicode 0x4E00 and 0xA000. Others by searching
142 // gGBKToUnicodeTable. There is a trade off between memory usage and speed.
143 if(UNICHAR_IN_RANGE(0x4e00, unicode, 0x9fff))
145 unicode -= 0x4E00;
146 gUnicodeToGBKTable[unicode] = (( i / 0x00BF + 0x0081) << 8) |
147 ( i % 0x00BF+ 0x0040);
150 gInitToGBKTable = PR_TRUE;
152 void nsGBKConvUtil::FillInfo(
153 PRUint32 *aInfo,
154 PRUint8 aStart1, PRUint8 aEnd1,
155 PRUint8 aStart2, PRUint8 aEnd2
158 PRUint16 i,j, k;
159 PRUnichar unicode;
161 for ( i=aStart1; i<=aEnd1; i++)
163 for( j=aStart2; j<=aEnd2; j++)
165 k = (i - 0x0081)*0x00BF +(j-0x0040);
166 unicode = gGBKToUnicodeTable[k];
167 NS_ASSERTION(unicode != 0xFFFF, "somehow the table still use 0xffff");
168 if (unicode != UCS2_NO_MAPPING)
170 SET_REPRESENTABLE(aInfo, unicode);
175 void nsGBKConvUtil::FillGB2312Info(
176 PRUint32 *aInfo
179 // The following range is coded by looking at the GB2312 standard
180 // and make sure we do not call FillInfo for undefined code point
181 // Symbol
182 // row 1 - 1 range (full)
183 FillInfo(aInfo, 0x21|0x80, 0x21|0x80, 0x21|0x80, 0x7E|0x80);
184 // row 2 - 3 range
185 FillInfo(aInfo, 0x22|0x80, 0x22|0x80, (0x20+17)|0x80, (0x20+66)|0x80);
186 FillInfo(aInfo, 0x22|0x80, 0x22|0x80, (0x20+69)|0x80, (0x20+78)|0x80);
187 FillInfo(aInfo, 0x22|0x80, 0x22|0x80, (0x20+81)|0x80, (0x20+92)|0x80);
188 // row 3 - 1 range (full)
189 FillInfo(aInfo, 0x23|0x80, 0x23|0x80, 0x21|0x80, 0x7E|0x80);
190 // row 4 - 1 range
191 FillInfo(aInfo, 0x24|0x80, 0x24|0x80, (0x20+ 1)|0x80, (0x20+83)|0x80);
192 // row 5 - 1 range
193 FillInfo(aInfo, 0x25|0x80, 0x25|0x80, (0x20+ 1)|0x80, (0x20+86)|0x80);
194 // row 6 - 2 range
195 FillInfo(aInfo, 0x26|0x80, 0x26|0x80, (0x20+ 1)|0x80, (0x20+24)|0x80);
196 FillInfo(aInfo, 0x26|0x80, 0x26|0x80, (0x20+33)|0x80, (0x20+56)|0x80);
197 // row 7
198 FillInfo(aInfo, 0x27|0x80, 0x27|0x80, (0x20+ 1)|0x80, (0x20+33)|0x80);
199 FillInfo(aInfo, 0x27|0x80, 0x27|0x80, (0x20+49)|0x80, (0x20+81)|0x80);
200 // row 8
201 FillInfo(aInfo, 0x28|0x80, 0x28|0x80, (0x20+ 1)|0x80, (0x20+26)|0x80);
202 FillInfo(aInfo, 0x28|0x80, 0x28|0x80, (0x20+36)|0x80, (0x20+73)|0x80);
203 // row 9
204 FillInfo(aInfo, 0x29|0x80, 0x29|0x80, (0x20+ 4)|0x80, (0x20+79)|0x80);
206 // Frequent used Hanzi
207 // 3021-567e
208 FillInfo(aInfo, 0x30|0x80, 0x56|0x80, 0x21|0x80, 0x7E|0x80);
209 // 5721-5779
210 FillInfo(aInfo, 0x57|0x80, 0x57|0x80, 0x21|0x80, 0x79|0x80);
212 // Infrequent used Hanzi
213 // 5821-777e
214 FillInfo(aInfo, 0x58|0x80, 0x77|0x80, 0x21|0x80, 0x7E|0x80);