4 # NOTE: Mono doesn't use it anymore: GB2312.TXT vanished from unicode.org.
7 # Usage: gb2312-build.sh GB2312.TXT > gb2312.table
9 # Get the input file from ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/GB/GB2312.TXT
11 # This algorithm was taken from the glibc iconv documentation in
# Table tagged \001: keep only the lines of $1 that begin with "0x" and feed
# them to the inline perl program that follows.
17 egrep '^0x' $1 | perl
-e \
21 local($gb, $uni, %rest) = split;
# The split above puts the first whitespace-separated field in $gb and the
# second in $uni; any further fields go to %rest, which the visible lines
# never read.
# NOTE(review): $g and $u are not assigned in the visible lines; presumably
# they hold the numeric values of $gb and $uni - confirm.
24 #printf("Setting vals[%d] to 0x%04x\n", int(($g - 0x2121) / 256) * 94 + (($g - 0x2121) & 0xff), $u);
# Slot index: the high-byte part of ($g - 0x2121) times 94, plus the low byte
# of ($g - 0x2121).  The value stored in that slot is $u.
25 @vals[int(($g - 0x2121) / 256) * 94 + (($g - 0x2121) & 0xff)]=$u;
# Header: bytes 1,0,0,0, then $size as four bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
28 printf("\001\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
29 for ($i=0; $i < $#vals+1; $i++) {
30 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
34 # Gb2312 from Unicode, table 1
# Hard-coded table: each listed slot of @vals gets a 16-bit value written as
# (low byte) + (high byte << 8).  Slots that are not listed are not assigned
# here.
# NOTE(review): which Unicode code point slot 0x00 stands for is not shown in
# the visible lines - confirm against the code that reads this table.
39 @vals[0x00]=0x21 + (0x68 << 8);
40 @vals[0x03]=0x21 + (0x6c << 8);
41 @vals[0x04]=0x21 + (0x27 << 8);
42 @vals[0x0c]=0x21 + (0x63 << 8);
43 @vals[0x0d]=0x21 + (0x40 << 8);
44 @vals[0x33]=0x21 + (0x41 << 8);
45 @vals[0x3c]=0x28 + (0x24 << 8);
46 @vals[0x3d]=0x28 + (0x22 << 8);
47 @vals[0x44]=0x28 + (0x28 << 8);
48 @vals[0x45]=0x28 + (0x26 << 8);
49 @vals[0x46]=0x28 + (0x3a << 8);
50 @vals[0x48]=0x28 + (0x2c << 8);
51 @vals[0x49]=0x28 + (0x2a << 8);
52 @vals[0x4e]=0x28 + (0x30 << 8);
53 @vals[0x4f]=0x28 + (0x2e << 8);
54 @vals[0x53]=0x21 + (0x42 << 8);
55 @vals[0x55]=0x28 + (0x34 << 8);
56 @vals[0x56]=0x28 + (0x32 << 8);
57 @vals[0x58]=0x28 + (0x39 << 8);
58 @vals[0x5d]=0x28 + (0x21 << 8);
# Header: bytes 2,0,0,0, then $size as four bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
61 printf("\002\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals (0x5d from the assignments
# above), two bytes each, low byte first.  Unassigned slots come out as two
# zero bytes.
62 for ($i=0; $i < $#vals+1; $i++) {
63 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
67 # Gb2312 from Unicode, table 2
# Pipeline: keep the lines of $1 that start with "0x", print their first two
# fields in swapped order, sort, keep the rows whose (new) first field starts
# with "0x03", and feed them to the inline perl program.
69 egrep '^0x' $1 |
awk '{ print $2, $1 }' |
sort |
egrep '^0x03' | perl
-e \
73 local($uni, $gb, %rest) = split;
# The split above puts the first field in $uni and the second in $gb; %rest
# receives any remaining fields and is never read in the visible lines.
# NOTE(review): $u and $g are not assigned in the visible lines; presumably
# they hold the numeric values of $uni and $gb - confirm.
76 #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0x391, ($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8));
# Slot index is $u - 0x391.  A $g below 0x100 is stored unchanged; otherwise
# its two bytes are swapped (high byte of $g in bits 0-7, low byte in bits
# 8-15), so the low-byte-first dump below writes the high byte of $g first.
77 @vals[$u - 0x391]=($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8);
# Header: bytes 3,0,0,0, then $size as four bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
80 printf("\003\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
81 for ($i=0; $i < $#vals+1; $i++) {
82 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
86 # Gb2312 from Unicode, table 3
# Pipeline: keep the lines of $1 that start with "0x", print their first two
# fields in swapped order, sort, keep the rows whose (new) first field starts
# with "0x04", and feed them to the inline perl program.
88 egrep '^0x' $1 |
awk '{ print $2, $1 }' |
sort |
egrep '^0x04' | perl
-e \
92 local($uni, $gb, %rest) = split;
# The split above puts the first field in $uni and the second in $gb; %rest
# receives any remaining fields and is never read in the visible lines.
# NOTE(review): $u and $g are not assigned in the visible lines; presumably
# they hold the numeric values of $uni and $gb - confirm.
95 #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0x401, ($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8));
# Slot index is $u - 0x401.  A $g below 0x100 is stored unchanged; otherwise
# its two bytes are swapped (high byte of $g in bits 0-7, low byte in bits
# 8-15), so the low-byte-first dump below writes the high byte of $g first.
96 @vals[$u - 0x401]=($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8);
# Header: bytes 4,0,0,0, then $size as four bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
99 printf("\004\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
100 for ($i=0; $i < $#vals+1; $i++) {
101 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
105 # Gb2312 from Unicode, table 4
# Pipeline: keep the lines of $1 that start with "0x", print their first two
# fields in swapped order, sort, keep the rows whose (new) first field starts
# with "0x20", and feed them to the inline perl program.
107 egrep '^0x' $1 |
awk '{ print $2, $1 }' |
sort |
egrep '^0x20' | perl
-e \
111 local($uni, $gb, %rest) = split;
# The split above puts the first field in $uni and the second in $gb; %rest
# receives any remaining fields and is never read in the visible lines.
# NOTE(review): $u and $g are not assigned in the visible lines; presumably
# they hold the numeric values of $uni and $gb - confirm.
114 #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0x2015, ($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8));
# Slot index is $u - 0x2015.  A $g below 0x100 is stored unchanged; otherwise
# its two bytes are swapped (high byte of $g in bits 0-7, low byte in bits
# 8-15), so the low-byte-first dump below writes the high byte of $g first.
115 @vals[$u - 0x2015]=($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8);
# Header: bytes 5,0,0,0, then $size as four bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
118 printf("\005\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
119 for ($i=0; $i < $#vals+1; $i++) {
120 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
124 # Gb2312 from Unicode, table 5
# Pipeline: keep the lines of $1 that start with "0x", print their first two
# fields in swapped order, sort, keep the rows whose (new) first field starts
# with "0x21" or "0x22", and feed them to the inline perl program.
126 egrep '^0x' $1 |
awk '{ print $2, $1 }' |
sort |
egrep '^0x2[12]' | perl
-e \
130 local($uni, $gb, %rest) = split;
# The split above puts the first field in $uni and the second in $gb; %rest
# receives any remaining fields and is never read in the visible lines.
# NOTE(review): $u and $g are not assigned in the visible lines; presumably
# they hold the numeric values of $uni and $gb - confirm.
133 #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0x2103, ($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8));
# Slot index is $u - 0x2103.  A $g below 0x100 is stored unchanged; otherwise
# its two bytes are swapped (high byte of $g in bits 0-7, low byte in bits
# 8-15), so the low-byte-first dump below writes the high byte of $g first.
134 @vals[$u - 0x2103]=($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8);
# Header: bytes 6,0,0,0, then $size as four bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
137 printf("\006\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
138 for ($i=0; $i < $#vals+1; $i++) {
139 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
144 # Gb2312 from Unicode, table 6
# Pipeline: keep the lines of $1 that start with "0x", print their first two
# fields in swapped order, sort, keep the rows whose (new) first field starts
# with "0x24", and feed them to the inline perl program.
146 egrep '^0x' $1 |
awk '{ print $2, $1 }' |
sort |
egrep '^0x24' | perl
-e \
150 local($uni, $gb, %rest) = split;
# The split above puts the first field in $uni and the second in $gb; %rest
# receives any remaining fields and is never read in the visible lines.
# NOTE(review): $u and $g are not assigned in the visible lines; presumably
# they hold the numeric values of $uni and $gb - confirm.
153 #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0x2460, ($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8));
# Slot index is $u - 0x2460.  A $g below 0x100 is stored unchanged; otherwise
# its two bytes are swapped (high byte of $g in bits 0-7, low byte in bits
# 8-15), so the low-byte-first dump below writes the high byte of $g first.
154 @vals[$u - 0x2460]=($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8);
# Header: bytes 7,0,0,0, then $size as four bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
157 printf("\007\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
158 for ($i=0; $i < $#vals+1; $i++) {
159 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
164 # Gb2312 from Unicode, table 7
# Pipeline: keep the lines of $1 that start with "0x", print their first two
# fields in swapped order, sort, keep the rows whose (new) first field starts
# with "0x30" or "0x31", and feed them to the inline perl program.
166 egrep '^0x' $1 |
awk '{ print $2, $1 }' |
sort |
egrep '^0x3[01]' | perl
-e \
170 local($uni, $gb, %rest) = split;
# The split above puts the first field in $uni and the second in $gb; %rest
# receives any remaining fields and is never read in the visible lines.
# NOTE(review): $u and $g are not assigned in the visible lines; presumably
# they hold the numeric values of $uni and $gb - confirm.
173 #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0x3000, ($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8));
# Slot index is $u - 0x3000.  A $g below 0x100 is stored unchanged; otherwise
# its two bytes are swapped (high byte of $g in bits 0-7, low byte in bits
# 8-15), so the low-byte-first dump below writes the high byte of $g first.
174 @vals[$u - 0x3000]=($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8);
# Header: first byte is octal 010 (decimal 8), then 0,0,0, then $size as four
# bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
177 printf("\010\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
178 for ($i=0; $i < $#vals+1; $i++) {
179 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
183 # Gb2312 from Unicode, table 8
# Pipeline: keep the lines of $1 that start with "0x", print their first two
# fields in swapped order, sort, keep the rows whose (new) first field starts
# with "0x4" through "0x9", and feed them to the inline perl program.
185 egrep '^0x' $1 |
awk '{ print $2, $1 }' |
sort |
egrep '^0x[4-9]' | perl
-e \
189 local($uni, $gb, %rest) = split;
# The split above puts the first field in $uni and the second in $gb; %rest
# receives any remaining fields and is never read in the visible lines.
# NOTE(review): $u and $g are not assigned in the visible lines; presumably
# they hold the numeric values of $uni and $gb - confirm.
# Debug trace kept disabled, like the same trace in the other tables of this
# script; when live it printed one STDERR line per input row.
192 #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0x4e00, ($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8));
# Slot index is $u - 0x4e00.  A $g below 0x100 is stored unchanged; otherwise
# its two bytes are swapped (high byte of $g in bits 0-7, low byte in bits
# 8-15), so the low-byte-first dump below writes the high byte of $g first.
193 @vals[$u - 0x4e00]=($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8);
# Header: first byte is octal 011 (decimal 9), then 0,0,0, then $size as four
# bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
196 printf("\011\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
197 for ($i=0; $i < $#vals+1; $i++) {
198 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);
202 # Gb2312 from Unicode, table 9
# Pipeline: keep the lines of $1 that start with "0x", print their first two
# fields in swapped order, sort, keep the rows whose (new) first field starts
# with "0xFF0" through "0xFF5" (upper-case FF only; the match is
# case-sensitive), and feed them to the inline perl program.
204 egrep '^0x' $1 |
awk '{ print $2, $1 }' |
sort |
egrep '^0xFF[0-5]' | perl
-e \
208 local($uni, $gb, %rest) = split;
# The split above puts the first field in $uni and the second in $gb; %rest
# receives any remaining fields and is never read in the visible lines.
# NOTE(review): $u and $g are not assigned in the visible lines; presumably
# they hold the numeric values of $uni and $gb - confirm.
211 #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0xff01, ($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8));
# Slot index is $u - 0xff01.  A $g below 0x100 is stored unchanged; otherwise
# its two bytes are swapped (high byte of $g in bits 0-7, low byte in bits
# 8-15), so the low-byte-first dump below writes the high byte of $g first.
212 @vals[$u - 0xff01]=($g < 0x100 ? $g : int($g/256)) + (($g < 0x100 ? 0 : $g&255) << 8);
# Header: first byte is octal 012 (decimal 10), then 0,0,0, then $size as
# four bytes, lowest byte first.
# NOTE(review): $size is not assigned in the visible lines - confirm where it
# is computed.
215 printf("\012\000\000\000%c%c%c%c", $size & 0xFF, ($size >> 8) & 0xFF, ($size >> 16) & 0xFF, ($size >> 24) & 0xFF);
# Write slots 0 through the last index of @vals, two bytes each, low byte
# first.  Slots that were never assigned come out as two zero bytes.
216 for ($i=0; $i < $#vals+1; $i++) {
217 printf("%c%c", $vals[$i] & 0xFF, ($vals[$i] >> 8) & 0xFF);