2010-04-15 Jb Evain <jbevain@novell.com>
[mcs.git] / class / I18N / CJK / gb2312-build.sh
blob3fc0607b3015902b2d9dbbed9f971ab2527ac4a8
1 #!/bin/sh
# NOTE: Mono doesn't use it anymore: GB2312.TXT vanished from unicode.org.
# Usage: gb2312-build.sh GB2312.TXT > gb2312.table
# Get the input file from ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/GB/GB2312.TXT
# This algorithm was taken from the glibc iconv documentation in
# iconvdata/gb2312.c
# GB2312 to Unicode
#
# Reads the mapping lines (those starting with 0x) of the file named by $1
# and writes to stdout: a 32-bit little-endian header word (1), a 32-bit
# little-endian byte count, then one 16-bit little-endian Unicode value per
# slot.  The slot for a GB2312 code is
#   int((code - 0x2121) / 256) * 94 + ((code - 0x2121) & 0xff).
grep -E '^0x' "$1" | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my @vals;
while (my $line = <>) {
    # Columns: GB2312 code, Unicode code point; anything after is ignored.
    my ($gb, $uni) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    my $slot = int(($g - 0x2121) / 256) * 94 + (($g - 0x2121) & 0xff);
    #printf("Setting vals[%d] to 0x%04x\n", $slot, $u);
    $vals[$slot] = $u;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 1, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 1
#
# Static table: no input is read.  Writes to stdout a 32-bit little-endian
# header word (2), a 32-bit little-endian byte count, then two bytes per
# slot.  Each entry is written as (first byte) + (second byte << 8), so the
# first byte is emitted first.
# NOTE(review): slot 0 presumably corresponds to U+00A4, as in the glibc
# table this was taken from -- confirm against iconvdata/gb2312.c.
perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my @vals;
$vals[0x00] = 0x21 + (0x68 << 8);
$vals[0x03] = 0x21 + (0x6c << 8);
$vals[0x04] = 0x21 + (0x27 << 8);
$vals[0x0c] = 0x21 + (0x63 << 8);
$vals[0x0d] = 0x21 + (0x40 << 8);
$vals[0x33] = 0x21 + (0x41 << 8);
$vals[0x3c] = 0x28 + (0x24 << 8);
$vals[0x3d] = 0x28 + (0x22 << 8);
$vals[0x44] = 0x28 + (0x28 << 8);
$vals[0x45] = 0x28 + (0x26 << 8);
$vals[0x46] = 0x28 + (0x3a << 8);
$vals[0x48] = 0x28 + (0x2c << 8);
$vals[0x49] = 0x28 + (0x2a << 8);
$vals[0x4e] = 0x28 + (0x30 << 8);
$vals[0x4f] = 0x28 + (0x2e << 8);
$vals[0x53] = 0x21 + (0x42 << 8);
$vals[0x55] = 0x28 + (0x34 << 8);
$vals[0x56] = 0x28 + (0x32 << 8);
$vals[0x58] = 0x28 + (0x39 << 8);
$vals[0x5d] = 0x28 + (0x21 << 8);

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 2, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 2
#
# Swaps the two mapping columns of the file named by $1, keeps the lines
# whose Unicode value starts with 0x03, and indexes them from 0x391.
# Writes to stdout a 32-bit little-endian header word (3), a 32-bit
# little-endian byte count, then two bytes per slot: the high-order byte of
# the GB2312 code first, then its low-order byte.
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x03' | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my $base = 0x391;    # code point stored in slot 0
my @vals;
while (my $line = <>) {
    # After the awk swap the columns are: Unicode code point, GB2312 code.
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # One-byte codes are stored as-is; two-byte codes are stored with their
    # bytes swapped so the high-order byte is the one written first.
    my $swapped = $g < 0x100 ? $g : int($g / 256) + (($g & 255) << 8);
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - $base, $swapped);
    $vals[$u - $base] = $swapped;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 3, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 3
#
# Swaps the two mapping columns of the file named by $1, keeps the lines
# whose Unicode value starts with 0x04, and indexes them from 0x401.
# Writes to stdout a 32-bit little-endian header word (4), a 32-bit
# little-endian byte count, then two bytes per slot: the high-order byte of
# the GB2312 code first, then its low-order byte.
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x04' | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my $base = 0x401;    # code point stored in slot 0
my @vals;
while (my $line = <>) {
    # After the awk swap the columns are: Unicode code point, GB2312 code.
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # One-byte codes are stored as-is; two-byte codes are stored with their
    # bytes swapped so the high-order byte is the one written first.
    my $swapped = $g < 0x100 ? $g : int($g / 256) + (($g & 255) << 8);
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - $base, $swapped);
    $vals[$u - $base] = $swapped;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 4, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 4
#
# Swaps the two mapping columns of the file named by $1, keeps the lines
# whose Unicode value starts with 0x20, and indexes them from 0x2015.
# Writes to stdout a 32-bit little-endian header word (5), a 32-bit
# little-endian byte count, then two bytes per slot: the high-order byte of
# the GB2312 code first, then its low-order byte.
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x20' | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my $base = 0x2015;    # code point stored in slot 0
my @vals;
while (my $line = <>) {
    # After the awk swap the columns are: Unicode code point, GB2312 code.
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # One-byte codes are stored as-is; two-byte codes are stored with their
    # bytes swapped so the high-order byte is the one written first.
    my $swapped = $g < 0x100 ? $g : int($g / 256) + (($g & 255) << 8);
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - $base, $swapped);
    $vals[$u - $base] = $swapped;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 5, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 5
#
# Swaps the two mapping columns of the file named by $1, keeps the lines
# whose Unicode value starts with 0x21 or 0x22, and indexes them from
# 0x2103.  Writes to stdout a 32-bit little-endian header word (6), a 32-bit
# little-endian byte count, then two bytes per slot: the high-order byte of
# the GB2312 code first, then its low-order byte.
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x2[12]' | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my $base = 0x2103;    # code point stored in slot 0
my @vals;
while (my $line = <>) {
    # After the awk swap the columns are: Unicode code point, GB2312 code.
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # One-byte codes are stored as-is; two-byte codes are stored with their
    # bytes swapped so the high-order byte is the one written first.
    my $swapped = $g < 0x100 ? $g : int($g / 256) + (($g & 255) << 8);
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - $base, $swapped);
    $vals[$u - $base] = $swapped;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 6, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 6
#
# Swaps the two mapping columns of the file named by $1, keeps the lines
# whose Unicode value starts with 0x24, and indexes them from 0x2460.
# Writes to stdout a 32-bit little-endian header word (7), a 32-bit
# little-endian byte count, then two bytes per slot: the high-order byte of
# the GB2312 code first, then its low-order byte.
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x24' | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my $base = 0x2460;    # code point stored in slot 0
my @vals;
while (my $line = <>) {
    # After the awk swap the columns are: Unicode code point, GB2312 code.
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # One-byte codes are stored as-is; two-byte codes are stored with their
    # bytes swapped so the high-order byte is the one written first.
    my $swapped = $g < 0x100 ? $g : int($g / 256) + (($g & 255) << 8);
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - $base, $swapped);
    $vals[$u - $base] = $swapped;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 7, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 7
#
# Swaps the two mapping columns of the file named by $1, keeps the lines
# whose Unicode value starts with 0x30 or 0x31, and indexes them from
# 0x3000.  Writes to stdout a 32-bit little-endian header word (8), a 32-bit
# little-endian byte count, then two bytes per slot: the high-order byte of
# the GB2312 code first, then its low-order byte.
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x3[01]' | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my $base = 0x3000;    # code point stored in slot 0
my @vals;
while (my $line = <>) {
    # After the awk swap the columns are: Unicode code point, GB2312 code.
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # One-byte codes are stored as-is; two-byte codes are stored with their
    # bytes swapped so the high-order byte is the one written first.
    my $swapped = $g < 0x100 ? $g : int($g / 256) + (($g & 255) << 8);
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - $base, $swapped);
    $vals[$u - $base] = $swapped;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 8, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 8
#
# Swaps the two mapping columns of the file named by $1, keeps the lines
# whose Unicode value starts with 0x4 through 0x9, and indexes them from
# 0x4e00.  Writes to stdout a 32-bit little-endian header word (9), a 32-bit
# little-endian byte count, then two bytes per slot: the high-order byte of
# the GB2312 code first, then its low-order byte.
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x[4-9]' | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my $base = 0x4e00;    # code point stored in slot 0
my @vals;
while (my $line = <>) {
    # After the awk swap the columns are: Unicode code point, GB2312 code.
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # One-byte codes are stored as-is; two-byte codes are stored with their
    # bytes swapped so the high-order byte is the one written first.
    my $swapped = $g < 0x100 ? $g : int($g / 256) + (($g & 255) << 8);
    # Debug trace, disabled: it printed one stderr line per input row.
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - $base, $swapped);
    $vals[$u - $base] = $swapped;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 9, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'
# Gb2312 from Unicode, table 9
#
# Swaps the two mapping columns of the file named by $1, keeps the lines
# whose Unicode value starts with 0xFF0 through 0xFF5, and indexes them from
# 0xff01.  Writes to stdout a 32-bit little-endian header word (10), a
# 32-bit little-endian byte count, then two bytes per slot: the high-order
# byte of the GB2312 code first, then its low-order byte.
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0xFF[0-5]' | perl -e \
'
use strict;
use warnings;

# The output is raw bytes; keep any I/O layer from translating them.
binmode(STDOUT);

my $base = 0xff01;    # code point stored in slot 0
my @vals;
while (my $line = <>) {
    # After the awk swap the columns are: Unicode code point, GB2312 code.
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # One-byte codes are stored as-is; two-byte codes are stored with their
    # bytes swapped so the high-order byte is the one written first.
    my $swapped = $g < 0x100 ? $g : int($g / 256) + (($g & 255) << 8);
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - $base, $swapped);
    $vals[$u - $base] = $swapped;
}

# Slots that were never assigned are written as zero.
my $size = scalar(@vals) * 2;
print pack("VV", 10, $size);
print pack("v*", map { (defined($_) ? $_ : 0) & 0xFFFF } @vals);
'