(DISTFILES): Comment out a few missing files.
[mono-project.git] / mcs / class / I18N / CJK / gb2312-build.sh
blob29111a448621c57a30b3321f375e1bc27c573b56
1 #!/bin/sh
3 # Usage: gb2312-build.sh GB2312.TXT > gb2312.table
5 # Get the input file from ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/GB/GB2312.TXT
7 # This algorithm was taken from the glibc iconv documentation in
8 # iconvdata/gb2312.c
# GB2312 to Unicode
#
# Emits table id 1. Reads the mapping file named by $1, keeps the rows that
# start with "0x" (GB2312 code in column 1, Unicode value in column 2) and
# writes the Unicode values as a flat array indexed by GB2312 code.
grep -E '^0x' "$1" | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($gb, $uni) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Slot = (high byte offset from 0x21) * 94 + (low byte offset from 0x21),
    # which holds for codes whose two bytes are both >= 0x21.
    $vals[int(($g - 0x2121) / 256) * 94 + (($g - 0x2121) & 0xff)] = $u;
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 1, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 1
#
# Emits table id 2 from a hard-coded list; no input file is read.
# NOTE(review): the slot numbers presumably are (code point - 0xA4), matching
# __gb2312_from_ucs4_tab1 in glibc iconvdata/gb2312.c -- confirm against glibc.
perl -e \
'
use strict;
use warnings;

my @vals;
# Each value is first_byte + (second_byte << 8), so the little-endian write
# below emits first_byte and then second_byte.
$vals[0x00] = 0x21 + (0x68 << 8);
$vals[0x03] = 0x21 + (0x6c << 8);
$vals[0x04] = 0x21 + (0x27 << 8);
$vals[0x0c] = 0x21 + (0x63 << 8);
$vals[0x0d] = 0x21 + (0x40 << 8);
$vals[0x33] = 0x21 + (0x41 << 8);
$vals[0x3c] = 0x28 + (0x24 << 8);
$vals[0x3d] = 0x28 + (0x22 << 8);
$vals[0x44] = 0x28 + (0x28 << 8);
$vals[0x45] = 0x28 + (0x26 << 8);
$vals[0x46] = 0x28 + (0x3a << 8);
$vals[0x48] = 0x28 + (0x2c << 8);
$vals[0x49] = 0x28 + (0x2a << 8);
$vals[0x4e] = 0x28 + (0x30 << 8);
$vals[0x4f] = 0x28 + (0x2e << 8);
$vals[0x53] = 0x21 + (0x42 << 8);
$vals[0x55] = 0x28 + (0x34 << 8);
$vals[0x56] = 0x28 + (0x32 << 8);
$vals[0x58] = 0x28 + (0x39 << 8);
$vals[0x5d] = 0x28 + (0x21 << 8);

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 2, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 2
#
# Emits table id 3. Swaps the two columns of the mapping file named by $1 so
# the Unicode value comes first, keeps the rows matching ^0x03, and stores
# each GB2312 code at slot (code point - 0x391).
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x03' | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Swap the bytes of a two-byte GB code so the little-endian write below
    # emits its high byte first; codes below 0x100 are stored unchanged.
    $vals[$u - 0x391] = $g < 0x100 ? $g : (int($g / 256) + (($g & 255) << 8));
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 3, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 3
#
# Emits table id 4. Swaps the two columns of the mapping file named by $1 so
# the Unicode value comes first, keeps the rows matching ^0x04, and stores
# each GB2312 code at slot (code point - 0x401).
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x04' | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Swap the bytes of a two-byte GB code so the little-endian write below
    # emits its high byte first; codes below 0x100 are stored unchanged.
    $vals[$u - 0x401] = $g < 0x100 ? $g : (int($g / 256) + (($g & 255) << 8));
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 4, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 4
#
# Emits table id 5. Swaps the two columns of the mapping file named by $1 so
# the Unicode value comes first, keeps the rows matching ^0x20, and stores
# each GB2312 code at slot (code point - 0x2015).
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x20' | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Swap the bytes of a two-byte GB code so the little-endian write below
    # emits its high byte first; codes below 0x100 are stored unchanged.
    $vals[$u - 0x2015] = $g < 0x100 ? $g : (int($g / 256) + (($g & 255) << 8));
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 5, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 5
#
# Emits table id 6. Swaps the two columns of the mapping file named by $1 so
# the Unicode value comes first, keeps the rows matching ^0x2[12], and stores
# each GB2312 code at slot (code point - 0x2103).
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x2[12]' | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Swap the bytes of a two-byte GB code so the little-endian write below
    # emits its high byte first; codes below 0x100 are stored unchanged.
    $vals[$u - 0x2103] = $g < 0x100 ? $g : (int($g / 256) + (($g & 255) << 8));
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 6, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 6
#
# Emits table id 7. Swaps the two columns of the mapping file named by $1 so
# the Unicode value comes first, keeps the rows matching ^0x24, and stores
# each GB2312 code at slot (code point - 0x2460).
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x24' | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Swap the bytes of a two-byte GB code so the little-endian write below
    # emits its high byte first; codes below 0x100 are stored unchanged.
    $vals[$u - 0x2460] = $g < 0x100 ? $g : (int($g / 256) + (($g & 255) << 8));
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 7, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 7
#
# Emits table id 8. Swaps the two columns of the mapping file named by $1 so
# the Unicode value comes first, keeps the rows matching ^0x3[01], and stores
# each GB2312 code at slot (code point - 0x3000).
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x3[01]' | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Swap the bytes of a two-byte GB code so the little-endian write below
    # emits its high byte first; codes below 0x100 are stored unchanged.
    $vals[$u - 0x3000] = $g < 0x100 ? $g : (int($g / 256) + (($g & 255) << 8));
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 8, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 8
#
# Emits table id 9. Swaps the two columns of the mapping file named by $1 so
# the Unicode value comes first, keeps the rows matching ^0x[4-9], and stores
# each GB2312 code at slot (code point - 0x4e00).
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0x[4-9]' | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Swap the bytes of a two-byte GB code so the little-endian write below
    # emits its high byte first; codes below 0x100 are stored unchanged.
    my $swapped = $g < 0x100 ? $g : (int($g / 256) + (($g & 255) << 8));
    # Debug trace, disabled like in the other tables:
    #printf STDERR ("Setting vals[0x%04x] to 0x%04x\n", $u - 0x4e00, $swapped);
    $vals[$u - 0x4e00] = $swapped;
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 9, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'
# GB2312 from Unicode, table 9
#
# Emits table id 10. Swaps the two columns of the mapping file named by $1 so
# the Unicode value comes first, keeps the rows matching ^0xFF[0-5] (the match
# is case-sensitive, so it relies on upper-case hex digits in the input), and
# stores each GB2312 code at slot (code point - 0xff01).
grep -E '^0x' "$1" | awk '{ print $2, $1 }' | sort | grep -E '^0xFF[0-5]' | perl -e \
'
use strict;
use warnings;

my @vals;
while (my $line = <>) {
    my ($uni, $gb) = split " ", $line;
    my $u = hex($uni);
    my $g = hex($gb);
    # Swap the bytes of a two-byte GB code so the little-endian write below
    # emits its high byte first; codes below 0x100 are stored unchanged.
    $vals[$u - 0xff01] = $g < 0x100 ? $g : (int($g / 256) + (($g & 255) << 8));
}

# The output is binary: keep CRLF translation and UTF-8 layers away from it.
binmode(STDOUT);
# Header: table id, then payload size in bytes, both 32-bit little-endian.
print pack("VV", 10, 2 * scalar(@vals));
# Payload: one 16-bit little-endian value per slot; unset slots become 0.
print pack("v*", map { defined($_) ? $_ : 0 } @vals);
'