Bug 765: use load_uri to load the old tab's document to avoid crash
[elinks/miciah.git] / Unicode / gen-cp
blob83f8090d6d27739c5407fbceca14a228b89d49bc
#!/bin/sh
#
# gen-cp - generate ../src/intl/codepage.inc from the code page tables.
#
# Run from the directory holding index.txt and the *.cp files.  index.txt
# lists the code page names (whitespace separated); for each NAME the file
# NAME.cp is read:
#   line 1    display name of the code page
#   line 2    double-quoted aliases separated by ", "
#   line 3+   "0xBYTE 0xUNICODE [# comment]" mappings; blank lines and
#             comment-only lines are ignored
#
# If index.txt is missing or empty, nothing is generated and only the
# banner lines are printed.

# Emit the highhalf_NAME and table_NAME definitions for one code page.
#   $1 - code page name; mappings are read from "$1.cp"
# The first mapping seen for each byte 0x80..0xFF goes into highhalf_NAME;
# any further mapping of an already-used byte goes into table_NAME.
gen_cp_emit_tables() {
  sed '  # Delete the name and aliases lines.
    1,2d
    # Delete comment-only and blank lines.
    /^[[:blank:]]*\(#.*\)\{0,1\}$/d
    # Copy to the hold space.
    h
    # Delete everything except the comment.
    s/^[^#]*//
    # If there is a comment, change it to use /* */ delimiters.
    s!#[[:blank:]]*\(.*\)!/* \1 */!
    # Exchange spaces; now hold space = comment and pattern space = all.
    x
    # Delete the comment.
    s/#.*//
    # Canonicalize case so the strings can be used as lookup keys.
    y/Xabcdef/xABCDEF/
    # Delete mappings of bytes 0x00...0x7F.  ELinks assumes those match ASCII.
    /^[[:blank:]]*0x[01234567]/d
    # Delete lines that do not map the byte to exactly one character.
    /^[[:blank:]]*0x[0123456789ABCDEF]\{2\}[[:blank:]]\{1,\}0x[0123456789ABCDEF]\{1,\}[[:blank:]]*$/!d
    # Append a newline and the comment from the hold space.
    G
    # Turn that newline into a blank so the comment stays a separate field
    # even when the input had no blank before the # sign.
    s/\n/ /' "$1.cp" | {
    # Start from a clean slate: one shell variable per high byte.
    for left in 8 9 A B C D E F; do
      for right in 0 1 2 3 4 5 6 7 8 9 A B C D E F; do
        eval "high0x$left$right="
      done
    done
    tab=$(printf '\t')
    table=
    highuse=
    # $byte was validated by sed as 0x plus two hex digits, so it is safe
    # to splice into the variable names handed to eval below.
    while read -r byte unicode comment; do
      if eval "[ -n \"\$high$byte\" ]"; then
        table="${table}${tab}{${byte}, ${unicode}},${comment:+ }${comment}
"
      else
        eval "high$byte=\"\$unicode,\${comment:+ }\$comment\""
        highuse=1
      fi
    done
    if [ -n "$highuse" ]; then
      printf 'const uint16_t highhalf_%s [] = {\n' "$1"
      for left in 8 9 A B C D E F; do
        for right in 0 1 2 3 4 5 6 7 8 9 A B C D E F; do
          # Unmapped bytes are emitted as 0xFFFF.
          eval "entry=\${high0x$left$right:-0xFFFF,}"
          printf '\t/* %s */ %s\n' "0x$left$right" "$entry"
        done
      done
      printf '};\n\n'
    else
      printf '#define highhalf_%s highhalf_NULL\n\n' "$1"
    fi
    if [ -n "$table" ]; then
      printf 'const struct table_entry table_%s [] = {\n%s\t{0, 0}\n};\n' "$1" "$table"
    else
      printf '#define table_%s table_NULL\n' "$1"
    fi
    printf '\n'
  }
}

# Emit the NULL-terminated aliases_NAME array for one code page.
#   $1 - code page name; aliases are taken from line 2 of "$1.cp"
# Blank runs are squeezed, a trailing blank is dropped, and every alias is
# put on its own tab-indented line followed by a comma.
gen_cp_emit_aliases() {
  printf 'unsigned char *const aliases_%s [] = {\n' "$1"
  awk 'NR == 2 {
      gsub(/[ \t]+/, " ")
      sub(/ $/, "")
      gsub(/", /, "\",\n\t")
      print "\t" $0 ","
      exit
    }' "$1.cp"
  printf '\tNULL\n};\n'
}

# Emit highhalf_NULL (128 entries of 0xFFFF) and the empty table_NULL,
# which code pages without own tables are #defined to.
gen_cp_emit_null_tables() {
  printf '\n/*** NULL ***/\n\n'
  printf 'const uint16_t highhalf_NULL [] = {\n'
  row=0
  while [ "$row" -lt 16 ]; do
    printf '\t0xFFFF,0xFFFF,0xFFFF,0xFFFF, 0xFFFF,0xFFFF,0xFFFF,0xFFFF,\n'
    row=$((row + 1))
  done
  printf '};\n\n'
  printf 'const struct table_entry table_NULL [] = {\n'
  printf '\t{0, 0}\n'
  printf '};\n'
}

# Write the complete include file to stdout.
#   $@ - code page names, in index order
# Each name is also echoed to stderr as a progress indicator.
gen_cp_generate() {
  n=0

  echo '/* Automatically generated by gen-cp */'
  echo '/* DO NOT EDIT THIS FILE! EDIT Unicode/<whatever> INSTEAD! */'
  echo

  for i in "$@"; do
    printf '%s ' "$i" >&2
    echo
    echo
    printf '/*** %s ***/\n' "$i"
    echo
    gen_cp_emit_tables "$i"
    gen_cp_emit_aliases "$i"
    n=$((n + 1))
  done

  gen_cp_emit_null_tables

  echo
  echo 'const struct codepage_desc codepages [] = {'
  for i in "$@"; do
    # Re-joining the fields squeezes blank runs and trims the ends of the
    # name line without exposing it to globbing or echo escapes.
    name=$(awk '{ $1 = $1; print; exit }' "$i.cp")
    printf '\t{"%s", aliases_%s, highhalf_%s, table_%s},\n' "$name" "$i" "$i" "$i"
  done
  printf '\t{NULL, NULL, NULL}\n'
  echo '};'

  echo
  printf '#define N_CODEPAGES %s\n' "$n"
}

echo
echo Generating code page translation tables.

codepages=$(cat index.txt)

if [ -n "$codepages" ]; then
  # NOTE(review): the character the original output filter deleted is not
  # visible in the reviewed copy (it rendered as whitespace).  Deleting
  # plain spaces would corrupt the generated C, so it is presumed to be a
  # carriage return -- confirm against the original file.
  # The list is split on whitespace on purpose.
  # shellcheck disable=SC2086
  gen_cp_generate $codepages | tr -d '\r' > ../src/intl/codepage.inc

  echo
  echo Done.
fi

echo