Refresh charsets from www.unicode.org.
[elinks/kon.git] / Unicode / gen-cp
blob 1cc5a0d4377e6881ec3c1c9029bbc8b6e2a26f09
1 #!/bin/sh
# gen-cp: build the C code page tables from the mapping files named in
# index.txt and write them to ../src/intl/codepage.inc.
#
# NOTE(review): this text is a rendered listing of the script. Every line
# carries the listing number as a prefix, and the numbering skips values
# (for example 10, 32, 38, 48, 61, 65, 77, 82, 84 and 122), so several short
# lines are not shown here -- presumably blank lines and one-token lines such
# as the subshell opener, fi and the closing brace. Confirm against the
# repository copy before running anything from this listing.
#
# Output written for every code page NAME, in this order:
#   - highhalf_NAME: 128 uint16_t entries for bytes 0x80 through 0xFF, with
#     0xFFFF in every slot that has no mapping; or, when no slot was filled,
#     a define that aliases highhalf_NAME to highhalf_NULL.
#   - table_NAME: the additional {byte, unicode} pairs, terminated by {0, 0};
#     or a define that aliases table_NAME to table_NULL when there are none.
#   - aliases_NAME: built from the second line of NAME.cp, terminated by NULL.
# After the loop the script prints the shared highhalf_NULL table (16 rows of
# eight 0xFFFF values) and table_NULL, then the codepages array whose display
# names come from the first line of each NAME.cp, then N_CODEPAGES.
#
# NOTE(review): the two sed filters near the end are shown with a single
# space as the pattern to delete. Taken literally that would strip every
# space from the generated C text, so the original character was presumably
# something else that the rendering flattened -- TODO confirm.
3 echo
4 echo Generating code page translation tables.
# Whitespace-separated list of code page base names; the loop below reads
# the file NAME.cp for each of them.
6 codepages=`cat index.txt`
# Generate only when index.txt yielded at least one name.
8 if [ -n "$codepages" ]; then
# Number of code pages processed; printed at the end as N_CODEPAGES.
12 n=0
# Banner at the top of the generated file.
14 echo '/* Automatically generated by gen-cp */'
15 echo '/* DO NOT EDIT THIS FILE! EDIT Unicode/<whatever> INSTEAD! */'
16 echo '/* See the input files for copyrights and licences. */'
17 echo
# One pass per code page: translation tables first, then the alias list.
19 for i in $codepages; do
# Progress goes to stderr; stdout of this section is what the closing
# pipeline redirects into the generated file.
20 echo -n $i' ' 1>&2
21 echo
23 echo
24 echo "/*** $i ***/"
25 echo
# sed stage: reduce each mapping line of NAME.cp to the form
#   BYTE UNICODE /* comment */
# for the reader loop that follows. The first two lines of the file (name
# and aliases) are dropped here and read separately further down.
# NOTE(review): the sed comments describe copying to, exchanging with and
# appending from the hold space, but no such commands are visible (listing
# numbers 32, 38 and 48 are skipped) -- presumably h, x and G; confirm.
27 sed ' # Delete the name and aliases lines.
28 1,2d
29 # Delete comment-only and blank lines.
30 /^[ ]*\(#.*\)\{,1\}$/d
31 # Copy to the hold space.
33 # Delete everything except the comment.
34 s/^[^#]*//
35 # If there is a comment, change it to use /* */ delimiters.
36 s!#[ ]*\(.*\)!/* \1 */!
37 # Exchange spaces; now hold space = comment and pattern space = all.
39 # Delete the comment.
40 s/#.*//
41 # Canonicalize case so the strings can be used as lookup keys.
42 y/Xabcdef/xABCDEF/
43 # Delete mappings of bytes 0x00...0x7F. ELinks assumes those match ASCII.
44 /^0x[01234567]/d
45 # Delete lines that do not map the byte to exactly one character.
46 /^[ ]*0x[0123456789ABCDEF]\{2\}[ ]\{1,\}0x[0123456789ABCDEF]\{1,\}[ ]*$/!d
47 # Append a newline and the comment from the hold space.
49 # Delete the newline added by the previous command.
50 s/\n//' "$i.cp" | {
# Reset one shell variable per byte value, high0x80 through high0xFF, to the
# empty string. The names are built with eval because plain sh has no arrays.
51 for left in 8 9 A B C D E F; do
52 for right in 0 1 2 3 4 5 6 7 8 9 A B C D E F; do
53 eval "high0x$left$right="
54 done
55 done
# table accumulates the extra entries as C initializer text; highuse becomes
# non-empty once at least one high-half slot has been filled.
56 table=
57 highuse=
# read splits each line into the byte, the Unicode value and the remainder
# of the line (the C comment, possibly empty).
58 while read byte unicode comment; do
# The first mapping seen for a byte fills its high-half slot; any later
# mapping for the same byte is appended to table instead.
59 if eval "[ \"\$high$byte\" ]"; then
60 table="$table {$byte, $unicode},${comment+ }$comment
62 else
63 eval "high$byte=\"\$unicode,\${comment+ }\$comment\""
64 highuse=1
66 done
67 if [ "$highuse" ]; then
68 printf "const uint16_t highhalf_%s [] = {\n" "$i"
69 for left in 8 9 A B C D E F; do
70 for right in 0 1 2 3 4 5 6 7 8 9 A B C D E F; do
71 eval "printf \"\\t/* %s */ %s\\n\" \"0x$left$right\" \"\${high0x$left$right:-0xFFFF,}\""
72 done
73 done
74 printf "};\n\n"
75 else
76 printf "#define highhalf_%s highhalf_NULL\n\n" "$i"
78 if [ "$table" ]; then
79 printf "const struct table_entry table_%s [] = {\n%s\t{0, 0}\n};\n" "$i" "$table"
80 else
81 printf "#define table_%s table_NULL\n" "$i"
83 printf "\n"
86 echo 'unsigned char *const aliases_'$i' [] = {'
87 head -n 2 $i.cp | tail -n +2 | sed 's/ \+/ /g; s/ $//; s/\", /\",£/g; s/$/,/' | tr "£" "\n" \
88 | sed 's/^/£/g' | tr "£" "\t"
89 echo ' NULL
90 };'
91 n=`expr $n + 1`
92 done
94 printf "\n/*** NULL ***/\n\n"
95 printf "const uint16_t highhalf_NULL [] = {\n"
96 for r in `seq 16`; do
97 printf "\t0xFFFF,0xFFFF,0xFFFF,0xFFFF, 0xFFFF,0xFFFF,0xFFFF,0xFFFF,\n"
98 done
99 printf "};\n\n"
100 printf "const struct table_entry table_NULL [] = {\n"
101 printf "\t{0, 0}\n"
102 printf "};\n"
104 echo
105 echo 'const struct codepage_desc codepages [] = {'
107 for i in $codepages; do
108 echo ' {"'`head -n 1 $i.cp`'", aliases_'$i', highhalf_'$i', table_'$i'},'
109 done
111 echo ' {NULL, NULL, NULL}'
112 echo '};'
114 echo
115 echo '#define N_CODEPAGES '$n | sed 's/ //g'
117 ) | sed 's/ //g' > ../src/intl/codepage.inc
119 echo
120 echo Done.
124 echo