3 # Generate UTF-8 case mapping tables
5 # (c) 2010 Steve Bennett <steveb@workware.net.au>
7 # See LICENCE for licence details.
10 # Parse the Unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
11 # to generate case mapping and display width tables
# Usage message. The brackets around -width are backslash-escaped so that Tcl
# does not attempt command substitution inside the double-quoted string.
17 set USAGE
"Usage: parse-unidata.tcl \[-width\] UnicodeData.txt"
# Check for the -width option named in the usage text.
21 if {$arg eq
"-width"} {
# Test whether the filename variable has already been set
# (presumably to reject a second filename -- TODO confirm).
24 if {[info exists
filename]} {
# Test whether the filename variable is still unset
# (presumably the missing-argument case -- TODO confirm).
31 if {![info exists
filename]} {
# The values below are hexadecimal code points.
# NOTE(review): they look like start/end pairs of double-width character ranges
# destined for map(wide), which is later printed as unicode_range_wide --
# confirm against the assignment that owns this list.
# NOTE(review): Unicode publishes East Asian width data in a separate file,
# EastAsianWidth.txt, which may answer the question below -- confirm.
36 # Why isn't this available in UnicodeData.txt?
38 0x1100 0x115f 0x2329 0x232a 0x2e80 0x2e99 0x2e9b 0x2ef3
39 0x2f00 0x2fd5 0x2ff0 0x2ffb 0x3000 0x303e 0x3041 0x3096
40 0x3099 0x30ff 0x3105 0x312d 0x3131 0x318e 0x3190 0x31ba
41 0x31c0 0x31e3 0x31f0 0x321e 0x3220 0x3247 0x3250 0x4dbf
42 0x4e00 0xa48c 0xa490 0xa4c6 0xa960 0xa97c 0xac00 0xd7a3
43 0xf900 0xfaff 0xfe10 0xfe19 0xfe30 0xfe52 0xfe54 0xfe66
44 0xfe68 0xfe6b 0xff01 0xffe6 0x1b000 0x1b001 0x1f200 0x1f202
45 0x1f210 0x1f23a 0x1f240 0x1f248 0x1f250 0x1f251 0x20000 0x3fffd
# Open the data file and process it one line at a time; gets returns a
# negative count at end of file, which ends the loop.
48 set f
[open $filename]
49 while {[gets $f buf
] >= 0} {
# Split the record on ";" and bind the fields to named variables. The trailing
# break stops the foreach after its first pass, so it acts purely as a
# multi-variable assignment. Fields bound to x are discarded.
53 foreach {code name class x x x x x x x x x upper
lower title
} [split $buf ";"] break
# codex is the code field with a 0x prefix, lower-cased.
54 set codex
[string tolower
0x
$code]
# Records whose class starts with M are handled via the combining variable:
# the first branch tests whether combining is already set; the elseif runs
# for a non-M record while combining is set and appends the pair
# (combining, current codex) to map(combining).
# NOTE(review): presumably combining marks the start of a run of M-class
# code points and is cleared after the append -- confirm.
55 if {[string match M
* $class]} {
56 if {![info exists combining
]} {
60 } elseif
{[info exists combining
]} {
61 lappend map
(combining
) $combining $codex
# Test for code points above 0xffff (presumably excluded from the case
# tables -- TODO confirm).
67 if {$codex > 0xffff} {
# Test for classes that do not start with L.
70 if {![string match L
* $class]} {
# Append (codex, 0x-prefixed lower-cased upper field) to map(upper).
74 lappend map
(upper
) $codex [string tolower
0x
$upper]
# Append (codex, 0x-prefixed lower-cased lower field) to map(lower).
77 lappend map
(lower) $codex [string tolower
0x
$lower]
# The title field is examined only when it is non-empty and differs from the
# upper field; the case where title equals the record's own code is tested
# separately before (codex, 0x-prefixed lower-cased title) goes to map(title).
79 if {$title ne
"" && $title ne
$upper} {
80 if {$title eq
$code} {
83 lappend map
(title
) $codex [string tolower
0x
$title]
# output-int-pairs list
#   Walks list two elements at a time and writes each pair with
#   puts -nonewline as a tab followed by "{ v1, v2 },".
#   A counter n is incremented once per pair and tested for being a multiple
#   of 4 (presumably to end the output line after every fourth pair --
#   TODO confirm).
88 proc output-int-pairs
{list} {
90 foreach {v1 v2
} $list {
91 puts -nonewline "\t{ $v1, $v2 },"
92 if {[incr n
] % 4 == 0} {
# For each case-mapping type, print the opening line of a C array
# "static const struct casemap unicode_case_mapping_<type>[] = {" and then the
# entries of map(<type>) through output-int-pairs. The brackets and brace in
# the puts string are backslash-escaped so they are printed literally.
101 foreach type
{upper
lower title
} {
102 puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
103 output-int-pairs
$map($type)
# Same pattern for the range tables: each type becomes
# "static const struct utf8range unicode_range_<type>[] = {" followed by the
# entries of map(<type>).
107 foreach type
{combining wide
} {
108 puts "static const struct utf8range unicode_range_$type\[\] = \{"
110 output-int-pairs
$map($type)
112 # Just produce empty width tables in this case