3 # Generate UTF-8 case mapping tables
5 # (c) 2010 Steve Bennett <steveb@workware.net.au>
7 # See LICENCE for licence details.
10 # Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
11 # and http://unicode.org/Public/UNIDATA/EastAsianWidth.txt
12 # to generate case mapping and display width tables
# Usage text. The square brackets are backslash-escaped so that Tcl does not
# treat them as command substitution inside the double-quoted string.
19 set USAGE
"Usage: parse-unidata.tcl \[-width\] UnicodeData.txt \[EastAsianWidth.txt\]"
# An optional leading -width flag is recognised and removed from argv, so the
# remaining arguments are only file names.
22 if {[lindex $argv 0] eq
"-width"} {
24 set argv
[lrange $argv 1 end
]
# After flag removal, only one or two arguments are acceptable.
27 if {[llength $argv] ni
{1 2}} {
# First argument is the Unicode data file, the second (optional) is the width
# file. lassign leaves widthfile as an empty string when only one is given.
32 lassign
$argv unicodefile widthfile
# Read the Unicode data file one line at a time.
34 set f
[open $unicodefile]
35 while {[gets $f buf
] >= 0} {
# Each record is ';'-separated: field 1 is the code point, field 2 the name,
# field 3 the general category. The nine fields assigned to x are discarded,
# and the following three are the upper, lower and title case mappings.
39 lassign
[split $buf ";"] code name class x x x x x x x x x upper
lower title
# codex is the code point as a lowercase 0x-prefixed hex literal, which lets it
# take part in the numeric comparison further down.
40 set codex
[string tolower
0x
$code]
# Categories beginning with M are tracked through the variable 'combining'.
# NOTE(review): presumably 'combining' holds the first code point of the
# current run of M-class characters -- its assignment is not shown; confirm.
41 if {[string match M
* $class]} {
42 if {![info exists combining
]} {
46 } elseif
{[info exists combining
]} {
# A non-M character while 'combining' is set: record the saved value together
# with the current code point as a pair in map(combining).
47 lappend map
(combining
) $combining $codex
# Code points above 0xffff (outside the Basic Multilingual Plane) are
# special-cased here.
53 if {$codex > 0xffff} {
# Characters whose category does not begin with L are special-cased here.
# NOTE(review): presumably they are skipped for case mapping -- confirm.
56 if {![string match L
* $class]} {
# Upper- and lower-case mappings are appended as (code point, mapped code
# point) pairs, both written as lowercase hex literals.
60 lappend map
(upper
) $codex [string tolower
0x
$upper]
63 lappend map
(lower) $codex [string tolower
0x
$lower]
# A title-case mapping is considered only when it is present and differs from
# the upper-case mapping; a title mapping equal to the code point itself gets
# separate treatment in the inner test.
65 if {$title ne
"" && $title ne
$upper} {
66 if {$title eq
$code} {
69 lappend map
(title
) $codex [string tolower
0x
$title]
# output-int-pairs list
#
# Writes a flat list to stdout as C initialiser entries.
#
#   list - flat list consumed two elements at a time (v1 v2 v1 v2 ...)
#
# Each pair is printed as a tab followed by a brace-wrapped "v1, v2" entry and
# a trailing comma, with no newline. The counter n is incremented once per
# pair, and every fourth pair triggers the inner branch.
# NOTE(review): presumably that branch emits a line break so that four entries
# appear per output line -- confirm.
74 proc output-int-pairs
{list} {
76 foreach {v1 v2
} $list {
77 puts -nonewline "\t{ $v1, $v2 },"
78 if {[incr n
] % 4 == 0} {
# Emit one C array of struct casemap per mapping kind. The array is named
# unicode_case_mapping_<type> and its entries come from map(<type>) by way of
# output-int-pairs.
88 foreach type
{upper
lower title
} {
89 puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
90 output-int-pairs
$map($type)
# Read the width file and pick out the lines that start with a hex code point
# or code point range followed by ';W'. The matched range text goes to 'range'.
95 set f
[open $widthfile]
96 while {[gets $f buf
] >= 0} {
97 if {[regexp {^
([0-9A-F.
]+);W
} $buf -> range
]} {
# A range is written LOWER..UPPER. Splitting on '.' yields an empty middle
# element, which is discarded through the '-' placeholder. For a single code
# point, upper is left as an empty string by lassign.
98 lassign
[split $range .
] lower - upper
# endrange exists once an earlier range is pending. A new range that directly
# follows the pending one extends it; otherwise the pending pair is flushed.
104 if {[info exists endrange
]} {
105 if {$upper == $endrange + 1} {
106 # Just extend the range
# NOTE(review): adjacency is tested on $upper, not $lower. If endrange holds
# the previous upper bound, this can only be true for a single-code-point
# entry, so a multi-code-point range starting at endrange + 1 would not be
# merged. Confirm whether $lower was intended.
# Flush the pending (startrange, endrange) pair to map(wide).
110 lappend map
(wide
) $startrange $endrange
# Start a new pending range at the current lower bound.
112 set startrange
$lower
# Emit the combining and wide tables as C arrays of struct utf8range. Each is
# named unicode_range_<type> and is filled from map(<type>) by way of
# output-int-pairs.
119 foreach type
{combining wide
} {
120 puts "static const struct utf8range unicode_range_$type\[\] = \{"
122 output-int-pairs
$map($type)
124 # Just produce empty width tables in this case