3 # Generate UTF-8 case mapping tables
5 # (c) 2010 Steve Bennett <steveb@workware.net.au>
7 # See LICENCE for licence details.
10 # Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
11 # and http://unicode.org/Public/UNIDATA/EastAsianWidth.txt
12 # to generate case mapping and display width tables
# Usage text for the command line. The square brackets are backslash-escaped
# so that Tcl does not perform command substitution inside the quoted string.
19 set USAGE
"Usage: parse-unidata.tcl \[-width\] UnicodeData.txt \[EastAsianWidth.txt\]"
# An optional leading -width flag is recognised and then removed from argv,
# leaving only the file arguments.
22 if {[lindex $argv 0] eq
"-width"} {
24 set argv
[lrange $argv 1 end
]
# Any argument count other than one or two enters this branch.
# NOTE(review): the branch body is not visible in this excerpt; presumably it
# reports USAGE and exits - confirm.
27 if {[llength $argv] ni
{1 2}} {
# The first argument names the UnicodeData file and the second the
# EastAsianWidth file. lassign leaves widthfile as the empty string when only
# one argument was given.
32 lassign
$argv unicodefile widthfile
# Read the Unicode data file line by line; gets returns a negative count at
# end of file, which ends the loop.
34 set f
[open $unicodefile]
35 while {[gets $f buf
] >= 0} {
# Records are semicolon-separated. The first three fields are kept as code,
# name and class (the general category in UnicodeData.txt), the last three as
# the upper, lower and title case mappings; x is a throwaway variable that
# soaks up the nine fields in between.
39 lassign
[split $buf ";"] code name class x x x x x x x x x upper
lower title
# codex is the code point in lower-case form with a 0x prefix, so that it can
# be compared numerically below and emitted as a hex literal.
40 set codex
[string tolower
0x
$code]
# Records whose class starts with M are tracked through the variable
# combining.
# NOTE(review): the statement that sets combining is not visible in this
# excerpt - confirm it records the first code point of a run.
41 if {[string match M
* $class]} {
42 if {![info exists combining
]} {
# First record that is not in an M class while combining is set: store the
# pair (value of combining, current code point) in map(combining).
# NOTE(review): the second value is the current code point, which is itself
# not in an M class - confirm consumers treat the range end as exclusive.
46 } elseif
{[info exists combining
]} {
47 lappend map
(combining
) $combining $codex
# Code points above 0xffff get separate handling.
# NOTE(review): the branch body is not visible in this excerpt.
53 if {$codex > 0xffff} {
# Records whose class does not start with L get separate handling; that
# branch body is not visible here either.
56 if {![string match L
* $class]} {
# Case mappings are stored as flat (code point, mapped code point) pairs,
# both values in lower-case 0x form.
# NOTE(review): any guard against empty upper or lower fields is not visible
# in this excerpt.
60 lappend map
(upper
) $codex [string tolower
0x
$upper]
63 lappend map
(lower) $codex [string tolower
0x
$lower]
# A title-case mapping is only considered when it is present and differs from
# the upper-case mapping.
65 if {$title ne
"" && $title ne
$upper} {
# A title-case mapping equal to the character's own code is singled out here.
# NOTE(review): what happens in that case is not visible in this excerpt.
66 if {$title eq
$code} {
69 lappend map
(title
) $codex [string tolower
0x
$title]
# Writes a flat list of values to stdout as brace-wrapped pairs, two list
# elements per entry, each entry preceded by a tab and followed by a comma.
#
# list - flat list of values, consumed two at a time as v1 and v2
74 proc output-int-pairs
{list} {
76 foreach {v1 v2
} $list {
# -nonewline keeps several entries on the same output line.
77 puts -nonewline "\t{ $v1, $v2 },"
# n counts the entries written so far; every fourth entry enters this branch.
# NOTE(review): the branch body and the initialisation of n are not visible in
# this excerpt; presumably the branch ends the output line - confirm.
78 if {[incr n
] % 4 == 0} {
87 # Merges adjacent ranges in a list of ranges (lower upper lower upper ...)
#
# list - flat list of range bounds, consumed two at a time as lower and upper
#
# The merged ranges are accumulated in newlist in the same flat form.
88 proc combine-adjacent-ranges
{list} {
90 foreach {lower upper
} $list {
# prev_upper does not exist on the first iteration, so the first range skips
# the adjacency test.
91 if {[info exists prev_upper
]} {
# Two ranges are adjacent when the new lower bound is exactly one past the
# previous upper bound.
# NOTE(review): the statements of this branch are not visible in this excerpt.
92 if {$lower == $prev_upper + 1} {
# Emit the pending range held in prev_lower and prev_upper.
98 lappend newlist
$prev_lower $prev_upper
# The current range becomes the pending range.
101 set prev_lower
$lower
102 set prev_upper
$upper
104 # Now add the last range
# NOTE(review): if list is empty, prev_lower is never set and this lappend
# would raise an error unless a guard exists in lines not shown - confirm.
105 lappend newlist
$prev_lower $prev_upper
# Emit one C array of struct casemap per case type, named
# unicode_case_mapping_upper, unicode_case_mapping_lower and
# unicode_case_mapping_title, filled from the matching map entry.
109 foreach type
{upper
lower title
} {
# The square brackets and the opening brace are escaped so they are written
# literally.
110 puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
111 output-int-pairs
$map($type)
# Read the width file line by line; gets returns a negative count at end of
# file, which ends the loop.
116 set f
[open $widthfile]
117 while {[gets $f buf
] >= 0} {
# Only lines whose first field is followed by a semicolon and a W are matched.
# The first field, made of hex digits and dots, is captured in range.
118 if {[regexp {^
([0-9A-Fa-f.
]+);W
} $buf -> range
]} {
119 set range
[string tolower
$range]
# Splitting a two-dot range on the dot character yields three elements: the
# lower bound, an empty string from between the dots (discarded into the
# variable named -), and the upper bound.
# NOTE(review): for a line with a single code point, upper is empty after this
# lassign; the handling of that case is not visible in this excerpt.
120 lassign
[split $range .
] lower - upper
# Store the bounds as a pair of 0x-prefixed values in map(wide).
124 lappend map
(wide
) 0x
$lower 0x
$upper
# Emit one C array of struct utf8range per range type, named
# unicode_range_combining and unicode_range_wide.
130 foreach type
{combining wide
} {
131 puts "static const struct utf8range unicode_range_$type\[\] = \{"
# Adjacent ranges are merged before the pairs are written.
133 output-int-pairs
[combine-adjacent-ranges
$map($type)]
135 # Just produce empty width tables in this case