tcltest: do a better job of cleanup up after tests
[jimtcl.git] / parse-unidata.tcl
blobe5b0c47bea6db94303d1e53ccec4b3dfb0cc6421
1 #!/usr/bin/env tclsh
3 # Generate UTF-8 case mapping tables
5 # (c) 2010 Steve Bennett <steveb@workware.net.au>
7 # See LICENCE for licence details.
8 #/
10 # Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
11 # and http://unicode.org/Public/UNIDATA/EastAsianWidth.txt
12 # to generate case mapping and display width tables
13 set map(lower) {}
14 set map(upper) {}
15 set map(title) {}
16 set map(combining) {}
17 set map(wide) {}
19 set USAGE "Usage: parse-unidata.tcl \[-width\] UnicodeData.txt \[EastAsianWidth.txt\]"
20 set do_width 0
22 if {[lindex $argv 0] eq "-width"} {
23 set do_width 1
24 set argv [lrange $argv 1 end]
27 if {[llength $argv] ni {1 2}} {
28 puts stderr $USAGE
29 exit 1
32 lassign $argv unicodefile widthfile
34 set f [open $unicodefile]
35 while {[gets $f buf] >= 0} {
36 set title ""
37 set lower ""
38 set upper ""
39 lassign [split $buf ";"] code name class x x x x x x x x x upper lower title
40 set codex [string tolower 0x$code]
41 if {[string match M* $class]} {
42 if {![info exists combining]} {
43 set combining $codex
45 continue
46 } elseif {[info exists combining]} {
47 lappend map(combining) $combining $codex
48 unset combining
50 if {$codex <= 0x7f} {
51 continue
53 if {$codex > 0xffff} {
54 break
56 if {![string match L* $class]} {
57 continue
59 if {$upper ne ""} {
60 lappend map(upper) $codex [string tolower 0x$upper]
62 if {$lower ne ""} {
63 lappend map(lower) $codex [string tolower 0x$lower]
65 if {$title ne "" && $title ne $upper} {
66 if {$title eq $code} {
67 set title 0
69 lappend map(title) $codex [string tolower 0x$title]
72 close $f
74 proc output-int-pairs {list} {
75 set n 0
76 foreach {v1 v2} $list {
77 puts -nonewline "\t{ $v1, $v2 },"
78 if {[incr n] % 4 == 0} {
79 puts ""
82 if {$n % 4} {
83 puts ""
88 foreach type {upper lower title} {
89 puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
90 output-int-pairs $map($type)
91 puts "\};\n"
94 if {$do_width} {
95 set f [open $widthfile]
96 while {[gets $f buf] >= 0} {
97 if {[regexp {^([0-9A-F.]+);W} $buf -> range]} {
98 lassign [split $range .] lower - upper
99 if {$upper eq ""} {
100 set upper $lower
102 set lower 0x$lower
103 set upper 0x$upper
104 if {[info exists endrange]} {
105 if {$upper == $endrange + 1} {
106 # Just extend the range
107 set endrange $upper
108 continue
110 lappend map(wide) $startrange $endrange
112 set startrange $lower
113 set endrange $upper
116 close $f
119 foreach type {combining wide} {
120 puts "static const struct utf8range unicode_range_$type\[\] = \{"
121 if {$do_width} {
122 output-int-pairs $map($type)
123 } else {
124 # Just produce empty width tables in this case
125 output-int-pairs {}
127 puts "\};\n"