build: fix warnings on mingw
[jimtcl.git] / parse-unidata.tcl
blob5ae237fd23569719859205fa948316ff530e47c5
1 #!/usr/bin/env tclsh
3 # Generate UTF-8 case mapping tables
5 # (c) 2010 Steve Bennett <steveb@workware.net.au>
7 # See LICENCE for licence details.
8 #/
10 # Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
11 # to generate case mapping and display width tables
12 set map(lower) {}
13 set map(upper) {}
14 set map(title) {}
15 set map(combining) {}
17 set USAGE "Usage: parse-unidata.tcl \[-width\] UnicodeData.txt"
19 set do_width 0
20 foreach arg $argv {
21 if {$arg eq "-width"} {
22 incr do_width
23 } else {
24 if {[info exists filename]} {
25 puts stderr $USAGE
26 exit 1
28 set filename $arg
31 if {![info exists filename]} {
32 puts stderr $USAGE
33 exit 1
36 # Why isn't this available in UnicodeData.txt?
37 set map(wide) {
38 0x1100 0x115f 0x2329 0x232a 0x2e80 0x2e99 0x2e9b 0x2ef3
39 0x2f00 0x2fd5 0x2ff0 0x2ffb 0x3000 0x303e 0x3041 0x3096
40 0x3099 0x30ff 0x3105 0x312d 0x3131 0x318e 0x3190 0x31ba
41 0x31c0 0x31e3 0x31f0 0x321e 0x3220 0x3247 0x3250 0x4dbf
42 0x4e00 0xa48c 0xa490 0xa4c6 0xa960 0xa97c 0xac00 0xd7a3
43 0xf900 0xfaff 0xfe10 0xfe19 0xfe30 0xfe52 0xfe54 0xfe66
44 0xfe68 0xfe6b 0xff01 0xffe6 0x1b000 0x1b001 0x1f200 0x1f202
45 0x1f210 0x1f23a 0x1f240 0x1f248 0x1f250 0x1f251 0x20000 0x3fffd
48 set f [open $filename]
49 while {[gets $f buf] >= 0} {
50 set title ""
51 set lower ""
52 set upper ""
53 foreach {code name class x x x x x x x x x upper lower title} [split $buf ";"] break
54 set codex [string tolower 0x$code]
55 if {[string match M* $class]} {
56 if {![info exists combining]} {
57 set combining $codex
59 continue
60 } elseif {[info exists combining]} {
61 lappend map(combining) $combining $codex
62 unset combining
64 if {$codex <= 0x7f} {
65 continue
67 if {$codex > 0xffff} {
68 break
70 if {![string match L* $class]} {
71 continue
73 if {$upper ne ""} {
74 lappend map(upper) $codex [string tolower 0x$upper]
76 if {$lower ne ""} {
77 lappend map(lower) $codex [string tolower 0x$lower]
79 if {$title ne "" && $title ne $upper} {
80 if {$title eq $code} {
81 set title 0
83 lappend map(title) $codex [string tolower 0x$title]
86 close $f
88 proc output-int-pairs {list} {
89 set n 0
90 foreach {v1 v2} $list {
91 puts -nonewline "\t{ $v1, $v2 },"
92 if {[incr n] % 4 == 0} {
93 puts ""
96 if {$n % 4} {
97 puts ""
101 foreach type {upper lower title} {
102 puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
103 output-int-pairs $map($type)
104 puts "\};\n"
107 foreach type {combining wide} {
108 puts "static const struct utf8range unicode_range_$type\[\] = \{"
109 if {$do_width} {
110 output-int-pairs $map($type)
111 } else {
112 # Just produce empty width tables in this case
113 output-int-pairs {}
115 puts "\};\n"