Add a general purpose hashtable pattern matcher
[jimtcl.git] / parse-unidata.tcl
blob1a927a310964b65f62f2314e12e5c006be7857d0
#!/usr/bin/env tclsh

# Generate UTF-8 case mapping tables
#
# (c) 2010 Steve Bennett <steveb@workware.net.au>
#
# See LICENCE for licence details.
#/

# Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
# to generate case mapping tables
#
# Usage: parse-unidata.tcl UnicodeData.txt
#
# Two C arrays of 'struct casemap' initialisers are written to stdout:
# unicode_case_mapping_upper and unicode_case_mapping_lower. Each entry is
# a { code, mapped-code } pair of lower-case hex literals.

# Fail with a readable message instead of an obscure error from 'open'.
# Extra arguments are ignored, as before.
if {[llength $argv] < 1} {
	puts stderr "Usage: parse-unidata.tcl UnicodeData.txt"
	exit 1
}

# Flat {code mapped-code ...} lists, one per mapping direction
set map(lower) {}
set map(upper) {}

set f [open [lindex $argv 0]]
while {[gets $f buf] >= 0} {
	# Each line is a ';'-separated record. Only the code (field 0), the
	# class (field 2) and the upper/lower mappings (fields 12 and 13) are
	# used; 'x' soaks up the fields in between. The 'foreach ... break'
	# form assigns the list elements to the variables in a single pass.
	foreach {code name class x x x x x x x x x upper lower} [split $buf ";"] break

	# Turn the bare hex digits into a lower-case 0x literal usable both in
	# Tcl expressions and in the generated C source
	set code [string tolower 0x$code]

	# Code points up to 0x7f are not included in the tables
	if {$code <= 0x7f} {
		continue
	}
	# Stop reading at the first code point above 0xffff. This relies on
	# the input being in ascending code point order.
	if {$code > 0xffff} {
		break
	}
	# Only classes beginning with "L" are considered
	if {![string match L* $class]} {
		continue
	}
	if {$upper ne ""} {
		lappend map(upper) $code [string tolower 0x$upper]
	}
	if {$lower ne ""} {
		lappend map(lower) $code [string tolower 0x$lower]
	}
}
close $f

# Emit one C table per mapping direction
foreach type {upper lower} {
	puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
	foreach {code alt} $map($type) {
		puts "\t{ $code, $alt },"
	}
	puts "\};\n"
}