Make some more static data read-only
[jimtcl.git] / parse-unidata.tcl
blob4b5ec3aae4cceffb150431ac799966c71ca18793
#!/usr/bin/env tclsh

# Generate UTF-8 case mapping tables
#
# (c) 2010 Steve Bennett <steveb@workware.net.au>
#
# See LICENCE for licence details.
#/

# Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
# to generate case mapping tables
#
# Usage: parse-unidata.tcl UnicodeData.txt
#
# Two C tables are written to stdout:
#   unicode_case_mapping[] - one "{ code, l, u }" row per cased character,
#                            where l and u are the lower/upper deltas from code
#   unicode_extmap[]       - "{ lower, upper }" rows for characters whose
#                            deltas do not fit in the range -127..127

if {[llength $argv] < 1} {
	puts stderr "Usage: $argv0 UnicodeData.txt"
	exit 1
}

set f [open [lindex $argv 0]]

# Index of the next free slot in the extended (indirect) table
set extoff 0

# Flat list of {code lower upper} triples needing indirection.
# Must start out empty so the final foreach works even when no
# character requires an extended entry.
set jumptable {}

puts "static const struct casemap unicode_case_mapping\[\] = \{"
while {[gets $f buf] >= 0} {
	# Semicolon-separated record: field 0 is the code point, field 2 the
	# general category, fields 12 and 13 the upper/lower case mappings.
	# The fields in between are skipped via the throwaway variable x.
	foreach {code name class x x x x x x x x x upper lower} [split $buf ";"] break
	set code 0x$code

	# ASCII is not included in the table
	if {$code <= 0x7f} {
		continue
	}
	# Only code points up to 0xffff are covered; stopping here relies on
	# the input being sorted by code point
	if {$code > 0xffff} {
		break
	}
	# Only upper case (Lu) and lower case (Ll) letters are of interest
	if {$class ne "Lu" && $class ne "Ll"} {
		continue
	}

	# An empty mapping field means the character maps to itself
	if {$upper eq ""} {
		set upper $code
	} else {
		set upper 0x$upper
	}
	if {$lower eq ""} {
		set lower $code
	} else {
		set lower 0x$lower
	}

	# Nothing to record if neither mapping changes the character
	if {$upper == $code && $lower == $code} {
		continue
	}

	set l [expr {$lower - $code}]
	set u [expr {$upper - $code}]
	if {abs($u) > 127 || abs($l) > 127} {
		# Can't encode both in one byte, so use indirection:
		# l becomes the marker value -128 and u becomes the index
		# of this character's row in unicode_extmap
		lappend jumptable $code $lower $upper
		set l -128
		set u $extoff
		incr extoff
		if {$extoff > 0xff} {
			error "Too many entries in the offset table!"
		}
	}

	set entry [string tolower "$code, $l, $u"]
	puts " { $entry },"
}
close $f
puts "\};\n"

# Now the jump table
puts "static const struct caseextmap unicode_extmap\[\] = \{"
foreach {c l u} $jumptable {
	puts " { $l, $u },"
}
puts "\};\n"