zlib: Don't use PASTE for INTMAX error messages
[jimtcl.git] / parse-unidata.tcl
blob348a114a34e177baf96fcbb99626e598e86de8b6
#!/usr/bin/env tclsh

# Generate UTF-8 case mapping tables
#
# (c) 2010 Steve Bennett <steveb@workware.net.au>
#
# See LICENCE for licence details.
#/

# Parse the unicode data from: http://unicode.org/Public/UNIDATA/UnicodeData.txt
# to generate case mapping tables
#
# Usage: parse-unidata.tcl UnicodeData.txt
#
# Reads the semicolon-separated file named by the first argument and writes
# three C array definitions (unicode_case_mapping_upper, _lower and _title)
# to stdout. Each array entry is a "{ code, alt }" pair of lowercase hex
# literals.

# Fail with a clear message instead of letting [open ""] raise an obscure error.
if {[llength $argv] < 1} {
    puts stderr "Usage: $argv0 UnicodeData.txt"
    exit 1
}

# Each map is a flat list of alternating {code alt} values, in input order.
set map(lower) {}
set map(upper) {}
set map(title) {}

set f [open [lindex $argv 0]]
while {[gets $f buf] >= 0} {
    # Fields used: 0 = code point, 2 = general category,
    # 12 = uppercase mapping, 13 = lowercase mapping, 14 = titlecase mapping.
    # [lindex] yields "" for fields missing from a short line.
    set fields [split $buf ";"]
    set code  [lindex $fields 0]
    set class [lindex $fields 2]
    set upper [lindex $fields 12]
    set lower [lindex $fields 13]
    set title [lindex $fields 14]

    # Skip blank lines (or lines with no code point) explicitly rather than
    # comparing a bare "0x" against a number below.
    if {$code eq ""} {
        continue
    }

    set codex [string tolower 0x$code]

    # Code points up to 0x7f are not emitted into the tables.
    if {$codex <= 0x7f} {
        continue
    }
    # Stop reading at the first code point above 0xffff; nothing after it
    # in the file is processed.
    if {$codex > 0xffff} {
        break
    }
    # Only entries whose general category starts with "L" are considered.
    if {![string match L* $class]} {
        continue
    }

    if {$upper ne ""} {
        lappend map(upper) $codex [string tolower 0x$upper]
    }
    if {$lower ne ""} {
        lappend map(lower) $codex [string tolower 0x$lower]
    }
    # A title mapping is recorded only when it differs from the upper mapping.
    if {$title ne "" && $title ne $upper} {
        # A code point that is its own titlecase form is recorded as 0x0.
        if {$title eq $code} {
            set title 0
        }
        lappend map(title) $codex [string tolower 0x$title]
    }
}
close $f

# Emit one C array per mapping type.
foreach type {upper lower title} {
    puts "static const struct casemap unicode_case_mapping_$type\[\] = \{"
    foreach {code alt} $map($type) {
        puts "\t{ $code, $alt },"
    }
    puts "\};\n"
}