FAT: make all codepage data derived from the same place
[syslinux.git] / codepage / cptable.pl
blob44c710ce025373479834adf97b6a90998f3e7989
#!/usr/bin/perl
#
# Produce a codepage matching table.  For each 8-bit character, list
# a primary and an alternate match (the latter used for case-insensitive
# matching.)
#
# Usage:
#       cptable.pl UnicodeData cpXXX.txt cpXXX.bin
#
# Inputs:
#   UnicodeData  semicolon-separated Unicode character database; only
#                field 0 (code point, hex) and fields 12, 13 and 14
#                (read here as the upper, lower and title case
#                mappings, hex) are used.
#   cpXXX.txt    codepage description: lines of two whitespace-separated
#                hex numbers, "<codepage byte> <Unicode code point>";
#                '#' starts a comment.
#
# Output (cpXXX.bin), all multi-byte values little-endian:
#   2 x 32 bits  magic number
#   6 x 32 bits  reserved header fields, written as zero
#   256 x 8 bits uppercase table, indexed by codepage byte
#   256 x (16 + 16) bits
#                Unicode matching table: primary and alternate code
#                point for each codepage byte (0xffff = no match)
#

use strict;
use warnings;

my ($ucd, $cpin, $cpout) = @ARGV;

# Three-argument open() with an undefined file name silently opens an
# anonymous temporary file, so missing arguments must be caught here.
die "Usage: $0 UnicodeData cpXXX.txt cpXXX.bin\n"
    unless defined $cpout;

#
# Read the case mappings out of the Unicode character database.
#
my %ucase = ();
my %lcase = ();
my %tcase = ();

open(my $ucd_fh, '<', $ucd)
    or die "$0: cannot open $ucd: $!\n";
while (defined(my $line = <$ucd_fh>)) {
    # Strip CR as well as LF: a stray "\r" would otherwise end up in the
    # last field and be recorded as a bogus title case mapping of 0.
    $line =~ s/[\r\n]+$//;

    my @f = split(/;/, $line);
    next unless @f;             # Blank line

    my $n = hex $f[0];

    # split() drops trailing empty fields, so the later fields may be
    # missing altogether rather than empty.
    $ucase{$n} = hex $f[12] if (defined($f[12]) && $f[12] ne '');
    $lcase{$n} = hex $f[13] if (defined($f[13]) && $f[13] ne '');
    $tcase{$n} = hex $f[14] if (defined($f[14]) && $f[14] ne '');
}
close($ucd_fh);

#
# Read the codepage definition.
#
my @xtab = (undef) x 256;       # Codepage -> Unicode
my %tabx = ();                  # Unicode -> Codepage

open(my $cpin_fh, '<', $cpin)
    or die "$0: cannot open $cpin: $!\n";
while (defined(my $line = <$cpin_fh>)) {
    $line =~ s/\s*(\#.*|)$//;   # Remove comments and trailing whitespace
    my @f = split(/\s+/, $line);
    next if (scalar @f != 2);   # Not a "byte codepoint" mapping line

    my $cp  = hex $f[0];
    my $uni = hex $f[1];
    next if ($cp > 255);        # Only 8-bit codepages are handled

    $xtab[$cp]  = $uni;
    $tabx{$uni} = $cp;          # The last mapping seen wins
}
close($cpin_fh);

#
# Write the binary table.
#
open(my $cpout_fh, '>', $cpout)
    or die "$0: cannot create $cpout: $!\n";

# The output is raw binary; without this, platforms that translate
# newlines would corrupt every 0x0a byte in the tables.
binmode($cpout_fh);

# Magic number, in anticipation of being able to load these
# files dynamically...
print {$cpout_fh} pack("VV", 0x8fad232b, 0x9c295319);

# Header fields available for future use...
print {$cpout_fh} pack("VVVVVV", 0, 0, 0, 0, 0, 0);

#
# Self (shortname) uppercase table
#
# For each codepage byte: the codepage byte of its uppercase form, or the
# byte itself if it is unmapped, has no uppercase mapping, or the
# uppercase character does not exist in this codepage.
#
for my $i (0 .. 255) {
    my $u;
    my $uni = $xtab[$i];

    if (defined($uni) && defined($ucase{$uni})) {
        $u = $tabx{$ucase{$uni}};
    }
    $u = $i unless (defined($u));

    print {$cpout_fh} pack("C", $u);
}

#
# Unicode (longname) matching table
#
# For each codepage byte: the Unicode code point it maps to, followed by
# an alternate which is the first available of its upper, lower and title
# case mappings, or the code point itself if it has none.
#
for my $i (0 .. 255) {
    my ($p0, $p1);

    if (!defined($xtab[$i])) {
        $p0 = $p1 = 0xffff;     # Unmapped byte: matches nothing
    } else {
        $p0 = $xtab[$i];
        if (defined($ucase{$p0})) {
            $p1 = $ucase{$p0};
        } elsif (defined($lcase{$p0})) {
            $p1 = $lcase{$p0};
        } elsif (defined($tcase{$p0})) {
            $p1 = $tcase{$p0};
        } else {
            $p1 = $p0;
        }
    }

    # Only the BMP is supported...
    $p0 = 0xffff if ($p0 > 0xffff);
    $p1 = 0xffff if ($p1 > 0xffff);

    print {$cpout_fh} pack("vv", $p0, $p1);
}

# Buffered write errors (e.g. disk full) only surface at close time.
close($cpout_fh)
    or die "$0: error writing $cpout: $!\n";