Add support for creating codepage/Unicode mapping tables
[syslinux.git] / codepage / gensubset.pl
blob 5fde460f53ad74467095aac7708a6f7e07c61a63
#!/usr/bin/perl
#
# Generate a subset of the UnicodeData.txt file, available from
# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
#
# Usage:
#   gensubset.pl [subset files] < UnicodeData.txt > MiniUCD.txt
#
# Each subset file named on the command line is read line by line.  After
# '#' comments and trailing blanks are stripped, a line made of exactly two
# whitespace-separated fields contributes its second field (parsed as a
# hexadecimal number, with or without a leading "0x") to the set of wanted
# code points.  All other lines are ignored.
#
# Records read from standard input are then copied to standard output if
# their first ';'-separated field names a wanted code point.  The first
# field may be a single hexadecimal value or a "FIRST-LAST" range; a range
# is expanded into one output record per wanted code point it contains.
#

use strict;
use warnings;

# Wanted code points, keyed by numeric value.
my %need_these = ();

foreach my $file (@ARGV) {
    open(my $fh, '<', $file)
        or die "$0: cannot open $file: $!\n";

    while (defined(my $line = <$fh>)) {
        $line =~ s/\s*(\#.*|)$//;   # Remove comments and final blanks

        # split ' ' (awk mode) discards leading whitespace, so an indented
        # line is counted by its real number of fields rather than gaining
        # an empty first field.
        my @f = split(' ', $line);
        next if (scalar @f != 2);

        $need_these{hex $f[1]}++;
    }

    close($fh);
}

while (defined(my $line = <STDIN>)) {
    # $rest keeps everything after the first ';', including the newline.
    my ($v, $rest) = split(/;/, $line, 2);

    # A record without any ';' has nothing to copy; skip it instead of
    # emitting a truncated, unterminated output record.
    next unless defined $rest;

    my ($r1, $r2);
    if ($v =~ /^([0-9a-f]+)\-([0-9a-f]+)$/i) {
        $r1 = hex $1;
        $r2 = hex $2;
    } elsif ($v =~ /^([0-9a-f]+)$/i) {
        $r1 = $r2 = hex $1;
    } else {
        next;                       # First field is not a code point
    }

    for (my $r = $r1; $r <= $r2; $r++) {
        printf "%04X;%s", $r, $rest if ($need_these{$r});
    }
}