ter-u12b.bdf: fix e? and u+?
[terpinus.git] / ucstoany.pl
blobea10581ab56a039a6e523d423cb375822e3bb041
#!/usr/bin/perl

# ucstoany.pl - re-encode the glyphs of a BDF font according to one or more
# unicode tables (see the --help text below for the full option list).

# Append an empty sentinel argument so that the "$ARGV[0] eq ..." tests made
# while parsing options always see a defined value, even after every real
# argument has been shifted away.  It is popped again after option parsing.
push @ARGV, "";

# --help: print the usage text to STDERR and exit successfully.
if ($ARGV[0] eq "--help") {
    print STDERR <<EOT;
usage: ucstoany.pl [-f][+u|-u]|[+f[+g|-g]] [+o|-o OUTPUT] [+b|-b]
       [+[CHAR]] [--] INPUT REGISTRY ENCODING [TABLE...]

  -f       Filter mode - don't output characters with unicode
           FFFF. This is the default.
  +u       Unicode mode - encode characters with index >= 32 with
           their unicodes. Default for ISO10646-1 output.
  -u       Index mode - encode the characters with their indexes
           (FFFF counts). Default for any other output.

  +f       Fillout mode - encode with unicodes, output characters
           with unicode FFFF as the default character.
  +g       Exchange the characters in range 00...1F with these at
           C0...DF. Default for 8-pixel wide 256-character fonts
           not starting with FFFD.
  -g       Do not exchange. Default for all other fonts.

  +o       Output to INPUT-REGISTRY-ENCODING (using the same name
           as ucs2any, but preserving the INPUT directory).
  -o NAME  Output to NAME.

  +b       Use binary mode for output. Only affects the systems
           that distinguish between text and binary files.
  -b       Use text mode for output. This is the default.

  +[CHAR]  Set the default character to CHAR. If no CHAR is
           specified, the one from INPUT is used.

  --       Terminate the option list.

  INPUT    Any BDF file.

  TABLE    An unicode table. Each line must either be blank or
           contain exactly one hexadecimal unicode consisting of
           maximum 4 digits. If no TABLE(s) are specified, the
           standard input is read.

If no output is specified, the standard output is used.

If no default character is specified, 65533 (FFFD) is used for
unicode and fillout modes, 46 (period) for index mode.

Any options not specified in the above order are treated as
non-option arguments.
EOT
    exit 0;
}

# --version: print the version and license notice to STDERR and exit.
if ($ARGV[0] eq "--version") {
    print STDERR <<EOT;
ucstoany.pl 0.1.0, Copyright (C) 2005 Dimitar Toshkov Zhekov

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

Report bugs to jimmy\@is-vn.bg
EOT
    exit 0;
}
# Return the CHAR part of a "+[CHAR]" option, or undef when $arg is not such
# an option.  The pattern is anchored on purpose: only an argument that
# *starts* with "+" is an option, so an INPUT path that merely contains a
# "+" (e.g. "ter+u12.bdf") is left alone.  A bare "+" yields "".
sub _default_char_option {
    my ($arg) = @_;
    return defined($arg) && $arg =~ /^\+(.*)$/ ? $1 : undef;
}

# Options are positional and must appear in the documented order; each one
# is consumed from the front of @ARGV as it is recognised.

# Mode selection.  $control is the first index that is encoded with its
# unicode instead of its position (0 = fillout, 32 = unicode mode,
# 65536 = index mode); it stays undefined when no mode option is given.
if ($ARGV[0] eq "+f") {
    $control = 0;
    shift @ARGV;

    # +g / -g are only recognised after +f; without either, $exchange stays
    # undefined here.
    if ($ARGV[0] eq "+g") {
        $exchange = 1;
        shift @ARGV;
    }
    elsif ($ARGV[0] eq "-g") {
        $exchange = 0;
        shift @ARGV;
    }
}
else {
    $exchange = 0;
    shift @ARGV if $ARGV[0] eq "-f";

    if ($ARGV[0] eq "+u") {
        $control = 32;
        shift @ARGV;
    }
    elsif ($ARGV[0] eq "-u") {
        $control = 65536;
        shift @ARGV;
    }
}

# Output selection: +o leaves $output as "" (a name is derived later),
# -o NAME stores NAME, neither leaves $output undefined.
if ($ARGV[0] eq "+o") {
    $output = "";
    shift @ARGV;
}
elsif ($ARGV[0] eq "-o") {
    shift @ARGV;
    $ARGV[0] ne "" || die("$0: -o requires a non-empty argument\n");
    $output = shift @ARGV;
}

# Binary/text output mode; text is the default.
$binary = $ARGV[0] eq "+b" ? 1 : 0;
shift @ARGV if $ARGV[0] eq "+b" || $ARGV[0] eq "-b";

# Optional default character: "+CHAR", or a bare "+" which stores "".
my $default_char_option = _default_char_option($ARGV[0]);
if (defined $default_char_option) {
    $default = $default_char_option;
    shift @ARGV;
}

# Explicit end of options, or a warning when the next argument still looks
# like a short option (probably given out of order).
if ($ARGV[0] eq '--') {
    shift @ARGV;
}
elsif ($ARGV[0] =~ /^([-+][0-9a-z])$/) {
    print STDERR "$0: suspicuous $1, use -- to terminate the option list\n";
}

# Drop the trailing element of @ARGV (the empty sentinel pushed at startup).
pop @ARGV;
# Exactly INPUT REGISTRY ENCODING left: no TABLE was named, so add "-" as
# the only table.  Fewer than three arguments is a usage error.
if ($#ARGV == 2) {
    $ARGV[3] = "-";
}
elsif ($#ARGV < 2) {
    die("$0: invalid arguments, try --help\n");
}

open(BDF, "<$ARGV[0]") || die("$0: $ARGV[0]: $!\n");

# Everything up to and including the CHARS line is kept as the font header.
while (my $line = <BDF>) {
    $header .= $line;
    last if $line =~ /^CHARS\s/;
}

# Collect the glyphs: the text of each STARTCHAR..ENDCHAR group is stored in
# %bitmap under the value of its ENCODING line.  Whatever follows the last
# ENDCHAR is left in $buffer.
while (my $line = <BDF>) {
    if ($line =~ /^STARTCHAR\s+(.+)$/) {
        $startchar = $1;
    }
    elsif ($line =~ /^ENCODING\s+(.+)$/) {
        $encoding = $1;
    }

    $buffer .= $line;

    if ($line =~ /^ENDCHAR$/) {
        $startchar ne "" || die("$0: $ARGV[0]: ENDCHAR without STARTCHAR\n");
        $encoding ne "" || die("$0: $ARGV[0]: no ENCODING for $startchar\n");
        $bitmap{$encoding} = $buffer;
        $buffer = $startchar = $encoding = "";
    }
    elsif (!defined($exchange) && $line =~ /^BBX\s+([0-9]+)/ && $1 != 8) {
        # A glyph whose BBX width is not 8 rules out the automatic exchange.
        $exchange = 0;
    }
}

close BDF;
# "-REGISTRY-ENCODING" suffix used for the FONT name and the derived output
# file name.
$charset = "-$ARGV[1]-$ARGV[2]";

# No mode option given: unicode mode for ISO10646-1, index mode otherwise.
unless (defined $control) {
    $control = $charset =~ /^-iso10646-1$/i ? 32 : 65536;
}

# Read every table into @unimap (one code per non-blank line) and count the
# characters that will be written: FFFF entries are only counted in fillout
# mode ($control == 0).
$chars = 0;
foreach my $table (@ARGV[3 .. $#ARGV]) {
    # Two-argument open on purpose: the table name "-" then opens STDIN.
    open(UNI, "<$table") || die("$0: $table: $!\n");

    while (my $line = <UNI>) {
        next if $line =~ /^\s*$/;
        $line =~ /^([0-9a-fA-F]{1,4})$/ || die("$0: $table: invalid unicode $line\n");

        my $code = hex($1);
        push @unimap, $code;
        $chars++ if !$control || $code != 65535;
    }

    close UNI;
}
# Automatic decision when nothing has settled $exchange yet: exchange only
# for exactly 256 output characters whose first table entry is not FFFD.
$exchange = ($chars == 256 && $unimap[0] != 65533) unless defined $exchange;

# Swap table entries 00..1F with entries C0..DF.
if ($exchange) {
    die("$0: not enough characters for exchange\n") if @unimap < 0xE0;

    foreach my $low (0x00 .. 0x1F) {
        my $high = $low + 0xC0;
        @unimap[$low, $high] = @unimap[$high, $low];
    }
}
# Default character: without a "+[CHAR]" option it is 46 in index mode and
# 65533 otherwise; an empty value (bare "+") means "take DEFAULT_CHAR from
# the input font header".
if (!defined $default) {
    $default = $control == 65536 ? 46 : 65533;
}
elsif ($default eq "") {
    $header =~ /^DEFAULT_CHAR\s(.+)$/m || die("$0: $ARGV[0]: unable to obtain DEFAULT_CHAR\n");
    $default = $1;
}
# Build the "+o" output name: insert $suffix before a trailing ".bdf", or
# append it when $input has no such extension.  The dot is escaped so that
# only a literal ".bdf" counts as the extension (a name such as "fontxbdf"
# must not lose its "x").
sub _derived_output_name {
    my ($input, $suffix) = @_;
    return $input =~ /^(.*)\.bdf$/ ? "$1$suffix.bdf" : "$input$suffix";
}

# No output option: write to "-".  "+o" (empty $output): derive the name
# from INPUT and the -REGISTRY-ENCODING suffix.  "-o NAME" is kept as given.
if (!defined $output) {
    $output = "-";
}
elsif ($output eq "") {
    $output = _derived_output_name($ARGV[0], $charset);
}
# Patch the header fields that depend on the new encoding.  Each entry is
# [pattern, text placed after the captured prefix, name used in the error].
# The edits run in this order and the first field that cannot be found
# aborts the program.
my @header_edits = (
    [ qr/^(FONT\s.*)-.*-.*$/m,       $charset,       "FONT registry-encoding" ],
    [ qr/^(CHARSET_REGISTRY\s).*$/m, "\"$ARGV[1]\"", "CHARSET_REGISTRY" ],
    [ qr/^(CHARSET_ENCODING\s).*$/m, "\"$ARGV[2]\"", "CHARSET_ENCODING" ],
    [ qr/^(DEFAULT_CHAR\s).*$/m,     $default,       "DEFAULT_CHAR" ],
    [ qr/^(CHARS\s).*$/m,            $chars,         "CHARS" ],
);

foreach my $edit (@header_edits) {
    my ($pattern, $value, $field) = @$edit;
    $header =~ s/$pattern/$1$value/ || die("$0: $output: unable to change $field\n");
}
# SIGINT handler, installed by name through $SIG{INT}.  It only records that
# an interrupt happened by storing a message in $int.  The message is the
# current text of $!, but $! can be empty when the handler runs (errno 0),
# so fall back to a fixed text rather than leaving a blank message.
sub int {
    my $reason = "$!";
    $int = $reason ne "" ? $reason : "Interrupted";
}
# Abort after output has been opened: close OUT, remove the output file
# unless it is "-" (reporting a failed removal on STDERR), then die with the
# given message.
sub bye {
    close OUT;

    if ($output ne "-" && unlink($output) != 1) {
        print STDERR "$0: $output: $!\n";
    }

    die("@_");
}
# Two-argument open on purpose: an $output of "-" then opens STDOUT.
open(OUT, ">$output") or die("$0: $output: $!\n");

# From here on an interrupt is only recorded (see sub int).
$SIG{INT} = 'int';

if ($binary) {
    binmode(OUT) or bye("$0: $output: $!\n");
}

print OUT $header;
# Write one glyph per table entry.  Entries below $control are encoded with
# their position, the rest with their unicode.
foreach my $slot (0 .. $#unimap) {
    my $code = $unimap[$slot];
    $encoding = $slot >= $control ? $code : $slot;

    # FFFF entries are skipped unless $control is 0 (fillout mode), where
    # the default character's glyph is written instead.
    if ($code == 65535) {
        next if $control;
        $code = $default;
    }

    $bitmap{$code} =~ s/^(ENCODING\s).*$/$1$encoding/m
        || bye("$0: $output: unable to change encoding for $code\n");
    print OUT $bitmap{$code};
}

# Whatever followed the last ENDCHAR in the input.
print OUT $buffer;
close(OUT) || bye("$0: $output: $!\n");

# An interrupt recorded during the run makes the result invalid.
defined($int) && bye("$0: $int\n");