Use TopfieldUSBEcode for received values too.
[MacTF.git] / to-UTF-16.pl
blob44a3ab26cdc2c21cbdd4ef24f3976a5022452e9f
#! /usr/bin/perl
#
# to-UTF-16.pl -- convert a text file to UTF-16 by handing it to iconv(1).
#
# Usage: to-UTF-16.pl INPUT OUTPUT
#
# The first four bytes of INPUT are inspected for a Unicode byte order
# mark (BOM) to choose the source charset passed to iconv.  If no BOM is
# present the input is assumed to be UTF-8.  A UTF-8 BOM is skipped; for
# UTF-16 and UTF-32 the BOM is left in place, since iconv uses it to
# determine the byte order of the generic "UTF-16"/"UTF-32" charsets.

use strict;
use warnings;

my($in_name, $out_name) = @ARGV;
defined $out_name or die "Usage: $0 INPUT OUTPUT\n";

open my $in_h, "<", $in_name or die "$in_name: $!";
binmode $in_h or die "$in_name: $!";

# Use unbuffered sysread/sysseek so that the position of the underlying
# file descriptor -- which iconv inherits below -- is exactly what we set.
# A file shorter than four bytes simply yields a shorter $head.
my $head;
defined(sysread $in_h, $head, 4) or die "$in_name: $!";

# Known BOMs: [ signature bytes, description, bytes to skip, iconv charset ].
# Order matters: the little-endian UTF-32 BOM (FF FE 00 00) begins with the
# little-endian UTF-16 BOM (FF FE), so the four-byte signatures must be
# tested first.  (A UTF-16LE file whose first character is U+0000 is
# therefore indistinguishable from UTF-32LE; that ambiguity is inherent
# in BOM sniffing.)
my @boms = (
    [ "\x00\x00\xFE\xFF", "big-endian UTF-32",    0, "UTF-32" ],
    [ "\xFF\xFE\x00\x00", "little-endian UTF-32", 0, "UTF-32" ],
    [ "\xFE\xFF",         "big-endian UTF-16",    0, "UTF-16" ],
    [ "\xFF\xFE",         "little-endian UTF-16", 0, "UTF-16" ],
    [ "\xEF\xBB\xBF",     "UTF-8",                3, "UTF-8"  ],
);

my($skip, $charset);
for my $bom (@boms) {
    my($signature, $label, $bom_skip, $bom_charset) = @$bom;
    next unless index($head, $signature) == 0;
    print "$in_name: Found $label BOM\n";
    ($skip, $charset) = ($bom_skip, $bom_charset);
    last;
}
if (!defined $charset) {
    print "$in_name: No BOM found -- assuming UTF-8\n";
    ($skip, $charset) = (0, "UTF-8");
}

# Rewind to where iconv should start reading.  sysseek returns the string
# "0 but true" for position zero, so "or die" only fires on real failure.
sysseek($in_h, $skip, 0) or die "$in_name: $!";

# Point our own stdout/stdin at the output and input files, then replace
# this process with iconv so that it reads and writes them directly.
open STDOUT, ">", $out_name or die "$out_name: $!";
open STDIN, "<&", $in_h or die "$in_name: $!";
exec "iconv", "-f", $charset, "-t", "UTF-16"
    or die "exec iconv: $!";