2015-03-03 Andrew Sutton <andrew.n.sutton@gmail.com>
[official-gcc.git] / libjava / scripts / encodings.pl
blob9af108769855c085e29b872d13cce227a802e992
# encodings.pl - Download IANA text and compute alias list.
# Assumes you are running this program from gnu/gcj/convert/.
# Output suitable for direct inclusion in IOConverter.java.
#
# Usage: perl encodings.pl [FILE]
#
# FILE is a local copy of the IANA character-sets list.  When it is
# omitted, `character-sets' in the current directory is used, and it is
# fetched with wget first if it does not exist yet.  One `hash.put'
# line per known encoding name or alias is written to standard output,
# sorted by (lower-cased) name.

use strict;
use warnings;

# Map IANA canonical names onto our canonical names.
my %map = (
    'ANSI_X3.4-1968' => 'ASCII',
    'ISO_8859-1:1987' => '8859_1',
    'UTF-8' => 'UTF8',
    'Shift_JIS' => 'SJIS',
    'Extended_UNIX_Code_Packed_Format_for_Japanese' => 'EUCJIS',
    # NOTE(review): the IANA list appears to spell these UTF-16LE and
    # UTF-16BE; confirm whether these two keys can ever match a Name:
    # line.  They are still emitted below as canonical names.
    'UTF16-LE' => 'UnicodeLittle',
    'UTF16-BE' => 'UnicodeBig'
);

# Return the name of the file to read.  ARG is the first command-line
# argument (possibly undefined or empty); without it we fall back to
# `character-sets' and download it if it is not already present.
sub locate_charset_file
{
    my ($arg) = @_;
    return $arg if defined $arg && $arg ne '';

    my $file = 'character-sets';
    if (! -f $file)
    {
        # Too painful to figure out how to get Perl to do it.
        my $status = system ('wget', '-o', '.wget-log',
                             'http://www.iana.org/assignments/character-sets');
        # Only warn: the open in main decides whether we can go on.
        warn "wget exited with status $status; see .wget-log\n"
            if $status != 0;
    }
    return $file;
}

# Read the IANA list from the filehandle INPUT and return a reference
# to a hash mapping every lower-cased name and alias to our canonical
# name.  MAP is a reference to the IANA-name => our-name table; only
# entries whose Name: appears in MAP contribute aliases.
sub collect_aliases
{
    my ($input, $map) = @_;
    my %output;

    # Include canonical names in the output.
    foreach my $key (keys %$map)
    {
        $output{lc ($key)} = $map->{$key};
    }

    my $body = 0;      # True once the first Name: line has been seen.
    my $current = '';  # Our name for the entry being read, if mapped.
    while (my $line = <$input>)
    {
        # chomp, not chop: the last line may lack a newline, and chop
        # would then eat a real character of the name.
        chomp $line;
        $line =~ s/\r\z//;

        $body = 1 if $line =~ /^Name:/;
        next unless $body;

        # A blank line ends the current entry.
        if ($line eq '')
        {
            $current = '';
            next;
        }

        my ($type, $name) = split (/\s+/, $line);
        # Whitespace-only or single-token lines leave these undefined.
        $type = '' unless defined $type;
        $name = '' unless defined $name;

        # Encoding names are case-insensitive.  We do all processing on
        # the lower-case form.
        my $lower = lc ($name);
        if ($type eq 'Name:')
        {
            $current = $map->{$name};
            if ($current)
            {
                $output{$lower} = $current;
            }
        }
        elsif ($type eq 'Alias:')
        {
            # The IANA list has some ugliness.
            if ($name ne '' && $lower ne 'none' && $current)
            {
                $output{$lower} = $current;
            }
        }
    }

    return \%output;
}

# Turn the alias table ALIASES (a hash reference) into the list of Java
# source lines, one per key, sorted by key.
sub java_put_lines
{
    my ($aliases) = @_;
    return map { " hash.put (\"$_\", \"$aliases->{$_}\");\n" }
           sort keys %$aliases;
}

# Entry point: ARGS are the command-line arguments.
sub main
{
    my @args = @_;
    my $file = locate_charset_file ($args[0]);

    # Three-argument open so that odd characters in the file name are
    # never interpreted as an open mode or a pipe.
    open (my $input, '<', $file) or die "couldn't open $file: $!";
    my $output = collect_aliases ($input, \%map);
    close ($input);

    my @lines = java_put_lines ($output);
    print @lines;
    return 0;
}

# Run only when executed as a program, so the helpers above can be
# loaded on their own.
main (@ARGV) unless caller;

1;