Add recodetree script to convert non-ASCII file names to UTF-8
[msysgit.git] / bin / piconv
blobf13daf81235f58ffe23848fc7312a4072fb11de4
1 #!/usr/bin/perl
2 eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
3 if $running_under_some_shell;
4 #!./perl
5 # $Id: piconv,v 2.1 2004/10/06 05:07:20 dankogai Exp $
7 use 5.8.0;
8 use strict;
9 use Encode ;
10 use Encode::Alias;
11 my %Scheme = map {$_ => 1} qw(from_to decode_encode perlio);
13 use File::Basename;
14 my $name = basename($0);
16 use Getopt::Long qw(:config no_ignore_case);
18 my %Opt;
20 help()
21 unless
22 GetOptions(\%Opt,
23 'from|f=s',
24 'to|t=s',
25 'list|l',
26 'string|s=s',
27 'check|C=i',
28 'c',
29 'perlqq|p',
30 'debug|D',
31 'scheme|S=s',
32 'resolve|r=s',
33 'help',
36 $Opt{help} and help();
37 $Opt{list} and list_encodings();
38 my $locale = $ENV{LC_CTYPE} || $ENV{LC_ALL} || $ENV{LANG};
39 defined $Opt{resolve} and resolve_encoding($Opt{resolve});
40 $Opt{from} || $Opt{to} || help();
41 my $from = $Opt{from} || $locale or help("from_encoding unspecified");
42 my $to = $Opt{to} || $locale or help("to_encoding unspecified");
43 $Opt{string} and Encode::from_to($Opt{string}, $from, $to) and print $Opt{string} and exit;
44 my $scheme = exists $Scheme{$Opt{Scheme}} ? $Opt{Scheme} : 'from_to';
45 $Opt{check} ||= $Opt{c};
46 $Opt{perlqq} and $Opt{check} = Encode::FB_PERLQQ;
48 if ($Opt{debug}){
49 my $cfrom = Encode->getEncoding($from)->name;
50 my $cto = Encode->getEncoding($to)->name;
51 print <<"EOT";
52 Scheme: $scheme
53 From: $from => $cfrom
54 To: $to => $cto
55 EOT
58 # we do not use <> (or ARGV) for the sake of binmode()
59 @ARGV or push @ARGV, \*STDIN;
61 unless ($scheme eq 'perlio'){
62 binmode STDOUT;
63 for my $argv (@ARGV){
64 my $ifh = ref $argv ? $argv : undef;
65 $ifh or open $ifh, "<", $argv or next;
66 binmode $ifh;
67 if ($scheme eq 'from_to'){ # default
68 while(<$ifh>){
69 Encode::from_to($_, $from, $to, $Opt{check});
70 print;
72 }elsif ($scheme eq 'decode_encode'){ # step-by-step
73 while(<$ifh>){
74 my $decoded = decode($from, $_, $Opt{check});
75 my $encoded = encode($to, $decoded);
76 print $encoded;
78 } else { # won't reach
79 die "$name: unknown scheme: $scheme";
82 }else{
83 # NI-S favorite
84 binmode STDOUT => "raw:encoding($to)";
85 for my $argv (@ARGV){
86 my $ifh = ref $argv ? $argv : undef;
87 $ifh or open $ifh, "<", $argv or next;
88 binmode $ifh => "raw:encoding($from)";
89 print while(<$ifh>);
93 sub list_encodings{
94 print join("\n", Encode->encodings(":all")), "\n";
95 exit 0;
98 sub resolve_encoding {
99 if (my $alias = Encode::resolve_alias($_[0])) {
100 print $alias, "\n";
101 exit 0;
102 } else {
103 warn "$name: $_[0] is not known to Encode\n";
104 exit 1;
108 sub help{
109 my $message = shift;
110 $message and print STDERR "$name error: $message\n";
111 print STDERR <<"EOT";
112 $name [-f from_encoding] [-t to_encoding] [-s string] [files...]
113 $name -l
114 $name -r encoding_alias
115 -l,--list
116 lists all available encodings
117 -r,--resolve encoding_alias
118 resolve encoding to its (Encode) canonical name
119 -f,--from from_encoding
120 when omitted, the current locale will be used
121 -t,--to to_encoding
122 when omitted, the current locale will be used
123 -s,--string string
124 "string" will be the input instead of STDIN or files
125 The following are mainly of interest to Encode hackers:
126 -D,--debug show debug information
127 -C N | -c | -p check the validity of the input
128 -S,--scheme scheme use the scheme for conversion
130 exit;
133 __END__
135 =head1 NAME
137 piconv -- iconv(1), reinvented in perl
139 =head1 SYNOPSIS
141 piconv [-f from_encoding] [-t to_encoding] [-s string] [files...]
142 piconv -l
143 piconv [-C N|-c|-p]
144 piconv -S scheme ...
145 piconv -r encoding
146 piconv -D ...
147 piconv -h
149 =head1 DESCRIPTION
151 B<piconv> is perl version of B<iconv>, a character encoding converter
152 widely available for various Unixen today. This script was primarily
153 a technology demonstrator for Perl 5.8.0, but you can use piconv in the
154 place of iconv for virtually any case.
156 piconv converts the character encoding of either STDIN or files
157 specified in the argument and prints out to STDOUT.
159 Here is the list of options. Each option can be in short format (-f)
160 or long (--from).
162 =over 4
164 =item -f,--from from_encoding
166 Specifies the encoding you are converting from. Unlike B<iconv>,
167 this option can be omitted. In such cases, the current locale is used.
169 =item -t,--to to_encoding
171 Specifies the encoding you are converting to. Unlike B<iconv>,
172 this option can be omitted. In such cases, the current locale is used.
174 Therefore, when both -f and -t are omitted, B<piconv> just acts
175 like B<cat>.
177 =item -s,--string I<string>
179 uses I<string> instead of file for the source of text.
181 =item -l,--list
183 Lists all available encodings, one per line, in case-insensitive
184 order. Note that only the canonical names are listed; many aliases
185 exist. For example, the names are case-insensitive, and many standard
186 and common aliases work, such as "latin1" for "ISO-8859-1", or "ibm850"
187 instead of "cp850", or "winlatin1" for "cp1252". See L<Encode::Supported>
188 for a full discussion.
190 =item -C,--check I<N>
192 Check the validity of the stream if I<N> = 1. When I<N> = -1, something
193 interesting happens when it encounters an invalid character.
195 =item -c
197 Same as C<-C 1>.
199 =item -p,--perlqq
201 Same as C<-C -1>.
203 =item -h,--help
205 Show usage.
207 =item -D,--debug
209 Invokes debugging mode. Primarily for Encode hackers.
211 =item -S,--scheme scheme
213 Selects which scheme is to be used for conversion. Available schemes
214 are as follows:
216 =over 4
218 =item from_to
220 Uses Encode::from_to for conversion. This is the default.
222 =item decode_encode
224 Input strings are decode()d then encode()d. A straight two-step
225 implementation.
227 =item perlio
229 The new perlIO layer is used. NI-S' favorite.
231 =back
233 Like the I<-D> option, this is also for Encode hackers.
235 =back
237 =head1 SEE ALSO
239 L<iconv/1>
240 L<locale/3>
241 L<Encode>
242 L<Encode::Supported>
243 L<Encode::Alias>
244 L<PerlIO>
246 =cut