2 # -*- coding: ascii -*-
5 # Copyright (C) 2010 Toni Gundogdu <legatvs@gmail.com>.
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
# Put the :utf8 layer on both standard output streams.
binmode $_, ":utf8" for \*STDOUT, \*STDERR;
27 use version
0.77 (); our $VERSION = version
->declare("0.0.7");
29 use Getopt
::ArgvFile
( home
=> 1, startupFilename
=> [qw(.gcaprc)] );
30 use Getopt
::Long
qw(:config bundling);
46 'version' => \
&print_version
,
47 'license' => \
&print_license
,
48 'help' => \
&print_help
,
# Use the stock expression when the option is unset (or otherwise false).
$config{regexp} = "/(\\w|\\s)/g" unless $config{regexp};

# Check syntax: called without a subject string.
apply_regexp( $config{regexp} );
56 print "gcap version $VERSION\n";
62 "Copyright (C) 2010 Toni Gundogdu. GNU GPL v3+. This is free software;
63 see the source for copying conditions. There is NO warranty; not even
64 for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
71 Pod
::Usage
::pod2usage
( -exitstatus
=> 0, -verbose
=> 1 );
# No command-line arguments left: fall through to the help text.
print_help() if !@ARGV;
83 "http://video.google.com/timedtext?hl=en&type=list&v=";
85 my $q = qr{v[=/]((?>[-_\w]{11}))};
87 if ( $url =~ /^http:/i ) {
93 "error: does not look like a youtube video page URL.\n";
98 $url = "$req_body$url";
101 print STDERR
"Checking ..." unless $config{quiet
};
# Construct the HTTP user agent with a direct class method call; the
# indirect-object form (`new CLASS`) is ambiguous to the parser and is
# discouraged by perlobj.
my $a = LWP::UserAgent->new;
107 $a->env_proxy; # http://search.cpan.org/perldoc?LWP::UserAgent
109 $a->proxy( 'http', $config{proxy
} ) if $config{proxy
};
110 $a->no_proxy('') if $config{no_proxy
};
# Create the DOM parser, passing it the user agent $a through the
# LWP_UserAgent option. A direct class method call is used instead of the
# ambiguous indirect-object form (`new CLASS (...)`).
my $p = XML::DOM::Parser->new( LWP_UserAgent => $a );
115 my $d = $p->parsefile($url);
116 my $r = $d->getDocumentElement;
118 for my $e ( $r->getElementsByTagName("track") ) {
120 name
=> $e->getAttributeNode("name")->getValue || "",
121 lang_code
=> $e->getAttributeNode("lang_code")->getValue,
123 $e->getAttributeNode("lang_translated")->getValue,
126 push @captions, \
%tmp;
127 print STDERR
"." unless $config{quiet
};
130 print STDERR
"done.\n" unless $config{quiet
};
# Capture the video id matched by $q, or abort if the URL does not contain
# one. The capture is taken through a list assignment rather than
# `my $v = $1 if ...`: declaring a lexical under a statement modifier has
# undefined behaviour according to perlsyn.
my ($v) = $url =~ /$q/
  or die "error: no match: video id";
136 get_title
( $v, $a ) if $config{title
};
137 prompt
() if $config{interactive
};
141 foreach (@captions) {
142 ++$t if $_->{selected
};
145 require HTML
::Entities
;
149 foreach (@captions) {
151 next unless $_->{selected
};
154 "http://video.google.com/timedtext?"
155 . "hl=$_->{lang_code}"
156 . "&lang=$_->{lang_code}"
157 . "&name=$_->{name}" . "&v=$v";
159 my $fname = sprintf "%s_%s.srt", $v, $_->{lang_code
};
163 apply_regexp
( $config{regexp
}, $video_title );
164 $fname = sprintf "%s_%s.srt", $video_title, $_->{lang_code
};
# Create (or truncate) the output file and write it through the :utf8 layer.
open( my $fh, ">", $fname ) || die "$fname: $!\n";
binmode( $fh, ":utf8" );
170 unless ( $config{quiet
} ) {
171 printf STDERR
"(%02d of %02d) ", ++$n, $t if $t > 0;
172 print STDERR
"Saving $fname ...";
175 $d = $p->parsefile($url);
176 $r = $d->getDocumentElement;
181 for my $e ( $r->getElementsByTagName("text") ) {
183 my $tmp = $e->getFirstChild;
186 my $text = trim
( $tmp->getNodeValue );
188 $text = HTML
::Entities
::decode_entities
($text);
190 my $start = $e->getAttributeNode("start")->getValue;
195 if ( $start =~ /(\d+)/ ) {
199 /\d+\.(\d+)/; # should only capture 3 first digits
202 my @start = gmtime($start_sec);
204 $tmp = $e->getAttributeNode("dur");
205 my $dur = $tmp ?
$tmp->getValue : $start - $last_start;
207 my $end_sec = $start + $dur;
# Fractional digits of the duration. The match is evaluated in list
# context so that a duration without a decimal point yields 0; reading $1
# after a failed match would return the capture left over from an earlier
# successful match instead.
my ($end_msec) = $dur =~ /\d+\.(\d+)/;    # should only capture 3 first digits
$end_msec ||= 0;
212 my @end = gmtime($end_sec);
215 "%d\r\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\r\n%s\r\n\r\n",
216 $i++, @start[ 2, 1, 0 ], $start_msec, @end[ 2, 1, 0 ],
219 $last_start = $start;
226 print STDERR
"done.\n" unless $config{quiet
};
241 'n' => \
&select_none
,
242 'i' => \
&invert_selection
,
246 print STDERR
"Enter prompt. "
247 . qq/Type "help" to get a list of commands.\n/;
257 if ( $ln =~ /(\d+)/ ) {
# Look the command up by its first word character; input with no word
# character skips to the next iteration, unknown commands do nothing.
next unless $ln =~ /(\w)/;
my $handler = $cmds{$1};
$handler->() if defined $handler;
270 my $url = "http://www.youtube.com/get_video_info?&video_id=$v"
271 . "&el=detailpage&ps=default&eurl=&gl=US&hl=en";
273 my $r = $a->get($url);
275 unless ( $r->is_success ) {
276 printf STDERR
"\nerror: $url: %s\n", $r->status_line;
282 my $q = CGI
->new( $r->content );
284 if ( $q->param('reason') ) {
285 printf STDERR
"\nerror: %s: %s (errorcode: %d)\n",
286 $url, $q->param("reason"), $q->param("errorcode");
290 $video_title = Encode
::decode_utf8
( $q->param('title') );
293 unless ($video_title) {
294 print STDERR
"\nwarning: $url: use id instead\n"
295 unless $config{quiet
};
304 if ( $re =~ /^\/(.*)\
/(.*)$/ ) {
310 "error: invalid regexp syntax, expected `/pattern/flags'\n";
316 my $q = $flags =~ /i/ ?
qr/$pat/i : qr/$pat/;
318 return join '', $flags =~ /g/ ?
$s =~ /$q/g : $s =~ /$q/;
322 print STDERR
"Commands:
324 list .. display found captions (> indicates selected for download)
327 invert .. invert selection
328 (number) .. toggle caption
329 get .. download selected captions
330 quit .. quit without downloading captions\n"
331 . qq/Command name abbreviations are allowed, e.g. "h" instead of "help"\n/;
335 foreach (@captions) {
336 if ( $_->{selected
} ) {
341 print STDERR
"error: you have not selected anything\n";
348 foreach (@captions) {
349 printf STDERR
"%2s%02d: $_->{lang_transl}\n",
350 $_->{selected
} ?
">" : "", ++$i;
355 $_->{selected
} = 1 foreach @captions;
360 $_->{selected
} = 0 foreach @captions;
364 sub invert_selection
{
365 $_->{selected
} = !$_->{selected
} foreach @captions;
371 if ( $i >= 0 && exists $captions[$i] ) {
372 $captions[$i]->{selected
} = !$captions[$i]->{selected
};
# Message wording fixed: "out of rate" was a typo for "out of range".
print STDERR "error: out of range\n";
391 gcap [-i] [-t] [-r E<lt>regexpE<gt>] [E<lt>urlE<gt> | E<lt>video_idE<gt>]
392 [--proxy E<lt>addrE<gt> | --no-proxy]
396 --help print help and exit
397 --version print version and exit
398 --license print license and exit
400 -i, --interactive run in interactive mode
401 -t, --title parse video title and use it in filename
402 -r, --regexp arg (="/(\w|\s)/g") cleanup title with regexp
403 --proxy arg (=http_env) use proxy for http connections
404 --no-proxy disable use of http proxy
408 # vim: set ts=2 sw=2 tw=72 expandtab: