Bump version to 0.0.7
[gcap.git] / bin / gcap
blob4409ba03f6209eeb0fb3c57246c6f0953978a787
1 #!/usr/bin/perl
2 # -*- coding: ascii -*-
5 # Copyright (C) 2010 Toni Gundogdu <legatvs@gmail.com>.
7 # This program is free software: you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation, either version 3 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program. If not, see <http://www.gnu.org/licenses/>.
21 use warnings;
22 use strict;
24 binmode STDOUT, ":utf8";
25 binmode STDERR, ":utf8";
27 use version 0.77 (); our $VERSION = version->declare("0.0.7");
29 use Getopt::ArgvFile( home => 1, startupFilename => [qw(.gcaprc)] );
30 use Getopt::Long qw(:config bundling);
# Video title retrieved by get_title(); left undefined when not available.
my $video_title;

# Command line options, populated by GetOptions() in init().
my %config;

# Run the program and use main()'s return value as the exit status.
exit( main() );
# Parse the command line into %config. Exits with status 1 when
# GetOptions() reports a failure. Afterwards makes sure a --regexp value
# is present (falling back to the default) and passes it to apply_regexp()
# without a subject string so that a malformed value is rejected early.
sub init {
  my @spec = (
    'interactive|i',
    'title|t',
    'regexp|r=s',
    'proxy=s',
    'no_proxy|no-proxy',
    'quiet|q',
    'version' => \&print_version,
    'license' => \&print_license,
    'help'    => \&print_help,
  );

  exit 1 unless GetOptions( \%config, @spec );

  $config{regexp} = "/(\\w|\\s)/g" unless $config{regexp};

  apply_regexp( $config{regexp} );    # Check syntax.
}
# Option handler for --version: print the program version to STDOUT and
# terminate with exit status 0.
sub print_version {
  my $banner = "gcap version $VERSION\n";
  print $banner;
  exit 0;
}
# Option handler for --license: print the license notice to STDOUT and
# terminate with exit status 0.
sub print_license {
  my $notice =
"Copyright (C) 2010 Toni Gundogdu. GNU GPL v3+. This is free software;
see the source for copying conditions. There is NO warranty; not even
for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
";
  print $notice;
  exit 0;
}
# Option handler for --help (also called by main() when no argument is
# given): hand over to Pod::Usage, asking for exit status 0 and verbosity
# level 1. Pod::Usage is loaded on demand.
sub print_help {
  require Pod::Usage;
  my %usage_opts = ( -exitstatus => 0, -verbose => 1 );
  Pod::Usage::pod2usage(%usage_opts);
}
# Caption tracks found for the video. Each element is a hash reference
# with the keys name, lang_code, lang_transl and selected.
my @captions;

# Format an offset given in (possibly fractional) seconds as an SRT
# timestamp of the form "HH:MM:SS,mmm".
sub _srt_timestamp {
  my ($seconds) = @_;

  # Work in whole milliseconds so the fraction is rounded exactly once and
  # is always rendered as three digits ("1.5" becomes ",500", not ",005").
  my $ms = int( $seconds * 1000 + 0.5 );
  $ms = 0 if $ms < 0;

  return sprintf "%02d:%02d:%02d,%03d",
    int( $ms / 3_600_000 ),
    int( $ms / 60_000 ) % 60,
    int( $ms / 1000 ) % 60,
    $ms % 1000;
}

# Program entry point.
#
# Takes the first command line argument, either a video page URL or a bare
# video id, fetches the list of caption tracks for that video, optionally
# looks up the video title (--title) and lets the user pick tracks
# (--interactive), then writes every selected track to "<stem>_<lang>.srt"
# in the current directory. <stem> is the cleaned-up video title when one
# is available and the video id otherwise.
#
# Returns 0 on success. Dies or exits with status 1 on errors.
sub main {

  init();

  print_help() unless scalar @ARGV;

  my $req_body =
    "http://video.google.com/timedtext?hl=en&type=list&v=";
  my $url = $ARGV[0];
  my $q   = qr{v[=/]((?>[-_\w]{11}))};

  # Accept https as well as http page URLs; anything else is taken to be
  # a bare video id.
  if ( $url =~ /^https?:/i ) {
    if ( $url =~ /$q/ ) {
      $url = "$req_body$1";
    }
    else {
      print STDERR
        "error: does not look like a youtube video page URL.\n";
      exit 1;
    }
  }
  else {
    $url = "$req_body$url";
  }

  print STDERR "Checking ..." unless $config{quiet};

  require LWP;

  my $ua = LWP::UserAgent->new;

  $ua->env_proxy;    # http://search.cpan.org/perldoc?LWP::UserAgent

  $ua->proxy( 'http', $config{proxy} ) if $config{proxy};
  $ua->no_proxy('') if $config{no_proxy};

  require XML::DOM;

  my $parser = XML::DOM::Parser->new( LWP_UserAgent => $ua );
  my $doc    = $parser->parsefile($url);
  my $root   = $doc->getDocumentElement;

  for my $track ( $root->getElementsByTagName("track") ) {

    # getAttribute() yields "" for a missing attribute, so an incomplete
    # <track> element no longer aborts the whole run.
    push @captions,
      {
      name        => scalar $track->getAttribute("name"),
      lang_code   => scalar $track->getAttribute("lang_code"),
      lang_transl => scalar $track->getAttribute("lang_translated"),
      selected    => 1,
      };
    print STDERR "." unless $config{quiet};
  }

  print STDERR "done.\n" unless $config{quiet};

  $doc->dispose;

  # The list URL built above ends in "v=<id>"; pull the id back out.
  my ($v) = $url =~ /$q/
    or die "error: no match: video id";

  get_title( $v, $ua ) if $config{title};
  prompt() if $config{interactive};

  # Number of tracks that will be saved, for the progress display.
  my $t = grep { $_->{selected} } @captions;

  # Clean the title once, before the loop: re-applying the user's regexp
  # for every track could keep changing the file name stem.
  my $stem = $v;
  if ($video_title) {
    $video_title = apply_regexp( $config{regexp}, $video_title );
    $stem = $video_title
      if defined $video_title && length $video_title;
  }

  require HTML::Entities;

  my $n = 0;

  for my $caption (@captions) {

    next unless $caption->{selected};

    $url =
        "http://video.google.com/timedtext?"
      . "hl=$caption->{lang_code}"
      . "&lang=$caption->{lang_code}"
      . "&name=$caption->{name}"
      . "&v=$v";

    my $fname = sprintf "%s_%s.srt", $stem, $caption->{lang_code};

    open my $fh, ">", $fname or die "$fname: $!\n";
    binmode $fh, ":utf8";

    unless ( $config{quiet} ) {
      printf STDERR "(%02d of %02d) ", ++$n, $t if $t > 0;
      print STDERR "Saving $fname ...";
    }

    $doc  = $parser->parsefile($url);
    $root = $doc->getDocumentElement;

    my $i          = 1;
    my $last_start = 0;

    for my $e ( $root->getElementsByTagName("text") ) {

      my $child = $e->getFirstChild;
      next unless $child;

      my $text = trim( $child->getNodeValue );
      next unless defined $text && length $text;
      $text = HTML::Entities::decode_entities($text);

      my $start = $e->getAttribute("start") || 0;

      # Without a "dur" attribute fall back to the distance from the
      # previous entry's start.
      my $dur_attr = $e->getAttributeNode("dur");
      my $dur =
        $dur_attr ? $dur_attr->getValue : $start - $last_start;

      # Both timestamps are derived from the numeric offsets, so the end
      # time's milliseconds belong to start + dur, not to dur alone.
      printf {$fh} "%d\r\n%s --> %s\r\n%s\r\n\r\n",
        $i++, _srt_timestamp($start), _srt_timestamp( $start + $dur ),
        $text;

      $last_start = $start;
    }

    $doc->dispose;

    # Buffered write errors only surface when the handle is closed.
    close $fh or die "$fname: $!\n";

    print STDERR "done.\n" unless $config{quiet};
  }

  return 0;
}
# Becomes true once get() has confirmed the selection, which ends the
# prompt loop. Note: main() is started by the "exit main()" statement near
# the top of the file, before this initializer is executed, so the value
# is still undefined when prompt() first runs; the loop only relies on its
# truthiness.
my $done = 0;

# Interactive prompt. Lists the captions found, then reads commands from
# STDIN until get() sets $done or the user quits. A line containing a
# number toggles that caption; otherwise the first word character of the
# line selects the command. End of input is treated like "quit".
sub prompt {

  my %cmds = (
    'h' => \&help,
    'q' => \&quit,
    'l' => \&list,
    'a' => \&select_all,
    'n' => \&select_none,
    'i' => \&invert_selection,
    'g' => \&get,
  );

  print STDERR "Enter prompt. "
    . qq/Type "help" to get a list of commands.\n/;
  list();

  my $p = "(gcap) ";

  while ( !$done ) {
    print STDERR $p;
    my $ln = <STDIN>;

    # <STDIN> returns undef at end of input (Ctrl-D, closed pipe). Simply
    # retrying would print the prompt forever, so leave instead.
    unless ( defined $ln ) {
      print STDERR "\n";
      quit();
    }

    chomp $ln;

    if ( $ln =~ /(\d+)/ ) {
      toggle_caption($1);
    }
    elsif ( $ln =~ /(\w)/ ) {
      my $cmd = $cmds{ lc $1 };
      if ($cmd) {
        $cmd->();
      }
      else {
        print STDERR
          qq/error: unknown command, type "help" to get a list\n/;
      }
    }
  }
}
# Look up the title of a video and store it in $video_title.
#
# Parameters:
#   $v  - video id
#   $ua - user agent object used for the HTTP request (its get() method
#         is called)
#
# Returns nothing. On an HTTP failure an error is printed and
# $video_title is left untouched. When no title could be obtained a
# warning is printed unless --quiet is in effect.
sub get_title {
  my ( $v, $ua ) = @_;

  my $url = "http://www.youtube.com/get_video_info?&video_id=$v"
    . "&el=detailpage&ps=default&eurl=&gl=US&hl=en";

  my $r = $ua->get($url);

  unless ( $r->is_success ) {

    # Pass the URL as an argument rather than as part of the format.
    printf STDERR "\nerror: %s: %s\n", $url, $r->status_line;
    return;
  }

  require CGI;

  # The response body is a URL-encoded query string.
  my $q = CGI->new( $r->content );

  # Fetch each parameter in scalar context: in list context param() may
  # return zero or several values and shift the printf arguments.
  my $reason = $q->param('reason');

  if ($reason) {
    my $errorcode = $q->param('errorcode');
    printf STDERR "\nerror: %s: %s (errorcode: %d)\n",
      $url, $reason, $errorcode || 0;
  }
  else {
    require Encode;
    my $title = $q->param('title');
    $video_title = Encode::decode_utf8($title) if defined $title;
  }

  unless ($video_title) {
    print STDERR "\nwarning: $url: use id instead\n"
      unless $config{quiet};
  }

  return;
}
# Apply a user supplied regular expression to a string.
#
# Parameters:
#   $re - expression in the form "/pattern/flags"; of the flags only "i"
#         (ignore case) and "g" (all matches) are honoured
#   $s  - subject string; may be omitted to only validate $re
#
# Returns the concatenation of everything the pattern captured (or, for a
# pattern without capture groups, the text it matched), the empty string
# when nothing matched, and nothing at all when $s is undefined or empty.
#
# Prints an error and exits with status 1 when $re does not have the
# expected form or when its pattern does not compile.
sub apply_regexp {

  my ( $re, $s ) = @_;

  my ( $pat, $flags ) = $re =~ /^\/(.*)\/(.*)$/;

  unless ( defined $pat ) {
    print STDERR
      "error: invalid regexp syntax, expected `/pattern/flags'\n";
    exit 1;
  }

  # Compile before looking at $s so that a validation-only call also
  # catches a broken pattern, not just a missing pair of slashes.
  my $q = eval { $flags =~ /i/ ? qr/$pat/i : qr/$pat/ };

  unless ( defined $q ) {
    print STDERR "error: invalid regexp: $@";
    exit 1;
  }

  return unless defined $s && length $s;

  if ( $flags =~ /g/ ) {
    return join '', grep { defined } $s =~ /$q/g;
  }

  my @captured = $s =~ /$q/;
  return '' unless @captured;

  # Without capture groups a successful list-context match returns (1);
  # use the matched text itself instead. $#+ is the number of groups in
  # the last successful match.
  return substr( $s, $-[0], $+[0] - $-[0] ) if $#+ == 0;

  return join '', grep { defined } @captured;
}
# Prompt command "help": print the list of prompt commands to STDERR.
sub help {
  my $text = "Commands:
help .. this
list .. display found captions (> indicates selected for download)
all .. select all
none .. select none
invert .. invert selection
(number) .. toggle caption
get .. download selected captions
quit .. quit without downloading captions\n";

  $text .=
    qq/Command name abbreviations are allowed, e.g. "h" instead of "help"\n/;

  print STDERR $text;
}
# Prompt command "get": when at least one caption is selected, set $done
# so that the prompt loop ends; otherwise print an error to STDERR.
sub get {
  if ( grep { $_->{selected} } @captions ) {
    $done = 1;
    return;
  }
  print STDERR "error: you have not selected anything\n";
}
# Prompt command "quit": terminate the program with exit status 0.
sub quit {
  exit(0);
}
# Prompt command "list": print the numbered captions to STDERR, one per
# line, marking the selected ones with ">".
sub list {
  my $i = 0;
  for my $caption (@captions) {

    # The language text is passed as an argument, not as part of the
    # format, so a "%" in it cannot be mistaken for a conversion.
    printf STDERR "%2s%02d: %s\n",
      ( $caption->{selected} ? ">" : "" ), ++$i,
      $caption->{lang_transl};
  }
}
# Prompt command "all": mark every caption as selected, then show the
# list.
sub select_all {
  for my $caption (@captions) {
    $caption->{selected} = 1;
  }
  list();
}
# Prompt command "none": clear the selection mark of every caption, then
# show the list.
sub select_none {
  for my $caption (@captions) {
    $caption->{selected} = 0;
  }
  list();
}
# Prompt command "invert": flip the selection mark of every caption, then
# show the list.
sub invert_selection {
  for my $caption (@captions) {
    $caption->{selected} = !$caption->{selected};
  }
  list();
}
# Flip the selection mark of a single caption and show the list.
#
# Parameters:
#   (first argument) - caption number as displayed by list(), counted
#                      from 1
#
# Prints an error to STDERR when no caption has that number.
sub toggle_caption {
  my $i = (shift) - 1;
  if ( $i >= 0 && exists $captions[$i] ) {
    $captions[$i]->{selected} = !$captions[$i]->{selected};
    list();
  }
  else {
    print STDERR "error: out of range\n";
  }
}
# Return a copy of the given string with leading and trailing whitespace
# removed.
sub trim {
  my ($s) = @_;
  $s =~ s/^\s+|\s+$//g;
  return $s;
}
387 __END__
389 =head1 SYNOPSIS
391 gcap [-i] [-t] [-r E<lt>regexpE<gt>] [E<lt>urlE<gt> | E<lt>video_idE<gt>]
392 [--proxy E<lt>addrE<gt> | --no-proxy]
394 =head1 OPTIONS
396 --help print help and exit
397 --version print version and exit
398 --license print license and exit
399 -q, --quiet be quiet
400 -i, --interactive run in interactive mode
401 -t, --title parse video title and use it in filename
402 -r, --regexp arg (="/(\w|\s)/g") cleanup title with regexp
403 --proxy arg (=http_env) use proxy for http connections
404 --no-proxy disable use of http proxy
406 =cut
408 # vim: set ts=2 sw=2 tw=72 expandtab: