use Carp croak
[gcap.git] / bin / gcap
blobf7b8148b949e3bbe0bfd4df0ec44a6ef812b15ed
1 #!/usr/bin/perl
3 # gcap
4 # Copyright (C) 2010-2011 Toni Gundogdu <legatvs@gmail.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 use feature 'say';
22 use warnings;
23 use strict;
25 binmode STDOUT, ":utf8";
26 binmode STDERR, ":utf8";
28 use version 0.77 (); our $VERSION = version->declare("0.1.0");
30 use Getopt::ArgvFile(home => 1, startupFilename => [qw(.gcaprc)]);
31 use Getopt::Long qw(:config bundling);
32 use Carp qw(croak);
34 exit main();
36 sub treat_argv
39 # Convert args (of length of 11) to Youtube URLs. Do this
40 # before calling Getopt::* as some IDs may start with '-'
41 # which confuses the Getopt::*.
43 my @argv;
44 foreach my $arg (@ARGV)
46 if (length($arg) == 11)
48 push @argv, "http://youtube.com/v/$arg";
50 else
52 push @argv, $arg;
55 @ARGV = @argv;
58 sub print_version
60 eval "require Umph::Prompt";
61 my $p = $@ ? "" : ", Umph::Prompt version $Umph::Prompt::VERSION";
62 say "gcap version $VERSION$p
63 Copyright (C) 2010-2011 Toni Gundogdu
64 This is free software; see the source for copying conditions. There is NO
65 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.";
66 exit 0;
69 sub print_help
71 require Pod::Usage;
72 Pod::Usage::pod2usage(-exitstatus => 0, -verbose => 1);
75 my %config;
77 sub init
79 treat_argv();
81 GetOptions(
82 \%config,
83 'interactive|i',
84 'title|t',
85 'regexp|r=s',
86 'proxy=s',
87 'no_proxy|no-proxy',
88 'quiet|q',
89 'version' => \&print_version,
90 'help' => \&print_help,
91 ) or exit 1;
93 print_help() unless scalar @ARGV;
95 $config{regexp} ||= "/(\\w|\\s)/g";
96 apply_regexp($config{regexp}); # Check syntax.
99 sub spew_qe {print STDERR @_ unless $config{quiet}}
101 my @captions;
102 my $title;
104 sub main
106 init;
108 my $req_body = "http://video.google.com/timedtext?hl=en&type=list&v=";
109 my $url = $ARGV[0];
111 my $q = qr{(?:embed|v)[=/]((?>[-_\w]{11}))};
113 if ($url =~ /^http:/i)
115 if ($url =~ /$q/)
117 $url = "$req_body$1";
119 else
121 croak qq/error: "$url" looks nothing like a youtube page url\n/;
124 else
126 $url = "$req_body$url";
129 spew_qe "Checking ...";
131 require LWP;
132 my $a = new LWP::UserAgent;
133 $a->env_proxy; # http://search.cpan.org/perldoc?LWP::UserAgent
134 $a->proxy('http', $config{proxy}) if $config{proxy};
135 $a->no_proxy('') if $config{no_proxy};
137 require XML::DOM;
139 my $p = new XML::DOM::Parser(LWP_UserAgent => $a);
140 my $d = $p->parsefile($url);
141 my $r = $d->getDocumentElement;
142 my $n = 0;
144 for my $e ($r->getElementsByTagName("track"))
146 my %tmp = (
147 name => $e->getAttributeNode("name")->getValue || "",
148 lang_code => $e->getAttributeNode("lang_code")->getValue,
149 lang_transl => $e->getAttributeNode("lang_translated")->getValue,
150 selected => 1
152 push @captions, \%tmp;
153 spew_qe((++$n % 5 == 0) ? " " : ".");
155 $d->dispose;
157 spew_qe "done.\n";
159 my $v = $1
160 if $url =~ /$q/
161 or croak "error: $url: no match: video id\n";
163 get_title($v, $a) if $config{title};
164 prompt() if $config{interactive};
166 my $t = 0;
168 foreach (@captions)
170 ++$t if $_->{selected};
172 croak "error: no input: no captions found\n" unless $t;
174 require HTML::Entities;
176 $n = 0;
178 foreach (@captions)
180 next unless $_->{selected};
182 $url =
183 "http://video.google.com/timedtext?"
184 . "hl=$_->{lang_code}"
185 . "&lang=$_->{lang_code}"
186 . "&name=$_->{name}" . "&v=$v";
188 my $fname = sprintf "%s_%s.srt", $v, $_->{lang_code};
190 if ($title)
192 $title = apply_regexp($config{regexp}, $title);
193 $fname = sprintf "%s_%s.srt", $title, $_->{lang_code};
196 open my $fh, ">", $fname or die "$fname: $!\n";
197 binmode $fh, ":utf8";
199 spew_qe sprintf "(%02d of %02d) ", ++$n, $t if $t > 0;
200 spew_qe "Saving $fname ...";
202 $d = $p->parsefile($url);
203 $r = $d->getDocumentElement;
205 my $i = 1;
206 my $last_start = 0;
208 for my $e ($r->getElementsByTagName("text"))
211 my $tmp = $e->getFirstChild;
212 next unless $tmp;
214 my $text = trim($tmp->getNodeValue);
215 next unless $text;
216 $text = HTML::Entities::decode_entities($text);
218 my $start = $e->getAttributeNode("start")->getValue;
220 my $start_sec = 0;
221 my $start_msec = 0;
223 if ($start =~ /(\d+)/)
225 $start_sec = $1;
226 $start_msec = $1
227 if $start =~
228 /\d+\.(\d+)/; # should only capture 3 first digits
231 my @start = gmtime($start_sec);
233 $tmp = $e->getAttributeNode("dur");
234 my $dur = $tmp ? $tmp->getValue : $start - $last_start;
236 my $end_sec = $start + $dur;
238 $dur =~ /\d+\.(\d+)/; # should only capture 3 first digits
239 my $end_msec = $1 || 0;
241 my @end = gmtime($end_sec);
243 printf $fh
244 "%d\r\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\r\n%s\r\n\r\n",
245 $i++, @start[2, 1, 0], $start_msec, @end[2, 1, 0],
246 $end_msec, $text;
248 $last_start = $start;
250 $d->dispose;
251 close $fh;
252 spew_qe "done.\n";
257 my $done = 0;
259 sub prompt
262 return if scalar @captions == 0;
264 my %cmds = (
265 'h' => \&help,
266 'q' => \&quit,
267 'l' => \&list,
268 'a' => \&select_all,
269 'n' => \&select_none,
270 'i' => \&invert_selection,
271 'g' => \&get,
274 print STDERR "Enter prompt. "
275 . qq/Type "help" to get a list of commands.\n/;
276 list();
278 my $p = "(gcap) ";
280 while (!$done)
282 print STDERR $p;
283 my $ln = <STDIN>;
284 next unless $ln;
285 chomp $ln;
286 if ($ln =~ /(\d+)/)
288 toggle_caption($1);
290 else
292 next unless $ln =~ /(\w)/;
293 $cmds{$1}() if defined $cmds{$1};
298 sub get_title
300 my ($v, $a) = @_;
302 my $page_url = "http://youtube.com/watch?v=$v";
303 my $url = "http://www.youtube.com/get_video_info?&video_id=$v"
304 . "&el=detailpage&ps=default&eurl=&gl=US&hl=en";
306 spew_qe ":: Getting video title ...";
308 my $r = $a->get($url);
310 unless ($r->is_success)
312 printf STDERR "\nerror: $page_url: %s\n", $r->status_line;
313 return;
316 require CGI;
318 my $q = CGI->new($r->content);
320 if ($q->param('reason'))
322 printf STDERR "\nerror: %s: %s (errorcode: %d)\n",
323 $page_url, trim($q->param("reason")),
324 $q->param("errorcode");
326 else
328 require Encode;
329 $title = trim(Encode::decode_utf8($q->param('title')));
330 spew_qe "done.\n";
332 $title;
335 sub apply_regexp
337 my ($re, $s) = @_;
338 my ($pat, $flags);
340 if ($re =~ /^\/(.*)\/(.*)$/)
342 $pat = $1;
343 $flags = $2;
345 else
347 croak
348 qq{error: --regexp: "$re" looks nothing like `/pattern/flags`\n};
350 return unless $s;
352 my $q = $flags =~ /i/ ? qr/$pat/i : qr/$pat/;
353 join '', $flags =~ /g/ ? $s =~ /$q/g : $s =~ /$q/;
356 sub help
358 print STDERR "Commands:
359 help .. this
360 list .. display found captions (> indicates selected for download)
361 all .. select all
362 none .. select none
363 invert .. invert selection
364 (number) .. toggle caption
365 get .. download selected captions
366 quit .. quit without downloading captions\n"
367 . qq/Command name abbreviations are allowed, e.g. "h" instead of "help"\n/;
370 sub get
372 foreach (@captions)
374 if ($_->{selected})
376 $done = 1;
377 return;
380 print STDERR "error: you have not selected anything\n";
383 sub quit {exit 0;}
385 sub list
387 my $i = 0;
388 foreach (@captions)
390 printf STDERR "%2s%02d: $_->{lang_transl}\n",
391 $_->{selected} ? ">" : "", ++$i;
395 sub select_all
397 $_->{selected} = 1 foreach @captions;
398 list();
401 sub select_none
403 $_->{selected} = 0 foreach @captions;
404 list();
407 sub invert_selection
409 $_->{selected} = !$_->{selected} foreach @captions;
410 list();
413 sub toggle_caption
415 my $i = (shift) - 1;
416 if ($i >= 0 && exists $captions[$i])
418 $captions[$i]->{selected} = !$captions[$i]->{selected};
419 list();
421 else
423 print STDERR "error: out of range\n";
427 sub trim
429 my $s = shift;
430 $s =~ s{^[\s]+}//;
431 $s =~ s{\s+$}//;
432 $s =~ s{\s\s+}/ /g;
436 __END__
438 =head1 SYNOPSIS
440 gcap [-i] [-t] [-r E<lt>regexpE<gt>] [--proxy E<lt>addrE<gt> | --no-proxy]
441 [E<lt>urlE<gt> | E<lt>video_idE<gt>]
443 =head2 OPTIONS
445 --help Print help and exit
446 --version Print version and exit
447 -q, --quiet Be quiet
448 -i, --interactive Run in interactive mode
449 -t, --title Parse video title and use it in filename
450 -r, --regexp arg (="/(\w|\s)/g") Cleanup title with regexp
451 --proxy arg (=http_proxy) Use proxy for http connections
452 --no-proxy Disable use of HTTP proxy
454 =cut
456 # vim: set ts=2 sw=2 tw=72 expandtab: