Update NEWS for 0.1.1
[gcap.git] / bin / gcap
blob57ebbb7f93ea3725339e8bcfad0741d879ea67cc
1 #!/usr/bin/perl
3 # gcap
4 # Copyright (C) 2010-2011 Toni Gundogdu <legatvs@cpan.org>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 use 5.010001;
21 use feature 'say';
23 use warnings;
24 use strict;
26 binmode STDOUT, ":utf8";
27 binmode STDERR, ":utf8";
29 use version 0.77 (); our $VERSION = version->declare("0.1.1");
31 use Getopt::ArgvFile(home => 1, startupFilename => [qw(.gcaprc)]);
32 use Getopt::Long qw(:config bundling);
33 use Carp qw(croak);
35 exit main();
37 sub treat_argv
40 # Convert args (of length of 11) to Youtube URLs. Do this
41 # before calling Getopt::* as some IDs may start with '-'
42 # which confuses the Getopt::*.
44 my @argv;
45 foreach my $arg (@ARGV)
47 if (length($arg) == 11)
49 push @argv, "http://youtube.com/v/$arg";
51 else
53 push @argv, $arg;
56 @ARGV = @argv;
59 sub print_version
61 eval "require Umph::Prompt";
62 my $p = $@ ? "" : ", Umph::Prompt version $Umph::Prompt::VERSION";
63 say "gcap version $VERSION$p
64 Copyright (C) 2010-2011 Toni Gundogdu
65 This is free software; see the source for copying conditions. There is NO
66 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.";
67 exit 0;
70 sub print_help
72 require Pod::Usage;
73 Pod::Usage::pod2usage(-exitstatus => 0, -verbose => 1);
76 my %config;
78 sub check_umph_prompt
80 if ($config{'interactive'} and not eval 'require Umph::Prompt')
82 say STDERR
83 qq/WARNING Umph::Prompt not found, ignoring --interactive option/;
84 $config{interactive} = 0;
88 sub init
90 treat_argv();
92 GetOptions(
93 \%config,
94 'interactive|i',
95 'title|t',
96 'regexp|r=s',
97 'proxy=s',
98 'no_proxy|no-proxy',
99 'quiet|q',
100 'version' => \&print_version,
101 'help' => \&print_help,
102 ) or exit 1;
104 print_help if scalar @ARGV == 0;
106 $config{regexp} ||= "/(\\w|\\s)/g";
108 apply_regexp($config{regexp}); # Check regexp syntax
109 check_umph_prompt;
112 sub spew_qe {print STDERR @_ unless $config{quiet}}
114 my @items;
115 my $title;
117 sub main
119 init;
121 my $req_body = "http://video.google.com/timedtext?hl=en&type=list&v=";
122 my $url = $ARGV[0];
124 my $q = qr{(?:embed|v)[=/]((?>[-_\w]{11}))};
126 if ($url =~ /^https?:/i)
128 if ($url =~ /$q/)
130 $url = "$req_body$1";
132 else
134 croak qq/error: "$url" looks nothing like a youtube page url\n/;
137 else
139 $url = "$req_body$url";
142 spew_qe "Checking ...";
144 require LWP;
145 my $a = new LWP::UserAgent;
146 $a->env_proxy; # http://search.cpan.org/perldoc?LWP::UserAgent
147 $a->proxy('http', $config{proxy}) if $config{proxy};
148 $a->no_proxy('') if $config{no_proxy};
150 require XML::DOM;
151 my $p = new XML::DOM::Parser(LWP_UserAgent => $a);
152 my $d = $p->parsefile($url);
153 my $r = $d->getDocumentElement;
154 my $n = 0;
156 for my $e ($r->getElementsByTagName("track"))
158 my %tmp = (
159 name => $e->getAttributeNode("name")->getValue || "",
160 lang_code => $e->getAttributeNode("lang_code")->getValue,
161 lang_transl => $e->getAttributeNode("lang_translated")->getValue,
162 selected => 1,
164 $tmp{title} = $tmp{lang_transl}; # So that Umph::Prompt works
165 push @items, \%tmp;
166 spew_qe((++$n % 5 == 0) ? " " : ".");
168 $d->dispose;
170 spew_qe "done.\n";
172 my $v = $1
173 if $url =~ /$q/
174 or croak "error: $url: no match: video id\n";
176 get_title($v, $a) if $config{title};
177 open_prompt() if $config{interactive};
179 my $t = 0;
181 foreach (@items)
183 ++$t if $_->{selected};
185 croak "error: no input: no captions found\n" unless $t;
187 require HTML::Entities;
189 $n = 0;
191 foreach (@items)
193 next unless $_->{selected};
195 $url =
196 "http://video.google.com/timedtext?"
197 . "hl=$_->{lang_code}"
198 . "&lang=$_->{lang_code}"
199 . "&name=$_->{name}" . "&v=$v";
201 my $fname = sprintf "%s_%s.srt", $v, $_->{lang_code};
203 if ($title)
205 $title = apply_regexp($config{regexp}, $title);
206 $fname = sprintf "%s_%s.srt", $title, $_->{lang_code};
209 open my $fh, ">", $fname or die "$fname: $!\n";
210 binmode $fh, ":utf8";
212 spew_qe sprintf "(%02d of %02d) ", ++$n, $t if $t > 0;
213 spew_qe "Saving $fname ...";
215 $d = $p->parsefile($url);
216 $r = $d->getDocumentElement;
218 my $i = 1;
219 my $last_start = 0;
221 for my $e ($r->getElementsByTagName("text"))
224 my $tmp = $e->getFirstChild;
225 next unless $tmp;
227 my $text = trim($tmp->getNodeValue);
228 next unless $text;
229 $text = HTML::Entities::decode_entities($text);
231 my $start = $e->getAttributeNode("start")->getValue;
233 my $start_sec = 0;
234 my $start_msec = 0;
236 if ($start =~ /(\d+)/)
238 $start_sec = $1;
239 $start_msec = $1
240 if $start =~
241 /\d+\.(\d+)/; # should only capture 3 first digits
244 my @start = gmtime($start_sec);
246 $tmp = $e->getAttributeNode("dur");
247 my $dur = $tmp ? $tmp->getValue : $start - $last_start;
249 my $end_sec = $start + $dur;
251 $dur =~ /\d+\.(\d+)/; # should only capture 3 first digits
252 my $end_msec = $1 || 0;
254 my @end = gmtime($end_sec);
256 printf $fh
257 "%d\r\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\r\n%s\r\n\r\n",
258 $i++, @start[2, 1, 0], $start_msec, @end[2, 1, 0],
259 $end_msec, $text;
261 $last_start = $start;
263 $d->dispose;
264 close $fh;
265 spew_qe "done.\n";
270 sub get_title
272 my ($v, $a) = @_;
274 my $page_url = "http://youtube.com/watch?v=$v";
275 my $url = "http://www.youtube.com/get_video_info?&video_id=$v"
276 . "&el=detailpage&ps=default&eurl=&gl=US&hl=en";
278 spew_qe ":: Getting video title ...";
280 my $r = $a->get($url);
282 unless ($r->is_success)
284 printf STDERR "\nerror: $page_url: %s\n", $r->status_line;
285 return;
288 require CGI;
289 my $q = CGI->new($r->content);
291 if ($q->param('reason'))
293 printf STDERR "\nerror: %s: %s (errorcode: %d)\n",
294 $page_url, trim($q->param("reason")),
295 $q->param("errorcode");
297 else
299 require Encode;
300 $title = trim(Encode::decode_utf8($q->param('title')));
301 spew_qe "done.\n";
303 $title;
306 sub apply_regexp
308 my ($re, $s) = @_;
309 my ($pat, $flags);
311 if ($re =~ /^\/(.*)\/(.*)$/)
313 $pat = $1;
314 $flags = $2;
316 else
318 croak
319 qq{error: --regexp: "$re" looks nothing like `/pattern/flags`\n};
321 return unless $s;
323 my $q = $flags =~ /i/ ? qr/$pat/i : qr/$pat/;
324 join '', $flags =~ /g/ ? $s =~ /$q/g : $s =~ /$q/;
327 sub trim
329 my $s = shift;
330 $s =~ s{^[\s]+}//;
331 $s =~ s{\s+$}//;
332 $s =~ s{\s\s+}/ /g;
336 sub open_prompt
338 my $p = new Umph::Prompt(
340 # Commands.
341 commands => {
342 q => sub {
343 my ($p, $args) = @_;
344 $p->exit(\@items, $args);
346 d => sub {
347 my ($p, $args) = @_;
348 $p->display(\@items, $args);
350 m => sub {
351 my ($p, $args) = @_;
352 $p->max_shown_items(@{$args});
354 s => sub {
355 my ($p, $args) = @_;
356 $p->select(\@items, $args);
358 h => sub {
359 my ($p, $args) = @_;
360 $p->help;
364 # Callbacks. All of these are optional.
365 ontoggle => sub {
366 my ($p, $args) = @_;
367 $p->toggle(\@items, $args);
369 onitems => sub {return \@items},
370 onloaded => sub {
371 my ($p, $args) = @_;
372 $p->display(\@items, $args);
375 # Other (required) settings
376 total_items => scalar @items,
377 prompt_msg => 'gcap',
378 max_shown_items => 20
381 say STDERR qq/Enter prompt. Type "help" to get a list of commands./;
382 $p->exec;
385 __END__
387 =head1 SYNOPSIS
389 gcap [-i] [-t] [-r E<lt>regexpE<gt>] [--proxy=E<lt>addrE<gt> | --no-proxy]
390 [--help] E<lt>urlE<gt> | E<lt>video_idE<gt>
392 =head2 OPTIONS
394 --help Print help and exit
395 --version Print version and exit
396 -q, --quiet Be quiet
397 -i, --interactive Run in interactive mode
398 -t, --title Use video title in filename
399 -r, --regexp arg (="/(\w|\s)/g") Cleanup title with regexp
400 --proxy arg (=http_proxy) Use proxy for http connections
401 --no-proxy Disable use of HTTP proxy
403 =cut
405 # vim: set ts=2 sw=2 tw=72 expandtab: