4 # Copyright (C) 2010-2011 Toni Gundogdu <legatvs@cpan.org>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 binmode STDOUT
, ":utf8";
27 binmode STDERR
, ":utf8";
29 use version
0.77 (); our $VERSION = version
->declare("0.1.1");
31 use Getopt
::ArgvFile
(home
=> 1, startupFilename
=> [qw(.gcaprc)]);
32 use Getopt
::Long
qw(:config bundling);
40 # Convert args (of length of 11) to Youtube URLs. Do this
41 # before calling Getopt::* as some IDs may start with '-'
42 # which confuses the Getopt::*.
45 foreach my $arg (@ARGV)
47 if (length($arg) == 11)
49 push @argv, "http://youtube.com/v/$arg";
61 eval "require Umph::Prompt";
62 my $p = $@ ?
"" : ", Umph::Prompt version $Umph::Prompt::VERSION";
63 say "gcap version $VERSION$p
64 Copyright (C) 2010-2011 Toni Gundogdu
65 This is free software; see the source for copying conditions. There is NO
66 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.";
73 Pod
::Usage
::pod2usage
(-exitstatus
=> 0, -verbose
=> 1);
80 if ($config{'interactive'} and not eval 'require Umph::Prompt')
83 qq/WARNING Umph::Prompt not found, ignoring --interactive option/;
84 $config{interactive
} = 0;
100 'version' => \
&print_version
,
101 'help' => \
&print_help
,
104 print_help
if scalar @ARGV == 0;
106 $config{regexp
} ||= "/(\\w|\\s)/g";
108 apply_regexp
($config{regexp
}); # Check regexp syntax
112 sub spew_qe
{print STDERR
@_ unless $config{quiet
}}
121 my $req_body = "http://video.google.com/timedtext?hl=en&type=list&v=";
124 my $q = qr{(?:embed|v)[=/]((?>[-_\w]{11}))};
126 if ($url =~ /^https?:/i)
130 $url = "$req_body$1";
134 croak
qq/error: "$url" looks nothing like a youtube page url\n/;
139 $url = "$req_body$url";
142 spew_qe
"Checking ...";
145 my $a = new LWP
::UserAgent
;
146 $a->env_proxy; # http://search.cpan.org/perldoc?LWP::UserAgent
147 $a->proxy('http', $config{proxy
}) if $config{proxy
};
148 $a->no_proxy('') if $config{no_proxy
};
151 my $p = new XML
::DOM
::Parser
(LWP_UserAgent
=> $a);
152 my $d = $p->parsefile($url);
153 my $r = $d->getDocumentElement;
156 for my $e ($r->getElementsByTagName("track"))
159 name
=> $e->getAttributeNode("name")->getValue || "",
160 lang_code
=> $e->getAttributeNode("lang_code")->getValue,
161 lang_transl
=> $e->getAttributeNode("lang_translated")->getValue,
164 $tmp{title
} = $tmp{lang_transl
}; # So that Umph::Prompt works
166 spew_qe
((++$n % 5 == 0) ?
" " : ".");
174 or croak
"error: $url: no match: video id\n";
176 get_title
($v, $a) if $config{title
};
177 open_prompt
() if $config{interactive
};
183 ++$t if $_->{selected
};
185 croak
"error: no input: no captions found\n" unless $t;
187 require HTML
::Entities
;
193 next unless $_->{selected
};
196 "http://video.google.com/timedtext?"
197 . "hl=$_->{lang_code}"
198 . "&lang=$_->{lang_code}"
199 . "&name=$_->{name}" . "&v=$v";
201 my $fname = sprintf "%s_%s.srt", $v, $_->{lang_code
};
205 $title = apply_regexp
($config{regexp
}, $title);
206 $fname = sprintf "%s_%s.srt", $title, $_->{lang_code
};
209 open my $fh, ">", $fname or die "$fname: $!\n";
210 binmode $fh, ":utf8";
212 spew_qe
sprintf "(%02d of %02d) ", ++$n, $t if $t > 0;
213 spew_qe
"Saving $fname ...";
215 $d = $p->parsefile($url);
216 $r = $d->getDocumentElement;
221 for my $e ($r->getElementsByTagName("text"))
224 my $tmp = $e->getFirstChild;
227 my $text = trim
($tmp->getNodeValue);
229 $text = HTML
::Entities
::decode_entities
($text);
231 my $start = $e->getAttributeNode("start")->getValue;
236 if ($start =~ /(\d+)/)
241 /\d+\.(\d+)/; # should only capture 3 first digits
244 my @start = gmtime($start_sec);
246 $tmp = $e->getAttributeNode("dur");
247 my $dur = $tmp ?
$tmp->getValue : $start - $last_start;
249 my $end_sec = $start + $dur;
251 $dur =~ /\d+\.(\d+)/; # should only capture 3 first digits
252 my $end_msec = $1 || 0;
254 my @end = gmtime($end_sec);
257 "%d\r\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\r\n%s\r\n\r\n",
258 $i++, @start[2, 1, 0], $start_msec, @end[2, 1, 0],
261 $last_start = $start;
274 my $page_url = "http://youtube.com/watch?v=$v";
275 my $url = "http://www.youtube.com/get_video_info?&video_id=$v"
276 . "&el=detailpage&ps=default&eurl=&gl=US&hl=en";
278 spew_qe
":: Getting video title ...";
280 my $r = $a->get($url);
282 unless ($r->is_success)
284 printf STDERR
"\nerror: $page_url: %s\n", $r->status_line;
289 my $q = CGI
->new($r->content);
291 if ($q->param('reason'))
293 printf STDERR
"\nerror: %s: %s (errorcode: %d)\n",
294 $page_url, trim
($q->param("reason")),
295 $q->param("errorcode");
300 $title = trim
(Encode
::decode_utf8
($q->param('title')));
311 if ($re =~ /^\/(.*)\
/(.*)$/)
319 qq{error
: --regexp
: "$re" looks nothing like
`/pattern/flags`\n};
323 my $q = $flags =~ /i/ ?
qr/$pat/i : qr/$pat/;
324 join '', $flags =~ /g/ ?
$s =~ /$q/g : $s =~ /$q/;
338 my $p = new Umph
::Prompt
(
344 $p->exit(\
@items, $args);
348 $p->display(\
@items, $args);
352 $p->max_shown_items(@
{$args});
356 $p->select(\
@items, $args);
364 # Callbacks. All of these are optional.
367 $p->toggle(\
@items, $args);
369 onitems
=> sub {return \
@items},
372 $p->display(\
@items, $args);
375 # Other (required) settings
376 total_items
=> scalar @items,
377 prompt_msg
=> 'gcap',
378 max_shown_items
=> 20
381 say STDERR
qq/Enter prompt. Type "help" to get a list of commands./;
389 gcap [-i] [-t] [-r E<lt>regexpE<gt>] [--proxy=E<lt>addrE<gt> | --no-proxy]
390 [--help] E<lt>urlE<gt> | E<lt>video_idE<gt>
394 --help Print help and exit
395 --version Print version and exit
397 -i, --interactive Run in interactive mode
398 -t, --title Use video title in filename
399 -r, --regexp arg (="/(\w|\s)/g") Cleanup title with regexp
400 --proxy arg (=http_proxy) Use proxy for http connections
401 --no-proxy Disable use of HTTP proxy
405 # vim: set ts=2 sw=2 tw=72 expandtab: