4 # Copyright (C) 2010-2011 Toni Gundogdu <legatvs@gmail.com>
6 # This program is free software: you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation, either version 3 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>.
25 binmode STDOUT
, ":utf8";
26 binmode STDERR
, ":utf8";
28 use version
0.77 (); our $VERSION = version
->declare("0.1.0");
30 use Getopt
::ArgvFile
(home
=> 1, startupFilename
=> [qw(.gcaprc)]);
31 use Getopt
::Long
qw(:config bundling);
39 # Convert args (of length of 11) to Youtube URLs. Do this
40 # before calling Getopt::* as some IDs may start with '-'
41 # which confuses the Getopt::*.
44 foreach my $arg (@ARGV)
46 if (length($arg) == 11)
48 push @argv, "http://youtube.com/v/$arg";
60 eval "require Umph::Prompt";
61 my $p = $@ ?
"" : ", Umph::Prompt version $Umph::Prompt::VERSION";
62 say "gcap version $VERSION$p
63 Copyright (C) 2010-2011 Toni Gundogdu
64 This is free software; see the source for copying conditions. There is NO
65 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.";
72 Pod
::Usage
::pod2usage
(-exitstatus
=> 0, -verbose
=> 1);
89 'version' => \
&print_version
,
90 'help' => \
&print_help
,
93 print_help
() unless scalar @ARGV;
95 $config{regexp
} ||= "/(\\w|\\s)/g";
96 apply_regexp
($config{regexp
}); # Check syntax.
99 sub spew_qe
{print STDERR
@_ unless $config{quiet
}}
108 my $req_body = "http://video.google.com/timedtext?hl=en&type=list&v=";
111 my $q = qr{(?:embed|v)[=/]((?>[-_\w]{11}))};
113 if ($url =~ /^http:/i)
117 $url = "$req_body$1";
121 croak
qq/error: "$url" looks nothing like a youtube page url\n/;
126 $url = "$req_body$url";
129 spew_qe
"Checking ...";
132 my $a = new LWP
::UserAgent
;
133 $a->env_proxy; # http://search.cpan.org/perldoc?LWP::UserAgent
134 $a->proxy('http', $config{proxy
}) if $config{proxy
};
135 $a->no_proxy('') if $config{no_proxy
};
139 my $p = new XML
::DOM
::Parser
(LWP_UserAgent
=> $a);
140 my $d = $p->parsefile($url);
141 my $r = $d->getDocumentElement;
144 for my $e ($r->getElementsByTagName("track"))
147 name
=> $e->getAttributeNode("name")->getValue || "",
148 lang_code
=> $e->getAttributeNode("lang_code")->getValue,
149 lang_transl
=> $e->getAttributeNode("lang_translated")->getValue,
152 push @captions, \
%tmp;
153 spew_qe
((++$n % 5 == 0) ?
" " : ".");
161 or croak
"error: $url: no match: video id\n";
163 get_title
($v, $a) if $config{title
};
164 prompt
() if $config{interactive
};
170 ++$t if $_->{selected
};
172 croak
"error: no input: no captions found\n" unless $t;
174 require HTML
::Entities
;
180 next unless $_->{selected
};
183 "http://video.google.com/timedtext?"
184 . "hl=$_->{lang_code}"
185 . "&lang=$_->{lang_code}"
186 . "&name=$_->{name}" . "&v=$v";
188 my $fname = sprintf "%s_%s.srt", $v, $_->{lang_code
};
192 $title = apply_regexp
($config{regexp
}, $title);
193 $fname = sprintf "%s_%s.srt", $title, $_->{lang_code
};
196 open my $fh, ">", $fname or die "$fname: $!\n";
197 binmode $fh, ":utf8";
199 spew_qe
sprintf "(%02d of %02d) ", ++$n, $t if $t > 0;
200 spew_qe
"Saving $fname ...";
202 $d = $p->parsefile($url);
203 $r = $d->getDocumentElement;
208 for my $e ($r->getElementsByTagName("text"))
211 my $tmp = $e->getFirstChild;
214 my $text = trim
($tmp->getNodeValue);
216 $text = HTML
::Entities
::decode_entities
($text);
218 my $start = $e->getAttributeNode("start")->getValue;
223 if ($start =~ /(\d+)/)
228 /\d+\.(\d+)/; # should only capture 3 first digits
231 my @start = gmtime($start_sec);
233 $tmp = $e->getAttributeNode("dur");
234 my $dur = $tmp ?
$tmp->getValue : $start - $last_start;
236 my $end_sec = $start + $dur;
238 $dur =~ /\d+\.(\d+)/; # should only capture 3 first digits
239 my $end_msec = $1 || 0;
241 my @end = gmtime($end_sec);
244 "%d\r\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\r\n%s\r\n\r\n",
245 $i++, @start[2, 1, 0], $start_msec, @end[2, 1, 0],
248 $last_start = $start;
262 return if scalar @captions == 0;
269 'n' => \
&select_none
,
270 'i' => \
&invert_selection
,
274 print STDERR
"Enter prompt. "
275 . qq/Type "help" to get a list of commands.\n/;
292 next unless $ln =~ /(\w)/;
293 $cmds{$1}() if defined $cmds{$1};
302 my $page_url = "http://youtube.com/watch?v=$v";
303 my $url = "http://www.youtube.com/get_video_info?&video_id=$v"
304 . "&el=detailpage&ps=default&eurl=&gl=US&hl=en";
306 spew_qe
":: Getting video title ...";
308 my $r = $a->get($url);
310 unless ($r->is_success)
312 printf STDERR
"\nerror: $page_url: %s\n", $r->status_line;
318 my $q = CGI
->new($r->content);
320 if ($q->param('reason'))
322 printf STDERR
"\nerror: %s: %s (errorcode: %d)\n",
323 $page_url, trim
($q->param("reason")),
324 $q->param("errorcode");
329 $title = trim
(Encode
::decode_utf8
($q->param('title')));
340 if ($re =~ /^\/(.*)\
/(.*)$/)
348 qq{error
: --regexp
: "$re" looks nothing like
`/pattern/flags`\n};
352 my $q = $flags =~ /i/ ?
qr/$pat/i : qr/$pat/;
353 join '', $flags =~ /g/ ?
$s =~ /$q/g : $s =~ /$q/;
358 print STDERR
"Commands:
360 list .. display found captions (> indicates selected for download)
363 invert .. invert selection
364 (number) .. toggle caption
365 get .. download selected captions
366 quit .. quit without downloading captions\n"
367 . qq/Command name abbreviations are allowed, e.g. "h" instead of "help"\n/;
380 print STDERR
"error: you have not selected anything\n";
390 printf STDERR
"%2s%02d: $_->{lang_transl}\n",
391 $_->{selected
} ?
">" : "", ++$i;
397 $_->{selected
} = 1 foreach @captions;
403 $_->{selected
} = 0 foreach @captions;
409 $_->{selected
} = !$_->{selected
} foreach @captions;
416 if ($i >= 0 && exists $captions[$i])
418 $captions[$i]->{selected
} = !$captions[$i]->{selected
};
423 print STDERR
"error: out of range\n";
440 gcap [-i] [-t] [-r E<lt>regexpE<gt>] [--proxy E<lt>addrE<gt> | --no-proxy]
441 [E<lt>urlE<gt> | E<lt>video_idE<gt>]
445 --help Print help and exit
446 --version Print version and exit
448 -i, --interactive Run in interactive mode
449 -t, --title Parse video title and use it in filename
450 -r, --regexp arg (="/(\w|\s)/g") Cleanup title with regexp
451 --proxy arg (=http_proxy) Use proxy for http connections
452 --no-proxy Disable use of HTTP proxy
456 # vim: set ts=2 sw=2 tw=72 expandtab: