Update CHANGES.
[clive.git] / clive
blob9c14fb2af0b3e2d53d62fda395a62531cba6158c
1 #!/usr/bin/env perl
2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clive, the non-interactive video extraction utility
6 # Copyright (c) 2007-2009 Toni Gundogdu <legatvs@gmail.com>
8 # Permission to use, copy, modify, and distribute this software for any
9 # purpose with or without fee is hereby granted, provided that the above
10 # copyright notice and this permission notice appear in all copies.
12 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 ###########################################################################
21 use warnings;
22 use strict;
24 use constant VERSION => "2.1.10";
25 use constant MBDIV => 0x100000;
26 use constant SHOWFMT_DEFAULT => qq/%D: "%t" | %mMB/;
28 binmode(STDOUT, ":utf8");
30 # NOTE: Using "require" instead of "use" causes "Can't locate
31 # auto/WWW/Curl/CURLOPT_USE.al in @INC".
32 use WWW::Curl::Easy 4.05;
33 use Getopt::Long qw(:config bundling);
34 use Cwd qw(getcwd);
35 use Config::Tiny;
36 use File::Spec;
37 use Encode;
39 # Non-essential modules: set flags indicating availability
40 my %opted_mods =
41 (Clipboard => 1, Expect => 1, IOPager => 1, ReadKey => 1);
42 eval "use Clipboard";
43 $opted_mods{Clipboard} = 0 if $@;
44 eval "use IO::Pager";
45 $opted_mods{IOPager} = 0 if $@;
46 sub exp_continue() { }; # Satisfies: "Bareword "exp_continue" not allowed while"
47 eval "use Expect";
48 $opted_mods{Expect} = 0 if $@;
49 eval "use Term::ReadKey";
50 $opted_mods{ReadKey} = 0 if $@;
52 my $CONFIGDIR = $ENV{CLIVE_HOME}
53 || File::Spec->catfile($ENV{HOME}, ".config/clive");
55 my $CONFIGFILE = File::Spec->catfile($CONFIGDIR, "config");
56 my $CACHEFILE = File::Spec->catfile($CONFIGDIR, "cache");
57 my $RECALLFILE = File::Spec->catfile($CONFIGDIR, "recall");
59 my %opts; # runtime options
60 my @queue; # input URLs
61 my $curl; # curl handle, reused throughout lifespan
62 my $cache_db; # handle to cache BDB
63 my %cache; # handle to cache BDB (tied hash)
64 my $hash; # sha1 hash of the current url used together with %cache
65 my %entry; # multi-purpose hash for caching
66 my $ytube_logged = 0; # youtube: whether logged-in
67 my $time_started; # time file transfer started
68 my @exec_files; # holds fnames for --exec
69 my @emit_queue; # videos to be emitted
70 my $logfile; # path to logfile (--output-file, --append-file)
71 my %dp; # dot progress data
72 my %bp; # bar progress data
73 my $workdir = getcwd; # startup workdir
74 my @stream = (0, -1); # 0=stream flag, 1=stream pid
75 my $curr_fpath; # current video output filepath
76 my $recv_sigwinch = 0; # whether SIGWINCH was received
77 my $term_width; # current terminal width
79 my %re_hosts = ( # Precompiled regex used to identify the host
80 IsYoutube => qr|youtube.com|i,
81 IsGoogle => qr|video.google.|i,
82 IsSevenload => qr|sevenload.com|i,
83 IsBreak => qr|break.com|i,
84 IsLastfm => qr|last.fm|i,
85 IsLiveleak => qr|liveleak.com|i,
86 IsEvisor => qr|evisor.tv|i,
87 IsDmotion => qr|dailymotion.com|i,
88 IsCctv => qr|tv.cctv.com|i,
91 my @re_hosts_arr = (
92 [$re_hosts{IsYoutube}, \&handle_youtube],
93 [$re_hosts{IsGoogle}, \&handle_google],
94 [$re_hosts{IsSevenload}, \&handle_sevenload],
95 [$re_hosts{IsBreak}, \&handle_break],
96 [$re_hosts{IsLastfm}, \&handle_lastfm],
97 [$re_hosts{IsLiveleak}, \&handle_liveleak],
98 [$re_hosts{IsEvisor}, \&handle_evisor],
99 [$re_hosts{IsDmotion}, \&handle_dmotion],
100 [$re_hosts{IsCctv}, \&handle_cctv],
103 # Parse config
104 my $c = Config::Tiny->read($CONFIGFILE);
105 %opts = (
106 progress => $c->{_}->{progress},
107 agent => $c->{http}->{agent},
108 proxy => $c->{http}->{proxy},
109 maxspeed => $c->{http}->{maxspeed},
110 minspeed => $c->{http}->{minspeed},
111 format => $c->{output}->{format},
112 savedir => $c->{output}->{savedir},
113 cclass => $c->{output}->{cclass},
114 fnfmt => $c->{output}->{file},
115 showfmt => $c->{output}->{show},
116 ytuser => $c->{youtube}->{user},
117 ytpass => $c->{youtube}->{pass},
118 exec => $c->{commands}->{exec},
119 streamexec => $c->{commands}->{stream},
120 clivepass => $c->{commands}->{clivepass},
123 $opts{clivepass} = $ENV{CLIVEPASS_PATH} unless $opts{clivepass};
124 $opts{progress} = 'bar' unless $opts{progress};
125 $opts{format} = $opts{format} || 'flv';
126 $opts{extract} = 1;
127 $opts{login} = 1;
128 $opts{case} = 1;
130 GetOptions(
131 \%opts,
132 'debug|d', 'help|h', 'overwrite|W', 'savebatch|T=s',
133 'paste|p', 'show|s', 'delete|D', 'clear|C',
134 'continue|c', 'renew|R', 'recall|r', 'format|f=s',
135 'output|o=s', 'append|a=s', 'background|b', 'quiet|q',
136 'grep|g=s', 'agent|U=s', 'proxy|y=s', 'savedir|S=s',
137 'cclass|l=s', 'exec|x=s', 'progress|G=s', 'clivepass|V=s',
138 'stream=i',
139 'hosts' => \&print_hosts,
140 'version|v' => \&print_version,
142 # Commented out until WWW::Curl is fixed:
143 # 'maxspeed!', 'minspeed!',
144 # Workarounds since $longopt!|$shortopt cannot be used.
145 'no-extract|n' => sub { $opts{extract} = 0 },
146 'no-login|L' => sub { $opts{login} = 0 },
147 'no-proxy|X' => sub { $opts{proxy} = "" },
149 # Workaround for options with dashes. There's likely a better way.
150 'ignore-case|i' => sub { $opts{case} = 0 },
151 'filename-format|N=s' => sub { $opts{fnfmt} = $_[1] },
152 'show-format|H=s' => sub { $opts{showfmt} = $_[1] },
153 'youtube-user|u=s' => sub { $opts{ytuser} = $_[1] },
154 'youtube-pass|t=s' => sub { $opts{ytpass} = $_[1] },
155 'emit-csv|e' => sub { $opts{emitcsv} = 1 },
156 'emit-xml|E' => sub { $opts{emitxml} = 1 },
157 'stream-exec=s' => sub { $opts{streamexec} = $_[1] },
158 'output-video|O=s' => sub { $opts{outputfname} = $_[1] },
159 ) or exit(1);
161 if ($opts{help})
163 require Pod::Usage;
164 Pod::Usage::pod2usage(-exitstatus => 0, -verbose => 1);
167 main();
169 ## Subroutines: Signal handlers
171 sub handle_sigwinch
174 # my $sig_name = shift;
175 $recv_sigwinch = 1;
178 ## Subroutines: Connection
180 sub init_curl
182 $curl = WWW::Curl::Easy->new;
184 $curl->setopt(CURLOPT_USERAGENT, $opts{agent} || "Mozilla/5.0");
185 $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
186 $curl->setopt(CURLOPT_AUTOREFERER, 1);
187 $curl->setopt(CURLOPT_HEADER, 1);
188 $curl->setopt(CURLOPT_NOBODY, 0);
190 $curl->setopt(CURLOPT_VERBOSE, 1)
191 if $opts{debug};
193 $curl->setopt(CURLOPT_PROXY, $opts{proxy})
194 if defined $opts{proxy};
196 $curl->setopt(CURLOPT_MAX_RECV_SPEED_LARGE, $opts{maxspeed})
197 if $opts{maxpseed}; # NOTE: No effect. Bug in WWW::Curl::Easy?
199 $curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $opts{minspeed})
200 if $opts{minspeed}; # Ditto.
203 sub auth_youtube
204 { # Log into Youtube
205 print "[youtube] attempt to login as $opts{ytuser} ..."
206 unless $opts{quiet};
208 my $response = "";
209 open my $fh, ">", \$response;
211 my $login_url =
212 "http://uk.youtube.com/login?current_form=loginform"
213 . "&username=$opts{ytuser}&password=$opts{ytpass}"
214 . "&action_login=log+in&hl=en-GB";
216 $curl->setopt(CURLOPT_URL, $login_url);
217 $curl->setopt(CURLOPT_COOKIEFILE, ""); # Enable cookies from here on
218 $curl->setopt(CURLOPT_ENCODING, ""); # Supported encodings
219 $curl->setopt(CURLOPT_WRITEDATA, $fh);
221 my $rc = $curl->perform;
222 my $errmsg;
224 if ($rc == 0)
226 $response =~ tr{\n}//d;
227 $errmsg = "error: login was incorrect"
228 if $response =~ /your log-in was incorrect/i;
229 $errmsg = "error: check your login password"
230 if $response =~ /check your password/i and !$errmsg;
231 $errmsg = "error: too many login failures, try again later"
232 if $response =~ /too many login failures/i and !$errmsg;
234 else
236 $errmsg = "error: " . $curl->strerror($rc) . " (http/$rc)";
238 close $fh;
240 print STDERR "\n$errmsg\n" and exit
241 if $errmsg;
243 print "done.\n"
244 unless $opts{quiet};
246 $curl->setopt(CURLOPT_COOKIE,
247 "is_adult=" . uc(Digest::SHA::sha1_hex(rand())));
249 $ytube_logged = 1;
252 # Subroutines: Queue
254 sub process_queue
256 init_curl();
258 require Digest::SHA;
259 require HTML::TokeParser;
260 require URI::Escape;
261 require File::Basename;
262 require POSIX;
264 foreach (@queue)
266 $hash = Digest::SHA::sha1_hex($_);
268 my $errmsg;
269 my ($rc, $rfh, $response) = fetch_page($_);
271 if ($rc == 0 or $rc == 0xff)
273 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE)
274 unless $rc == 0xff; # read from cache
276 if ($rc == 200 or $rc == 0xff)
278 if (!defined($entry{page_url}))
280 next if process_page($_, \$response, $rfh) == -1;
282 extract_video() if $entry{xurl};
284 else
286 $errmsg = $curl->strerror($rc) . " (http/$rc)";
289 else
291 $errmsg = $curl->strerror($rc) . " (http/$rc)";
293 close $rfh;
295 print STDERR "\nerror: $errmsg\n"
296 if $errmsg;
298 exec_cmd();
299 emit();
302 sub fetch_page
304 my ($url, $response, $from_cache, $rc) = (shift, "");
305 open my $fh, ">", \$response;
307 # Youtube: login only if both username and password are defined
308 if ($opts{ytuser} and $opts{ytpass} and $opts{login})
310 auth_youtube()
311 if !$ytube_logged and $url =~ /$re_hosts{IsYoutube}/;
314 if ($cache{$hash})
316 fetch_entry($hash)
317 ; # Make sure cached "format" matches with options
318 $from_cache = 1
319 if $opts{format} eq $entry{file_format};
322 $from_cache = 0
323 if $opts{renew};
325 printf "%s $url ...", $from_cache ? "cache" : "fetch"
326 unless $opts{quiet};
328 $rc = 0xff; # flag: read cache entry
330 unless ($from_cache)
332 %entry = ();
333 $curl->setopt(CURLOPT_URL, $url);
334 $curl->setopt(CURLOPT_ENCODING, "");
335 $curl->setopt(CURLOPT_WRITEDATA, $fh);
336 $rc = $curl->perform;
339 return ($rc, $fh, decode_utf8($response));
342 sub process_page
344 my ($url, $response_ref, $response_fh) = @_;
346 print "done.\nprocess page ..."
347 unless $opts{quiet};
349 #$$response_ref =~ tr{\n}//d;
351 my $p = HTML::TokeParser->new($response_ref);
352 $p->get_tag("title");
353 my $title = $p->get_trimmed_text;
355 my ($xurl, $id, $_title, $supported);
356 $supported = 0;
357 foreach (@re_hosts_arr)
359 my ($re, $handler) = @{$_};
360 if ($url =~ /$re/)
362 $supported = 1;
363 ($xurl, $id, $_title) =
364 &$handler($response_ref, $response_fh, $url);
365 $title = $_title || $title;
366 last;
369 die "error: lookup array missing handler; should never get here\n"
370 if !$supported;
372 return -1
373 if !$xurl
374 or !$id
375 or !$title;
377 $title =~ tr{;}//d; # Cache values cannot contain ';'
379 $entry{page_url} = $url;
380 $entry{xurl} = $xurl;
381 $entry{page_title} = $title;
382 $entry{video_id} = $id;
383 $entry{file_format} = $opts{format};
385 return 0;
388 sub query_video_length
390 my ($content_type, $errmsg);
392 unless ($entry{file_length})
394 print "done.\nquery length ..."
395 unless $opts{quiet};
397 $curl->setopt(CURLOPT_URL, $entry{xurl});
399 # Do not download: GET => HEAD request.
400 $curl->setopt(CURLOPT_NOBODY, 1);
401 my $rc = $curl->perform;
403 # Reset back: HEAD => GET
404 $curl->setopt(CURLOPT_HTTPGET, 1);
406 $entry{file_length} =
407 $curl->getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD);
409 $content_type = $entry{file_suffix} =
410 $curl->getinfo(CURLINFO_CONTENT_TYPE);
412 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
414 if ($rc == 200)
416 my $content_ok = 0;
417 if ($content_type =~ m{video/(.*)})
419 $entry{file_suffix} = $1;
420 if ($content_type =~ /(.*)-(.*)$/)
422 $entry{file_suffix} = $2;
424 $content_ok = 1;
427 # Evisor and Metacafe return "text/plain" for Content-Type
428 elsif ($content_type =~ m{text/plain})
430 if ($opts{format} eq "flv")
432 if ( $entry{page_url} =~ /$re_hosts{IsEvisor}/
433 or $entry{page_url} =~ /$re_hosts{IsCctv}/)
435 $entry{file_suffix} = "flv";
436 $content_ok = 1;
441 # Liveleak and Break return "(flv-)application/octet-stream"
442 elsif ($content_type =~ m{application/octet-stream})
444 if ($opts{format} eq "flv")
446 if ( $entry{page_url} =~ /$re_hosts{IsBreak}/
447 or $entry{page_url} =~ /$re_hosts{IsLiveleak}/)
449 $entry{file_suffix} = 'flv';
450 $content_ok = 1;
454 $errmsg =
455 "expected different content-type, "
456 . "received \"$content_type\""
457 unless $content_ok;
459 else
461 $errmsg = "server returned http/$rc";
464 else
465 { # Construct content-type from cache
466 $content_type = "video/$entry{file_suffix}";
469 unless ($opts{quiet})
471 if (!$errmsg) { print "done.\n"; }
472 else { print STDERR "\nerror: $errmsg\n"; }
475 return ($errmsg ? -1 : 0, $content_type);
478 sub extract_video
480 my ($rc, $content_type) = query_video_length();
482 return
483 if $rc != 0 or !defined $content_type;
485 my $fn = $opts{outputfname}
486 || title_to_filename($entry{page_title});
487 my $path = File::Spec->catfile($opts{savedir} || $workdir, $fn);
488 my $filemode = ">";
489 my $remaining = $entry{file_length};
490 my $size = -s $path || 0;
491 my $cont_from = 0;
493 save_entry($hash);
495 if ($size > 0 and !$opts{overwrite})
497 if ($size == $entry{file_length} and $opts{extract})
499 print STDERR
500 "error: file is already fully retrieved; nothing to do\n";
502 push @exec_files, $path
503 if $opts{exec};
505 return
506 unless $opts{emitcsv} or $opts{emitxml};
509 elsif ($size < $entry{file_length} and $opts{continue})
511 $cont_from = $size;
512 $filemode = ">>";
513 $remaining = ($entry{file_length} - $cont_from);
515 else
517 ($path, $fn) =
518 newname_if_exists($opts{savedir} || $workdir, $fn);
522 if ($opts{emitcsv} or $opts{emitxml})
524 $entry{fn} = $fn;
525 $entry{remaining} = $remaining;
526 $entry{cont_from} = $cont_from;
527 push @emit_queue, {%entry};
528 return;
531 unless ($opts{quiet})
533 print "file: $fn\n";
534 print "length: $entry{file_length} ";
536 printf "(%.2fMB) ", $entry{file_length} / MBDIV
537 if $entry{file_length};
539 printf "from: %u (left: %u) ", $cont_from, $remaining
540 if $cont_from;
542 printf "[$content_type]"
543 if $content_type;
545 print "\n";
548 my $errmsg;
549 if ($rc == 0)
551 return
552 unless $opts{extract};
554 if (open my $fh, "$filemode$path")
556 $curr_fpath = $path;
558 # Disable: encoding, header
559 $curl->setopt(CURLOPT_HEADER, 0);
560 $curl->setopt(CURLOPT_ENCODING, "identity");
561 $curl->setopt(CURLOPT_URL, $entry{xurl});
562 $curl->setopt(CURLOPT_WRITEDATA, $fh);
564 $curl->setopt(CURLOPT_RESUME_FROM, $cont_from)
565 if $cont_from;
567 unless ($opts{quiet})
569 $curl->setopt(CURLOPT_PROGRESSFUNCTION,
570 \&progress_callback);
571 $curl->setopt(CURLOPT_NOPROGRESS, 0);
572 $time_started = time;
574 # Use 'dot' progress if the output is not a TTY
575 if ( $opts{progress} !~ /^dot/
576 and $opts{progress} ne 'none')
578 $opts{progress} = 'dot'
579 if !-t STDOUT or !-t STDERR;
582 $stream[0] = 0; # reset streaming flag
584 if ($opts{progress} =~ /^bar/)
586 bar_init($cont_from, $entry{file_length});
588 elsif ($opts{progress} =~ /^dot/)
590 dot_init();
594 $rc = $curl->perform;
595 close $fh;
597 if ($rc == 0)
599 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
600 if ($rc == 200 or $rc == 206)
602 if ($opts{progress} =~ /^bar/) { bar_finish() }
603 elsif ($opts{progress} =~ /^dot/) { dot_finish() }
604 waitpid($stream[1], 0) if $stream[0];
606 else
608 $errmsg = $curl->strerror($rc) . " (http/$rc)";
611 else
613 $errmsg = $curl->strerror($rc) . " (http/$rc)";
616 # Reset
617 $curl->setopt(CURLOPT_RESUME_FROM, 0);
618 $curl->setopt(CURLOPT_HEADER, 1);
620 else
622 $errmsg = "$path: $!";
625 else
627 $errmsg = $curl->strerror($rc) . " (http/$rc)";
630 if (!$errmsg)
632 print "\nclosed http/$rc.\n"
633 unless $opts{quiet};
635 push @exec_files, $path
636 if $opts{exec};
638 else
640 print STDERR "\nerror: $errmsg\n";
643 # Disable: progress
644 $curl->setopt(CURLOPT_NOPROGRESS, 1);
647 sub get_queue
649 if ($opts{recall} and -e $RECALLFILE)
651 if (open my $fh, "<$RECALLFILE")
653 parse_input($_) while (<$fh>);
654 close $fh;
656 else
658 print STDERR "error: $RECALLFILE: $!";
662 if ($opts{paste})
664 print STDERR "error: Clipboard module not found\n" and exit
665 unless $opted_mods{Clipboard};
666 my $data = Clipboard->paste();
667 if ($data)
669 parse_input($_) foreach split(/\n/, $data);
673 parse_input($_) foreach @ARGV;
674 grep_cache() if $opts{grep};
675 unless (@queue) { parse_input($_) while (<STDIN>); }
677 if (open my $fh, ">$RECALLFILE")
679 print $fh "$_\n" foreach @queue;
680 close $fh;
682 else
684 print STDERR "error: $RECALLFILE: $!";
687 if ($opts{savebatch})
689 if (open my $fh, ">", $opts{savebatch})
691 print $fh "$_\n" foreach @queue;
692 close $fh;
694 else
696 print STDERR "error: $opts{savebatch}: $!";
701 sub parse_input
703 my $url = shift;
705 return if $url =~ /^$/;
706 return if $url =~ /^#/;
708 chomp $url;
710 if ($url =~ /&srcurl=(.*?)&/)
711 { # GVideo: one of many redirects
712 require URI::Escape;
713 printf "found redirect ...%s\n=> %s\n",
714 (split(/&/, $url))[0],
715 (split(/&/, URI::Escape::uri_unescape($1)))[0]
716 unless $opts{quiet};
717 $url = URI::Escape::uri_unescape($1);
720 # Insert http:// if not found
721 $url = "http://$url"
722 if $url !~ m{^http://}i;
724 # Translate embedded URL to video page URL
725 translate_embed(\$url);
727 # Last.fm wraps Youtube videos as their own
728 if ($url =~ /$re_hosts{IsLastfm}/)
730 $url =~ /\+1\-(.+)/;
732 print STDERR "error: nosupport: $url\n" and return -1
733 unless defined($1);
735 $url = "http://youtube.com/watch?v=$1";
738 # Remove params from the URL
739 ($url) = split(/&/, $url);
741 foreach my $re (%re_hosts)
743 push @queue, $url and return 0
744 if $url =~ /$re/;
747 print STDERR "error: nosupport: $url\n";
749 return -1;
752 # Subroutines: Video page handlers
754 sub handle_youtube
756 my ($response_ref, $xurl) = @_;
758 my %re = (
759 GrabID => qr/"video_id": "(.*?)"/,
760 GrabT => qr/"t": "(.*?)"/
763 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
764 my $t = $1 if $$response_ref =~ /$re{GrabT}/;
766 if ($id and $t)
768 $xurl = "http://youtube.com/get_video?video_id=$id&t=$t";
770 my $fmt;
771 if ($opts{format} eq "mp4") { $fmt = 18; }
772 elsif ($opts{format} eq "3gpp") { $fmt = 17; }
773 elsif ($opts{format} eq "xflv") { $fmt = 6; }
775 $xurl .= "&fmt=$fmt"
776 if $fmt;
778 else
780 printf STDERR "\nerror: failed to extract &%s\n", $id
781 ? "t"
782 : "video_id";
784 return ($xurl, $id);
787 sub handle_google
789 my ($response_ref) = @_;
791 my %re = (
793 GrabVideoURL => qr|videoUrl\\x3d(.*?)\\x26|,
794 GrabID => qr|docid:'(.*?)'|,
795 GrabMP4 => qr|href="http://vp\.(.*?)"|,
798 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
800 my $xurl = URI::Escape::uri_unescape($1)
801 if $$response_ref =~ /$re{GrabVideoURL}/;
803 my $mp4 = $1 if $$response_ref =~ /$re{GrabMP4}/;
805 my $errmsg;
806 $errmsg = "video id not found" if !$id;
807 $errmsg = "extraction url not found" if !$xurl && !$errmsg;
809 print STDERR "\nerror: $errmsg\n" if $errmsg;
811 $xurl = "http://vp.$mp4"
812 if $mp4 && $opts{format} eq "mp4" && $xurl;
814 return ($xurl, $id);
817 sub handle_sevenload
819 my ($response_ref, $response_fh) = @_;
821 my %re = (GrabConfigPath => qr|configPath=(.*?)"|);
823 my $conf_path = URI::Escape::uri_unescape($1)
824 if $$response_ref =~ /$re{GrabConfigPath}/;
826 my ($xurl, $id, $errmsg);
827 if ($conf_path)
829 ($xurl, $id) =
830 fetch_sevenload_configxml($conf_path, $response_fh);
832 else
834 $errmsg = "configPath not found";
836 $errmsg = "item id not found" if !$errmsg && !$id;
837 $errmsg = "extraction url not found" if !$errmsg && !$xurl;
838 print STDERR "\nerror: $errmsg\n" if $errmsg;
839 return ($xurl, $id);
842 sub handle_break
844 my ($response_ref) = @_;
846 my %re = (
847 GrabTitle => qr|id="vid_title" content="(.*?)"|,
848 GrabID => qr|ContentID='(.*?)'|,
849 GrabFilePath => qr|ContentFilePath='(.*?)'|,
850 GrabFileName => qr|FileName='(.*?)'|
853 my $title = $1 if $$response_ref =~ /$re{GrabTitle}/;
854 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
855 my $fpath = $1 if $$response_ref =~ /$re{GrabFilePath}/;
856 my $fname = $1 if $$response_ref =~ /$re{GrabFileName}/;
858 my ($xurl, $errmsg);
859 if ($fpath and $fname)
861 $xurl = "http://media1.break.com/dnet/media/$fpath/$fname.flv";
863 else
865 $errmsg = "failed to extract ContentFilePath"
866 if !$fpath;
868 $errmsg = "failed to extract FileName"
869 if !$fname and !$errmsg;
872 $errmsg = "failed to extract title"
873 if !$title and !$errmsg;
875 $errmsg = "failed to extract id"
876 if !$id and !$errmsg;
878 print STDERR "\nerror: $errmsg\n"
879 if $errmsg;
881 return ($xurl, $id, $title);
884 sub handle_liveleak
886 my ($response_ref, $response_fh) = @_;
888 my %re = (
889 GrabID => qr|token=(.*?)&|,
890 GrabConfigURL => qr|'config','(.*?)'|,
893 my $id = $1
894 if $$response_ref =~ /$re{GrabID}/;
896 my $conf_url = URI::Escape::uri_unescape($1)
897 if $$response_ref =~ /$re{GrabConfigURL}/;
899 my ($xurl, $errmsg);
900 if ($conf_url)
902 $xurl = fetch_liveleak_config($conf_url);
904 # Re-enable: header, reset WRITEDATA, the above overrides the
905 # original settings.
906 $curl->setopt(CURLOPT_HEADER, 0);
907 $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
909 else
911 $errmsg = "config url not found";
914 $errmsg = "id not found" if !$id && !$errmsg;
915 print "error: $errmsg\n" if $errmsg;
917 return ($xurl, $id);
920 sub handle_evisor
922 my ($respr) = @_;
924 my %re = (
925 GrabXurl => qr|file=(.*?)"|,
926 GrabID => qr|.+/(.*?).flv|,
929 my ($xurl, $id, $errmsg);
931 $xurl = $1
932 if $$respr =~ /$re{GrabXurl}/;
934 $id = $1
935 if $xurl and $xurl =~ /$re{GrabID}/;
937 $errmsg = "video extraction url not found"
938 unless $xurl;
940 $errmsg = "video id not found"
941 unless $id and !$errmsg;
943 print STDERR "error: $errmsg\n"
944 if $errmsg;
946 return ($xurl, $id);
949 sub handle_dmotion
951 my ($resp) = @_;
953 my %re = (
954 GrabID => qr|swf%2F(.*?)"|,
955 GrabPaths => qr|"video", "(.*?)"|
958 my ($id, @paths);
959 $id = $1 if $$resp =~ /$re{GrabID}/;
960 my $paths = URI::Escape::uri_unescape($1)
961 if $$resp =~ /$re{GrabPaths}/;
963 use constant ADDR => "http://dailymotion.com";
965 my $xurl;
966 if ($id && $paths)
968 foreach (split(/\|\|/, $paths))
970 my ($path, $type) = split(/@@/, $_);
971 if ($type eq "spark")
972 { # same as regular flv
973 $xurl = ADDR . $path;
975 if ($type eq $opts{format})
977 $xurl = ADDR . $path;
978 last;
983 my $errmsg;
984 $errmsg = "id not found" if !$id;
985 $errmsg = "paths not found" if !$paths && !$errmsg;
986 $errmsg = "failed to construct xurl" if !$xurl && !$errmsg;
988 print STDERR "\nerror: $errmsg\n"
989 if $errmsg;
991 return ($xurl, $id);
994 sub handle_cctv
996 my ($resp, $resp_fh, $page_url) = @_;
997 my $re = qr|videoId=(.*?)&|;
999 my ($id, $xurl);
1000 $id = $1 if $$resp =~ /$re/;
1002 if ($id)
1004 my $domain = join('.', strdomain($page_url));
1005 my $conf_url =
1006 "http://$domain/playcfg/flv_info_new.jsp?videoId=$id";
1007 $xurl = fetch_cctv_space_config($conf_url, $resp_fh);
1009 else
1011 print STDERR "\nerror: id not found\n";
1014 return ($xurl, $id);
1017 # Subroutines: Progress
1018 # NOTE: the 'dot' progress copies much from wget.
1020 sub progress_callback
1022 my $percent = 0;
1024 if ($opts{progress} =~ /^dot/) { $percent = dot_update(@_); }
1025 elsif ($opts{progress} =~ /^bar/) { $percent = bar_update(@_); }
1027 if ( $opts{stream}
1028 && $opts{streamexec}
1029 && !$stream[0])
1031 fork_streamer() if $percent >= $opts{stream};
1033 return 0;
1036 sub dot_init
1038 $dp{dots} = 0;
1039 $dp{rows} = 0;
1040 $dp{dlthen} = 0;
1041 $dp{accum} = 0;
1043 # Default style
1044 $dp{dot_bytes} = 1024;
1045 $dp{dot_spacing} = 10;
1046 $dp{dots_in_line} = 50;
1048 my ($type, $style) = split(/:/, $opts{progress});
1050 if ($style)
1052 if ($style eq 'binary')
1054 $dp{dot_bytes} = 8192;
1055 $dp{dot_spacing} = 16;
1056 $dp{dots_in_line} = 48;
1058 elsif ($style eq 'mega')
1060 $dp{dot_bytes} = 65536;
1061 $dp{dot_spacing} = 8;
1062 $dp{dots_in_line} = 48;
1067 sub dot_update
1069 my ($clientp, $dltotal, $dlnow, $ultotal, $ulnow) = @_;
1071 my ($percent, $elapsed, $rate, $eta) =
1072 calc_progress($dlnow, $dltotal);
1074 return 0
1075 if $elapsed < 1.0;
1077 my $row_bytes = $dp{dot_bytes} * $dp{dots_in_line};
1079 $dp{accum} += $dlnow - $dp{dlthen};
1080 $dp{dlthen} = $dlnow;
1082 for (; $dp{accum} >= $dp{dot_bytes} ; $dp{accum} -= $dp{dot_bytes})
1085 printf "\n%6dK", $dp{rows} * $row_bytes / 1024
1086 if $dp{dots} == 0;
1088 print " "
1089 if $dp{dots} % $dp{dot_spacing} == 0;
1091 ++$dp{dots};
1092 print ".";
1094 if ($dp{dots} >= $dp{dots_in_line})
1096 ++$dp{rows};
1097 $dp{dots} = 0;
1099 dot_print_row_stats($percent, $elapsed, $eta, $rate, 0);
1102 return $percent;
1105 sub dot_finish
1107 return if $opts{quiet};
1109 my $row_bytes = $dp{dot_bytes} * $dp{dots_in_line};
1111 printf "\n%6dK", $dp{rows} * $row_bytes / 1024
1112 if $dp{dots} == 0;
1114 for (my $i = $dp{dots} ; $i < $dp{dots_in_line} ; $i++)
1116 print " "
1117 if $i % $dp{dot_spacing} == 0;
1119 print " ";
1122 my $elapsed = time - $time_started;
1123 my $eta = time2str($elapsed, 1);
1124 my $rate = $entry{file_length} / $elapsed;
1126 dot_print_row_stats(100, $elapsed, $eta, $rate, 1);
1129 sub dot_print_row_stats
1131 my ($percent, $elapsed, $eta, $rate, $last) = @_;
1132 my ($unit, $_rate) = get_units($rate);
1134 printf "%3d%% %4.1f%s", $percent, $_rate, $unit;
1135 printf "%s%s", $last ? "=" : " ", $eta;
1138 use constant DEFAULT_TERM_WIDTH => 80;
1140 sub get_term_width
1142 return DEFAULT_TERM_WIDTH
1143 unless $opted_mods{ReadKey};
1144 my ($width) = GetTerminalSize();
1145 return $width;
1148 sub bar_init
1150 my ($initial, $total) = @_;
1152 $total = $initial
1153 if $initial > $total;
1155 $term_width = get_term_width();
1157 $bp{initial} = $initial; # bytes dl previously
1158 $bp{total} = $total; # expected bytes
1159 $bp{width} = DEFAULT_TERM_WIDTH - 1;
1160 $bp{started} = time;
1161 $bp{lastupd} = 0;
1162 $bp{done} = 0;
1165 use constant REFRESH_INTERVAL => 0.2;
1167 sub bar_update
1169 my ($clientp, $total, $now, $ultotal, $ulnow) = @_;
1171 my $force_update = 0;
1172 if ($recv_sigwinch)
1174 my $old_width = $term_width;
1175 $term_width = get_term_width();
1176 if ($term_width != $old_width)
1178 $bp{width} = $term_width - 1;
1179 $force_update = 1;
1181 $recv_sigwinch = 0;
1184 my $tnow = time;
1185 my $elapsed = $tnow - $bp{started};
1187 if (!$bp{done})
1189 return 0
1190 if (($elapsed - $bp{lastupd}) < REFRESH_INTERVAL
1191 && !$force_update);
1193 else
1195 $now = $bp{total};
1198 $bp{lastupd} = $elapsed;
1199 my $size = $bp{initial} + $now;
1201 my $fname_len = 32;
1202 if ($bp{width} > DEFAULT_TERM_WIDTH)
1204 $fname_len += $bp{width} - DEFAULT_TERM_WIDTH;
1207 my $buffer =
1208 substr(File::Basename::basename($curr_fpath), 0, $fname_len);
1210 my $percent = 0;
1211 if ($bp{total} > 0)
1213 my $_size = !$bp{done} ? $size : $now;
1214 $percent = 100.0 * $size / $bp{total};
1215 if ($percent < 100)
1217 $buffer .= sprintf(" %2d%% ", $percent);
1219 else
1221 $buffer .= sprintf(" 100%%");
1223 $buffer .= sprintf(" %4.1fM / %4.1fM",
1224 $_size / MBDIV, $bp{total} / MBDIV);
1227 my $rate = $elapsed ? ($now / $elapsed) : 0;
1228 my $tmp = "";
1229 if ($rate > 0)
1231 my $eta;
1232 if (!$bp{done})
1234 my $left = ($total - $now) / $rate;
1235 $eta = time2str($left);
1237 else
1239 $eta = time2str($elapsed);
1241 my ($unit, $_rate) = get_units($rate);
1242 $tmp = sprintf(" %4.1f%s %6s", $_rate, $unit, $eta);
1244 else
1246 $tmp = " --.-K/s --:--";
1249 # pad to max. width leaving enough space for rate+eta
1250 my $pad = $bp{width} - length($tmp) - length($buffer);
1251 $buffer .= sprintf("%${pad}s", " ");
1252 $buffer .= $tmp; # append rate+eta
1254 printf("\r%s", $buffer);
1255 $bp{count} = $now;
1257 return $percent;
1260 sub bar_finish
1262 return if $opts{quiet};
1264 if ( $bp{total} > 0
1265 && $bp{count} + $bp{initial} > $bp{total})
1267 $bp{total} = $bp{initial} + $bp{count};
1270 $bp{done} = 1;
1271 bar_update(-1, -1, -1, -1, -1);
1274 sub calc_progress
1276 my ($dlnow, $dltotal, $elapsed) = @_;
1278 my $percent = 0;
1280 $percent = int($dlnow / $dltotal * 100)
1281 if $dltotal;
1283 $elapsed = time - $time_started
1284 unless $elapsed;
1286 my $eta = '--:--';
1287 my $rate = 0;
1289 $rate = $dlnow / $elapsed
1290 if $elapsed;
1292 if ($rate > 0)
1294 my $left = ($dltotal - $dlnow) / $rate;
1295 $eta = time2str($left);
1298 return ($percent, $elapsed, $rate, $eta);
1301 sub time2str
1303 my ($secs) = @_;
1305 my $str;
1306 if ($secs < 100)
1308 $str = sprintf("%ds", $secs);
1310 elsif ($secs < 100 * 60)
1312 $str = sprintf("%dm%ds", $secs / 60, $secs % 60);
1314 elsif ($secs < 48 * 3600)
1316 $str = sprintf("%dh%dm", $secs / 3600, ($secs / 60) % 60);
1318 elsif ($secs < 100 * 86400)
1320 $str = sprintf("%dd%dh", $secs / 86400, ($secs / 3600) % 60);
1322 else
1324 $str = sprintf("%dd", $secs / 86400);
1326 return $str;
1329 sub get_units
1331 my ($rate) = @_;
1332 my @units = qw|K/s M/s G/s|;
1334 my $i = 0;
1335 if ($rate < 1024 * 1024)
1337 $rate /= 1024;
1339 elsif ($rate < 1024 * 1024)
1341 $rate /= 1024 * 1024;
1342 $i = 1;
1344 elsif ($rate < 1024 * 1024 * 1024)
1346 $rate /= 1024 * 1024 * 1024;
1347 $i = 2;
1349 return ($units[$i], $rate);
1352 # Subroutines: LittleHelpers
1354 sub main
1356 $SIG{WINCH} = \&handle_sigwinch;
1357 init_cache();
1359 if ($opts{clear}) { clear_cache(); }
1360 elsif ($opts{show}) { show_cache(); }
1362 verify_exec();
1364 grab_clivepass();
1365 get_queue();
1367 select STDERR;
1368 $| = 1; # => unbuffered
1369 select STDOUT;
1370 $| = 1;
1372 daemonize()
1373 if $opts{background};
1375 process_queue();
1377 free_cache();
1380 sub grab_clivepass
1383 # TODO: Supports only Youtube. Expand to support other websites as needed.
1384 return
1385 unless $opts{login}
1386 and $opts{ytuser}
1387 and $opts{ytpass} eq "-";
1389 print STDERR "error: no path to clivepass, use --clivepass\n"
1390 and exit
1391 unless $opts{clivepass};
1393 print STDERR "error: Expect module not found\n" and exit
1394 unless $opted_mods{Expect};
1396 my $phrase;
1397 $phrase = getpass("Enter passphrase for clivepass: ")
1398 while (!$phrase);
1400 my $e = Expect->new;
1401 $e->log_stdout(0);
1402 $e->spawn($opts{clivepass}, "-g", $opts{ytuser})
1403 or print STDERR "error: could not spawn: $!\n" and exit;
1405 my ($spawned, $pwd);
1406 $e->expect(
1409 qr'Enter passphrase: $',
1410 sub {
1411 my $fh = shift;
1412 $fh->send("$phrase\n");
1413 $spawned = 1;
1414 exp_continue;
1418 eof => sub {
1419 if ($spawned)
1421 my $fh = shift;
1422 $pwd = $fh->before();
1423 if ($pwd =~ /error: (.*?)$/)
1425 print STDERR "clivepass: error: $1\n";
1426 exit;
1428 else
1430 $pwd = $1
1431 if ($pwd =~ /login: $opts{ytuser}=(.*?)$/);
1434 else
1436 print STDERR
1437 "error: could not spawn $opts{clivepass}\n";
1438 exit;
1443 timeout => sub {
1444 print STDERR "error: clivepass: expect timed out\n";
1445 exit;
1450 $opts{ytpass} = $pwd;
1453 sub getpass
1455 system "stty -echo";
1456 print shift;
1457 chomp(my $pwd = <STDIN>);
1458 print "\n";
1459 system "stty echo";
1460 return $pwd;
1463 sub daemonize
1465 $logfile =
1466 $opts{append}
1467 || $opts{output}
1468 || File::Spec->catfile($workdir, "clive-log");
1470 my $pid = fork;
1471 if ($pid < 0)
1473 print STDERR "\nerror: fork failed: $!";
1474 exit 1;
1476 elsif ($pid != 0)
1478 print "continuing in background, pid $pid.\n";
1479 print "output will be written to $logfile.\n"
1480 unless $opts{quiet};
1481 exit 0;
1484 chdir $workdir;
1486 my $mode = $opts{append} ? ">>" : ">";
1487 $logfile = "/dev/null" if $opts{quiet};
1489 open STDOUT, "$mode", "$logfile"
1490 or die "error: cannot redirect STDOUT: $!";
1492 open STDERR, ">&STDOUT"
1493 or die "error: cannot dup STDOUT: $!";
1496 sub fork_streamer
1498 $stream[0] = 1; # set flag
1499 my $child = fork;
1501 if ($child < 0)
1503 print STDERR "error: fork failed: $!\n";
1505 elsif ($child == 0)
1507 my $cmd = $opts{streamexec};
1508 $cmd =~ s/%i/"$curr_fpath"/g;
1509 system("$cmd");
1510 exit(0);
1513 $stream[1] = $child;
1516 sub fetch_liveleak_playlist
1518 my $playlist_url = shift;
1520 print "done.\nfetch playlist xspf ..."
1521 unless $opts{quiet};
1523 my $playlist = "";
1524 open my $fh, ">", \$playlist;
1526 $curl->setopt(CURLOPT_URL, $playlist_url);
1527 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1529 my $rc = $curl->perform;
1530 close $fh;
1532 my ($xurl, $errmsg);
1533 if ($rc == 0)
1536 # NOTE: XML::XSPF exists in CPAN but this should work just as well.
1537 # Parsing with XML::Simple results in errors due unescaped values.
1538 $playlist =~ tr{\n}//d;
1539 $xurl = $1
1540 if $playlist =~ /<location>(.*?)<\/location>/;
1542 else
1544 $errmsg = $curl->strerror($rc) . " (http/$rc)";
1547 $errmsg = "location tag not found" if !$xurl && !$errmsg;
1548 print STDERR "\nerror: $errmsg\n" if $errmsg;
1550 return $xurl;
1553 sub fetch_liveleak_config
1555 my $config_url = shift;
1557 print "done.\nfetch config xml ..."
1558 unless $opts{quiet};
1560 my $config = "";
1561 open my $fh, ">", \$config;
1563 # Disable: header
1564 $curl->setopt(CURLOPT_HEADER, 0);
1565 $curl->setopt(CURLOPT_URL, $config_url);
1566 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1568 my $rc = $curl->perform;
1569 close $fh;
1571 my ($xurl, $errmsg);
1572 if ($rc == 0)
1574 if ($config =~ /<file>(.*?)<\/file>/)
1576 $xurl = fetch_liveleak_playlist($1);
1578 else
1580 $errmsg = "playlist url not found";
1583 else
1585 $errmsg = $curl->strerror($rc) . " (http/$rc)\n";
1588 print STDERR "\nerror: $errmsg\n" if $errmsg;
1590 return $xurl;
1593 sub fetch_sevenload_configxml
1595 my ($conf_url, $response_fh) = @_;
1597 print "done.\nfetch config xml..."
1598 unless $opts{quiet};
1600 my $conf_xml = "";
1601 open my $conf_fh, ">", \$conf_xml;
1603 # Disable: header
1604 $curl->setopt(CURLOPT_HEADER, 0);
1605 $curl->setopt(CURLOPT_URL, $conf_url);
1606 $curl->setopt(CURLOPT_WRITEDATA, $conf_fh);
1608 my $rc = $curl->perform;
1609 close $conf_fh;
1611 # Re-enable: header
1612 $curl->setopt(CURLOPT_HEADER, 1);
1613 $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
1615 my ($xurl, $id);
1616 if ($rc == 0)
1618 my %re = (
1619 GrabXurl => qr|<location seeking="yes">(.*?)</location>|,
1620 GrabID => qr|item id="(.*?)"|,
1622 $id = $1
1623 if $conf_xml =~ /$re{GrabID}/;
1624 $xurl = $1
1625 if $conf_xml =~ /$re{GrabXurl}/;
1627 else
1629 print STDERR "\nerror: "
1630 . $curl->strerror($rc)
1631 . " (http/$rc)\n";
1633 return ($xurl, $id);
1636 sub fetch_cctv_space_config
1638 my ($conf_url, $resp_fh) = @_;
1640 print "done.\nfetch config file ..."
1641 unless $opts{quiet};
1643 my $conf = "";
1644 open my $fh, ">", \$conf;
1646 # Disable: header
1647 $curl->setopt(CURLOPT_HEADER, 0);
1648 $curl->setopt(CURLOPT_URL, $conf_url);
1649 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1651 my $rc = $curl->perform;
1652 close $fh;
1654 my ($xurl, $errmsg);
1655 if ($rc == 0)
1657 my $re = qr|"url":"(.*?)"|;
1658 if ($conf =~ /$re/)
1660 $xurl = "http://v.cctv.com/flash/$1";
1662 else
1664 $errmsg = "extraction url not found";
1667 else
1669 $errmsg = $curl->strerror($rc) . " http/$rc\n";
1672 print STDERR "\nerror: $errmsg\n" if $errmsg;
1674 # Re-enable: header, reset WRITEDATA, the above overrides the
1675 # original settings.
1676 $curl->setopt(CURLOPT_HEADER, 0);
1677 $curl->setopt(CURLOPT_WRITEDATA, $resp_fh);
1679 return $xurl;
1682 sub strdomain
1684 my $uri = shift;
1686 my ($scheme, $authority, $path, $query, $fragment) = $uri =~
1687 m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
1689 # Extract the domain from the URL.
1690 my @a = split(/\./, $authority);
1692 return @a;
1695 sub title_to_filename
1697 my $title = shift;
1699 $title =~
1700 s/(youtube|video|liveleak.com|sevenload|dailymotion|cctv.com)//gi;
1701 $title =~ s/^\s+//;
1702 $title =~ s/\s+$//;
1704 my $r = $opts{cclass} || qr|\w|;
1705 $title = join('', $title =~ /$r/g);
1707 my $fn = $opts{fnfmt} || "%t_%d_%i.%s";
1708 my $timestamp = POSIX::strftime("%F %T", localtime);
1710 my @a = strdomain($entry{page_url});
1712 my %h = (
1713 "%t" => $title,
1714 "%s" => $entry{file_suffix},
1715 "%d" => $a[scalar @a - 2], # Without the TLD.
1716 "%i" => $entry{video_id},
1717 "%D" => (split(/ /, $timestamp))[0],
1718 "%T" => (split(/ /, $timestamp))[1],
1719 "%S" => $timestamp,
1722 my $m = join('|', keys %h);
1723 $fn =~ s/($m)/$h{$1}/ig;
1725 return $fn;
1728 sub newname_if_exists
1730 my ($path, $orig, $new) = (shift, shift);
1732 for (my $i = 1 ; ; $i++)
1734 $new = File::Spec->catfile($path, "$orig.$i");
1735 last if !-e $new;
1738 my ($vol, $dir, $fn) = File::Spec->splitpath($new);
1739 return ($new, $fn);
1742 sub format_show
1744 my $s = shift;
1745 my %e = map_entry(shift);
1747 my $t =
1748 $opted_mods{IOPager}
1749 ? $e{page_title}
1750 : decode_utf8($e{page_title});
1752 my %h = (
1753 "%t" => $t,
1754 "%i" => $e{video_id},
1755 "%l" => $e{file_length},
1756 "%m" => sprintf("%.2f", $e{file_length} / MBDIV),
1757 "%u" => $e{page_url},
1758 "%x" => $e{xurl},
1759 "%D" => (split(/ /, $e{time_stamp}))[0],
1760 "%T" => (split(/ /, $e{time_stamp}))[1],
1761 "%S" => $e{time_stamp},
1764 my $m = join('|', keys %h);
1765 $s =~ s/($m)/$h{$1}/ig;
1767 return $s;
1770 sub init_cache
1772 require File::Path;
1773 File::Path::mkpath([$CONFIGDIR], 0, 0700);
1774 require BerkeleyDB;
1775 $cache_db = tie %cache, "BerkeleyDB::Hash",
1776 -Filename => $CACHEFILE,
1777 -Flags => BerkeleyDB->DB_CREATE
1778 or die "error: cannot open $CACHEFILE: $!\n";
1781 sub show_cache
1783 IO::Pager->new(*STDOUT)
1784 if $opted_mods{IOPager};
1786 my $fmt = $opts{showfmt} || SHOWFMT_DEFAULT;
1787 my @entries = ();
1789 require Digest::SHA;
1791 if ($opts{grep})
1793 grep_cache(); # Stores matches => @queue
1794 push @entries, format_show($fmt, Digest::SHA::sha1_hex($_))
1795 foreach (@queue);
1797 else
1799 push @entries, format_show($fmt, $_) foreach (sort keys %cache);
1802 print STDOUT "$_\n" foreach sort @entries;
1804 close STDOUT
1805 if $opted_mods{IOPager};
1807 if ($opts{grep} and $opts{delete} and scalar @queue > 0)
1809 print "Confirm delete (y/N):";
1810 $_ = lc <STDIN>;
1811 chomp;
1812 if (lc $_ eq "y")
1814 delete $cache{Digest::SHA::sha1_hex($_)} foreach (@queue);
1817 exit;
1820 sub clear_cache
1822 unlink $CACHEFILE if -e $CACHEFILE;
1823 exit;
1826 sub free_cache
1828 undef $cache_db;
1829 untie %cache;
1832 sub map_entry
1834 my $key = shift;
1835 my @values = split(/;/, $cache{$key});
1837 my @keys = qw(
1838 file_suffix file_length file_format page_title
1839 page_url time_stamp video_id xurl
1840 ); # Order matters. See also save_entry.
1842 my $i = 0;
1843 return map { $_ => $values[$i++] } @keys;
1846 sub fetch_entry
1848 %entry = map_entry($hash);
1849 $entry{page_title} = decode_utf8($entry{page_title});
1851 #while (my ($key, $value) = each(%entry)) { print "$key => $value\n"; } die;
1854 sub save_entry
1856 my @values;
1858 $entry{time_stamp} = POSIX::strftime("%F %T", localtime);
1860 push @values, $entry{$_} foreach sort keys %entry;
1862 $cache{$hash} = join(';', @values);
1863 $cache_db->db_sync();
1866 sub grep_cache
1868 my $g =
1869 $opts{case}
1870 ? qr|$opts{grep}|
1871 : qr|$opts{grep}|i;
1873 my $fmt = $opts{showfmt} || SHOWFMT_DEFAULT;
1875 foreach (sort keys %cache)
1877 my @e = split(/;/, $cache{$_});
1878 if (grep /$g/, @e)
1880 if ($opts{delete})
1882 if ($opts{show}) { push @queue, $e[4]; }
1883 else { delete $cache{$_}; }
1885 else { push @queue, $e[4]; } # 4=URL
1888 exit
1889 if $opts{delete} and not $opts{show};
1892 sub translate_embed
1894 my ($url) = @_;
1895 $$url =~ s!/v/!/watch?v=!i; # youtube
1896 $$url =~ s!googleplayer.swf!videoplay!i; # googlevideo
1897 $$url =~ s!/pl/!/videos/!i; # sevenload
1898 $$url =~ s!/e/!/view?i=!i; # liveleak
1901 sub verify_exec
1903 return if !$opts{exec};
1904 if ($opts{exec} !~ /[;+]$/)
1906 print "error: --exec expression must be terminated "
1907 . "by either ';' or '+'\n";
1908 exit;
1912 sub exec_cmd
1914 return if !$opts{exec};
1915 if ($opts{exec} =~ /;$/)
1916 { # semi
1917 foreach (@exec_files)
1919 my $cmd = $opts{exec};
1920 $cmd =~ s/%i/"$_"/g;
1921 $cmd =~ tr{;}//d;
1922 system("$cmd");
1925 else
1926 { # plus
1927 my $cmd = sprintf("%s ", $opts{exec});
1928 $cmd =~ s/%i//g;
1929 $cmd =~ tr{+}//d;
1930 $cmd .= sprintf('"%s" ', $_) foreach (@exec_files);
1931 system("$cmd");
1935 sub emit
1937 print "<?xml version=\"1.0\"?>\n<queue>\n"
1938 if $opts{emitxml} and @emit_queue;
1940 require URI::Escape;
1942 foreach (@emit_queue)
1944 if ($opts{emitxml})
1946 print " <video>\n";
1947 while (my ($key, $value) = each(%$_))
1949 $value = URI::Escape::uri_escape($value)
1950 if $key eq 'xurl'
1951 or $key eq 'page_url';
1952 print " <$key>$value</$key>\n";
1954 print " </video>\n";
1956 elsif ($opts{emitcsv})
1958 printf qq/csv:"%s","%s","%s","%.2fMB",/
1959 . qq/"%s","%s","%s","%s","%s","%s"\n/,
1960 $_->{page_url}, $_->{xurl}, $_->{fn},
1961 $_->{file_length} / MBDIV, $_->{file_length},
1962 $_->{video_id}, $_->{time_stamp}, $_->{page_title},
1963 $_->{cont_from}, $_->{remaining};
1966 print "</queue>\n"
1967 if $opts{emitxml} and @emit_queue;
1970 sub print_hosts
1972 print "$re_hosts{$_}\n" foreach (keys %re_hosts);
1973 exit;
1976 sub print_version
1978 my $perl_v = sprintf("--with-perl=%vd-%s", $^V, $^O);
1979 my $str = sprintf(
1980 "clive version %s with WWW::Curl version "
1981 . "$WWW::Curl::VERSION.\n"
1982 . "Copyright (c) 2007-2009 Toni Gundogdu "
1983 . "<legatvs\@gmail.com>.\n\n",
1984 VERSION
1986 $str .= "$perl_v ";
1987 my $i = 0;
1988 while (my ($key, $value) = each(%opted_mods))
1990 $str .= sprintf("--with-$key=%s ", $value ? "yes" : "no");
1991 $str .= "\n" if (++$i % 2 == 0);
1993 $str .=
1994 "\nclive is licensed under the ISC license which is functionally\n"
1995 . "equivalent to the 2-clause BSD licence.\n"
1996 . "\tReport bugs: <http://code.google.com/p/clive/issues/>\n";
1997 print "$str";
1998 exit;
2001 __END__
2003 =head1 SYNOPSIS
2005 clive [options]... [URL]...
2007 =head1 OPTIONS
2009 -h, --help print help and exit
2010 -v, --version print version and exit
2011 --hosts print supported hosts and exit
2012 -b, --background go to background after startup
2013 -e, --emit-csv emit video details as csv to stdout
2014 -E, --emit-xml emit video details as csv to stdout
2015 -V, --clivepass=PATH path to clivepass
2016 HTTP Options:
2017 -U, --agent=STRING identify as STRING to http server
2018 -y, --proxy=ADDR use ADDR for http proxy
2019 -X, --no-proxy do not use http proxy
2020 Cache Options:
2021 -R, --renew renew cache entry for visited url
2022 -s, --show dump cache entries to stdout
2023 -H, --show-format=STRING format dumped cache entries
2024 -g, --grep=PATTERN grep cache entries for PATTERN
2025 -i, --ignore-case ignore case-differences with --grep
2026 -D, --delete delete matched entries from cache
2027 -C, --clear clear cache of all entries
2028 Logging and Input Options:
2029 -o, --output=LOGFILE log messages to LOGFILE
2030 -a, --append=LOGFILE append to LOGFILE
2031 -d, --debug print libcurl debug messages
2032 -q, --quiet turn off all output
2033 -r, --recall recall last url batch
2034 -T, --savebatch=FILE save url batch to FILE
2035 -p, --paste paste input from clipboard
2036 Download Options:
2037 -O, --output-video=FNAME write video to file
2038 -n, --no-extract do not extract any videos
2039 -c, --continue continue partially downloaded file
2040 -W, --overwrite overwrite existing video file
2041 -G, --progress=TYPE use progress indicator TYPE
2042 -u, --youtube-user=UNAME youtube username
2043 -t, --youtube-pass=PASSW youtube password
2044 -L, --no-login do not log into youtube
2045 -S, --savedir=DIR save video files to DIR
2046 -f, --format=FORMAT extract video FORMAT
2047 -l, --cclass=CLASS use CLASS to filter titles
2048 -N, --filename-format=STR use STR to construct output filename
2049 -x, --exec=COMMAND execute COMMAND subsequently
2050 --stream-exec=COMMAND stream COMMAND to be executed
2051 --stream=PERCENT execute stream command when transfer reaches %