Update INSTALL with configrc -> examples/config rename.
[clive.git] / clive
blobc42080c183d4e4e7ea14e8d637ced341ef3c9da5
1 #!/usr/bin/env perl
2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clive, the non-interactive video extraction utility
6 # Copyright (c) 2007-2009 Toni Gundogdu <legatvs@gmail.com>
8 # Permission to use, copy, modify, and distribute this software for any
9 # purpose with or without fee is hereby granted, provided that the above
10 # copyright notice and this permission notice appear in all copies.
12 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 ###########################################################################
21 use warnings;
22 use strict;
24 use constant VERSION => "2.1.10";
25 use constant MBDIV => 0x100000;
26 use constant SHOWFMT_DEFAULT => qq/%D: "%t" | %mMB/;
28 binmode(STDOUT, ":utf8");
30 # NOTE: Using "require" instead of "use" causes "Can't locate
31 # auto/WWW/Curl/CURLOPT_USE.al in @INC".
32 use WWW::Curl::Easy 4.05;
33 use Getopt::Long qw(:config bundling);
34 use Cwd qw(getcwd);
35 use Config::Tiny;
36 use File::Spec;
37 use Encode;
39 # Non-essential modules: set flags indicating availability
40 my %opted_mods =
41 (Clipboard => 1, Expect => 1, IOPager => 1, ReadKey => 1);
42 eval "use Clipboard";
43 $opted_mods{Clipboard} = 0 if $@;
44 eval "use IO::Pager";
45 $opted_mods{IOPager} = 0 if $@;
46 sub exp_continue() { }; # Satisfies: "Bareword "exp_continue" not allowed while"
47 eval "use Expect";
48 $opted_mods{Expect} = 0 if $@;
49 eval "use Term::ReadKey";
50 $opted_mods{ReadKey} = 0 if $@;
52 my $CONFIGDIR = $ENV{CLIVE_HOME}
53 || File::Spec->catfile($ENV{HOME}, ".config/clive");
55 my $CONFIGFILE = File::Spec->catfile($CONFIGDIR, "config");
56 my $CACHEFILE = File::Spec->catfile($CONFIGDIR, "cache");
57 my $RECALLFILE = File::Spec->catfile($CONFIGDIR, "recall");
59 my %opts; # runtime options
60 my @queue; # input URLs
61 my $curl; # curl handle, reused throughout lifespan
62 my $cache_db; # handle to cache BDB
63 my %cache; # handle to cache BDB (tied hash)
64 my $hash; # sha1 hash of the current url used together with %cache
65 my %entry; # multi-purpose hash for caching
66 my $ytube_logged = 0; # youtube: whether logged-in
67 my $time_started; # time file transfer started
68 my @exec_files; # holds fnames for --exec
69 my @emit_queue; # videos to be emitted
70 my $logfile; # path to logfile (--output-file, --append-file)
71 my %dp; # dot progress data
72 my %bp; # bar progress data
73 my $workdir = getcwd; # startup workdir
74 my @stream = (0, -1); # 0=stream flag, 1=stream pid
75 my $curr_fpath; # current video output filepath
76 my $recv_sigwinch = 0; # whether SIGWINCH was received
77 my $term_width; # current terminal width
79 my %re_hosts = ( # Precompiled regex used to identify the host
80 IsYoutube => qr|youtube.com|i,
81 IsGoogle => qr|video.google.|i,
82 IsSevenload => qr|sevenload.com|i,
83 IsBreak => qr|break.com|i,
84 IsLastfm => qr|last.fm|i,
85 IsLiveleak => qr|liveleak.com|i,
86 IsEvisor => qr|evisor.tv|i,
87 IsDmotion => qr|dailymotion.com|i,
88 IsCctv => qr|tv.cctv.com|i,
91 my @re_hosts_arr = (
92 [$re_hosts{IsYoutube}, \&handle_youtube],
93 [$re_hosts{IsGoogle}, \&handle_google],
94 [$re_hosts{IsSevenload}, \&handle_sevenload],
95 [$re_hosts{IsBreak}, \&handle_break],
96 [$re_hosts{IsLastfm}, \&handle_lastfm],
97 [$re_hosts{IsLiveleak}, \&handle_liveleak],
98 [$re_hosts{IsEvisor}, \&handle_evisor],
99 [$re_hosts{IsDmotion}, \&handle_dmotion],
100 [$re_hosts{IsCctv}, \&handle_cctv],
103 # Parse config
104 my $c = Config::Tiny->read($CONFIGFILE);
105 %opts = (
106 progress => $c->{_}->{progress},
107 agent => $c->{http}->{agent},
108 proxy => $c->{http}->{proxy},
109 maxspeed => $c->{http}->{maxspeed},
110 minspeed => $c->{http}->{minspeed},
111 format => $c->{output}->{format},
112 savedir => $c->{output}->{savedir},
113 cclass => $c->{output}->{cclass},
114 fnfmt => $c->{output}->{file},
115 showfmt => $c->{output}->{show},
116 ytuser => $c->{youtube}->{user},
117 ytpass => $c->{youtube}->{pass},
118 exec => $c->{commands}->{exec},
119 streamexec => $c->{commands}->{stream},
120 clivepass => $c->{commands}->{clivepass},
123 $opts{clivepass} = $ENV{CLIVEPASS_PATH} unless $opts{clivepass};
124 $opts{progress} = 'bar' unless $opts{progress};
125 $opts{format} = $opts{format} || 'flv';
126 $opts{extract} = 1;
127 $opts{login} = 1;
128 $opts{case} = 1;
130 GetOptions(
131 \%opts,
132 'debug|d', 'help|h', 'overwrite|W', 'savebatch|T=s',
133 'paste|p', 'show|s', 'delete|D', 'clear|C',
134 'continue|c', 'renew|R', 'recall|r', 'format|f=s',
135 'output|o=s', 'append|a=s', 'background|b', 'quiet|q',
136 'grep|g=s', 'agent|U=s', 'proxy|y=s', 'savedir|S=s',
137 'cclass|l=s', 'exec|x=s', 'progress|G=s', 'clivepass|V=s',
138 'stream=i', 'stderr',
139 'hosts' => \&print_hosts,
140 'version|v' => \&print_version,
142 # Commented out until WWW::Curl is fixed:
143 # 'maxspeed!', 'minspeed!',
144 # Workarounds since $longopt!|$shortopt cannot be used.
145 'no-extract|n' => sub { $opts{extract} = 0 },
146 'no-login|L' => sub { $opts{login} = 0 },
147 'no-proxy|X' => sub { $opts{proxy} = "" },
149 # Workaround for options with dashes. There's likely a better way.
150 'ignore-case|i' => sub { $opts{case} = 0 },
151 'filename-format|N=s' => sub { $opts{fnfmt} = $_[1] },
152 'show-format|H=s' => sub { $opts{showfmt} = $_[1] },
153 'youtube-user|u=s' => sub { $opts{ytuser} = $_[1] },
154 'youtube-pass|t=s' => sub { $opts{ytpass} = $_[1] },
155 'emit-csv|e' => sub { $opts{emitcsv} = 1 },
156 'emit-xml|E' => sub { $opts{emitxml} = 1 },
157 'stream-exec=s' => sub { $opts{streamexec} = $_[1] },
158 'output-video|O=s' => sub { $opts{outputfname} = $_[1] },
159 ) or exit(1);
161 if ($opts{help})
163 require Pod::Usage;
164 Pod::Usage::pod2usage(-exitstatus => 0, -verbose => 1);
167 main();
169 ## Subroutines: Signal handlers
171 sub handle_sigwinch
174 # my $sig_name = shift;
175 $recv_sigwinch = 1;
178 ## Subroutines: Connection
180 sub init_curl
182 $curl = WWW::Curl::Easy->new;
184 $curl->setopt(CURLOPT_USERAGENT, $opts{agent} || "Mozilla/5.0");
185 $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
186 $curl->setopt(CURLOPT_AUTOREFERER, 1);
187 $curl->setopt(CURLOPT_HEADER, 1);
188 $curl->setopt(CURLOPT_NOBODY, 0);
190 $curl->setopt(CURLOPT_VERBOSE, 1)
191 if $opts{debug};
193 $curl->setopt(CURLOPT_PROXY, $opts{proxy})
194 if defined $opts{proxy};
196 $curl->setopt(CURLOPT_MAX_RECV_SPEED_LARGE, $opts{maxspeed})
197 if $opts{maxpseed}; # NOTE: No effect. Bug in WWW::Curl::Easy?
199 $curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $opts{minspeed})
200 if $opts{minspeed}; # Ditto.
203 sub auth_youtube
204 { # Log into Youtube
205 print "[youtube] attempt to login as $opts{ytuser} ..."
206 unless $opts{quiet};
208 my $response = "";
209 open my $fh, ">", \$response;
211 my $login_url =
212 "http://uk.youtube.com/login?current_form=loginform"
213 . "&username=$opts{ytuser}&password=$opts{ytpass}"
214 . "&action_login=log+in&hl=en-GB";
216 $curl->setopt(CURLOPT_URL, $login_url);
217 $curl->setopt(CURLOPT_COOKIEFILE, ""); # Enable cookies from here on
218 $curl->setopt(CURLOPT_ENCODING, ""); # Supported encodings
219 $curl->setopt(CURLOPT_WRITEDATA, $fh);
221 my $rc = $curl->perform;
222 my $errmsg;
224 if ($rc == 0)
226 $response =~ tr{\n}//d;
227 $errmsg = "error: login was incorrect"
228 if $response =~ /your log-in was incorrect/i;
229 $errmsg = "error: check your login password"
230 if $response =~ /check your password/i and !$errmsg;
231 $errmsg = "error: too many login failures, try again later"
232 if $response =~ /too many login failures/i and !$errmsg;
234 else
236 $errmsg = "error: " . $curl->strerror($rc) . " (http/$rc)";
238 close $fh;
240 print STDERR "\n$errmsg\n" and exit
241 if $errmsg;
243 print "done.\n"
244 unless $opts{quiet};
246 $curl->setopt(CURLOPT_COOKIE,
247 "is_adult=" . uc(Digest::SHA::sha1_hex(rand())));
249 $ytube_logged = 1;
252 # Subroutines: Queue
254 sub process_queue
256 init_curl();
258 require Digest::SHA;
259 require HTML::TokeParser;
260 require URI::Escape;
261 require File::Basename;
262 require POSIX;
264 foreach (@queue)
266 $hash = Digest::SHA::sha1_hex($_);
268 my $errmsg;
269 my ($rc, $rfh, $response) = fetch_page($_);
271 if ($rc == 0 or $rc == 0xff)
273 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE)
274 unless $rc == 0xff; # read from cache
276 if ($rc == 200 or $rc == 0xff)
278 if (!defined($entry{page_url}))
280 next if process_page($_, \$response, $rfh) == -1;
282 extract_video() if $entry{xurl};
284 else
286 $errmsg = $curl->strerror($rc) . " (http/$rc)";
289 else
291 $errmsg = $curl->strerror($rc) . " (http/$rc)";
293 close $rfh;
295 print STDERR "\nerror: $errmsg\n"
296 if $errmsg;
298 exec_cmd();
299 emit();
302 sub fetch_page
304 my ($url, $response, $from_cache, $rc) = (shift, "");
305 open my $fh, ">", \$response;
307 # Youtube: login only if both username and password are defined
308 if ($opts{ytuser} and $opts{ytpass} and $opts{login})
310 auth_youtube()
311 if !$ytube_logged and $url =~ /$re_hosts{IsYoutube}/;
314 if ($cache{$hash})
316 fetch_entry($hash)
317 ; # Make sure cached "format" matches with options
318 $from_cache = 1
319 if $opts{format} eq $entry{file_format};
322 $from_cache = 0
323 if $opts{renew};
325 printf "%s $url ...", $from_cache ? "cache" : "fetch"
326 unless $opts{quiet};
328 $rc = 0xff; # flag: read cache entry
330 unless ($from_cache)
332 %entry = ();
333 $curl->setopt(CURLOPT_URL, $url);
334 $curl->setopt(CURLOPT_ENCODING, "");
335 $curl->setopt(CURLOPT_WRITEDATA, $fh);
336 $rc = $curl->perform;
339 return ($rc, $fh, decode_utf8($response));
342 sub process_page
344 my ($url, $response_ref, $response_fh) = @_;
346 #$$response_ref =~ tr{\n}//d;
348 my $p = HTML::TokeParser->new($response_ref);
349 $p->get_tag("title");
350 my $title = $p->get_trimmed_text;
352 my ($xurl, $id, $_title, $supported);
353 $supported = 0;
354 foreach (@re_hosts_arr)
356 my ($re, $handler) = @{$_};
357 if ($url =~ /$re/)
359 $supported = 1;
360 ($xurl, $id, $_title) =
361 &$handler($response_ref, $response_fh, $url);
362 $title = $_title || $title;
363 last;
366 die "error: lookup array missing handler; should never get here\n"
367 if !$supported;
369 return -1
370 if !$xurl
371 or !$id
372 or !$title;
374 $title =~ tr{;}//d; # Cache values cannot contain ';'
376 $entry{page_url} = $url;
377 $entry{xurl} = $xurl;
378 $entry{page_title} = $title;
379 $entry{video_id} = $id;
380 $entry{file_format} = $opts{format};
382 return 0;
385 sub query_video_length
387 my ($content_type, $errmsg);
389 unless ($entry{file_length})
391 print "done.\nverify video link ..."
392 unless $opts{quiet};
394 $curl->setopt(CURLOPT_URL, $entry{xurl});
396 # Do not download: GET => HEAD request.
397 $curl->setopt(CURLOPT_NOBODY, 1);
398 my $rc = $curl->perform;
400 # Reset back: HEAD => GET
401 $curl->setopt(CURLOPT_HTTPGET, 1);
403 $entry{file_length} =
404 $curl->getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD);
406 $content_type = $entry{file_suffix} =
407 $curl->getinfo(CURLINFO_CONTENT_TYPE);
409 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
411 if ($rc == 200)
413 my $content_ok = 0;
414 if ($content_type =~ m{video/(.*)})
416 $entry{file_suffix} = $1;
417 if ($content_type =~ /(.*)-(.*)$/)
419 $entry{file_suffix} = $2;
421 $content_ok = 1;
424 # Evisor and Metacafe return "text/plain" for Content-Type
425 elsif ($content_type =~ m{text/plain})
427 if ($opts{format} eq "flv")
429 if ( $entry{page_url} =~ /$re_hosts{IsEvisor}/
430 or $entry{page_url} =~ /$re_hosts{IsCctv}/)
432 $entry{file_suffix} = "flv";
433 $content_ok = 1;
438 # Liveleak and Break return "(flv-)application/octet-stream"
439 elsif ($content_type =~ m{application/octet-stream})
441 if ($opts{format} eq "flv")
443 if ( $entry{page_url} =~ /$re_hosts{IsBreak}/
444 or $entry{page_url} =~ /$re_hosts{IsLiveleak}/)
446 $entry{file_suffix} = 'flv';
447 $content_ok = 1;
451 $errmsg =
452 "expected different content-type, "
453 . "received \"$content_type\""
454 unless $content_ok;
456 else
458 $errmsg = "server returned http/$rc";
461 else
462 { # Construct content-type from cache
463 $content_type = "video/$entry{file_suffix}";
466 unless ($opts{quiet})
468 if (!$errmsg) { print "done.\n"; }
469 else { print STDERR "\nerror: $errmsg\n"; }
472 return ($errmsg ? -1 : 0, $content_type);
475 sub extract_video
477 my ($rc, $content_type) = query_video_length();
479 return
480 if $rc != 0 or !defined $content_type;
482 my $fn = $opts{outputfname}
483 || title_to_filename($entry{page_title});
484 my $path = File::Spec->catfile($opts{savedir} || $workdir, $fn);
485 my $filemode = ">";
486 my $remaining = $entry{file_length};
487 my $size = -s $path || 0;
488 my $cont_from = 0;
490 save_entry($hash);
492 if ($size > 0 and !$opts{overwrite})
494 if ($size == $entry{file_length} and $opts{extract})
496 print STDERR
497 "error: file is already fully retrieved; nothing to do\n";
499 push @exec_files, $path
500 if $opts{exec};
502 return
503 unless $opts{emitcsv} or $opts{emitxml};
506 elsif ($size < $entry{file_length} and $opts{continue})
508 $cont_from = $size;
509 $filemode = ">>";
510 $remaining = ($entry{file_length} - $cont_from);
512 else
514 ($path, $fn) =
515 newname_if_exists($opts{savedir} || $workdir, $fn);
519 if ($opts{emitcsv} or $opts{emitxml})
521 $entry{fn} = $fn;
522 $entry{remaining} = $remaining;
523 $entry{cont_from} = $cont_from;
524 push @emit_queue, {%entry};
525 return;
528 unless ($opts{quiet})
530 print "file: $fn";
532 if ($cont_from)
534 printf("\nfrom: $cont_from (%.1fMB) "
535 . "remaining: $remaining (%.1fMB)",
536 $cont_from / MBDIV, $remaining / MBDIV);
539 print "\n";
542 my $errmsg;
543 if ($rc == 0)
545 return
546 unless $opts{extract};
548 if (open my $fh, "$filemode$path")
550 $curr_fpath = $path;
552 # Disable: encoding, header
553 $curl->setopt(CURLOPT_HEADER, 0);
554 $curl->setopt(CURLOPT_ENCODING, "identity");
555 $curl->setopt(CURLOPT_URL, $entry{xurl});
556 $curl->setopt(CURLOPT_WRITEDATA, $fh);
558 $curl->setopt(CURLOPT_RESUME_FROM, $cont_from)
559 if $cont_from;
561 unless ($opts{quiet})
563 $curl->setopt(CURLOPT_PROGRESSFUNCTION,
564 \&progress_callback);
565 $curl->setopt(CURLOPT_NOPROGRESS, 0);
566 $time_started = time;
568 # Use 'dot' progress if the output is not a TTY
569 if ( $opts{progress} !~ /^dot/
570 and $opts{progress} ne 'none'
571 and !$opts{stderr})
573 $opts{progress} = 'dot'
574 if !-t STDOUT or !-t STDERR;
577 $stream[0] = 0; # reset streaming flag
579 if ($opts{progress} =~ /^bar/)
581 bar_init($cont_from, $entry{file_length});
583 elsif ($opts{progress} =~ /^dot/)
585 dot_init();
589 $rc = $curl->perform;
590 close $fh;
592 if ($rc == 0)
594 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
595 if ($rc == 200 or $rc == 206)
597 if ($opts{progress} =~ /^bar/) { bar_finish() }
598 elsif ($opts{progress} =~ /^dot/) { dot_finish() }
599 waitpid($stream[1], 0) if $stream[0];
601 else
603 $errmsg = $curl->strerror($rc) . " (http/$rc)";
606 else
608 $errmsg = $curl->strerror($rc) . " (http/$rc)";
611 # Reset
612 $curl->setopt(CURLOPT_RESUME_FROM, 0);
613 $curl->setopt(CURLOPT_HEADER, 1);
615 else
617 $errmsg = "$path: $!";
620 else
622 $errmsg = $curl->strerror($rc) . " (http/$rc)";
625 if (!$errmsg)
627 print "\n"
628 unless $opts{quiet};
629 push @exec_files, $path
630 if $opts{exec};
632 else
634 print STDERR "\nerror: $errmsg\n";
637 # Disable: progress
638 $curl->setopt(CURLOPT_NOPROGRESS, 1);
641 sub get_queue
643 if ($opts{recall} and -e $RECALLFILE)
645 if (open my $fh, "<$RECALLFILE")
647 parse_input($_) while (<$fh>);
648 close $fh;
650 else
652 print STDERR "error: $RECALLFILE: $!";
656 if ($opts{paste})
658 print STDERR "error: Clipboard module not found\n" and exit
659 unless $opted_mods{Clipboard};
660 my $data = Clipboard->paste();
661 if ($data)
663 parse_input($_) foreach split(/\n/, $data);
667 parse_input($_) foreach @ARGV;
668 grep_cache() if $opts{grep};
669 unless (@queue) { parse_input($_) while (<STDIN>); }
671 if (open my $fh, ">$RECALLFILE")
673 print $fh "$_\n" foreach @queue;
674 close $fh;
676 else
678 print STDERR "error: $RECALLFILE: $!";
681 if ($opts{savebatch})
683 if (open my $fh, ">", $opts{savebatch})
685 print $fh "$_\n" foreach @queue;
686 close $fh;
688 else
690 print STDERR "error: $opts{savebatch}: $!";
695 sub parse_input
697 my $url = shift;
699 return if $url =~ /^$/;
700 return if $url =~ /^#/;
702 chomp $url;
704 if ($url =~ /&srcurl=(.*?)&/)
705 { # GVideo: one of many redirects
706 require URI::Escape;
707 printf "found redirect ...%s\n=> %s\n",
708 (split(/&/, $url))[0],
709 (split(/&/, URI::Escape::uri_unescape($1)))[0]
710 unless $opts{quiet};
711 $url = URI::Escape::uri_unescape($1);
714 # Insert http:// if not found
715 $url = "http://$url"
716 if $url !~ m{^http://}i;
718 # Translate embedded URL to video page URL
719 translate_embed(\$url);
721 # Last.fm wraps Youtube videos as their own
722 if ($url =~ /$re_hosts{IsLastfm}/)
724 $url =~ /\+1\-(.+)/;
726 print STDERR "error: nosupport: $url\n" and return -1
727 unless defined($1);
729 $url = "http://youtube.com/watch?v=$1";
732 # Remove params from the URL
733 ($url) = split(/&/, $url);
735 foreach my $re (%re_hosts)
737 push @queue, $url and return 0
738 if $url =~ /$re/;
741 print STDERR "error: nosupport: $url\n";
743 return -1;
746 # Subroutines: Video page handlers
748 sub handle_youtube
750 my ($response_ref, $xurl) = @_;
752 my %re = (
753 GrabID => qr/"video_id": "(.*?)"/,
754 GrabT => qr/"t": "(.*?)"/
757 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
758 my $t = $1 if $$response_ref =~ /$re{GrabT}/;
760 if ($id and $t)
762 $xurl = "http://youtube.com/get_video?video_id=$id&t=$t";
764 my $fmt;
765 if ($opts{format} eq "mp4") { $fmt = 18; }
766 elsif ($opts{format} eq "3gpp") { $fmt = 17; }
767 elsif ($opts{format} eq "xflv") { $fmt = 6; }
769 $xurl .= "&fmt=$fmt"
770 if $fmt;
772 else
774 printf STDERR "\nerror: failed to extract &%s\n", $id
775 ? "t"
776 : "video_id";
778 return ($xurl, $id);
781 sub handle_google
783 my ($response_ref) = @_;
785 my %re = (
787 GrabVideoURL => qr|videoUrl\\x3d(.*?)\\x26|,
788 GrabID => qr|docid:'(.*?)'|,
789 GrabMP4 => qr|href="http://vp\.(.*?)"|,
792 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
794 my $xurl = URI::Escape::uri_unescape($1)
795 if $$response_ref =~ /$re{GrabVideoURL}/;
797 my $mp4 = $1 if $$response_ref =~ /$re{GrabMP4}/;
799 my $errmsg;
800 $errmsg = "video id not found" if !$id;
801 $errmsg = "extraction url not found" if !$xurl && !$errmsg;
803 print STDERR "\nerror: $errmsg\n" if $errmsg;
805 $xurl = "http://vp.$mp4"
806 if $mp4 && $opts{format} eq "mp4" && $xurl;
808 return ($xurl, $id);
811 sub handle_sevenload
813 my ($response_ref, $response_fh) = @_;
815 my %re = (GrabConfigPath => qr|configPath=(.*?)"|);
817 my $conf_path = URI::Escape::uri_unescape($1)
818 if $$response_ref =~ /$re{GrabConfigPath}/;
820 my ($xurl, $id, $errmsg);
821 if ($conf_path)
823 ($xurl, $id) =
824 fetch_sevenload_configxml($conf_path, $response_fh);
826 else
828 $errmsg = "configPath not found";
830 $errmsg = "item id not found" if !$errmsg && !$id;
831 $errmsg = "extraction url not found" if !$errmsg && !$xurl;
832 print STDERR "\nerror: $errmsg\n" if $errmsg;
833 return ($xurl, $id);
836 sub handle_break
838 my ($response_ref) = @_;
840 my %re = (
841 GrabTitle => qr|id="vid_title" content="(.*?)"|,
842 GrabID => qr|ContentID='(.*?)'|,
843 GrabFilePath => qr|ContentFilePath='(.*?)'|,
844 GrabFileName => qr|FileName='(.*?)'|
847 my $title = $1 if $$response_ref =~ /$re{GrabTitle}/;
848 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
849 my $fpath = $1 if $$response_ref =~ /$re{GrabFilePath}/;
850 my $fname = $1 if $$response_ref =~ /$re{GrabFileName}/;
852 my ($xurl, $errmsg);
853 if ($fpath and $fname)
855 $xurl = "http://media1.break.com/dnet/media/$fpath/$fname.flv";
857 else
859 $errmsg = "failed to extract ContentFilePath"
860 if !$fpath;
862 $errmsg = "failed to extract FileName"
863 if !$fname and !$errmsg;
866 $errmsg = "failed to extract title"
867 if !$title and !$errmsg;
869 $errmsg = "failed to extract id"
870 if !$id and !$errmsg;
872 print STDERR "\nerror: $errmsg\n"
873 if $errmsg;
875 return ($xurl, $id, $title);
878 sub handle_liveleak
880 my ($response_ref, $response_fh) = @_;
882 my %re = (
883 GrabID => qr|token=(.*?)&|,
884 GrabConfigURL => qr|'config','(.*?)'|,
887 my $id = $1
888 if $$response_ref =~ /$re{GrabID}/;
890 my $conf_url = URI::Escape::uri_unescape($1)
891 if $$response_ref =~ /$re{GrabConfigURL}/;
893 my ($xurl, $errmsg);
894 if ($conf_url)
896 $xurl = fetch_liveleak_config($conf_url);
898 # Re-enable: header, reset WRITEDATA, the above overrides the
899 # original settings.
900 $curl->setopt(CURLOPT_HEADER, 0);
901 $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
903 else
905 $errmsg = "config url not found";
908 $errmsg = "id not found" if !$id && !$errmsg;
909 print "error: $errmsg\n" if $errmsg;
911 return ($xurl, $id);
914 sub handle_evisor
916 my ($respr) = @_;
918 my %re = (
919 GrabXurl => qr|file=(.*?)"|,
920 GrabID => qr|.+/(.*?).flv|,
923 my ($xurl, $id, $errmsg);
925 $xurl = $1
926 if $$respr =~ /$re{GrabXurl}/;
928 $id = $1
929 if $xurl and $xurl =~ /$re{GrabID}/;
931 $errmsg = "video extraction url not found"
932 unless $xurl;
934 $errmsg = "video id not found"
935 unless $id and !$errmsg;
937 print STDERR "error: $errmsg\n"
938 if $errmsg;
940 return ($xurl, $id);
943 sub handle_dmotion
945 my ($resp) = @_;
947 my %re = (
948 GrabID => qr|swf%2F(.*?)"|,
949 GrabPaths => qr|"video", "(.*?)"|
952 my ($id, @paths);
953 $id = $1 if $$resp =~ /$re{GrabID}/;
954 my $paths = URI::Escape::uri_unescape($1)
955 if $$resp =~ /$re{GrabPaths}/;
957 use constant ADDR => "http://dailymotion.com";
959 my $xurl;
960 if ($id && $paths)
962 foreach (split(/\|\|/, $paths))
964 my ($path, $type) = split(/@@/, $_);
965 if ($type eq "spark")
966 { # same as regular flv
967 $xurl = ADDR . $path;
969 if ($type eq $opts{format})
971 $xurl = ADDR . $path;
972 last;
977 my $errmsg;
978 $errmsg = "id not found" if !$id;
979 $errmsg = "paths not found" if !$paths && !$errmsg;
980 $errmsg = "failed to construct xurl" if !$xurl && !$errmsg;
982 print STDERR "\nerror: $errmsg\n"
983 if $errmsg;
985 return ($xurl, $id);
988 sub handle_cctv
990 my ($resp, $resp_fh, $page_url) = @_;
991 my $re = qr|videoId=(.*?)&|;
993 my ($id, $xurl);
994 $id = $1 if $$resp =~ /$re/;
996 if ($id)
998 my $domain = join('.', strdomain($page_url));
999 my $conf_url =
1000 "http://$domain/playcfg/flv_info_new.jsp?videoId=$id";
1001 $xurl = fetch_cctv_space_config($conf_url, $resp_fh);
1003 else
1005 print STDERR "\nerror: id not found\n";
1008 return ($xurl, $id);
1011 # Subroutines: Progress
1012 # NOTE: the 'dot' progress copies much from wget.
1014 sub progress_callback
1016 my $percent = 0;
1018 if ($opts{progress} =~ /^dot/) { $percent = dot_update(@_); }
1019 elsif ($opts{progress} =~ /^bar/) { $percent = bar_update(@_); }
1021 if ( $opts{stream}
1022 && $opts{streamexec}
1023 && !$stream[0])
1025 fork_streamer() if $percent >= $opts{stream};
1027 return 0;
1030 sub dot_init
1032 $dp{dots} = 0;
1033 $dp{rows} = 0;
1034 $dp{dlthen} = 0;
1035 $dp{accum} = 0;
1037 # Default style
1038 $dp{dot_bytes} = 1024;
1039 $dp{dot_spacing} = 10;
1040 $dp{dots_in_line} = 50;
1042 my ($type, $style) = split(/:/, $opts{progress});
1044 if ($style)
1046 if ($style eq 'binary')
1048 $dp{dot_bytes} = 8192;
1049 $dp{dot_spacing} = 16;
1050 $dp{dots_in_line} = 48;
1052 elsif ($style eq 'mega')
1054 $dp{dot_bytes} = 65536;
1055 $dp{dot_spacing} = 8;
1056 $dp{dots_in_line} = 48;
1061 sub dot_update
1063 my ($clientp, $dltotal, $dlnow, $ultotal, $ulnow) = @_;
1065 my ($percent, $elapsed, $rate, $eta) =
1066 calc_progress($dlnow, $dltotal);
1068 return 0
1069 if $elapsed < 1.0;
1071 my $row_bytes = $dp{dot_bytes} * $dp{dots_in_line};
1073 $dp{accum} += $dlnow - $dp{dlthen};
1074 $dp{dlthen} = $dlnow;
1076 for (; $dp{accum} >= $dp{dot_bytes} ; $dp{accum} -= $dp{dot_bytes})
1079 printf "\n%6dK", $dp{rows} * $row_bytes / 1024
1080 if $dp{dots} == 0;
1082 print " "
1083 if $dp{dots} % $dp{dot_spacing} == 0;
1085 ++$dp{dots};
1086 print ".";
1088 if ($dp{dots} >= $dp{dots_in_line})
1090 ++$dp{rows};
1091 $dp{dots} = 0;
1093 dot_print_row_stats($percent, $elapsed, $eta, $rate, 0);
1096 return $percent;
1099 sub dot_finish
1101 return if $opts{quiet};
1103 my $row_bytes = $dp{dot_bytes} * $dp{dots_in_line};
1105 printf "\n%6dK", $dp{rows} * $row_bytes / 1024
1106 if $dp{dots} == 0;
1108 for (my $i = $dp{dots} ; $i < $dp{dots_in_line} ; $i++)
1110 print " "
1111 if $i % $dp{dot_spacing} == 0;
1113 print " ";
1116 my $elapsed = time - $time_started;
1117 my $eta = time2str($elapsed, 1);
1118 my $rate = $entry{file_length} / $elapsed;
1120 dot_print_row_stats(100, $elapsed, $eta, $rate, 1);
1123 sub dot_print_row_stats
1125 my ($percent, $elapsed, $eta, $rate, $last) = @_;
1126 my ($unit, $_rate) = get_units($rate);
1128 printf "%3d%% %4.1f%s", $percent, $_rate, $unit;
1129 printf "%s%s", $last ? "=" : " ", $eta;
1132 use constant DEFAULT_TERM_WIDTH => 80;
1134 sub get_term_width
1136 return DEFAULT_TERM_WIDTH
1137 unless $opted_mods{ReadKey};
1138 my ($width) = GetTerminalSize();
1139 return $width;
1142 sub bar_init
1144 my ($initial, $total) = @_;
1146 $total = $initial
1147 if $initial > $total;
1149 $term_width = get_term_width();
1151 $bp{initial} = $initial; # bytes dl previously
1152 $bp{total} = $total; # expected bytes
1153 $bp{width} = DEFAULT_TERM_WIDTH - 1;
1154 $bp{started} = time;
1155 $bp{lastupd} = 0;
1156 $bp{done} = 0;
1159 use constant REFRESH_INTERVAL => 0.2;
1161 sub bar_update
1163 my ($clientp, $total, $now, $ultotal, $ulnow) = @_;
1165 my $force_update = 0;
1166 if ($recv_sigwinch)
1168 my $old_width = $term_width;
1169 $term_width = get_term_width();
1170 if ($term_width != $old_width)
1172 $bp{width} = $term_width - 1;
1173 $force_update = 1;
1175 $recv_sigwinch = 0;
1178 my $tnow = time;
1179 my $elapsed = $tnow - $bp{started};
1181 if (!$bp{done})
1183 return 0
1184 if (($elapsed - $bp{lastupd}) < REFRESH_INTERVAL
1185 && !$force_update);
1187 else
1189 $now = $bp{total};
1192 $bp{lastupd} = $elapsed;
1193 my $size = $bp{initial} + $now;
1195 my $fname_len = 32;
1196 if ($bp{width} > DEFAULT_TERM_WIDTH)
1198 $fname_len += $bp{width} - DEFAULT_TERM_WIDTH;
1201 my $buffer =
1202 substr(File::Basename::basename($curr_fpath), 0, $fname_len);
1204 my $percent = 0;
1205 if ($bp{total} > 0)
1207 my $_size = !$bp{done} ? $size : $now;
1208 $percent = 100.0 * $size / $bp{total};
1209 if ($percent < 100)
1211 $buffer .= sprintf(" %2d%% ", $percent);
1213 else
1215 $buffer .= sprintf(" 100%%");
1217 $buffer .= sprintf(" %4.1fM / %4.1fM",
1218 $_size / MBDIV, $bp{total} / MBDIV);
1221 my $rate = $elapsed ? ($now / $elapsed) : 0;
1222 my $tmp = "";
1223 if ($rate > 0)
1225 my $eta;
1226 if (!$bp{done})
1228 my $left = ($total - $now) / $rate;
1229 $eta = time2str($left);
1231 else
1233 $eta = time2str($elapsed);
1235 my ($unit, $_rate) = get_units($rate);
1236 $tmp = sprintf(" %4.1f%s %6s", $_rate, $unit, $eta);
1238 else
1240 $tmp = " --.-K/s --:--";
1243 # pad to max. width leaving enough space for rate+eta
1244 my $pad = $bp{width} - length($tmp) - length($buffer);
1245 $buffer .= sprintf("%${pad}s", " ");
1246 $buffer .= $tmp; # append rate+eta
1248 printf("\r%s", $buffer);
1249 $bp{count} = $now;
1251 return $percent;
1254 sub bar_finish
1256 return if $opts{quiet};
1258 if ( $bp{total} > 0
1259 && $bp{count} + $bp{initial} > $bp{total})
1261 $bp{total} = $bp{initial} + $bp{count};
1264 $bp{done} = 1;
1265 bar_update(-1, -1, -1, -1, -1);
1268 sub calc_progress
1270 my ($dlnow, $dltotal, $elapsed) = @_;
1272 my $percent = 0;
1274 $percent = int($dlnow / $dltotal * 100)
1275 if $dltotal;
1277 $elapsed = time - $time_started
1278 unless $elapsed;
1280 my $eta = '--:--';
1281 my $rate = 0;
1283 $rate = $dlnow / $elapsed
1284 if $elapsed;
1286 if ($rate > 0)
1288 my $left = ($dltotal - $dlnow) / $rate;
1289 $eta = time2str($left);
1292 return ($percent, $elapsed, $rate, $eta);
1295 sub time2str
1297 my ($secs) = @_;
1299 my $str;
1300 if ($secs < 100)
1302 $str = sprintf("%ds", $secs);
1304 elsif ($secs < 100 * 60)
1306 $str = sprintf("%dm%ds", $secs / 60, $secs % 60);
1308 elsif ($secs < 48 * 3600)
1310 $str = sprintf("%dh%dm", $secs / 3600, ($secs / 60) % 60);
1312 elsif ($secs < 100 * 86400)
1314 $str = sprintf("%dd%dh", $secs / 86400, ($secs / 3600) % 60);
1316 else
1318 $str = sprintf("%dd", $secs / 86400);
1320 return $str;
1323 sub get_units
1325 my ($rate) = @_;
1326 my @units = qw|K/s M/s G/s|;
1328 my $i = 0;
1329 if ($rate < 1024 * 1024)
1331 $rate /= 1024;
1333 elsif ($rate < 1024 * 1024)
1335 $rate /= 1024 * 1024;
1336 $i = 1;
1338 elsif ($rate < 1024 * 1024 * 1024)
1340 $rate /= 1024 * 1024 * 1024;
1341 $i = 2;
1343 return ($units[$i], $rate);
1346 # Subroutines: LittleHelpers
1348 sub main
1350 $SIG{WINCH} = \&handle_sigwinch;
1351 init_cache();
1353 if ($opts{clear}) { clear_cache(); }
1354 elsif ($opts{show}) { show_cache(); }
1356 verify_exec();
1358 grab_clivepass();
1359 get_queue();
1361 select STDERR;
1362 $| = 1; # => unbuffered
1363 select STDOUT;
1364 $| = 1;
1366 if ($opts{background})
1368 daemonize();
1370 else
1372 if ($opts{stderr})
1375 # redirect stdout to stderr
1376 open STDOUT, ">&STDERR"
1377 or die "error: cannot dup STDOUT: $!";
1381 process_queue();
1382 free_cache();
1385 sub grab_clivepass
1388 # TODO: Supports only Youtube. Expand to support other websites as needed.
1389 return
1390 unless $opts{login}
1391 and $opts{ytuser}
1392 and $opts{ytpass} eq "-";
1394 print STDERR "error: no path to clivepass, use --clivepass\n"
1395 and exit
1396 unless $opts{clivepass};
1398 print STDERR "error: Expect module not found\n" and exit
1399 unless $opted_mods{Expect};
1401 my $phrase;
1402 $phrase = getpass("Enter passphrase for clivepass: ")
1403 while (!$phrase);
1405 my $e = Expect->new;
1406 $e->log_stdout(0);
1407 $e->spawn($opts{clivepass}, "-g", $opts{ytuser})
1408 or print STDERR "error: could not spawn: $!\n" and exit;
1410 my ($spawned, $pwd);
1411 $e->expect(
1414 qr'Enter passphrase: $',
1415 sub {
1416 my $fh = shift;
1417 $fh->send("$phrase\n");
1418 $spawned = 1;
1419 exp_continue;
1423 eof => sub {
1424 if ($spawned)
1426 my $fh = shift;
1427 $pwd = $fh->before();
1428 if ($pwd =~ /error: (.*?)$/)
1430 print STDERR "clivepass: error: $1\n";
1431 exit;
1433 else
1435 $pwd = $1
1436 if ($pwd =~ /login: $opts{ytuser}=(.*?)$/);
1439 else
1441 print STDERR
1442 "error: could not spawn $opts{clivepass}\n";
1443 exit;
1448 timeout => sub {
1449 print STDERR "error: clivepass: expect timed out\n";
1450 exit;
1455 $opts{ytpass} = $pwd;
1458 sub getpass
1460 system "stty -echo";
1461 print shift;
1462 chomp(my $pwd = <STDIN>);
1463 print "\n";
1464 system "stty echo";
1465 return $pwd;
1468 sub daemonize
1470 $logfile =
1471 $opts{append}
1472 || $opts{output}
1473 || File::Spec->catfile($workdir, "clive-log");
1475 my $pid = fork;
1476 if ($pid < 0)
1478 print STDERR "\nerror: fork failed: $!";
1479 exit 1;
1481 elsif ($pid != 0)
1483 print "continuing in background, pid $pid.\n";
1484 print "output will be written to $logfile.\n"
1485 unless $opts{quiet};
1486 exit 0;
1489 chdir $workdir;
1491 my $mode = $opts{append} ? ">>" : ">";
1492 $logfile = "/dev/null" if $opts{quiet};
1494 open STDOUT, "$mode", "$logfile"
1495 or die "error: cannot redirect STDOUT: $!";
1497 open STDERR, ">&STDOUT"
1498 or die "error: cannot dup STDOUT: $!";
1501 sub fork_streamer
1503 $stream[0] = 1; # set flag
1504 my $child = fork;
1506 if ($child < 0)
1508 print STDERR "error: fork failed: $!\n";
1510 elsif ($child == 0)
1512 my $cmd = $opts{streamexec};
1513 $cmd =~ s/%i/"$curr_fpath"/g;
1514 system("$cmd");
1515 exit(0);
1518 $stream[1] = $child;
1521 sub fetch_liveleak_playlist
1523 my $playlist_url = shift;
1525 print "done.\nfetch playlist xspf ..."
1526 unless $opts{quiet};
1528 my $playlist = "";
1529 open my $fh, ">", \$playlist;
1531 $curl->setopt(CURLOPT_URL, $playlist_url);
1532 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1534 my $rc = $curl->perform;
1535 close $fh;
1537 my ($xurl, $errmsg);
1538 if ($rc == 0)
1541 # NOTE: XML::XSPF exists in CPAN but this should work just as well.
1542 # Parsing with XML::Simple results in errors due unescaped values.
1543 $playlist =~ tr{\n}//d;
1544 $xurl = $1
1545 if $playlist =~ /<location>(.*?)<\/location>/;
1547 else
1549 $errmsg = $curl->strerror($rc) . " (http/$rc)";
1552 $errmsg = "location tag not found" if !$xurl && !$errmsg;
1553 print STDERR "\nerror: $errmsg\n" if $errmsg;
1555 return $xurl;
1558 sub fetch_liveleak_config
1560 my $config_url = shift;
1562 print "done.\nfetch config xml ..."
1563 unless $opts{quiet};
1565 my $config = "";
1566 open my $fh, ">", \$config;
1568 # Disable: header
1569 $curl->setopt(CURLOPT_HEADER, 0);
1570 $curl->setopt(CURLOPT_URL, $config_url);
1571 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1573 my $rc = $curl->perform;
1574 close $fh;
1576 my ($xurl, $errmsg);
1577 if ($rc == 0)
1579 if ($config =~ /<file>(.*?)<\/file>/)
1581 $xurl = fetch_liveleak_playlist($1);
1583 else
1585 $errmsg = "playlist url not found";
1588 else
1590 $errmsg = $curl->strerror($rc) . " (http/$rc)\n";
1593 print STDERR "\nerror: $errmsg\n" if $errmsg;
1595 return $xurl;
1598 sub fetch_sevenload_configxml
1600 my ($conf_url, $response_fh) = @_;
1602 print "done.\nfetch config xml..."
1603 unless $opts{quiet};
1605 my $conf_xml = "";
1606 open my $conf_fh, ">", \$conf_xml;
1608 # Disable: header
1609 $curl->setopt(CURLOPT_HEADER, 0);
1610 $curl->setopt(CURLOPT_URL, $conf_url);
1611 $curl->setopt(CURLOPT_WRITEDATA, $conf_fh);
1613 my $rc = $curl->perform;
1614 close $conf_fh;
1616 # Re-enable: header
1617 $curl->setopt(CURLOPT_HEADER, 1);
1618 $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
1620 my ($xurl, $id);
1621 if ($rc == 0)
1623 my %re = (
1624 GrabXurl => qr|<location seeking="yes">(.*?)</location>|,
1625 GrabID => qr|item id="(.*?)"|,
1627 $id = $1
1628 if $conf_xml =~ /$re{GrabID}/;
1629 $xurl = $1
1630 if $conf_xml =~ /$re{GrabXurl}/;
1632 else
1634 print STDERR "\nerror: "
1635 . $curl->strerror($rc)
1636 . " (http/$rc)\n";
1638 return ($xurl, $id);
1641 sub fetch_cctv_space_config
1643 my ($conf_url, $resp_fh) = @_;
1645 print "done.\nfetch config file ..."
1646 unless $opts{quiet};
1648 my $conf = "";
1649 open my $fh, ">", \$conf;
1651 # Disable: header
1652 $curl->setopt(CURLOPT_HEADER, 0);
1653 $curl->setopt(CURLOPT_URL, $conf_url);
1654 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1656 my $rc = $curl->perform;
1657 close $fh;
1659 my ($xurl, $errmsg);
1660 if ($rc == 0)
1662 my $re = qr|"url":"(.*?)"|;
1663 if ($conf =~ /$re/)
1665 $xurl = "http://v.cctv.com/flash/$1";
1667 else
1669 $errmsg = "extraction url not found";
1672 else
1674 $errmsg = $curl->strerror($rc) . " http/$rc\n";
1677 print STDERR "\nerror: $errmsg\n" if $errmsg;
1679 # Re-enable: header, reset WRITEDATA, the above overrides the
1680 # original settings.
1681 $curl->setopt(CURLOPT_HEADER, 0);
1682 $curl->setopt(CURLOPT_WRITEDATA, $resp_fh);
1684 return $xurl;
1687 sub strdomain
1689 my $uri = shift;
1691 my ($scheme, $authority, $path, $query, $fragment) = $uri =~
1692 m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
1694 # Extract the domain from the URL.
1695 my @a = split(/\./, $authority);
1697 return @a;
1700 sub title_to_filename
1702 my $title = shift;
1704 $title =~
1705 s/(youtube|video|liveleak.com|sevenload|dailymotion|cctv.com)//gi;
1706 $title =~ s/^\s+//;
1707 $title =~ s/\s+$//;
1709 my $r = $opts{cclass} || qr|\w|;
1710 $title = join('', $title =~ /$r/g);
1712 my $fn = $opts{fnfmt} || "%t_%d_%i.%s";
1713 my $timestamp = POSIX::strftime("%F %T", localtime);
1715 my @a = strdomain($entry{page_url});
1717 my %h = (
1718 "%t" => $title,
1719 "%s" => $entry{file_suffix},
1720 "%d" => $a[scalar @a - 2], # Without the TLD.
1721 "%i" => $entry{video_id},
1722 "%D" => (split(/ /, $timestamp))[0],
1723 "%T" => (split(/ /, $timestamp))[1],
1724 "%S" => $timestamp,
1727 my $m = join('|', keys %h);
1728 $fn =~ s/($m)/$h{$1}/ig;
1730 return $fn;
1733 sub newname_if_exists
1735 my ($path, $orig, $new) = (shift, shift);
1737 for (my $i = 1 ; ; $i++)
1739 $new = File::Spec->catfile($path, "$orig.$i");
1740 last if !-e $new;
1743 my ($vol, $dir, $fn) = File::Spec->splitpath($new);
1744 return ($new, $fn);
1747 sub format_show
1749 my $s = shift;
1750 my %e = map_entry(shift);
1752 my $t =
1753 $opted_mods{IOPager}
1754 ? $e{page_title}
1755 : decode_utf8($e{page_title});
1757 my %h = (
1758 "%t" => $t,
1759 "%i" => $e{video_id},
1760 "%l" => $e{file_length},
1761 "%m" => sprintf("%.2f", $e{file_length} / MBDIV),
1762 "%u" => $e{page_url},
1763 "%x" => $e{xurl},
1764 "%D" => (split(/ /, $e{time_stamp}))[0],
1765 "%T" => (split(/ /, $e{time_stamp}))[1],
1766 "%S" => $e{time_stamp},
1769 my $m = join('|', keys %h);
1770 $s =~ s/($m)/$h{$1}/ig;
1772 return $s;
1775 sub init_cache
1777 require File::Path;
1778 File::Path::mkpath([$CONFIGDIR], 0, 0700);
1779 require BerkeleyDB;
1780 $cache_db = tie %cache, "BerkeleyDB::Hash",
1781 -Filename => $CACHEFILE,
1782 -Flags => BerkeleyDB->DB_CREATE
1783 or die "error: cannot open $CACHEFILE: $!\n";
1786 sub show_cache
1788 IO::Pager->new(*STDOUT)
1789 if $opted_mods{IOPager};
1791 my $fmt = $opts{showfmt} || SHOWFMT_DEFAULT;
1792 my @entries = ();
1794 require Digest::SHA;
1796 if ($opts{grep})
1798 grep_cache(); # Stores matches => @queue
1799 push @entries, format_show($fmt, Digest::SHA::sha1_hex($_))
1800 foreach (@queue);
1802 else
1804 push @entries, format_show($fmt, $_) foreach (sort keys %cache);
1807 print STDOUT "$_\n" foreach sort @entries;
1809 close STDOUT
1810 if $opted_mods{IOPager};
1812 if ($opts{grep} and $opts{delete} and scalar @queue > 0)
1814 print "Confirm delete (y/N):";
1815 $_ = lc <STDIN>;
1816 chomp;
1817 if (lc $_ eq "y")
1819 delete $cache{Digest::SHA::sha1_hex($_)} foreach (@queue);
1822 exit;
1825 sub clear_cache
1827 unlink $CACHEFILE if -e $CACHEFILE;
1828 exit;
1831 sub free_cache
1833 undef $cache_db;
1834 untie %cache;
1837 sub map_entry
1839 my $key = shift;
1840 my @values = split(/;/, $cache{$key});
1842 my @keys = qw(
1843 file_suffix file_length file_format page_title
1844 page_url time_stamp video_id xurl
1845 ); # Order matters. See also save_entry.
1847 my $i = 0;
1848 return map { $_ => $values[$i++] } @keys;
1851 sub fetch_entry
1853 %entry = map_entry($hash);
1854 $entry{page_title} = decode_utf8($entry{page_title});
1856 #while (my ($key, $value) = each(%entry)) { print "$key => $value\n"; } die;
1859 sub save_entry
1861 my @values;
1863 $entry{time_stamp} = POSIX::strftime("%F %T", localtime);
1865 push @values, $entry{$_} foreach sort keys %entry;
1867 $cache{$hash} = join(';', @values);
1868 $cache_db->db_sync();
1871 sub grep_cache
1873 my $g =
1874 $opts{case}
1875 ? qr|$opts{grep}|
1876 : qr|$opts{grep}|i;
1878 my $fmt = $opts{showfmt} || SHOWFMT_DEFAULT;
1880 foreach (sort keys %cache)
1882 my @e = split(/;/, $cache{$_});
1883 if (grep /$g/, @e)
1885 if ($opts{delete})
1887 if ($opts{show}) { push @queue, $e[4]; }
1888 else { delete $cache{$_}; }
1890 else { push @queue, $e[4]; } # 4=URL
1893 exit
1894 if $opts{delete} and not $opts{show};
1897 sub translate_embed
1899 my ($url) = @_;
1900 $$url =~ s!/v/!/watch?v=!i; # youtube
1901 $$url =~ s!googleplayer.swf!videoplay!i; # googlevideo
1902 $$url =~ s!/pl/!/videos/!i; # sevenload
1903 $$url =~ s!/e/!/view?i=!i; # liveleak
1906 sub verify_exec
1908 return if !$opts{exec};
1909 if ($opts{exec} !~ /[;+]$/)
1911 print "error: --exec expression must be terminated "
1912 . "by either ';' or '+'\n";
1913 exit;
1917 sub exec_cmd
1919 return if !$opts{exec};
1920 if ($opts{exec} =~ /;$/)
1921 { # semi
1922 foreach (@exec_files)
1924 my $cmd = $opts{exec};
1925 $cmd =~ s/%i/"$_"/g;
1926 $cmd =~ tr{;}//d;
1927 system("$cmd");
1930 else
1931 { # plus
1932 my $cmd = sprintf("%s ", $opts{exec});
1933 $cmd =~ s/%i//g;
1934 $cmd =~ tr{+}//d;
1935 $cmd .= sprintf('"%s" ', $_) foreach (@exec_files);
1936 system("$cmd");
1940 sub emit
1942 print "<?xml version=\"1.0\"?>\n<queue>\n"
1943 if $opts{emitxml} and @emit_queue;
1945 require URI::Escape;
1947 foreach (@emit_queue)
1949 if ($opts{emitxml})
1951 print " <video>\n";
1952 while (my ($key, $value) = each(%$_))
1954 $value = URI::Escape::uri_escape($value)
1955 if $key eq 'xurl'
1956 or $key eq 'page_url';
1957 print " <$key>$value</$key>\n";
1959 print " </video>\n";
1961 elsif ($opts{emitcsv})
1963 printf qq/csv:"%s","%s","%s","%.2fMB",/
1964 . qq/"%s","%s","%s","%s","%s","%s"\n/,
1965 $_->{page_url}, $_->{xurl}, $_->{fn},
1966 $_->{file_length} / MBDIV, $_->{file_length},
1967 $_->{video_id}, $_->{time_stamp}, $_->{page_title},
1968 $_->{cont_from}, $_->{remaining};
1971 print "</queue>\n"
1972 if $opts{emitxml} and @emit_queue;
1975 sub print_hosts
1977 print "$re_hosts{$_}\n" foreach (keys %re_hosts);
1978 exit;
1981 sub print_version
1983 my $perl_v = sprintf("--with-perl=%vd-%s", $^V, $^O);
1984 my $str =
1985 sprintf("clive version %s with WWW::Curl version "
1986 . "$WWW::Curl::VERSION [%s].\n"
1987 . "Copyright (c) 2007-2009 Toni Gundogdu "
1988 . "<legatvs\@gmail.com>.\n\n",
1989 VERSION, $^O);
1990 $str .= "$perl_v ";
1991 my $i = 0;
1992 while (my ($key, $value) = each(%opted_mods))
1994 $str .= sprintf("--with-$key=%s ", $value ? "yes" : "no");
1995 $str .= "\n" if (++$i % 2 == 0);
1997 $str .=
1998 "\nclive is licensed under the ISC license which is functionally\n"
1999 . "equivalent to the 2-clause BSD licence.\n"
2000 . "\tReport bugs: <http://code.google.com/p/clive/issues/>\n";
2001 print "$str";
2002 exit;
2005 __END__
2007 =head1 SYNOPSIS
2009 clive [options]... [URL]...
2011 =head1 OPTIONS
2013 -h, --help print help and exit
2014 -v, --version print version and exit
2015 --hosts print supported hosts and exit
2016 -b, --background go to background after startup
2017 -e, --emit-csv emit video details as csv to stdout
2018 -E, --emit-xml emit video details as csv to stdout
2019 -V, --clivepass=PATH path to clivepass
2020 HTTP Options:
2021 -U, --agent=STRING identify as STRING to http server
2022 -y, --proxy=ADDR use ADDR for http proxy
2023 -X, --no-proxy do not use http proxy
2024 Cache Options:
2025 -R, --renew renew cache entry for visited url
2026 -s, --show dump cache entries to stdout
2027 -H, --show-format=STRING format dumped cache entries
2028 -g, --grep=PATTERN grep cache entries for PATTERN
2029 -i, --ignore-case ignore case-differences with --grep
2030 -D, --delete delete matched entries from cache
2031 -C, --clear clear cache of all entries
2032 Logging and Input Options:
2033 -o, --output=LOGFILE log messages to LOGFILE
2034 -a, --append=LOGFILE append to LOGFILE
2035 -d, --debug print libcurl debug messages
2036 -q, --quiet turn off all output
2037 -r, --recall recall last url batch
2038 -T, --savebatch=FILE save url batch to FILE
2039 -p, --paste paste input from clipboard
2040 --stderr redirect all output to stderr even when no tty
2041 Download Options:
2042 -O, --output-video=FNAME write video to file
2043 -n, --no-extract do not extract any videos
2044 -c, --continue continue partially downloaded file
2045 -W, --overwrite overwrite existing video file
2046 -G, --progress=TYPE use progress indicator TYPE
2047 -u, --youtube-user=UNAME youtube username
2048 -t, --youtube-pass=PASSW youtube password
2049 -L, --no-login do not log into youtube
2050 -S, --savedir=DIR save video files to DIR
2051 -f, --format=FORMAT extract video FORMAT
2052 -l, --cclass=CLASS use CLASS to filter titles
2053 -N, --filename-format=STR use STR to construct output filename
2054 -x, --exec=COMMAND execute COMMAND subsequently
2055 --stream-exec=COMMAND stream COMMAND to be executed
2056 --stream=PERCENT execute stream command when transfer reaches %