2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clive, the non-interactive video extraction utility
6 # Copyright (c) 2007-2009 Toni Gundogdu <legatvs@gmail.com>
8 # Permission to use, copy, modify, and distribute this software for any
9 # purpose with or without fee is hereby granted, provided that the above
10 # copyright notice and this permission notice appear in all copies.
12 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 ###########################################################################
24 use constant VERSION
=> "2.1.10";
25 use constant MBDIV
=> 0x100000;
26 use constant SHOWFMT_DEFAULT
=> qq/%D: "%t" | %mMB/;
28 binmode(STDOUT
, ":utf8");
30 # NOTE: Using "require" instead of "use" causes "Can't locate
31 # auto/WWW/Curl/CURLOPT_USE.al in @INC".
32 use WWW
::Curl
::Easy
4.05;
33 use Getopt
::Long
qw(:config bundling);
39 # Non-essential modules: set flags indicating availability
41 (Clipboard
=> 1, Expect
=> 1, IOPager
=> 1, ReadKey
=> 1);
43 $opted_mods{Clipboard
} = 0 if $@
;
45 $opted_mods{IOPager
} = 0 if $@
;
46 sub exp_continue
() { }; # Satisfies: "Bareword "exp_continue" not allowed while"
48 $opted_mods{Expect
} = 0 if $@
;
49 eval "use Term::ReadKey";
50 $opted_mods{ReadKey
} = 0 if $@
;
52 my $CONFIGDIR = $ENV{CLIVE_HOME
}
53 || File
::Spec
->catfile($ENV{HOME
}, ".config/clive");
55 my $CONFIGFILE = File
::Spec
->catfile($CONFIGDIR, "config");
56 my $CACHEFILE = File
::Spec
->catfile($CONFIGDIR, "cache");
57 my $RECALLFILE = File
::Spec
->catfile($CONFIGDIR, "recall");
59 my %opts; # runtime options
60 my @queue; # input URLs
61 my $curl; # curl handle, reused throughout lifespan
62 my $cache_db; # handle to cache BDB
63 my %cache; # handle to cache BDB (tied hash)
64 my $hash; # sha1 hash of the current url used together with %cache
65 my %entry; # multi-purpose hash for caching
66 my $ytube_logged = 0; # youtube: whether logged-in
67 my $time_started; # time file transfer started
68 my @exec_files; # holds fnames for --exec
69 my @emit_queue; # videos to be emitted
70 my $logfile; # path to logfile (--output-file, --append-file)
71 my %dp; # dot progress data
72 my %bp; # bar progress data
73 my $workdir = getcwd
; # startup workdir
74 my @stream = (0, -1); # 0=stream flag, 1=stream pid
75 my $curr_fpath; # current video output filepath
76 my $recv_sigwinch = 0; # whether SIGWINCH was received
77 my $term_width; # current terminal width
79 my %re_hosts = ( # Precompiled regex used to identify the host
80 IsYoutube
=> qr
|youtube
.com
|i
,
81 IsGoogle
=> qr
|video
.google
.|i
,
82 IsSevenload
=> qr
|sevenload
.com
|i
,
83 IsBreak
=> qr
|break.com
|i
,
84 IsLastfm
=> qr
|last.fm
|i
,
85 IsLiveleak
=> qr
|liveleak
.com
|i
,
86 IsEvisor
=> qr
|evisor
.tv
|i
,
87 IsDmotion
=> qr
|dailymotion
.com
|i
,
88 IsCctv
=> qr
|tv
.cctv
.com
|i
,
92 [$re_hosts{IsYoutube
}, \
&handle_youtube
],
93 [$re_hosts{IsGoogle
}, \
&handle_google
],
94 [$re_hosts{IsSevenload
}, \
&handle_sevenload
],
95 [$re_hosts{IsBreak
}, \
&handle_break
],
96 [$re_hosts{IsLastfm
}, \
&handle_lastfm
],
97 [$re_hosts{IsLiveleak
}, \
&handle_liveleak
],
98 [$re_hosts{IsEvisor
}, \
&handle_evisor
],
99 [$re_hosts{IsDmotion
}, \
&handle_dmotion
],
100 [$re_hosts{IsCctv
}, \
&handle_cctv
],
104 my $c = Config
::Tiny
->read($CONFIGFILE);
106 progress
=> $c->{_
}->{progress
},
107 agent
=> $c->{http
}->{agent
},
108 proxy
=> $c->{http
}->{proxy
},
109 maxspeed
=> $c->{http
}->{maxspeed
},
110 minspeed
=> $c->{http
}->{minspeed
},
111 format
=> $c->{output
}->{format
},
112 savedir
=> $c->{output
}->{savedir
},
113 cclass
=> $c->{output
}->{cclass
},
114 fnfmt
=> $c->{output
}->{file
},
115 showfmt
=> $c->{output
}->{show
},
116 ytuser
=> $c->{youtube
}->{user
},
117 ytpass
=> $c->{youtube
}->{pass
},
118 exec => $c->{commands
}->{exec},
119 streamexec
=> $c->{commands
}->{stream
},
120 clivepass
=> $c->{commands
}->{clivepass
},
123 $opts{clivepass
} = $ENV{CLIVEPASS_PATH
} unless $opts{clivepass
};
124 $opts{progress
} = 'bar' unless $opts{progress
};
125 $opts{format
} = $opts{format
} || 'flv';
132 'debug|d', 'help|h', 'overwrite|W', 'savebatch|T=s',
133 'paste|p', 'show|s', 'delete|D', 'clear|C',
134 'continue|c', 'renew|R', 'recall|r', 'format|f=s',
135 'output|o=s', 'append|a=s', 'background|b', 'quiet|q',
136 'grep|g=s', 'agent|U=s', 'proxy|y=s', 'savedir|S=s',
137 'cclass|l=s', 'exec|x=s', 'progress|G=s', 'clivepass|V=s',
138 'stream=i', 'stderr',
139 'hosts' => \
&print_hosts
,
140 'version|v' => \
&print_version
,
142 # Commented out until WWW::Curl is fixed:
143 # 'maxspeed!', 'minspeed!',
144 # Workarounds since $longopt!|$shortopt cannot be used.
145 'no-extract|n' => sub { $opts{extract
} = 0 },
146 'no-login|L' => sub { $opts{login
} = 0 },
147 'no-proxy|X' => sub { $opts{proxy
} = "" },
149 # Workaround for options with dashes. There's likely a better way.
150 'ignore-case|i' => sub { $opts{case
} = 0 },
151 'filename-format|N=s' => sub { $opts{fnfmt
} = $_[1] },
152 'show-format|H=s' => sub { $opts{showfmt
} = $_[1] },
153 'youtube-user|u=s' => sub { $opts{ytuser
} = $_[1] },
154 'youtube-pass|t=s' => sub { $opts{ytpass
} = $_[1] },
155 'emit-csv|e' => sub { $opts{emitcsv
} = 1 },
156 'emit-xml|E' => sub { $opts{emitxml
} = 1 },
157 'stream-exec=s' => sub { $opts{streamexec
} = $_[1] },
158 'output-video|O=s' => sub { $opts{outputfname
} = $_[1] },
164 Pod
::Usage
::pod2usage
(-exitstatus
=> 0, -verbose
=> 1);
169 ## Subroutines: Signal handlers
174 # my $sig_name = shift;
178 ## Subroutines: Connection
182 $curl = WWW
::Curl
::Easy
->new;
184 $curl->setopt(CURLOPT_USERAGENT
, $opts{agent
} || "Mozilla/5.0");
185 $curl->setopt(CURLOPT_FOLLOWLOCATION
, 1);
186 $curl->setopt(CURLOPT_AUTOREFERER
, 1);
187 $curl->setopt(CURLOPT_HEADER
, 1);
188 $curl->setopt(CURLOPT_NOBODY
, 0);
190 $curl->setopt(CURLOPT_VERBOSE
, 1)
193 $curl->setopt(CURLOPT_PROXY
, $opts{proxy
})
194 if defined $opts{proxy
};
196 $curl->setopt(CURLOPT_MAX_RECV_SPEED_LARGE
, $opts{maxspeed
})
197 if $opts{maxpseed
}; # NOTE: No effect. Bug in WWW::Curl::Easy?
199 $curl->setopt(CURLOPT_LOW_SPEED_LIMIT
, $opts{minspeed
})
200 if $opts{minspeed
}; # Ditto.
205 print "[youtube] attempt to login as $opts{ytuser} ..."
209 open my $fh, ">", \
$response;
212 "http://uk.youtube.com/login?current_form=loginform"
213 . "&username=$opts{ytuser}&password=$opts{ytpass}"
214 . "&action_login=log+in&hl=en-GB";
216 $curl->setopt(CURLOPT_URL
, $login_url);
217 $curl->setopt(CURLOPT_COOKIEFILE
, ""); # Enable cookies from here on
218 $curl->setopt(CURLOPT_ENCODING
, ""); # Supported encodings
219 $curl->setopt(CURLOPT_WRITEDATA
, $fh);
221 my $rc = $curl->perform;
226 $response =~ tr
{\n}//d;
227 $errmsg = "error: login was incorrect"
228 if $response =~ /your log-in was incorrect/i;
229 $errmsg = "error: check your login password"
230 if $response =~ /check your password/i and !$errmsg;
231 $errmsg = "error: too many login failures, try again later"
232 if $response =~ /too many login failures/i and !$errmsg;
236 $errmsg = "error: " . $curl->strerror($rc) . " (http/$rc)";
240 print STDERR
"\n$errmsg\n" and exit
246 $curl->setopt(CURLOPT_COOKIE
,
247 "is_adult=" . uc(Digest
::SHA
::sha1_hex
(rand())));
259 require HTML
::TokeParser
;
261 require File
::Basename
;
266 $hash = Digest
::SHA
::sha1_hex
($_);
269 my ($rc, $rfh, $response) = fetch_page
($_);
271 if ($rc == 0 or $rc == 0xff)
273 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE
)
274 unless $rc == 0xff; # read from cache
276 if ($rc == 200 or $rc == 0xff)
278 if (!defined($entry{page_url
}))
280 next if process_page
($_, \
$response, $rfh) == -1;
282 extract_video
() if $entry{xurl
};
286 $errmsg = $curl->strerror($rc) . " (http/$rc)";
291 $errmsg = $curl->strerror($rc) . " (http/$rc)";
295 print STDERR
"\nerror: $errmsg\n"
304 my ($url, $response, $from_cache, $rc) = (shift, "");
305 open my $fh, ">", \
$response;
307 # Youtube: login only if both username and password are defined
308 if ($opts{ytuser
} and $opts{ytpass
} and $opts{login
})
311 if !$ytube_logged and $url =~ /$re_hosts{IsYoutube}/;
317 ; # Make sure cached "format" matches with options
319 if $opts{format
} eq $entry{file_format
};
325 printf "%s $url ...", $from_cache ?
"cache" : "fetch"
328 $rc = 0xff; # flag: read cache entry
333 $curl->setopt(CURLOPT_URL
, $url);
334 $curl->setopt(CURLOPT_ENCODING
, "");
335 $curl->setopt(CURLOPT_WRITEDATA
, $fh);
336 $rc = $curl->perform;
339 return ($rc, $fh, decode_utf8
($response));
344 my ($url, $response_ref, $response_fh) = @_;
346 #$$response_ref =~ tr{\n}//d;
348 my $p = HTML
::TokeParser
->new($response_ref);
349 $p->get_tag("title");
350 my $title = $p->get_trimmed_text;
352 my ($xurl, $id, $_title, $supported);
354 foreach (@re_hosts_arr)
356 my ($re, $handler) = @
{$_};
360 ($xurl, $id, $_title) =
361 &$handler($response_ref, $response_fh, $url);
362 $title = $_title || $title;
366 die "error: lookup array missing handler; should never get here\n"
374 $title =~ tr
{;}//d; # Cache values cannot contain ';'
376 $entry{page_url
} = $url;
377 $entry{xurl
} = $xurl;
378 $entry{page_title
} = $title;
379 $entry{video_id
} = $id;
380 $entry{file_format
} = $opts{format
};
385 sub query_video_length
387 my ($content_type, $errmsg);
389 unless ($entry{file_length
})
391 print "done.\nverify video link ..."
394 $curl->setopt(CURLOPT_URL
, $entry{xurl
});
396 # Do not download: GET => HEAD request.
397 $curl->setopt(CURLOPT_NOBODY
, 1);
398 my $rc = $curl->perform;
400 # Reset back: HEAD => GET
401 $curl->setopt(CURLOPT_HTTPGET
, 1);
403 $entry{file_length
} =
404 $curl->getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD
);
406 $content_type = $entry{file_suffix
} =
407 $curl->getinfo(CURLINFO_CONTENT_TYPE
);
409 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE
);
414 if ($content_type =~ m{video/(.*)})
416 $entry{file_suffix
} = $1;
417 if ($content_type =~ /(.*)-(.*)$/)
419 $entry{file_suffix
} = $2;
424 # Evisor and Metacafe return "text/plain" for Content-Type
425 elsif ($content_type =~ m{text/plain})
427 if ($opts{format
} eq "flv")
429 if ( $entry{page_url
} =~ /$re_hosts{IsEvisor}/
430 or $entry{page_url
} =~ /$re_hosts{IsCctv}/)
432 $entry{file_suffix
} = "flv";
438 # Liveleak and Break return "(flv-)application/octet-stream"
439 elsif ($content_type =~ m{application/octet-stream})
441 if ($opts{format
} eq "flv")
443 if ( $entry{page_url
} =~ /$re_hosts{IsBreak}/
444 or $entry{page_url
} =~ /$re_hosts{IsLiveleak}/)
446 $entry{file_suffix
} = 'flv';
452 "expected different content-type, "
453 . "received \"$content_type\""
458 $errmsg = "server returned http/$rc";
462 { # Construct content-type from cache
463 $content_type = "video/$entry{file_suffix}";
466 unless ($opts{quiet
})
468 if (!$errmsg) { print "done.\n"; }
469 else { print STDERR
"\nerror: $errmsg\n"; }
472 return ($errmsg ?
-1 : 0, $content_type);
477 my ($rc, $content_type) = query_video_length
();
480 if $rc != 0 or !defined $content_type;
482 my $fn = $opts{outputfname
}
483 || title_to_filename
($entry{page_title
});
484 my $path = File
::Spec
->catfile($opts{savedir
} || $workdir, $fn);
486 my $remaining = $entry{file_length
};
487 my $size = -s
$path || 0;
492 if ($size > 0 and !$opts{overwrite
})
494 if ($size == $entry{file_length
} and $opts{extract
})
497 "error: file is already fully retrieved; nothing to do\n";
499 push @exec_files, $path
503 unless $opts{emitcsv
} or $opts{emitxml
};
506 elsif ($size < $entry{file_length
} and $opts{continue})
510 $remaining = ($entry{file_length
} - $cont_from);
515 newname_if_exists
($opts{savedir
} || $workdir, $fn);
519 if ($opts{emitcsv
} or $opts{emitxml
})
522 $entry{remaining
} = $remaining;
523 $entry{cont_from
} = $cont_from;
524 push @emit_queue, {%entry};
528 unless ($opts{quiet
})
534 printf("\nfrom: $cont_from (%.1fMB) "
535 . "remaining: $remaining (%.1fMB)",
536 $cont_from / MBDIV, $remaining / MBDIV
);
546 unless $opts{extract
};
548 if (open my $fh, "$filemode$path")
552 # Disable: encoding, header
553 $curl->setopt(CURLOPT_HEADER
, 0);
554 $curl->setopt(CURLOPT_ENCODING
, "identity");
555 $curl->setopt(CURLOPT_URL
, $entry{xurl
});
556 $curl->setopt(CURLOPT_WRITEDATA
, $fh);
558 $curl->setopt(CURLOPT_RESUME_FROM
, $cont_from)
561 unless ($opts{quiet
})
563 $curl->setopt(CURLOPT_PROGRESSFUNCTION
,
564 \
&progress_callback
);
565 $curl->setopt(CURLOPT_NOPROGRESS
, 0);
566 $time_started = time;
568 # Use 'dot' progress if the output is not a TTY
569 if ( $opts{progress
} !~ /^dot/
570 and $opts{progress
} ne 'none'
573 $opts{progress
} = 'dot'
574 if !-t STDOUT
or !-t STDERR
;
577 $stream[0] = 0; # reset streaming flag
579 if ($opts{progress
} =~ /^bar/)
581 bar_init
($cont_from, $entry{file_length
});
583 elsif ($opts{progress
} =~ /^dot/)
589 $rc = $curl->perform;
594 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE
);
595 if ($rc == 200 or $rc == 206)
597 if ($opts{progress
} =~ /^bar/) { bar_finish
() }
598 elsif ($opts{progress
} =~ /^dot/) { dot_finish
() }
599 waitpid($stream[1], 0) if $stream[0];
603 $errmsg = $curl->strerror($rc) . " (http/$rc)";
608 $errmsg = $curl->strerror($rc) . " (http/$rc)";
612 $curl->setopt(CURLOPT_RESUME_FROM
, 0);
613 $curl->setopt(CURLOPT_HEADER
, 1);
617 $errmsg = "$path: $!";
622 $errmsg = $curl->strerror($rc) . " (http/$rc)";
629 push @exec_files, $path
634 print STDERR
"\nerror: $errmsg\n";
638 $curl->setopt(CURLOPT_NOPROGRESS
, 1);
643 if ($opts{recall
} and -e
$RECALLFILE)
645 if (open my $fh, "<$RECALLFILE")
647 parse_input
($_) while (<$fh>);
652 print STDERR
"error: $RECALLFILE: $!";
658 print STDERR
"error: Clipboard module not found\n" and exit
659 unless $opted_mods{Clipboard
};
660 my $data = Clipboard
->paste();
663 parse_input
($_) foreach split(/\n/, $data);
667 parse_input
($_) foreach @ARGV;
668 grep_cache
() if $opts{grep};
669 unless (@queue) { parse_input
($_) while (<STDIN
>); }
671 if (open my $fh, ">$RECALLFILE")
673 print $fh "$_\n" foreach @queue;
678 print STDERR
"error: $RECALLFILE: $!";
681 if ($opts{savebatch
})
683 if (open my $fh, ">", $opts{savebatch
})
685 print $fh "$_\n" foreach @queue;
690 print STDERR
"error: $opts{savebatch}: $!";
699 return if $url =~ /^$/;
700 return if $url =~ /^#/;
704 if ($url =~ /&srcurl=(.*?)&/)
705 { # GVideo: one of many redirects
707 printf "found redirect ...%s\n=> %s\n",
708 (split(/&/, $url))[0],
709 (split(/&/, URI
::Escape
::uri_unescape
($1)))[0]
711 $url = URI
::Escape
::uri_unescape
($1);
714 # Insert http:// if not found
716 if $url !~ m{^http://}i;
718 # Translate embedded URL to video page URL
719 translate_embed
(\
$url);
721 # Last.fm wraps Youtube videos as their own
722 if ($url =~ /$re_hosts{IsLastfm}/)
726 print STDERR
"error: nosupport: $url\n" and return -1
729 $url = "http://youtube.com/watch?v=$1";
732 # Remove params from the URL
733 ($url) = split(/&/, $url);
735 foreach my $re (%re_hosts)
737 push @queue, $url and return 0
741 print STDERR
"error: nosupport: $url\n";
746 # Subroutines: Video page handlers
750 my ($response_ref, $xurl) = @_;
753 GrabID
=> qr/"video_id": "(.*?)"/,
754 GrabT
=> qr/"t": "(.*?)"/
757 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
758 my $t = $1 if $$response_ref =~ /$re{GrabT}/;
762 $xurl = "http://youtube.com/get_video?video_id=$id&t=$t";
765 if ($opts{format
} eq "mp4") { $fmt = 18; }
766 elsif ($opts{format
} eq "3gpp") { $fmt = 17; }
767 elsif ($opts{format
} eq "xflv") { $fmt = 6; }
774 printf STDERR
"\nerror: failed to extract &%s\n", $id
783 my ($response_ref) = @_;
787 GrabVideoURL
=> qr
|videoUrl
\\x3d
(.*?
)\\x26
|,
788 GrabID
=> qr
|docid
:'(.*?)'|,
789 GrabMP4
=> qr
|href
="http://vp\.(.*?)"|,
792 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
794 my $xurl = URI
::Escape
::uri_unescape
($1)
795 if $$response_ref =~ /$re{GrabVideoURL}/;
797 my $mp4 = $1 if $$response_ref =~ /$re{GrabMP4}/;
800 $errmsg = "video id not found" if !$id;
801 $errmsg = "extraction url not found" if !$xurl && !$errmsg;
803 print STDERR
"\nerror: $errmsg\n" if $errmsg;
805 $xurl = "http://vp.$mp4"
806 if $mp4 && $opts{format
} eq "mp4" && $xurl;
813 my ($response_ref, $response_fh) = @_;
815 my %re = (GrabConfigPath
=> qr
|configPath
=(.*?
)"|);
817 my $conf_path = URI::Escape::uri_unescape($1)
818 if $$response_ref =~ /$re{GrabConfigPath}/;
820 my ($xurl, $id, $errmsg);
824 fetch_sevenload_configxml($conf_path, $response_fh);
828 $errmsg = "configPath
not found
";
830 $errmsg = "item id
not found
" if !$errmsg && !$id;
831 $errmsg = "extraction url
not found
" if !$errmsg && !$xurl;
832 print STDERR "\nerror
: $errmsg\n" if $errmsg;
838 my ($response_ref) = @_;
841 GrabTitle => qr|id="vid_title
" content="(.*?
)"|,
842 GrabID => qr|ContentID='(.*?)'|,
843 GrabFilePath => qr|ContentFilePath='(.*?)'|,
844 GrabFileName => qr|FileName='(.*?)'|
847 my $title = $1 if $$response_ref =~ /$re{GrabTitle}/;
848 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
849 my $fpath = $1 if $$response_ref =~ /$re{GrabFilePath}/;
850 my $fname = $1 if $$response_ref =~ /$re{GrabFileName}/;
853 if ($fpath and $fname)
855 $xurl = "http
://media
1.break.com
/dnet
/media
/$fpath/$fname.flv
";
859 $errmsg = "failed to extract ContentFilePath
"
862 $errmsg = "failed to extract FileName
"
863 if !$fname and !$errmsg;
866 $errmsg = "failed to extract title
"
867 if !$title and !$errmsg;
869 $errmsg = "failed to extract id
"
870 if !$id and !$errmsg;
872 print STDERR "\nerror
: $errmsg\n"
875 return ($xurl, $id, $title);
880 my ($response_ref, $response_fh) = @_;
883 GrabID => qr|token=(.*?)&|,
884 GrabConfigURL => qr|'config','(.*?)'|,
888 if $$response_ref =~ /$re{GrabID}/;
890 my $conf_url = URI::Escape::uri_unescape($1)
891 if $$response_ref =~ /$re{GrabConfigURL}/;
896 $xurl = fetch_liveleak_config($conf_url);
898 # Re-enable: header, reset WRITEDATA, the above overrides the
900 $curl->setopt(CURLOPT_HEADER, 0);
901 $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
905 $errmsg = "config url
not found
";
908 $errmsg = "id
not found
" if !$id && !$errmsg;
909 print "error
: $errmsg\n" if $errmsg;
919 GrabXurl => qr|file=(.*?)"|,
920 GrabID
=> qr
|.+/(.*?
).flv
|,
923 my ($xurl, $id, $errmsg);
926 if $$respr =~ /$re{GrabXurl}/;
929 if $xurl and $xurl =~ /$re{GrabID}/;
931 $errmsg = "video extraction url not found"
934 $errmsg = "video id not found"
935 unless $id and !$errmsg;
937 print STDERR
"error: $errmsg\n"
948 GrabID
=> qr
|swf
%2F(.*?
)"|,
949 GrabPaths => qr|"video
", "(.*?
)"|
953 $id = $1 if $$resp =~ /$re{GrabID}/;
954 my $paths = URI::Escape::uri_unescape($1)
955 if $$resp =~ /$re{GrabPaths}/;
957 use constant ADDR => "http
://dailymotion
.com
";
962 foreach (split(/\|\|/, $paths))
964 my ($path, $type) = split(/@@/, $_);
965 if ($type eq "spark
")
966 { # same as regular flv
967 $xurl = ADDR . $path;
969 if ($type eq $opts{format})
971 $xurl = ADDR . $path;
978 $errmsg = "id
not found
" if !$id;
979 $errmsg = "paths
not found
" if !$paths && !$errmsg;
980 $errmsg = "failed to construct xurl
" if !$xurl && !$errmsg;
982 print STDERR "\nerror
: $errmsg\n"
990 my ($resp, $resp_fh, $page_url) = @_;
991 my $re = qr|videoId=(.*?)&|;
994 $id = $1 if $$resp =~ /$re/;
998 my $domain = join('.', strdomain($page_url));
1000 "http
://$domain/playcfg/flv_info_new
.jsp?videoId
=$id";
1001 $xurl = fetch_cctv_space_config($conf_url, $resp_fh);
1005 print STDERR "\nerror
: id
not found
\n";
1008 return ($xurl, $id);
1011 # Subroutines: Progress
1012 # NOTE: the 'dot' progress copies much from wget.
1014 sub progress_callback
1018 if ($opts{progress} =~ /^dot/) { $percent = dot_update(@_); }
1019 elsif ($opts{progress} =~ /^bar/) { $percent = bar_update(@_); }
1022 && $opts{streamexec}
1025 fork_streamer() if $percent >= $opts{stream};
1038 $dp{dot_bytes} = 1024;
1039 $dp{dot_spacing} = 10;
1040 $dp{dots_in_line} = 50;
1042 my ($type, $style) = split(/:/, $opts{progress});
1046 if ($style eq 'binary')
1048 $dp{dot_bytes} = 8192;
1049 $dp{dot_spacing} = 16;
1050 $dp{dots_in_line} = 48;
1052 elsif ($style eq 'mega')
1054 $dp{dot_bytes} = 65536;
1055 $dp{dot_spacing} = 8;
1056 $dp{dots_in_line} = 48;
1063 my ($clientp, $dltotal, $dlnow, $ultotal, $ulnow) = @_;
1065 my ($percent, $elapsed, $rate, $eta) =
1066 calc_progress($dlnow, $dltotal);
1071 my $row_bytes = $dp{dot_bytes} * $dp{dots_in_line};
1073 $dp{accum} += $dlnow - $dp{dlthen};
1074 $dp{dlthen} = $dlnow;
1076 for (; $dp{accum} >= $dp{dot_bytes} ; $dp{accum} -= $dp{dot_bytes})
1079 printf "\n%6dK", $dp{rows} * $row_bytes / 1024
1083 if $dp{dots} % $dp{dot_spacing} == 0;
1088 if ($dp{dots} >= $dp{dots_in_line})
1093 dot_print_row_stats($percent, $elapsed, $eta, $rate, 0);
1101 return if $opts{quiet};
1103 my $row_bytes = $dp{dot_bytes} * $dp{dots_in_line};
1105 printf "\n%6dK", $dp{rows} * $row_bytes / 1024
1108 for (my $i = $dp{dots} ; $i < $dp{dots_in_line} ; $i++)
1111 if $i % $dp{dot_spacing} == 0;
1116 my $elapsed = time - $time_started;
1117 my $eta = time2str($elapsed, 1);
1118 my $rate = $entry{file_length} / $elapsed;
1120 dot_print_row_stats(100, $elapsed, $eta, $rate, 1);
1123 sub dot_print_row_stats
1125 my ($percent, $elapsed, $eta, $rate, $last) = @_;
1126 my ($unit, $_rate) = get_units($rate);
1128 printf "%3d%% %4.1f
%s", $percent, $_rate, $unit;
1129 printf "%s%s", $last ? "=" : " ", $eta;
1132 use constant DEFAULT_TERM_WIDTH => 80;
1136 return DEFAULT_TERM_WIDTH
1137 unless $opted_mods{ReadKey};
1138 my ($width) = GetTerminalSize();
1144 my ($initial, $total) = @_;
1147 if $initial > $total;
1149 $term_width = get_term_width();
1151 $bp{initial} = $initial; # bytes dl previously
1152 $bp{total} = $total; # expected bytes
1153 $bp{width} = DEFAULT_TERM_WIDTH - 1;
1154 $bp{started} = time;
1159 use constant REFRESH_INTERVAL => 0.2;
1163 my ($clientp, $total, $now, $ultotal, $ulnow) = @_;
1165 my $force_update = 0;
1168 my $old_width = $term_width;
1169 $term_width = get_term_width();
1170 if ($term_width != $old_width)
1172 $bp{width} = $term_width - 1;
1179 my $elapsed = $tnow - $bp{started};
1184 if (($elapsed - $bp{lastupd}) < REFRESH_INTERVAL
1192 $bp{lastupd} = $elapsed;
1193 my $size = $bp{initial} + $now;
1196 if ($bp{width} > DEFAULT_TERM_WIDTH)
1198 $fname_len += $bp{width} - DEFAULT_TERM_WIDTH;
1202 substr(File::Basename::basename($curr_fpath), 0, $fname_len);
1207 my $_size = !$bp{done} ? $size : $now;
1208 $percent = 100.0 * $size / $bp{total};
1211 $buffer .= sprintf(" %2d%% ", $percent);
1215 $buffer .= sprintf(" 100%%");
1217 $buffer .= sprintf(" %4.1fM
/ %4.1fM
",
1218 $_size / MBDIV, $bp{total} / MBDIV);
1221 my $rate = $elapsed ? ($now / $elapsed) : 0;
1228 my $left = ($total - $now) / $rate;
1229 $eta = time2str($left);
1233 $eta = time2str($elapsed);
1235 my ($unit, $_rate) = get_units($rate);
1236 $tmp = sprintf(" %4.1f
%s %6s", $_rate, $unit, $eta);
1240 $tmp = " --.-K
/s
--:--";
1243 # pad to max. width leaving enough space for rate+eta
1244 my $pad = $bp{width} - length($tmp) - length($buffer);
1245 $buffer .= sprintf("%${pad
}s
", " ");
1246 $buffer .= $tmp; # append rate+eta
1248 printf("\r%s", $buffer);
1256 return if $opts{quiet};
1259 && $bp{count} + $bp{initial} > $bp{total})
1261 $bp{total} = $bp{initial} + $bp{count};
1265 bar_update(-1, -1, -1, -1, -1);
1270 my ($dlnow, $dltotal, $elapsed) = @_;
1274 $percent = int($dlnow / $dltotal * 100)
1277 $elapsed = time - $time_started
1283 $rate = $dlnow / $elapsed
1288 my $left = ($dltotal - $dlnow) / $rate;
1289 $eta = time2str($left);
1292 return ($percent, $elapsed, $rate, $eta);
1302 $str = sprintf("%ds", $secs);
1304 elsif ($secs < 100 * 60)
1306 $str = sprintf("%dm%ds", $secs / 60, $secs % 60);
1308 elsif ($secs < 48 * 3600)
1310 $str = sprintf("%dh%dm", $secs / 3600, ($secs / 60) % 60);
1312 elsif ($secs < 100 * 86400)
1314 $str = sprintf("%dd%dh", $secs / 86400, ($secs / 3600) % 60);
1318 $str = sprintf("%dd", $secs / 86400);
1326 my @units = qw|K/s M/s G/s|;
1329 if ($rate < 1024 * 1024)
1333 elsif ($rate < 1024 * 1024)
1335 $rate /= 1024 * 1024;
1338 elsif ($rate < 1024 * 1024 * 1024)
1340 $rate /= 1024 * 1024 * 1024;
1343 return ($units[$i], $rate);
1346 # Subroutines: LittleHelpers
1350 $SIG{WINCH} = \&handle_sigwinch;
1353 if ($opts{clear}) { clear_cache(); }
1354 elsif ($opts{show}) { show_cache(); }
1362 $| = 1; # => unbuffered
1366 if ($opts{background})
1375 # redirect stdout to stderr
1376 open STDOUT, ">&STDERR
"
1377 or die "error
: cannot dup STDOUT
: $!";
1388 # TODO: Supports only Youtube. Expand to support other websites as needed.
1392 and $opts{ytpass} eq "-";
1394 print STDERR "error
: no path to clivepass
, use --clivepass
\n"
1396 unless $opts{clivepass};
1398 print STDERR "error
: Expect module
not found
\n" and exit
1399 unless $opted_mods{Expect};
1402 $phrase = getpass("Enter passphrase
for clivepass
: ")
1405 my $e = Expect->new;
1407 $e->spawn($opts{clivepass}, "-g
", $opts{ytuser})
1408 or print STDERR "error
: could
not spawn
: $!\n" and exit;
1410 my ($spawned, $pwd);
1414 qr'Enter passphrase: $',
1417 $fh->send("$phrase\n");
1427 $pwd = $fh->before();
1428 if ($pwd =~ /error: (.*?)$/)
1430 print STDERR "clivepass
: error
: $1\n";
1436 if ($pwd =~ /login: $opts{ytuser}=(.*?)$/);
1442 "error
: could
not spawn
$opts{clivepass
}\n";
1449 print STDERR "error
: clivepass
: expect timed out
\n";
1455 $opts{ytpass} = $pwd;
1460 system "stty
-echo
";
1462 chomp(my $pwd = <STDIN>);
1473 || File::Spec->catfile($workdir, "clive
-log");
1478 print STDERR "\nerror
: fork failed
: $!";
1483 print "continuing
in background
, pid
$pid.\n";
1484 print "output will be written to
$logfile.\n"
1485 unless $opts{quiet};
1491 my $mode = $opts{append} ? ">>" : ">";
1492 $logfile = "/dev/null
" if $opts{quiet};
1494 open STDOUT, "$mode", "$logfile"
1495 or die "error
: cannot redirect STDOUT
: $!";
1497 open STDERR, ">&STDOUT
"
1498 or die "error
: cannot dup STDOUT
: $!";
1503 $stream[0] = 1; # set flag
1508 print STDERR "error
: fork failed
: $!\n";
1512 my $cmd = $opts{streamexec};
1513 $cmd =~ s/%i/"$curr_fpath"/g;
1518 $stream[1] = $child;
1521 sub fetch_liveleak_playlist
1523 my $playlist_url = shift;
1525 print "done
.\nfetch playlist xspf
..."
1526 unless $opts{quiet};
1529 open my $fh, ">", \$playlist;
1531 $curl->setopt(CURLOPT_URL, $playlist_url);
1532 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1534 my $rc = $curl->perform;
1537 my ($xurl, $errmsg);
1541 # NOTE: XML::XSPF exists in CPAN but this should work just as well.
1542 # Parsing with XML::Simple results in errors due unescaped values.
1543 $playlist =~ tr{\n}//d;
1545 if $playlist =~ /<location>(.*?)<\/location>/;
1549 $errmsg = $curl->strerror($rc) . " (http
/$rc)";
1552 $errmsg = "location tag
not found
" if !$xurl && !$errmsg;
1553 print STDERR "\nerror
: $errmsg\n" if $errmsg;
1558 sub fetch_liveleak_config
1560 my $config_url = shift;
1562 print "done
.\nfetch config xml
..."
1563 unless $opts{quiet};
1566 open my $fh, ">", \$config;
1569 $curl->setopt(CURLOPT_HEADER, 0);
1570 $curl->setopt(CURLOPT_URL, $config_url);
1571 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1573 my $rc = $curl->perform;
1576 my ($xurl, $errmsg);
1579 if ($config =~ /<file>(.*?)<\/file>/)
1581 $xurl = fetch_liveleak_playlist($1);
1585 $errmsg = "playlist url
not found
";
1590 $errmsg = $curl->strerror($rc) . " (http
/$rc)\n";
1593 print STDERR "\nerror
: $errmsg\n" if $errmsg;
1598 sub fetch_sevenload_configxml
1600 my ($conf_url, $response_fh) = @_;
1602 print "done
.\nfetch config xml
..."
1603 unless $opts{quiet};
1606 open my $conf_fh, ">", \$conf_xml;
1609 $curl->setopt(CURLOPT_HEADER, 0);
1610 $curl->setopt(CURLOPT_URL, $conf_url);
1611 $curl->setopt(CURLOPT_WRITEDATA, $conf_fh);
1613 my $rc = $curl->perform;
1617 $curl->setopt(CURLOPT_HEADER, 1);
1618 $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
1624 GrabXurl => qr|<location seeking="yes
">(.*?)</location>|,
1625 GrabID => qr|item id="(.*?
)"|,
1628 if $conf_xml =~ /$re{GrabID}/;
1630 if $conf_xml =~ /$re{GrabXurl}/;
1634 print STDERR "\nerror
: "
1635 . $curl->strerror($rc)
1638 return ($xurl, $id);
1641 sub fetch_cctv_space_config
1643 my ($conf_url, $resp_fh) = @_;
1645 print "done
.\nfetch config file
..."
1646 unless $opts{quiet};
1649 open my $fh, ">", \$conf;
1652 $curl->setopt(CURLOPT_HEADER, 0);
1653 $curl->setopt(CURLOPT_URL, $conf_url);
1654 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1656 my $rc = $curl->perform;
1659 my ($xurl, $errmsg);
1662 my $re = qr|"url
":"(.*?
)"|;
1665 $xurl = "http
://v
.cctv
.com
/flash
/$1";
1669 $errmsg = "extraction url
not found
";
1674 $errmsg = $curl->strerror($rc) . " http
/$rc\n";
1677 print STDERR "\nerror
: $errmsg\n" if $errmsg;
1679 # Re-enable: header, reset WRITEDATA, the above overrides the
1680 # original settings.
1681 $curl->setopt(CURLOPT_HEADER, 0);
1682 $curl->setopt(CURLOPT_WRITEDATA, $resp_fh);
1691 my ($scheme, $authority, $path, $query, $fragment) = $uri =~
1692 m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
1694 # Extract the domain from the URL.
1695 my @a = split(/\./, $authority);
1700 sub title_to_filename
1705 s/(youtube|video|liveleak.com|sevenload|dailymotion|cctv.com)//gi;
1709 my $r = $opts{cclass} || qr|\w|;
1710 $title = join('', $title =~ /$r/g);
1712 my $fn = $opts{fnfmt} || "%t_%d_%i.%s";
1713 my $timestamp = POSIX::strftime("%F %T", localtime);
1715 my @a = strdomain($entry{page_url});
1719 "%s" => $entry{file_suffix},
1720 "%d" => $a[scalar @a - 2], # Without the TLD.
1721 "%i" => $entry{video_id},
1722 "%D" => (split(/ /, $timestamp))[0],
1723 "%T" => (split(/ /, $timestamp))[1],
1727 my $m = join('|', keys %h);
1728 $fn =~ s/($m)/$h{$1}/ig;
1733 sub newname_if_exists
1735 my ($path, $orig, $new) = (shift, shift);
1737 for (my $i = 1 ; ; $i++)
1739 $new = File::Spec->catfile($path, "$orig.$i");
1743 my ($vol, $dir, $fn) = File::Spec->splitpath($new);
1750 my %e = map_entry(shift);
1753 $opted_mods{IOPager}
1755 : decode_utf8($e{page_title});
1759 "%i" => $e{video_id},
1760 "%l" => $e{file_length},
1761 "%m" => sprintf("%.2f
", $e{file_length} / MBDIV),
1762 "%u" => $e{page_url},
1764 "%D" => (split(/ /, $e{time_stamp}))[0],
1765 "%T" => (split(/ /, $e{time_stamp}))[1],
1766 "%S" => $e{time_stamp},
1769 my $m = join('|', keys %h);
1770 $s =~ s/($m)/$h{$1}/ig;
1778 File::Path::mkpath([$CONFIGDIR], 0, 0700);
1780 $cache_db = tie %cache, "BerkeleyDB
::Hash
",
1781 -Filename => $CACHEFILE,
1782 -Flags => BerkeleyDB->DB_CREATE
1783 or die "error
: cannot
open $CACHEFILE: $!\n";
1788 IO::Pager->new(*STDOUT)
1789 if $opted_mods{IOPager};
1791 my $fmt = $opts{showfmt} || SHOWFMT_DEFAULT;
1794 require Digest::SHA;
1798 grep_cache(); # Stores matches => @queue
1799 push @entries, format_show($fmt, Digest::SHA::sha1_hex($_))
1804 push @entries, format_show($fmt, $_) foreach (sort keys %cache);
1807 print STDOUT "$_\n" foreach sort @entries;
1810 if $opted_mods{IOPager};
1812 if ($opts{grep} and $opts{delete} and scalar @queue > 0)
1814 print "Confirm
delete (y
/N
):";
1819 delete $cache{Digest::SHA::sha1_hex($_)} foreach (@queue);
1827 unlink $CACHEFILE if -e $CACHEFILE;
1840 my @values = split(/;/, $cache{$key});
1843 file_suffix file_length file_format page_title
1844 page_url time_stamp video_id xurl
1845 ); # Order matters. See also save_entry.
1848 return map { $_ => $values[$i++] } @keys;
1853 %entry = map_entry
($hash);
1854 $entry{page_title
} = decode_utf8
($entry{page_title
});
1856 #while (my ($key, $value) = each(%entry)) { print "$key => $value\n"; } die;
1863 $entry{time_stamp
} = POSIX
::strftime
("%F %T", localtime);
1865 push @values, $entry{$_} foreach sort keys %entry;
1867 $cache{$hash} = join(';', @values);
1868 $cache_db->db_sync();
1878 my $fmt = $opts{showfmt
} || SHOWFMT_DEFAULT
;
1880 foreach (sort keys %cache)
1882 my @e = split(/;/, $cache{$_});
1887 if ($opts{show
}) { push @queue, $e[4]; }
1888 else { delete $cache{$_}; }
1890 else { push @queue, $e[4]; } # 4=URL
1894 if $opts{delete} and not $opts{show
};
1900 $$url =~ s!/v/!/watch?v=!i; # youtube
1901 $$url =~ s!googleplayer.swf!videoplay!i; # googlevideo
1902 $$url =~ s!/pl/!/videos/!i; # sevenload
1903 $$url =~ s!/e/!/view?i=!i; # liveleak
1908 return if !$opts{exec};
1909 if ($opts{exec} !~ /[;+]$/)
1911 print "error: --exec expression must be terminated "
1912 . "by either ';' or '+'\n";
1919 return if !$opts{exec};
1920 if ($opts{exec} =~ /;$/)
1922 foreach (@exec_files)
1924 my $cmd = $opts{exec};
1925 $cmd =~ s/%i/"$_"/g;
1932 my $cmd = sprintf("%s ", $opts{exec});
1935 $cmd .= sprintf('"%s" ', $_) foreach (@exec_files);
1942 print "<?xml version=\"1.0\"?>\n<queue>\n"
1943 if $opts{emitxml
} and @emit_queue;
1945 require URI
::Escape
;
1947 foreach (@emit_queue)
1952 while (my ($key, $value) = each(%$_))
1954 $value = URI
::Escape
::uri_escape
($value)
1956 or $key eq 'page_url';
1957 print " <$key>$value</$key>\n";
1959 print " </video>\n";
1961 elsif ($opts{emitcsv
})
1963 printf qq/csv:"%s","%s","%s","%.2fMB",/
1964 . qq/"%s","%s","%s","%s","%s","%s"\n/,
1965 $_->{page_url
}, $_->{xurl
}, $_->{fn
},
1966 $_->{file_length
} / MBDIV
, $_->{file_length
},
1967 $_->{video_id
}, $_->{time_stamp
}, $_->{page_title
},
1968 $_->{cont_from
}, $_->{remaining
};
1972 if $opts{emitxml
} and @emit_queue;
1977 print "$re_hosts{$_}\n" foreach (keys %re_hosts);
1983 my $perl_v = sprintf("--with-perl=%vd-%s", $^V
, $^O
);
1985 sprintf("clive version %s with WWW::Curl version "
1986 . "$WWW::Curl::VERSION [%s].\n"
1987 . "Copyright (c) 2007-2009 Toni Gundogdu "
1988 . "<legatvs\@gmail.com>.\n\n",
1992 while (my ($key, $value) = each(%opted_mods))
1994 $str .= sprintf("--with-$key=%s ", $value ?
"yes" : "no");
1995 $str .= "\n" if (++$i % 2 == 0);
1998 "\nclive is licensed under the ISC license which is functionally\n"
1999 . "equivalent to the 2-clause BSD licence.\n"
2000 . "\tReport bugs: <http://code.google.com/p/clive/issues/>\n";
2009 clive [options]... [URL]...
2013 -h, --help print help and exit
2014 -v, --version print version and exit
2015 --hosts print supported hosts and exit
2016 -b, --background go to background after startup
2017 -e, --emit-csv emit video details as csv to stdout
2018 -E, --emit-xml emit video details as csv to stdout
2019 -V, --clivepass=PATH path to clivepass
2021 -U, --agent=STRING identify as STRING to http server
2022 -y, --proxy=ADDR use ADDR for http proxy
2023 -X, --no-proxy do not use http proxy
2025 -R, --renew renew cache entry for visited url
2026 -s, --show dump cache entries to stdout
2027 -H, --show-format=STRING format dumped cache entries
2028 -g, --grep=PATTERN grep cache entries for PATTERN
2029 -i, --ignore-case ignore case-differences with --grep
2030 -D, --delete delete matched entries from cache
2031 -C, --clear clear cache of all entries
2032 Logging and Input Options:
2033 -o, --output=LOGFILE log messages to LOGFILE
2034 -a, --append=LOGFILE append to LOGFILE
2035 -d, --debug print libcurl debug messages
2036 -q, --quiet turn off all output
2037 -r, --recall recall last url batch
2038 -T, --savebatch=FILE save url batch to FILE
2039 -p, --paste paste input from clipboard
2040 --stderr redirect all output to stderr even when no tty
2042 -O, --output-video=FNAME write video to file
2043 -n, --no-extract do not extract any videos
2044 -c, --continue continue partially downloaded file
2045 -W, --overwrite overwrite existing video file
2046 -G, --progress=TYPE use progress indicator TYPE
2047 -u, --youtube-user=UNAME youtube username
2048 -t, --youtube-pass=PASSW youtube password
2049 -L, --no-login do not log into youtube
2050 -S, --savedir=DIR save video files to DIR
2051 -f, --format=FORMAT extract video FORMAT
2052 -l, --cclass=CLASS use CLASS to filter titles
2053 -N, --filename-format=STR use STR to construct output filename
2054 -x, --exec=COMMAND execute COMMAND subsequently
2055 --stream-exec=COMMAND stream COMMAND to be executed
2056 --stream=PERCENT execute stream command when transfer reaches %