2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clive, the non-interactive video extraction utility
6 # Copyright (c) 2007-2009 Toni Gundogdu <legatvs@gmail.com>
8 # Permission to use, copy, modify, and distribute this software for any
9 # purpose with or without fee is hereby granted, provided that the above
10 # copyright notice and this permission notice appear in all copies.
12 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 ###########################################################################
24 use constant VERSION
=> "2.1.10";
25 use constant MBDIV
=> 0x100000;
26 use constant SHOWFMT_DEFAULT
=> qq/%D: "%t" | %mMB/;
28 binmode(STDOUT
, ":utf8");
30 # NOTE: Using "require" instead of "use" causes "Can't locate
31 # auto/WWW/Curl/CURLOPT_USE.al in @INC".
32 use WWW
::Curl
::Easy
4.05;
33 use Getopt
::Long
qw(:config bundling);
39 # Non-essential modules: set flags indicating availability
41 (Clipboard
=> 1, Expect
=> 1, IOPager
=> 1, ReadKey
=> 1);
43 $opted_mods{Clipboard
} = 0 if $@
;
45 $opted_mods{IOPager
} = 0 if $@
;
46 sub exp_continue
() { }; # Satisfies: "Bareword "exp_continue" not allowed while"
48 $opted_mods{Expect
} = 0 if $@
;
49 eval "use Term::ReadKey";
50 $opted_mods{ReadKey
} = 0 if $@
;
52 my $CONFIGDIR = $ENV{CLIVE_HOME
}
53 || File
::Spec
->catfile($ENV{HOME
}, ".config/clive");
55 my $CONFIGFILE = File
::Spec
->catfile($CONFIGDIR, "config");
56 my $CACHEFILE = File
::Spec
->catfile($CONFIGDIR, "cache");
57 my $RECALLFILE = File
::Spec
->catfile($CONFIGDIR, "recall");
59 my %opts; # runtime options
60 my @queue; # input URLs
61 my $curl; # curl handle, reused throughout lifespan
62 my $cache_db; # handle to cache BDB
63 my %cache; # handle to cache BDB (tied hash)
64 my $hash; # sha1 hash of the current url used together with %cache
65 my %entry; # multi-purpose hash for caching
66 my $ytube_logged = 0; # youtube: whether logged-in
67 my $time_started; # time file transfer started
68 my @exec_files; # holds fnames for --exec
69 my @emit_queue; # videos to be emitted
70 my $logfile; # path to logfile (--output-file, --append-file)
71 my %dp; # dot progress data
72 my %bp; # bar progress data
73 my $workdir = getcwd
; # startup workdir
74 my @stream = (0, -1); # 0=stream flag, 1=stream pid
75 my $curr_fpath; # current video output filepath
76 my $recv_sigwinch = 0; # whether SIGWINCH was received
77 my $term_width; # current terminal width
79 my %re_hosts = ( # Precompiled regex used to identify the host
80 IsYoutube
=> qr
|youtube
.com
|i
,
81 IsGoogle
=> qr
|video
.google
.|i
,
82 IsSevenload
=> qr
|sevenload
.com
|i
,
83 IsBreak
=> qr
|break.com
|i
,
84 IsLastfm
=> qr
|last.fm
|i
,
85 IsLiveleak
=> qr
|liveleak
.com
|i
,
86 IsEvisor
=> qr
|evisor
.tv
|i
,
87 IsDmotion
=> qr
|dailymotion
.com
|i
,
88 IsCctv
=> qr
|tv
.cctv
.com
|i
,
92 [$re_hosts{IsYoutube
}, \
&handle_youtube
],
93 [$re_hosts{IsGoogle
}, \
&handle_google
],
94 [$re_hosts{IsSevenload
}, \
&handle_sevenload
],
95 [$re_hosts{IsBreak
}, \
&handle_break
],
96 [$re_hosts{IsLastfm
}, \
&handle_lastfm
],
97 [$re_hosts{IsLiveleak
}, \
&handle_liveleak
],
98 [$re_hosts{IsEvisor
}, \
&handle_evisor
],
99 [$re_hosts{IsDmotion
}, \
&handle_dmotion
],
100 [$re_hosts{IsCctv
}, \
&handle_cctv
],
104 my $c = Config
::Tiny
->read($CONFIGFILE);
106 progress
=> $c->{_
}->{progress
},
107 agent
=> $c->{http
}->{agent
},
108 proxy
=> $c->{http
}->{proxy
},
109 maxspeed
=> $c->{http
}->{maxspeed
},
110 minspeed
=> $c->{http
}->{minspeed
},
111 format
=> $c->{output
}->{format
},
112 savedir
=> $c->{output
}->{savedir
},
113 cclass
=> $c->{output
}->{cclass
},
114 fnfmt
=> $c->{output
}->{file
},
115 showfmt
=> $c->{output
}->{show
},
116 ytuser
=> $c->{youtube
}->{user
},
117 ytpass
=> $c->{youtube
}->{pass
},
118 exec => $c->{commands
}->{exec},
119 streamexec
=> $c->{commands
}->{stream
},
120 clivepass
=> $c->{commands
}->{clivepass
},
123 $opts{clivepass
} = $ENV{CLIVEPASS_PATH
} unless $opts{clivepass
};
124 $opts{progress
} = 'bar' unless $opts{progress
};
125 $opts{format
} = $opts{format
} || 'flv';
132 'debug|d', 'help|h', 'overwrite|W', 'savebatch|T=s',
133 'paste|p', 'show|s', 'delete|D', 'clear|C',
134 'continue|c', 'renew|R', 'recall|r', 'format|f=s',
135 'output|o=s', 'append|a=s', 'background|b', 'quiet|q',
136 'grep|g=s', 'agent|U=s', 'proxy|y=s', 'savedir|S=s',
137 'cclass|l=s', 'exec|x=s', 'progress|G=s', 'clivepass|V=s',
139 'hosts' => \
&print_hosts
,
140 'version|v' => \
&print_version
,
142 # Commented out until WWW::Curl is fixed:
143 # 'maxspeed!', 'minspeed!',
144 # Workarounds since $longopt!|$shortopt cannot be used.
145 'no-extract|n' => sub { $opts{extract
} = 0 },
146 'no-login|L' => sub { $opts{login
} = 0 },
147 'no-proxy|X' => sub { $opts{proxy
} = "" },
149 # Workaround for options with dashes. There's likely a better way.
150 'ignore-case|i' => sub { $opts{case
} = 0 },
151 'filename-format|N=s' => sub { $opts{fnfmt
} = $_[1] },
152 'show-format|H=s' => sub { $opts{showfmt
} = $_[1] },
153 'youtube-user|u=s' => sub { $opts{ytuser
} = $_[1] },
154 'youtube-pass|t=s' => sub { $opts{ytpass
} = $_[1] },
155 'emit-csv|e' => sub { $opts{emitcsv
} = 1 },
156 'emit-xml|E' => sub { $opts{emitxml
} = 1 },
157 'stream-exec=s' => sub { $opts{streamexec
} = $_[1] },
158 'output-video|O=s' => sub { $opts{outputfname
} = $_[1] },
164 Pod
::Usage
::pod2usage
(-exitstatus
=> 0, -verbose
=> 1);
169 ## Subroutines: Signal handlers
174 # my $sig_name = shift;
178 ## Subroutines: Connection
182 $curl = WWW
::Curl
::Easy
->new;
184 $curl->setopt(CURLOPT_USERAGENT
, $opts{agent
} || "Mozilla/5.0");
185 $curl->setopt(CURLOPT_FOLLOWLOCATION
, 1);
186 $curl->setopt(CURLOPT_AUTOREFERER
, 1);
187 $curl->setopt(CURLOPT_HEADER
, 1);
188 $curl->setopt(CURLOPT_NOBODY
, 0);
190 $curl->setopt(CURLOPT_VERBOSE
, 1)
193 $curl->setopt(CURLOPT_PROXY
, $opts{proxy
})
194 if defined $opts{proxy
};
196 $curl->setopt(CURLOPT_MAX_RECV_SPEED_LARGE
, $opts{maxspeed
})
197 if $opts{maxpseed
}; # NOTE: No effect. Bug in WWW::Curl::Easy?
199 $curl->setopt(CURLOPT_LOW_SPEED_LIMIT
, $opts{minspeed
})
200 if $opts{minspeed
}; # Ditto.
205 print "[youtube] attempt to login as $opts{ytuser} ..."
209 open my $fh, ">", \
$response;
212 "http://uk.youtube.com/login?current_form=loginform"
213 . "&username=$opts{ytuser}&password=$opts{ytpass}"
214 . "&action_login=log+in&hl=en-GB";
216 $curl->setopt(CURLOPT_URL
, $login_url);
217 $curl->setopt(CURLOPT_COOKIEFILE
, ""); # Enable cookies from here on
218 $curl->setopt(CURLOPT_ENCODING
, ""); # Supported encodings
219 $curl->setopt(CURLOPT_WRITEDATA
, $fh);
221 my $rc = $curl->perform;
226 $response =~ tr
{\n}//d;
227 $errmsg = "error: login was incorrect"
228 if $response =~ /your log-in was incorrect/i;
229 $errmsg = "error: check your login password"
230 if $response =~ /check your password/i and !$errmsg;
231 $errmsg = "error: too many login failures, try again later"
232 if $response =~ /too many login failures/i and !$errmsg;
236 $errmsg = "error: " . $curl->strerror($rc) . " (http/$rc)";
240 print STDERR
"\n$errmsg\n" and exit
246 $curl->setopt(CURLOPT_COOKIE
,
247 "is_adult=" . uc(Digest
::SHA
::sha1_hex
(rand())));
259 require HTML
::TokeParser
;
261 require File
::Basename
;
266 $hash = Digest
::SHA
::sha1_hex
($_);
269 my ($rc, $rfh, $response) = fetch_page
($_);
271 if ($rc == 0 or $rc == 0xff)
273 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE
)
274 unless $rc == 0xff; # read from cache
276 if ($rc == 200 or $rc == 0xff)
278 if (!defined($entry{page_url
}))
280 next if process_page
($_, \
$response, $rfh) == -1;
282 extract_video
() if $entry{xurl
};
286 $errmsg = $curl->strerror($rc) . " (http/$rc)";
291 $errmsg = $curl->strerror($rc) . " (http/$rc)";
295 print STDERR
"\nerror: $errmsg\n"
304 my ($url, $response, $from_cache, $rc) = (shift, "");
305 open my $fh, ">", \
$response;
307 # Youtube: login only if both username and password are defined
308 if ($opts{ytuser
} and $opts{ytpass
} and $opts{login
})
311 if !$ytube_logged and $url =~ /$re_hosts{IsYoutube}/;
317 ; # Make sure cached "format" matches with options
319 if $opts{format
} eq $entry{file_format
};
325 printf "%s $url ...", $from_cache ?
"cache" : "fetch"
328 $rc = 0xff; # flag: read cache entry
333 $curl->setopt(CURLOPT_URL
, $url);
334 $curl->setopt(CURLOPT_ENCODING
, "");
335 $curl->setopt(CURLOPT_WRITEDATA
, $fh);
336 $rc = $curl->perform;
339 return ($rc, $fh, decode_utf8
($response));
344 my ($url, $response_ref, $response_fh) = @_;
346 print "done.\nprocess page ..."
349 #$$response_ref =~ tr{\n}//d;
351 my $p = HTML
::TokeParser
->new($response_ref);
352 $p->get_tag("title");
353 my $title = $p->get_trimmed_text;
355 my ($xurl, $id, $_title, $supported);
357 foreach (@re_hosts_arr)
359 my ($re, $handler) = @
{$_};
363 ($xurl, $id, $_title) =
364 &$handler($response_ref, $response_fh, $url);
365 $title = $_title || $title;
369 die "error: lookup array missing handler; should never get here\n"
377 $title =~ tr
{;}//d; # Cache values cannot contain ';'
379 $entry{page_url
} = $url;
380 $entry{xurl
} = $xurl;
381 $entry{page_title
} = $title;
382 $entry{video_id
} = $id;
383 $entry{file_format
} = $opts{format
};
388 sub query_video_length
390 my ($content_type, $errmsg);
392 unless ($entry{file_length
})
394 print "done.\nquery length ..."
397 $curl->setopt(CURLOPT_URL
, $entry{xurl
});
399 # Do not download: GET => HEAD request.
400 $curl->setopt(CURLOPT_NOBODY
, 1);
401 my $rc = $curl->perform;
403 # Reset back: HEAD => GET
404 $curl->setopt(CURLOPT_HTTPGET
, 1);
406 $entry{file_length
} =
407 $curl->getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD
);
409 $content_type = $entry{file_suffix
} =
410 $curl->getinfo(CURLINFO_CONTENT_TYPE
);
412 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE
);
417 if ($content_type =~ m{video/(.*)})
419 $entry{file_suffix
} = $1;
420 if ($content_type =~ /(.*)-(.*)$/)
422 $entry{file_suffix
} = $2;
427 # Evisor and Metacafe return "text/plain" for Content-Type
428 elsif ($content_type =~ m{text/plain})
430 if ($opts{format
} eq "flv")
432 if ( $entry{page_url
} =~ /$re_hosts{IsEvisor}/
433 or $entry{page_url
} =~ /$re_hosts{IsCctv}/)
435 $entry{file_suffix
} = "flv";
441 # Liveleak and Break return "(flv-)application/octet-stream"
442 elsif ($content_type =~ m{application/octet-stream})
444 if ($opts{format
} eq "flv")
446 if ( $entry{page_url
} =~ /$re_hosts{IsBreak}/
447 or $entry{page_url
} =~ /$re_hosts{IsLiveleak}/)
449 $entry{file_suffix
} = 'flv';
455 "expected different content-type, "
456 . "received \"$content_type\""
461 $errmsg = "server returned http/$rc";
465 { # Construct content-type from cache
466 $content_type = "video/$entry{file_suffix}";
469 unless ($opts{quiet
})
471 if (!$errmsg) { print "done.\n"; }
472 else { print STDERR
"\nerror: $errmsg\n"; }
475 return ($errmsg ?
-1 : 0, $content_type);
480 my ($rc, $content_type) = query_video_length
();
483 if $rc != 0 or !defined $content_type;
485 my $fn = $opts{outputfname
}
486 || title_to_filename
($entry{page_title
});
487 my $path = File
::Spec
->catfile($opts{savedir
} || $workdir, $fn);
489 my $remaining = $entry{file_length
};
490 my $size = -s
$path || 0;
495 if ($size > 0 and !$opts{overwrite
})
497 if ($size == $entry{file_length
} and $opts{extract
})
500 "error: file is already fully retrieved; nothing to do\n";
502 push @exec_files, $path
506 unless $opts{emitcsv
} or $opts{emitxml
};
509 elsif ($size < $entry{file_length
} and $opts{continue})
513 $remaining = ($entry{file_length
} - $cont_from);
518 newname_if_exists
($opts{savedir
} || $workdir, $fn);
522 if ($opts{emitcsv
} or $opts{emitxml
})
525 $entry{remaining
} = $remaining;
526 $entry{cont_from
} = $cont_from;
527 push @emit_queue, {%entry};
531 unless ($opts{quiet
})
534 print "length: $entry{file_length} ";
536 printf "(%.2fMB) ", $entry{file_length
} / MBDIV
537 if $entry{file_length
};
539 printf "from: %u (left: %u) ", $cont_from, $remaining
542 printf "[$content_type]"
552 unless $opts{extract
};
554 if (open my $fh, "$filemode$path")
558 # Disable: encoding, header
559 $curl->setopt(CURLOPT_HEADER
, 0);
560 $curl->setopt(CURLOPT_ENCODING
, "identity");
561 $curl->setopt(CURLOPT_URL
, $entry{xurl
});
562 $curl->setopt(CURLOPT_WRITEDATA
, $fh);
564 $curl->setopt(CURLOPT_RESUME_FROM
, $cont_from)
567 unless ($opts{quiet
})
569 $curl->setopt(CURLOPT_PROGRESSFUNCTION
,
570 \
&progress_callback
);
571 $curl->setopt(CURLOPT_NOPROGRESS
, 0);
572 $time_started = time;
574 # Use 'dot' progress if the output is not a TTY
575 if ( $opts{progress
} !~ /^dot/
576 and $opts{progress
} ne 'none')
578 $opts{progress
} = 'dot'
579 if !-t STDOUT
or !-t STDERR
;
582 $stream[0] = 0; # reset streaming flag
584 if ($opts{progress
} =~ /^bar/)
586 bar_init
($cont_from, $entry{file_length
});
588 elsif ($opts{progress
} =~ /^dot/)
594 $rc = $curl->perform;
599 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE
);
600 if ($rc == 200 or $rc == 206)
602 if ($opts{progress
} =~ /^bar/) { bar_finish
() }
603 elsif ($opts{progress
} =~ /^dot/) { dot_finish
() }
604 waitpid($stream[1], 0) if $stream[0];
608 $errmsg = $curl->strerror($rc) . " (http/$rc)";
613 $errmsg = $curl->strerror($rc) . " (http/$rc)";
617 $curl->setopt(CURLOPT_RESUME_FROM
, 0);
618 $curl->setopt(CURLOPT_HEADER
, 1);
622 $errmsg = "$path: $!";
627 $errmsg = $curl->strerror($rc) . " (http/$rc)";
632 print "\nclosed http/$rc.\n"
635 push @exec_files, $path
640 print STDERR
"\nerror: $errmsg\n";
644 $curl->setopt(CURLOPT_NOPROGRESS
, 1);
649 if ($opts{recall
} and -e
$RECALLFILE)
651 if (open my $fh, "<$RECALLFILE")
653 parse_input
($_) while (<$fh>);
658 print STDERR
"error: $RECALLFILE: $!";
664 print STDERR
"error: Clipboard module not found\n" and exit
665 unless $opted_mods{Clipboard
};
666 my $data = Clipboard
->paste();
669 parse_input
($_) foreach split(/\n/, $data);
673 parse_input
($_) foreach @ARGV;
674 grep_cache
() if $opts{grep};
675 unless (@queue) { parse_input
($_) while (<STDIN
>); }
677 if (open my $fh, ">$RECALLFILE")
679 print $fh "$_\n" foreach @queue;
684 print STDERR
"error: $RECALLFILE: $!";
687 if ($opts{savebatch
})
689 if (open my $fh, ">", $opts{savebatch
})
691 print $fh "$_\n" foreach @queue;
696 print STDERR
"error: $opts{savebatch}: $!";
705 return if $url =~ /^$/;
706 return if $url =~ /^#/;
710 if ($url =~ /&srcurl=(.*?)&/)
711 { # GVideo: one of many redirects
713 printf "found redirect ...%s\n=> %s\n",
714 (split(/&/, $url))[0],
715 (split(/&/, URI
::Escape
::uri_unescape
($1)))[0]
717 $url = URI
::Escape
::uri_unescape
($1);
720 # Insert http:// if not found
722 if $url !~ m{^http://}i;
724 # Translate embedded URL to video page URL
725 translate_embed
(\
$url);
727 # Last.fm wraps Youtube videos as their own
728 if ($url =~ /$re_hosts{IsLastfm}/)
732 print STDERR
"error: nosupport: $url\n" and return -1
735 $url = "http://youtube.com/watch?v=$1";
738 # Remove params from the URL
739 ($url) = split(/&/, $url);
741 foreach my $re (%re_hosts)
743 push @queue, $url and return 0
747 print STDERR
"error: nosupport: $url\n";
752 # Subroutines: Video page handlers
756 my ($response_ref, $xurl) = @_;
759 GrabID
=> qr/"video_id": "(.*?)"/,
760 GrabT
=> qr/"t": "(.*?)"/
763 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
764 my $t = $1 if $$response_ref =~ /$re{GrabT}/;
768 $xurl = "http://youtube.com/get_video?video_id=$id&t=$t";
771 if ($opts{format
} eq "mp4") { $fmt = 18; }
772 elsif ($opts{format
} eq "3gpp") { $fmt = 17; }
773 elsif ($opts{format
} eq "xflv") { $fmt = 6; }
780 printf STDERR
"\nerror: failed to extract &%s\n", $id
789 my ($response_ref) = @_;
793 GrabVideoURL
=> qr
|videoUrl
\\x3d
(.*?
)\\x26
|,
794 GrabID
=> qr
|docid
:'(.*?)'|,
795 GrabMP4
=> qr
|href
="http://vp\.(.*?)"|,
798 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
800 my $xurl = URI
::Escape
::uri_unescape
($1)
801 if $$response_ref =~ /$re{GrabVideoURL}/;
803 my $mp4 = $1 if $$response_ref =~ /$re{GrabMP4}/;
806 $errmsg = "video id not found" if !$id;
807 $errmsg = "extraction url not found" if !$xurl && !$errmsg;
809 print STDERR
"\nerror: $errmsg\n" if $errmsg;
811 $xurl = "http://vp.$mp4"
812 if $mp4 && $opts{format
} eq "mp4" && $xurl;
819 my ($response_ref, $response_fh) = @_;
821 my %re = (GrabConfigPath
=> qr
|configPath
=(.*?
)"|);
823 my $conf_path = URI::Escape::uri_unescape($1)
824 if $$response_ref =~ /$re{GrabConfigPath}/;
826 my ($xurl, $id, $errmsg);
830 fetch_sevenload_configxml($conf_path, $response_fh);
834 $errmsg = "configPath
not found
";
836 $errmsg = "item id
not found
" if !$errmsg && !$id;
837 $errmsg = "extraction url
not found
" if !$errmsg && !$xurl;
838 print STDERR "\nerror
: $errmsg\n" if $errmsg;
844 my ($response_ref) = @_;
847 GrabTitle => qr|id="vid_title
" content="(.*?
)"|,
848 GrabID => qr|ContentID='(.*?)'|,
849 GrabFilePath => qr|ContentFilePath='(.*?)'|,
850 GrabFileName => qr|FileName='(.*?)'|
853 my $title = $1 if $$response_ref =~ /$re{GrabTitle}/;
854 my $id = $1 if $$response_ref =~ /$re{GrabID}/;
855 my $fpath = $1 if $$response_ref =~ /$re{GrabFilePath}/;
856 my $fname = $1 if $$response_ref =~ /$re{GrabFileName}/;
859 if ($fpath and $fname)
861 $xurl = "http
://media
1.break.com
/dnet
/media
/$fpath/$fname.flv
";
865 $errmsg = "failed to extract ContentFilePath
"
868 $errmsg = "failed to extract FileName
"
869 if !$fname and !$errmsg;
872 $errmsg = "failed to extract title
"
873 if !$title and !$errmsg;
875 $errmsg = "failed to extract id
"
876 if !$id and !$errmsg;
878 print STDERR "\nerror
: $errmsg\n"
881 return ($xurl, $id, $title);
886 my ($response_ref, $response_fh) = @_;
889 GrabID => qr|token=(.*?)&|,
890 GrabConfigURL => qr|'config','(.*?)'|,
894 if $$response_ref =~ /$re{GrabID}/;
896 my $conf_url = URI::Escape::uri_unescape($1)
897 if $$response_ref =~ /$re{GrabConfigURL}/;
902 $xurl = fetch_liveleak_config($conf_url);
904 # Re-enable: header, reset WRITEDATA, the above overrides the
906 $curl->setopt(CURLOPT_HEADER, 0);
907 $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
911 $errmsg = "config url
not found
";
914 $errmsg = "id
not found
" if !$id && !$errmsg;
915 print "error
: $errmsg\n" if $errmsg;
925 GrabXurl => qr|file=(.*?)"|,
926 GrabID
=> qr
|.+/(.*?
).flv
|,
929 my ($xurl, $id, $errmsg);
932 if $$respr =~ /$re{GrabXurl}/;
935 if $xurl and $xurl =~ /$re{GrabID}/;
937 $errmsg = "video extraction url not found"
940 $errmsg = "video id not found"
941 unless $id and !$errmsg;
943 print STDERR
"error: $errmsg\n"
954 GrabID
=> qr
|swf
%2F(.*?
)"|,
955 GrabPaths => qr|"video
", "(.*?
)"|
959 $id = $1 if $$resp =~ /$re{GrabID}/;
960 my $paths = URI::Escape::uri_unescape($1)
961 if $$resp =~ /$re{GrabPaths}/;
963 use constant ADDR => "http
://dailymotion
.com
";
968 foreach (split(/\|\|/, $paths))
970 my ($path, $type) = split(/@@/, $_);
971 if ($type eq "spark
")
972 { # same as regular flv
973 $xurl = ADDR . $path;
975 if ($type eq $opts{format})
977 $xurl = ADDR . $path;
984 $errmsg = "id
not found
" if !$id;
985 $errmsg = "paths
not found
" if !$paths && !$errmsg;
986 $errmsg = "failed to construct xurl
" if !$xurl && !$errmsg;
988 print STDERR "\nerror
: $errmsg\n"
996 my ($resp, $resp_fh, $page_url) = @_;
997 my $re = qr|videoId=(.*?)&|;
1000 $id = $1 if $$resp =~ /$re/;
1004 my $domain = join('.', strdomain($page_url));
1006 "http
://$domain/playcfg/flv_info_new
.jsp?videoId
=$id";
1007 $xurl = fetch_cctv_space_config($conf_url, $resp_fh);
1011 print STDERR "\nerror
: id
not found
\n";
1014 return ($xurl, $id);
1017 # Subroutines: Progress
1018 # NOTE: the 'dot' progress copies much from wget.
1020 sub progress_callback
1024 if ($opts{progress} =~ /^dot/) { $percent = dot_update(@_); }
1025 elsif ($opts{progress} =~ /^bar/) { $percent = bar_update(@_); }
1028 && $opts{streamexec}
1031 fork_streamer() if $percent >= $opts{stream};
1044 $dp{dot_bytes} = 1024;
1045 $dp{dot_spacing} = 10;
1046 $dp{dots_in_line} = 50;
1048 my ($type, $style) = split(/:/, $opts{progress});
1052 if ($style eq 'binary')
1054 $dp{dot_bytes} = 8192;
1055 $dp{dot_spacing} = 16;
1056 $dp{dots_in_line} = 48;
1058 elsif ($style eq 'mega')
1060 $dp{dot_bytes} = 65536;
1061 $dp{dot_spacing} = 8;
1062 $dp{dots_in_line} = 48;
1069 my ($clientp, $dltotal, $dlnow, $ultotal, $ulnow) = @_;
1071 my ($percent, $elapsed, $rate, $eta) =
1072 calc_progress($dlnow, $dltotal);
1077 my $row_bytes = $dp{dot_bytes} * $dp{dots_in_line};
1079 $dp{accum} += $dlnow - $dp{dlthen};
1080 $dp{dlthen} = $dlnow;
1082 for (; $dp{accum} >= $dp{dot_bytes} ; $dp{accum} -= $dp{dot_bytes})
1085 printf "\n%6dK", $dp{rows} * $row_bytes / 1024
1089 if $dp{dots} % $dp{dot_spacing} == 0;
1094 if ($dp{dots} >= $dp{dots_in_line})
1099 dot_print_row_stats($percent, $elapsed, $eta, $rate, 0);
1107 return if $opts{quiet};
1109 my $row_bytes = $dp{dot_bytes} * $dp{dots_in_line};
1111 printf "\n%6dK", $dp{rows} * $row_bytes / 1024
1114 for (my $i = $dp{dots} ; $i < $dp{dots_in_line} ; $i++)
1117 if $i % $dp{dot_spacing} == 0;
1122 my $elapsed = time - $time_started;
1123 my $eta = time2str($elapsed, 1);
1124 my $rate = $entry{file_length} / $elapsed;
1126 dot_print_row_stats(100, $elapsed, $eta, $rate, 1);
1129 sub dot_print_row_stats
1131 my ($percent, $elapsed, $eta, $rate, $last) = @_;
1132 my ($unit, $_rate) = get_units($rate);
1134 printf "%3d%% %4.1f
%s", $percent, $_rate, $unit;
1135 printf "%s%s", $last ? "=" : " ", $eta;
1138 use constant DEFAULT_TERM_WIDTH => 80;
1142 return DEFAULT_TERM_WIDTH
1143 unless $opted_mods{ReadKey};
1144 my ($width) = GetTerminalSize();
1150 my ($initial, $total) = @_;
1153 if $initial > $total;
1155 $term_width = get_term_width();
1157 $bp{initial} = $initial; # bytes dl previously
1158 $bp{total} = $total; # expected bytes
1159 $bp{width} = DEFAULT_TERM_WIDTH - 1;
1160 $bp{started} = time;
1165 use constant REFRESH_INTERVAL => 0.2;
1169 my ($clientp, $total, $now, $ultotal, $ulnow) = @_;
1171 my $force_update = 0;
1174 my $old_width = $term_width;
1175 $term_width = get_term_width();
1176 if ($term_width != $old_width)
1178 $bp{width} = $term_width - 1;
1185 my $elapsed = $tnow - $bp{started};
1190 if (($elapsed - $bp{lastupd}) < REFRESH_INTERVAL
1198 $bp{lastupd} = $elapsed;
1199 my $size = $bp{initial} + $now;
1202 if ($bp{width} > DEFAULT_TERM_WIDTH)
1204 $fname_len += $bp{width} - DEFAULT_TERM_WIDTH;
1208 substr(File::Basename::basename($curr_fpath), 0, $fname_len);
1213 my $_size = !$bp{done} ? $size : $now;
1214 $percent = 100.0 * $size / $bp{total};
1217 $buffer .= sprintf(" %2d%% ", $percent);
1221 $buffer .= sprintf(" 100%%");
1223 $buffer .= sprintf(" %4.1fM
/ %4.1fM
",
1224 $_size / MBDIV, $bp{total} / MBDIV);
1227 my $rate = $elapsed ? ($now / $elapsed) : 0;
1234 my $left = ($total - $now) / $rate;
1235 $eta = time2str($left);
1239 $eta = time2str($elapsed);
1241 my ($unit, $_rate) = get_units($rate);
1242 $tmp = sprintf(" %4.1f
%s %6s", $_rate, $unit, $eta);
1246 $tmp = " --.-K
/s
--:--";
1249 # pad to max. width leaving enough space for rate+eta
1250 my $pad = $bp{width} - length($tmp) - length($buffer);
1251 $buffer .= sprintf("%${pad
}s
", " ");
1252 $buffer .= $tmp; # append rate+eta
1254 printf("\r%s", $buffer);
1262 return if $opts{quiet};
1265 && $bp{count} + $bp{initial} > $bp{total})
1267 $bp{total} = $bp{initial} + $bp{count};
1271 bar_update(-1, -1, -1, -1, -1);
1276 my ($dlnow, $dltotal, $elapsed) = @_;
1280 $percent = int($dlnow / $dltotal * 100)
1283 $elapsed = time - $time_started
1289 $rate = $dlnow / $elapsed
1294 my $left = ($dltotal - $dlnow) / $rate;
1295 $eta = time2str($left);
1298 return ($percent, $elapsed, $rate, $eta);
1308 $str = sprintf("%ds", $secs);
1310 elsif ($secs < 100 * 60)
1312 $str = sprintf("%dm%ds", $secs / 60, $secs % 60);
1314 elsif ($secs < 48 * 3600)
1316 $str = sprintf("%dh%dm", $secs / 3600, ($secs / 60) % 60);
1318 elsif ($secs < 100 * 86400)
1320 $str = sprintf("%dd%dh", $secs / 86400, ($secs / 3600) % 60);
1324 $str = sprintf("%dd", $secs / 86400);
1332 my @units = qw|K/s M/s G/s|;
1335 if ($rate < 1024 * 1024)
1339 elsif ($rate < 1024 * 1024)
1341 $rate /= 1024 * 1024;
1344 elsif ($rate < 1024 * 1024 * 1024)
1346 $rate /= 1024 * 1024 * 1024;
1349 return ($units[$i], $rate);
1352 # Subroutines: LittleHelpers
1356 $SIG{WINCH} = \&handle_sigwinch;
1359 if ($opts{clear}) { clear_cache(); }
1360 elsif ($opts{show}) { show_cache(); }
1368 $| = 1; # => unbuffered
1373 if $opts{background};
1383 # TODO: Supports only Youtube. Expand to support other websites as needed.
1387 and $opts{ytpass} eq "-";
1389 print STDERR "error
: no path to clivepass
, use --clivepass
\n"
1391 unless $opts{clivepass};
1393 print STDERR "error
: Expect module
not found
\n" and exit
1394 unless $opted_mods{Expect};
1397 $phrase = getpass("Enter passphrase
for clivepass
: ")
1400 my $e = Expect->new;
1402 $e->spawn($opts{clivepass}, "-g
", $opts{ytuser})
1403 or print STDERR "error
: could
not spawn
: $!\n" and exit;
1405 my ($spawned, $pwd);
1409 qr'Enter passphrase: $',
1412 $fh->send("$phrase\n");
1422 $pwd = $fh->before();
1423 if ($pwd =~ /error: (.*?)$/)
1425 print STDERR "clivepass
: error
: $1\n";
1431 if ($pwd =~ /login: $opts{ytuser}=(.*?)$/);
1437 "error
: could
not spawn
$opts{clivepass
}\n";
1444 print STDERR "error
: clivepass
: expect timed out
\n";
1450 $opts{ytpass} = $pwd;
1455 system "stty
-echo
";
1457 chomp(my $pwd = <STDIN>);
1468 || File::Spec->catfile($workdir, "clive
-log");
1473 print STDERR "\nerror
: fork failed
: $!";
1478 print "continuing
in background
, pid
$pid.\n";
1479 print "output will be written to
$logfile.\n"
1480 unless $opts{quiet};
1486 my $mode = $opts{append} ? ">>" : ">";
1487 $logfile = "/dev/null
" if $opts{quiet};
1489 open STDOUT, "$mode", "$logfile"
1490 or die "error
: cannot redirect STDOUT
: $!";
1492 open STDERR, ">&STDOUT
"
1493 or die "error
: cannot dup STDOUT
: $!";
1498 $stream[0] = 1; # set flag
1503 print STDERR "error
: fork failed
: $!\n";
1507 my $cmd = $opts{streamexec};
1508 $cmd =~ s/%i/"$curr_fpath"/g;
1513 $stream[1] = $child;
1516 sub fetch_liveleak_playlist
1518 my $playlist_url = shift;
1520 print "done
.\nfetch playlist xspf
..."
1521 unless $opts{quiet};
1524 open my $fh, ">", \$playlist;
1526 $curl->setopt(CURLOPT_URL, $playlist_url);
1527 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1529 my $rc = $curl->perform;
1532 my ($xurl, $errmsg);
1536 # NOTE: XML::XSPF exists in CPAN but this should work just as well.
1537 # Parsing with XML::Simple results in errors due unescaped values.
1538 $playlist =~ tr{\n}//d;
1540 if $playlist =~ /<location>(.*?)<\/location>/;
1544 $errmsg = $curl->strerror($rc) . " (http
/$rc)";
1547 $errmsg = "location tag
not found
" if !$xurl && !$errmsg;
1548 print STDERR "\nerror
: $errmsg\n" if $errmsg;
1553 sub fetch_liveleak_config
1555 my $config_url = shift;
1557 print "done
.\nfetch config xml
..."
1558 unless $opts{quiet};
1561 open my $fh, ">", \$config;
1564 $curl->setopt(CURLOPT_HEADER, 0);
1565 $curl->setopt(CURLOPT_URL, $config_url);
1566 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1568 my $rc = $curl->perform;
1571 my ($xurl, $errmsg);
1574 if ($config =~ /<file>(.*?)<\/file>/)
1576 $xurl = fetch_liveleak_playlist($1);
1580 $errmsg = "playlist url
not found
";
1585 $errmsg = $curl->strerror($rc) . " (http
/$rc)\n";
1588 print STDERR "\nerror
: $errmsg\n" if $errmsg;
1593 sub fetch_sevenload_configxml
1595 my ($conf_url, $response_fh) = @_;
1597 print "done
.\nfetch config xml
..."
1598 unless $opts{quiet};
1601 open my $conf_fh, ">", \$conf_xml;
1604 $curl->setopt(CURLOPT_HEADER, 0);
1605 $curl->setopt(CURLOPT_URL, $conf_url);
1606 $curl->setopt(CURLOPT_WRITEDATA, $conf_fh);
1608 my $rc = $curl->perform;
1612 $curl->setopt(CURLOPT_HEADER, 1);
1613 $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
1619 GrabXurl => qr|<location seeking="yes
">(.*?)</location>|,
1620 GrabID => qr|item id="(.*?
)"|,
1623 if $conf_xml =~ /$re{GrabID}/;
1625 if $conf_xml =~ /$re{GrabXurl}/;
1629 print STDERR "\nerror
: "
1630 . $curl->strerror($rc)
1633 return ($xurl, $id);
1636 sub fetch_cctv_space_config
1638 my ($conf_url, $resp_fh) = @_;
1640 print "done
.\nfetch config file
..."
1641 unless $opts{quiet};
1644 open my $fh, ">", \$conf;
1647 $curl->setopt(CURLOPT_HEADER, 0);
1648 $curl->setopt(CURLOPT_URL, $conf_url);
1649 $curl->setopt(CURLOPT_WRITEDATA, $fh);
1651 my $rc = $curl->perform;
1654 my ($xurl, $errmsg);
1657 my $re = qr|"url
":"(.*?
)"|;
1660 $xurl = "http
://v
.cctv
.com
/flash
/$1";
1664 $errmsg = "extraction url
not found
";
1669 $errmsg = $curl->strerror($rc) . " http
/$rc\n";
1672 print STDERR "\nerror
: $errmsg\n" if $errmsg;
1674 # Re-enable: header, reset WRITEDATA, the above overrides the
1675 # original settings.
1676 $curl->setopt(CURLOPT_HEADER, 0);
1677 $curl->setopt(CURLOPT_WRITEDATA, $resp_fh);
1686 my ($scheme, $authority, $path, $query, $fragment) = $uri =~
1687 m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
1689 # Extract the domain from the URL.
1690 my @a = split(/\./, $authority);
1695 sub title_to_filename
1700 s/(youtube|video|liveleak.com|sevenload|dailymotion|cctv.com)//gi;
1704 my $r = $opts{cclass} || qr|\w|;
1705 $title = join('', $title =~ /$r/g);
1707 my $fn = $opts{fnfmt} || "%t_%d_%i.%s";
1708 my $timestamp = POSIX::strftime("%F %T", localtime);
1710 my @a = strdomain($entry{page_url});
1714 "%s" => $entry{file_suffix},
1715 "%d" => $a[scalar @a - 2], # Without the TLD.
1716 "%i" => $entry{video_id},
1717 "%D" => (split(/ /, $timestamp))[0],
1718 "%T" => (split(/ /, $timestamp))[1],
1722 my $m = join('|', keys %h);
1723 $fn =~ s/($m)/$h{$1}/ig;
1728 sub newname_if_exists
1730 my ($path, $orig, $new) = (shift, shift);
1732 for (my $i = 1 ; ; $i++)
1734 $new = File::Spec->catfile($path, "$orig.$i");
1738 my ($vol, $dir, $fn) = File::Spec->splitpath($new);
1745 my %e = map_entry(shift);
1748 $opted_mods{IOPager}
1750 : decode_utf8($e{page_title});
1754 "%i" => $e{video_id},
1755 "%l" => $e{file_length},
1756 "%m" => sprintf("%.2f
", $e{file_length} / MBDIV),
1757 "%u" => $e{page_url},
1759 "%D" => (split(/ /, $e{time_stamp}))[0],
1760 "%T" => (split(/ /, $e{time_stamp}))[1],
1761 "%S" => $e{time_stamp},
1764 my $m = join('|', keys %h);
1765 $s =~ s/($m)/$h{$1}/ig;
1773 File::Path::mkpath([$CONFIGDIR], 0, 0700);
1775 $cache_db = tie %cache, "BerkeleyDB
::Hash
",
1776 -Filename => $CACHEFILE,
1777 -Flags => BerkeleyDB->DB_CREATE
1778 or die "error
: cannot
open $CACHEFILE: $!\n";
1783 IO::Pager->new(*STDOUT)
1784 if $opted_mods{IOPager};
1786 my $fmt = $opts{showfmt} || SHOWFMT_DEFAULT;
1789 require Digest::SHA;
1793 grep_cache(); # Stores matches => @queue
1794 push @entries, format_show($fmt, Digest::SHA::sha1_hex($_))
1799 push @entries, format_show($fmt, $_) foreach (sort keys %cache);
1802 print STDOUT "$_\n" foreach sort @entries;
1805 if $opted_mods{IOPager};
1807 if ($opts{grep} and $opts{delete} and scalar @queue > 0)
1809 print "Confirm
delete (y
/N
):";
1814 delete $cache{Digest::SHA::sha1_hex($_)} foreach (@queue);
1822 unlink $CACHEFILE if -e $CACHEFILE;
1835 my @values = split(/;/, $cache{$key});
1838 file_suffix file_length file_format page_title
1839 page_url time_stamp video_id xurl
1840 ); # Order matters. See also save_entry.
1843 return map { $_ => $values[$i++] } @keys;
1848 %entry = map_entry
($hash);
1849 $entry{page_title
} = decode_utf8
($entry{page_title
});
1851 #while (my ($key, $value) = each(%entry)) { print "$key => $value\n"; } die;
1858 $entry{time_stamp
} = POSIX
::strftime
("%F %T", localtime);
1860 push @values, $entry{$_} foreach sort keys %entry;
1862 $cache{$hash} = join(';', @values);
1863 $cache_db->db_sync();
1873 my $fmt = $opts{showfmt
} || SHOWFMT_DEFAULT
;
1875 foreach (sort keys %cache)
1877 my @e = split(/;/, $cache{$_});
1882 if ($opts{show
}) { push @queue, $e[4]; }
1883 else { delete $cache{$_}; }
1885 else { push @queue, $e[4]; } # 4=URL
1889 if $opts{delete} and not $opts{show
};
1895 $$url =~ s!/v/!/watch?v=!i; # youtube
1896 $$url =~ s!googleplayer.swf!videoplay!i; # googlevideo
1897 $$url =~ s!/pl/!/videos/!i; # sevenload
1898 $$url =~ s!/e/!/view?i=!i; # liveleak
1903 return if !$opts{exec};
1904 if ($opts{exec} !~ /[;+]$/)
1906 print "error: --exec expression must be terminated "
1907 . "by either ';' or '+'\n";
1914 return if !$opts{exec};
1915 if ($opts{exec} =~ /;$/)
1917 foreach (@exec_files)
1919 my $cmd = $opts{exec};
1920 $cmd =~ s/%i/"$_"/g;
1927 my $cmd = sprintf("%s ", $opts{exec});
1930 $cmd .= sprintf('"%s" ', $_) foreach (@exec_files);
1937 print "<?xml version=\"1.0\"?>\n<queue>\n"
1938 if $opts{emitxml
} and @emit_queue;
1940 require URI
::Escape
;
1942 foreach (@emit_queue)
1947 while (my ($key, $value) = each(%$_))
1949 $value = URI
::Escape
::uri_escape
($value)
1951 or $key eq 'page_url';
1952 print " <$key>$value</$key>\n";
1954 print " </video>\n";
1956 elsif ($opts{emitcsv
})
1958 printf qq/csv:"%s","%s","%s","%.2fMB",/
1959 . qq/"%s","%s","%s","%s","%s","%s"\n/,
1960 $_->{page_url
}, $_->{xurl
}, $_->{fn
},
1961 $_->{file_length
} / MBDIV
, $_->{file_length
},
1962 $_->{video_id
}, $_->{time_stamp
}, $_->{page_title
},
1963 $_->{cont_from
}, $_->{remaining
};
1967 if $opts{emitxml
} and @emit_queue;
1972 print "$re_hosts{$_}\n" foreach (keys %re_hosts);
1978 my $perl_v = sprintf("--with-perl=%vd-%s", $^V
, $^O
);
1980 "clive version %s with WWW::Curl version "
1981 . "$WWW::Curl::VERSION.\n"
1982 . "Copyright (c) 2007-2009 Toni Gundogdu "
1983 . "<legatvs\@gmail.com>.\n\n",
1988 while (my ($key, $value) = each(%opted_mods))
1990 $str .= sprintf("--with-$key=%s ", $value ?
"yes" : "no");
1991 $str .= "\n" if (++$i % 2 == 0);
1994 "\nclive is licensed under the ISC license which is functionally\n"
1995 . "equivalent to the 2-clause BSD licence.\n"
1996 . "\tReport bugs: <http://code.google.com/p/clive/issues/>\n";
2005 clive [options]... [URL]...
2009 -h, --help print help and exit
2010 -v, --version print version and exit
2011 --hosts print supported hosts and exit
2012 -b, --background go to background after startup
2013 -e, --emit-csv emit video details as csv to stdout
2014 -E, --emit-xml emit video details as csv to stdout
2015 -V, --clivepass=PATH path to clivepass
2017 -U, --agent=STRING identify as STRING to http server
2018 -y, --proxy=ADDR use ADDR for http proxy
2019 -X, --no-proxy do not use http proxy
2021 -R, --renew renew cache entry for visited url
2022 -s, --show dump cache entries to stdout
2023 -H, --show-format=STRING format dumped cache entries
2024 -g, --grep=PATTERN grep cache entries for PATTERN
2025 -i, --ignore-case ignore case-differences with --grep
2026 -D, --delete delete matched entries from cache
2027 -C, --clear clear cache of all entries
2028 Logging and Input Options:
2029 -o, --output=LOGFILE log messages to LOGFILE
2030 -a, --append=LOGFILE append to LOGFILE
2031 -d, --debug print libcurl debug messages
2032 -q, --quiet turn off all output
2033 -r, --recall recall last url batch
2034 -T, --savebatch=FILE save url batch to FILE
2035 -p, --paste paste input from clipboard
2037 -O, --output-video=FNAME write video to file
2038 -n, --no-extract do not extract any videos
2039 -c, --continue continue partially downloaded file
2040 -W, --overwrite overwrite existing video file
2041 -G, --progress=TYPE use progress indicator TYPE
2042 -u, --youtube-user=UNAME youtube username
2043 -t, --youtube-pass=PASSW youtube password
2044 -L, --no-login do not log into youtube
2045 -S, --savedir=DIR save video files to DIR
2046 -f, --format=FORMAT extract video FORMAT
2047 -l, --cclass=CLASS use CLASS to filter titles
2048 -N, --filename-format=STR use STR to construct output filename
2049 -x, --exec=COMMAND execute COMMAND subsequently
2050 --stream-exec=COMMAND stream COMMAND to be executed
2051 --stream=PERCENT execute stream command when transfer reaches %