clive

   1 #!/usr/bin/env perl
   2 # -*- coding: ascii -*-
   3 ###########################################################################
   4 # clive, the non-interactive video extraction utility
   5 # Copyright (C) 2007,2008 Toni Gundogdu.
   6 #
   7 # clive is free software: you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation, either version 3 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # clive is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with clive.  If not, see <http://www.gnu.org/licenses/>.
  19 ###########################################################################
  20
  21 # Keep it simple.
  22
  23 use strict;
  24 use warnings;
  25
  26 binmode(STDOUT, ":utf8");
  27
  28 use HTML::TokeParser;
  29 use WWW::Curl::Easy;
  30 use Config::Tiny;
  31 use URI::Escape;
  32 use BerkeleyDB;
  33 use IO::Pager;
  34
  35 # Core modules:
  36 use Digest::SHA qw(sha1_hex);
  37 use POSIX qw(strftime);
  38 use Getopt::Long;
  39 use XML::Simple;
  40 use File::Path;
  41 use File::Spec;
  42 use Pod::Usage;
  43 use Encode;
  44 use Cwd;
  45
  46 # Check for non-essential modules: set flags that indicate their availability
  47 my %optional_mods = (Clipboard => 1);
  48 eval "use Clipboard;"; $optional_mods{Clipboard}=0 if $@;
  49
  50 my $VERSION     = "2.0beta1";
  51 my $HOMEDIR     = $ENV{HOME} or die "error: HOME environment variable not set";
  52 my $CONFIGDIR   = File::Spec->catfile(  $HOMEDIR, ".config/clive");
  53 my $CONFIGFILE  = File::Spec->catfile($CONFIGDIR, "config");
  54 my $CACHEFILE   = File::Spec->catfile($CONFIGDIR, "cache");
  55 my $RECALLFILE  = File::Spec->catfile($CONFIGDIR, "recall");
  56
  57 my %opts;           # Holds runtime options
  58 my @queue;          # Holds input URLs
  59 my $workdir=getcwd; # Holds startup workdir
  60 my $logfile;        # Holds path to logfile (--output-file, --append-file)
  61 my $curl;           # Holds the curl handle: reused throughout lifespan
  62 my $cache_db;       # Holds the handle to cache BDB
  63 my %cache;          # Holds the handle to cache BDB (tied hash)
  64 my $hash;           # Hash (SHA1) of the current URL
  65 my %entry;          # Multi-purpose video (cache) record (hold/read/write)
  66 my $youtube_on=0;   # Flag: Whether logged into Youtube
  67 my $last_bspaces;   # Progress: Keeps count of the last printed backspaces
  68 my $curr_fn;        # Progress: Holds the name of the current video file
  69 my $time_started;   # Progress: Holds transfer started time
  70 my $last_eta;       # Progress: Holds last saved ETA for file transfer
  71
  72 my $default_showfmt # Default --show format
  73     = qq/%D: "%t" | %mMB/;
  74
  75 my %re_hosts = (    # Precompiled regex used to identify the host
  76 IsYoutube   => qr|\Qyoutube.com\E|i,    IsGoogle => qr|\Qvideo.google.\E|i,
  77 IsSevenload => qr|\Qsevenload.com\E|i,  IsBreak  => qr|\Qbreak.com\E|i,
  78 IsMetacafe  => qr|\Qmetacafe.com\E|i
  79 );
  80
  81 # Parse config
  82 my $c = Config::Tiny->read($CONFIGFILE);
  83 %opts = (
  84     agent   => $c->{http}->{agent},
  85     proxy   => $c->{http}->{proxy},
  86     maxspeed=> $c->{http}->{maxspeed},
  87     minspeed=> $c->{http}->{minspeed},
  88     savedir => $c->{output}->{savedir},
  89     cclass  => $c->{output}->{cclass},
  90     fnfmt   => $c->{output}->{file},
  91     showfmt => $c->{output}->{show},
  92     ytuser  => $c->{youtube}->{user},
  93     ytpass  => $c->{youtube}->{pass},
  94     play    => $c->{commands}->{play},
  95 );
  96
  97 # Parse cmdline
  98 # Define those not read from config and init with defaults
  99 $opts{quiet}        = 0;
 100 $opts{paste}        = 0;
 101 $opts{format}       = 'flv';
 102 $opts{extract}      = 1;
 103 $opts{renew}        = 0;
 104 $opts{clear}        = 0;
 105 $opts{recall}       = 0;
 106 $opts{login}        = 1;
 107 $opts{show}         = 0;
 108 $opts{grep}         = undef;
 109 $opts{case}         = 1;
 110 $opts{delete}       = 0;
 111 $opts{background}   = 0;
 112 $opts{output}       = undef;
 113 $opts{append}       = undef;
 114 $opts{progress}     = 1;
 115 $opts{debug}        = 0;
 116 $opts{help}         = 0;
 117 $opts{manual}       = 0;
 118 $opts{version}      = 0;
 119
 120 Getopt::Long::Configure("bundling");
 121 GetOptions(\%opts,
 122     'debug|d',          'help|h',           'manual|m',     'version|v',
 123     'paste|x',          'show|s',           'delete|D',     'clear|C',
 124     'continue|c',       'renew|R',          'recall|r',     'format|f=s',
 125     'output|o=s',       'append|a=s',       'background|b', 'quiet|q',
 126     'grep|g=s',
 127 #'maxspeed!',    'minspeed!',
 128     # Since '$longopt!|$shortopt' is a no-no.
 129     'ignore-case|i' => sub { $opts{case}    = 0; },
 130     'noextract|n'   => sub { $opts{extract} = 0; },
 131     'noplay|P'      => sub { $opts{play}    = 0; },
 132     'nologin|L'     => sub { $opts{login}   = 0; },
 133     'noproxy|X'     => sub { $opts{proxy}   = ""; },
 134     'noprogress|G'  => sub { $opts{progress}= 0; },
 135 ) or pod2usage(1);
 136
 137 print_version() if $opts{version};
 138 pod2usage(-exitstatus => 0, -verbose => 1) if $opts{help};
 139 pod2usage(-exitstatus => 0, -verbose => 2) if $opts{manual};
 140
 141 init_cache();
 142
 143 if      ( $opts{clear} )    { clear_cache(); }
 144 elsif   ( $opts{show} )     { show_cache(); }
 145
 146 get_queue();
 147
 148 select STDERR; $| = 1; # Make unbuffered
 149 select STDOUT; $| = 1;
 150
 151 daemonize() if $opts{background};
 152
 153 process_queue();
 154
 155 free_cache();
 156
 157
 158 ## Subroutines: Connection
 159
 160 sub set_curl_opts { # Set common curl options for handle
 161     $curl->setopt(CURLOPT_USERAGENT,
 162         $opts{agent} ? $opts{agent} : "Mozilla/5.0");
 163
 164     $curl->setopt(CURLOPT_VERBOSE, 1) if $opts{debug};
 165     $curl->setopt(CURLOPT_PROXY, $opts{proxy}) if defined $opts{proxy};
 166     $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
 167     $curl->setopt(CURLOPT_AUTOREFERER, 1);
 168     $curl->setopt(CURLOPT_HEADER, 1);
 169     $curl->setopt(CURLOPT_NOBODY, 0);
 170
 171     # NOTE: No effect. Bug in WWW::Curl::Easy?
 172     $curl->setopt(CURLOPT_MAX_RECV_SPEED_LARGE, $opts{maxspeed})
 173         if $opts{maxpseed};
 174
 175     $curl->setopt(CURLOPT_LOW_SPEED_LIMIT, $opts{minspeed})
 176         if $opts{minspeed};
 177 }
 178
 179 sub auth_youtube { # Log into Youtube
 180     print "=> Youtube: Attempting to login as $opts{ytuser} ..."
 181         unless $opts{quiet};
 182
 183     my $response = "";
 184     open my $fh, ">", \$response;
 185
 186     my $login_url = "http://youtube.com/login?current_form=loginform"
 187         ."&username=$opts{ytuser}&password=$opts{ytpass}&action_login=log+in";
 188
 189     $curl->setopt(CURLOPT_URL, $login_url);
 190     $curl->setopt(CURLOPT_COOKIEFILE, ""); # Enable cookies from here on
 191     $curl->setopt(CURLOPT_ENCODING, ""); # Supported encodings
 192     $curl->setopt(CURLOPT_WRITEDATA, $fh);
 193
 194     my $rc = $curl->perform;
 195     my $errmsg;
 196
 197     if ( $rc == 0 ) {
 198         foreach ( $response ) {
 199             $errmsg = "error: incorrect login for $opts{ytuser}" and last
 200                 if /login was incorrect/i;
 201         }
 202     } else {
 203         $errmsg = "error: ".$curl->strerror($rc)." (http/$rc)";
 204     }
 205     close $fh;
 206
 207     print STDERR "\n$errmsg\n" and exit if $errmsg;
 208
 209     print "done.\n=> Youtube: Bypassing age check ..." unless $opts{quiet};
 210     $curl->setopt(CURLOPT_COOKIE, "is_adult=" . uc( sha1_hex(rand()) ) );
 211     print "Done.\n" unless $opts{quiet};
 212
 213     $youtube_on = 1;
 214 }
 215
 216
 217 # Subroutines: Queue
 218
 219 sub process_queue {
 220     $curl = WWW::Curl::Easy->new;
 221     set_curl_opts();
 222
 223     foreach ( @queue ) {
 224         $hash = sha1_hex($_);
 225
 226         my ($rc, $rfh, $response) = fetch_page($_);
 227         my $errmsg;
 228
 229         # TODO: Clean up
 230         if ( $rc == 0 ) {
 231             $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
 232             if ( $rc == 0 or $rc == 200) {
 233                 if ( ! defined( $entry{page_url} ) ) {
 234                     next if process_page($_, \$response, $rfh) == -1;
 235                 }
 236                 if ( $entry{xurl} ) {
 237                     my ($rc, $content_type) = query_video_length();
 238                     if ( $rc == 0 ) {
 239                         my ($rc, $path) = extract_video($content_type)
 240                             if $content_type;
 241                         if ( $opts{play} and $rc == 0 ) {
 242                             print "=> Play: $path\n";
 243                             my $cmd = $opts{play};
 244                             $cmd =~ s/%i/"$path"/;
 245                             system($cmd.">/dev/null") == 0
 246                                 or die "system exited with $?";
 247                         }
 248                     }
 249                 }
 250             } else {
 251                 $errmsg = $curl->strerror($rc)." (http/$rc)";
 252             }
 253         } else {
 254             $errmsg = $curl->strerror($rc)." (http/$rc)";
 255         }
 256         close $rfh;
 257         print STDERR "\n==> error: $errmsg\n" if $errmsg;
 258     }
 259 }
 260
 261 sub fetch_page {
 262     my ($url, $from_cache, $response, $rc) = (shift, 0, "", 0);
 263
 264     open my $fh, ">", \$response;
 265
 266     # Log into Youtube if username and password are defined
 267     if ( $opts{ytuser} and $opts{ytpass} and $opts{login} ) {
 268         auth_youtube() if ! $youtube_on and $url =~ /$re_hosts{IsYoutube}/;
 269     }
 270
 271     if ( $cache{$hash} ) {
 272         fetch_entry($hash); # Make sure cached "format" matches with options
 273         $from_cache = 1 if $opts{format} eq $entry{file_format};
 274     }
 275
 276     $from_cache = 0 if $opts{renew};
 277
 278     printf "%s $url ...", ! $from_cache ? "Fetching":"Caching"
 279         unless $opts{quiet};
 280
 281     if ( ! $from_cache ) {
 282         %entry = ();
 283         $curl->setopt(CURLOPT_URL, $url);
 284         $curl->setopt(CURLOPT_ENCODING, ""); # Supported encodings
 285         $curl->setopt(CURLOPT_WRITEDATA, $fh);
 286         $rc = $curl->perform;
 287     }
 288
 289     return ($rc, $fh, $response);
 290 }
 291
 292 sub process_page {
 293     my ($url, $response_ref, $response_fh) = @_;
 294     print "done.\n=> Processing page ..." unless $opts{quiet};
 295
 296     $$response_ref =~ tr{\n}//d;
 297
 298     my $p = HTML::TokeParser->new($response_ref);
 299     $p->get_tag("title");
 300     my $title = $p->get_trimmed_text;
 301
 302     my ($xurl, $id);
 303     if      ( $url =~ /$re_hosts{IsYoutube}/ ) {
 304         ($xurl, $id) = handle_youtube($response_ref);
 305     } elsif ( $url =~ /$re_hosts{IsGoogle}/ ) {
 306         ($xurl, $id) = handle_google($response_ref);
 307     } elsif ( $url =~ /$re_hosts{IsSevenload}/ ) {
 308         ($xurl, $id, $title) = handle_sevenload($response_ref, $response_fh);
 309     } elsif ( $url =~ /$re_hosts{IsBreak}/ ) {
 310         ($xurl, $id, $title) = handle_break($response_ref);
 311     } elsif ( $url =~ /$re_hosts{IsMetacafe}/ ) {
 312         ($xurl, $id, $title) = handle_metacafe($response_ref);
 313     }
 314     return -1 if ! $xurl or ! $id or ! $title;
 315
 316     $title = decode_utf8($title); # sevenload, break grab title from elsewhere
 317     $title =~ tr{;}//d; # Cache values cannot contain ';'
 318
 319     $entry{page_url}      = $url;
 320     $entry{xurl}          = $xurl;
 321     $entry{page_title}    = $title;
 322     $entry{video_id}      = $id;
 323     $entry{file_format}   = $opts{format};
 324
 325     return 0;
 326 }
 327
 328 sub query_video_length {
 329     my ($content_type, $errmsg);
 330
 331     if ( ! $entry{file_length} ) {
 332         print "done.\n=> Querying file length ..." unless $opts{quiet};
 333
 334         $curl->setopt(CURLOPT_URL, $entry{xurl});
 335         # We're not interested in downloading the file. GET => HEAD request.
 336         $curl->setopt(CURLOPT_NOBODY, 1);
 337         my $rc = $curl->perform;
 338         # Reset HEAD => GET
 339         $curl->setopt(CURLOPT_HTTPGET, 1);
 340
 341         $entry{file_length} =
 342             $curl->getinfo(CURLINFO_CONTENT_LENGTH_DOWNLOAD);
 343
 344         $content_type           =
 345             $entry{file_suffix} =
 346             $curl->getinfo(CURLINFO_CONTENT_TYPE);
 347
 348         $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
 349
 350         # TODO: Clean up
 351         if ( $rc == 200 ) {
 352             my $content_ok = 0;
 353             if ( $content_type =~ m!video/(.*)! ) {
 354                 $entry{file_suffix} = $1;
 355                 if ( $content_type =~ /(.*)-(.*)$/ ) {
 356                     $entry{file_suffix} = $2;
 357                 } $content_ok = 1;
 358             # Break and Metacafe return "text/plain" for Content-Type
 359             } elsif ( $content_type =~ m!text/plain! ) {
 360                 if ( $opts{format} eq "flv" ) {
 361                     if ( $entry{page_url} =~ /$re_hosts{IsBreak}/
 362                      or $entry{page_url} =~ /$re_hosts{IsMetacafe}/ ) {
 363                         $entry{file_suffix} = "flv";
 364                         $content_ok = 1;
 365                     }
 366                 }
 367             }
 368             $errmsg = "expected different content-type, "
 369                 . "received \"$content_type\"" unless $content_ok;
 370         } else {
 371             $errmsg = "server returned HTTP/$rc";
 372         }
 373     } else { # Construct content-type from cache
 374         $content_type = "video/$entry{file_suffix}";
 375     }
 376
 377     unless ( $opts{quiet} ) {
 378         if ( ! $errmsg ) { print "done.\n"; }
 379         else { print STDERR "\n==> error: $errmsg\n"; }
 380     }
 381
 382     return ($errmsg ? -1:0, $content_type);
 383 }
 384
 385 sub extract_video {
 386     my $content_type= shift;
 387     my $fn          = title_to_filename($entry{page_title});
 388     my $path        = File::Spec->catfile( $opts{savedir} || $workdir, $fn );
 389     my $filemode    = ">";
 390     my $cont_from   = 0;
 391     my $remaining   = $entry{file_length};
 392     my $size        = -s $path;
 393     my $rc          = 0;
 394     my $errmsg;
 395
 396     # We have everything for cache. Add/update the bdb entry.
 397     save_entry($hash);
 398
 399     $curl->setopt(CURLOPT_ENCODING, "identity"); # Disable
 400
 401     if ( $size ) {
 402         if ( $size == $entry{file_length} and $opts{extract} ) {
 403             print "=> Refusing to extract. "
 404                 . "localfile length matches remotefile length.\n";
 405             return (0, $path);
 406         }
 407         if ( $size < $entry{file_length} and $opts{continue} ) {
 408             $cont_from  = $size;
 409             $filemode   = ">>";
 410             $remaining  = ($entry{file_length} - $cont_from);
 411         } else {
 412             $path = newname_if_exists( $opts{savedir} || $workdir, $fn );
 413         }
 414     }
 415
 416     unless ( $opts{quiet} ) {
 417         print "=> File: $fn\n" if ( ! $opts{extract} );
 418         print "=> Length: $entry{file_length} ";
 419         printf"(%.2fMB)  ",$entry{file_length}/1024/1024 if $entry{file_length};
 420         printf "From: %u (Left: %u)  ", $cont_from, $remaining if $cont_from;
 421         printf "[$content_type]" if $content_type;
 422         print "\n";
 423     }
 424
 425     if ( $rc == 0 ) { #
 426         # -1 = Disable subsequent play with --noextract
 427         return (-1, $path) unless $opts{extract};
 428
 429         if ( open my $fh, "$filemode$path" ) {
 430             $curl->setopt(CURLOPT_URL, $entry{xurl});
 431             $curl->setopt(CURLOPT_HEADER, 0); # Disable
 432             $curl->setopt(CURLOPT_RESUME_FROM, $cont_from) if $cont_from;
 433             $curl->setopt(CURLOPT_WRITEDATA, $fh);
 434
 435             unless ( $opts{quiet} ) {
 436                 $curl->setopt(CURLOPT_PROGRESSFUNCTION, \&progress_callback);
 437                 $curl->setopt(CURLOPT_NOPROGRESS, 0);
 438                 $curr_fn        = $fn;
 439                 $last_bspaces   = 0;
 440                 $time_started   = time;
 441                 $last_eta       = '';
 442             }
 443
 444             $rc = $curl->perform;
 445             close $fh;
 446
 447             # Reset
 448             $curl->setopt(CURLOPT_HEADER, 1);
 449
 450             if ( $rc == 0 ) { $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE); }
 451             else { $errmsg = $curl->strerror($rc)." (http/$rc)"; }
 452         } else {
 453             $errmsg = "$path: $!";
 454             $rc = -1; # Disable subsequent play
 455         }
 456     } else {
 457         $errmsg = $curl->strerror($rc)." (http/$rc)";
 458     }
 459
 460     if ( $errmsg ) { print STDERR "\n==> error: $errmsg\n"; }
 461     else { print "\n==> Closed with HTTP/$rc.\n" unless $opts{quiet}; }
 462
 463     return ($errmsg ? -1:0, $path);
 464 }
 465
 466 sub get_queue {
 467     if ( $opts{recall} and -e $RECALLFILE ) {
 468         open my $fh, "<$RECALLFILE" or die "error: $RECALLFILE: $!";
 469         parse_input($_) while ( <$fh> );
 470         close $fh;
 471     }
 472
 473     if ( $opts{paste} ) {
 474         print STDERR "error: Clipboard module not found" and exit
 475             unless $optional_mods{Clipboard};
 476         parse_input($_) foreach Clipboard->paste();
 477     }
 478
 479     parse_input($_) foreach @ARGV;
 480     grep_cache() if $opts{grep};
 481     read_stdin() unless @queue;
 482
 483     my %h = map { $_, 1 } @queue; # Remove duplicates
 484     @queue = keys %h;
 485
 486     open my $fh, ">$RECALLFILE" or die "error: $RECALLFILE: $!";
 487     print $fh "$_\n" foreach @queue;
 488     close $fh;
 489 }
 490
 491 sub read_stdin {
 492     parse_input($_) while ( <STDIN> );
 493 }
 494
 495 sub parse_input {
 496     my $url = shift;
 497
 498     return if $url =~ /^$/;
 499     chomp $url;
 500
 501     if ( $url =~ /&srcurl=(.*?)&/ ) { # GVideo: one of many redirects
 502         unless ( $opts{quiet} ) {
 503             print "Found redirect in  ...".(split /&/,$url)[0]."\n";
 504             print "=> Using the redirect URL instead\n";
 505         }
 506         $url = uri_unescape($1);
 507     }
 508
 509     # Insert http:// if not found
 510     if ( $url !~ /^http:\/\//i ) { $url = "http://$url"; }
 511
 512     # Translate embedded URL to video page URL
 513     $url =~ s{/v/}{/watch?v=}ig; # Youtube
 514     $url =~ s{\Q/googleplayer.swf?docid=}{/videoplay?docid=\E}ig; # GVideo
 515
 516     # Remove params from the URL  NOTE: May require tweaking
 517     $url = (split /&/, $url) [0];
 518
 519     foreach my $re ( %re_hosts ) {
 520         push @queue,$url and return 0 if $url =~ /$re/;
 521     }
 522     print STDERR "error: nosupport: $url\n";
 523     return -1;
 524 }
 525
 526
 527 # Subroutines: Video page handlers
 528
 529 sub handle_youtube {
 530     my ($response_ref, $xurl) = (shift);
 531
 532     my %re = (
 533         GrabID  => qr/"video_id": "(.*?)"/,
 534         GrabT   => qr/"t": "(.*?)"/
 535     );
 536
 537     my $id = $1 if $$response_ref =~ /$re{GrabID}/;
 538     my $t  = $1 if $$response_ref =~ /$re{GrabT}/;
 539
 540     if ( $id and $t ) {
 541         $xurl = "http://youtube.com/get_video?video_id=$id&t=$t";
 542         my $fmt;
 543         if      ( $opts{format} eq  "mp4" ) { $fmt = 18; }
 544         elsif   ( $opts{format} eq "3gpp" ) { $fmt = 17; }
 545         elsif   ( $opts{format} eq "xflv" ) { $fmt =  6; }
 546         $xurl .= "&fmt=$fmt" if $fmt;
 547     } else {
 548         printf STDERR "\nerror: failed to extract &%s\n", $id ? "t":"video_id";
 549     }
 550     return ($xurl, $id);
 551 }
 552
 553 sub handle_google {
 554     my $response_ref = shift;
 555
 556     my %re = (
 557         GrabRedirect => qr|lfRedirect\('(.*?)'|,
 558         GrabVideoURL => qr|\Qgoogleplayer.swf?videoUrl\x3d\E(.*?)\Q\x26|,
 559         GrabID       => qr|docid: '(.*?)'|,
 560         GrabMP4      => qr|\Qhref="http://vp.\E(.*?)"|,
 561     );
 562
 563     my $redir  = $1 if $$response_ref =~ /$re{GrabRedirect}/;
 564     my $xurl   = uri_unescape($1) if $$response_ref =~ /$re{GrabVideoURL}/;
 565     my $id     = $1 if $$response_ref =~ /$re{GrabID}/;
 566     my $mp4    = $1 if $$response_ref =~ /$re{GrabMP4}/;
 567
 568     if ( $redir ) {
 569         $redir =~ s{\\x3d}{=};
 570         push @queue, $redir;
 571         print "Found a redirect to another host. Pushed into queue.\n"
 572             unless $opts{quiet};
 573     } else {
 574         $xurl = $mp4 if ( $mp4 and $opts{format} eq "mp4" );
 575         print STDERR "\nerror: extraction url not found\n" unless $xurl;
 576     }
 577     return ($xurl, $id);
 578 }
 579
 580 sub handle_sevenload {
 581     my ($response_ref, $response_fh) = @_;
 582
 583     my %re       = ( GrabConfigPath => qr|configPath=(.*?)"| );
 584     my $confpath = uri_unescape($1) if $$response_ref =~ /$re{GrabConfigPath}/;
 585
 586     my ($id, $xurl, $title);
 587     if ( $confpath ) {
 588         ($xurl, $id, $title) =
 589             fetch_sevenload_configxml($confpath, $response_fh);
 590     } else {
 591         print STDERR "\nerror: configPath not found\n";
 592     }
 593     return ($xurl, $id, $title);
 594 }
 595
 596 sub handle_break {
 597     my $response_ref = shift;
 598
 599     my %re = (
 600         GrabTitle    => qr|id="vid_title" content="(.*?)"|,
 601         GrabID       => qr|ContentID='(.*?)'|,
 602         GrabFilePath => qr|ContentFilePath='(.*?)'|,
 603         GrabFileName => qr|FileName='(.*?)'|
 604     );
 605
 606     my $title   = $1 if $$response_ref =~ /$re{GrabTitle}/;
 607     my $id      = $1 if $$response_ref =~ /$re{GrabID}/;
 608     my $fpath   = $1 if $$response_ref =~ /$re{GrabFilePath}/;
 609     my $fname   = $1 if $$response_ref =~ /$re{GrabFileName}/;
 610
 611     my ($xurl, $errmsg);
 612     if ( $fpath and $fname ) {
 613         $xurl = "http://media1.break.com/dnet/media/$fpath/$fname";
 614         my $fmt = $opts{format};
 615         $fmt = 'flv' if not grep /$opts{format}/, ('flv','wmv');
 616         $xurl .= ".$fmt";
 617     } else {
 618         $errmsg = "failed to extract ContentFilePath" if ! $fpath;
 619         $errmsg = "failed to extract FileName" if ! $fname and ! $errmsg;
 620     }
 621
 622     $errmsg = "failed to extract title" if ! $title and ! $errmsg;
 623     $errmsg = "failed to extract id"    if ! $id and ! $errmsg;
 624     print STDERR "\nerror: " . $errmsg . "\n" if $errmsg;
 625
 626     return ($xurl, $id, $title);
 627 }
 628
 629 sub handle_metacafe {
 630     my $response_ref = shift;
 631
 632     my %re = (
 633         GrabTitle     => qr|"title":"(.*?)"|,
 634         GrabID        => qr|"itemID":"(.*?)"|,
 635         GrabItemFiles => qr|ItemFiles(.*?)"|,
 636         GrabVideoCDN  => qr|"videoCDNURL":"(.*?)"|
 637     );
 638
 639     my $title     = $1 if $$response_ref =~ /$re{GrabTitle}/;
 640     my $id        = $1 if $$response_ref =~ /$re{GrabID}/;
 641     my $itemfiles = $1 if $$response_ref =~ /$re{GrabItemFiles}/;
 642     my $videocdn  = $1 if $$response_ref =~ /$re{GrabVideoCDN}/;
 643
 644     my ($xurl, $errmsg);
 645     if ( $itemfiles and $videocdn ) {
 646         $itemfiles =~ tr{\\}//d;
 647         $videocdn  =~ tr{\\}//d;
 648         $xurl = $videocdn.$itemfiles;
 649     } else {
 650         $errmsg = "failed to extract ItemFiles" if ! $itemfiles;
 651         $errmsg = "failed to extract videoCDNURL" if ! $videocdn and ! $errmsg;
 652     }
 653
 654     $errmsg = "failed to extract title" if ! $title and ! $errmsg;
 655     $errmsg = "failed to extract itemID" if ! $id and ! $errmsg;
 656     print STDERR "\nerror: " . $errmsg . "\n" if $errmsg;
 657
 658     return ($xurl, $id, $title);
 659 }
 660
 661
 662 # Subroutines: LittleHelpers
 663
 664 sub daemonize {
 665     $logfile = $opts{append}
 666         || $opts{output}
 667         || File::Spec->catfile( $workdir, "clive-log" );
 668
 669     my $pid = fork;
 670     if ( $pid < 0 ) {
 671         print STDERR "\nfork failed: $!";
 672         exit 1;
 673     } elsif ( $pid != 0 ) {
 674         print "Continuing in background, pid $pid.\n";
 675         print "Output will be written to $logfile.\n" unless $opts{quiet};
 676         exit 0;
 677     }
 678
 679     chdir $workdir;
 680
 681     my $mode = $opts{append} ? ">>" : ">";
 682     $logfile = "/dev/null" if $opts{quiet};
 683
 684     open STDOUT, "$mode", "$logfile" or die "cannot redirect STDOUT: $!";
 685     open STDERR, ">&STDOUT" or die "cannot dup STDOUT: $!";
 686 }
 687
 688 sub fetch_sevenload_configxml {
 689     my ($conf_url, $response_fh) = @_;
 690     print "done.\n=> Fetching config XML..." unless $opts{quiet};
 691
 692     my $conf_xml = "";
 693     open my $conf_fh, ">", \$conf_xml;
 694
 695     $curl->setopt(CURLOPT_URL, $conf_url);
 696     $curl->setopt(CURLOPT_HEADER, 0);
 697     $curl->setopt(CURLOPT_WRITEDATA, $conf_fh);
 698
 699     my $rc = $curl->perform;
 700
 701     # Reset
 702     $curl->setopt(CURLOPT_HEADER, 1);
 703     $curl->setopt(CURLOPT_WRITEDATA, $response_fh);
 704
 705     close $conf_fh;
 706
 707     my ($id, $xurl, $title);
 708
 709     if ( $rc == 0 ) {
 710         my $xml = XMLin($conf_xml);
 711         #use Data::Dumper; print Dumper($xml);
 712         $title  = $xml->{playlists}{playlist}{items}{item}{title}; # Monstrous.
 713         $id     = $xml->{playlists}{playlist}{items}{item}{id};
 714         $xurl   = $xml->{playlists}{playlist}{items}{item}{videos}{video}{url};
 715     } else {
 716         print STDERR "\nerror: " . $curl->strerror($rc) . " (http/$rc)\n";
 717     }
 718
 719     my $errmsg;
 720     $errmsg = "failed to extract item title" if ! $title;
 721     $errmsg = "failed to extract item id"    if ! $id and ! $errmsg;
 722     print STDERR "\nerror: " . $errmsg . "\n" if $errmsg;
 723
 724     return ($xurl, $id, $title);
 725 }
 726
 727 sub title_to_filename {
 728     my $title = shift;
 729
 730     $title =~ s/youtube - //i; # Remove host specific strings from title
 731     $title =~ s/ video//i; # Breakcom
 732
 733     my $r = $opts{cclass} || qr|\w|;
 734     $title = join '', $title =~ /$r/g;
 735
 736     # Courtesy of:
 737     #   http://search.cpan.org/~gaas/URI-1.37/URI.pm#PARSING_URIs_WITH_REGEXP
 738     my ($scheme, $authority, $path, $query, $fragment) =
 739         m{(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?}o;
 740     # Extract the domain from the URL.
 741     my @a = split /\./, $authority;
 742
 743     my $fn          = $opts{fnfmt} || "%t-(%i)-[%d].%s";
 744     my $timestamp   = strftime("%F %T",localtime);
 745
 746     my %h = (
 747         "%t" => $title,
 748         "%s" => $entry{file_suffix},
 749         "%d" => $a[scalar @a-2], # Without the TLD.
 750         "%i" => $entry{video_id},
 751         "%D" => (split / /, $timestamp)[0],
 752         "%T" => (split / /, $timestamp)[1],
 753         "%S" => $timestamp,
 754     );
 755
 756     my $m = join '|', keys %h;
 757     $fn =~ s/($m)/$h{$1}/ig;
 758
 759     return $fn;
 760 }
 761
 762 sub newname_if_exists {
 763     my ($path, $orig, $new) = (shift, shift);
 764
 765     for ( my $i=1;; $i++ ) {
 766         $new = File::Spec->catfile( $path, "$orig.$i" );
 767         last if ! -e $new;
 768     }
 769     return $new;
 770 }
 771
 772 sub progress_callback {
 773     return 0 unless $opts{progress};
 774
 775     my ($clientp, $dltotal, $dlnow, $ultotal, $ulnow) = @_;
 776
 777     my $percent = 0;
 778     $percent = int ( $dlnow / $dltotal * 100 ) if $dlnow;
 779
 780     my $elapsed = time - $time_started;
 781     return 0 if $elapsed < 1.0;
 782
 783     my $rate = $dlnow / $elapsed;
 784
 785     my $eta = "--:--:--";
 786     if ( $rate > 0 ) {
 787         my $left = ( $dltotal - $dlnow ) / $rate;
 788         my $ss = $left % 60;
 789         my $mm = int( ( $left % 3600 ) / 60 );
 790         my $hh = int( $left / 3600 );
 791         if ( $hh > 99 ) { $eta = sprintf "%2dh%02dm", $hh, $mm; }
 792         else { $eta = sprintf "%2dh%02dm%02ds", $hh, $mm, $ss; }
 793     }
 794     return 0 if $eta eq $last_eta;
 795
 796     my $s = sprintf "%.50s%4.4g%%%8.1fKB/s%12s",
 797         $curr_fn, $percent, $rate/1024, $eta;
 798
 799     print "\b" x $last_bspaces . $s unless $opts{quiet};
 800     $last_bspaces = length(encode_utf8($s));
 801
 802     return 0;
 803 }
 804
 805 sub init_cache {
 806     mkpath( [$CONFIGDIR], 1, 0700 );
 807     $cache_db = tie %cache, "BerkeleyDB::Hash",
 808         -Filename => $CACHEFILE,
 809         -Flags => DB_CREATE
 810     or die "error: cannot open $CACHEFILE: $! $BerkeleyDB::Error\n";
 811 }
 812
 813 sub format_show {
 814     my $s   = shift;
 815     my %e   = map_entry(shift);
 816
 817     my %h   = (
 818         "%t" => $e{page_title},
 819         "%i" => $e{video_id},
 820         "%l" => $e{file_length},
 821         "%m" => sprintf("%.2f", $e{file_length}/1048576),
 822         "%u" => $e{page_url},
 823         "%x" => $e{xurl},
 824         "%D" => (split / /, $e{time_stamp})[0],
 825         "%T" => (split / /, $e{time_stamp})[1],
 826         "%S" => $e{time_stamp},
 827     );
 828
 829     my $m = join '|', keys %h;
 830     $s =~ s/($m)/$h{$1}/ig;
 831
 832     return $s;
 833 }
 834
 835 sub show_cache {
 836     IO::Pager->new(*STDOUT);
 837
 838     my $fmt = $opts{showfmt} || $default_showfmt;
 839     my @entries = ();
 840
 841     if ( $opts{grep} ) {
 842         grep_cache(); # Stores matches => @queue
 843         push @entries, format_show( $fmt, sha1_hex($_) )
 844             foreach ( @queue );
 845     } else {
 846         push @entries, format_show( $fmt, $_ )
 847             foreach ( sort keys %cache );
 848     }
 849     print STDOUT "$_\n" foreach sort @entries;
 850     close STDOUT;
 851
 852     if ( $opts{grep} and $opts{delete} and scalar @queue > 0 ) {
 853         print "Confirm delete (y/N):";
 854         $_ = lc <STDIN>;
 855         chomp;
 856         if ( lc $_ eq "y" ) { delete $cache{sha1_hex($_)} foreach ( @queue ); }
 857     }
 858     exit;
 859 }
 860
 861 sub clear_cache {
 862     unlink $CACHEFILE if -e $CACHEFILE;
 863     exit;
 864 }
 865
 866 sub free_cache {
 867     undef $cache_db;
 868     untie %cache;
 869 }
 870
 871 sub map_entry {
 872     my $key     = shift;
 873     my @values  = split /;/, $cache{$key};
 874
 875     my @keys = qw(
 876         file_suffix file_length file_format page_title
 877         page_url    time_stamp  video_id    xurl
 878     ); # Order matters. See also save_cache_entry.
 879
 880     my $i = 0;
 881     return map { $_ => $values[$i++] } @keys;
 882 }
 883
 884 sub fetch_entry {
 885     %entry = map_entry($hash);
 886     $entry{page_title} = decode_utf8($entry{page_title});
 887     #while (my ($key, $value) = each(%entry)) { print "$key => $value\n"; } die;
 888 }
 889
 890 sub save_entry {
 891     my @values;
 892
 893     $entry{time_stamp} = strftime("%F %T",localtime);
 894     push @values,$entry{$_} foreach sort keys %entry;
 895
 896     $cache{$hash} = join ';', @values;
 897     $cache_db->db_sync();
 898 }
 899
 900 sub grep_cache {
 901     my $g = $opts{case} ? qr|$opts{grep}| : qr|$opts{grep}|i;
 902     my $fmt = $opts{showfmt} || $default_showfmt;
 903     foreach ( sort keys %cache ) {
 904         my @e = split /;/, $cache{$_};
 905         if ( grep /$g/, @e ) {
 906             if   ( $opts{delete} ) {
 907                 if ( $opts{show} ) { push @queue,$e[4]; }
 908                 else { delete $cache{$_}; }
 909             }
 910             else { push @queue,$e[4]; } # 4=URL
 911         }
 912     }
 913     exit if $opts{delete} and not $opts{show};
 914 }
 915
 916 sub print_version {
 917     my $perl_v      = sprintf "%vd", $^V;
 918     my $clipboard_v = $optional_mods{Clipboard} ? $Clipboard::VERSION : "-";
 919 print
 920 "clive version $VERSION.  Copyright (C) 2007,2008 Toni Gundogdu.
 921
 922 Perl: $perl_v ($^O)
 923 Modules:
 924   * Config::Tiny/$Config::Tiny::VERSION\t\t* BerkeleyDB/$BerkeleyDB::VERSION
 925   * WWW::Curl/$WWW::Curl::VERSION\t\t* URI::Escape/$URI::Escape::VERSION
 926   * HTML::TokeParser/$HTML::TokeParser::VERSION\t* Clipboard/$clipboard_v
 927   * IO::Pager/$IO::Pager::VERSION
 928 Core modules:
 929   * POSIX/$POSIX::VERSION\t\t\t* Cwd/$Cwd::VERSION
 930   * Getopt::Long/$Getopt::Long::VERSION\t\t* Pod::Usage/$Pod::Usage::VERSION
 931   * File::Path/$File::Path::VERSION\t\t* File::Spec/$File::Spec::VERSION
 932   * Digest::SHA/$Digest::SHA::VERSION\t\t* Encode/$Encode::VERSION
 933
 934 See --manual for a list of the supported websites.
 935
 936 This program comes with ABSOLUTELY NO WARRANTY. You may redistribute copies of
 937 clive under the terms of the GNU General Public License as published by the
 938 Free Software Foundation, either version 3 of the License, or (at your option)
 939 any later version. You should have received a copy of the General Public License
 940 along with this program. If not, see http://www.gnu.org/licenses/.
 941 "; exit;
 942 }
 943
 944
 945 __END__
 946
 947 =head1 NAME
 948
 949 clive - the non-interactive video extraction utility
 950
 951 =head1 SYNOPSIS
 952
 953 clive [option]... [URL]...
 954
 955 =head1 DESCRIPTION
 956
 957 clive is an open source command-line utility for extracting videos from Youtube
 958 and other video sharing websites. It was originally written to bypass the
 959 Adobe Flash requirement needed to view the hosted videos.
 960
 961 clive is non-interactive meaning it can work in the background while the user
 962 is not logged on. This allows the user to start an extraction and disconnect
 963 from the system letting clive finish the work. By contrast, most of the
 964 extraction websites and UNIX scripts require the user's constant presence
 965 which can be a great hindrance when transferring a lot of data.
 966
 967 Users familiar with the GNU L<wget(1)> utility will notice that clive borrows
 968 some of the feature concepts from wget, as well as, with some changes,
 969 the above paragraph, option syntax description below and the output option
 970 descriptions. Kudos to the wget team for their original work.
 971
 972 =head1 OPTIONS
 973
 974 =over 4
 975
 976 =item B<Option Syntax>
 977
 978 Every option has a long form along with the short one. Long options are more
 979 convenient to remember but take time to type. You may freely mix different
 980 option styles, or specify options after the command-line arguments. For example:
 981
 982     clive -c --format=mp4 URL -n
 983
 984 You may also put several options together that do not require arguments.
 985 For example:
 986
 987     clive -xcn URL
 988
 989 Which is equivalent to:
 990
 991     clive -x -c -n URL
 992
 993 =back
 994
 995 B<Basic Options>
 996
 997 =over 4
 998
 999 =item B<-h --help>
1000
1001 Print this help and exit.
1002
1003 =item B<-m --manual>
1004
1005 Display the manual page and exit.
1006
1007 =item B<-v --version>
1008
1009 Display version details and exit.
1010
1011 =item B<-b --background>
1012
1013 Go to background immediately after startup. If no output file is specified
1014 using the B<--output> or B<--append>, the output is redirected to I<clive-log>
1015 file.
1016
1017 =back
1018
1019 B<Cache Options>
1020
1021 =over 4
1022
1023 =item B<-R --renew>
1024
1025 Renew the cache entries for the input URLs. See L</CACHE> in the manual page.
1026
1027 =item B<-s --show>
1028
1029 Print cache entries to standard output. See L</CONFIG> in the manual page
1030 for how to configure the output.
1031
1032 =item B<-g --grep=>I<pattern>
1033
1034 Grep cache entries for I<pattern>. All cache entry values are included in
1035 the search.
1036
1037     % clive --grep=git
1038     % clive --grep=^git --ignore-case
1039
1040 The matched entries are then extracted. To only show the matches, use the
1041 B<--show> option. For example:
1042
1043     % clive -sg ^git
1044
1045 =item B<-i --ignore-case>
1046
1047 When used with B<--grep>, causes clive to ignore case differences when
1048 matching the I<pattern>.
1049
1050 =item B<-D --delete>
1051
1052 When used with B<--grep>, deletes the matched entries from cache. If used
1053 together with the B<--show> option, causes clive to prompt to confirm delete.
1054 For example:
1055
1056     % clive -siDg ^git
1057
1058 =item B<-C --clear>
1059
1060 Clear cache and exit.
1061
1062 =back
1063
1064 B<Logging and Input Options>
1065
1066 =over 4
1067
1068 =item B<-o --output=>I<logfile>
1069
1070 Log all messages to I<logfile>. The messages are normally reported to
1071 stdout and stderr.
1072
1073 =item B<-a --append=>I<logfile>
1074
1075 Append to I<logfile>. This is the same as B<--output> but it appends to
1076 I<logfile> instead of overwriting it. If the I<logfile> does not exist,
1077 the file is created.
1078
1079 =item B<-d --debug>
1080
1081 Causes the program to print debug messages.
1082
1083 =item B<-G --noprogress>
1084
1085 Turn off progress meter.
1086
1087 =item B<-q --quiet>
1088
1089 Turn off all output.
1090
1091 =item B<-r --recall>
1092
1093 Recall the last URL batch. If this option is used, no URLs need to be present
1094 on the command-line.
1095
1096 =item B<-x --paste>
1097
1098 Paste input from clipboard. If this option is used, no URLs need to be present
1099 on the command-line. The pasted URLs are expected to be separated with newlines.
1100
1101 =back
1102
1103 B<Download Options>
1104
1105 =over 4
1106
1107 =item B<-c --continue>
1108
1109 Continue extraction of a partially downloaded file. Note that this works only
1110 with HTTP servers that support the "Range" header. Ignored unless I<localfile>
1111 E<lt> I<remotefile>.
1112
1113 The "requested range was not delivered" error typically implies that the
1114 host does not allow continuing partially extracted video files. You will
1115 see this error if you attempt to continue a partially downloaded flv video
1116 from Youtube, for example.
1117
1118 =item B<-X --noproxy>
1119
1120 Do not use the proxy defined in the config or the http_proxy environment
1121 variable.
1122
1123 =item B<-L --nologin>
1124
1125 Do not log in. Ignored unless [youtube]:user and [youtube]:pass are used
1126 in the config file.
1127
1128 =item B<-n --noextract>
1129
1130 Do not actually extract any videos.
1131
1132 =item B<-f --format=>I<format>
1133
1134 Extract I<format> of the video. See L</FORMATS> in the manual page.
1135
1136 =item B<-P --noplay>
1137
1138 Disable subsequent play. Ignored unless [commands]:play is used in the
1139 configuration file.
1140
1141 =back
1142
1143 =head1 EXAMPLES
1144
1145 =over 4
1146
1147 =item clive "http://youtube.com/watch?v=3HD220e0bx4"
1148
1149 Extracts the video from the specified URL.
1150
1151 =item cat url.lst | clive
1152
1153 Reads input from UNIX pipe. Separate each URL with a newline.
1154
1155 =item clive -x URL URL
1156
1157 Combines input from the command-line and the clipboard (each URL separated
1158 with a newline).
1159
1160 =item clive -rf mp4
1161
1162 Recalls the last URL batch and extracts the mp4 format.
1163
1164 =item clive -g 3HD220e0bx4
1165
1166 Greps the pattern from the cache and extracts the matched videos.
1167
1168 =item clive -iDg ^3hd2
1169
1170 Same as above but I<deletes> the matched entries from the cache instead of
1171 extracting them.
1172
1173 =item clive -s
1174
1175 Dumps the contents of the cache to stdout.
1176
1177 =item clive -sig ^3hd2
1178
1179 Instead of displaying all of the cache entries, show only the matching ones.
1180
1181 =item clive -big ^3hd2 -o my.log
1182
1183 Goes to background immediately after startup, redirects output to I<my.log>
1184 file, greps for the pattern and extracts the video.
1185
1186 =item clive -bqig ^3hd2
1187
1188 Same as above but turns off all output. See also the B<--noprogress> option.
1189
1190 =back
1191
1192 =head1 FORMATS
1193
1194 clive defaults to extract the flv format unless the B<--format> option is
1195 used. The requested format may not always be available and in such a case
1196 the server usually returns the HTTP/404 or the HTTP/403 error.
1197
1198 The quality of the video depends on the uploaded video quality. Each
1199 website typically recompresses the uploaded videos to 320x240 resolution
1200 (sometimes higher). As this varies per video and website, you should not
1201 read too much into the video quality information listed below.
1202
1203 =over 4
1204
1205 =item B<www.youtube.com>
1206
1207 Formats: flv | mp4 | 3gpp | xflv
1208
1209 The flv format is usually available unless the video has been removed or
1210 set private. The mp4 and 3gpp formats are often, or will become, available.
1211 The xflv on the other hand appears to be very rarely available.
1212
1213 Videos dating back to 2006 are usually available as flv only. The B<--continue>
1214 option should work with all other formats but flv.
1215
1216 =back
1217
1218 =over 4
1219
1220 =item B<video.google.com>
1221
1222 Formats: flv | mp4
1223
1224 The mp4 may not always be available.
1225
1226 The B<--continue> option does not work with the flv format. Streaming seems
1227 impossible with the mp4. For a comparison, this is possible with Youtube's
1228 mp4 videos which are compressed using a different mp4 codec.
1229
1230 =back
1231
1232 =over 4
1233
1234 =item B<www.sevenload.com>
1235
1236 Formats: flv
1237
1238 The B<--continue> option works.
1239
1240 =back
1241
1242 =over 4
1243
1244 =item B<www.break.com>
1245
1246 Formats: flv | wmv
1247
1248 The B<--continue> option works.
1249
1250 =back
1251
1252 =over 4
1253
1254 =item B<www.metacafe.com>
1255
1256 Formats: flv
1257
1258 The B<--continue> option works.
1259
1260 =back
1261
1262 =head1 CACHE
1263
1264 The cache has two purposes:
1265
1266 =over 4
1267
1268 =item 1.
1269
1270 Gather reusable info for a fast re-extraction without having to fetch the
1271 same data again.
1272
1273 =item 2.
1274
1275 Keep a record of videos. The B<--grep> option can then later be used to
1276 extract the videos.
1277
1278 =back
1279
1280 Each cache entry contains information about a video, including, but not limited
1281 to, page title, file length and extraction URL.
1282
1283 Some entries may need to be renewed from time to time as some websites have
1284 their extraction URLs expire after a while. Youtube is an example of this.
1285 Youtube servers usually return the HTTP/410 error if the extraction URL has
1286 expired. You can use the B<--renew> option to fix this.
1287
1288 Note that if you use a different B<--format> than previously, clive will renew
1289 the cache entry automatically. This is done for two reasons:
1290
1291 =over 4
1292
1293 =item 1.
1294
1295 The cached extraction URL would point to a wrong file
1296
1297 =item 2.
1298
1299 The file length would be incorrect
1300
1301 =back
1302
1303 =head1 UNICODE
1304
1305 As long as the terminal can handle unicode, so should clive. Details of enabling
1306 unicode in your terminal fall outside the scope of this manual page.
1307 If you are running X, switching to a unicode capable terminal
1308 (e.g. L<uxterm(1)>) may also provide some remedy to this.
1309
1310 If you are using a user-defined character class in the config file,
1311 make sure it is not blocking any unicode characters. See L</CONFIG>.
1312
1313 =head1 FILES
1314
1315 =over 4
1316
1317 =item ~/.config/clive/config
1318
1319 Configuration file for clive. See L</CONFIG>.
1320
1321 =item ~/.config/clive/cache
1322
1323 Contains the cache entries of the visited URLs. A Berkeley DB (Hash) file.
1324
1325 =item ~/.config/clive/recall
1326
1327 Contains the last URL batch. Can be recalled with the B<--recall> option.
1328
1329 =back
1330
1331 =head1 CONFIG
1332
1333     ## Example config file for clive.
1334     ## Recommended: chmod 0600 ~/.config/clive/config
1335
1336     [http]
1337         ## HTTP User-agent string (default: Mozilla/5.0).
1338         agent = Furball/0.2
1339
1340         ## HTTP proxy.
1341         proxy = http://foo:1234
1342
1343     [output]
1344         ## Save videos to directory (default: cwd).
1345         savedir = /home/user/videos
1346
1347         ## Character class used to filter out garbage characters from
1348         ## video filenames (default: \w).
1349         cclass = [A-Za-z0-9]
1350         #cclass = .
1351
1352         ## Extracted video filename format (default: %t-(%i)-[%d].%s).
1353         ## %t = video name after applying the character class regex
1354         ## %s = video file suffix (e.g. flv)
1355         ## %d = video domain
1356         ## %i = video id
1357         ## %D = current date
1358         ## %T = current time
1359         ## %S = timestamp (same as: %D %T)
1360         file = %t.%s
1361
1362         ## Format for --show (default: %D: "%t" | %mMB)
1363         ## %t = video page title
1364         ## %i = video id
1365         ## %l = video file length (bytes)
1366         ## %u = video page url
1367         ## %x = video extraction url
1368         ## %D = video extraction date
1369         ## %T = video extraction time
1370         ## %S = video extraction timestamp (same as: %D %T)
1371         show = %t (id: %i | bytes: %l)
1372
1373     [youtube]
1374         ## Username and password for Youtube. OPTIONAL unless you
1375         ## plan on extracting flagged content.
1376         user = myusername
1377         pass = mypassword
1378
1379     [commands]
1380         ## Path to a player command. If used, clive will play the
1381         ## extracted videos subsequently. Be sure to use the %i
1382         ## specifier for input file.
1383         play = /usr/local/bin/xine -f %i
1384
1385 =head1 SEE ALSO
1386
1387 =over 4
1388
1389 =item Website:
1390
1391 http://clive.sf.net/
1392
1393 =item Project:
1394
1395 http://googlecode.com/p/clive/
1396
1397 =item Issue Tracker:
1398
1399 http://googlecode.com/p/clive/issues/
1400
1401 =item Announcements:
1402
1403 http://googlegroups.com/group/clive-announce/
1404
1405 =back
1406
1407 =head1 OTHER
1408
1409 A clive development repository can be obtained from:
1410
1411     git clone git://repo.or.cz/clive.git
1412
1413 Patches welcome.
1414
1415 =head1 AUTHOR
1416
1417 Written by Toni Gundogdu <legatvs@gmail.com>.
1418
1419 =cut