Version bump.
[clive-utils.git] / clivescan
blobe966f1718f30610ee54f16d12159288978465869
1 #!/usr/bin/env perl
2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clivescan, the video link scanning utility for clive
6 # Copyright (c) 2008 Toni Gundogdu <legatvs@gmail.com>
8 # Permission to use, copy, modify, and distribute this software for any
9 # purpose with or without fee is hereby granted, provided that the above
10 # copyright notice and this permission notice appear in all copies.
12 # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
13 # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
14 # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
15 # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
16 # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
17 # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
18 # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 ###########################################################################
21 use warnings;
22 use strict;
24 binmode(STDOUT, ":utf8");
26 use WWW::Curl::Easy 4.05;
27 use HTML::TokeParser;
28 use Tk::DialogBox;
29 use Config::Tiny;
30 use Tk::Tree;
31 use Tk;
33 # Core modules:
34 use Getopt::Long qw(:config bundling);
35 use Digest::SHA qw(sha1_hex);
36 use Pod::Usage qw(pod2usage);
37 use File::Path qw(mkpath);
38 use File::Find qw(find);
39 use Cwd qw(getcwd);
40 use File::Spec;
41 use Encode;
# Non-essentials
# Optional modules: features that depend on them are switched off when
# the module cannot be loaded. Every flag starts out enabled, so that a
# module which loads successfully is actually reported as available
# (the Edit menu, RefontTree and the version listing rely on this).
my %opted_mods = (Clipboard => 1, FontDialog => 1);
eval "use Clipboard; 1"      or $opted_mods{Clipboard}  = 0;
eval "use Tk::FontDialog; 1" or $opted_mods{FontDialog} = 0;
my $VERSION = "2.1.1-dev";

# Configuration directory: $CLIVESCAN_HOME wins, otherwise fall back to
# the .config/clivescan directory below $HOME.
my $CONFIGDIR = $ENV{CLIVESCAN_HOME};
$CONFIGDIR = File::Spec->catfile($ENV{HOME}, ".config/clivescan")
    unless $CONFIGDIR;

# Both files live directly inside the configuration directory.
my ($CONFIGFILE, $PREFSFILE) =
    map { File::Spec->catfile($CONFIGDIR, $_) } qw(config prefs);

# Program state shared by the subroutines below.
my (
    %opts,          # options (config file, prefs file, command line)
    @queue,         # URLs still to be scanned
    %found_queue,   # scan results, keyed by the SHA-1 of the scanned URL
);

my $curl;           # curl handle (reused throughout the lifespan)

# GUI handles
my (
    $mw,            # main window
    $pwmain,        # main paned window
    $pwtop,         # top paned window
    $pwbottom,      # bottom paned window
    $lbtlink,       # listbox tree of found links
    $lbtqueue,      # listbox tree of queued links
);
# Read the configuration file and the saved GUI preferences, then let
# the command line override them.
my $conf  = Config::Tiny->read($CONFIGFILE);
my $prefs = Config::Tiny->read($PREFSFILE);

# Settings taken from the [clive] and [http] sections of the config file
$opts{clive} = $conf->{clive}->{path};
$opts{opts}  = $conf->{clive}->{opts};
$opts{agent} = $conf->{http}->{agent};
$opts{proxy} = $conf->{http}->{proxy};

# Settings taken from the [gui] section of the prefs file
$opts{$_} = $prefs->{gui}->{$_}
    for qw(geometry pwmain pwtop pwbottom mainfont);

# Defaults
$opts{strict}     = 1;
$opts{mainfont} ||= "{helvetica} -12 bold";

my @switches = qw(debug|d help|h manual|m version|v all|a paste|x quiet|q);
my @valued   = qw(clive|c=s opts|o=s agent|U=s proxy|y=s);

GetOptions(\%opts, @switches, @valued,
    # Workaround since '$longopt|shortopt' is a no-no.
    'noproxy|X'  => sub { $opts{proxy}  = "" },
    'nostrict|n' => sub { $opts{strict} = 0 },
) or pod2usage(1);
# --version is handled here rather than through a GetOptions callback:
# 'version|v' => \&print_version and exit cannot tango with tk.
if ( $opts{version} ) { print_version(0); }
pod2usage(-exitstatus => 0, -verbose => 1) if $opts{help};
pod2usage(-exitstatus => 0, -verbose => 2) if $opts{manual};

# Path to clive: option/config first, then $CLIVE_PATH, then a search.
$opts{clive} ||= $ENV{CLIVE_PATH};
find_clive() if not $opts{clive};

get_queue();

# Unbuffer STDERR and STDOUT; STDOUT is the selected handle afterwards.
for my $handle ( \*STDERR, \*STDOUT ) {
    select $handle;
    $| = 1;
}

process_queue();

# With --all everything found is grabbed right away, otherwise the GUI
# lets the user pick.
if ( $opts{all} ) { grab_all(); }
else              { init_gui(); }
112 ## Subroutines: Connection
# Create the shared curl handle ($curl) and apply the connection
# settings from %opts: user agent (default "Mozilla/5.0"), proxy (only
# when one is defined) and verbose output (only with --debug).
sub init_curl {
    $curl = WWW::Curl::Easy->new;

    my $agent = $opts{agent} || "Mozilla/5.0";
    $curl->setopt(CURLOPT_USERAGENT, $agent);

    if ( defined $opts{proxy} ) {
        $curl->setopt(CURLOPT_PROXY, $opts{proxy});
    }
    if ( $opts{debug} ) {
        $curl->setopt(CURLOPT_VERBOSE, 1);
    }

    # Fixed settings: follow redirects, set the referer automatically,
    # and deliver the body without the headers.
    my @fixed = (
        [ CURLOPT_FOLLOWLOCATION, 1 ],
        [ CURLOPT_AUTOREFERER,    1 ],
        [ CURLOPT_HEADER,         0 ],
        [ CURLOPT_NOBODY,         0 ],
    );
    $curl->setopt(@$_) for @fixed;
}
# Fetch a page with the shared curl handle.
#
# Takes the URL to fetch. Returns a list of three values: the result of
# $curl->perform, the in-memory filehandle the response was written to
# (the caller closes it), and the response passed through decode_utf8.
# Dies if the in-memory buffer cannot be opened.
sub fetch_page {
    my $url  = shift;
    my $resp = "";

    # The response body is collected in $resp through an in-memory handle.
    open my $fh, ">", \$resp
        or die "error: cannot open in-memory buffer: $!\n";

    $curl->setopt(CURLOPT_URL, $url);
    $curl->setopt(CURLOPT_ENCODING, "");
    $curl->setopt(CURLOPT_WRITEDATA, $fh);
    my $rc = $curl->perform;

    return ($rc, $fh, decode_utf8($resp));
}
138 ## Subroutines: Queue
# Fill @queue with the URLs to scan.
#
# Sources, in order: the clipboard (with --paste), the command-line
# arguments, and - only if the queue is still empty - standard input.
# Duplicates are removed while keeping the order in which the URLs were
# given. Exits with status 1 if --paste is used without the Clipboard
# module.
sub get_queue {
    if ( $opts{paste} ) {
        unless ( $opted_mods{Clipboard} ) {
            # Exit unconditionally; do not make it depend on whether
            # the message could be printed.
            print STDERR "error: Clipboard module not found\n";
            exit 1;
        }
        my $data = Clipboard->paste();
        if ( $data ) {
            parse_input($_) for split /\n/, $data;
        }
    }

    parse_input($_) for @ARGV;

    unless ( @queue ) {
        while ( my $line = <STDIN> ) {
            parse_input($line);
        }
    }

    # Remove duplicates, first occurrence wins
    my %seen;
    @queue = grep { !$seen{$_}++ } @queue;
}
# Fetch every URL in @queue and scan the pages that were retrieved.
#
# A page is scanned when the transfer succeeded and the response code
# is 0 or 200. Failures are reported on STDERR and the next URL is
# processed.
sub process_queue {
    init_curl();

    for my $url ( @queue ) {
        print "fetch $url ..." unless $opts{quiet};

        my ($rc, $fh, $resp) = fetch_page($url);
        my $errmsg;

        if ( $rc != 0 ) {
            # $rc is a libcurl result code: strerror() applies to it.
            $errmsg = $curl->strerror($rc)." (curl/$rc)";
        }
        else {
            my $status = $curl->getinfo(CURLINFO_RESPONSE_CODE);
            if ( $status == 0 or $status == 200 ) {
                scan_page($url, \$resp);
            }
            else {
                # An HTTP status is not a libcurl result code, so it
                # must not be passed to strerror().
                $errmsg = "server returned http/$status";
            }
        }

        close $fh;
        print STDERR "\nerror: $errmsg\n" if $errmsg;
    }
}
# Search patterns, keyed by a descriptive host name. Returns a list of
# name => { in_scanurl, search_for, url_prefix } pairs.
sub _scan_patterns {
    return (
        # in_scanurl: regex used to bind this search pattern to specified
        #   domain. Undefined for embedded link searches. See clivescan(1).
        # search_for: regex used to grab the video ID
        # url_prefix: combined with video ID to construct video page URL
        #
        # NOTE: We're not using domains in the search patterns because
        # most of the supported hosts refer to their videos using local
        # paths, e.g. <a href="/watch?v=...">.
        Youtube => {
            in_scanurl => qr|\Qyoutube.com\E|i,
            search_for => qr|\Q/watch?v=\E(.*?)["< &#%]|i,
            url_prefix => "http://youtube.com/watch?v=",
        },
        YoutubeEmbed => {
            in_scanurl => undef,
            search_for => qr|\Qyoutube.com/v/\E(.*?)["< &#%]|i,
            url_prefix => "http://youtube.com/watch?v=",
        },
        GVideo => { # NOTE: Ignores original TLD, uses .com for extraction
            in_scanurl => qr|\Qvideo.google.\E|i,
            search_for => qr|\Q/videoplay?docid=\E(.*?)["< &#%]|i,
            url_prefix => "http://video.google.com/videoplay?docid=",
        },
        GVideoEmbed => { # NOTE: Ditto.
            in_scanurl => undef,
            search_for => qr|\Q/googleplayer.swf?docid=\E(.*?)["< &#%]|i,
            url_prefix => "http://video.google.com/videoplay?docid=",
        },
#        Metacafe => { # NOTE: metacafe.com/watch/$id is enough for redirect
#            in_scanurl => qr|\Qmetacafe.com\E|i,
#            search_for => qr|\Q/watch/\E(.*?)/|i,
#            url_prefix => "http://metacafe.com/watch/",
#        },
#        MetacafeEmbed => {
#            in_scanurl => undef,
#            search_for => qr|\Qmetacafe.com/fplayer/\E(.*?)/|i,
#            url_prefix => "http://metacafe.com/watch/",
#        },
        SevenLoad => { # NOTE: Ditto. Subdomain can be ignored.
            in_scanurl => qr|\Qsevenload.com\E|i,
            search_for => qr|\Q/videos/\E(.*?)\-|i,
            url_prefix => "http://sevenload.com/videos/",
        },
        SevenLoadEmbed => {
            in_scanurl => undef,
            search_for => qr|\Qsevenload.com/pl/\E(.*?)/|i,
            url_prefix => "http://sevenload.com/videos/",
        },
        LastfmYoutube => { # Lastfm wraps some of the Youtube videos
            in_scanurl => qr|\Qlast.fm\E|i,
            search_for => qr|\Q/+videos/\E\Q+1-\E(.*?)["< &#%]|i,
            url_prefix => "http://youtube.com/watch?v=",
        },
        Break => {
            in_scanurl => qr|\Qbreak.com\E|i,
            search_for => qr|\Q/index/\E(.*?)["< &#%]|i,
            url_prefix => "http://break.com/index/",
        },
        # TODO: add BreakEmbed, e.g.:
        # Page URL: http://break.com/index/if-all-movies-had-cell-phones.html
        # Embed URL: http://embed.break.com/600081
        Liveleak => {
            in_scanurl => qr|\Qliveleak.com\E|i,
            search_for => qr|\Q/view?i=\E(.*?)["< &#%]|i,
            url_prefix => "http://liveleak.com/view?i=",
        },
        LiveleakEmbed => {
            in_scanurl => undef,
            url_prefix => "http://liveleak.com/view?i=",
            search_for => qr|\Qliveleak.com/e/\E(.*?)["< &#%]|i,
        },
    );
}

# Record a found link and show progress: a dot per link, the running
# total on every fifth link. Nothing is printed with --quiet.
sub _scan_progress {
    my ($linksref, $link) = @_;
    push @$linksref, $link;
    return if $opts{quiet};

    my $count = scalar @$linksref;
    print( ($count % 5 == 0) ? $count : "." );
}

# Return the trimmed text of the first <title> element of the HTML
# document referenced by the argument, or "" when there is none.
sub _page_title {
    my $htmlref = shift;

    my $p = HTML::TokeParser->new($htmlref);
    return "" unless $p and $p->get_tag("title");

    my $text = $p->get_trimmed_text;
    return defined $text ? $text : "";
}

# Scan a fetched page for video links and verify each of them.
#
# Arguments: the URL of the scanned page and a reference to its content
# (newlines are removed from the content in place).
#
# Every unique link found is fetched; links answering with response
# code 0 or 200 are stored, together with their page title, in
# %found_queue under the SHA-1 of the scanned URL. Pages and videos
# without a title are listed under their URL instead of being dropped.
sub scan_page {
    my ($scanurl, $pageref) = @_;
    print "done.\n" unless $opts{quiet};
    $$pageref =~ tr{\n}//d;

    my $pagetitle = _page_title($pageref);
    $pagetitle = $scanurl unless length $pagetitle;

    my %re = _scan_patterns();

    print "scan " unless $opts{quiet};

    my @links;
    for my $host ( sort keys %re ) {
        my $pattern = $re{$host};

        # In strict mode a host-bound pattern applies only to pages of
        # that host; embed patterns (undefined in_scanurl) always apply.
        next if $opts{strict}
            and defined $pattern->{in_scanurl}
            and $scanurl !~ $pattern->{in_scanurl};

        while ( $$pageref =~ /$pattern->{search_for}/g ) {
            _scan_progress(\@links, "$pattern->{url_prefix}$1");
        }
    }

    print "\nremove duplicates ..." unless $opts{quiet};

    my %seen; # Weed out duplicates, keep the order of discovery
    @links = grep { !$seen{$_}++ } @links;

    print " found " . scalar(@links) . " unique link(s).\n"
        unless $opts{quiet};

    my %verified_links;
    for my $link ( @links ) {
        print "fetch $link ..." unless $opts{quiet};

        my ($rc, $fh, $resp) = fetch_page($link);
        my $errmsg;

        if ( $rc != 0 ) {
            # $rc is a libcurl result code: strerror() applies to it.
            $errmsg = $curl->strerror($rc)." (curl/$rc)";
        }
        else {
            my $status = $curl->getinfo(CURLINFO_RESPONSE_CODE);
            if ( $status == 0 or $status == 200 ) {
                print "done.\n" unless $opts{quiet};

                # Grab title
                my $title = _page_title(\$resp);
                $title = $link unless length $title;

                # Store, skip if link exists already. The link comes
                # from decoded text, so encode it before hashing:
                # sha1_hex cannot digest wide characters.
                my $sha1 = sha1_hex(encode_utf8($link));
                $verified_links{$sha1} = {link => $link, title => $title}
                    unless defined $verified_links{$sha1};
            }
            else {
                # An HTTP status is not a libcurl result code, so it
                # must not be passed to strerror().
                $errmsg = "server returned http/$status";
            }
        }

        close $fh;
        print STDERR "\nerror: $errmsg\n" if $errmsg;
    }

    if ( scalar keys %verified_links > 0 ) {
        $found_queue{ sha1_hex(encode_utf8($scanurl)) } = {
            title  => $pagetitle,
            url    => $scanurl,
            videos => {%verified_links},
        };
    }
}
# Collect the link of every verified video of every scanned page and
# hand the whole list to clive.
sub grab_all {
    my @links;
    foreach my $page ( values %found_queue ) {
        push @links, map { $_->{link} } values %{ $page->{videos} };
    }
    run_clive(@links);
}
337 ## Subroutines: Helpers
# Normalise one line of input and append it to @queue.
#
# Leading and trailing whitespace (including "\r\n" line ends) is
# removed and empty input is ignored. "http://" is prepended unless the
# URL already starts with http:// or https://.
sub parse_input {
    my $url = shift;
    return unless defined $url;

    $url =~ s/^\s+//;
    $url =~ s/\s+$//;
    return if $url eq "";

    # Do not turn "https://host" into "http://https://host".
    $url = "http://$url" if $url !~ m!^https?://!i;
    push @queue, $url;
}
# Locate the clive command and store its path in $opts{clive}.
#
# The directories of $PATH are checked in order (the current directory
# if $PATH is unset) and the first executable file named "clive" wins.
# The directories themselves are checked, not their subdirectories.
# Prints an error and exits with status 1 if nothing is found.
sub find_clive {
    print "locate clive ..." unless $opts{quiet};

    my @dirs = $ENV{PATH} ? split(/:/, $ENV{PATH}) : (getcwd);

    for my $dir ( @dirs ) {
        next unless length $dir;

        my $candidate = File::Spec->catfile($dir, 'clive');
        next unless -f $candidate and -x _;

        $opts{clive} = $candidate;
        last;
    }

    if ( $opts{clive} ) {
        print "$opts{clive}\n" unless $opts{quiet};
        return;
    }

    print STDERR "\nerror: not found, use --clive=path\n";
    exit 1;
}
# Run clive on the given list of video page URLs.
#
# $opts{clive} and $opts{opts} are split into words the way a shell
# would (quotes are honoured), but the command is started without a
# shell: the URLs were scraped from web pages and must never be
# interpreted as shell syntax. Returns what system() returns, or -1 if
# there is no command to run.
sub run_clive {
    my (@q) = @_;

    require Text::ParseWords;

    my @cmd  = Text::ParseWords::shellwords(
        defined $opts{clive} ? $opts{clive} : '');
    my @args = Text::ParseWords::shellwords(
        defined $opts{opts} ? $opts{opts} : '');

    unless ( @cmd ) {
        print STDERR "error: no clive command to run\n";
        return -1;
    }

    # The indirect-object form never goes through the shell, not even
    # for a one-element list.
    return system { $cmd[0] } @cmd, @args, @q;
}
# Build the version banner: program version, platform, Perl version and
# the versions of the modules in use ("-" for optional modules that are
# not available).
#
# With a true argument the banner is returned; otherwise it is printed
# and the program exits.
sub print_version {
    my $noexit = shift;

    my $perl_v   = sprintf "%vd", $^V;
    my $clipb_v  = "-";
    my $fntdlg_v = "-";
    $clipb_v  = $Clipboard::VERSION      if $opted_mods{Clipboard};
    $fntdlg_v = $Tk::FontDialog::VERSION if $opted_mods{FontDialog};

    my @lines = (
        "clivescan version $VERSION [$^O]",
        "Copyright (c) 2008 Toni Gundogdu.",
        "* Perl/$perl_v",
        "Modules:",
        "* Config::Tiny/$Config::Tiny::VERSION\t\t* WWW::Curl/$WWW::Curl::VERSION",
        "* Tk/$Tk::VERSION\t\t\t* Tk::Tree/$Tk::Tree::VERSION",
        "* Tk::DialogBox/$Tk::DialogBox::VERSION\t\t* Clipboard/$clipb_v",
        "* Tk::FontDialog/$fntdlg_v\t\t* HTML::TokeParser/$HTML::TokeParser::VERSION",
        "Core modules:",
        "* Getopt::Long/$Getopt::Long::VERSION\t\t* Digest::SHA/$Digest::SHA::VERSION",
        "* File::Spec/$File::Spec::VERSION\t\t* File::Find/$File::Find::VERSION",
        "* File::Path/$File::Path::VERSION\t\t* Encode/$Encode::VERSION",
        "* Pod::Usage/$Pod::Usage::VERSION\t\t* Cwd/$Cwd::VERSION",
    );
    my $s = sprintf join("\n", @lines) . "\n";

    if ( $noexit ) {
        return $s;
    }
    print $s;
    exit;
}
389 # GUI:
# Build the menubar of the main window: File, Edit (only when
# Tk::FontDialog is available) and Help.
sub _gui_build_menubar {
    my $mb = $mw->Menu;
    $mw->configure(-menu => $mb);

    # Menu: File
    my $file = $mb->cascade(-label => '~File', -tearoff => 0);
    $file->command(-label => '~Extract videos in queue...',
        -command => \&on_extract);
    $file->separator;
    $file->command(-label => '~Quit',
        -command => sub { save_prefs(); exit; } );

    # Menu: Edit
    if ( $opted_mods{FontDialog} ) {
        my $edit = $mb->cascade(-label => '~Edit', -tearoff => 0);
        $edit->command(-label => 'Prefere~nces...',
            -command => \&on_prefs);
    }

    # Menu: Help
    my $help = $mb->cascade(-label => '~Help', -tearoff => 0);
    $help->command(-label => '~About...',
        -command => \&on_about);
}

# Fill the tree of found links ($lbtlink) from %found_queue: one
# top-level entry per scanned page title, one child per verified video.
sub _gui_fill_link_tree {
    for my $page ( values %found_queue ) {
        # Dots are removed from titles because the tree paths built
        # below use "." to separate parent and child.
        my $scantitle = defined $page->{title} ? $page->{title} : "";
        $scantitle =~ tr{.}//d;

        # Two scanned pages may share a title: add the parent entry
        # only once and list the videos of both pages below it.
        unless ( $lbtlink->infoExists($scantitle) ) {
            $lbtlink->add($scantitle);
            $lbtlink->itemCreate($scantitle, 0,
                -text => $scantitle, -itemtype => 'text');
        }

        for my $video ( values %{ $page->{videos} } ) {
            my $title = defined $video->{title} ? $video->{title} : "";
            $title =~ tr{.}//d;

            # Videos with identical titles get distinct paths through
            # the "(#k)" suffix.
            my $k = 0;
            ++$k while $lbtlink->infoExists("$scantitle.$title (#$k)");
            my $path = "$scantitle.$title (#$k)";

            $lbtlink->add($path, -data => {%$video});
            $lbtlink->itemCreate($path, 0,
                -text => $title, -itemtype => 'text');
        }
    }

    $lbtlink->autosetmode;
    $lbtlink->close($_) foreach ( $lbtlink->infoChildren('') );
}

# Create the main window and run the GUI.
#
# Returns immediately when nothing was found. Otherwise builds the
# window (menubar, tree of found links with grab buttons on top, tree
# of queued links with its buttons below) and enters the Tk main loop.
sub init_gui {
    return if scalar keys %found_queue == 0;

    $mw = MainWindow->new;
    $mw->geometry($opts{geometry}) if defined $opts{geometry};
    $mw->title('clivescan');
    $mw->protocol('WM_DELETE_WINDOW', sub { save_prefs(); exit; });

    _gui_build_menubar();

    # The GUI has an upper and a lower part
    $pwmain = $mw->Panedwindow(-orient => 'v', -opaqueresize => 0);

    # Upper part
    $pwtop = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);

    # Upper: found links
    my $lbar = $pwtop->Frame;

    $lbtlink = $lbar->Scrolled('Tree',
        -scrollbars => 'osoe',
        -itemtype   => 'text',
        -selectmode => 'extended',
        -indicator  => 1,
        -drawbranch => 1,
    )->pack(-side => 'top', -expand => 1, -fill => 'both');

    _gui_fill_link_tree();

    my $rbar = $pwtop->Frame; # Button toolbar
    $rbar->Button(-text => 'Grab', -command => \&on_grab
    )->pack(-fill => 'x');

    $rbar->Button(-text => 'Grab everything', -command => \&on_grab_all
    )->pack(-fill => 'x');

    $pwtop->add($lbar, $rbar, -width => $opts{pwtop} || 200);

    # Lower part
    $pwbottom = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);

    $lbtqueue = $pwbottom->Scrolled('Tree',
        -scrollbars => 'osoe',
        -itemtype   => 'text',
        -selectmode => 'extended',
        -indicator  => 1,
        -drawbranch => 1,
    );

    my $bar = $pwbottom->Frame; # Button toolbar

    $bar->Button(-text => 'Remove', -command => \&on_remove
    )->pack(-fill => 'x');

    $bar->Button(-text => 'Clear', -command => \&on_clear
    )->pack(-fill => 'x');

    $bar->Button(-text => 'Extract videos...', -command => \&on_extract
    )->pack(-fill => 'x', -side => 'bottom');

    $pwbottom->add($lbtqueue, $bar, -width => $opts{pwbottom} || 200);

    # Add upper and lower parts to main paned window
    $pwmain->add($pwtop, $pwbottom, -height => $opts{pwmain} || 200);

    $mw->RefontTree(-font => $opts{mainfont})
        if $opted_mods{FontDialog};

    $pwmain->pack(-expand => 1, -fill => 'both');

    MainLoop;
}
# Write the GUI preferences (window geometry, sash positions of the
# three paned windows, main font) to the prefs file, creating the
# configuration directory first.
sub save_prefs {
    mkpath( [$CONFIGDIR], 0, 0700 );

    my $geometry = $mw->geometry();

    # sashCoord yields an (x, y) pair: the vertical split uses y, the
    # horizontal splits use x. 7 is subtracted from each, as before.
    my $main_y   = ($pwmain->sashCoord(0))[1];
    my $top_x    = ($pwtop->sashCoord(0))[0];
    my $bottom_x = ($pwbottom->sashCoord(0))[0];

    my $c = Config::Tiny->new;
    $c->{gui} = {
        geometry => $geometry,
        pwmain   => $main_y - 7,
        pwtop    => $top_x - 7,
        pwbottom => $bottom_x - 7,
        mainfont => $opts{mainfont},
    };

    $c->write($PREFSFILE);
}
# Apply the font chosen in the preferences dialog: remember it in
# %opts, refont the widget tree and save the preferences.
sub on_prefs_ok {
    my ($font) = @_;

    $opts{mainfont} = $font;
    $mw->RefontTree(-font => $opts{mainfont});
    save_prefs();
}
# Copy one entry of the found-links tree into the queue tree.
#
# Takes the tree path of the entry. Paths without a "." (the top-level
# page entries) and paths already queued are ignored. The parent entry
# is created in the queue tree on demand.
sub queue_item {
    my $path = shift;

    return unless $path =~ /\./;
    return if $lbtqueue->infoExists($path);

    my $video  = $lbtlink->infoData($path);
    my $parent = (split /\./, $path)[0];

    if ( not $lbtqueue->infoExists($parent) ) {
        $lbtqueue->add($parent);
        $lbtqueue->itemCreate($parent, 0,
            -text => $parent, -itemtype => 'text');
    }

    $lbtqueue->add($path, -data => {%$video});
    $lbtqueue->itemCreate($path, 0,
        -text => $video->{title}, -itemtype => 'text');
}
# "Grab" button: queue every entry selected in the found-links tree.
sub on_grab {
    for my $selected ( $lbtlink->infoSelection ) {
        queue_item($selected);
    }
    $lbtqueue->autosetmode;
}
# "Grab everything" button: queue the children of every top-level entry
# of the found-links tree.
sub on_grab_all {
    for my $top ( $lbtlink->infoChildren("") ) {
        my $parent = (split /\./, $top)[0];
        for my $child ( $lbtlink->infoChildren($parent) ) {
            queue_item($child);
        }
    }
    $lbtqueue->autosetmode;
}
# "Remove" button: delete the selected entries from the queue tree.
#
# Deleting an entry also deletes its children, so a selected child may
# already be gone when its turn comes; such entries are skipped instead
# of raising a Tk error.
sub on_remove {
    for my $path ( $lbtqueue->infoSelection ) {
        next unless $lbtqueue->infoExists($path);
        $lbtqueue->deleteEntry($path);
    }
}
# "Clear" button: empty the queue tree.
sub on_clear {
    return $lbtqueue->deleteAll();
}
# Help > About: show the version banner in a dialog with an OK button.
sub on_about {
    my $about = $mw->DialogBox(-title => 'About', -buttons => ['OK']);

    my $view = $about->add('Text');
    $view->pack;
    $view->insert('end', print_version(1)); # 1 => return, don't exit

    $about->Show;
}
# Let the user pick a font in a font dialog.
#
# Arguments: the parent widget, a reference to the variable holding the
# current font description, and the label previewing the font. When a
# font is chosen, the label is reconfigured and the variable updated;
# otherwise nothing changes.
sub change_font {
    my ($top, $lblv, $lbl) = @_;

    my $picker = $top->FontDialog(-initfont => $$lblv);
    my $font   = $picker->Show;
    return unless defined $font;

    my $descr = $top->FontDialog->GetDescriptiveFontName($font);
    $lbl->configure(-font => $descr);
    $$lblv = $descr;
}
# Edit > Preferences: dialog for choosing the main font. The choice is
# applied only when the dialog is closed with OK.
sub on_prefs {
    my $dlg = $mw->DialogBox(
        -title   => 'clivescan preferences',
        -buttons => ['OK','Cancel'],
    );

    my $hint = $dlg->add('Label', -text => 'Fonts: press to choose');
    $hint->grid(-sticky => 'w', -pady => 10);

    # Work on a copy so that Cancel leaves $opts{mainfont} untouched.
    my $mainfont  = $opts{mainfont};
    my $mainfontl = $dlg->Label(-textvariable => \$mainfont);

    my $button = $dlg->add('Button',
        -text    => 'Main font',
        -command => sub { change_font($dlg, \$mainfont, $mainfontl) },
    );
    $button->grid($mainfontl, -sticky => 'w', -padx => '5');

    on_prefs_ok($mainfont) if $dlg->Show eq 'OK';
}
# Extract the queued videos.
#
# Collects the links of all entries below the top-level entries of the
# queue tree; does nothing if there are none. Otherwise asks for the
# path to clive and its runtime options and, on OK, stores them in
# %opts, destroys the main window and runs clive on the links.
sub on_extract {
    my @q;
    for my $page ( $lbtqueue->infoChildren('') ) {
        for my $entry ( $lbtqueue->infoChildren($page) ) {
            my $video = $lbtqueue->infoData($entry);
            push @q, $video->{link};
        }
    }
    return if not @q;

    # Prompt for clive(1) options
    my $dlg = $mw->DialogBox(
        -title   => 'clive(1) options',
        -buttons => ['OK','Cancel'],
    );

    my $pathlabel = $dlg->add('Label', -text => 'Path to clive');
    my $clivepath = $dlg->Entry(-width => 60);
    $pathlabel->grid($clivepath, -sticky => 'w', -padx => '5');

    my $optslabel = $dlg->add('Label', -text => 'Runtime options');
    my $cliveopts = $dlg->Entry(-width => 60);
    $optslabel->grid($cliveopts, -sticky => 'w', -padx => '5');

    $clivepath->insert('end', $opts{clive});
    $cliveopts->insert('end', $opts{opts});

    return unless $dlg->Show() eq 'OK';

    $opts{clive} = $clivepath->get;
    $opts{opts}  = $cliveopts->get;
    $mw->destroy;
    run_clive(@q);
}
641 __END__
643 =head1 NAME
645 clivescan - the video link scanning utility for clive
647 =head1 SYNOPSIS
649 clivescan [option]... [URL]...
651 =head1 DESCRIPTION
653 clivescan is a utility that scans video pages for video links and
654 uses L<clive(1)> to extract them. The utility scans for video page
655 and embedded video links.
657 Historically, the video link scanning function was part of L<clive(1)>
658 and it was written in Python/Newt. The clivescan utility was written
659 in Perl/Tk to replace the feature that was removed in clive 2.0. This
660 utility is part of the B<clive-utils> project.
662 =head1 OPTIONS
664 You may freely specify options after the command-line arguments. For example:
666 % clivescan -a URL --opts=--noextract
668 B<Basic Options>
670 =over 4
672 =item B<-h --help>
674 Show help and exit.
676 =item B<-v --version>
678 Show version and exit.
680 =item B<-c --clive=>I<path>
682 I<path> to L<clive(1)> command. If unspecified, clivescan will attempt to
683 locate it in the $PATH. Additionally, the B<CLIVE_PATH> environment variable
684 can be used. See also L</CONFIG>.
686 =item B<-o --opts=>I<opts>
688 I<opts> to append to clive call. See L<clive(1)> for more on the available
689 options.
691 =item B<-a --all>
693 Grab all videos without prompting the GUI.
695 =item B<-n --nostrict>
697 This option provides a workaround for some search pattern issues that
698 are known to occur. For example:
700 % clivescan "http://video.google.com/videosearch?q=inurl%3Abreak"
702 Does not return any break.com videos even though the page lists them.
703 This happens because clivescan assumes that the domain break.com can
704 be found in the URL. To override this restrictive default behaviour:
706 % clivescan -n "http://video.google.com/videosearch?q=inurl%3Abreak"
708 =back
710 B<HTTP Options>
712 =over 4
714 =item B<-U --agent=>I<string>
716 Identify as I<string> to the HTTP server. Defaults to "Mozilla/5.0".
718 =item B<-y --proxy=>I<address>
720 Use I<address> for HTTP proxy, e.g. http://foo:1234. If http_proxy
721 environment variable is defined, it will be used.
723 =item B<-X --noproxy>
725 Do not use the defined HTTP proxy (B<--proxy>, config or http_proxy).
727 =back
729 =head1 EXAMPLES
731 =over 4
733 =item % clivescan youtube.com video.google.com
735 Scans both, Youtube and GoogleVideo front pages for video links.
737 =item % cat E<gt>E<gt> url.lst
739 http://video.google.com
740 http://youtube.com/communitychannel
741 http://sevenload.com
742 http://break.com
744 =item % cat url.lst | clivescan
746 Reads input from UNIX pipe.
748 =item % clivescan --opts="-f mp4"
750 Appends the I<opts> to the L<clive(1)> call.
752 =item % clivescan --all http://youtube.com
754 Grabs all found videos from the Youtube front page.
756 =back
758 =head1 FILES
760 clivescan searches the ~/.config/clivescan directory for the config file.
761 You can override this by setting the B<CLIVESCAN_HOME> environment variable.
763 =over 4
765 =item ~/.config/clivescan/config
767 Configuration file.
769 =item ~/.config/clivescan/prefs
771 GUI preferences (e.g. fonts, window position, sash coords, ...).
773 =back
775 =head1 CONFIG
777 ## Example config file for clivescan.
779 [clive]
780 path = /usr/local/bin/clive
781 opts = -f mp4
783 [http]
784 agent = Mozilla/5.0
785 proxy = http://foo:1234
787 =head1 SEE ALSO
789 L<clive(1)> L<clivefeed(1)>
791 =head1 OTHER
793 Project: http://googlecode.com/p/clive-utils/
795 A clive-utils development repository can be obtained from:
797 % git clone git://repo.or.cz/clive-utils.git
799 Patches welcome.
801 =head1 AUTHOR
803 Written by Toni Gundogdu <legatvs@gmail.com>
805 =cut