Fixed terminating if queue is empty.
[clive-utils.git] / clivescan
blob953e5211910970a3a58c58349dae5f07e2e4fe40
1 #!/usr/bin/env perl
2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clivescan, the video link scanning utility for clive
5 # Copyright (C) 2008 Toni Gundogdu.
7 # This file is part of clive-utils.
9 # clivescan is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
14 # clivescan is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with clivescan. If not, see <http://www.gnu.org/licenses/>.
21 ###########################################################################
23 # Keep it simple.
25 use warnings;
26 use strict;
28 binmode(STDOUT, ":utf8");
30 use HTML::TokeParser;
31 use WWW::Curl::Easy;
32 use Tk::FontDialog;
33 use Tk::DialogBox;
34 use Config::Tiny;
35 use Tk::Tree;
36 use Tk;
38 # Core modules:
39 use Getopt::Long qw(:config bundling);
40 use Digest::SHA qw(sha1_hex);
41 use File::Spec;
42 use File::Find;
43 use File::Path;
44 use Pod::Usage;
45 use Encode;
46 use Cwd;
48 # Non-essentials
49 my %opted_mods = (Clipboard => 1);
50 eval "use Clipboard"; $opted_mods{Clipboard}=0 if $@;
52 my $VERSION = "2.0beta2";
53 my $CONFIGDIR = $ENV{CLIVESCAN_CONFIGDIR}
54 || File::Spec->catfile($ENV{HOME}, ".config/clivescan");
55 my $CONFIGFILE = File::Spec->catfile($CONFIGDIR, "config");
56 my $PREFSFILE = File::Spec->catfile($CONFIGDIR, "prefs");
58 my %opts; # Holds the options
59 my @queue; # Holds the current URL queue
60 my %found_queue;# Holds the results of the scanned video page links
61 my $curl; # Holds the curl handle (reused throught lifespan)
62 my $mw; # Holds the main window handle (GUI)
63 my $pwmain; # Holds the handle to the main paned window
64 my $pwtop; # Holds the handle to the top paned window
65 my $pwbottom; # Holds the handle to the bottom paned window
66 my $lbtlink; # Holds the handle to the listbox tree of found links
67 my $lbtqueue; # Holds the handle to the listbox tree of queued links
69 # Parse config
70 my $conf = Config::Tiny->read($CONFIGFILE);
71 my $prefs = Config::Tiny->read($PREFSFILE);
72 %opts = (
73 clive => $conf->{clive}->{path},
74 opts => $conf->{clive}->{opts},
75 agent => $conf->{http}->{agent},
76 proxy => $conf->{http}->{proxy},
78 geometry=> $prefs->{gui}->{geometry},
79 pwmain => $prefs->{gui}->{pwmain},
80 pwtop => $prefs->{gui}->{pwtop},
81 pwbottom=> $prefs->{gui}->{pwbottom},
82 mainfont=> $prefs->{gui}->{mainfont},
85 # Parse cmdline
86 # Define those not read from config, init with defaults
87 $opts{quiet} = 0;
88 $opts{paste} = 0;
89 $opts{all} = 0;
90 $opts{strict} = 1;
91 $opts{debug} = 0;
92 $opts{help} = 0;
93 $opts{manual} = 0;
94 $opts{version} = 0;
95 $opts{mainfont} = $opts{mainfont} || "{helvetica} -12 bold";
97 GetOptions(\%opts,
98 'debug|d', 'help|h', 'manual|m', 'version|v', 'all|a',
99 'paste|x', 'quiet|q', 'clive|c=s', 'opts|o=s', 'agent|U=s',
100 'proxy|y=s',
101 # Workaround since '$longopt|shortopt' is a no-no.
102 'noproxy|X' => sub { $opts{proxy} = "" },
103 'nostrict|S' => sub { $opts{strict} = 0 },
104 ) or pod2usage(1);
106 # Since 'version|v' => \&print_version and exit cannot tango with tk
107 print_version(0) if $opts{version};
108 pod2usage(-exitstatus => 0, -verbose => 1) if $opts{help};
109 pod2usage(-exitstatus => 0, -verbose => 2) if $opts{manual};
111 $opts{clive} = $opts{clive} || $ENV{CLIVE_PATH};
112 find_clive() unless $opts{clive};
114 get_queue();
116 select STDERR; $| = 1; # => unbuffered
117 select STDOUT; $| = 1;
119 process_queue();
121 unless ( $opts{all} ) { init_gui(); }
122 else { grab_all(); }
125 ## Subroutines: Connection
127 sub init_curl {
128 $curl = WWW::Curl::Easy->new;
129 $curl->setopt(CURLOPT_USERAGENT, $opts{agent} || "Mozilla/5.0");
130 $curl->setopt(CURLOPT_PROXY, $opts{proxy}) if defined $opts{proxy};
131 $curl->setopt(CURLOPT_VERBOSE, 1) if $opts{debug};
132 $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
133 $curl->setopt(CURLOPT_AUTOREFERER, 1);
134 $curl->setopt(CURLOPT_HEADER, 0);
135 $curl->setopt(CURLOPT_NOBODY, 0);
138 sub fetch_page {
139 my ($url, $resp, $rc) = (shift, 0, 0);
140 open my $fh, ">", \$resp;
142 $curl->setopt(CURLOPT_URL, $url);
143 $curl->setopt(CURLOPT_ENCODING, "");
144 $curl->setopt(CURLOPT_WRITEDATA, $fh);
145 $rc = $curl->perform;
147 return ($rc, $fh, $resp);
151 ## Subroutines: Queue
153 sub get_queue {
154 if ( $opts{paste} ) {
155 print STDERR "error: Clipboard module not found" and exit
156 unless $opted_mods{Clipboard};
157 my $data = Clipboard->paste();
158 if ( $data ) {
159 parse_input($_) foreach split/\n/,$data;
163 parse_input($_) foreach @ARGV;
164 unless ( @queue ) { parse_input($_) while ( <STDIN> ); }
166 my %h = map {$_,1} @queue; # Remove duplicates
167 @queue = keys %h;
170 sub process_queue {
171 init_curl();
172 foreach ( @queue ) {
173 print "Fetching $_ ..." unless $opts{quiet};
174 my ($rc, $fh, $resp, $errmsg) = fetch_page($_);
175 if ( $rc == 0 ) {
176 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
177 if ( $rc == 0 or $rc == 200 ) {
178 scan_page($_, \$resp);
179 } else {
180 $errmsg = $curl->strerror($rc)." (http/$rc)";
182 } else {
183 $errmsg = $curl->strerror($rc)." (http/$rc)";
185 close $fh;
186 print STDERR "\n==> error: $errmsg\n" if $errmsg;
190 sub scan_page {
191 my ($scanurl, $pageref) = @_;
192 print "done.\n" unless $opts{quiet};
193 $$pageref =~ tr{\n}//d;
195 my $p = HTML::TokeParser->new($pageref);
196 $p->get_tag("title");
197 my $pagetitle = $p->get_trimmed_text;
199 my %re = (
200 # in_scanurl: regex used to bind this search pattern to specified
201 # domain. Undefined for embedded link searches. See clivescan(1).
202 # search_for: regex used to grab the video ID
203 # url_prefix: combined with video ID to construct video page URL
204 Youtube => {
205 in_scanurl => qr|\Qyoutube.com\E|i,
206 search_for => qr|\Q/watch?v=\E(.*?)["< &]|i,
207 url_prefix => "http://youtube.com/watch?v=",
209 YoutubeEmbed => {
210 in_scanurl => undef,
211 search_for => qr|\Qyoutube.com/v/\E(.*?)$|i,
212 url_prefix => "http://youtube.com/watch?v=",
214 GVideo => { # NOTE: Ignores original TLD, uses .com for extraction
215 in_scanurl => qr|\Qvideo.google.\E|i,
216 search_for => qr|\Q/videoplay?docid=\E(.*?)["< &]|i,
217 url_prefix => "http://video.google.com/videoplay?docid=",
219 GVideoEmbed => { # NOTE: Ditto.
220 in_scanurl => undef,
221 search_for => qr|\Q/googleplayer.swf?docid=\E(.*?)|i,
222 url_prefix => "http://video.google.com/videoplay?docid=",
224 Metacafe => { # NOTE: metacafe.com/watch/$id is enough for redirect
225 in_scanurl => qr|\Qmetacafe.com\E|i,
226 search_for => qr|\Q/watch/\E(.*?)/|i,
227 url_prefix => "http://metacafe.com/watch/",
229 MetacafeEmbed => {
230 in_scanurl => undef,
231 search_for => qr|\Qmetacafe.com/fplayer/\E(.*?)/|i,
232 url_prefix => "http://metacafe.com/watch/",
234 SevenLoad => { # NOTE: Ditto. Subdomain can be ignored.
235 in_scanurl => qr|\Qsevenload.com\E|i,
236 search_for => qr|\Q/videos/\E(.*?)\-|i,
237 url_prefix => "http://sevenload.com/videos/",
239 SevenLoadEmbed => {
240 in_scanurl => undef,
241 search_for => qr|\Qsevenload.com/pl/\E(.*?)/|i,
242 url_prefix => "http://sevenload.com/videos/",
244 Break => {
245 in_scanurl => qr|\Qbreak.com\E|i,
246 search_for => qr|\Q/index/\E(.*?)["< &]|i,
247 url_prefix => "http://break.com/index/",
249 # TODO: add BreakEmbed, e.g.:
250 # Page URL: http://break.com/index/if-all-movies-had-cell-phones.html
251 # Embed URL: http://embed.break.com/600081
254 print "=> Scanning page for links " unless $opts{quiet};
256 sub _scan_progress {
257 my ($linksref, $link) = @_;
258 push @$linksref,$link;
259 if ( scalar (@$linksref) % 5 == 0 ) { print scalar (@$linksref); }
260 else { print "."; }
263 my @links;
264 while ( my $host = each( %re ) ) {
265 if ( defined $re{$host}{in_scanurl} and $opts{strict} ) {
266 next unless $scanurl =~ /$re{$host}{in_scanurl}/;
268 _scan_progress(\@links, "$re{$host}{url_prefix}$1")
269 while ( $$pageref =~ /$re{$host}{search_for}/g );
272 my %h = map { $_, 1 } @links; # Weed out duplicates
273 @links = keys %h;
275 print "\n=> Found " .scalar @links. " links after removing duplicates.\n"
276 unless $opts{quiet};
278 my %verified_links;
279 foreach my $link ( @links ) {
280 print "==> Fetching $link ..." unless $opts{quiet};
281 my ($rc, $fh, $resp, $errmsg) = fetch_page($link);
282 if ( $rc == 0 ) {
283 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
284 if ( $rc == 0 or $rc == 200 ) {
285 print "done.\n" unless $opts{quiet};
286 # Grab title
287 $p = HTML::TokeParser->new(\$resp);
288 $p->get_tag("title");
289 my $title = $p->get_trimmed_text;
290 # Store, prevent link duplicates
291 my $sha1 = sha1_hex($link);
292 $verified_links{$sha1} = {link => $link, title => $title}
293 unless defined $verified_links{$sha1};
294 } else {
295 $errmsg = $curl->strerror($rc)." (http/$rc)";
297 } else {
298 $errmsg = $curl->strerror($rc)." (http/$rc)";
300 close $fh;
301 print STDERR "\n==> error: $errmsg\n" if $errmsg;
304 $found_queue{ sha1_hex($scanurl) } =
305 { title => $pagetitle, url => $scanurl, videos => {%verified_links} };
308 sub grab_all {
309 my @q;
310 for my $i ( keys %found_queue ) {
311 my %videos = %{$found_queue{$i}{videos}};
312 for my $j ( keys %videos ) {
313 push @q, $videos{$j}{link};
316 run_clive(@q);
320 ## Subroutines: Helpers
322 sub parse_input {
323 my $url = shift;
325 return if $url =~ /^$/;
326 chomp $url;
328 $url = "http://$url" if $url !~ m!^http://!i;
329 push @queue, $url;
332 sub find_clive {
333 print "Trying to locate 'clive' ...";
335 find ( sub { $opts{clive} = $File::Find::name if ( $_ eq 'clive' ) },
336 split /:/, $ENV{PATH} || getcwd);
338 if ( $opts{clive} ) { print "$opts{clive}\n"; }
339 else { print STDERR "error: not found, use --clive=path\n"; exit; }
342 sub run_clive {
343 my (@q) = @_;
344 system "$opts{clive} $opts{opts} " . join(' ', @q);
347 sub print_version {
348 my $noexit = shift;
349 my $perl_v = sprintf "%vd", $^V;
350 my $clipb_v = $opted_mods{Clipboard} ? $Clipboard::VERSION : "-";
351 my $s = sprintf
352 "clivescan version $VERSION. Copyright (C) 2008 Toni Gundogdu.
354 Perl: $perl_v ($^O)
355 Modules:
356 * Config::Tiny/$Config::Tiny::VERSION\t\t* WWW::Curl/$WWW::Curl::VERSION
357 * Tk/$Tk::VERSION\t\t\t* Tk::Tree/$Tk::Tree::VERSION
358 * Tk::DialogBox/$Tk::DialogBox::VERSION\t\t* Clipboard/$clipb_v
359 * Tk::FontDialog/$Tk::FontDialog::VERSION
360 Core modules:
361 * Getopt::Long/$Getopt::Long::VERSION\t\t* Digest::SHA/$Digest::SHA::VERSION
362 * File::Spec/$File::Spec::VERSION\t\t* File::Find/$File::Find::VERSION
363 * File::Path/$File::Path::VERSION\t\t* Encode/$Encode::VERSION
364 * Pod::Usage/$Pod::Usage::VERSION\t\t* Cwd/$Cwd::VERSION
366 This program comes with ABSOLUTELY NO WARRANTY. You may redistribute copies of
367 clivescan under the terms of the GNU General Public License as published by the
368 Free Software Foundation, either version 3 of the License, or (at your option)
369 any later version. You should have received a copy of the General Public License
370 along with this program. If not, see http://www.gnu.org/licenses/.
372 return $s if $noexit;
373 print $s; exit;
377 # GUI:
379 sub init_gui {
380 return unless keys ( %found_queue ) == 0;
382 $mw = MainWindow->new;
383 $mw->geometry($opts{geometry}) if defined $opts{geometry};
384 $mw->title('clivescan');
385 $mw->protocol('WM_DELETE_WINDOW', sub { save_prefs(); exit; });
387 # Menubar
388 my $mb = $mw->Menu;
389 $mw->configure(-menu => $mb);
391 # Menu: File
392 my $file = $mb->cascade(-label => 'File', -underline => 0, -tearoff => 0);
393 $file->command(-label => 'Extract videos in queue...',
394 -underline => 0, -command => \&on_extract);
395 $file->separator;
396 $file->command(-label => 'Quit', -underline => 0,
397 -command => sub { save_prefs(); exit; } );
399 # Menu: Edit
400 my $edit = $mb->cascade(-label => 'Edit', -underline => 0, -tearoff => 0);
401 $edit->command(-label => 'Preferences...',
402 -underline => 0, -command => \&on_prefs);
404 # Menu: Help
405 my $help = $mb->cascade(-label => 'Help', -underline => 0, -tearoff => 0);
406 $help->command(-label => 'About...',
407 -underline => 0, -command => \&on_about);
409 # The GUI has an upper and a lower part
410 $pwmain = $mw->Panedwindow(-orient => 'v', -opaqueresize => 0);
412 # Upper part
413 $pwtop = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
415 # Upper: Channels
416 my $lbar = $pwtop->Frame;
418 $lbtlink = $lbar->Scrolled('Tree',
419 -scrollbars => 'osoe',
420 -itemtype => 'text',
421 -selectmode => 'extended',
422 -indicator => 1,
423 -drawbranch => 1,
424 )->pack(-side => 'top', -expand => 1, -fill => 'both');
426 for my $i ( keys %found_queue ) {
427 my $scantitle = $found_queue{$i}{title};
428 $scantitle =~ tr{.}//d;
430 $lbtlink->add($scantitle);
431 $lbtlink->itemCreate($scantitle, 0, -text => $scantitle, -itemtype => 'text');
433 for my $j ( keys %{$found_queue{$i}{videos}} ) {
434 my %video = %{$found_queue{$i}{videos}{$j}};
436 my $title = $video{title};
437 $title =~ tr{.}//d;
439 my $path = "$scantitle.$title";
441 $lbtlink->add($path, -data => {%video});
442 $lbtlink->itemCreate($path, 0,
443 -text => $title, -itemtype => 'text');
446 $lbtlink->autosetmode;
447 $lbtlink->close($_) foreach ( $lbtlink->infoChildren('') );
449 my $rbar = $pwtop->Frame; # Button toolbar
450 $rbar->Button(-text => 'Grab', -command => \&on_grab
451 )->pack(-fill => 'x');
453 $rbar->Button(-text => 'Grab everything', -command => \&on_grab_all
454 )->pack(-fill => 'x');
456 $pwtop->add($lbar, $rbar, -width => $opts{pwtop} || 200);
458 # Lower part
459 $pwbottom = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
461 $lbtqueue = $pwbottom->Scrolled('Tree',
462 -scrollbars => 'osoe',
463 -itemtype => 'text',
464 -selectmode => 'extended',
465 -indicator => 1,
466 -drawbranch => 1,
469 my $bar = $pwbottom->Frame; # Button toolbar
471 $bar->Button(-text => 'Remove', -command => \&on_remove
472 )->pack(-fill => 'x');
474 $bar->Button(-text => 'Clear', -command => \&on_clear
475 )->pack(-fill => 'x');
477 $bar->Button(-text => 'Extract videos...', -command => \&on_extract
478 )->pack(-fill => 'x', -side => 'bottom');
480 $pwbottom->add($lbtqueue, $bar, -width => $opts{pwbottom} || 200);
482 # Add upper and lower parts to main paned window
483 $pwmain->add($pwtop, $pwbottom, -height => $opts{pwmain} || 200);
485 $mw->RefontTree(-font => $opts{mainfont});
486 $pwmain->pack(-expand => 1, -fill => 'both');
488 MainLoop;
491 sub save_prefs {
492 mkpath( [$CONFIGDIR], 1, 0700 );
494 my $c = Config::Tiny->new;
495 $c->{gui}->{geometry} = $mw->geometry();
496 $c->{gui}->{pwmain} = ($pwmain->sashCoord(0))[1]-7;
497 $c->{gui}->{pwtop} = ($pwtop->sashCoord(0))[0]-7;
498 $c->{gui}->{pwbottom} = ($pwbottom->sashCoord(0))[0]-7;
499 $c->{gui}->{mainfont} = $opts{mainfont};
501 $c->write($PREFSFILE);
504 sub on_prefs_ok {
505 ($opts{mainfont}) = @_;
506 $mw->RefontTree(-font => $opts{mainfont});
507 save_prefs();
510 sub queue_item {
511 my $path = shift;
512 return if $path !~ /\./;
513 return if $lbtqueue->infoExists($path);
515 my %video = %{$lbtlink->infoData($path)};
516 my ($link) = split /\./, $path;
518 unless ( $lbtqueue->infoExists($link) ) {
519 $lbtqueue->add($link);
520 $lbtqueue->itemCreate($link, 0,
521 -text => $link, -itemtype => 'text');
524 $lbtqueue->add($path, -data => {%video});
525 $lbtqueue->itemCreate($path, 0,
526 -text => $video{title}, -itemtype => 'text');
529 sub on_grab {
530 queue_item($_) foreach ( $lbtlink->infoSelection );
531 $lbtqueue->autosetmode;
534 sub on_grab_all {
535 foreach ( $lbtlink->infoChildren("") ) {
536 my ($parent) = split /\./;
537 queue_item($_)
538 foreach ($lbtlink->infoChildren($parent) );
540 $lbtqueue->autosetmode;
543 sub on_remove {
544 $lbtqueue->deleteEntry($_)
545 foreach ( $lbtqueue->infoSelection );
548 sub on_clear {
549 $lbtqueue->deleteAll;
552 sub on_about {
553 my $dlg = $mw->DialogBox(-title => 'About', -buttons => ['OK']);
554 my $txt = $dlg->add('Text')->pack;
555 $txt->insert('end', print_version(1));
556 $dlg->Show;
559 sub change_font {
560 my ($top, $lblv, $lbl) = @_;
561 my $font = $top->FontDialog(-initfont => $$lblv)->Show;
563 if ( defined $font ) {
564 my $descr = $top->FontDialog->GetDescriptiveFontName($font);
565 $lbl->configure(-font => $descr);
566 $$lblv = $descr;
570 sub on_prefs {
571 my $dlg = $mw->DialogBox(-title => 'clivescan preferences',
572 -buttons => ['OK','Cancel']);
574 $dlg->add('Label', -text => 'Fonts: press to choose'
575 )->grid(-sticky => 'w', -pady => 10);
577 my ($mainfont) = ($opts{mainfont});
578 my $mainfontl = $dlg->Label(-textvariable => \$mainfont);
580 $dlg->add('Button', -text => 'Main font',
581 -command => sub { change_font($dlg, \$mainfont, $mainfontl) }
582 )->grid($mainfontl, -sticky => 'w', -padx => '5');
584 on_prefs_ok($mainfont) if $dlg->Show eq 'OK';
587 sub on_extract {
588 my @q;
589 foreach ( $lbtqueue->infoChildren('') ) {
590 foreach ( $lbtqueue->infoChildren($_) ) {
591 my %video = %{$lbtqueue->infoData($_)};
592 push @q, $video{link};
595 return unless @q;
597 # Prompt for clive(1) options
598 my $dlg = $mw->DialogBox(-title => 'clive(1) options',
599 -buttons => ['OK','Cancel']);
601 $dlg->add('Label', -text => 'Path to clive'
602 )->grid(my $clivepath = $dlg->Entry(-width => 60),
603 -sticky => 'w', -padx => '5');
605 $dlg->add('Label', -text => 'Runtime options'
606 )->grid(my $cliveopts = $dlg->Entry(-width => 60),
607 -sticky => 'w', -padx => '5');
609 $clivepath->insert('end', $opts{clive});
610 $cliveopts->insert('end', $opts{opts});
612 if ( $dlg->Show() eq 'OK' ) {
613 $opts{clive} = $clivepath->get;
614 $opts{opts} = $cliveopts->get;
615 $mw->destroy;
616 run_clive(@q);
620 __END__
622 =head1 NAME
624 clivescan - the video link scanning utility for clive
626 =head1 SYNOPSIS
628 clivescan [option]... [URL]...
630 =head1 DESCRIPTION
632 clivescan is an utility that scans video pages for video links and
633 uses L<clive(1)> to extract them. The utility scans for video page
634 and embedded video links.
636 Historically, the video link scanning function was part of L<clive(1)>
637 and it was written in Python/Newt. The clivescan utility was written
638 in Perl/Tk to replace the feature that was removed in clive 2.0. This
639 utility is part of the B<clive-utils> project.
641 =head1 OPTIONS
643 You may freely specify options after the command-line arguments. For example:
645 clivescan -a URL --opts=--noextract
647 B<Basic Options>
649 =over 4
651 =item B<-h --help>
653 Show help and exit.
655 =item B<--version>
657 Show version and exit.
659 =item B<--clive=>I<path>
661 I<path> to L<clive(1)> command. If unspecified, clivescan will attempt to
662 locate it in the $PATH. Additionally, the B<CLIVE_PATH> environment variable
663 can be used. See also L</CONFIG>.
665 =item B<--opts=>I<opts>
667 I<opts> to append to clive call. See L<clive(1)> for more on the available
668 options.
670 =item B<-a --all>
672 Grab all videos without prompting the GUI.
674 =item B<-S --nostrict>
676 This may come as a shock but clivescan is B<not> a perfect utility. This
677 option was added as a workaround for some search pattern issues that are
678 known to occur in some cases.
680 When searching for break.com videos, for example, clivescan looks for the
681 "/index/" pattern. This is, unfortunately, a fairly common string to be
682 found anywhere in the web -- like in the Youtube front page. To demonstrate
683 this, try:
685 % clivescan --nostrict "http://youtube.com"
687 Let's dwell deeper into this. Observe.
689 % clivescan "http://video.google.com/videosearch?q=inurl%3Abreak"
691 Returns no break.com videos even though it should. clivescan defaults to
692 "strict" host binding, meaning that it makes sure that the B<scan URL>
693 contains a hard-coded B<domain pattern>, or "break.com" in this case.
695 % clivescan "http://break.com"
697 Hence the above works as expected. To fix the I<original> issue with the
698 video.google.com URL, you need to use the B<--nostrict> option:
700 % clivescan -S "http://video.google.com/videosearch?q=inurl%3Abreak"
702 This causes clivescan to ignore the B<domain pattern>.
704 Using domains in the search patterns would have been a sound idea but this
705 has been made impossible by the video hosts that often refer to their
706 video pages using local paths (e.g. href="/watch?v=$id").
708 It is obviously not a elegant solution, and is even likely to cause other issues
709 when scanning multiple sources at once. Should someone come up with a better
710 solution, please see L</AUTHOR> for contact details.
712 =back
714 B<HTTP Options>
716 =over 4
718 =item B<-U --agent=>I<string>
720 Identify as I<string> to the HTTP server. Defaults to "Mozilla/5.0".
722 =item B<-y --proxy=>I<address>
724 Use I<address> for HTTP proxy, e.g. http://foo:1234. If http_proxy
725 environment variable is defined, it will be used.
727 =item B<-X --noproxy>
729 Do not use the defined HTTP proxy (B<--proxy>, config or http_proxy).
731 =back
733 =head1 EXAMPLES
735 =over 4
737 =item % clivescan youtube.com video.google.com
739 Scans both, Youtube and GoogleVideo front pages for video links.
741 =item % cat E<gt>E<gt> url.lst
743 http://video.google.com
744 http://youtube.com/communitychannel
745 http://sevenload.com
746 http://break.com
748 =item % cat url.lst | clivescan
750 Reads input from UNIX pipe.
752 =item % clivescan --opts="-f mp4"
754 Appends the I<opts> to the L<clive(1)> call.
756 =item % clivescan --all http://youtube.com
758 Grabs all found videos from the Youtube front page.
760 =back
762 =head1 FILES
764 By default, clivescan searches the ~/.config/clivescan directory for the
765 config file. The B<CLIVESCAN_CONFIGDIR> environment variable can be used
766 to override this behaviour.
768 =over 4
770 =item ~/.config/clivescan/config
772 Configuration file.
774 =item ~/.config/clivescan/prefs
776 GUI preferences (e.g. fonts, window position, sash coords, ...).
778 =back
780 =head1 CONFIG
782 ## Example config file for clivescan.
784 [clive]
785 path = /usr/local/bin/clive
786 opts = -f mp4
788 [http]
789 agent = Mozilla/5.0
790 proxy = http://foo:1234
792 =head1 SEE ALSO
794 L<clive(1)> <clivefeed(1)>
796 =head1 OTHER
798 Project: http://googlecode.com/p/clive-utils/
800 A clive-utils development repository can be obtained from:
802 % git clone git://repo.or.cz/clive-utils.git
804 Patches welcome.
806 =head1 AUTHOR
808 Written by Toni Gundogdu <legatvs@gmail.com>
810 =cut