Reverted the [$verb] status message changes.
[clive-utils.git] / clivescan
blob128091fb0e95a97639659aeaccc51718dac20d23
1 #!/usr/bin/env perl
2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clivescan, the video link scanning utility for clive
5 # Copyright (C) 2008 Toni Gundogdu.
7 # This file is part of clive-utils.
9 # clivescan is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
14 # clivescan is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with clivescan. If not, see <http://www.gnu.org/licenses/>.
21 ###########################################################################
23 # Keep it simple.
25 use warnings;
26 use strict;
28 binmode(STDOUT, ":utf8");
30 use HTML::TokeParser;
31 use WWW::Curl::Easy;
32 use Tk::FontDialog;
33 use Tk::DialogBox;
34 use Config::Tiny;
35 use Tk::Tree;
36 use Tk;
38 # Core modules:
39 use Getopt::Long qw(:config bundling);
40 use Digest::SHA qw(sha1_hex);
41 use File::Spec;
42 use File::Find;
43 use File::Path;
44 use Pod::Usage;
45 use Encode;
46 use Cwd;
48 # Non-essentials
49 my %opted_mods = (Clipboard => 1);
50 eval "use Clipboard"; $opted_mods{Clipboard}=0 if $@;
52 my $VERSION = "2.0.0";
53 my $CONFIGDIR = $ENV{CLIVESCAN_CONFIGDIR}
54 || File::Spec->catfile($ENV{HOME}, ".config/clivescan");
55 my $CONFIGFILE = File::Spec->catfile($CONFIGDIR, "config");
56 my $PREFSFILE = File::Spec->catfile($CONFIGDIR, "prefs");
58 my %opts; # Holds the options
59 my @queue; # Holds the current URL queue
60 my %found_queue;# Holds the results of the scanned video page links
61 my $curl; # Holds the curl handle (reused throught lifespan)
62 my $mw; # Holds the main window handle (GUI)
63 my $pwmain; # Holds the handle to the main paned window
64 my $pwtop; # Holds the handle to the top paned window
65 my $pwbottom; # Holds the handle to the bottom paned window
66 my $lbtlink; # Holds the handle to the listbox tree of found links
67 my $lbtqueue; # Holds the handle to the listbox tree of queued links
69 # Parse config
70 my $conf = Config::Tiny->read($CONFIGFILE);
71 my $prefs = Config::Tiny->read($PREFSFILE);
72 %opts = (
73 clive => $conf->{clive}->{path},
74 opts => $conf->{clive}->{opts},
75 agent => $conf->{http}->{agent},
76 proxy => $conf->{http}->{proxy},
78 geometry=> $prefs->{gui}->{geometry},
79 pwmain => $prefs->{gui}->{pwmain},
80 pwtop => $prefs->{gui}->{pwtop},
81 pwbottom=> $prefs->{gui}->{pwbottom},
82 mainfont=> $prefs->{gui}->{mainfont},
85 # Parse cmdline
86 # Define those not read from config, init with defaults
87 $opts{quiet} = 0;
88 $opts{paste} = 0;
89 $opts{all} = 0;
90 $opts{strict} = 1;
91 $opts{debug} = 0;
92 $opts{help} = 0;
93 $opts{manual} = 0;
94 $opts{version} = 0;
95 $opts{mainfont} = $opts{mainfont} || "{helvetica} -12 bold";
97 GetOptions(\%opts,
98 'debug|d', 'help|h', 'manual|m', 'version|v', 'all|a',
99 'paste|x', 'quiet|q', 'clive|c=s', 'opts|o=s', 'agent|U=s',
100 'proxy|y=s',
101 # Workaround since '$longopt|shortopt' is a no-no.
102 'noproxy|X' => sub { $opts{proxy} = "" },
103 'nostrict|S' => sub { $opts{strict} = 0 },
104 ) or pod2usage(1);
106 # Since 'version|v' => \&print_version and exit cannot tango with tk
107 print_version(0) if $opts{version};
108 pod2usage(-exitstatus => 0, -verbose => 1) if $opts{help};
109 pod2usage(-exitstatus => 0, -verbose => 2) if $opts{manual};
111 $opts{clive} = $opts{clive} || $ENV{CLIVE_PATH};
112 find_clive() unless $opts{clive};
114 get_queue();
116 select STDERR; $| = 1; # => unbuffered
117 select STDOUT; $| = 1;
119 process_queue();
121 unless ( $opts{all} ) { init_gui(); }
122 else { grab_all(); }
125 ## Subroutines: Connection
127 sub init_curl {
128 $curl = WWW::Curl::Easy->new;
129 $curl->setopt(CURLOPT_USERAGENT, $opts{agent} || "Mozilla/5.0");
130 $curl->setopt(CURLOPT_PROXY, $opts{proxy}) if defined $opts{proxy};
131 $curl->setopt(CURLOPT_VERBOSE, 1) if $opts{debug};
132 $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
133 $curl->setopt(CURLOPT_AUTOREFERER, 1);
134 $curl->setopt(CURLOPT_HEADER, 0);
135 $curl->setopt(CURLOPT_NOBODY, 0);
138 sub fetch_page {
139 my ($url, $resp, $rc) = (shift, 0, 0);
140 open my $fh, ">", \$resp;
142 $curl->setopt(CURLOPT_URL, $url);
143 $curl->setopt(CURLOPT_ENCODING, "");
144 $curl->setopt(CURLOPT_WRITEDATA, $fh);
145 $rc = $curl->perform;
147 return ($rc, $fh, $resp);
151 ## Subroutines: Queue
153 sub get_queue {
154 if ( $opts{paste} ) {
155 print STDERR "error: Clipboard module not found" and exit
156 unless $opted_mods{Clipboard};
157 my $data = Clipboard->paste();
158 if ( $data ) {
159 parse_input($_) foreach split/\n/,$data;
163 parse_input($_) foreach @ARGV;
164 unless ( @queue ) { parse_input($_) while ( <STDIN> ); }
166 my %h = map {$_,1} @queue; # Remove duplicates
167 @queue = keys %h;
170 sub process_queue {
171 init_curl();
172 foreach ( @queue ) {
173 print "fetch $_ ..." unless $opts{quiet};
174 my ($rc, $fh, $resp, $errmsg) = fetch_page($_);
175 if ( $rc == 0 ) {
176 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
177 if ( $rc == 0 or $rc == 200 ) {
178 scan_page($_, \$resp);
179 } else {
180 $errmsg = $curl->strerror($rc)." (http/$rc)";
182 } else {
183 $errmsg = $curl->strerror($rc)." (http/$rc)";
185 close $fh;
186 print STDERR "\nerror: $errmsg\n" if $errmsg;
190 sub scan_page {
191 my ($scanurl, $pageref) = @_;
192 print "done.\n" unless $opts{quiet};
193 $$pageref =~ tr{\n}//d;
195 my $p = HTML::TokeParser->new($pageref);
196 $p->get_tag("title");
197 my $pagetitle = $p->get_trimmed_text;
199 my %re = (
200 # in_scanurl: regex used to bind this search pattern to specified
201 # domain. Undefined for embedded link searches. See clivescan(1).
202 # search_for: regex used to grab the video ID
203 # url_prefix: combined with video ID to construct video page URL
205 # NOTE: We're not using domains in the search patterns because
206 # most of the supported hosts refer to their videos using local
207 # paths, e.g. <a href="/watch?v=...">.
208 Youtube => {
209 in_scanurl => qr|\Qyoutube.com\E|i,
210 search_for => qr|\Q/watch?v=\E(.*?)["< &]|i,
211 url_prefix => "http://youtube.com/watch?v=",
213 YoutubeEmbed => {
214 in_scanurl => undef,
215 search_for => qr|\Qyoutube.com/v/\E(.*?)["< &]|i,
216 url_prefix => "http://youtube.com/watch?v=",
218 GVideo => { # NOTE: Ignores original TLD, uses .com for extraction
219 in_scanurl => qr|\Qvideo.google.\E|i,
220 search_for => qr|\Q/videoplay?docid=\E(.*?)["< &]|i,
221 url_prefix => "http://video.google.com/videoplay?docid=",
223 GVideoEmbed => { # NOTE: Ditto.
224 in_scanurl => undef,
225 search_for => qr|\Q/googleplayer.swf?docid=\E(.*?)["< &]|i,
226 url_prefix => "http://video.google.com/videoplay?docid=",
228 # Metacafe => { # NOTE: metacafe.com/watch/$id is enough for redirect
229 # in_scanurl => qr|\Qmetacafe.com\E|i,
230 # search_for => qr|\Q/watch/\E(.*?)/|i,
231 # url_prefix => "http://metacafe.com/watch/",
232 # },
233 # MetacafeEmbed => {
234 # in_scanurl => undef,
235 # search_for => qr|\Qmetacafe.com/fplayer/\E(.*?)/|i,
236 # url_prefix => "http://metacafe.com/watch/",
237 # },
238 SevenLoad => { # NOTE: Ditto. Subdomain can be ignored.
239 in_scanurl => qr|\Qsevenload.com\E|i,
240 search_for => qr|\Q/videos/\E(.*?)\-|i,
241 url_prefix => "http://sevenload.com/videos/",
243 SevenLoadEmbed => {
244 in_scanurl => undef,
245 search_for => qr|\Qsevenload.com/pl/\E(.*?)/|i,
246 url_prefix => "http://sevenload.com/videos/",
248 Break => {
249 in_scanurl => qr|\Qbreak.com\E|i,
250 search_for => qr|\Q/index/\E(.*?)["< &]|i,
251 url_prefix => "http://break.com/index/",
253 # TODO: add BreakEmbed, e.g.:
254 # Page URL: http://break.com/index/if-all-movies-had-cell-phones.html
255 # Embed URL: http://embed.break.com/600081
258 print "scan page" unless $opts{quiet};
260 sub _scan_progress {
261 my ($linksref, $link) = @_;
262 push @$linksref,$link;
263 unless ( $opts{quiet} ) {
264 if ( scalar (@$linksref) % 5 == 0 ) { print scalar (@$linksref); }
265 else { print "."; }
269 my @links;
270 while ( my $host = each( %re ) ) {
271 if ( defined $re{$host}{in_scanurl} and $opts{strict} ) {
272 next unless $scanurl =~ /$re{$host}{in_scanurl}/;
274 _scan_progress(\@links, "$re{$host}{url_prefix}$1")
275 while ( $$pageref =~ /$re{$host}{search_for}/g );
278 my %h = map { $_, 1 } @links; # Weed out duplicates
279 @links = keys %h;
281 print " removed duplicates. found " .scalar @links. " link(s).\n"
282 unless $opts{quiet};
284 my %verified_links;
285 foreach my $link ( @links ) {
286 print "fetch $link ..." unless $opts{quiet};
287 my ($rc, $fh, $resp, $errmsg) = fetch_page($link);
288 if ( $rc == 0 ) {
289 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
290 if ( $rc == 0 or $rc == 200 ) {
291 print "done.\n" unless $opts{quiet};
292 # Grab title
293 $p = HTML::TokeParser->new(\$resp);
294 $p->get_tag("title");
295 my $title = $p->get_trimmed_text;
296 # Store, prevent link duplicates
297 my $sha1 = sha1_hex($link);
298 $verified_links{$sha1} = {link => $link, title => $title}
299 unless defined $verified_links{$sha1};
300 } else {
301 $errmsg = $curl->strerror($rc)." (http/$rc)";
303 } else {
304 $errmsg = $curl->strerror($rc)." (http/$rc)";
306 close $fh;
307 print STDERR "\nerror: $errmsg\n" if $errmsg;
310 $found_queue{ sha1_hex($scanurl) } =
311 { title => $pagetitle, url => $scanurl, videos => {%verified_links} };
314 sub grab_all {
315 my @q;
316 for my $i ( keys %found_queue ) {
317 my %videos = %{$found_queue{$i}{videos}};
318 for my $j ( keys %videos ) {
319 push @q, $videos{$j}{link};
322 run_clive(@q);
326 ## Subroutines: Helpers
328 sub parse_input {
329 my $url = shift;
331 return if $url =~ /^$/;
332 chomp $url;
334 $url = "http://$url" if $url !~ m!^http://!i;
335 push @queue, $url;
338 sub find_clive {
339 print "locate clive ..." unless $opts{quiet};
341 find ( sub { $opts{clive} = $File::Find::name if ( $_ eq 'clive' ) },
342 split /:/, $ENV{PATH} || getcwd);
344 if ( $opts{clive} ) { print "$opts{clive}\n" unless $opts{quiet}; }
345 else { print STDERR "\nerror: not found, use --clive=path\n"; exit; }
348 sub run_clive {
349 my (@q) = @_;
350 system "$opts{clive} $opts{opts} " . join(' ', @q);
353 sub print_version {
354 my $noexit = shift;
355 my $perl_v = sprintf "%vd", $^V;
356 my $clipb_v = $opted_mods{Clipboard} ? $Clipboard::VERSION : "-";
357 my $s = sprintf
358 "clivescan version $VERSION. Copyright (C) 2008 Toni Gundogdu.
360 Perl: $perl_v ($^O)
361 Modules:
362 * Config::Tiny/$Config::Tiny::VERSION\t\t* WWW::Curl/$WWW::Curl::VERSION
363 * Tk/$Tk::VERSION\t\t\t* Tk::Tree/$Tk::Tree::VERSION
364 * Tk::DialogBox/$Tk::DialogBox::VERSION\t\t* Clipboard/$clipb_v
365 * Tk::FontDialog/$Tk::FontDialog::VERSION\t\t* HTML::TokeParser/$HTML::TokeParser::VERSION
366 Core modules:
367 * Getopt::Long/$Getopt::Long::VERSION\t\t* Digest::SHA/$Digest::SHA::VERSION
368 * File::Spec/$File::Spec::VERSION\t\t* File::Find/$File::Find::VERSION
369 * File::Path/$File::Path::VERSION\t\t* Encode/$Encode::VERSION
370 * Pod::Usage/$Pod::Usage::VERSION\t\t* Cwd/$Cwd::VERSION
372 This program comes with ABSOLUTELY NO WARRANTY. You may redistribute copies of
373 clivescan under the terms of the GNU General Public License as published by the
374 Free Software Foundation, either version 3 of the License, or (at your option)
375 any later version. You should have received a copy of the General Public License
376 along with this program. If not, see http://www.gnu.org/licenses/.
378 return $s if $noexit;
379 print $s; exit;
383 # GUI:
385 sub init_gui {
386 return if keys %found_queue == 0;
388 $mw = MainWindow->new;
389 $mw->geometry($opts{geometry}) if defined $opts{geometry};
390 $mw->title('clivescan');
391 $mw->protocol('WM_DELETE_WINDOW', sub { save_prefs(); exit; });
393 # Menubar
394 my $mb = $mw->Menu;
395 $mw->configure(-menu => $mb);
397 # Menu: File
398 my $file = $mb->cascade(-label => 'File', -underline => 0, -tearoff => 0);
399 $file->command(-label => 'Extract videos in queue...',
400 -underline => 0, -command => \&on_extract);
401 $file->separator;
402 $file->command(-label => 'Quit', -underline => 0,
403 -command => sub { save_prefs(); exit; } );
405 # Menu: Edit
406 my $edit = $mb->cascade(-label => 'Edit', -underline => 0, -tearoff => 0);
407 $edit->command(-label => 'Preferences...',
408 -underline => 0, -command => \&on_prefs);
410 # Menu: Help
411 my $help = $mb->cascade(-label => 'Help', -underline => 0, -tearoff => 0);
412 $help->command(-label => 'About...',
413 -underline => 0, -command => \&on_about);
415 # The GUI has an upper and a lower part
416 $pwmain = $mw->Panedwindow(-orient => 'v', -opaqueresize => 0);
418 # Upper part
419 $pwtop = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
421 # Upper: Channels
422 my $lbar = $pwtop->Frame;
424 $lbtlink = $lbar->Scrolled('Tree',
425 -scrollbars => 'osoe',
426 -itemtype => 'text',
427 -selectmode => 'extended',
428 -indicator => 1,
429 -drawbranch => 1,
430 )->pack(-side => 'top', -expand => 1, -fill => 'both');
432 for my $i ( keys %found_queue ) {
433 my $scantitle = $found_queue{$i}{title};
434 $scantitle =~ tr{.}//d;
436 $lbtlink->add($scantitle);
437 $lbtlink->itemCreate($scantitle, 0, -text => $scantitle, -itemtype => 'text');
439 for my $j ( keys %{$found_queue{$i}{videos}} ) {
440 my %video = %{$found_queue{$i}{videos}{$j}};
442 my $title = $video{title};
443 $title =~ tr{.}//d;
445 my $path;
446 for ( my $k=0;; ++$k ) {
447 $path = "$scantitle.$title (#$k)";
448 last unless $lbtlink->infoExists($path);
451 $lbtlink->add($path, -data => {%video});
452 $lbtlink->itemCreate($path, 0,
453 -text => $title, -itemtype => 'text');
456 $lbtlink->autosetmode;
457 $lbtlink->close($_) foreach ( $lbtlink->infoChildren('') );
459 my $rbar = $pwtop->Frame; # Button toolbar
460 $rbar->Button(-text => 'Grab', -command => \&on_grab
461 )->pack(-fill => 'x');
463 $rbar->Button(-text => 'Grab everything', -command => \&on_grab_all
464 )->pack(-fill => 'x');
466 $pwtop->add($lbar, $rbar, -width => $opts{pwtop} || 200);
468 # Lower part
469 $pwbottom = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
471 $lbtqueue = $pwbottom->Scrolled('Tree',
472 -scrollbars => 'osoe',
473 -itemtype => 'text',
474 -selectmode => 'extended',
475 -indicator => 1,
476 -drawbranch => 1,
479 my $bar = $pwbottom->Frame; # Button toolbar
481 $bar->Button(-text => 'Remove', -command => \&on_remove
482 )->pack(-fill => 'x');
484 $bar->Button(-text => 'Clear', -command => \&on_clear
485 )->pack(-fill => 'x');
487 $bar->Button(-text => 'Extract videos...', -command => \&on_extract
488 )->pack(-fill => 'x', -side => 'bottom');
490 $pwbottom->add($lbtqueue, $bar, -width => $opts{pwbottom} || 200);
492 # Add upper and lower parts to main paned window
493 $pwmain->add($pwtop, $pwbottom, -height => $opts{pwmain} || 200);
495 $mw->RefontTree(-font => $opts{mainfont});
496 $pwmain->pack(-expand => 1, -fill => 'both');
498 MainLoop;
501 sub save_prefs {
502 mkpath( [$CONFIGDIR], 1, 0700 );
504 my $c = Config::Tiny->new;
505 $c->{gui}->{geometry} = $mw->geometry();
506 $c->{gui}->{pwmain} = ($pwmain->sashCoord(0))[1]-7;
507 $c->{gui}->{pwtop} = ($pwtop->sashCoord(0))[0]-7;
508 $c->{gui}->{pwbottom} = ($pwbottom->sashCoord(0))[0]-7;
509 $c->{gui}->{mainfont} = $opts{mainfont};
511 $c->write($PREFSFILE);
514 sub on_prefs_ok {
515 ($opts{mainfont}) = @_;
516 $mw->RefontTree(-font => $opts{mainfont});
517 save_prefs();
520 sub queue_item {
521 my $path = shift;
522 return if $path !~ /\./;
523 return if $lbtqueue->infoExists($path);
525 my %video = %{$lbtlink->infoData($path)};
526 my ($link) = split /\./, $path;
528 unless ( $lbtqueue->infoExists($link) ) {
529 $lbtqueue->add($link);
530 $lbtqueue->itemCreate($link, 0,
531 -text => $link, -itemtype => 'text');
534 $lbtqueue->add($path, -data => {%video});
535 $lbtqueue->itemCreate($path, 0,
536 -text => $video{title}, -itemtype => 'text');
539 sub on_grab {
540 queue_item($_) foreach ( $lbtlink->infoSelection );
541 $lbtqueue->autosetmode;
544 sub on_grab_all {
545 foreach ( $lbtlink->infoChildren("") ) {
546 my ($parent) = split /\./;
547 queue_item($_)
548 foreach ($lbtlink->infoChildren($parent) );
550 $lbtqueue->autosetmode;
553 sub on_remove {
554 $lbtqueue->deleteEntry($_)
555 foreach ( $lbtqueue->infoSelection );
558 sub on_clear {
559 $lbtqueue->deleteAll;
562 sub on_about {
563 my $dlg = $mw->DialogBox(-title => 'About', -buttons => ['OK']);
564 my $txt = $dlg->add('Text')->pack;
565 $txt->insert('end', print_version(1));
566 $dlg->Show;
569 sub change_font {
570 my ($top, $lblv, $lbl) = @_;
571 my $font = $top->FontDialog(-initfont => $$lblv)->Show;
573 if ( defined $font ) {
574 my $descr = $top->FontDialog->GetDescriptiveFontName($font);
575 $lbl->configure(-font => $descr);
576 $$lblv = $descr;
580 sub on_prefs {
581 my $dlg = $mw->DialogBox(-title => 'clivescan preferences',
582 -buttons => ['OK','Cancel']);
584 $dlg->add('Label', -text => 'Fonts: press to choose'
585 )->grid(-sticky => 'w', -pady => 10);
587 my ($mainfont) = ($opts{mainfont});
588 my $mainfontl = $dlg->Label(-textvariable => \$mainfont);
590 $dlg->add('Button', -text => 'Main font',
591 -command => sub { change_font($dlg, \$mainfont, $mainfontl) }
592 )->grid($mainfontl, -sticky => 'w', -padx => '5');
594 on_prefs_ok($mainfont) if $dlg->Show eq 'OK';
597 sub on_extract {
598 my @q;
599 foreach ( $lbtqueue->infoChildren('') ) {
600 foreach ( $lbtqueue->infoChildren($_) ) {
601 my %video = %{$lbtqueue->infoData($_)};
602 push @q, $video{link};
605 return unless @q;
607 # Prompt for clive(1) options
608 my $dlg = $mw->DialogBox(-title => 'clive(1) options',
609 -buttons => ['OK','Cancel']);
611 $dlg->add('Label', -text => 'Path to clive'
612 )->grid(my $clivepath = $dlg->Entry(-width => 60),
613 -sticky => 'w', -padx => '5');
615 $dlg->add('Label', -text => 'Runtime options'
616 )->grid(my $cliveopts = $dlg->Entry(-width => 60),
617 -sticky => 'w', -padx => '5');
619 $clivepath->insert('end', $opts{clive});
620 $cliveopts->insert('end', $opts{opts});
622 if ( $dlg->Show() eq 'OK' ) {
623 $opts{clive} = $clivepath->get;
624 $opts{opts} = $cliveopts->get;
625 $mw->destroy;
626 run_clive(@q);
630 __END__
632 =head1 NAME
634 clivescan - the video link scanning utility for clive
636 =head1 SYNOPSIS
638 clivescan [option]... [URL]...
640 =head1 DESCRIPTION
642 clivescan is an utility that scans video pages for video links and
643 uses L<clive(1)> to extract them. The utility scans for video page
644 and embedded video links.
646 Historically, the video link scanning function was part of L<clive(1)>
647 and it was written in Python/Newt. The clivescan utility was written
648 in Perl/Tk to replace the feature that was removed in clive 2.0. This
649 utility is part of the B<clive-utils> project.
651 =head1 OPTIONS
653 You may freely specify options after the command-line arguments. For example:
655 % clivescan -a URL --opts=--noextract
657 B<Basic Options>
659 =over 4
661 =item B<-h --help>
663 Show help and exit.
665 =item B<-v --version>
667 Show version and exit.
669 =item B<-c --clive=>I<path>
671 I<path> to L<clive(1)> command. If unspecified, clivescan will attempt to
672 locate it in the $PATH. Additionally, the B<CLIVE_PATH> environment variable
673 can be used. See also L</CONFIG>.
675 =item B<-o --opts=>I<opts>
677 I<opts> to append to clive call. See L<clive(1)> for more on the available
678 options.
680 =item B<-a --all>
682 Grab all videos without prompting the GUI.
684 =item B<-S --nostrict>
686 This option provides a workaround for some search pattern issues that
687 are known to occur. For example:
689 % clivescan "http://video.google.com/videosearch?q=inurl%3Abreak"
691 Does not return any break.com videos even though the page lists them.
692 This happens because clivescan assumes that the domain break.com can
693 be found in the URL. To override this restrictive default behaviour:
695 % clivescan -S "http://video.google.com/videosearch?q=inurl%3Abreak"
697 =back
699 B<HTTP Options>
701 =over 4
703 =item B<-U --agent=>I<string>
705 Identify as I<string> to the HTTP server. Defaults to "Mozilla/5.0".
707 =item B<-y --proxy=>I<address>
709 Use I<address> for HTTP proxy, e.g. http://foo:1234. If http_proxy
710 environment variable is defined, it will be used.
712 =item B<-X --noproxy>
714 Do not use the defined HTTP proxy (B<--proxy>, config or http_proxy).
716 =back
718 =head1 EXAMPLES
720 =over 4
722 =item % clivescan youtube.com video.google.com
724 Scans both, Youtube and GoogleVideo front pages for video links.
726 =item % cat E<gt>E<gt> url.lst
728 http://video.google.com
729 http://youtube.com/communitychannel
730 http://sevenload.com
731 http://break.com
733 =item % cat url.lst | clivescan
735 Reads input from UNIX pipe.
737 =item % clivescan --opts="-f mp4"
739 Appends the I<opts> to the L<clive(1)> call.
741 =item % clivescan --all http://youtube.com
743 Grabs all found videos from the Youtube front page.
745 =back
747 =head1 FILES
749 By default, clivescan searches the ~/.config/clivescan directory for the
750 config file. The B<CLIVESCAN_CONFIGDIR> environment variable can be used
751 to override this behaviour.
753 =over 4
755 =item ~/.config/clivescan/config
757 Configuration file.
759 =item ~/.config/clivescan/prefs
761 GUI preferences (e.g. fonts, window position, sash coords, ...).
763 =back
765 =head1 CONFIG
767 ## Example config file for clivescan.
769 [clive]
770 path = /usr/local/bin/clive
771 opts = -f mp4
773 [http]
774 agent = Mozilla/5.0
775 proxy = http://foo:1234
777 =head1 SEE ALSO
779 L<clive(1)> L<clivefeed(1)>
781 =head1 OTHER
783 Project: http://googlecode.com/p/clive-utils/
785 A clive-utils development repository can be obtained from:
787 % git clone git://repo.or.cz/clive-utils.git
789 Patches welcome.
791 =head1 AUTHOR
793 Written by Toni Gundogdu <legatvs@gmail.com>
795 =cut