2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clivescan, the video link scanning utility for clive
5 # Copyright (C) 2008 Toni Gundogdu.
7 # This file is part of clive-utils.
9 # clivescan is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
14 # clivescan is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with clivescan. If not, see <http://www.gnu.org/licenses/>.
21 ###########################################################################
28 binmode(STDOUT
, ":utf8");
39 use Getopt
::Long
qw(:config bundling);
40 use Digest
::SHA
qw(sha1_hex);
49 my %opted_mods = (Clipboard
=> 1);
50 eval "use Clipboard"; $opted_mods{Clipboard
}=0 if $@
;
52 my $VERSION = "2.0beta2";
53 my $CONFIGDIR = $ENV{CLIVESCAN_CONFIGDIR
}
54 || File
::Spec
->catfile($ENV{HOME
}, ".config/clivescan");
55 my $CONFIGFILE = File
::Spec
->catfile($CONFIGDIR, "config");
56 my $PREFSFILE = File
::Spec
->catfile($CONFIGDIR, "prefs");
58 my %opts; # Holds the options
59 my @queue; # Holds the current URL queue
60 my %found_queue;# Holds the results of the scanned video page links
61 my $curl; # Holds the curl handle (reused throught lifespan)
62 my $mw; # Holds the main window handle (GUI)
63 my $pwmain; # Holds the handle to the main paned window
64 my $pwtop; # Holds the handle to the top paned window
65 my $pwbottom; # Holds the handle to the bottom paned window
66 my $lbtlink; # Holds the handle to the listbox tree of found links
67 my $lbtqueue; # Holds the handle to the listbox tree of queued links
70 my $conf = Config
::Tiny
->read($CONFIGFILE);
71 my $prefs = Config
::Tiny
->read($PREFSFILE);
73 clive
=> $conf->{clive
}->{path
},
74 opts
=> $conf->{clive
}->{opts
},
75 agent
=> $conf->{http
}->{agent
},
76 proxy
=> $conf->{http
}->{proxy
},
78 geometry
=> $prefs->{gui
}->{geometry
},
79 pwmain
=> $prefs->{gui
}->{pwmain
},
80 pwtop
=> $prefs->{gui
}->{pwtop
},
81 pwbottom
=> $prefs->{gui
}->{pwbottom
},
82 mainfont
=> $prefs->{gui
}->{mainfont
},
86 # Define those not read from config, init with defaults
94 $opts{mainfont
} = $opts{mainfont
} || "{helvetica} -12 bold";
97 'debug|d', 'help|h', 'manual|m', 'version|v', 'all|a',
98 'paste|x', 'quiet|q', 'clive|c=s', 'opts|o=s', 'agent|U=s',
100 # Workaround since '$longopt|shortopt' is a no-no.
101 'noproxy|X' => sub { $opts{proxy
} = ""; },
104 # Since 'version|v' => \&print_version and exit cannot tango with tk
105 print_version
(0) if $opts{version
};
106 pod2usage
(-exitstatus
=> 0, -verbose
=> 1) if $opts{help
};
107 pod2usage
(-exitstatus
=> 0, -verbose
=> 2) if $opts{manual
};
109 $opts{clive
} = $opts{clive
} || $ENV{CLIVE_PATH
};
110 find_clive
() unless $opts{clive
};
114 select STDERR
; $| = 1; # => unbuffered
115 select STDOUT
; $| = 1;
119 unless ( $opts{all
} ) { init_gui
(); }
123 ## Subroutines: Connection
126 $curl = WWW
::Curl
::Easy
->new;
127 $curl->setopt(CURLOPT_USERAGENT
, $opts{agent
} || "Mozilla/5.0");
128 $curl->setopt(CURLOPT_PROXY
, $opts{proxy
}) if defined $opts{proxy
};
129 $curl->setopt(CURLOPT_VERBOSE
, 1) if $opts{debug
};
130 $curl->setopt(CURLOPT_FOLLOWLOCATION
, 1);
131 $curl->setopt(CURLOPT_AUTOREFERER
, 1);
132 $curl->setopt(CURLOPT_HEADER
, 0);
133 $curl->setopt(CURLOPT_NOBODY
, 0);
137 my ($url, $resp, $rc) = (shift, 0, 0);
138 open my $fh, ">", \
$resp;
140 $curl->setopt(CURLOPT_URL
, $url);
141 $curl->setopt(CURLOPT_ENCODING
, "");
142 $curl->setopt(CURLOPT_WRITEDATA
, $fh);
143 $rc = $curl->perform;
145 return ($rc, $fh, $resp);
149 ## Subroutines: Queue
152 if ( $opts{paste
} ) {
153 print STDERR
"error: Clipboard module not found" and exit
154 unless $opted_mods{Clipboard
};
155 my $data = Clipboard
->paste();
157 parse_input
($_) foreach split/\n/,$data;
161 parse_input
($_) foreach @ARGV;
162 unless ( @queue ) { parse_input
($_) while ( <STDIN
> ); }
164 my %h = map {$_,1} @queue; # Remove duplicates
171 print "Fetching $_ ..." unless $opts{quiet
};
172 my ($rc, $fh, $resp, $errmsg) = fetch_page
($_);
174 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE
);
175 if ( $rc == 0 or $rc == 200 ) {
176 scan_page
($_, \
$resp);
178 $errmsg = $curl->strerror($rc)." (http/$rc)";
181 $errmsg = $curl->strerror($rc)." (http/$rc)";
184 print STDERR
"\n==> error: $errmsg\n" if $errmsg;
189 my ($scanurl, $pageref) = @_;
190 print "done.\n" unless $opts{quiet
};
191 $$pageref =~ tr
{\n}//d;
193 my $p = HTML
::TokeParser
->new($pageref);
194 $p->get_tag("title");
195 my $pagetitle = $p->get_trimmed_text;
199 url_prefix
=> "http://youtube.com/watch?v=",
200 search_for
=> qr
|\Q
/watch?v
=\E
(.*?
)["< &]|i,
203 url_prefix => "http
://youtube
.com
/watch?v
=",
204 search_for => qr|\Qyoutube.com/v/\E(.*?)/|i,
206 GVideo => { # NOTE: Ignores original TLD, uses .com for extraction
207 url_prefix => "http
://video
.google
.com
/videoplay?docid
=",
208 search_for => qr|\Q/googleplayer.swf?docid=\E(.*?)["< &]|i
,
210 GVideoEmbed
=> { # NOTE: Ditto.
211 url_prefix
=> "http://video.google.com/videoplay?docid=",
212 search_for
=> qr
|\Q
/videoplay?docid
=\E
(.*?
)["< &]|i,
216 print "=> Scanning page
for links
" unless $opts{quiet};
219 my ($linksref, $link) = @_;
220 push @$linksref,$link;
221 if ( scalar (@$linksref) % 5 == 0 ) { print scalar (@$linksref); }
226 while ( my $host = each( %re ) ) {
227 _scan_progress(\@links, "$re{$host}{url_prefix
}$1")
228 while ( $$pageref =~ /$re{$host}{search_for}/g );
231 my %h = map { $_, 1 } @links; # Weed out duplicates
233 #print "$_\n" foreach(@links);
235 print "\n=> Found
" .scalar @links. " links after removing duplicates
.\n"
239 foreach my $link ( @links ) {
240 print "==> Fetching
$link ..." unless $opts{quiet};
241 my ($rc, $fh, $resp, $errmsg) = fetch_page($link);
243 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
244 if ( $rc == 0 or $rc == 200 ) {
245 print "done
.\n" unless $opts{quiet};
247 $p = HTML::TokeParser->new(\$resp);
248 $p->get_tag("title
");
249 my $title = $p->get_trimmed_text;
250 # Store, prevent link duplicates
251 my $sha1 = sha1_hex($link);
252 $verified_links{$sha1} = {link => $link, title => $title}
253 unless defined $verified_links{$sha1};
255 $errmsg = $curl->strerror($rc)." (http
/$rc)";
258 $errmsg = $curl->strerror($rc)." (http
/$rc)";
261 print STDERR "\n==> error
: $errmsg\n" if $errmsg;
264 $found_queue{ sha1_hex($scanurl) } =
265 { title => $pagetitle, url => $scanurl, videos => {%verified_links} };
270 for my $i ( keys %found_queue ) {
271 my %videos = %{$found_queue{$i}{videos}};
272 for my $j ( keys %videos ) {
273 push @q, $videos{$j}{link};
280 ## Subroutines: Helpers
285 return if $url =~ /^$/;
288 $url = "http
://$url" if $url !~ m!^http://!i;
293 print "Trying to locate
'clive' ...";
295 find ( sub { $opts{clive} = $File::Find::name if ( $_ eq 'clive' ) },
296 split /:/, $ENV{PATH} || getcwd);
298 if ( $opts{clive} ) { print "$opts{clive
}\n"; }
299 else { print STDERR "error
: not found
, use --clive
=path
\n"; exit; }
304 system "$opts{clive
} $opts{opts
} " . join(' ', @q);
309 my $perl_v = sprintf "%vd", $^V;
310 my $clipb_v = $opted_mods{Clipboard} ? $Clipboard::VERSION : "-";
312 "clivescan version
$VERSION. Copyright
(C
) 2008 Toni Gundogdu
.
316 * Config
::Tiny
/$Config::Tiny::VERSION\t\t* WWW::Curl/$WWW::Curl
::VERSION
317 * Tk
/$Tk::VERSION\t\t\t* Tk::Tree/$Tk::Tree
::VERSION
318 * Tk
::DialogBox
/$Tk::DialogBox::VERSION\t\t* Clipboard/$clipb_v
319 * Tk
::FontDialog
/$Tk::FontDialog
::VERSION
321 * Getopt
::Long
/$Getopt::Long::VERSION\t\t* Digest::SHA/$Digest::SHA
::VERSION
322 * File
::Spec
/$File::Spec::VERSION\t\t* File::Find/$File::Find
::VERSION
323 * File
::Path
/$File::Path::VERSION\t\t* Encode/$Encode::VERSION
324 * Pod
::Usage
/$Pod::Usage::VERSION\t\t* Cwd/$Cwd::VERSION
326 This program comes with ABSOLUTELY NO WARRANTY
. You may redistribute copies of
327 clivescan under the terms of the GNU General Public License as published by the
328 Free Software Foundation
, either version
3 of the License
, or (at your option
)
329 any later version
. You should have received a copy of the General Public License
330 along with this program
. If
not, see http
://www
.gnu
.org
/licenses/.
332 return $s if $noexit;
340 return unless %found_queue;
342 $mw = MainWindow->new;
343 $mw->geometry($opts{geometry}) if defined $opts{geometry};
344 $mw->title('clivescan');
345 $mw->protocol('WM_DELETE_WINDOW', sub { save_prefs(); exit; });
349 $mw->configure(-menu => $mb);
352 my $file = $mb->cascade(-label => 'File', -underline => 0, -tearoff => 0);
353 $file->command(-label => 'Extract videos in queue...',
354 -underline => 0, -command => \&on_extract);
356 $file->command(-label => 'Quit', -underline => 0,
357 -command => sub { save_prefs(); exit; } );
360 my $edit = $mb->cascade(-label => 'Edit', -underline => 0, -tearoff => 0);
361 $edit->command(-label => 'Preferences...',
362 -underline => 0, -command => \&on_prefs);
365 my $help = $mb->cascade(-label => 'Help', -underline => 0, -tearoff => 0);
366 $help->command(-label => 'About...',
367 -underline => 0, -command => \&on_about);
369 # The GUI has an upper and a lower part
370 $pwmain = $mw->Panedwindow(-orient => 'v', -opaqueresize => 0);
373 $pwtop = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
376 my $lbar = $pwtop->Frame;
378 $lbtlink = $lbar->Scrolled('Tree',
379 -scrollbars => 'osoe',
381 -selectmode => 'extended',
384 )->pack(-side => 'top', -expand => 1, -fill => 'both');
386 for my $i ( keys %found_queue ) {
387 my $scantitle = $found_queue{$i}{title};
388 $scantitle =~ tr{.}//d;
390 $lbtlink->add($scantitle);
391 $lbtlink->itemCreate($scantitle, 0, -text => $scantitle, -itemtype => 'text');
393 for my $j ( keys %{$found_queue{$i}{videos}} ) {
394 my %video = %{$found_queue{$i}{videos}{$j}};
396 my $title = $video{title};
399 my $path = "$scantitle.$title";
401 $lbtlink->add($path, -data => {%video});
402 $lbtlink->itemCreate($path, 0,
403 -text => $title, -itemtype => 'text');
406 $lbtlink->autosetmode;
407 $lbtlink->close($_) foreach ( $lbtlink->infoChildren('') );
409 my $rbar = $pwtop->Frame; # Button toolbar
410 $rbar->Button(-text => 'Grab', -command => \&on_grab
411 )->pack(-fill => 'x');
413 $rbar->Button(-text => 'Grab everything', -command => \&on_grab_all
414 )->pack(-fill => 'x');
416 $pwtop->add($lbar, $rbar, -width => $opts{pwtop} || 200);
419 $pwbottom = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
421 $lbtqueue = $pwbottom->Scrolled('Tree',
422 -scrollbars => 'osoe',
424 -selectmode => 'extended',
429 my $bar = $pwbottom->Frame; # Button toolbar
431 $bar->Button(-text => 'Remove', -command => \&on_remove
432 )->pack(-fill => 'x');
434 $bar->Button(-text => 'Clear', -command => \&on_clear
435 )->pack(-fill => 'x');
437 $bar->Button(-text => 'Extract videos...', -command => \&on_extract
438 )->pack(-fill => 'x', -side => 'bottom');
440 $pwbottom->add($lbtqueue, $bar, -width => $opts{pwbottom} || 200);
442 # Add upper and lower parts to main paned window
443 $pwmain->add($pwtop, $pwbottom, -height => $opts{pwmain} || 200);
445 $mw->RefontTree(-font => $opts{mainfont});
446 $pwmain->pack(-expand => 1, -fill => 'both');
452 mkpath( [$CONFIGDIR], 1, 0700 );
454 my $c = Config::Tiny->new;
455 $c->{gui}->{geometry} = $mw->geometry();
456 $c->{gui}->{pwmain} = ($pwmain->sashCoord(0))[1]-7;
457 $c->{gui}->{pwtop} = ($pwtop->sashCoord(0))[0]-7;
458 $c->{gui}->{pwbottom} = ($pwbottom->sashCoord(0))[0]-7;
459 $c->{gui}->{mainfont} = $opts{mainfont};
461 $c->write($PREFSFILE);
465 ($opts{mainfont}) = @_;
466 $mw->RefontTree(-font => $opts{mainfont});
472 return if $path !~ /\./;
473 return if $lbtqueue->infoExists($path);
475 my %video = %{$lbtlink->infoData($path)};
476 my ($link) = split /\./, $path;
478 unless ( $lbtqueue->infoExists($link) ) {
479 $lbtqueue->add($link);
480 $lbtqueue->itemCreate($link, 0,
481 -text => $link, -itemtype => 'text');
484 $lbtqueue->add($path, -data => {%video});
485 $lbtqueue->itemCreate($path, 0,
486 -text => $video{title}, -itemtype => 'text');
490 queue_item($_) foreach ( $lbtlink->infoSelection );
491 $lbtqueue->autosetmode;
495 foreach ( $lbtlink->infoChildren("") ) {
496 my ($parent) = split /\./;
498 foreach ($lbtlink->infoChildren($parent) );
500 $lbtqueue->autosetmode;
504 $lbtqueue->deleteEntry($_)
505 foreach ( $lbtqueue->infoSelection );
509 $lbtqueue->deleteAll;
513 my $dlg = $mw->DialogBox(-title => 'About', -buttons => ['OK']);
514 my $txt = $dlg->add('Text')->pack;
515 $txt->insert('end', print_version(1));
520 my ($top, $lblv, $lbl) = @_;
521 my $font = $top->FontDialog(-initfont => $$lblv)->Show;
523 if ( defined $font ) {
524 my $descr = $top->FontDialog->GetDescriptiveFontName($font);
525 $lbl->configure(-font => $descr);
531 my $dlg = $mw->DialogBox(-title => 'clivescan preferences',
532 -buttons => ['OK','Cancel']);
534 $dlg->add('Label', -text => 'Fonts: press to choose'
535 )->grid(-sticky => 'w', -pady => 10);
537 my ($mainfont) = ($opts{mainfont});
538 my $mainfontl = $dlg->Label(-textvariable => \$mainfont);
540 $dlg->add('Button', -text => 'Main font',
541 -command => sub { change_font($dlg, \$mainfont, $mainfontl) }
542 )->grid($mainfontl, -sticky => 'w', -padx => '5');
544 on_prefs_ok($mainfont) if $dlg->Show eq 'OK';
549 foreach ( $lbtqueue->infoChildren('') ) {
550 foreach ( $lbtqueue->infoChildren($_) ) {
551 my %video = %{$lbtqueue->infoData($_)};
552 push @q, $video{link};
557 # Prompt for clive(1) options
558 my $dlg = $mw->DialogBox(-title => 'clive(1) options',
559 -buttons => ['OK','Cancel']);
561 $dlg->add('Label', -text => 'Path to clive'
562 )->grid(my $clivepath = $dlg->Entry(-width => 25),
563 -sticky => 'w', -padx => '5');
565 $dlg->add('Label', -text => 'Runtime options'
566 )->grid(my $cliveopts = $dlg->Entry(-width => 25),
567 -sticky => 'w', -padx => '5');
569 $clivepath->insert('end', $opts{clive});
570 $cliveopts->insert('end', $opts{opts});
572 if ( $dlg->Show() eq 'OK' ) {
573 $opts{clive} = $clivepath->get;
574 $opts{opts} = $cliveopts->get;