Rewrote --nostrict manual entry. Added local path note.
[clive-utils.git] / clivefeed
blob5f1e5168b5e2ee7a5a996f4ed10edea742d89c0c
1 #!/usr/bin/env perl
2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clivefeed, the feed parsing utility for clive
5 # Copyright (C) 2008 Toni Gundogdu.
7 # This file is part of clive-utils.
9 # clivefeed is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
14 # clivefeed is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with clivefeed. If not, see <http://www.gnu.org/licenses/>.
21 ###########################################################################
23 # Keep it simple.
25 use warnings;
26 use strict;
28 binmode(STDOUT, ":utf8");
30 use XML::RSS::LibXML;
31 use WWW::Curl::Easy;
32 use Tk::FontDialog;
33 use Tk::DialogBox;
34 use Config::Tiny;
35 use HTML::Strip;
36 use URI::Escape;
37 use Tk::Tree;
38 use Tk;
40 # Core modules:
41 use Getopt::Long qw(:config bundling);
42 use File::Spec;
43 use File::Find;
44 use File::Path;
45 use Pod::Usage;
46 use Encode;
47 use Cwd;
49 # Non-essentials
50 my %opted_mods = (Clipboard => 1);
51 eval "use Clipboard"; $opted_mods{Clipboard}=0 if $@;
53 my $VERSION = "2.0beta2";
54 my $CONFIGDIR = $ENV{CLIVEFEED_CONFIGDIR}
55 || File::Spec->catfile($ENV{HOME}, ".config/clivefeed");
56 my $CONFIGFILE = File::Spec->catfile($CONFIGDIR, "config");
57 my $PREFSFILE = File::Spec->catfile($CONFIGDIR, "prefs");
59 my %opts; # Holds the options
60 my @queue; # Holds the current URL queue
61 my $curl; # Holds the curl handle (reused throught lifespan)
62 my @channels; # Holds parsed channel data
63 my $mw; # Holds the main window handle (GUI)
64 my $pwmain; # Holds the handle to the main paned window
65 my $pwtop; # Holds the handle to the top paned window
66 my $pwbottom; # Holds the handle to the bottom paned window
67 my $lbchann; # Listbox: channels
68 my $lbitems; # Listbox: (channel) items
69 my $lbqueue; # Listbox: queued video items
70 my $txtdescr; # Text: video description
71 my %usersel; # Holds user-selected videos
73 # Parse config
74 my $conf = Config::Tiny->read($CONFIGFILE);
75 my $prefs = Config::Tiny->read($PREFSFILE);
76 %opts = (
77 clive => $conf->{clive}->{path},
78 opts => $conf->{clive}->{opts},
79 agent => $conf->{http}->{agent},
80 proxy => $conf->{http}->{proxy},
82 geometry=> $prefs->{gui}->{geometry},
83 pwmain => $prefs->{gui}->{pwmain},
84 pwtop => $prefs->{gui}->{pwtop},
85 pwbottom=> $prefs->{gui}->{pwbottom},
86 mainfont=> $prefs->{gui}->{mainfont},
89 # Parse cmdline
90 # Define those not read from config, init with defaults
91 $opts{quiet} = 0;
92 $opts{paste} = 0;
93 $opts{all} = 0;
94 $opts{debug} = 0;
95 $opts{help} = 0;
96 $opts{manual} = 0;
97 $opts{version} = 0;
98 $opts{mainfont} = $opts{mainfont} || "{helvetica} -12 bold";
100 GetOptions(\%opts,
101 'debug|d', 'help|h', 'manual|m', 'version|v', 'all|a',
102 'paste|x', 'quiet|q', 'clive|c=s', 'opts|o=s', 'agent|U=s',
103 'proxy|y=s',
104 # Workaround since '$longopt|shortopt' is a no-no.
105 'noproxy|X' => sub { $opts{proxy} = ""; },
106 ) or pod2usage(1);
108 # Since 'version|v' => \&print_version and exit cannot tango with tk
109 print_version(0) if $opts{version};
110 pod2usage(-exitstatus => 0, -verbose => 1) if $opts{help};
111 pod2usage(-exitstatus => 0, -verbose => 2) if $opts{manual};
113 $opts{clive} = $opts{clive} || $ENV{CLIVE_PATH};
114 find_clive() unless $opts{clive};
116 get_queue();
117 select STDERR; $| = 1; # => unbuffered
118 select STDOUT; $| = 1;
119 process_queue();
121 unless ( $opts{all} ) { init_gui(); }
122 else { grab_all(); }
125 ## Subroutines: Connection
127 sub init_curl {
128 $curl = WWW::Curl::Easy->new;
129 $curl->setopt(CURLOPT_USERAGENT, $opts{agent} || "Mozilla/5.0");
130 $curl->setopt(CURLOPT_PROXY, $opts{proxy}) if defined $opts{proxy};
131 $curl->setopt(CURLOPT_VERBOSE, 1) if $opts{debug};
132 $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
133 $curl->setopt(CURLOPT_AUTOREFERER, 1);
134 $curl->setopt(CURLOPT_HEADER, 0);
135 $curl->setopt(CURLOPT_NOBODY, 0);
138 sub fetch_feed {
139 my ($url, $response, $rc) = (shift, "", 0);
140 open my $rfh, ">", \$response;
142 print "Fetching $url ..." unless $opts{quiet};
143 $curl->setopt(CURLOPT_URL, $url);
144 $curl->setopt(CURLOPT_ENCODING, "");
145 $curl->setopt(CURLOPT_WRITEDATA, $rfh);
146 $rc = $curl->perform;
147 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
149 if ( $rc == 200 ) {
150 print "done.\n";
151 process_feed($url, $response);
152 } else {
153 print STDERR "\nerror: " .$curl->strerror($rc)." (http/$rc)\n";
155 close $rfh;
159 ## Subroutines: Queue
161 sub get_queue {
162 if ( $opts{paste} ) {
163 print STDERR "error: Clipboard module not found" and exit
164 unless $opted_mods{Clipboard};
165 my $data = Clipboard->paste();
166 if ( $data ) {
167 parse_input($_) foreach split/\n/,$data;
171 parse_input($_) foreach @ARGV;
172 unless ( @queue ) { parse_input($_) while ( <STDIN> ); }
174 my %h = map {$_,1} @queue; # Remove duplicates
175 @queue = keys %h;
178 sub process_queue {
179 init_curl();
180 fetch_feed($_) foreach (@queue);
183 sub process_feed {
184 my ($url, $response) = @_;
185 print "=> Processing feed ..." unless $opts{quiet};
187 my $rss = XML::RSS::LibXML->new;
188 $rss->parse($response);
189 push @channels, $rss;
191 print "done.\n" unless $opts{quiet};
194 sub grab_all {
195 my @q;
196 foreach my $rss ( @channels ) {
197 foreach my $item ( @{$rss->{items}} ) {
198 push @q, $item->{link};
201 run_clive(@q);
205 ## Subroutines: Helpers
207 sub parse_input {
208 my $url = shift;
210 return if $url =~ /^$/;
211 chomp $url;
213 $url = "http://$url" if $url !~ m!^http://!i;
214 push @queue, $url;
217 sub find_clive {
218 print "Trying to locate 'clive' ...";
220 find ( sub { $opts{clive} = $File::Find::name if ( $_ eq 'clive' ) },
221 split /:/, $ENV{PATH} || getcwd);
223 if ( $opts{clive} ) { print "$opts{clive}\n"; }
224 else { print STDERR "error: not found, use --clive=path\n"; exit; }
227 sub run_clive {
228 my (@q) = @_;
229 system "$opts{clive} $opts{opts} " . join(' ', @q);
232 sub print_version {
233 my $noexit = shift;
234 my $perl_v = sprintf "%vd", $^V;
235 my $clipb_v = $opted_mods{Clipboard} ? $Clipboard::VERSION : "-";
236 my $s = sprintf
237 "clivefeed version $VERSION. Copyright (C) 2008 Toni Gundogdu.
239 Perl: $perl_v ($^O)
240 Modules:
241 * Config::Tiny/$Config::Tiny::VERSION\t\t* WWW::Curl/$WWW::Curl::VERSION
242 * XML::RSS::LibXML/$XML::RSS::LibXML::VERSION\t* Clipboard/$clipb_v
243 * Tk/$Tk::VERSION\t\t\t* URI::Escape/$URI::Escape::VERSION
244 * Tk::DialogBox/$Tk::DialogBox::VERSION\t\t* HTML::Strip/$HTML::Strip::VERSION
245 * Tk::FontDialog/$Tk::FontDialog::VERSION\t\t* Tk::Tree/$Tk::Tree::VERSION
246 Core modules:
247 * Getopt::Long/$Getopt::Long::VERSION\t\t* Digest::SHA/$Digest::SHA::VERSION
248 * File::Spec/$File::Spec::VERSION\t\t* File::Find/$File::Find::VERSION
249 * File::Path/$File::Path::VERSION\t\t* Encode/$Encode::VERSION
250 * Pod::Usage/$Pod::Usage::VERSION\t\t* Cwd/$Cwd::VERSION
252 This program comes with ABSOLUTELY NO WARRANTY. You may redistribute copies of
253 clivefeed under the terms of the GNU General Public License as published by the
254 Free Software Foundation, either version 3 of the License, or (at your option)
255 any later version. You should have received a copy of the General Public License
256 along with this program. If not, see http://www.gnu.org/licenses/.
258 return $s if $noexit;
259 print $s; exit;
263 # GUI:
265 sub init_gui {
266 return unless @channels;
268 $mw = MainWindow->new;
269 $mw->geometry($opts{geometry}) if defined $opts{geometry};
270 $mw->title('clivefeed');
271 $mw->protocol('WM_DELETE_WINDOW', sub { save_prefs(); $mw->destroy } );
273 # Menubar
274 my $mb = $mw->Menu;
275 $mw->configure(-menu => $mb);
277 # Menu: File
278 my $file = $mb->cascade(-label => 'File', -underline => 0, -tearoff => 0);
279 $file->command(-label => 'Extract videos in queue...',
280 -underline => 0, -command => \&on_extract);
281 $file->separator;
282 $file->command(-label => 'Quit', -underline => 0,
283 -command => sub { save_prefs(); exit; } );
285 # Menu: Edit
286 my $edit = $mb->cascade(-label => 'Edit', -underline => 0, -tearoff => 0);
287 $edit->command(-label => 'Preferences...',
288 -underline => 0, -command => \&on_prefs);
290 # Menu: Help
291 my $help = $mb->cascade(-label => 'Help', -underline => 0, -tearoff => 0);
292 $help->command(-label => 'About...',
293 -underline => 0, -command => \&on_about);
295 # The GUI has an upper and a lower part
296 $pwmain = $mw->Panedwindow(-orient => 'v', -opaqueresize => 0);
298 # Upper part
299 $pwtop = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
301 # Upper: Channels
302 my $lbar = $pwtop->Frame;
304 $lbchann = $lbar->Scrolled('Tree',
305 -scrollbars => 'osoe',
306 -itemtype => 'text',
307 -selectmode => 'extended',
308 -browsecmd => \&on_chann,
309 -indicator => 1,
310 -drawbranch => 1,
311 )->pack(-side => 'top', -expand => 1, -fill => 'both');
313 foreach my $rss ( @channels ) {
314 my $chann = $rss->{channel}->{title};
315 $chann =~ tr{.}{}d;
317 $lbchann->add($chann);
318 $lbchann->itemCreate($chann, 0, -text => $chann, -itemtype => 'text');
320 foreach my $item ( @{$rss->{items}} ) {
321 my $title = $item->{title};
322 $title =~ tr{.}{}d;
324 my $path;
325 for ( my $i=0;; ++$i ) {
326 $path = "$chann.$title (#$i)";
327 last unless $lbchann->infoExists($path);
330 $lbchann->add($path, -data => $item);
331 $lbchann->itemCreate($path, 0,
332 -text => $item->{title}, -itemtype => 'text');
335 $lbchann->autosetmode;
336 $lbchann->close($_) foreach ( $lbchann->infoChildren('') );
338 $lbar->Button(-text => 'Grab video', -command => \&on_grab
339 )->pack(-fill => 'x', -side => 'left');
340 $lbar->Button(-text => 'Grab channel', -command => \&on_grab_chann
341 )->pack(-fill => 'x', -side => 'left');
342 $lbar->Button(-text => 'Grab everything', -command => \&on_grab_all
343 )->pack(-fill => 'x', -side => 'left');
345 my $rbar = $pwtop->Frame;
346 $txtdescr = $rbar->Scrolled('Text', -scrollbars => 'osoe',
347 )->pack(-fill => 'both', -expand => 1);
349 $pwtop->add($lbar, $rbar, -width => $opts{pwtop} ? $opts{pwtop}:200);
351 # Lower part
352 $pwbottom = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
354 $lbqueue = $pwbottom->Scrolled('Tree',
355 -scrollbars => 'osoe',
356 -itemtype => 'text',
357 -selectmode => 'extended',
358 -browsecmd => \&on_queue,
359 -indicator => 1,
360 -drawbranch => 1,
363 my $bar = $pwbottom->Frame; # Button toorbar
364 $bar->Button(-text => 'Remove', -command=> \&on_remove,
365 )->pack(-fill => 'x');
367 $bar->Button(-text => 'Clear', -command=> \&on_clear,
368 )->pack(-fill => 'x');
370 $bar->Button(-text => 'Extract videos...', -command=> \&on_extract,
371 )->pack(-fill => 'x', -side => 'bottom');
373 $pwbottom->add($lbqueue, $bar, -width => $opts{pwbottom} || 200);
375 # Add upper and lower parts to main paned window
376 $pwmain->add($pwtop, $pwbottom, -height => $opts{pwmain} || 200);
378 $mw->RefontTree(-font => $opts{mainfont});
379 $pwmain->pack(-expand => 1, -fill => 'both');
381 MainLoop;
384 sub set_descr {
385 my ($lb, $path) = @_;
387 $txtdescr->delete('1.0', 'end');
389 my $item = $lb->infoData($path);
390 return unless defined $item;
392 my $strip = HTML::Strip->new;
393 my $descr = $strip->parse($item->{description});
394 $descr =~ s/^\s+|\s+$//g;
396 $txtdescr->insert('end', $descr);
399 sub on_chann {
400 set_descr($lbchann, shift);
403 sub on_queue {
404 set_descr($lbqueue, shift);
407 sub queue_item {
408 my $path = shift;
409 return if $path !~ /\./;
410 return if $lbqueue->infoExists($path);
412 my $item = $lbchann->infoData($path);
413 my ($chann) = split /\./, $path;
415 unless ( $lbqueue->infoExists($chann) ) {
416 $lbqueue->add($chann);
417 $lbqueue->itemCreate($chann, 0,
418 -text => $chann, -itemtype => 'text');
421 $lbqueue->add($path, -data => $item);
422 $lbqueue->itemCreate($path, 0,
423 -text => $item->{title}, -itemtype => 'text');
426 sub on_grab {
427 queue_item($_) foreach ( $lbchann->infoSelection );
428 $lbqueue->autosetmode;
431 sub on_grab_chann {
432 foreach ( $lbchann->infoSelection ) {
433 my ($parent) = split /\./;
434 queue_item($_)
435 foreach ( $lbchann->infoChildren($parent) );
437 $lbqueue->autosetmode;
440 sub on_grab_all {
441 foreach ( $lbchann->infoChildren("") ) {
442 my ($parent) = split /\./;
443 queue_item($_)
444 foreach ($lbchann->infoChildren($parent) );
446 $lbqueue->autosetmode;
449 sub on_remove {
450 $lbqueue->deleteEntry($_)
451 foreach ( $lbqueue->infoSelection );
454 sub on_clear {
455 $lbqueue->deleteAll;
458 sub on_about {
459 my $dlg = $mw->DialogBox(-title => 'About', -buttons => ['OK']);
460 my $txt = $dlg->add('Text')->pack;
461 $txt->insert('end', print_version(1));
462 $dlg->Show;
465 sub change_font {
466 my ($top, $lblv, $lbl) = @_;
467 my $font = $top->FontDialog(-initfont => $$lblv)->Show;
469 if ( defined $font ) {
470 my $descr = $top->FontDialog->GetDescriptiveFontName($font);
471 $lbl->configure(-font => $descr);
472 $$lblv = $descr;
476 sub on_prefs {
477 my $dlg = $mw->DialogBox(-title => 'clivefeed preferences',
478 -buttons => ['OK','Cancel']);
480 $dlg->add('Label', -text => 'Fonts: press to choose'
481 )->grid(-sticky => 'w', -pady => 10);
483 my ($mainfont) = ($opts{mainfont});
484 my $mainfontl = $dlg->Label(-textvariable => \$mainfont);
486 $dlg->add('Button', -text => 'Main font',
487 -command => sub { change_font($dlg, \$mainfont, $mainfontl) }
488 )->grid($mainfontl, -sticky => 'w', -padx => '5');
490 on_prefs_ok($mainfont) if $dlg->Show eq 'OK';
493 sub on_prefs_ok {
494 ($opts{mainfont}) = @_;
495 $mw->RefontTree(-font => $opts{mainfont});
496 save_prefs();
499 sub save_prefs {
500 mkpath( [$CONFIGDIR], 1, 0700);
502 my $c = Config::Tiny->new;
503 $c->{gui}->{geometry} = $mw->geometry();
505 # FIXME: +7 is added to the coords even if the sashes have not been
506 # dragged. Unsure why. The increase is probably system specific.
507 $c->{gui}->{pwmain} = ($pwmain->sashCoord(0))[1]-7;
508 $c->{gui}->{pwtop} = ($pwtop->sashCoord(0))[0]-7;
509 $c->{gui}->{pwbottom} = ($pwbottom->sashCoord(0))[0]-7;
510 $c->{gui}->{mainfont} = $opts{mainfont};
512 $c->write($PREFSFILE);
515 sub on_extract {
516 my %re = ( # GVideo has the tendency to wrap everything.
517 UnwrapGVideo => qr|\Qgoogle.com/url?q=\E(.*?)\&|i,
520 my @q;
521 foreach ( $lbqueue->infoChildren('') ) {
522 foreach ( $lbqueue->infoChildren($_) ) {
523 my $item = $lbqueue->infoData($_);
524 my $link = uri_unescape($item->{link});
525 $link = $1 if $link =~ /$re{UnwrapGVideo}/;
526 push @q, $link;
529 return unless @q;
531 # Prompt for clive(1) options
532 my $dlg = $mw->DialogBox(-title => 'clive(1) options',
533 -buttons => ['OK','Cancel']);
535 $dlg->add('Label', -text => 'Path to clive'
536 )->grid(my $clivepath = $dlg->Entry(-width => 60),
537 -sticky => 'w', -padx => '5');
539 $dlg->add('Label', -text => 'Runtime options'
540 )->grid(my $cliveopts = $dlg->Entry(-width => 60),
541 -sticky => 'w', -padx => '5');
543 $clivepath->insert('end', $opts{clive});
544 $cliveopts->insert('end', $opts{opts});
546 if ( $dlg->Show() eq 'OK' ) {
547 $opts{clive} = $clivepath->get;
548 $opts{opts} = $cliveopts->get;
549 $mw->destroy;
550 run_clive(@q);
555 __END__
557 =head1 NAME
559 clivefeed - the feed parsing utility for clive
561 =head1 SYNOPSIS
563 clivefeed [option]... [URL]...
565 =head1 DESCRIPTION
567 clivefeed is an utility that parses RSS feeds containing video page links and
568 uses L<clive(1)> to extract them.
570 Historically, the feed parsing function was part of L<clive(1)>
571 and it was written in Python/Newt. The clivefeed utility was written
572 in Perl/Tk to replace the feature that was removed in clive 2.0. This
573 utility is part of the B<clive-utils> project.
575 =head1 OPTIONS
577 You may freely specify options after the command-line arguments. For example:
579 % clivefeed -a URL --opts=--noextract
581 B<Basic Options>
583 =over 4
585 =item B<-h --help>
587 Show help and exit.
589 =item B<--version>
591 Show version and exit.
593 =item B<--clive=>I<path>
595 I<path> to L<clive(1)> command. If unspecified, clivefeed will attempt to
596 locate it in the $PATH. Additionally, the B<CLIVE_PATH> environment variable
597 can be used. See also L</CONFIG>.
599 =item B<--opts=>I<opts>
601 I<opts> to append to clive call. See L<clive(1)> for more on the available
602 options.
604 =item B<-a --all>
606 Grab all videos without prompting the GUI.
608 =back
610 B<HTTP Options>
612 =over 4
614 =item B<-U --agent=>I<string>
616 Identify as I<string> to the HTTP server. Defaults to "Mozilla/5.0".
618 =item B<-y --proxy=>I<address>
620 Use I<address> for HTTP proxy, e.g. http://foo:1234. If http_proxy
621 environment variable is defined, it will be used.
623 =item B<-X --noproxy>
625 Do not use the defined HTTP proxy (B<--proxy>, config or http_proxy).
627 =back
629 =head1 EXAMPLES
631 =over 4
633 =item % clivefeed "http://youtube.com/rss/user/communitychannel/videos.rss"
635 Parses the feed at the specified URL.
637 =item % cat E<gt>E<gt> url.lst
639 http://youtube.com/rss/user/googletechtalks/videos.rss
640 http://youtube.com/rss/user/theonion/videos.rss
641 http://youtube.com/rss/user/lisanova/videos.rss
642 http://youtube.com/rss/user/clipcritics/videos.rss
643 http://youtube.com/rss/user/communitychannel/videos.rss
644 http://youtube.com/rss/user/manintheboxshow/videos.rss
646 =item % cat url.lst | clivefeed
648 Reads input from UNIX pipe.
650 =item % clivefeed --opts="-f mp4"
652 Append the I<opts> to the L<clive(1)> call.
654 =item % clivefeed --all URL
656 Grabs all found videos without prompting.
658 =back
660 =head1 FILES
662 By default, clivefeed searches the ~/.config/clivefeed directory for the
663 config file. The B<CLIVEFEED_CONFIGDIR> environment variable can be used
664 to override this behaviour.
666 =over 4
668 =item ~/.config/clivefeed/config
670 Configuration file.
672 =item ~/.config/clivefeed/prefs
674 GUI preferences (e.g. fonts, window position, sash coords, ...).
676 =back
678 =head1 CONFIG
680 ## Example config file for clivefeed.
682 [clive]
683 path = /usr/local/bin/clive
684 opts = -f mp4
686 [http]
687 agent = Mozilla/5.0
688 proxy = http://foo:1234
690 =head1 SEE ALSO
692 L<clive(1)> L<clivescan(1)>
694 =head1 OTHER
696 Project: http://googlecode.com/p/clive-utils/
698 A clive-utils development repository can be obtained from:
700 % git clone git://repo.or.cz/clive-utils.git
702 Patches welcome.
704 =head1 AUTHOR
706 Written by Toni Gundogdu <legatvs@gmail.com>
708 =cut