Fixed crashing with duplicate item paths.
[clive-utils.git] / clivefeed
blob9a64d00c0854e7f12c0ecbaf802324f1b8d73f9d
1 #!/usr/bin/env perl
2 # -*- coding: ascii -*-
3 ###########################################################################
4 # clivefeed, the feed parsing utility for clive
5 # Copyright (C) 2008 Toni Gundogdu.
7 # This file is part of clive-utils.
9 # clivefeed is free software: you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation, either version 3 of the License, or
12 # (at your option) any later version.
14 # clivefeed is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with clivefeed. If not, see <http://www.gnu.org/licenses/>.
21 ###########################################################################
23 # Keep it simple.
25 use warnings;
26 use strict;
28 binmode(STDOUT, ":utf8");
30 use XML::RSS::LibXML;
31 use WWW::Curl::Easy;
32 use Tk::FontDialog;
33 use Tk::DialogBox;
34 use Config::Tiny;
35 use HTML::Strip;
36 use Tk::Tree;
37 use Tk;
39 # Core modules:
40 use Getopt::Long qw(:config bundling);
41 use File::Spec;
42 use File::Find;
43 use File::Path;
44 use Pod::Usage;
45 use Encode;
46 use Cwd;
48 # Non-essentials
49 my %opted_mods = (Clipboard => 1);
50 eval "use Clipboard"; $opted_mods{Clipboard}=0 if $@;
52 my $VERSION = "2.0beta2";
53 my $CONFIGDIR = $ENV{CLIVEFEED_CONFIGDIR}
54 || File::Spec->catfile($ENV{HOME}, ".config/clivefeed");
55 my $CONFIGFILE = File::Spec->catfile($CONFIGDIR, "config");
56 my $PREFSFILE = File::Spec->catfile($CONFIGDIR, "prefs");
58 my %opts; # Holds the options
59 my @queue; # Holds the current URL queue
60 my $curl; # Holds the curl handle (reused throught lifespan)
61 my @channels; # Holds parsed channel data
62 my $mw; # Holds the main window handle (GUI)
63 my $pwmain; # Holds the handle to the main paned window
64 my $pwtop; # Holds the handle to the top paned window
65 my $pwbottom; # Holds the handle to the bottom paned window
66 my $lbchann; # Listbox: channels
67 my $lbitems; # Listbox: (channel) items
68 my $lbqueue; # Listbox: queued video items
69 my $txtdescr; # Text: video description
70 my %usersel; # Holds user-selected videos
72 # Parse config
73 my $conf = Config::Tiny->read($CONFIGFILE);
74 my $prefs = Config::Tiny->read($PREFSFILE);
75 %opts = (
76 clive => $conf->{clive}->{path},
77 opts => $conf->{clive}->{opts},
78 agent => $conf->{http}->{agent},
79 proxy => $conf->{http}->{proxy},
81 geometry=> $prefs->{gui}->{geometry},
82 pwmain => $prefs->{gui}->{pwmain},
83 pwtop => $prefs->{gui}->{pwtop},
84 pwbottom=> $prefs->{gui}->{pwbottom},
85 mainfont=> $prefs->{gui}->{mainfont},
88 # Parse cmdline
89 # Define those not read from config, init with defaults
90 $opts{quiet} = 0;
91 $opts{paste} = 0;
92 $opts{all} = 0;
93 $opts{debug} = 0;
94 $opts{help} = 0;
95 $opts{manual} = 0;
96 $opts{version} = 0;
97 $opts{mainfont} = $opts{mainfont} || "{helvetica} -12 bold";
99 GetOptions(\%opts,
100 'debug|d', 'help|h', 'manual|m', 'version|v', 'all|a',
101 'paste|x', 'quiet|q', 'clive|c=s', 'opts|o=s', 'agent|U=s',
102 'proxy|y=s',
103 # Workaround since '$longopt|shortopt' is a no-no.
104 'noproxy|X' => sub { $opts{proxy} = ""; },
105 ) or pod2usage(1);
107 # Since 'version|v' => \&print_version and exit cannot tango with tk
108 print_version(0) if $opts{version};
109 pod2usage(-exitstatus => 0, -verbose => 1) if $opts{help};
110 pod2usage(-exitstatus => 0, -verbose => 2) if $opts{manual};
112 $opts{clive} = $opts{clive} || $ENV{CLIVE_PATH};
113 find_clive() unless $opts{clive};
115 get_queue();
116 select STDERR; $| = 1; # => unbuffered
117 select STDOUT; $| = 1;
118 process_queue();
120 unless ( $opts{all} ) { init_gui(); }
121 else { grab_all(); }
124 ## Subroutines: Connection
126 sub init_curl {
127 $curl = WWW::Curl::Easy->new;
128 $curl->setopt(CURLOPT_USERAGENT, $opts{agent} || "Mozilla/5.0");
129 $curl->setopt(CURLOPT_PROXY, $opts{proxy}) if defined $opts{proxy};
130 $curl->setopt(CURLOPT_VERBOSE, 1) if $opts{debug};
131 $curl->setopt(CURLOPT_FOLLOWLOCATION, 1);
132 $curl->setopt(CURLOPT_AUTOREFERER, 1);
133 $curl->setopt(CURLOPT_HEADER, 0);
134 $curl->setopt(CURLOPT_NOBODY, 0);
137 sub fetch_feed {
138 my ($url, $response, $rc) = (shift, "", 0);
139 open my $rfh, ">", \$response;
141 print "Fetching $url ..." unless $opts{quiet};
142 $curl->setopt(CURLOPT_URL, $url);
143 $curl->setopt(CURLOPT_ENCODING, "");
144 $curl->setopt(CURLOPT_WRITEDATA, $rfh);
145 $rc = $curl->perform;
146 $rc = $curl->getinfo(CURLINFO_RESPONSE_CODE);
148 if ( $rc == 200 ) {
149 print "done.\n";
150 process_feed($url, $response);
151 } else {
152 print STDERR "\nerror: " .$curl->strerror($rc)." (http/$rc)\n";
154 close $rfh;
158 ## Subroutines: Queue
160 sub get_queue {
161 if ( $opts{paste} ) {
162 print STDERR "error: Clipboard module not found" and exit
163 unless $opted_mods{Clipboard};
164 my $data = Clipboard->paste();
165 if ( $data ) {
166 parse_input($_) foreach split/\n/,$data;
170 parse_input($_) foreach @ARGV;
171 unless ( @queue ) { parse_input($_) while ( <STDIN> ); }
173 my %h = map {$_,1} @queue; # Remove duplicates
174 @queue = keys %h;
177 sub process_queue {
178 init_curl();
179 fetch_feed($_) foreach (@queue);
182 sub process_feed {
183 my ($url, $response) = @_;
184 print "=> Processing feed ..." unless $opts{quiet};
186 my $rss = XML::RSS::LibXML->new;
187 $rss->parse($response);
188 push @channels, $rss;
190 print "done.\n" unless $opts{quiet};
193 sub grab_all {
194 my @q;
195 foreach my $rss ( @channels ) {
196 foreach my $item ( @{$rss->{items}} ) {
197 push @q, $item->{link};
200 run_clive(@q);
204 ## Subroutines: Helpers
206 sub parse_input {
207 my $url = shift;
209 return if $url =~ /^$/;
210 chomp $url;
212 $url = "http://$url" if $url !~ m!^http://!i;
213 push @queue, $url;
216 sub find_clive {
217 print "Trying to locate 'clive' ...";
219 find ( sub { $opts{clive} = $File::Find::name if ( $_ eq 'clive' ) },
220 split /:/, $ENV{PATH} || getcwd);
222 if ( $opts{clive} ) { print "$opts{clive}\n"; }
223 else { print STDERR "error: not found, use --clive=path\n"; exit; }
226 sub run_clive {
227 my (@q) = @_;
228 system "$opts{clive} $opts{opts} " . join(' ', @q);
231 sub print_version {
232 my $noexit = shift;
233 my $perl_v = sprintf "%vd", $^V;
234 my $clipb_v = $opted_mods{Clipboard} ? $Clipboard::VERSION : "-";
235 my $s = sprintf
236 "clivefeed version $VERSION. Copyright (C) 2008 Toni Gundogdu.
238 Perl: $perl_v ($^O)
239 Modules:
240 * Config::Tiny/$Config::Tiny::VERSION\t\t* WWW::Curl/$WWW::Curl::VERSION
241 * XML::RSS::LibXML/$XML::RSS::LibXML::VERSION\t* Clipboard/$clipb_v
242 * Tk/$Tk::VERSION\t\t\t* Tk::Tree/$Tk::Tree::VERSION
243 * Tk::DialogBox/$Tk::DialogBox::VERSION\t\t* HTML::Strip/$HTML::Strip::VERSION
244 * Tk::FontDialog/$Tk::FontDialog::VERSION
245 Core modules:
246 * Getopt::Long/$Getopt::Long::VERSION\t\t* Digest::SHA/$Digest::SHA::VERSION
247 * File::Spec/$File::Spec::VERSION\t\t* File::Find/$File::Find::VERSION
248 * File::Path/$File::Path::VERSION\t\t* Encode/$Encode::VERSION
249 * Pod::Usage/$Pod::Usage::VERSION\t\t* Cwd/$Cwd::VERSION
251 This program comes with ABSOLUTELY NO WARRANTY. You may redistribute copies of
252 clivefeed under the terms of the GNU General Public License as published by the
253 Free Software Foundation, either version 3 of the License, or (at your option)
254 any later version. You should have received a copy of the General Public License
255 along with this program. If not, see http://www.gnu.org/licenses/.
257 return $s if $noexit;
258 print $s; exit;
262 # GUI:
264 sub init_gui {
265 return unless @channels;
267 $mw = MainWindow->new;
268 $mw->geometry($opts{geometry}) if defined $opts{geometry};
269 $mw->title('clivefeed');
270 $mw->protocol('WM_DELETE_WINDOW', sub { save_prefs(); $mw->destroy } );
272 # Menubar
273 my $mb = $mw->Menu;
274 $mw->configure(-menu => $mb);
276 # Menu: File
277 my $file = $mb->cascade(-label => 'File', -underline => 0, -tearoff => 0);
278 $file->command(-label => 'Extract videos in queue...',
279 -underline => 0, -command => \&on_extract);
280 $file->separator;
281 $file->command(-label => 'Quit', -underline => 0,
282 -command => sub { save_prefs(); exit; } );
284 # Menu: Edit
285 my $edit = $mb->cascade(-label => 'Edit', -underline => 0, -tearoff => 0);
286 $edit->command(-label => 'Preferences...',
287 -underline => 0, -command => \&on_prefs);
289 # Menu: Help
290 my $help = $mb->cascade(-label => 'Help', -underline => 0, -tearoff => 0);
291 $help->command(-label => 'About...',
292 -underline => 0, -command => \&on_about);
294 # The GUI has an upper and a lower part
295 $pwmain = $mw->Panedwindow(-orient => 'v', -opaqueresize => 0);
297 # Upper part
298 $pwtop = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
300 # Upper: Channels
301 my $lbar = $pwtop->Frame;
303 $lbchann = $lbar->Scrolled('Tree',
304 -scrollbars => 'osoe',
305 -itemtype => 'text',
306 -selectmode => 'extended',
307 -browsecmd => \&on_chann,
308 -indicator => 1,
309 -drawbranch => 1,
310 )->pack(-side => 'top', -expand => 1, -fill => 'both');
312 foreach my $rss ( @channels ) {
313 my $chann = $rss->{channel}->{title};
314 $chann =~ tr{.}{}d;
316 $lbchann->add($chann);
317 $lbchann->itemCreate($chann, 0, -text => $chann, -itemtype => 'text');
319 foreach my $item ( @{$rss->{items}} ) {
320 my $title = $item->{title};
321 $title =~ tr{.}{}d;
323 my $path;
324 for ( my $i=0;; ++$i ) {
325 $path = "$chann.$title (#$i)";
326 last unless $lbchann->infoExists($path);
329 $lbchann->add($path, -data => $item);
330 $lbchann->itemCreate($path, 0,
331 -text => $item->{title}, -itemtype => 'text');
334 $lbchann->autosetmode;
335 $lbchann->close($_) foreach ( $lbchann->infoChildren('') );
337 $lbar->Button(-text => 'Grab video', -command => \&on_grab
338 )->pack(-fill => 'x', -side => 'left');
339 $lbar->Button(-text => 'Grab channel', -command => \&on_grab_chann
340 )->pack(-fill => 'x', -side => 'left');
341 $lbar->Button(-text => 'Grab everything', -command => \&on_grab_all
342 )->pack(-fill => 'x', -side => 'left');
344 my $rbar = $pwtop->Frame;
345 $txtdescr = $rbar->Scrolled('Text', -scrollbars => 'osoe',
346 )->pack(-fill => 'both', -expand => 1);
348 $pwtop->add($lbar, $rbar, -width => $opts{pwtop} ? $opts{pwtop}:200);
350 # Lower part
351 $pwbottom = $pwmain->Panedwindow(-orient => 'h', -opaqueresize => 0);
353 $lbqueue = $pwbottom->Scrolled('Tree',
354 -scrollbars => 'osoe',
355 -itemtype => 'text',
356 -selectmode => 'extended',
357 -browsecmd => \&on_queue,
358 -indicator => 1,
359 -drawbranch => 1,
362 my $bar = $pwbottom->Frame; # Button toorbar
363 $bar->Button(-text => 'Remove', -command=> \&on_remove,
364 )->pack(-fill => 'x');
366 $bar->Button(-text => 'Clear', -command=> \&on_clear,
367 )->pack(-fill => 'x');
369 $bar->Button(-text => 'Extract videos...', -command=> \&on_extract,
370 )->pack(-fill => 'x', -side => 'bottom');
372 $pwbottom->add($lbqueue, $bar, -width => $opts{pwbottom} || 200);
374 # Add upper and lower parts to main paned window
375 $pwmain->add($pwtop, $pwbottom, -height => $opts{pwmain} || 200);
377 $mw->RefontTree(-font => $opts{mainfont});
378 $pwmain->pack(-expand => 1, -fill => 'both');
380 MainLoop;
383 sub set_descr {
384 my ($lb, $path) = @_;
386 $txtdescr->delete('1.0', 'end');
388 my $item = $lb->infoData($path);
389 return unless defined $item;
391 my $strip = HTML::Strip->new;
392 my $descr = $strip->parse($item->{description});
393 $descr =~ s/^\s+|\s+$//g;
395 $txtdescr->insert('end', $descr);
398 sub on_chann {
399 set_descr($lbchann, shift);
402 sub on_queue {
403 set_descr($lbqueue, shift);
406 sub queue_item {
407 my $path = shift;
408 return if $path !~ /\./;
409 return if $lbqueue->infoExists($path);
411 my $item = $lbchann->infoData($path);
412 my ($chann) = split /\./, $path;
414 unless ( $lbqueue->infoExists($chann) ) {
415 $lbqueue->add($chann);
416 $lbqueue->itemCreate($chann, 0,
417 -text => $chann, -itemtype => 'text');
420 $lbqueue->add($path, -data => $item);
421 $lbqueue->itemCreate($path, 0,
422 -text => $item->{title}, -itemtype => 'text');
425 sub on_grab {
426 queue_item($_) foreach ( $lbchann->infoSelection );
427 $lbqueue->autosetmode;
430 sub on_grab_chann {
431 foreach ( $lbchann->infoSelection ) {
432 my ($parent) = split /\./;
433 queue_item($_)
434 foreach ( $lbchann->infoChildren($parent) );
436 $lbqueue->autosetmode;
439 sub on_grab_all {
440 foreach ( $lbchann->infoChildren("") ) {
441 my ($parent) = split /\./;
442 queue_item($_)
443 foreach ($lbchann->infoChildren($parent) );
445 $lbqueue->autosetmode;
448 sub on_remove {
449 $lbqueue->deleteEntry($_)
450 foreach ( $lbqueue->infoSelection );
453 sub on_clear {
454 $lbqueue->deleteAll;
457 sub on_about {
458 my $dlg = $mw->DialogBox(-title => 'About', -buttons => ['OK']);
459 my $txt = $dlg->add('Text')->pack;
460 $txt->insert('end', print_version(1));
461 $dlg->Show;
464 sub change_font {
465 my ($top, $lblv, $lbl) = @_;
466 my $font = $top->FontDialog(-initfont => $$lblv)->Show;
468 if ( defined $font ) {
469 my $descr = $top->FontDialog->GetDescriptiveFontName($font);
470 $lbl->configure(-font => $descr);
471 $$lblv = $descr;
475 sub on_prefs {
476 my $dlg = $mw->DialogBox(-title => 'clivefeed preferences',
477 -buttons => ['OK','Cancel']);
479 $dlg->add('Label', -text => 'Fonts: press to choose'
480 )->grid(-sticky => 'w', -pady => 10);
482 my ($mainfont) = ($opts{mainfont});
483 my $mainfontl = $dlg->Label(-textvariable => \$mainfont);
485 $dlg->add('Button', -text => 'Main font',
486 -command => sub { change_font($dlg, \$mainfont, $mainfontl) }
487 )->grid($mainfontl, -sticky => 'w', -padx => '5');
489 on_prefs_ok($mainfont) if $dlg->Show eq 'OK';
492 sub on_prefs_ok {
493 ($opts{mainfont}) = @_;
494 $mw->RefontTree(-font => $opts{mainfont});
495 save_prefs();
498 sub save_prefs {
499 mkpath( [$CONFIGDIR], 1, 0700);
501 my $c = Config::Tiny->new;
502 $c->{gui}->{geometry} = $mw->geometry();
504 # FIXME: +7 is added to the coords even if the sashes have not been
505 # dragged. Unsure why. The increase is probably system specific.
506 $c->{gui}->{pwmain} = ($pwmain->sashCoord(0))[1]-7;
507 $c->{gui}->{pwtop} = ($pwtop->sashCoord(0))[0]-7;
508 $c->{gui}->{pwbottom} = ($pwbottom->sashCoord(0))[0]-7;
509 $c->{gui}->{mainfont} = $opts{mainfont};
511 $c->write($PREFSFILE);
514 sub on_extract {
515 my @q;
516 foreach ( $lbqueue->infoChildren('') ) {
517 foreach ( $lbqueue->infoChildren($_) ) {
518 my $item = $lbqueue->infoData($_);
519 push @q, $item->{link};
522 return unless @q;
524 # Prompt for clive(1) options
525 my $dlg = $mw->DialogBox(-title => 'clive(1) options',
526 -buttons => ['OK','Cancel']);
528 $dlg->add('Label', -text => 'Path to clive'
529 )->grid(my $clivepath = $dlg->Entry(-width => 60),
530 -sticky => 'w', -padx => '5');
532 $dlg->add('Label', -text => 'Runtime options'
533 )->grid(my $cliveopts = $dlg->Entry(-width => 60),
534 -sticky => 'w', -padx => '5');
536 $clivepath->insert('end', $opts{clive});
537 $cliveopts->insert('end', $opts{opts});
539 if ( $dlg->Show() eq 'OK' ) {
540 $opts{clive} = $clivepath->get;
541 $opts{opts} = $cliveopts->get;
542 $mw->destroy;
543 run_clive(@q);
548 __END__
550 =head1 NAME
552 clivefeed - the feed parsing utility for clive
554 =head1 SYNOPSIS
556 clivefeed [option]... [URL]...
558 =head1 DESCRIPTION
560 clivefeed is an utility that parses RSS feeds containing video page links and
561 uses L<clive(1)> to extract them.
563 Historically, the feed parsing function was part of L<clive(1)>
564 and it was written in Python/Newt. The clivefeed utility was written
565 in Perl/Tk to replace the feature that was removed in clive 2.0. This
566 utility is part of the B<clive-utils> project.
568 =head1 OPTIONS
570 You may freely specify options after the command-line arguments. For example:
572 % clivefeed -a URL --opts=--noextract
574 B<Basic Options>
576 =over 4
578 =item B<-h --help>
580 Show help and exit.
582 =item B<--version>
584 Show version and exit.
586 =item B<--clive=>I<path>
588 I<path> to L<clive(1)> command. If unspecified, clivefeed will attempt to
589 locate it in the $PATH. Additionally, the B<CLIVE_PATH> environment variable
590 can be used. See also L</CONFIG>.
592 =item B<--opts=>I<opts>
594 I<opts> to append to clive call. See L<clive(1)> for more on the available
595 options.
597 =item B<-a --all>
599 Grab all videos without prompting the GUI.
601 =back
603 B<HTTP Options>
605 =over 4
607 =item B<-U --agent=>I<string>
609 Identify as I<string> to the HTTP server. Defaults to "Mozilla/5.0".
611 =item B<-y --proxy=>I<address>
613 Use I<address> for HTTP proxy, e.g. http://foo:1234. If http_proxy
614 environment variable is defined, it will be used.
616 =item B<-X --noproxy>
618 Do not use the defined HTTP proxy (B<--proxy>, config or http_proxy).
620 =back
622 =head1 EXAMPLES
624 =over 4
626 =item % clivefeed "http://youtube.com/rss/user/communitychannel/videos.rss"
628 Parses the feed at the specified URL.
630 =item % cat E<gt>E<gt> url.lst
632 http://youtube.com/rss/user/googletechtalks/videos.rss
633 http://youtube.com/rss/user/theonion/videos.rss
634 http://youtube.com/rss/user/lisanova/videos.rss
635 http://youtube.com/rss/user/clipcritics/videos.rss
636 http://youtube.com/rss/user/communitychannel/videos.rss
637 http://youtube.com/rss/user/manintheboxshow/videos.rss
639 =item % cat url.lst | clivefeed
641 Reads input from UNIX pipe.
643 =item % clivefeed --opts="-f mp4"
645 Append the I<opts> to the L<clive(1)> call.
647 =item % clivefeed --all URL
649 Grabs all found videos without prompting.
651 =back
653 =head1 FILES
655 By default, clivefeed searches the ~/.config/clivefeed directory for the
656 config file. The B<CLIVEFEED_CONFIGDIR> environment variable can be used
657 to override this behaviour.
659 =over 4
661 =item ~/.config/clivefeed/config
663 Configuration file.
665 =item ~/.config/clivefeed/prefs
667 GUI preferences (e.g. fonts, window position, sash coords, ...).
669 =back
671 =head1 CONFIG
673 ## Example config file for clivefeed.
675 [clive]
676 path = /usr/local/bin/clive
677 opts = -f mp4
679 [http]
680 agent = Mozilla/5.0
681 proxy = http://foo:1234
683 =head1 SEE ALSO
685 L<clive(1)> L<clivescan(1)>
687 =head1 OTHER
689 Project: http://googlecode.com/p/clive-utils/
691 A clive-utils development repository can be obtained from:
693 % git clone git://repo.or.cz/clive-utils.git
695 Patches welcome.
697 =head1 AUTHOR
699 Written by Toni Gundogdu <legatvs@gmail.com>
701 =cut