[tpwd] Fix segfault when exactly one argument given
[tinyapps.git] / getlyrics.pl
blobe5f33cb98f636b18a766d2722615055d0c7eb43c
1 #!/usr/bin/perl
2 ##
3 ## Get lyrics from Internet for specified song
4 ## Copyright (c) 2005-2010 by Michal Nazarewicz (mina86/AT/mina86.com)
5 ## Copyright (c) 2009 by Mirosław "Minio" Zalewski <miniopl@gmail.com>
6 ## http://minio.xt.pl (lyrics retrieving code)
7 ##
8 ## This program is free software; you can redistribute it and/or modify
9 ## it under the terms of the GNU General Public License as published by
10 ## the Free Software Foundation; either version 3 of the License, or
11 ## (at your option) any later version.
13 ## This program is distributed in the hope that it will be useful,
14 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ## GNU General Public License for more details.
18 ## You should have received a copy of the GNU General Public License
19 ## along with this program; if not, see <http://www.gnu.org/licenses/>.
21 ## This is part of Tiny Applications Collection
22 ## -> http://tinyapps.sourceforge.net/
26 # Documentation at the end of file.
29 use strict;
30 use warnings;
32 use Pod::Usage;
34 use LWP::UserAgent;
35 use HTML::TreeBuilder;
37 use encoding "utf8";
38 use Encode;
39 use Text::Unidecode;
41 my $VERSION = '0.5';
42 my $GLOBAL_CACHE = '/usr/share/lyrics/';
43 my $LOCAL_CACHE = $ENV{'HOME'} . '/.lyrics/';
46 ## editFile ($file, $editor, $delete)
47 sub editFile ($$$);
51 ## Parse args
53 my ($edit, $editor) = ( 0, '' );
54 if (@ARGV && $ARGV[0] =~ /^--edit(?:=(.*))?$/) {
55 $edit = 1;
56 $editor = $1 ? $1 : '';
57 shift @ARGV;
60 if (!@ARGV) {
61 pod2usage( -exitval => 1, -verbose => 0, -output => \*STDERR );
62 } elsif ($ARGV[0] eq '--help' || $ARGV[0] eq '-h' || $ARGV[0] eq '-?') {
63 pod2usage( -exitval => 0, -verbose => 1 );
64 } elsif ($ARGV[0] eq '--man') {
65 pod2usage( -exitval => 0, -verbose => 2 );
69 my ($src, $arg, $foo, @foo) = ('args');
70 if ($ARGV[0] =~ /^--(.*)$/) {
71 $src = $1;
72 shift;
74 $arg = "@ARGV";
77 sub run($) {
78 my $ret = `$_[0]`;
79 if ($?) {
80 die $_[1] . ": failed\n";
82 $ret;
87 ## Source: mpc
89 if ($src eq 'mpc') {
90 $foo[0] = run 'mpc --format %artist% | head -n 1';
91 $foo[1] = run 'mpc --format %title% | head -n 1';
95 ## Source: audacious
97 } elsif ($src eq 'audacious' || $src eq 'aud' || $src eq 'audtool') {
98 $foo[0] = run 'audtool current-song-tuple-data performer' ;
99 $foo[1] = run 'audtool current-song-tuple-data track_name';
103 ## Source: mpd
105 } elsif ($src eq 'mpd') {
106 my $port = undef;
107 if ($arg =~ m/(.*)(?::(\d+))/) {
108 $arg = $1;
109 $port = $2;
112 eval { require Audio::MPD; } or
113 die "You are missing Audio::MPD module.\n";
115 $arg = new Audio::MPD($arg, $port);
116 if ($arg) {
117 eval { # New API
118 my $song = $arg->song();
119 @foo = ( $song->artist(), $song->title() );
121 } or eval { # Old API
122 $foo = $arg->get_title();
123 $foo =~ s#^.*/##;
124 $arg->close_connection();
126 } else {
127 die "Could not connect to MPD.\n";
132 ## Source: pipe
134 } elsif ($src eq 'pipe') {
135 $foo = run $arg;
139 ## Source: file or read
141 } elsif ($src eq 'file' || $src eq 'read') {
142 $foo = join '', <>;
146 ## Source: xmms
148 } elsif ($src eq 'xmms') {
150 $foo = eval {
151 require XMMS::InfoPipe;
152 my $xmms = XMMS::InfoPipe->new();
153 return unless $xmms->is_running;
154 return $xmms->{'info'}->{'Title'} if defined $xmms->{'info'}->{'Title'};
155 return $xmms->{'info'}->{'File'} if defined $xmms->{'info'}->{'File'} ;
156 return; # unusual event
158 } || eval {
159 my ($in, $out, $ret);
160 return unless sysopen($out, $ENV{'HOME'} . '/.xmms/inpipe' , 1);
161 return unless sysopen($in , $ENV{'HOME'} . '/.xmms/outpipe', 0);
162 syswrite $out, "\nout flush\nreport title\n";
163 return unless sysread $in, $ret, 4096;
164 return $ret;
167 if ($foo) {
168 $foo =~ s#.*[/\\]##;
169 } else {
170 die "Could not obtain song from XMMS\n";
175 ## Source: args
177 } elsif ($src eq 'args' || $src eq '') {
178 if (!@ARGV) {
179 die "Artist and song name required\n";
180 } elsif (@ARGV == 2) {
181 @foo = @ARGV;
182 } elsif (@ARGV == 3 && $ARGV[1] =~ /\s*-\s*/) {
183 @foo = ($ARGV[0], $ARGV[1]);
184 } else {
185 $foo = "@ARGV";
190 ## Unknown source
192 } else {
193 die "Invalid argument: --$src\n";
194 exit 1;
199 ## Parse $foo and @foo
201 if (defined($foo)) {
202 $_ = $foo;
203 s/^[\s-]+|[\s-]*$//g;
204 s/\s+/ /g;
205 if (m/^(.+?) - (.+)$/ || m/^(\S+) (.+)$/ || m/^(.+?)-([^ ].*)$/) {
206 @foo = ($1, $2);
207 } else {
208 die "Cannot parse: $foo\n";
212 $_ =~ s/\s+/ /mg for @foo;
213 $_ =~ s/^ | $//g for @foo;
214 $_ =~ s/--+/-/g for @foo;
218 ## Try cache
220 for ($LOCAL_CACHE . '/' . lc($foo[0]) . '/' . lc($foo[1]),
221 $GLOBAL_CACHE . '/' . lc($foo[0]) . '/' . lc($foo[1])) {
222 if (-e) {
223 if ($edit) {
224 editFile $_, $editor, 0;
225 exit 0;
226 } elsif (open FH, '<', $_) {
227 print while <FH>;
228 close FH;
229 exit 0;
230 } else {
231 warn "$_: $!\n";
239 ## Download page
242 sub sane_str($) {
243 my $string = shift;
244 $string = decode('UTF-8', $string) if utf8::is_utf8($string) != 1;
245 $string = unidecode($string);
246 $string =~ tr/ /-/;
247 $string =~ s/[,'\.]//gi;
248 $string;
252 sub get_page($) {
253 my $url = shift;
255 print STDERR $url . "\n";
257 my $ua = new LWP::UserAgent;
258 my $r = $ua->get($url);
260 if (!$r->is_success) {
261 if (int($r->code / 100) == 5) {
262 print STDERR $r->code, ", trying again in 5 s\n";
263 sleep 5;
264 $r = $ua->get($url);
266 if (!$r->is_success) {
267 print STDERR $r->code, ", aborting\n";
271 $r->is_success ? $r->content : undef;
275 my @sites = (
276 sub { # http://www.lyricsondemand.com/
277 my $artist = sane_str shift;
278 my $title = sane_str shift;
280 my $f = substr($artist, ($artist =~ m/^the /i) ? 4 : 0, 1);
281 $artist =~ s/[ \(\)-]//g;
282 $title =~ s/[ \(\)-]//g;
284 my $url = 'http://www.lyricsondemand.com/' . lc($f) . '/' .
285 lc($artist) . 'lyrics/' . lc($title) . 'lyrics.html';
287 my $content = get_page($url);
288 return unless $content;
290 my $text = HTML::TreeBuilder->new_from_content($content)->look_down(
291 "_tag", "font",
292 "face", "Verdana",
293 "size", "2",
294 )->as_HTML('<>&');
296 my @splitted = split(/<br \/>/i, $text);
297 $text = '';
299 foreach my $line (@splitted) {
300 $line =~ s:<.+?>::gi;
301 $line =~ s:^\s*::gi;
302 $text .= $line . "\n";
304 $text;
307 sub { # http://www.azlyrics.com/
308 my $artist = sane_str shift;
309 my $title = sane_str shift;
311 $artist =~ s/[ \(\)-]//g;
312 $title =~ s/[ \(\)-]//g;
314 my $url = 'http://www.azlyrics.com/lyrics/' . lc($artist) . '/' .
315 lc($title) . '.html';
317 my $content = get_page($url);
318 return unless $content;
320 my $text = HTML::TreeBuilder->new_from_content($content)->look_down(
321 "_tag", "font",
322 "face", "Verdana",
323 "size", "5",
324 )->look_down(
325 "_tag", "font",
326 "size", "2",
327 )->as_HTML('<>&');
329 my @splitted = split(/<br \/>/i, $text);
330 $text = '';
332 my $begin;
333 foreach my $line (@splitted) {
334 if ($line =~ m:<b>.+</b>:i) {
335 $begin = 1;
336 next;
338 if ($line =~ m:\[Thanks to:i) {
339 last;
341 if ($begin == 1) {
342 $line =~ s:^\s*::gi;
343 $text .= $line ."\n";
346 $text;
349 sub { # http://www.tekstowo.pl/
350 my ($artist, $title) = @_;
351 my $text;
353 # Tekstowo zamiast ń chce mieć podkreślnik. Z tego powodu nie
354 # mogę użyć sane_str, gdyż po nim ń byłoby nie do odróżnienia
355 # od zwykłęgo n, i nie wiadomo co wtedy powinno być zamieniane
356 # na podkreślnik. Niby można stworzyć funkcję która
357 # przyjmowałaby tekst przed transliteracją, wyszukiwała ń,
358 # i w tych miejscach zamieniała znak na podkreślnik w tekstach
359 # po transliteracji, ale przysporzyłoby to więcej roboty niż
360 # to warte. Dlatego dokonuję prostej transliteracji.
361 for my $str ($artist, $title) {
362 $str = decode('UTF-8', $str) if utf8::is_utf8($str) != 1;
363 $str =~ tr/ĄĆĘŁŃÓŚŻŹąćęłńóśćżź /ACEL_OSZZacel_oszz_/;
364 $str =~ s/[,'\.]//gi;
367 my $url = 'http://www.tekstowo.pl/piosenka,' . lc($artist) . ',' .
368 lc($title) . '.html';
370 my $content = get_page($url);
371 return unless $content;
373 # Tekstowo zwraca 200 OK nawet jeżeli nie znaleziono szukanego tekstu.
374 eval {
375 $text = HTML::TreeBuilder->new_from_content($content)->look_down(
376 "_tag", "div",
377 "id", "tex",
378 )->look_down(
379 "_tag", "div",
380 )->as_HTML('<>&');
382 return if $@;
384 $text = decode('ISO-8859-2', $text);
385 $text =~ s/<br \/>\s?/\n/gi;
386 $text =~ s:<.+?>::gi;
387 $text;
393 print STDERR "Getting lyrics for ${foo[0]} - ${foo[1]}\n";
395 my $res;
396 for (@sites) {
397 $res = $_->(@foo);
398 last if defined $res;
401 die "Could not find lyrics.\n" unless defined $res;
403 $res =~ s/^\s+|\s+$//g;
404 $res .= "\n";
406 unless ($edit) {
407 print $res;
412 ## Save in cache
414 my ($dir, $file);
415 for ($GLOBAL_CACHE, $LOCAL_CACHE) {
416 $dir = $_ . '/' . lc $foo[0];
417 $file = $dir . '/' . lc $foo[1];
419 if (-d $dir) {
420 next if ! -w _;
421 } else {
422 next if ! -d || ! -w _;
423 if (!mkdir $dir) {
424 warn "$dir: $!\n";
425 next;
429 if (open FH, '>', $file) {
430 print FH $res;
431 close FH;
432 if ($edit) {
433 editFile $file, $editor, 0;
435 exit 0;
438 # warn "$file: $!\n";
443 ## Save in /tmp and edit
445 if ($edit) {
446 $file = $foo[0] . ' - ' . $foo[1];
447 for ($ENV{'TEMP'}, $ENV{'TMP'}, '/tmp') {
448 next unless -d && -w _;
449 unless (open FH, '>', "$_/$file") {
450 warn "$_/$file: $!\n";
451 next;
453 print FH $res;
454 close FH;
455 editFile "$_/$file", $editor, 1;
461 sub editFile ($$$) {
462 my ($file, $editor, $delete) = @_;
464 for my $e ($editor, $ENV{'VISUAL'}, $ENV{'EDITOR'}, 'vi') {
465 if (defined $e && $e) {
466 $editor = $e;
467 last;
471 $ENV{'GETLYRICS_LYRICS_FILE'} = $file;
472 unless ($delete) {
473 exec "$editor \"\$GETLYRICS_LYRICS_FILE\"";
474 die "exec: $!\n";
477 system "$editor \"\$GETLYRICS_LYRICS_FILE\"";
478 unlink $file;
479 exit $?;
483 __END__
485 =head1 NAME
487 getlyrics - Get lyrics from Internet for specified song
489 =head1 DESCRIPTION
491 The getlyrics utility retrieves lyrics from the Internet for the specified
492 artist and song name.
494 =head1 SYNOPSIS
496 getlyrics.pl --help | --man
498 getlyrics.pl [ --edit[=<editor>] ] --I<scheme> I<arguments>
500 =head1 OPTIONS
502 Script need to know artist and song name of the track you want lyrics
503 for. There are several schemes which instructs getlyrics.pl how should
504 it obtain this information. Some schemes requires arguments and if
505 they do you shall specify them as command line arguments just after the
506 B<-->I<scheme> part.
508 If no scheme is given (ie. the first command line argument (or the
509 second if first was B<--edit>) does not start with B<-->) or empty
510 scheme (ie. the first command line argument is B<-->) B<--args> is
511 assumed.
513 If B<--edit> option was given instead of printing lyrics script will
514 run an editor. If lyrics were not in cache and script didn't manage
515 to save them there it will save it in temporary location ($TEMP, $TMP
516 or "/tmp") and delete it after editor exists. As an editor script
517 will try argument given to B<--edit> option, $VISUAL environment
518 variable, $EDITOR environment variable or "vi" whatever is set.
520 =over 8
522 =item B<--args>
524 Will get artist and song name from given arguments. If two arguments
525 are given the first will be used as artist and the second as song
526 name. If three arguments are given and the second is a single minus
527 sign with optional white spaces the first argument will be used as
528 artist and the third as song name. Otherwise all arguments will be
529 concated and parsed as described in PARSING STRING section of man page (see
530 B<--man>).
532 =item B<--pipe>
534 Script will run command given as arguments and parse it's output
535 according to the rules described in PARSING STRING section of man page
536 (see B<--man>).
538 =item B<--read>
540 Reads content of files given as arguments or from standard input if no
541 files where given.
543 =item B<--mpd>
545 Instructs the script to get the song artist and title from MPD.
546 Argument to this scheme is of the fallowing format:
547 [password@][host][:port]. If host or port is not specified environment
548 variables B<MPD_HOST> and B<MPD_PORT> are checked. Finally if all
549 else fails the defaults B<localhost> and B<6600> are used.
551 If tags for artist and song name are missing script will try to parse
552 current song's file name according to the rules described in PARSING
553 STRING section of man page (see B<--man>).
555 =item B<--mpc>
557 Runs B<mpc --format %artist%> to obtain artist name and then B<mpc
558 --format %title%> to obtain title.
560 =item B<--audacious>
562 =item B<--audtool>
564 =item B<--aud>
566 Runs B<audtool current-song-tuple-data performer> to obtain artist
567 name and then B<audtool current-song-tuple-data track_name> to obtain
568 title.
570 =item B<--xmms>
572 Instructs the script to get the song artist from XMMS. Script first
573 tries to use XMMS::InfoPipe module. If it doesn't exist or
574 xmms-infopipe is not running, script tries to open
575 B<$HOME/.xmms/inpipe> and B<$HOME/.xmms/outpipe> pipes to use xmmspipe
576 plugin.
578 =back
580 =head1 PARSING STRING
582 Output of a pipe and some other strings need to be parsed to obtain
583 artist and song name. First, white space and minus signs are removed
584 from the beginning and the end of the string. Then, all white spaces
585 are changed into a single space character. Finally, the string is
586 splited into two strings first using ' - ' and if that fails a single
587 space character and if that fails a single minus sign.
589 For example, "Metallica - One", "Metallica One" and "Metallica-One"
590 are splited into "Metallica" being artist and "One" being song
591 name. However, "Metallica -One" will be splited into "Metallica" and
592 "-One". Also "Black Sabbath Changes" and "Black Sabbath-Changes" won't
593 be splited into "Black Sabbath" and "Changes" so you have to use
594 "Black Sabbath - Changes".
596 =head1 CACHE
598 Starting from version 0.5 this script supports caching. That is,
599 lyrics once downloaded don't get downloaded again. There are two
600 places where cached lyrics are kept: a global cache under
601 B</usr/share/lyrics/> and a local, per-user cache under B<~/.lyrics>.
602 When reading lyrics or saving them script first tries global and then
603 local cache. To enable caching at least one of those directories have
604 to be created. Note also, that the global cache is meant to be filled
605 by root and that lyrics kept there are available for all users to read
606 but only for root (or some other privileged user) to write.
609 =head1 AUTHOR
611 Berislav Kovacki <beca@sezampro.yu>,
612 Michal Nazarewicz <mina86@mina86.com>
614 =cut