Run through perltidy
[emiya.git] / emiya.pl
blobb10b0042f6f404ca43d90d0437c5e520832efb18
1 #! /usr/bin/env perl
3 use strict;
4 use warnings;
5 use utf8;
7 use POE qw(Component::Client::HTTP);
9 use HTTP::Request;
10 use HTML::Entities;
11 use Encode;
12 use Digest::MD5 'md5_hex';
13 use Time::HiRes 'time';
15 use YAML::XS qw(LoadFile DumpFile);
17 use lib '.';
18 use PortIO;
20 our $VERSION = "0.9";
21 printout("Emiya $VERSION - <3 Saber\n");
23 printout("Loading settings... ");
25 if ( !file_exists("emiya.conf") ) {
26 my $settings = {
27 username => "PR",
28 password => "",
29 proxy => "",
30 move_to => "Done",
31 allow => {
32 title => [
33 '\[Group\] Series - \d{1,3}v?\d? \([0-9A-Fa-f]{8}\)\.ext',
34 '\[Group\] Series - EpNNNvN \(CRC32\)\.ext',
35 'Batch_Folder_Name',
38 deny => { title => [], },
39 max_display_length => "80",
40 DELETE_ME => "DELETE ME"
42 DumpFile( "emiya.conf", $settings );
43 printout("Sample configuration saved on 'emiya.conf'.\n");
44 printout("Edit it and then re-run this script.\n");
45 exit 5;
48 my $settings = LoadFile("emiya.conf")
49 || die "emiya.conf is invalid, delete it and re-run $0";
51 die "Default configuration detected. Edit emiya.conf and re-run this script"
52 if $settings->{DELETE_ME};
54 my $username = $settings->{username};
55 my $password = $settings->{password};
56 my $proxy = $settings->{proxy} || $ENV{http_proxy};
57 my $maxlen = $settings->{max_display_length} || 80;
59 my $move_to = $settings->{move_to};
61 # FILTERS
63 # The main purpose of this script.
64 # Only files that pass any of the allow filters and fail all of the deny filters
65 # get downloaded.
67 # Use [0-9A-Fa-f]{8} for CRC32s
68 # Use \d{1,3}v?\d? for episode numbers (catches v2s and v3s as well)
69 # Remember to escape all '.', '[', ']', '(' and ')' on filenames with a '\'.
71 my %file_allow_filters = %{ $settings->{allow} };
72 my %file_deny_filters = %{ $settings->{deny} };
74 printout("Compiling regexps... ");
76 # Compile regexps
77 foreach ( keys %file_allow_filters ) {
78 foreach ( @{ $file_allow_filters{$_} } ) {
79 $_ = qr/$_/;
82 foreach ( keys %file_deny_filters ) {
83 foreach ( @{ $file_deny_filters{$_} } ) {
84 $_ = qr/$_/;
88 printout("OK\n");
90 # HTTP Digest auth hack
91 my $realm = "DeathWolf's Auto-Torrent Loving Realm";
92 my $A1 = md5_hex("$username:$realm:$password");
93 my $A2 = md5_hex("GET:/~DeathWolf/Recent/Latest-Files.html");
94 my $cnonce = int( time() );
95 my $reqid = 1;
97 sub make_digest {
98 my $www_auth = shift;
100 $www_auth =~ /nonce=\"(.*?)\"/;
101 my $nonce = $1;
103 my $header = "Digest username=\"$username\", ";
104 $header .= "realm=\"$realm\", nonce=\"$nonce\", ";
105 $header .= "uri=\"/~DeathWolf/Recent/Latest-Files.html\", ";
106 $header .= "qop=\"auth\", nc=$reqid, cnonce=\"$cnonce\", ";
107 my $resp_digest = md5_hex("$A1:$nonce:$reqid:$cnonce:auth:$A2");
108 $header .= "response=\"$resp_digest\"";
109 return $header;
112 use constant CHUNK_SIZE => 16384;
115 my $p = $proxy;
116 $p = undef if ( !defined($proxy) or $proxy =~ m!^socks://! );
117 POE::Component::Client::HTTP->spawn(
118 Agent => 'Emiya/0.2',
119 Alias => 'ua',
120 Timeout => 45,
121 Streaming => CHUNK_SIZE,
122 Proxy => $p
126 sub entity_clean {
127 my $str = decode_utf8(shift);
128 use bytes; $str =~ s/%([0-9A-Fa-f]{2})/@{[chr(hex($1))]}/g; no bytes;
129 decode_entities($str);
130 return $str;
133 sub shorten {
134 my ( $str, $maxlen ) = @_;
136 return $str if length($str) <= $maxlen;
138 return undef if $maxlen < 3;
139 return "..." if $maxlen == 3;
140 return substr( $str, 0, 1 ) . "..." if $maxlen == 4;
142 my $prelen = ( $maxlen - 3 )/2;
143 my $postlen = ( $maxlen - 3 ) - $prelen;
145 my $toggle = 0;
146 while ( $prelen > int($prelen) ) {
147 if ( ( $toggle = !$toggle ) ) {
148 $prelen -= 1/2;
149 $prelen++;
150 } else {
151 $postlen -= 1/2;
152 $postlen++;
156 return substr( $str, 0, $prelen ) . "..." . substr( $str, -$postlen );
159 sub shorten_path {
160 my ( $path, $maxlen ) = @_;
161 if ( length($path) > $maxlen ) {
162 my @paths = split( '/', $path );
163 if ( scalar(@paths) < 2 ) {
164 $path = shorten( $path, $maxlen );
165 } else {
166 $path = "";
167 $maxlen -= length( $path = shift(@paths) . "/" )
168 if length( $paths[0] ) < $maxlen/2;
170 my $count = scalar(@paths);
171 my $len = int( $maxlen/$count );
172 my $extra = int($maxlen) % $count;
173 my $last = pop @paths;
175 foreach (@paths) {
176 $path .= shorten( "$_", $len - 1 ) . '/';
179 $path .= shorten( $last, $len + $extra );
182 return $path;
185 POE::Session->create(
186 inline_states => {
188 # Makes initial request for the Digest authentication
189 _start => sub {
190 my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];
192 my $req
193 = HTTP::Request->new( GET =>
194 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html'
196 $heap->{progress_message} = "Requesting List...";
197 printout( $heap->{progress_message} . " " );
198 $kernel->post( 'ua', 'request', 'authenticate', $req, 0 );
200 _stop => sub {
201 my ($kernel) = $_[KERNEL];
202 $kernel->post( 'ua', 'shutdown' );
205 # Authenticates and then actually fetch the list
206 authenticate => sub {
207 my $res = $_[ARG1]->[0];
208 return
209 if defined $_[ARG1]->[1]
210 ; # Wait until the error page finishes downloading because of streaming
211 my $req
212 = HTTP::Request->new( GET =>
213 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html'
215 die $res->status_line unless ( $res->status_line =~ /^(401|2)/ );
216 unless ( $res->is_success ) {
217 my $header = make_digest( $res->header('WWW-Authenticate') );
218 $req->header( 'Authorization', $header );
220 $_[KERNEL]->post( 'ua', 'request', 'stream_latest_files', $req,
221 $reqid++, 'progress' );
223 progress => sub {
224 my ( $heap, $done, $left ) = ( $_[HEAP], @{ $_[ARG1] }[ 0, 1 ] );
225 my $msg = $heap->{progress_message};
226 printout( sprintf( "%3.2f%%\r$msg ", $done*100/$left ) );
227 printout("\n") if $done == $left;
230 # Puts the HTML in a temporary var
231 stream_latest_files => sub {
232 my ( $kernel, $session, $heap, $res, $data )
233 = ( @_[ KERNEL, SESSION, HEAP ], @{ $_[ARG1] } );
235 die $res->status_line unless ( $res->is_success );
237 if ( defined $data ) {
238 $heap->{latest_files_data} = ""
239 unless exists $heap->{latest_files_data};
240 $heap->{latest_files_data} .= $data;
241 } else {
242 $kernel->call( $session, 'parse_latest_files' );
243 $kernel->yield('load_completed_files');
244 $kernel->yield('filter_files');
248 # Parses the HTML, puts hashrefs in $_[HEAP]->{latest_files}
249 parse_latest_files => sub {
250 my $heap = $_[HEAP];
252 my $content = $heap->{latest_files_data};
253 delete $heap->{latest_files_data};
254 my @files = ();
255 my $info = {};
257 my @lines = split m/\n/, $content;
259 foreach (@lines) {
260 if (m/^\s*(<td.*)/) {
261 $_ = $1;
262 if (m!class="date".*?>(.+?)</td>!) {
263 $info->{date} = $1;
264 } elsif (m!class="category".*?>(.+?)</td>!) {
265 $info->{category} = $1;
266 } elsif (m!class="size".*?>(.+?)</td>!) {
267 $info->{size} = $1;
268 } elsif (m!<td><a href="(.+?)">(.+?)</a></td>!) {
269 $info->{url} = entity_clean($1);
270 $info->{title} = entity_clean($2);
272 } elsif ( m!^\s*</tr>! and $info->{url} ) {
273 push @files, $info;
274 $info = {};
278 $heap->{latest_files} = \@files;
281 # Loads the list of completed files
282 load_completed_files => sub {
283 my $heap = $_[HEAP];
284 $heap->{completed_files} = [];
285 if ( file_exists("completed_files.txt") ) {
286 my $in = file_open( "<", "completed_files.txt" );
287 foreach (<$in>) {
288 chomp;
289 s/\r$//;
290 push @{ $heap->{completed_files} }, decode_utf8($_);
292 close $in;
295 mark_as_completed => sub {
296 my $completed_files = file_open( ">>", "completed_files.txt" );
297 print $completed_files encode_utf8( $_[ARG0] . "\n" );
298 close $completed_files;
301 # Filters the file list
302 filter_files => sub {
303 my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];
304 $heap->{allowed_files} = [];
305 my $skip;
306 my $filecount = scalar( @{ $heap->{latest_files} } );
307 my $i = 1;
308 foreach my $info ( @{ $heap->{latest_files} } ) {
309 printout( "\rFiltering " . $i++ . "/$filecount... " );
310 $skip = 0;
311 foreach ( @{ $heap->{completed_files} } ) {
312 if ( $info->{title} eq $_ ) {
313 $skip = 1;
314 last;
317 next if $skip;
318 my $allowed = 0;
319 foreach my $group ( keys %file_allow_filters ) {
320 foreach my $filter ( @{ $file_allow_filters{$group} } ) {
321 if ( $info->{$group} =~ /$filter/ ) {
322 $allowed = 1;
323 last;
326 last if $allowed;
329 foreach my $group ( keys %file_deny_filters ) {
330 foreach my $filter ( @{ $file_deny_filters{$group} } ) {
331 if ( $info->{$group} =~ /$filter/ ) {
332 $allowed = 0;
333 last;
336 last unless $allowed;
339 push @{ $heap->{allowed_files} }, $info if $allowed;
341 delete $heap->{latest_files};
342 $kernel->yield('sort_allowed_files');
343 $kernel->yield('print_allowed_files');
344 $kernel->yield('download_entries');
346 sort_allowed_files => sub {
348 # TODO: actually sort them
349 # NOTE: the list is used in reverse order
351 print_allowed_files => sub {
352 my $heap = $_[HEAP];
353 my $a = @{ $heap->{allowed_files} } > 0 ? ":" : ".";
354 printout( @{ $heap->{allowed_files} } . " entries accepted$a\n" );
355 foreach ( @{ $heap->{allowed_files} } ) {
356 printout( " \""
357 . $_->{title}
358 . ( $_->{url} =~ m!/$! ? "/" : "" ) . "\" ("
359 . $_->{size}
360 . ")\n" );
364 # This state is called after each file finishes downloading
365 download_entries => sub {
366 my ( $kernel, $session, $heap ) = @_[ KERNEL, SESSION, HEAP ];
367 my $info = pop @{ $heap->{allowed_files} };
368 if ($info) {
369 if ( $info->{url} =~ /\/$/ ) {
370 $kernel->call( $session, 'subfolder_recurse', $info );
371 } else {
372 $kernel->call( $session, 'download_file', $info );
377 # Downloads folder listings
378 subfolder_recurse => sub {
379 my ( $kernel, $heap, $info ) = @_[ KERNEL, HEAP, ARG0 ];
381 my $req = HTTP::Request->new( GET => $info->{url} );
382 $heap->{info} = $info;
383 $heap->{progress_message}
384 = "Listing \"" . $info->{title} . "/\":";
385 $kernel->call( 'ua', 'request', 'stream_subfolder', $req,
386 $reqid++, 'progress' );
387 while ( defined $heap->{info} ) { $kernel->run_one_timeslice(); }
388 $kernel->yield('download_entries');
390 stream_subfolder => sub {
391 my ( $kernel, $heap, $res, $data )
392 = ( @_[ KERNEL, HEAP ], @{ $_[ARG1] } );
393 my $info = $heap->{info};
395 die $res->status_line unless ( $res->is_success );
397 # HACK: I dunno why I use md5_hex, but it looked like a good idea at the time
398 # HACK: A very bizarre and most certainly broken filename made perl whine, so I added encode_entities
399 if ( defined $data ) {
400 $heap->{ "folder-"
401 . md5_hex( encode_entities( $info->{title} ) ) } = ""
402 unless exists $heap->{
403 "folder-"
404 . md5_hex( encode_entities( $info->{title} ) )
406 $heap->{ "folder-"
407 . md5_hex( encode_entities( $info->{title} ) ) }
408 .= $data;
409 } else {
410 $kernel->yield('subfolder_parse');
414 # Parses folder listings, pushes subitems to allowed_files, folders last
415 # This makes subfolders be checked before files
416 subfolder_parse => sub {
417 my $heap = $_[HEAP];
419 my $info = $heap->{info};
420 my @lines = split m/\n/,
421 $heap->{ "folder-"
422 . md5_hex( encode_entities( $info->{title} ) ) };
423 my @files = ();
424 my @folders = ();
425 foreach (@lines) {
426 if (m!<td class="n"><a href="(.*?)">.*<td class="t">Directory</td>!
428 next if $1 eq "../";
429 push @folders, $1;
430 } elsif (
431 m!<td class="n"><a href=".*?">(.*?)</a></td>.*<td class="s">(.*?)</td>!
433 push @files, [ $1, $2 ];
437 while ( $_ = pop @files ) {
438 my $finfo = { %{$info} };
439 $finfo->{title} = $info->{title};
440 $finfo->{title} .= '/' unless $finfo->{title} =~ m!/$!;
441 $finfo->{title} .= entity_clean( $_->[0] );
442 $finfo->{url} = $info->{url} . entity_clean( $_->[0] ),
443 $finfo->{size} = $_->[1];
445 my $skip = 0;
446 foreach ( @{ $heap->{completed_files} } ) {
447 if ( $finfo->{title} eq $_ ) {
448 $skip = 1;
449 last;
452 push @{ $heap->{allowed_files} }, $finfo unless $skip;
455 # There is no trailing slash on the title
456 # There IS a trailing slash on folder URLs
457 foreach (@folders) {
458 my $subinfo = { %{$info} }; # make a copy
459 $subinfo->{title} = $info->{title};
460 $subinfo->{title} .= '/' unless $subinfo->{title} =~ m!/$!;
461 $subinfo->{title} .= entity_clean($_);
462 $subinfo->{title} .= '/' unless $subinfo->{title} =~ m!/$!;
463 $subinfo->{url} = $info->{url} . entity_clean($_);
465 my $skip = 0;
466 foreach ( @{ $heap->{completed_files} } ) {
467 if ( $subinfo->{title} eq $_ ) {
468 $skip = 1;
469 last;
472 push @{ $heap->{allowed_files} }, $subinfo unless $skip;
475 delete $heap->{ "folder-"
476 . md5_hex( encode_entities( $info->{title} ) ) };
477 $heap->{info} = undef;
478 delete $heap->{info};
481 # Actually downloads files
482 download_file => sub {
483 my ( $kernel, $heap, $info ) = @_[ KERNEL, HEAP, ARG0 ];
485 my $req = HTTP::Request->new( GET => $info->{url} );
486 $req->header( Accept_Ranges => "bytes" );
487 $heap->{have_size} = file_size( $info->{title} );
488 if ( $heap->{have_size} ) {
489 $req->header( Range => "bytes=" . $heap->{have_size} . "-" );
490 } else {
491 $heap->{have_size} = 0;
493 my $sizelen = length( $info->{size} ) + 2;
494 my $proglen = 23;
495 $heap->{progress_message} = "\""
496 . shorten_path( $info->{title},
497 $maxlen - $sizelen - $proglen - 3 )
498 . "\" ("
499 . $info->{size} . "): ";
500 printout( $heap->{progress_message} );
502 $heap->{filename} = $info->{title};
504 $heap->{last_time} = time;
505 $heap->{start_time} = $heap->{last_time};
506 $heap->{len} = 0;
507 $heap->{oct} = 0;
508 $kernel->delay( 'calculate_speed' => 2 );
509 $kernel->call( 'ua', 'request', 'stream_file', $req, $reqid++,
510 'file_progress' );
513 # Does the actual file downloading
514 stream_file => sub {
515 my ( $kernel, $session, $heap, $req, $res, $data )
516 = ( @_[ KERNEL, SESSION, HEAP, ARG0 ], @{ $_[ARG1] } );
518 die $res->status_line
519 unless $res->is_success
520 or $res->status_line =~ /^416/;
522 if ( defined($data) ) {
523 unless ( $heap->{outfh} and $heap->{content_length} ) {
524 $heap->{filename} =~ m!(.*)/!;
525 mkpath($1) if $1;
526 $heap->{outfh} = file_open( '>>', $heap->{filename} )
527 or die "$^E";
528 $heap->{content_length} = $res->header('Content-Length');
530 # This is probably wrong
531 if ( $heap->{content_length} == $heap->{have_size} ) {
532 close( $heap->{outfh} );
533 $kernel->call( $session, 'mark_as_completed',
534 $heap->{filename} );
535 $kernel->post( 'ua', 'cancel', $req );
536 $kernel->yield('download_entries');
539 unless ( $heap->{outfh} && syswrite( $heap->{outfh}, $data ) )
541 die "$^E";
543 } else { # Finished
544 unless ( $res->is_success or $res->status_line =~ /^416/ )
545 { # Requested Range not satisfiable
546 die $res->status_line;
548 close( $heap->{outfh} );
549 delete $heap->{outfh};
550 $kernel->call( $session, 'mark_as_completed',
551 $heap->{filename} );
552 $kernel->call( $session, 'move_completed',
553 $heap->{filename} );
554 $kernel->yield('download_entries');
557 calculate_speed => sub {
558 my ( $kernel, $heap ) = @_[ KERNEL, HEAP ];
560 my $now = time;
561 if ( $now == $heap->{last_time} ) {
562 $kernel->delay( 'calculate_speed' => 0.5 );
563 return;
565 my $spd = $heap->{oct}/1024/( $now - $heap->{last_time} );
566 unless ( defined $heap->{speeds} ) {
567 $heap->{speeds} = [];
568 $heap->{speeds_i} = 0;
569 for ( my $i = 0; $i < 8; $i++ ) {
570 $heap->{speeds}->[$i] = $spd;
573 $heap->{speeds}->[ $heap->{speeds_i}++ ] = $spd;
574 $heap->{speeds_i} %= @{ $heap->{speeds} };
575 $heap->{oct} = 0;
577 $heap->{spd} = 0;
578 foreach ( @{ $heap->{speeds} } ) { $heap->{spd} += $_; }
579 $heap->{spd} /= @{ $heap->{speeds} };
581 $heap->{last_time} = time;
582 $kernel->delay( 'calculate_speed' => 0.5 );
584 calculate_avg_speed => sub {
585 my $heap = $_[HEAP];
587 my $now = time;
588 $heap->{spd} = $heap->{len}/1024/( $now - $heap->{start_time} );
591 # Prints the progress report
592 file_progress => sub {
593 my ( $kernel, $heap, $pos, $len, $oct )
594 = ( @_[ KERNEL, HEAP ], @{ $_[ARG1] } );
596 if ( $pos eq $len ) {
597 $kernel->call( $_[SESSION], 'calculate_avg_speed' );
598 my $line
599 = sprintf( "100.00%% (avg. %4.2f kB/s)", $heap->{spd} );
600 $heap->{prevlen} = 0 unless $heap->{prevlen};
601 printout( "\r"
602 . $heap->{progress_message} . "$line"
603 . ( " " x ( $heap->{prevlen} - length($line) ) )
604 . "\n" );
605 delete $heap->{have_size};
606 delete $heap->{prev_time};
607 delete $heap->{prevlen};
608 delete $heap->{spd};
609 delete $heap->{speeds};
610 $heap->{speeds_i} = 0;
611 $kernel->delay('calculate_speed');
612 } else {
613 my $have_size
614 = exists $heap->{have_size} ? $heap->{have_size} : 0;
616 my $spd = $heap->{spd} ? $heap->{spd} : undef;
618 my $octlen = length($oct);
619 $heap->{oct} += $octlen;
620 $heap->{len} += $octlen;
622 # $have_size may be a BigInt, use fixed point math / formatters
623 my $pct = ( $pos + $have_size )*10000/( $len + $have_size );
625 my $line;
626 if ( defined($spd) ) {
627 $line = sprintf( "%2d.%02d%% (%4.2f kB/s)",
628 $pct/100, $pct % 100, $spd );
629 } else {
630 $line = sprintf( "%2d.%02d%%", $pct/100, $pct % 100 );
632 $heap->{prevlen} = length($line)
633 unless defined $heap->{prevlen}
634 and $heap->{prevlen} > length($line);
635 printout( "$line"
636 . ( " " x ( $heap->{prevlen} - length($line) ) )
637 . "\r"
638 . $heap->{progress_message} );
639 $heap->{prevlen} = length($line);
642 move_completed => sub {
643 my $filename = $_[ARG0];
644 if ($move_to) {
645 my $dest = $move_to;
646 my $fname;
647 $filename =~ m!^(.+)/(.+?)$!;
648 if ($1) {
649 $dest .= "/$1";
650 $fname = $2;
651 } else {
652 $fname = $filename;
654 mkpath("$dest");
655 printout("Moving \"$filename\" to \"$dest/$fname\"... ");
656 if ( move( "$filename", "$dest/$fname" ) ) {
657 printout("OK\n");
658 } else {
659 printout("$^E\n");
663 } );
665 POE::Kernel->run();
667 # vim: set noexpandtab tabstop=4 shiftwidth=4 :