Cosmetics
[emiya.git] / emiya.pl
blob7858a9e55c8811e9e12531c526854fb5e8f84c34
1 #! /usr/bin/env perl
3 use strict;
4 use warnings;
5 use utf8;
7 use POE qw(Component::Client::HTTP);
9 use HTTP::Request;
10 use HTML::Entities;
11 use Encode;
12 use Digest::MD5 'md5_hex';
13 use Time::HiRes 'time';
15 use YAML::Any qw(LoadFile DumpFile);
17 use lib '.';
18 use PortIO;
20 our $VERSION = "0.9";
21 printout("Emiya $VERSION - <3 Saber\n");
23 printout("Loading settings... ");
25 if (!file_exists("emiya.conf")) {
26 my $settings = {
27 username => "PR",
28 password => "",
29 proxy => "",
30 move_to => "Done",
31 allow => {
32 title => [
33 '\[Group\] Series - \d{1,3}v?\d? \([0-9A-Fa-f]{8}\)\.ext',
34 '\[Group\] Series - EpNNNvN \(CRC32\)\.ext',
35 'Batch_Folder_Name',
38 deny => {
39 title => [],
41 max_display_path_length => "80",
42 DELETE_ME => "DELETE ME"
44 DumpFile("emiya.conf", $settings);
45 printout("Sample configuration saved on 'emiya.conf'.\n");
46 printout("Edit it and then re-run this script.\n");
47 exit 5;
50 my $settings = LoadFile("emiya.conf");
52 die "Default configuration detected. Edit emiya.conf and re-run this script" if $settings->{DELETE_ME};
54 my $username = decode("UTF-8", $settings->{username}, Encode::FB_CROAK);
55 my $password = decode("UTF-8", $settings->{password}, Encode::FB_CROAK);
56 my $proxy = decode("UTF-8", $settings->{proxy}, Encode::FB_CROAK) || $ENV{http_proxy};
57 my $maxlen = decode("UTF-8", $settings->{max_display_path_length}, Encode::FB_CROAK) || 80;
59 my $move_to = decode("UTF-8", $settings->{move_to}, Encode::FB_CROAK);
61 # FILTERS
63 # The main purpose of this script.
64 # Only files that pass any of the allow filters and fail all of the deny filters
65 # get downloaded.
67 # Use [0-9A-Fa-f]{8} for CRC32s
68 # Use \d{1,3}v?\d? for episode numbers (catches v2s and v3s as well)
69 # Remember to escape all '.', '[', ']', '(' and ')' on filenames with a '\'.
71 my %file_allow_filters = %{$settings->{allow}};
72 my %file_deny_filters = %{$settings->{deny}};
74 printout("Compiling regexps... ");
76 # Compile regexps
77 foreach (keys %file_allow_filters) {
78 foreach (@{$file_allow_filters{$_}}) {
79 $_ = decode("UTF-8", $_, Encode::FB_CROAK);
80 $_ = qr/$_/;
83 foreach (keys %file_deny_filters) {
84 foreach (@{$file_deny_filters{$_}}) {
85 $_ = decode("UTF-8", $_, Encode::FB_CROAK);
86 $_ = qr/$_/;
90 printout("OK\n");
92 # HTTP Digest auth hack
93 my $realm = "DeathWolf's Auto-Torrent Loving Realm";
94 my $A1 = md5_hex("$username:$realm:$password");
95 my $A2 = md5_hex("GET:/~DeathWolf/Recent/Latest-Files.html");
96 my $cnonce = int(time());
97 my $reqid = 1;
99 sub make_digest {
100 my $www_auth = shift;
102 $www_auth =~ /nonce=\"(.*?)\"/;
103 my $nonce = $1;
105 my $header = "Digest username=\"$username\", ";
106 $header .= "realm=\"$realm\", nonce=\"$nonce\", ";
107 $header .= "uri=\"/~DeathWolf/Recent/Latest-Files.html\", ";
108 $header .= "qop=\"auth\", nc=$reqid, cnonce=\"$cnonce\", ";
109 my $resp_digest = md5_hex("$A1:$nonce:$reqid:$cnonce:auth:$A2");
110 $header .= "response=\"$resp_digest\"";
111 return $header;
114 use constant CHUNK_SIZE => 16384;
117 my $p = $proxy;
118 $p = undef if (!defined($proxy) or $proxy =~ m!^socks://!);
119 POE::Component::Client::HTTP->spawn(
120 Agent => 'Emiya/0.2',
121 Alias => 'ua',
122 Timeout => 45,
123 Streaming => CHUNK_SIZE,
124 Proxy => $p
128 sub entity_clean {
129 my $str = decode_utf8(shift);
130 use bytes; $str =~ s/%([0-9A-Fa-f]{2})/@{[chr(hex($1))]}/g; no bytes;
131 decode_entities($str);
132 return $str;
135 sub shorten {
136 my ($str, $maxlen) = @_;
138 return $str if length($str) <= $maxlen;
140 return undef if $maxlen < 3;
141 return "..." if $maxlen == 3;
142 return substr($str, 0, 1) . "..." if $maxlen == 4;
144 my $prelen = ($maxlen-3)/2;
145 my $postlen = ($maxlen-3) - $prelen;
147 my $toggle = 0;
148 while($prelen > int($prelen)) {
149 if (($toggle = !$toggle)) {
150 $prelen -= 1/2;
151 $prelen++;
152 } else {
153 $postlen -= 1/2;
154 $postlen++;
158 return substr($str, 0, $prelen) . "..." . substr($str, -$postlen);
161 sub shorten_path {
162 my ($path, $maxlen) = @_;
163 if (length($path) > $maxlen) {
164 my @paths = split('/', $path);
165 if (scalar(@paths) < 2) {
166 $path = shorten($path, $maxlen);
167 } else {
168 $path = "";
169 $maxlen -= length($path = shift(@paths) . "/") if length($paths[0]) < $maxlen/2;
171 my $count = scalar(@paths);
172 my $len = int($maxlen / $count);
173 my $extra = int($maxlen) % $count;
174 my $last = pop @paths;
176 foreach (@paths) {
177 $path .= shorten("$_", $len - 1) . '/';
180 $path .= shorten($last, $len + $extra);
183 return $path
186 POE::Session->create(
187 inline_states => {
188 # Makes initial request for the Digest authentication
189 _start => sub {
190 my ($kernel, $heap) = @_[KERNEL, HEAP];
192 my $req = HTTP::Request->new(GET => 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html');
193 $heap->{progress_message} = "Requesting List...";
194 printout($heap->{progress_message} . " ");
195 $kernel->post('ua', 'request', 'authenticate', $req, 0);
197 _stop => sub {
198 my ($kernel) = $_[KERNEL];
199 $kernel->post('ua', 'shutdown');
201 # Authenticates and then actually fetch the list
202 authenticate => sub {
203 my $res = $_[ARG1]->[0];
204 return if defined $_[ARG1]->[1]; # Wait until the error page finishes downloading because of streaming
205 my $req = HTTP::Request->new(GET => 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html');
206 die $res->status_line unless ($res->status_line =~ /^(401|2)/);
207 unless ($res->is_success) {
208 my $header = make_digest($res->header('WWW-Authenticate'));
209 $req->header('Authorization', $header);
211 $_[KERNEL]->post('ua', 'request', 'stream_latest_files', $req, $reqid++, 'progress');
213 progress => sub {
214 my ($heap, $done, $left) = ($_[HEAP], @{$_[ARG1]}[0,1]);
215 my $msg = $heap->{progress_message};
216 printout(sprintf("%3.2f%%\r$msg ", $done * 100 / $left));
217 printout("\n") if $done == $left;
219 # Puts the HTML in a temporary var
220 stream_latest_files => sub {
221 my ($kernel, $session, $heap, $res, $data) = (@_[KERNEL, SESSION, HEAP], @{$_[ARG1]});
223 die $res->status_line unless ($res->is_success);
225 if (defined $data) {
226 $heap->{latest_files_data} = "" unless exists $heap->{latest_files_data};
227 $heap->{latest_files_data} .= $data;
228 } else {
229 $kernel->call($session, 'parse_latest_files');
230 $kernel->yield('load_completed_files');
231 $kernel->yield('filter_files');
234 # Parses the HTML, puts hashrefs in $_[HEAP]->{latest_files}
235 parse_latest_files => sub {
236 my $heap = $_[HEAP];
238 my $content = $heap->{latest_files_data};
239 delete $heap->{latest_files_data};
240 my @files = ();
241 my $info = {};
243 my @lines = split m/\n/, $content;
245 foreach (@lines) {
246 if(m/^\s*(<td.*)/) {
247 $_ = $1;
248 if (m!class="date".*?>(.+?)</td>!) {
249 $info->{date} = $1;
250 } elsif (m!class="category".*?>(.+?)</td>!) {
251 $info->{category} = $1;
252 } elsif (m!class="size".*?>(.+?)</td>!) {
253 $info->{size} = $1;
254 } elsif (m!<td><a href="(.+?)">(.+?)</a></td>!) {
255 $info->{url} = entity_clean($1);
256 $info->{title} = entity_clean($2);
258 } elsif (m!^\s*</tr>! and $info->{url}) {
259 push @files, $info;
260 $info = {};
264 $heap->{latest_files} = \@files;
266 # Loads the list of completed files
267 load_completed_files => sub {
268 my $heap = $_[HEAP];
269 $heap->{completed_files} = [];
270 if (file_exists("completed_files.txt")) {
271 my $in = file_open("<", "completed_files.txt");
272 foreach (<$in>) {
273 chomp;
274 s/\r$//;
275 push @{$heap->{completed_files}}, decode_utf8($_);
277 close $in;
280 mark_as_completed => sub {
281 my $completed_files = file_open(">>", "completed_files.txt");
282 print $completed_files encode_utf8($_[ARG0] . "\n");
283 close $completed_files;
285 # Filters the file list
286 filter_files => sub {
287 my ($kernel, $heap) = @_[KERNEL, HEAP];
288 $heap->{allowed_files} = [];
289 my $skip;
290 my $filecount = scalar(@{$heap->{latest_files}});
291 my $i = 1;
292 foreach my $info (@{$heap->{latest_files}}) {
293 printout("\rFiltering " . $i++ . "/$filecount... ");
294 $skip = 0;
295 foreach (@{$heap->{completed_files}}) {
296 if ($info->{title} eq $_) {
297 $skip = 1;
298 last;
301 next if $skip;
302 my $allowed = 0;
303 foreach my $group (keys %file_allow_filters) {
304 foreach my $filter (@{$file_allow_filters{$group}}) {
305 if ($info->{$group} =~ /$filter/) {
306 $allowed = 1;
307 last;
310 last if $allowed;
313 foreach my $group (keys %file_deny_filters) {
314 foreach my $filter (@{$file_deny_filters{$group}}) {
315 if ($info->{$group} =~ /$filter/) {
316 $allowed = 0;
317 last;
320 last unless $allowed;
323 push @{$heap->{allowed_files}}, $info if $allowed;
325 delete $heap->{latest_files};
326 $kernel->yield('sort_allowed_files');
327 $kernel->yield('print_allowed_files');
328 $kernel->yield('download_entries');
330 sort_allowed_files => sub {
331 # TODO: actually sort them
332 # NOTE: the list is used in reverse order
334 print_allowed_files => sub {
335 my $heap = $_[HEAP];
336 my $a = @{$heap->{allowed_files}} > 0 ? ":" : ".";
337 printout(@{$heap->{allowed_files}} . " entries accepted$a\n");
338 foreach (@{$heap->{allowed_files}}) {
339 printout(" \"" . $_->{title} . ($_->{url} =~ m!/$! ? "/" : "") . "\" (" . $_->{size} . ")\n");
342 # This state is called after each file finishes downloading
343 download_entries => sub {
344 my ($kernel, $session, $heap) = @_[KERNEL, SESSION, HEAP];
345 my $info = pop @{$heap->{allowed_files}};
346 if ($info) {
347 if ($info->{url} =~ /\/$/) {
348 $kernel->call($session, 'subfolder_recurse', $info);
349 } else {
350 $kernel->call($session, 'download_file', $info);
354 # Downloads folder listings
355 subfolder_recurse => sub {
356 my ($kernel, $heap, $info) = @_[KERNEL, HEAP, ARG0];
358 my $req = HTTP::Request->new(GET => $info->{url});
359 $heap->{info} = $info;
360 $heap->{progress_message} = "Listing \"". $info->{title} ."/\":";
361 $kernel->call('ua', 'request', 'stream_subfolder', $req, $reqid++, 'progress');
362 while (defined $heap->{info}) { $kernel->run_one_timeslice(); }
363 $kernel->yield('download_entries');
365 stream_subfolder => sub {
366 my ($kernel, $heap, $res, $data) = (@_[KERNEL, HEAP], @{$_[ARG1]});
367 my $info = $heap->{info};
369 die $res->status_line unless ($res->is_success);
371 # HACK: I dunno why I use md5_hex, but it looked like a good idea at the time
372 # HACK: A very bizarre and most certainly broken filename made perl whine, so I added encode_entities
373 if (defined $data) {
374 $heap->{"folder-" . md5_hex(encode_entities($info->{title}))} = "" unless exists $heap->{"folder-" . md5_hex(encode_entities($info->{title}))};
375 $heap->{"folder-" . md5_hex(encode_entities($info->{title}))} .= $data;
376 } else {
377 $kernel->yield('subfolder_parse');
380 # Parses folder listings, pushes subitems to allowed_files, folders last
381 # This makes subfolders be checked before files
382 subfolder_parse => sub {
383 my $heap = $_[HEAP];
385 my $info = $heap->{info};
386 my @lines = split m/\n/, $heap->{"folder-" . md5_hex(encode_entities($info->{title}))};
387 my @files = ();
388 my @folders = ();
389 foreach (@lines) {
390 if (m!<td class="n"><a href="(.*?)">.*<td class="t">Directory</td>!) {
391 next if $1 eq "../";
392 push @folders, $1;
393 } elsif (m!<td class="n"><a href=".*?">(.*?)</a></td>.*<td class="s">(.*?)</td>!) {
394 push @files, [$1, $2];
398 while ($_ = pop @files) {
399 my $finfo = {%{$info}};
400 $finfo->{title} = $info->{title};
401 $finfo->{title} .= '/' unless $finfo->{title} =~ m!/$!;
402 $finfo->{title} .= entity_clean($_->[0]);
403 $finfo->{url} = $info->{url} . entity_clean($_->[0]),
404 $finfo->{size} = $_->[1];
406 my $skip = 0;
407 foreach (@{$heap->{completed_files}}) {
408 if ($finfo->{title} eq $_) {
409 $skip = 1;
410 last;
413 push @{$heap->{allowed_files}}, $finfo unless $skip;
415 # There is no trailing slash on the title
416 # There IS a trailing slash on folder URLs
417 foreach (@folders) {
418 my $subinfo = {%{$info}}; # make a copy
419 $subinfo->{title} = $info->{title};
420 $subinfo->{title} .= '/' unless $subinfo->{title} =~ m!/$!;
421 $subinfo->{title} .= entity_clean($_);
422 $subinfo->{title} .= '/' unless $subinfo->{title} =~ m!/$!;
423 $subinfo->{url} = $info->{url} . entity_clean($_);
425 my $skip = 0;
426 foreach (@{$heap->{completed_files}}) {
427 if ($subinfo->{title} eq $_) {
428 $skip = 1;
429 last;
432 push @{$heap->{allowed_files}}, $subinfo unless $skip;
435 delete $heap->{"folder-" . md5_hex(encode_entities($info->{title}))};
436 $heap->{info} = undef;
437 delete $heap->{info};
439 # Actually downloads files
440 download_file => sub {
441 my ($kernel, $heap, $info) = @_[KERNEL, HEAP, ARG0];
443 my $req = HTTP::Request->new(GET => $info->{url});
444 $req->header (Accept_Ranges => "bytes");
445 $heap->{have_size} = file_size($info->{title});
446 if ($heap->{have_size}) {
447 $req->header (Range => "bytes=". $heap->{have_size} ."-");
448 } else {
449 $heap->{have_size} = 0;
451 my $sizelen = length($info->{size}) + 2;
452 my $proglen = 23;
453 $heap->{progress_message} = "\"".
454 shorten_path($info->{title}, $maxlen - $sizelen - $proglen - 3) ."\" (". $info->{size} ."): ";
455 printout($heap->{progress_message});
457 $heap->{filename} = $info->{title};
459 $heap->{last_time} = time;
460 $heap->{start_time} = $heap->{last_time};
461 $heap->{len} = 0;
462 $heap->{oct} = 0;
463 $kernel->delay('calculate_speed' => 2);
464 $kernel->call('ua', 'request', 'stream_file', $req, $reqid++, 'file_progress');
466 # Does the actual file downloading
467 stream_file => sub {
468 my ($kernel, $session, $heap, $req, $res, $data) =
469 (@_[KERNEL, SESSION, HEAP, ARG0], @{$_[ARG1]});
471 die $res->status_line unless $res->is_success or $res->status_line =~ /^416/;
473 if (defined($data)) {
474 unless ($heap->{outfh} and $heap->{content_length}) {
475 $heap->{filename} =~ m!(.*)/!;
476 mkpath($1) if $1;
477 $heap->{outfh} = file_open('>>',$heap->{filename}) or die "$^E";
478 $heap->{content_length} = $res->header('Content-Length');
479 # This is probably wrong
480 if ($heap->{content_length} == $heap->{have_size}) {
481 close($heap->{outfh});
482 $kernel->call($session, 'mark_as_completed', $heap->{filename});
483 $kernel->post('ua', 'cancel', $req);
484 $kernel->yield('download_entries');
487 unless ($heap->{outfh} && syswrite($heap->{outfh}, $data)) {
488 die "$^E";
490 } else { # Finished
491 unless ($res->is_success or $res->status_line =~ /^416/) { # Requested Range not satisfiable
492 die $res->status_line;
494 close($heap->{outfh});
495 delete $heap->{outfh};
496 $kernel->call($session, 'mark_as_completed', $heap->{filename});
497 $kernel->call($session, 'move_completed', $heap->{filename});
498 $kernel->yield('download_entries');
501 calculate_speed => sub {
502 my ($kernel, $heap) = @_[KERNEL, HEAP];
504 my $now = time;
505 if ($now == $heap->{last_time}) {
506 $kernel->delay('calculate_speed' => 0.5);
507 return;
509 my $spd = $heap->{oct} / 1024 / ($now - $heap->{last_time});
510 unless (defined $heap->{speeds}) {
511 $heap->{speeds} = [];
512 $heap->{speeds_i} = 0;
513 for(my $i = 0; $i < 8; $i++) { $heap->{speeds}->[$i] = $spd; }
515 $heap->{speeds}->[$heap->{speeds_i}++] = $spd;
516 $heap->{speeds_i} %= @{$heap->{speeds}};
517 $heap->{oct} = 0;
519 $heap->{spd} = 0;
520 foreach (@{$heap->{speeds}}) { $heap->{spd} += $_; }
521 $heap->{spd} /= @{$heap->{speeds}};
523 $heap->{last_time} = time;
524 $kernel->delay('calculate_speed' => 0.5);
526 calculate_avg_speed => sub {
527 my $heap = $_[HEAP];
529 my $now = time;
530 $heap->{spd} = $heap->{len} / 1024 / ($now - $heap->{start_time});
532 # Prints the progress report
533 file_progress => sub {
534 my ($kernel, $heap, $pos, $len, $oct) = (@_[KERNEL, HEAP], @{$_[ARG1]});
536 if ($pos eq $len) {
537 $kernel->call($_[SESSION], 'calculate_avg_speed');
538 my $line = sprintf("100.00%% (avg. %4.2f kB/s)", $heap->{spd});
539 $heap->{prevlen} = 0 unless $heap->{prevlen};
540 printout("\r" . $heap->{progress_message} . "$line" . (" " x ($heap->{prevlen} - length($line))) . "\n");
541 delete $heap->{have_size};
542 delete $heap->{prev_time};
543 delete $heap->{prevlen};
544 delete $heap->{spd};
545 delete $heap->{speeds};
546 $heap->{speeds_i} = 0;
547 $kernel->delay('calculate_speed');
548 } else {
549 my $have_size = exists $heap->{have_size} ? $heap->{have_size} : 0;
551 my $spd = $heap->{spd} ? $heap->{spd} : undef;
553 my $octlen = length($oct);
554 $heap->{oct} += $octlen;
555 $heap->{len} += $octlen;
557 # $have_size may be a BigInt, use fixed point math / formatters
558 my $pct = ($pos + $have_size)*10000/($len + $have_size);
560 my $line;
561 if (defined($spd)) {
562 $line = sprintf("%2d.%02d%% (%4.2f kB/s)", $pct / 100, $pct % 100, $spd);
563 } else {
564 $line = sprintf("%2d.%02d%%", $pct / 100, $pct % 100);
566 $heap->{prevlen} = length($line) unless defined $heap->{prevlen} and $heap->{prevlen} > length($line);
567 printout("$line" . (" " x ($heap->{prevlen} - length($line))) . "\r" . $heap->{progress_message});
568 $heap->{prevlen} = length($line);
571 move_completed => sub {
572 my $filename = $_[ARG0];
573 if ($move_to) {
574 my $dest = $move_to;
575 my $fname;
576 $filename =~ m!^(.+)/(.+?)$!;
577 if ($1) {
578 $dest .= "/$1";
579 $fname = $2;
580 } else {
581 $fname = $filename;
583 mkpath("$dest");
584 printout("Moving \"$filename\" to \"$dest/$fname\"... ");
585 if (move("$filename", "$dest/$fname")) {
586 printout("OK\n");
587 } else {
588 printout("$^E\n");
595 POE::Kernel->run();
597 # vim: set noexpandtab tabstop=4 shiftwidth=4 :