7 use POE
qw(Component::Client::HTTP);
12 use Digest
::MD5
'md5_hex';
13 use Time
::HiRes
'time';
15 use YAML
::Any
qw(LoadFile DumpFile);
21 printout
("Emiya $VERSION - <3 Saber\n");
23 printout
("Loading settings... ");
25 if (!file_exists
("emiya.conf")) {
33 '\[Group\] Series - \d{1,3}v?\d? \([0-9A-Fa-f]{8}\)\.ext',
34 '\[Group\] Series - EpNNNvN \(CRC32\)\.ext',
41 max_display_path_length
=> "80",
42 DELETE_ME
=> "DELETE ME"
44 DumpFile
("emiya.conf", $settings);
45 printout
("Sample configuration saved on 'emiya.conf'.\n");
46 printout
("Edit it and then re-run this script.\n");
50 my $settings = LoadFile
("emiya.conf");
52 die "Default configuration detected. Edit emiya.conf and re-run this script" if $settings->{DELETE_ME
};
54 my $username = decode
("UTF-8", $settings->{username
}, Encode
::FB_CROAK
);
55 my $password = decode
("UTF-8", $settings->{password
}, Encode
::FB_CROAK
);
56 my $proxy = decode
("UTF-8", $settings->{proxy
}, Encode
::FB_CROAK
) || $ENV{http_proxy
};
57 my $maxlen = decode
("UTF-8", $settings->{max_display_path_length
}, Encode
::FB_CROAK
) || 80;
59 my $move_to = decode
("UTF-8", $settings->{move_to
}, Encode
::FB_CROAK
);
63 # The main purpose of this script.
64 # Only files that pass any of the allow filters and fail all of the deny filters
67 # Use [0-9A-Fa-f]{8} for CRC32s
68 # Use \d{1,3}v?\d? for episode numbers (catches v2s and v3s as well)
69 # Remember to escape all '.', '[', ']', '(' and ')' on filenames with a '\'.
71 my %file_allow_filters = %{$settings->{allow
}};
72 my %file_deny_filters = %{$settings->{deny
}};
74 printout
("Compiling regexps... ");
77 foreach (keys %file_allow_filters) {
78 foreach (@
{$file_allow_filters{$_}}) {
79 $_ = decode
("UTF-8", $_, Encode
::FB_CROAK
);
83 foreach (keys %file_deny_filters) {
84 foreach (@
{$file_deny_filters{$_}}) {
85 $_ = decode
("UTF-8", $_, Encode
::FB_CROAK
);
92 # HTTP Digest auth hack
93 my $realm = "DeathWolf's Auto-Torrent Loving Realm";
94 my $A1 = md5_hex
("$username:$realm:$password");
95 my $A2 = md5_hex
("GET:/~DeathWolf/Recent/Latest-Files.html");
96 my $cnonce = int(time());
100 my $www_auth = shift;
102 $www_auth =~ /nonce=\"(.*?)\"/;
105 my $header = "Digest username=\"$username\", ";
106 $header .= "realm=\"$realm\", nonce=\"$nonce\", ";
107 $header .= "uri=\"/~DeathWolf/Recent/Latest-Files.html\", ";
108 $header .= "qop=\"auth\", nc=$reqid, cnonce=\"$cnonce\", ";
109 my $resp_digest = md5_hex
("$A1:$nonce:$reqid:$cnonce:auth:$A2");
110 $header .= "response=\"$resp_digest\"";
114 use constant CHUNK_SIZE
=> 16384;
118 $p = undef if (!defined($proxy) or $proxy =~ m!^socks://!);
119 POE
::Component
::Client
::HTTP
->spawn(
120 Agent
=> 'Emiya/0.2',
123 Streaming
=> CHUNK_SIZE
,
129 my $str = decode_utf8
(shift);
130 use bytes
; $str =~ s/%([0-9A-Fa-f]{2})/@{[chr(hex($1))]}/g; no bytes
;
131 decode_entities
($str);
136 my ($str, $maxlen) = @_;
138 return $str if length($str) <= $maxlen;
140 return undef if $maxlen < 3;
141 return "..." if $maxlen == 3;
142 return substr($str, 0, 1) . "..." if $maxlen == 4;
144 my $prelen = ($maxlen-3)/2;
145 my $postlen = ($maxlen-3) - $prelen;
148 while($prelen > int($prelen)) {
149 if (($toggle = !$toggle)) {
158 return substr($str, 0, $prelen) . "..." . substr($str, -$postlen);
162 my ($path, $maxlen) = @_;
163 if (length($path) > $maxlen) {
164 my @paths = split('/', $path);
165 if (scalar(@paths) < 2) {
166 $path = shorten
($path, $maxlen);
169 $maxlen -= length($path = shift(@paths) . "/") if length($paths[0]) < $maxlen/2;
171 my $count = scalar(@paths);
172 my $len = int($maxlen / $count);
173 my $extra = int($maxlen) % $count;
174 my $last = pop @paths;
177 $path .= shorten
("$_", $len - 1) . '/';
180 $path .= shorten
($last, $len + $extra);
186 POE
::Session
->create(
188 # Makes initial request for the Digest authentication
190 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
192 my $req = HTTP
::Request
->new(GET
=> 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html');
193 $heap->{progress_message
} = "Requesting List...";
194 printout
($heap->{progress_message
} . " ");
195 $kernel->post('ua', 'request', 'authenticate', $req, 0);
198 my ($kernel) = $_[KERNEL
];
199 $kernel->post('ua', 'shutdown');
201 # Authenticates and then actually fetch the list
202 authenticate
=> sub {
203 my $res = $_[ARG1
]->[0];
204 return if defined $_[ARG1
]->[1]; # Wait until the error page finishes downloading because of streaming
205 my $req = HTTP
::Request
->new(GET
=> 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html');
206 die $res->status_line unless ($res->status_line =~ /^(401|2)/);
207 unless ($res->is_success) {
208 my $header = make_digest
($res->header('WWW-Authenticate'));
209 $req->header('Authorization', $header);
211 $_[KERNEL
]->post('ua', 'request', 'stream_latest_files', $req, $reqid++, 'progress');
214 my ($heap, $done, $left) = ($_[HEAP
], @
{$_[ARG1
]}[0,1]);
215 my $msg = $heap->{progress_message
};
216 printout
(sprintf("%3.2f%%\r$msg ", $done * 100 / $left));
217 printout
("\n") if $done == $left;
219 # Puts the HTML in a temporary var
220 stream_latest_files
=> sub {
221 my ($kernel, $session, $heap, $res, $data) = (@_[KERNEL
, SESSION
, HEAP
], @
{$_[ARG1
]});
223 die $res->status_line unless ($res->is_success);
226 $heap->{latest_files_data
} = "" unless exists $heap->{latest_files_data
};
227 $heap->{latest_files_data
} .= $data;
229 $kernel->call($session, 'parse_latest_files');
230 $kernel->yield('load_completed_files');
231 $kernel->yield('filter_files');
234 # Parses the HTML, puts hashrefs in $_[HEAP]->{latest_files}
235 parse_latest_files
=> sub {
238 my $content = $heap->{latest_files_data
};
239 delete $heap->{latest_files_data
};
243 my @lines = split m/\n/, $content;
248 if (m!class="date".*?>(.+?)</td>!) {
250 } elsif (m!class="category".*?>(.+?)</td>!) {
251 $info->{category
} = $1;
252 } elsif (m!class="size".*?>(.+?)</td>!) {
254 } elsif (m!<td><a href="(.+?)">(.+?)</a></td>!) {
255 $info->{url
} = entity_clean
($1);
256 $info->{title
} = entity_clean
($2);
258 } elsif (m!^\s*</tr>! and $info->{url
}) {
264 $heap->{latest_files
} = \
@files;
266 # Loads the list of completed files
267 load_completed_files
=> sub {
269 $heap->{completed_files
} = [];
270 if (file_exists
("completed_files.txt")) {
271 my $in = file_open
("<", "completed_files.txt");
275 push @
{$heap->{completed_files
}}, decode_utf8
($_);
280 mark_as_completed
=> sub {
281 my $completed_files = file_open
(">>", "completed_files.txt");
282 print $completed_files encode_utf8
($_[ARG0
] . "\n");
283 close $completed_files;
285 # Filters the file list
286 filter_files
=> sub {
287 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
288 $heap->{allowed_files
} = [];
290 my $filecount = scalar(@
{$heap->{latest_files
}});
292 foreach my $info (@
{$heap->{latest_files
}}) {
293 printout
("\rFiltering " . $i++ . "/$filecount... ");
295 foreach (@
{$heap->{completed_files
}}) {
296 if ($info->{title
} eq $_) {
303 foreach my $group (keys %file_allow_filters) {
304 foreach my $filter (@
{$file_allow_filters{$group}}) {
305 if ($info->{$group} =~ /$filter/) {
313 foreach my $group (keys %file_deny_filters) {
314 foreach my $filter (@
{$file_deny_filters{$group}}) {
315 if ($info->{$group} =~ /$filter/) {
320 last unless $allowed;
323 push @
{$heap->{allowed_files
}}, $info if $allowed;
325 delete $heap->{latest_files
};
326 $kernel->yield('sort_allowed_files');
327 $kernel->yield('print_allowed_files');
328 $kernel->yield('download_entries');
330 sort_allowed_files
=> sub {
331 # TODO: actually sort them
332 # NOTE: the list is used in reverse order
334 print_allowed_files
=> sub {
336 my $a = @
{$heap->{allowed_files
}} > 0 ?
":" : ".";
337 printout
(@
{$heap->{allowed_files
}} . " entries accepted$a\n");
338 foreach (@
{$heap->{allowed_files
}}) {
339 printout
(" \"" . $_->{title
} . ($_->{url
} =~ m!/$! ?
"/" : "") . "\" (" . $_->{size
} . ")\n");
342 # This state is called after each file finishes downloading
343 download_entries
=> sub {
344 my ($kernel, $session, $heap) = @_[KERNEL
, SESSION
, HEAP
];
345 my $info = pop @
{$heap->{allowed_files
}};
347 if ($info->{url
} =~ /\/$/) {
348 $kernel->call($session, 'subfolder_recurse', $info);
350 $kernel->call($session, 'download_file', $info);
354 # Downloads folder listings
355 subfolder_recurse
=> sub {
356 my ($kernel, $heap, $info) = @_[KERNEL
, HEAP
, ARG0
];
358 my $req = HTTP
::Request
->new(GET
=> $info->{url
});
359 $heap->{info
} = $info;
360 $heap->{progress_message
} = "Listing \"". $info->{title
} ."/\":";
361 $kernel->call('ua', 'request', 'stream_subfolder', $req, $reqid++, 'progress');
362 while (defined $heap->{info
}) { $kernel->run_one_timeslice(); }
363 $kernel->yield('download_entries');
365 stream_subfolder
=> sub {
366 my ($kernel, $heap, $res, $data) = (@_[KERNEL
, HEAP
], @
{$_[ARG1
]});
367 my $info = $heap->{info
};
369 die $res->status_line unless ($res->is_success);
371 # HACK: I dunno why I use md5_hex, but it looked like a good idea at the time
372 # HACK: A very bizarre and most certainly broken filename made perl whine, so I added encode_entities
374 $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))} = "" unless exists $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))};
375 $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))} .= $data;
377 $kernel->yield('subfolder_parse');
380 # Parses folder listings, pushes subitems to allowed_files, folders last
381 # This makes subfolders be checked before files
382 subfolder_parse
=> sub {
385 my $info = $heap->{info
};
386 my @lines = split m/\n/, $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))};
390 if (m!<td class="n"><a href="(.*?)">.*<td class="t">Directory</td>!) {
393 } elsif (m!<td class="n"><a href=".*?">(.*?)</a></td>.*<td class="s">(.*?)</td>!) {
394 push @files, [$1, $2];
398 while ($_ = pop @files) {
399 my $finfo = {%{$info}};
400 $finfo->{title
} = $info->{title
};
401 $finfo->{title
} .= '/' unless $finfo->{title
} =~ m
!/$!;
402 $finfo->{title
} .= entity_clean
($_->[0]);
403 $finfo->{url
} = $info->{url
} . entity_clean
($_->[0]),
404 $finfo->{size
} = $_->[1];
407 foreach (@
{$heap->{completed_files
}}) {
408 if ($finfo->{title
} eq $_) {
413 push @
{$heap->{allowed_files
}}, $finfo unless $skip;
415 # There is no trailing slash on the title
416 # There IS a trailing slash on folder URLs
418 my $subinfo = {%{$info}}; # make a copy
419 $subinfo->{title
} = $info->{title
};
420 $subinfo->{title
} .= '/' unless $subinfo->{title
} =~ m
!/$!;
421 $subinfo->{title
} .= entity_clean
($_);
422 $subinfo->{title
} .= '/' unless $subinfo->{title
} =~ m
!/$!;
423 $subinfo->{url
} = $info->{url
} . entity_clean
($_);
426 foreach (@
{$heap->{completed_files
}}) {
427 if ($subinfo->{title
} eq $_) {
432 push @
{$heap->{allowed_files
}}, $subinfo unless $skip;
435 delete $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))};
436 $heap->{info
} = undef;
437 delete $heap->{info
};
439 # Actually downloads files
440 download_file
=> sub {
441 my ($kernel, $heap, $info) = @_[KERNEL
, HEAP
, ARG0
];
443 my $req = HTTP
::Request
->new(GET
=> $info->{url
});
444 $req->header (Accept_Ranges
=> "bytes");
445 $heap->{have_size
} = file_size
($info->{title
});
446 if ($heap->{have_size
}) {
447 $req->header (Range
=> "bytes=". $heap->{have_size
} ."-");
449 $heap->{have_size
} = 0;
451 my $sizelen = length($info->{size
}) + 2;
453 $heap->{progress_message
} = "\"".
454 shorten_path
($info->{title
}, $maxlen - $sizelen - $proglen - 3) ."\" (". $info->{size
} ."): ";
455 printout
($heap->{progress_message
});
457 $heap->{filename
} = $info->{title
};
459 $heap->{last_time
} = time;
460 $heap->{start_time
} = $heap->{last_time
};
463 $kernel->delay('calculate_speed' => 2);
464 $kernel->call('ua', 'request', 'stream_file', $req, $reqid++, 'file_progress');
466 # Does the actual file downloading
468 my ($kernel, $session, $heap, $req, $res, $data) =
469 (@_[KERNEL
, SESSION
, HEAP
, ARG0
], @
{$_[ARG1
]});
471 die $res->status_line unless $res->is_success or $res->status_line =~ /^416/;
473 if (defined($data)) {
474 unless ($heap->{outfh
} and $heap->{content_length
}) {
475 $heap->{filename
} =~ m!(.*)/!;
477 $heap->{outfh
} = file_open
('>>',$heap->{filename
}) or die "$^E";
478 $heap->{content_length
} = $res->header('Content-Length');
479 # This is probably wrong
480 if ($heap->{content_length
} == $heap->{have_size
}) {
481 close($heap->{outfh
});
482 $kernel->call($session, 'mark_as_completed', $heap->{filename
});
483 $kernel->post('ua', 'cancel', $req);
484 $kernel->yield('download_entries');
487 unless ($heap->{outfh
} && syswrite($heap->{outfh
}, $data)) {
491 unless ($res->is_success or $res->status_line =~ /^416/) { # Requested Range not satisfiable
492 die $res->status_line;
494 close($heap->{outfh
});
495 delete $heap->{outfh
};
496 $kernel->call($session, 'mark_as_completed', $heap->{filename
});
497 $kernel->call($session, 'move_completed', $heap->{filename
});
498 $kernel->yield('download_entries');
501 calculate_speed
=> sub {
502 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
505 if ($now == $heap->{last_time
}) {
506 $kernel->delay('calculate_speed' => 0.5);
509 my $spd = $heap->{oct} / 1024 / ($now - $heap->{last_time
});
510 unless (defined $heap->{speeds
}) {
511 $heap->{speeds
} = [];
512 $heap->{speeds_i
} = 0;
513 for(my $i = 0; $i < 8; $i++) { $heap->{speeds
}->[$i] = $spd; }
515 $heap->{speeds
}->[$heap->{speeds_i
}++] = $spd;
516 $heap->{speeds_i
} %= @
{$heap->{speeds
}};
520 foreach (@
{$heap->{speeds
}}) { $heap->{spd
} += $_; }
521 $heap->{spd
} /= @
{$heap->{speeds
}};
523 $heap->{last_time
} = time;
524 $kernel->delay('calculate_speed' => 0.5);
526 calculate_avg_speed
=> sub {
530 $heap->{spd
} = $heap->{len
} / 1024 / ($now - $heap->{start_time
});
532 # Prints the progress report
533 file_progress
=> sub {
534 my ($kernel, $heap, $pos, $len, $oct) = (@_[KERNEL
, HEAP
], @
{$_[ARG1
]});
537 $kernel->call($_[SESSION
], 'calculate_avg_speed');
538 my $line = sprintf("100.00%% (avg. %4.2f kB/s)", $heap->{spd
});
539 $heap->{prevlen
} = 0 unless $heap->{prevlen
};
540 printout
("\r" . $heap->{progress_message
} . "$line" . (" " x
($heap->{prevlen
} - length($line))) . "\n");
541 delete $heap->{have_size
};
542 delete $heap->{prev_time
};
543 delete $heap->{prevlen
};
545 delete $heap->{speeds
};
546 $heap->{speeds_i
} = 0;
547 $kernel->delay('calculate_speed');
549 my $have_size = exists $heap->{have_size
} ?
$heap->{have_size
} : 0;
551 my $spd = $heap->{spd
} ?
$heap->{spd
} : undef;
553 my $octlen = length($oct);
554 $heap->{oct} += $octlen;
555 $heap->{len
} += $octlen;
557 # $have_size may be a BigInt, use fixed point math / formatters
558 my $pct = ($pos + $have_size)*10000/($len + $have_size);
562 $line = sprintf("%2d.%02d%% (%4.2f kB/s)", $pct / 100, $pct % 100, $spd);
564 $line = sprintf("%2d.%02d%%", $pct / 100, $pct % 100);
566 $heap->{prevlen
} = length($line) unless defined $heap->{prevlen
} and $heap->{prevlen
} > length($line);
567 printout
("$line" . (" " x
($heap->{prevlen
} - length($line))) . "\r" . $heap->{progress_message
});
568 $heap->{prevlen
} = length($line);
571 move_completed
=> sub {
572 my $filename = $_[ARG0
];
576 $filename =~ m!^(.+)/(.+?)$!;
584 printout
("Moving \"$filename\" to \"$dest/$fname\"... ");
585 if (move
("$filename", "$dest/$fname")) {
597 # vim: set noexpandtab tabstop=4 shiftwidth=4 :