7 #sub POE::Kernel::ASSERT_DEFAULT () { 1 }
8 use POE
qw(Component::Client::HTTP);
13 use Digest
::MD5
'md5_hex';
14 use Time
::HiRes
'time';
16 # TODO: allow loading all those variables from a configuration file
18 # Required for using the script. If you're using this script,
19 # you already know what these are for and what you should put in here
23 # HTTP proxy. Must be in the form 'http://host:port'.
24 # Port is required even if 80.
25 # May work for SOCKS proxies by using the 'socks://' protocol, untested.
26 my $proxy = $ENV{http_proxy
};
30 # The main purpose of this script.
31 # Only files that pass any of the allow filters and fail all of the deny filters
34 # These are hashes to arrayrefs. Each key is used to filter a field in the $info
35 # hashref. Each element of the arrayrefs is treated as a case-sensitive regexp.
36 # The fields are title, url and category. There are other fields, but they are
37 # not really filterable through this method. You are welcome to try though.
38 # Example: title => ['Kara_no_Kyoukai', 'Black Lagoon\[niizk\]']
40 # Use [0-9A-Fa-f]{8} for CRC32s
41 # Use \d{1,3}v?\d? for episode numbers (catches v2s and v3s as well)
42 # Remember to escape all '.', '[' and ']' on filenames with a '\'.
43 my %file_allow_filters = ( title
=> [] );
44 my %file_deny_filters = ( url
=> [] );
46 # HTTP Digest auth hack
47 my $realm = "DeathWolf's Auto-Torrent Loving Realm";
48 my $A1 = md5_hex
("$username:$realm:$password");
49 my $A2 = md5_hex
("GET:/~DeathWolf/Recent/Latest-Files.html");
50 my $cnonce = int(time());
56 $www_auth =~ /nonce=\"(.*?)\"/;
59 my $header = "Digest username=\"$username\", ";
60 $header .= "realm=\"$realm\", nonce=\"$nonce\", ";
61 $header .= "uri=\"/~DeathWolf/Recent/Latest-Files.html\", ";
62 $header .= "qop=\"auth\", nc=$reqid, cnonce=\"$cnonce\", ";
63 my $resp_digest = md5_hex
("$A1:$nonce:$reqid:$cnonce:auth:$A2");
64 $header .= "response=\"$resp_digest\"";
68 use constant CHUNK_SIZE
=> 16384;
70 POE
::Component
::Client
::HTTP
->spawn(
73 Timeout
=> 45, # TODO: make it configurable
74 Streaming
=> CHUNK_SIZE
,
79 my $str = decode_utf8
(shift);
80 use bytes
; $str =~ s/%([0-9A-Fa-f]{2})/@{[chr(hex($1))]}/g; no bytes
;
81 decode_entities
($str);
85 ### Windows Unicode support wrappers
88 ### NOTE: ALL file manipulation files defined here *ONLY WORK WITH ABSOLUTE PATHS*
90 require Win32API
::File
; Win32API
::File
->import qw(:FuncW :Func :MOVEFILE_ :GENERIC_ :FILE_ FILE_END :FILE_SHARE_ OPEN_ALWAYS);
91 require Win32
::API
; Win32
::API
->import;
93 # Enable output of UTF-8 text
95 my $SetConsoleOutputCP = Win32
::API
->new('kernel32.dll', 'SetConsoleOutputCP',
96 'I', '') or die "SetConsoleOutputCP: $^E";
98 # if ($SetConsoleOutputCP->Call(65001)) {
99 # die "Error " . Win32::GetLastError () . " - " . Win32::FormatMessage (Win32::GetLastError ());
102 my $GetStdHandle = Win32
::API
->new('kernel32.dll', 'GetStdHandle',
103 'I', 'I') or die "GetStdHandle: $^E";
104 my $WriteConsoleW = Win32
::API
->new('kernel32.dll', 'WriteConsoleW',
105 'IPIPP', 'I') or die "WriteConsoleW: $^E";
107 use constant STD_OUTPUT_HANDLE
=> 0xfffffff5;
109 my $stdout = $GetStdHandle->Call(STD_OUTPUT_HANDLE
);
111 # Use Win32::API to load needed functions not defined by Win32API::File
113 use constant ERROR_NO_MORE_FILES
=> 18;
114 use constant INVALID_HANDLE_VALUE
=> -1;
116 typedef Win32
::API
::Struct FILETIME
=> qw{
118 DWORD dwHighDateTime
;
121 use constant FILE_ATTRIBUTE_READONLY
=> 0x00000001;
122 use constant FILE_ATTRIBUTE_HIDDEN
=> 0x00000002;
123 use constant FILE_ATTRIBUTE_SYSTEM
=> 0x00000004;
124 use constant FILE_ATTRIBUTE_DIRECTORY
=> 0x00000010;
125 use constant FILE_ATTRIBUTE_ARCHIVE
=> 0x00000020;
126 use constant FILE_ATTRIBUTE_NORMAL
=> 0x00000080;
127 use constant FILE_ATTRIBUTE_TEMPORARY
=> 0x00000100;
128 use constant FILE_ATTRIBUTE_COMPRESSED
=> 0x00000800;
129 use constant MAX_PATH
=> 260;
131 typedef Win32
::API
::Struct WIN32_FIND_DATAW
=> qw{
132 DWORD dwFileAttributes
;
133 FILETIME ftCreationTime
;
134 FILETIME ftLastAccessTime
;
135 FILETIME ftLastWriteTime
;
140 WCHAR cFileName
[520];
141 WCHAR cAlternateFileName
[28];
142 }; # 4 + 8 x 3 + 4 x 4 + 520 + 28 = 592 bytes
144 my $FindFirstFileW = Win32
::API
->new('kernel32.dll', 'FindFirstFileW',
145 'PS', 'N') or die "FindFirstFile: $^E";
146 my $FindNextFileW = Win32
::API
->new('kernel32.dll', 'FindNextFileW',
147 'NS', 'I') or die "FindNextFile $^E";
148 my $FindClose = Win32
::API
->new('kernel32.dll', 'FindClose',
149 'N', 'I') or die "FileClose $^E";
151 typedef Win32
::API
::Struct SHFILEOPSTRUCT
=> qw{
157 BOOL fAnyOperationsAborted
;
158 LPVOID hNameMappings
;
159 LPCTSTR lpszProgressTitle
;
162 my $SHFileOperationW = Win32
::API
->new('shell32.dll', 'SHFileOperationW',
163 'S', 'I') or die "SHFileOperation: $^E";
165 my $CreateDirectoryW = Win32
::API
->new('kernel32.dll',
166 'BOOL CreateDirectoryW(LPCWSTR lpPathName, LPVOID lpSecurityAttributes);') or die "CreateDirectoryW: $^E";
168 my $RemoveDirectoryW = Win32
::API
->new('kernel32.dll',
169 'BOOL RemoveDirectoryW(LPCWSTR lpPathName);') or die "RemoveDirectoryW: $^E";
173 return encode
("UTF-16LE", decode_utf8
("$str\0"));
180 return $count if $WriteConsoleW->Call($stdout, $str, (bytes
::length($str)-1)/2, $count, []);
188 unless (myexists
($path)) {
190 unless ($CreateDirectoryW->Call($enc->("$p"), undef)) {
192 $parent =~ s/\/[^\/]*$//;
193 if ($parent eq $path) {
196 unless (mymkpath
($parent)) {
199 # Parent made, try again making the child
200 unless ($CreateDirectoryW->Call($enc->("$p"), undef)) {
211 return GetFileAttributesW
($enc->("$p")) != fileConstant
("INVALID_FILE_ATTRIBUTES");
216 return 0 unless myexists
($p);
218 my $handle = CreateFileW
($enc->("$p"), 0, fileConstant
("FILE_SHARE_READ") |
219 fileConstant
("FILE_SHARE_WRITE"), [],
220 fileConstant
("OPEN_EXISTING"), 0, []);
221 my $size = getFileSize
($handle);
222 CloseHandle
($handle);
230 $file = $enc->("$file");
232 die "Not implemented";
234 my $handle = CreateFileW
($file, $mode eq '>>' ? FILE_APPEND_DATA
() : GENERIC_WRITE
(), FILE_SHARE_READ
(), [], OPEN_ALWAYS
(), 0, []);
235 setFilePointer
($handle, 0, FILE_END
());
243 return WriteFile
($handle, $data, 0, $outbytes, []) ?
$outbytes : undef;
251 DeleteFileW
($enc->(shift));
256 *myprint
= sub { print shift; };
257 *mymkpath
= sub { mkpath
(shift); };
258 *myexists
= sub { -e
shift; };
259 *mysize
= sub { -s
shift; };
260 *myopen
= sub { open ($_, shift, shift); };
261 *mysyswrite
= sub { syswrite(shift, shift); };
262 *myclose
= sub { close(shift); };
263 *myunlink
= sub { unlink(shift); };
267 my ($str, $maxlen) = @_;
269 return $str if length($str) <= $maxlen;
271 return undef if $maxlen < 3;
272 return "..." if $maxlen == 3;
273 return substr($str, 0, 1) . "..." if $maxlen == 4;
275 my $prelen = ($maxlen-3)/2;
276 my $postlen = ($maxlen-3) - $prelen;
279 while($prelen > int($prelen)) {
280 if (($toggle = !$toggle)) {
289 return substr($str, 0, $prelen) . "..." . substr($str, -$postlen);
293 my ($path, $maxlen) = @_;
294 if (length($path) > $maxlen) {
295 my @paths = split('/', $path);
296 if (scalar(@paths) < 2) {
297 $path = shorten
($path, $maxlen);
300 $maxlen -= length(($path = shift(@paths) . "/")) if length($paths[0]) < $maxlen/2;
302 my $count = scalar(@paths);
303 my $len = int($maxlen / $count);
304 my $extra = int($maxlen) % $count;
305 my $last = pop @paths;
308 $path .= shorten
("$_", $len - 1) . '/';
311 $path .= shorten
($last, $len + $extra);
317 POE
::Session
->create(
319 # Makes initial request for the Digest authentication
321 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
323 my $req = HTTP
::Request
->new(GET
=> 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html');
324 $heap->{progress_message
} = "Requesting List...";
325 myprint
($heap->{progress_message
} . " ");
326 $kernel->post('ua', 'request', 'authenticate', $req, 0);
329 my ($kernel) = $_[KERNEL
];
330 $kernel->post('ua', 'shutdown');
332 # Authenticates and then actually fetch the list
333 authenticate
=> sub {
334 my $res = $_[ARG1
]->[0];
335 return if defined $_[ARG1
]->[1]; # Wait until the error page finishes downloading because of streaming
336 my $req = HTTP
::Request
->new(GET
=> 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html');
337 die $res->status_line unless ($res->status_line =~ /^(401|2)/);
338 unless ($res->is_success) {
339 my $header = make_digest
($res->header('WWW-Authenticate'));
340 $req->header('Authorization', $header);
342 $_[KERNEL
]->post('ua', 'request', 'stream_latest_files', $req, $reqid++, 'progress');
345 my ($heap, $done, $left) = ($_[HEAP
], @
{$_[ARG1
]}[0,1]);
346 my $msg = $heap->{progress_message
};
347 myprint
(sprintf("%3.2f%%\r$msg ", $done * 100 / $left));
348 myprint
("\n") if $done == $left;
350 # Puts the HTML in a temporary var
351 stream_latest_files
=> sub {
352 my ($kernel, $session, $heap, $res, $data) = (@_[KERNEL
, SESSION
, HEAP
], @
{$_[ARG1
]});
354 die $res->status_line unless ($res->is_success);
357 $heap->{latest_files_data
} = "" unless exists $heap->{latest_files_data
};
358 $heap->{latest_files_data
} .= $data;
360 $kernel->call($session, 'parse_latest_files');
361 $kernel->yield('load_completed_files');
362 $kernel->yield('filter_files');
365 # Parses the HTML, puts hashrefs in $_[HEAP]->{latest_files}
366 parse_latest_files
=> sub {
369 my $content = $heap->{latest_files_data
};
370 delete $heap->{latest_files_data
};
374 # TODO: use a XML parser, saber now uses XHTML 1.0
375 my @lines = split m/\n/, $content;
380 if (m!class="date".*?>(.+?)</td>!) {
382 } elsif (m!class="category".*?>(.+?)</td>!) {
383 $info->{category
} = $1;
384 } elsif (m!class="size".*?>(.+?)</td>!) {
386 } elsif (m!<td><a href="(.+?)">(.+?)</a></td>!) {
387 $info->{url
} = entity_clean
($1);
388 $info->{title
} = entity_clean
($2);
390 } elsif (m!^\s*</tr>! and $info->{url
}) {
396 $heap->{latest_files
} = \
@files;
398 # Loads the list of completed files
399 load_completed_files
=> sub {
401 $heap->{completed_files
} = [];
402 if (myexists
("completed_files.txt")) {
403 open my $in, "<", "completed_files.txt";
407 push @
{$heap->{completed_files
}}, decode_utf8
($_);
412 mark_as_completed
=> sub {
413 open my $completed_files, ">>", "completed_files.txt";
414 print $completed_files encode_utf8
($_[ARG0
] . "\n");
415 close $completed_files;
417 # Filters the file list
418 filter_files
=> sub {
419 # TODO: allow filtering by batch contents (subfolders)
420 # TODO: allow filtering by min / max filesize
421 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
422 $heap->{allowed_files
} = [];
424 my $filecount = scalar(@
{$heap->{latest_files
}});
426 foreach my $info (@
{$heap->{latest_files
}}) {
427 myprint
("\rFiltering " . $i++ . "/$filecount... ");
429 foreach (@
{$heap->{completed_files
}}) {
430 if ($info->{title
} eq $_) {
437 foreach my $group (keys %file_allow_filters) {
438 foreach my $filter (@
{$file_allow_filters{$group}}) {
439 if (entity_clean
($info->{$group}) =~ /$filter/) {
447 foreach my $group (keys %file_deny_filters) {
448 foreach my $filter (@
{$file_deny_filters{$group}}) {
449 if (entity_clean
($info->{$group}) =~ /$filter/) {
454 last unless $allowed;
457 push @
{$heap->{allowed_files
}}, $info if $allowed;
459 delete $heap->{latest_files
};
460 $kernel->yield('sort_allowed_files');
461 $kernel->yield('print_allowed_files');
462 $kernel->yield('download_entries');
464 sort_allowed_files
=> sub {
465 # TODO: actually sort them
466 # NOTE: the list is used in reverse order
468 print_allowed_files
=> sub {
470 my $a = @
{$heap->{allowed_files
}} > 0 ?
":" : ".";
471 myprint
(@
{$heap->{allowed_files
}} . " entries accepted$a\n");
472 foreach (@
{$heap->{allowed_files
}}) {
473 myprint
(" \"" . $_->{title
} . ($_->{url
} =~ m!/$! ?
"/" : "") . "\" (" . $_->{size
} . ")\n");
476 # This state is called after each file finishes downloading
477 download_entries
=> sub {
478 my ($kernel, $session, $heap) = @_[KERNEL
, SESSION
, HEAP
];
479 my $info = pop @
{$heap->{allowed_files
}};
481 if ($info->{url
} =~ /\/$/) {
482 $kernel->call($session, 'subfolder_recurse', $info);
484 $kernel->call($session, 'download_file', $info);
488 # Downloads folder listings
489 subfolder_recurse
=> sub {
490 # TODO: write the folder name to completed_files.txt after getting all files
491 # and skip if the folder name is found
492 my ($kernel, $heap, $info) = @_[KERNEL
, HEAP
, ARG0
];
494 my $req = HTTP
::Request
->new(GET
=> $info->{url
});
495 $heap->{info
} = $info;
496 $heap->{progress_message
} = "Listing \"". $info->{title
} ."/\":";
497 $kernel->call('ua', 'request', 'stream_subfolder', $req, $reqid++, 'progress');
498 while (defined $heap->{info
}) { $kernel->run_one_timeslice(); }
499 $kernel->yield('download_entries');
501 stream_subfolder
=> sub {
502 my ($kernel, $heap, $res, $data) = (@_[KERNEL
, HEAP
], @
{$_[ARG1
]});
503 my $info = $heap->{info
};
505 die $res->status_line unless ($res->is_success);
507 # HACK: I dunno why I use md5_hex, but it looked like a good idea at the time
508 # HACK: A very bizarre and most certainly broken filename made perl whine, so I added encode_entities
510 $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))} = "" unless exists $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))};
511 $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))} .= $data;
513 $kernel->yield('subfolder_parse');
516 # Parses folder listings, pushes subitems to allowed_files, folders last
517 # This makes subfolders be checked before files
518 subfolder_parse
=> sub {
521 my $info = $heap->{info
};
522 my @lines = split m/\n/, $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))};
526 if (m!<td class="n"><a href="(.*?)">.*<td class="t">Directory</td>!) {
529 } elsif (m!<td class="n"><a href=".*?">(.*?)</a></td>.*<td class="s">(.*?)</td>!) {
530 push @files, [$1, $2];
534 while ($_ = pop @files) {
535 my $finfo = {%{$info}};
536 $finfo->{title
} = $info->{title
};
537 $finfo->{title
} .= '/' unless $finfo->{title
} =~ m
!/$!;
538 $finfo->{title
} .= entity_clean
($_->[0]);
539 $finfo->{url
} = $info->{url
} . entity_clean
($_->[0]),
540 $finfo->{size
} = $_->[1];
543 foreach (@
{$heap->{completed_files
}}) {
544 if ($finfo->{title
} eq $_) {
549 push @
{$heap->{allowed_files
}}, $finfo unless $skip;
551 # There is no trailing slash on the title
552 # There IS a trailing slash on folder URLs
554 my $subinfo = {%{$info}}; # make a copy
555 $subinfo->{title
} = $info->{title
};
556 $subinfo->{title
} .= '/' unless $subinfo->{title
} =~ m
!/$!;
557 $subinfo->{title
} .= entity_clean
($_);
558 $subinfo->{title
} .= '/' unless $subinfo->{title
} =~ m
!/$!;
559 $subinfo->{url
} = $info->{url
} . entity_clean
($_);
562 foreach (@
{$heap->{completed_files
}}) {
563 if ($subinfo->{title
} eq $_) {
568 push @
{$heap->{allowed_files
}}, $subinfo unless $skip;
571 delete $heap->{"folder-" . md5_hex
(encode_entities
($info->{title
}))};
572 $heap->{info
} = undef;
573 delete $heap->{info
};
575 # Actually downloads files
576 download_file
=> sub {
577 my ($kernel, $heap, $info) = @_[KERNEL
, HEAP
, ARG0
];
579 my $req = HTTP
::Request
->new(GET
=> $info->{url
});
580 $req->header (Accept_Ranges
=> "bytes");
581 $heap->{have_size
} = mysize
($info->{title
});
582 if ($heap->{have_size
}) {
583 $req->header (Range
=> "bytes=". $heap->{have_size
} ."-");
585 $heap->{have_size
} = 0;
587 # 120 is the max length of the string on-screen
588 # TODO: make length customizable and/or somehow detect from the terminal the optimal length
589 $heap->{progress_message
} = "\"" . shorten_path
($info->{title
}, 120) ."\" (". $info->{size
} ."): ";
590 myprint
($heap->{progress_message
});
592 $heap->{filename
} = $info->{title
};
594 $heap->{last_time
} = time;
595 $heap->{start_time
} = $heap->{last_time
};
598 $kernel->delay('calculate_speed' => 2);
599 $kernel->call('ua', 'request', 'stream_file', $req, $reqid++, 'file_progress');
601 # Does the actual file downloading
603 my ($kernel, $heap, $req, $res, $data) =
604 (@_[KERNEL
, HEAP
, ARG0
], @
{$_[ARG1
]});
606 die $res->status_line unless $res->is_success or $res->status_line =~ /^416/;
608 if (defined($data)) {
609 unless ($heap->{outfh
} and $heap->{content_length
}) {
610 $heap->{filename
} =~ m!(.*)/!;
612 $heap->{outfh
} = myopen
('>>',$heap->{filename
});
613 $heap->{content_length
} = $res->header('Content-Length');
614 # This is probably wrong
615 if ($heap->{content_length
} == $heap->{have_size
}) {
616 myclose
($heap->{outfh
});
617 $kernel->call($_[SESSION
], 'mark_as_completed', $heap->{filename
});
618 $kernel->post('ua', 'cancel', $req);
619 $kernel->yield('download_entries');
622 unless ($heap->{outfh
} && mysyswrite
($heap->{outfh
}, $data)) {
626 unless ($res->is_success or $res->status_line =~ /^416/) { # Requested Range not satisfiable
627 die $res->status_line;
629 $kernel->call($_[SESSION
], 'mark_as_completed', $heap->{filename
});
630 myclose
($heap->{outfh
});
631 delete $heap->{outfh
};
632 $kernel->yield('download_entries');
635 calculate_speed
=> sub {
636 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
639 if ($now == $heap->{last_time
}) {
640 $kernel->delay('calculate_speed' => 0.5);
643 my $spd = $heap->{oct} / 1024 / ($now - $heap->{last_time
});
644 unless (defined $heap->{speeds
}) {
645 $heap->{speeds
} = [];
646 $heap->{speeds_i
} = 0;
647 for(my $i = 0; $i < 8; $i++) { $heap->{speeds
}->[$i] = $spd; }
649 $heap->{speeds
}->[$heap->{speeds_i
}++] = $spd;
650 $heap->{speeds_i
} %= @
{$heap->{speeds
}};
654 foreach (@
{$heap->{speeds
}}) { $heap->{spd
} += $_; }
655 $heap->{spd
} /= @
{$heap->{speeds
}};
657 $heap->{last_time
} = time;
658 $kernel->delay('calculate_speed' => 0.5);
660 calculate_avg_speed
=> sub {
664 $heap->{spd
} = $heap->{len
} / 1024 / ($now - $heap->{start_time
});
666 # Prints the progress report
667 file_progress
=> sub {
668 my ($kernel, $heap, $pos, $len, $oct) = (@_[KERNEL
, HEAP
], @
{$_[ARG1
]});
671 $kernel->call($_[SESSION
], 'calculate_avg_speed');
672 my $line = sprintf("100.00%% (avg. %4.2f kB/s)", $heap->{spd
});
673 $heap->{prevlen
} = 0 unless $heap->{prevlen
};
674 myprint
("\r" . $heap->{progress_message
} . " $line" . (" " x
($heap->{prevlen
} - length($line))) . "\n");
675 delete $heap->{have_size
};
676 delete $heap->{prev_time
};
677 delete $heap->{prevlen
};
679 delete $heap->{speeds
};
680 $heap->{speeds_i
} = 0;
681 $kernel->delay('calculate_speed');
683 my $have_size = exists $heap->{have_size
} ?
$heap->{have_size
} : 0;
685 my $spd = $heap->{spd
} ?
$heap->{spd
} : undef;
687 my $octlen = length($oct);
688 $heap->{oct} += $octlen;
689 $heap->{len
} += $octlen;
691 # $have_size may be a BigInt, use fixed point math / formatters
692 my $pct = ($pos + $have_size)*10000/($len + $have_size);
696 $line = sprintf("%2d.%02d%% (%4.2f kB/s)", $pct / 100, $pct % 100, $spd);
698 $line = sprintf("%2d.%02d%%", $pct / 100, $pct % 100);
700 $heap->{prevlen
} = length($line) unless defined $heap->{prevlen
} and $heap->{prevlen
} > length($line);
701 myprint
("$line" . (" " x
($heap->{prevlen
} - length($line))) . "\r" . $heap->{progress_message
});
702 $heap->{prevlen
} = length($line);