7 #sub POE::Kernel::ASSERT_DEFAULT () { 1 }
8 use POE
qw(Component::Client::HTTP);
13 use Digest
::MD5
'md5_hex';
14 use Time
::HiRes
'time';
16 # TODO: allow loading all those variables from a configuration file
18 # Required for using the script. If you're using this script,
19 # you already know what these are for and what you should put in here
23 # HTTP proxy. Must be in the form 'http://host:port'.
24 # Port is required even if 80.
25 # May work for SOCKS proxies by using the 'socks://' protocol, untested.
26 my $proxy = $ENV{http_proxy
};
30 # The main purpose of this script.
31 # Only files that pass any of the allow filters and fail all of the deny filters
34 # These are hashes to arrayrefs. Each key is used to filter a field in the $info
35 # hashref. Each element of the arrayrefs is treated as a case-sensitive regexp.
36 # The fields are title, url and category. There are other fields, but they are
37 # not really filterable through this method. You are welcome to try though.
38 # Example: title => ['Kara_no_Kyoukai', 'Black Lagoon\[niizk\]']
40 # Use [0-9A-Fa-f]{8} for CRC32s
41 # Use \d{1,3}v?\d? for episode numbers (catches v2s and v3s as well)
42 # Remember to escape all '.', '[' and ']' on filenames with a '\'.
43 my %file_allow_filters = ( title
=> [] );
44 my %file_deny_filters = ( url
=> [] );
46 # HTTP Digest auth hack
47 my $realm = "DeathWolf's Auto-Torrent Loving Realm";
48 my $A1 = md5_hex
("$username:$realm:$password");
49 my $A2 = md5_hex
("GET:/~DeathWolf/Recent/Latest-Files.html");
50 my $cnonce = int(time());
56 $www_auth =~ /nonce=\"(.*?)\"/;
59 my $header = "Digest username=\"$username\", ";
60 $header .= "realm=\"$realm\", nonce=\"$nonce\", ";
61 $header .= "uri=\"/~DeathWolf/Recent/Latest-Files.html\", ";
62 $header .= "qop=\"auth\", nc=$reqid, cnonce=\"$cnonce\", ";
63 my $resp_digest = md5_hex
("$A1:$nonce:$reqid:$cnonce:auth:$A2");
64 $header .= "response=\"$resp_digest\"";
68 use constant CHUNK_SIZE
=> 16384;
70 POE
::Component
::Client
::HTTP
->spawn(
73 Timeout
=> 45, # TODO: make it configurable
74 Streaming
=> CHUNK_SIZE
,
79 my $str = decode_utf8
(shift);
80 use bytes
; $str =~ s/%([0-9A-Fa-f]{2})/@{[chr(hex($1))]}/g; no bytes
;
81 decode_entities
($str);
85 ### Windows Unicode support wrappers
88 ### NOTE: ALL file manipulation files defined here *ONLY WORK WITH ABSOLUTE PATHS*
90 require Win32API
::File
; Win32API
::File
->import qw(:FuncW :Func :MOVEFILE_ :GENERIC_ :FILE_ FILE_END :FILE_SHARE_ OPEN_ALWAYS);
91 require Win32
::API
; Win32
::API
->import;
93 # Enable output of UTF-8 text
95 my $SetConsoleOutputCP = Win32
::API
->new('kernel32.dll', 'SetConsoleOutputCP',
96 'I', '') or die "SetConsoleOutputCP: $^E";
98 # if ($SetConsoleOutputCP->Call(65001)) {
99 # die "Error " . Win32::GetLastError () . " - " . Win32::FormatMessage (Win32::GetLastError ());
102 my $GetStdHandle = Win32
::API
->new('kernel32.dll', 'GetStdHandle',
103 'I', 'I') or die "GetStdHandle: $^E";
104 my $WriteConsoleW = Win32
::API
->new('kernel32.dll', 'WriteConsoleW',
105 'IPIPP', 'I') or die "WriteConsoleW: $^E";
107 use constant STD_OUTPUT_HANDLE
=> 0xfffffff5;
109 my $stdout = $GetStdHandle->Call(STD_OUTPUT_HANDLE
);
111 # Use Win32::API to load needed functions not defined by Win32API::File
113 use constant ERROR_NO_MORE_FILES
=> 18;
114 use constant INVALID_HANDLE_VALUE
=> -1;
116 typedef Win32
::API
::Struct FILETIME
=> qw{
118 DWORD dwHighDateTime
;
121 use constant FILE_ATTRIBUTE_READONLY
=> 0x00000001;
122 use constant FILE_ATTRIBUTE_HIDDEN
=> 0x00000002;
123 use constant FILE_ATTRIBUTE_SYSTEM
=> 0x00000004;
124 use constant FILE_ATTRIBUTE_DIRECTORY
=> 0x00000010;
125 use constant FILE_ATTRIBUTE_ARCHIVE
=> 0x00000020;
126 use constant FILE_ATTRIBUTE_NORMAL
=> 0x00000080;
127 use constant FILE_ATTRIBUTE_TEMPORARY
=> 0x00000100;
128 use constant FILE_ATTRIBUTE_COMPRESSED
=> 0x00000800;
129 use constant MAX_PATH
=> 260;
131 typedef Win32
::API
::Struct WIN32_FIND_DATAW
=> qw{
132 DWORD dwFileAttributes
;
133 FILETIME ftCreationTime
;
134 FILETIME ftLastAccessTime
;
135 FILETIME ftLastWriteTime
;
140 WCHAR cFileName
[520];
141 WCHAR cAlternateFileName
[28];
142 }; # 4 + 8 x 3 + 4 x 4 + 520 + 28 = 592 bytes
144 my $FindFirstFileW = Win32
::API
->new('kernel32.dll', 'FindFirstFileW',
145 'PS', 'N') or die "FindFirstFile: $^E";
146 my $FindNextFileW = Win32
::API
->new('kernel32.dll', 'FindNextFileW',
147 'NS', 'I') or die "FindNextFile $^E";
148 my $FindClose = Win32
::API
->new('kernel32.dll', 'FindClose',
149 'N', 'I') or die "FileClose $^E";
151 typedef Win32
::API
::Struct SHFILEOPSTRUCT
=> qw{
157 BOOL fAnyOperationsAborted
;
158 LPVOID hNameMappings
;
159 LPCTSTR lpszProgressTitle
;
162 my $SHFileOperationW = Win32
::API
->new('shell32.dll', 'SHFileOperationW',
163 'S', 'I') or die "SHFileOperation: $^E";
165 my $CreateDirectoryW = Win32
::API
->new('kernel32.dll',
166 'BOOL CreateDirectoryW(LPCWSTR lpPathName, LPVOID lpSecurityAttributes);') or die "CreateDirectoryW: $^E";
168 my $RemoveDirectoryW = Win32
::API
->new('kernel32.dll',
169 'BOOL RemoveDirectoryW(LPCWSTR lpPathName);') or die "RemoveDirectoryW: $^E";
173 return encode
("UTF-16LE", decode_utf8
("$str\0"));
180 return $count if $WriteConsoleW->Call($stdout, $str, (bytes
::length($str)-1)/2, $count, []);
188 unless (myexists
($path)) {
190 unless ($CreateDirectoryW->Call($enc->("$p"), undef)) {
192 $parent =~ s/\/[^\/]*$//;
193 if ($parent eq $path) {
196 unless (mymkpath
($parent)) {
199 # Parent made, try again making the child
200 unless ($CreateDirectoryW->Call($enc->("$p"), undef)) {
211 return GetFileAttributesW
($enc->("$p")) != fileConstant
("INVALID_FILE_ATTRIBUTES");
216 return 0 unless myexists
($p);
218 my $handle = CreateFileW
($enc->("$p"), 0, fileConstant
("FILE_SHARE_READ") |
219 fileConstant
("FILE_SHARE_WRITE"), [],
220 fileConstant
("OPEN_EXISTING"), 0, []);
221 my $size = getFileSize
($handle);
222 CloseHandle
($handle);
230 $file = $enc->("$file");
232 die "Not implemented";
234 my $handle = CreateFileW
($file, $mode eq '>>' ? FILE_APPEND_DATA
() : GENERIC_WRITE
(), FILE_SHARE_READ
(), [], OPEN_ALWAYS
(), 0, []);
235 setFilePointer
($handle, 0, FILE_END
());
243 return WriteFile
($handle, $data, 0, $outbytes, []) ?
$outbytes : undef;
251 DeleteFileW
($enc->(shift));
256 *myprint
= sub { print shift; };
257 *mymkpath
= sub { mkpath
(shift); };
258 *myexists
= sub { -e
shift; };
259 *mysize
= sub { -s
shift; };
260 *myopen
= sub { open ($_, shift, shift); };
261 *mysyswrite
= sub { syswrite(shift, shift); };
262 *myclose
= sub { close(shift); };
263 *myunlink
= sub { unlink(shift); };
267 my ($str, $maxlen) = @_;
269 return $str if length($str) <= $maxlen;
271 return undef if $maxlen < 3;
272 return "..." if $maxlen == 3;
273 return substr($str, 0, 1) . "..." if $maxlen == 4;
275 my $prelen = ($maxlen-3)/2;
276 my $postlen = ($maxlen-3) - $prelen;
279 while($prelen > int($prelen)) {
280 if (($toggle = !$toggle)) {
289 return substr($str, 0, $prelen) . "..." . substr($str, -$postlen);
293 my ($path, $maxlen) = @_;
294 if (length($path) > $maxlen) {
295 my @paths = split('/', $path);
296 if (scalar(@paths) < 2) {
297 $path = shorten
($path, $maxlen);
300 $maxlen -= length(($path = shift(@paths) . "/")) if length($paths[0]) < $maxlen/2;
302 my $count = scalar(@paths);
303 my $len = int($maxlen / $count);
304 my $extra = int($maxlen) % $count;
305 my $last = pop @paths;
308 $path .= shorten
("$_", $len - 1) . '/';
311 $path .= shorten
($last, $len + $extra);
317 POE
::Session
->create(
319 # Makes initial request for the Digest authentication
321 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
322 my $req = HTTP
::Request
->new(GET
=> 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html');
323 $heap->{progress_message
} = "Requesting List...";
324 myprint
($heap->{progress_message
} . " ");
325 $kernel->post('ua', 'request', 'authenticate', $req, 0);
328 $_[KERNEL
]->post('ua', 'shutdown');
330 # Authenticates and theen actually fetch the list
331 authenticate
=> sub {
332 my $res = $_[ARG1
]->[0];
333 return if defined $_[ARG1
]->[1]; # Wait until the error page finishes downloading because of streaming
334 my $req = HTTP
::Request
->new(GET
=> 'http://saber.kawaii-shoujo.net/~DeathWolf/Recent/Latest-Files.html');
335 die $res->status_line unless ($res->status_line =~ /^(401|2)/);
336 unless ($res->is_success) {
337 my $header = make_digest
($res->header('WWW-Authenticate'));
338 $req->header('Authorization', $header);
340 $_[KERNEL
]->post('ua', 'request', 'stream_latest_files', $req, $reqid++, 'progress');
343 my ($heap, $done, $left) = ($_[HEAP
], @
{$_[ARG1
]}[0,1]);
344 my $msg = $heap->{progress_message
};
345 myprint
(sprintf("%3.2f%%\r$msg ", $done * 100 / $left));
346 myprint
("\n") if $done == $left;
348 # Puts the HTML in a temporary var
349 stream_latest_files
=> sub {
350 my ($kernel, $session, $heap, $res, $data) = (@_[KERNEL
, SESSION
, HEAP
], @
{$_[ARG1
]});
352 die $res->status_line unless ($res->is_success);
355 $heap->{latest_files_data
} = "" unless exists $heap->{latest_files_data
};
356 $heap->{latest_files_data
} .= $data;
358 $kernel->call($session, 'parse_latest_files');
359 $kernel->yield('load_completed_files');
360 $kernel->yield('filter_files');
363 # Parses the HTML, puts hashrefs in $_[HEAP]->{latest_files}
364 parse_latest_files
=> sub {
367 my $content = $heap->{latest_files_data
};
368 delete $heap->{latest_files_data
};
372 # TODO: use a XML parser, saber now uses XHTML 1.0
373 my @lines = split m/\n/, $content;
378 if (m!class="date".*?>(.+)</td>!) {
380 } elsif (m!class="category".*?>(.+)</td>!) {
381 $info->{category
} = $1;
382 } elsif (m!class="size".*?>(.+)</td>!) {
384 } elsif (m!<td><a href="(.+)">(.+)</a></td>!) {
385 $info->{url
} = entity_clean
($1);
386 $info->{title
} = entity_clean
($2);
388 } elsif (m!^\s*</tr>! and $info->{url
}) {
394 $heap->{latest_files
} = \
@files;
396 # Loads the list of completed files
397 load_completed_files
=> sub {
399 $heap->{completed_files
} = [];
400 if (myexists
("completed_files.txt")) {
401 open my $in, "<", "completed_files.txt";
404 push @
{$heap->{completed_files
}}, decode_utf8
($_);
409 mark_as_completed
=> sub {
410 open my $completed_files, ">>", "completed_files.txt";
411 print $completed_files encode_utf8
($_[ARG0
] . "\n");
412 close $completed_files;
414 # Filters the file list
415 filter_files
=> sub {
416 # TODO: allow filtering by batch contents (subfolders)
417 # TODO: allow filtering by min / max filesize
418 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
419 $heap->{allowed_files
} = [];
421 my $filecount = scalar(@
{$heap->{latest_files
}});
423 foreach my $info (@
{$heap->{latest_files
}}) {
424 myprint
("\rFiltering " . $i++ . "/$filecount... ");
426 foreach (@
{$heap->{completed_files
}}) {
427 if ($info->{title
} eq $_) {
434 foreach my $group (keys %file_allow_filters) {
435 foreach my $filter (@
{$file_allow_filters{$group}}) {
436 if (entity_clean
($info->{$group}) =~ /$filter/) {
444 foreach my $group (keys %file_deny_filters) {
445 foreach my $filter (@
{$file_deny_filters{$group}}) {
446 if (entity_clean
($info->{$group}) =~ /$filter/) {
451 last unless $allowed;
454 push @
{$heap->{allowed_files
}}, $info if $allowed;
456 delete $heap->{latest_files
};
457 $kernel->yield('sort_allowed_files');
458 $kernel->yield('print_allowed_files');
459 $kernel->yield('download_entries');
461 sort_allowed_files
=> sub {
462 # TODO: actually sort them
463 # NOTE: the list is used in reverse order
465 print_allowed_files
=> sub {
467 my $a = @
{$heap->{allowed_files
}} > 0 ?
":" : ".";
468 myprint
(@
{$heap->{allowed_files
}} . " entries accepted$a\n");
469 foreach (@
{$heap->{allowed_files
}}) {
470 myprint
(" \"" . $_->{title
} . ($_->{url
} =~ m!/$! ?
"/" : "") . "\" (" . $_->{size
} . ")\n");
473 # This state is called after each file finishes downloading
474 download_entries
=> sub {
475 my ($kernel, $session, $heap) = @_[KERNEL
, SESSION
, HEAP
];
476 my $info = pop @
{$heap->{allowed_files
}};
478 if ($info->{url
} =~ /\/$/) {
479 $kernel->call($session, 'subfolder_recurse', $info);
481 $kernel->call($session, 'download_file', $info);
485 # Downloads folder listings
486 subfolder_recurse
=> sub {
487 # TODO: write the folder name to completed_files.txt after getting all files
488 # and skip if the folder name is found
489 my ($kernel, $heap, $info) = @_[KERNEL
, HEAP
, ARG0
];
491 my $req = HTTP
::Request
->new(GET
=> $info->{url
});
492 $heap->{info
} = $info;
493 $heap->{progress_message
} = "Listing \"". $info->{title
} ."/\":";
494 $kernel->call('ua', 'request', 'stream_subfolder', $req, $reqid++, 'progress');
495 while (defined $heap->{info
}) { $kernel->run_one_timeslice(); }
496 $kernel->yield('download_entries');
498 stream_subfolder
=> sub {
499 my ($kernel, $heap, $res, $data) = (@_[KERNEL
, HEAP
], @
{$_[ARG1
]});
500 my $info = $heap->{info
};
502 die $res->status_line unless ($res->is_success);
504 # HACK: I dunno why I use md5_hex, but it looked like a good idea at the time
506 $heap->{"folder-" . md5_hex
($info->{title
})} = "" unless exists $heap->{"folder-" . md5_hex
($info->{title
})};
507 $heap->{"folder-" . md5_hex
($info->{title
})} .= $data;
509 $kernel->yield('subfolder_parse');
512 # Parses folder listings, pushes subitems to allowed_files, folders last
513 # This makes subfolders be checked before files
514 subfolder_parse
=> sub {
517 my $info = $heap->{info
};
518 my @lines = split m/\n/, $heap->{"folder-" . md5_hex
($info->{title
})};
522 if (m!<td class="n"><a href="(.*?)">.*<td class="t">Directory</td>!) {
525 } elsif (m!<td class="n"><a href=".*?">(.*?)</a></td>.*<td class="s">(.*?)</td>!) {
526 push @files, [$1, $2];
530 while ($_ = pop @files) {
531 my $finfo = {%{$info}};
532 $finfo->{title
} = $info->{title
};
533 $finfo->{title
} .= '/' unless $finfo->{title
} =~ m
!/$!;
534 $finfo->{title
} .= entity_clean
($_->[0]);
535 $finfo->{url
} = $info->{url
} . entity_clean
($_->[0]),
536 $finfo->{size
} = $_->[1];
539 foreach (@
{$heap->{completed_files
}}) {
540 if ($finfo->{title
} eq $_) {
545 push @
{$heap->{allowed_files
}}, $finfo unless $skip;
547 # There is no trailing slash on the title
548 # There IS a trailing slash on folder URLs
550 my $subinfo = {%{$info}}; # make a copy
551 $subinfo->{title
} = $info->{title
};
552 $subinfo->{title
} .= '/' unless $subinfo->{title
} =~ m
!/$!;
553 $subinfo->{title
} .= entity_clean
($_);
554 $subinfo->{title
} .= '/' unless $subinfo->{title
} =~ m
!/$!;
555 $subinfo->{url
} = $info->{url
} . entity_clean
($_);
558 foreach (@
{$heap->{completed_files
}}) {
559 if ($subinfo->{title
} eq $_) {
564 push @
{$heap->{allowed_files
}}, $subinfo unless $skip;
567 delete $heap->{"folder-" . md5_hex
($info->{title
})};
568 $heap->{info
} = undef;
569 delete $heap->{info
};
571 # Actually downloads files
572 download_file
=> sub {
573 my ($kernel, $heap, $info) = @_[KERNEL
, HEAP
, ARG0
];
575 my $req = HTTP
::Request
->new(GET
=> $info->{url
});
576 $req->header (Accept_Ranges
=> "bytes");
577 $heap->{have_size
} = mysize
($info->{title
});
578 if ($heap->{have_size
}) {
579 $req->header (Range
=> "bytes=". $heap->{have_size
} ."-");
581 $heap->{have_size
} = 0;
583 # 120 is the max length of the string on-screen
584 # TODO: make length customizable and/or somehow detect from the terminal the optimal length
585 $heap->{progress_message
} = "\"" . shorten_path
($info->{title
}, 120) ."\" (". $info->{size
} ."): ";
586 myprint
($heap->{progress_message
});
588 $heap->{filename
} = $info->{title
};
590 $heap->{last_time
} = time;
591 $heap->{start_time
} = $heap->{last_time
};
594 $kernel->delay('calculate_speed' => 2);
595 $kernel->call('ua', 'request', 'stream_file', $req, $reqid++, 'file_progress');
597 # Does the actual file downloading
599 my ($kernel, $heap, $req, $res, $data) =
600 (@_[KERNEL
, HEAP
, ARG0
], @
{$_[ARG1
]});
602 die $res->status_line unless $res->is_success or $res->status_line =~ /^416/;
604 if (defined($data)) {
605 unless ($heap->{outfh
} and $heap->{content_length
}) {
606 $heap->{filename
} =~ m!(.*)/!;
608 $heap->{outfh
} = myopen
('>>',$heap->{filename
});
609 $heap->{content_length
} = $res->header('Content-Length');
610 # This is probably wrong
611 if ($heap->{content_length
} == $heap->{have_size
}) {
612 myclose
($heap->{outfh
});
613 $kernel->call($_[SESSION
], 'mark_as_completed', $heap->{filename
});
614 $kernel->post('ua', 'cancel', $req);
615 $kernel->yield('download_entries');
618 unless ($heap->{outfh
} && mysyswrite
($heap->{outfh
}, $data)) {
622 unless ($res->is_success or $res->status_line =~ /^416/) { # Requested Range not satisfiable
623 die $res->status_line;
625 $kernel->call($_[SESSION
], 'mark_as_completed', $heap->{filename
});
626 myclose
($heap->{outfh
});
627 delete $heap->{outfh
};
628 $kernel->yield('download_entries');
631 calculate_speed
=> sub {
632 my ($kernel, $heap) = @_[KERNEL
, HEAP
];
635 if ($now == $heap->{last_time
}) {
636 $kernel->delay('calculate_speed' => 0.5);
639 my $spd = $heap->{oct} / 1024 / ($now - $heap->{last_time
});
640 unless (defined $heap->{speeds
}) {
641 $heap->{speeds
} = [];
642 $heap->{speeds_i
} = 0;
643 for(my $i = 0; $i < 8; $i++) { $heap->{speeds
}->[$i] = $spd; }
645 $heap->{speeds
}->[$heap->{speeds_i
}++] = $spd;
646 $heap->{speeds_i
} %= @
{$heap->{speeds
}};
650 foreach (@
{$heap->{speeds
}}) { $heap->{spd
} += $_; }
651 $heap->{spd
} /= @
{$heap->{speeds
}};
653 $heap->{last_time
} = time;
654 $kernel->delay('calculate_speed' => 0.5);
656 calculate_avg_speed
=> sub {
660 $heap->{spd
} = $heap->{len
} / 1024 / ($now - $heap->{start_time
});
662 # Prints the progress report
663 file_progress
=> sub {
664 my ($kernel, $heap, $pos, $len, $oct) = (@_[KERNEL
, HEAP
], @
{$_[ARG1
]});
667 $kernel->call($_[SESSION
], 'calculate_avg_speed');
668 my $line = sprintf("100.00%% (avg. %4.2f kB/s)", $heap->{spd
});
669 $heap->{prevlen
} = 0 unless $heap->{prevlen
};
670 myprint
("\r" . $heap->{progress_message
} . " $line" . (" " x
($heap->{prevlen
} - length($line))) . "\n");
671 delete $heap->{have_size
};
672 delete $heap->{prev_time
};
673 delete $heap->{prevlen
};
675 delete $heap->{speeds
};
676 $heap->{speeds_i
} = 0;
677 $kernel->delay('calculate_speed');
679 my $have_size = exists $heap->{have_size
} ?
$heap->{have_size
} : 0;
681 my $spd = $heap->{spd
} ?
$heap->{spd
} : undef;
683 my $octlen = length($oct);
684 $heap->{oct} += $octlen;
685 $heap->{len
} += $octlen;
687 # $have_size may be a BigInt, use fixed point math / formatters
688 my $pct = ($pos + $have_size)*10000/($len + $have_size);
692 $line = sprintf("%2d.%02d%% (%4.2f kB/s)", $pct / 100, $pct % 100, $spd);
694 $line = sprintf("%2d.%02d%%", $pct / 100, $pct % 100);
696 $heap->{prevlen
} = length($line) unless defined $heap->{prevlen
} and $heap->{prevlen
} > length($line);
697 myprint
("$line" . (" " x
($heap->{prevlen
} - length($line))) . "\r" . $heap->{progress_message
});
698 $heap->{prevlen
} = length($line);