Initial commit
[www-rapidshare-free.git] / lib / WWW / Rapidshare / Free.pm
blob9ec7f5b2ddf5c5ffdaf03d32bd77ed2ee1911415
1 package WWW::Rapidshare::Free;
3 use strict;
5 BEGIN {
6 $^W = 1;
7 $| = 1;
10 use WWW::Mechanize;
11 use HTML::Form;
12 use HTML::Parser;
13 use Data::Validate::URI qw( is_http_uri );
14 use Carp qw( croak );
15 use Exporter;
17 our $VERSION = '0.01';
19 our @ISA = qw( Exporter );
20 our @EXPORT_OK = qw( links add_links add_links_from_file check_links download
21 verbose connection clear_links );
22 our %EXPORT_TAGS = ( all => \@EXPORT_OK );
24 my $parser = HTML::Parser->new(
25 api_version => 3,
26 start_h => [ \&_start, 'tagname, attr' ],
27 text_h => [ \&_text, 'text' ],
28 end_document_h => [ \&_end_document ],
30 my $mech = WWW::Mechanize->new;
31 my ( $tagname, $class, $verbose, $counter, $check_links, $error, @links,
32 @download_links )
33 = ( '', '', 1, 0 );
34 my $delay = 120; # An explicit value which will most likely be overwritten
36 my @text;
38 # Gets the tagname and also marks the start of the error tag:
39 # <div class='klappbox'>
40 sub _start {
41 $tagname = shift;
42 $class = 'klappbox'
43 if ( exists $_[0]->{'class'} && $_[0]->{'class'} eq 'klappbox' );
46 # Stores contents of <script> tag and also stores the error message
47 sub _text {
48 my $text = shift;
49 push @text, $text if $tagname eq 'script';
50 if ( $class eq 'klappbox' ) {
51 if ( $counter == 1 ) {
52 $text =~ s/\s+$//;
53 $error = $text;
55 elsif ( $counter == 2 ) {
56 $class = '';
57 $counter = 0;
59 $counter++;
63 # Fetches the `time to wait'
64 sub _end_document {
65 @download_links = ();
66 map {
67 $delay = $1 if /var c=(\d+)/;
68 push @download_links, $1
69 if m#
70 '<input\ \ type="radio"\ name="mirror"\
71 onclick="document.dlf.action=\\'(.+)\\'
72 #x;
73 } map { split /\n/, $_ } @text;
74 @text = ();
77 sub links { return @links }
79 sub clear_links {
80 my @temp = @links;
81 @links = ();
82 return @temp;
85 sub check_links {
86 $check_links = 1;
87 my @erroneous_links = &download;
88 return @erroneous_links;
91 sub download {
92 my %callbacks = @_;
93 my @erroneous_links;
94 my $index = 0;
96 for (@links) {
97 my $link = $_;
98 $mech->get($link);
99 my @forms = HTML::Form->parse( $mech->content, $mech->base );
101 my ( $dl, $file_name );
102 if (@forms) {
103 my $response = $mech->request( $forms[0]->click );
104 $parser->parse( $response->content );
105 $parser->eof;
107 $dl = shift @download_links
108 or croak
109 ' Simultaneous downloads are not available for free users';
110 ( $file_name = $dl ) =~ s{.*/}{};
112 else {
113 ( $counter, $error ) = ( 0, '' );
114 $parser->parse( $mech->content );
115 $parser->eof;
116 push @erroneous_links, [ $link, $error ];
117 splice @links, $index, 1;
118 next;
121 unless ( defined $check_links ) {
122 if ( defined $callbacks{'delay'} ) {
123 &_delay( $delay, $callbacks{'delay'} );
125 else {
126 &_delay($delay);
127 print "\r";
129 open my $fh, '>', $file_name
130 or croak "$file_name cannot be opened for output";
131 my ( $output, $file_size, $next_so_far ) = ( 0, 0, 0 );
132 $mech->get(
133 $dl,
134 ':content_cb' => sub {
135 my ( $chunk, $response ) = @_;
136 unless ($file_size) {
137 $file_size = $response->content_length;
138 &{ $callbacks{'properties'} }( $file_name, $file_size )
139 if defined( $callbacks{'properties'} );
141 $output += length $chunk;
142 print {$fh} $chunk;
143 &_progress( $output, $file_size ) if $verbose;
144 &{ $callbacks{'progress'} }($output)
145 if defined( $callbacks{'progress'} );
149 &_progress( $file_size, $file_size ) if $verbose;
150 if ( -e $file_name && defined( $callbacks{'file_complete'} ) ) {
151 &{ $callbacks{'file_complete'} }(1);
154 $index++;
156 if ( defined $check_links ) {
157 undef $check_links;
158 return @erroneous_links;
162 my $prev_size = 0;
164 # Print a fancy progress bar
165 sub _progress {
166 my ( $output, $max ) = @_;
167 my $current = ( $output / $max ) * 100;
168 printf
169 "\rProgress:\t %4.2f%% [ %4.2f MB / %4.2f MB ]",
170 $current, $output / ( 1024 * 1024 ), $max / ( 1024 * 1024 );
171 $prev_size = $output;
174 # Filter links
175 sub _store_links {
176 my $link = shift;
177 unless ( /^\s*#/
178 || !is_http_uri($link)
179 || !m#^http://(?:www.)?rapidshare.com/# )
180 { # Ignore comments
181 push @links, $link;
182 return 1;
184 else {
185 return 0;
189 sub add_links {
190 my @added_links;
191 map { push @added_links, $_ if &_store_links($_) } @_;
192 return @added_links;
195 sub add_links_from_file {
196 my $file_name = shift;
197 my @added_links;
198 open my ($fh), '<', $file_name
199 or croak "$file_name cannot be opened for input";
200 if ( -f $file_name ) {
201 while (<$fh>) {
202 chomp;
203 push @added_links, $_ if &_store_links($_);
206 else { croak " $file_name does not exist or is not a file" }
207 return @added_links;
210 sub verbose { $verbose = shift }
212 sub connection {
213 my %connection = @_;
214 if ( exists $connection{'reconnect'} ) {
215 system $connection{'reconnect'};
217 elsif ( keys %connection != 2 ) {
218 croak ' Incorrect number of parametres';
220 else {
221 system $connection{'disconnect'};
222 system $connection{'connect'};
226 # Fancy delay
227 sub _delay {
228 my ( $delay, $callback ) = @_;
229 for ( my $i = $delay ; $i >= 0 ; $i-- ) {
230 sleep 1;
231 if ($verbose) {
232 printf "\rTime Left:\t %3d", $i;
234 else {
235 &$callback($i);
240 1; # End of WWW::Rapidshare::Free
242 __END__
245 =head1 NAME
247 WWW::Rapidshare::Free - Automates downloading from Rapidshare.com and checking links for free users
249 =head1 VERSION
251 Version 0.01
253 =head1 SYNOPSIS
255 use strict;
256 use warnings;
257 use WWW::Rapidshare::Free qw( verbose add_links check_links
258 download connection );
260 # We are going to let the module be verbose and display a delay metre and
261 # progress bar.
262 verbose(1);
264 my @links = add_links(
266 http://rapidshare.com/files/175658683/perl-51.zip
267 http://rapidshare.com/files/175662062/perl-52.zip
271 print "Added links:\n";
272 map print("\t$_\n"), @links;
274 my @erroneous_links = check_links;
275 map {
276 my ( $uri, $error ) = @{$_};
277 print "URI: $uri\nError: $error\n";
278 } @erroneous_links;
280 download(
281 properties => \&properties,
282 file_complete => \&file_complete,
285 sub properties {
286 my ( $file_name, $file_size ) = @_;
287 print "Filename: $file_name\nFile size: $file_size bytes\n";
290 sub file_complete {
291 # Let us restart the modem. I have updated my /etc/sudoers file to allow me
292 # to execute sudo pppoe-start and sudo pppoe-stop without a password.
293 connection(
294 connect => 'sudo pppoe-start',
295 disconnect => 'sudo pppoe-stop',
299 =head1 FUNCTIONS
301 By default, the module does not export any function. An export tag C<all> has
302 been defined to export all functions. The following functions can be exported:
304 =over 4
306 =item * add_links
308 Adds links to be downloaded and returns the added links as an array. Accepts an
309 array of values as argument. Ignores commented links (links that start with a
310 C<#>) and invalid links.
312 =item * add_links_from_file
314 Adds links from a file which is given as an argument and returns the added
315 links as an array. Ignores commented links (links that start with a C<#>) and
316 invalid links.
318 =item * links
320 Returns current links which have been added by C<add_links> or
321 C<add_links_from_file>.
323 =item * clear_links
325 Clears current links and returns them as an array.
327 =item * check_links
329 Checks if the links are alive or not. Returns an array of array references if
330 there are dead links. The latter arrays are of the form
331 C<[ link, error message ]>. If all links are alive, returns false. Additionally
332 it also removes the dead links.
334 my @erroneous_links = check_links;
335 map {
336 my ( $uri, $error ) = @{$_};
337 print "URI: $uri\nError: $error\n";
338 } @erroneous_links;
341 =item * download
343 Downloads files off valid links. Accepts a hash with a maximum of four keys
344 having callbacks as their values. The hash should be of the form:
347 delay => \&delay_callback,
348 properties => \&properties_callback,
349 progress => \&progress_callback,
350 file_complete => \&file_complete
354 Callbacks are passed values as follows:
356 =over 4
358 =item * C<delay>
360 C<delay> callback is passed the number of seconds until download begins. It is
361 called every second until the delay is zero. Delay is decremented each time the
362 callback is executed.
364 =item * C<properties>
366 C<properties> is passed the file name and file size as two arguments.
368 =item * C<progress>
370 Sole argument is the number of bytes of the current file downloaded so far. This
371 callback is executed every instant in which data is written to the file which is
372 being downloaded.
374 =item * C<file_complete>
376 This callback passes control after each file is downloaded.
377 Disconnection/connection establishment or reconnection is possible by invoking
378 C<connection>.
380 =back
382 =item * verbose
384 Controls the output verbosity. Pass it a false value such as 0 or '' (empty
385 string) to turn off the delay metre and progress bar. Everything else turns on
386 verbosity. Verbosity is true by default.
388 =item * connection
390 Most useful within the callback of C<download> pertaining to the hash key
391 C<file_complete>. Accepts a hash:
393 connection(
394 connect => '', # Command to start a connection
395 disconnect => '', # Command to disconnect
396 reconnect => '' # Command to reconnect
399 Either both C<connect> and C<disconnect> have to be specified, or C<reconnect>
400 has to be specified. If a single command can reconnect, then a value for
401 C<reconnect> will be apt, else C<connect> and C<disconnect> should be assigned
402 the respective commands to connect and disconnect. The commands should be your
403 operating system's commands to connect/disconnect/reconnect the internet
404 connection.
406 Windows users can use the rasdial utility to connect/disconnect:
407 L<http://technet.microsoft.com/en-us/library/bb490979.aspx>.
409 =back
411 Check C<download.pl> file inside C<example> directory for usage example of the
412 module.
414 =head1 AUTHOR
416 Alan Haggai Alavi, C<< <alanhaggai at alanhaggai.org> >>
418 =head1 BUGS
420 Please report any bugs or feature requests to
421 C<bug-www-rapidshare-free at rt.cpan.org>, or through the web interface at
422 L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-Rapidshare-Free>. I will
423 be notified, and then you will automatically be notified of progress on your
424 bug as I make changes.
426 =head1 SUPPORT
428 You can find documentation for this module with the perldoc command.
430 perldoc WWW::Rapidshare::Free
433 You can also look for information at:
435 =over 4
437 =item * RT: CPAN's request tracker
439 L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-Rapidshare-Free>
441 =item * AnnoCPAN: Annotated CPAN documentation
443 L<http://annocpan.org/dist/WWW-Rapidshare-Free>
445 =item * CPAN Ratings
447 L<http://cpanratings.perl.org/d/WWW-Rapidshare-Free>
449 =item * Search CPAN
451 L<http://search.cpan.org/dist/WWW-Rapidshare-Free/>
453 =back
455 =head1 COPYRIGHT & LICENSE
457 Copyright 2008 Alan Haggai Alavi, all rights reserved.
459 This program is free software; you can redistribute it and/or modify it
460 under the same terms as Perl itself.
462 =cut