updated git and svn scripts
[xrzperl.git] / batchget
blobbad0b60c93078d17b02c2713bc7798e97a32c167
1 #!/usr/bin/perl -w
2 # $Id$
3 use strict;
4 require v5.10.0;
5 our $VERSION = 'v0.3';
# Make the script's bundled module directories (next to the script and
# one level up) visible to `use`/`require` before any module is loaded.
BEGIN
{
	my $PROGRAM_DIR = $0;
	# Strip the script's own name, keeping only the directory part.
	$PROGRAM_DIR =~ s/[^\/\\]+$//;
	$PROGRAM_DIR = "./" unless($PROGRAM_DIR);
	# BUGFIX: "..lib" was a typo for "../lib" and never resolved.
	unshift @INC,
		map "$PROGRAM_DIR$_",qw{modules lib ../modules ../lib};
}
# Parsed command-line options land here, keyed by the long option name.
my %OPTS;
# Getopt::Long specifications: long|short aliases; =s/=i mark string and
# integer-valued options.
my @OPTIONS = qw/
	help|h|? version|ver edit-me manual|man
	autoname|a cookie|b=s directory|d ext|e=s
	fullname|f logger|L=s maxtime|M=i maxtask|m=i
	taskname|n=s referer|r=s workdir|w=s urlhist|U
	no-clobber|nc|c numlen|i=i
/;
# Parse only when arguments were given; the parser and the project's
# usage/help handler are loaded lazily to keep startup cheap.
if (@ARGV) {
	require Getopt::Long;
	require MyPlace::Usage;
	Getopt::Long::Configure('no_ignore_case');
	Getopt::Long::GetOptions(\%OPTS, @OPTIONS);
	MyPlace::Usage::Process(\%OPTS, $VERSION);
}
34 use MyPlace::ParallelRun;
35 use URI::Escape;
36 use MyPlace::Script::Message;
# Default number of parallel download tasks.
my $def_mul=3;
# Flatten parsed options into plain scalars; every one defaults to a
# false/empty value when the option was not supplied.
my $createdir = $OPTS{"directory"} ? $OPTS{"directory"} : 0;
my $maxtime   = $OPTS{"maxtime"}   ? $OPTS{"maxtime"}   : undef;
my $muldown   = $OPTS{"maxtask"}   ? $OPTS{"maxtask"}   : $def_mul;
my $taskname  = $OPTS{"taskname"}  ? $OPTS{"taskname"}  : "";
my $autoname  = $OPTS{"autoname"}  ? $OPTS{"autoname"}  : 0;
my $extname   = $OPTS{"ext"}       ? $OPTS{"ext"}       : "";
my $workdir   = $OPTS{"workdir"}   ? $OPTS{"workdir"}   : "";
my $refer     = $OPTS{"referer"}   ? $OPTS{"referer"}   : "";
# BUGFIX: the option is declared as "logger|L=s", but this line used to
# read the non-existent key "logging", so --logger never took effect.
my $logger    = $OPTS{"logger"}    ? $OPTS{"logger"}    : "";
my $cookie    = $OPTS{"cookie"}    ? $OPTS{"cookie"}    : "";
my $number    = $OPTS{"numlen"}    ? $OPTS{"numlen"}    : "";
my $fullname  = $OPTS{"fullname"} ? 1 : 0;
my $urlhist   = $OPTS{'urlhist'} ? 1 : 0;
# File used to persist the URL download history (--urlhist).
my $URL_DATABASE_FILE = 'URLS.txt';
# Maps a URL to an explicit local filename (from "URL<TAB>name" input lines).
my %Local_Map;
# --numlen implies automatic (indexed) naming.
$autoname="true" if($number);
# Fall back to the first positional argument as the task name.
unless($taskname) {
$taskname = shift(@ARGV) if(@ARGV);
$taskname = "" unless($taskname);
# Never run fewer than one parallel task.
$muldown = 1 if( $muldown<1);
# Historical inline workdir handling, superseded by set_workdir() below.
#if($workdir) {
# mkdir $workdir unless(-d $workdir);
# $workdir .= "/" unless($workdir =~ /\/$/);
# chdir($workdir);
# Set of URLs already seen (loaded/saved via load_database/save_database).
my %URL_DATABASE;
# Load the URL history file (one URL per line) into %URL_DATABASE.
# Silently does nothing when the file cannot be opened (e.g. first run).
sub load_database {
	# Lexical filehandle instead of the old bareword FI.
	open my $fh, "<", $URL_DATABASE_FILE or return;
	while (my $line = <$fh>) {
		chomp $line;
		$URL_DATABASE{$line} = 1;
	}
	close $fh;
}
# Return true when $url has been seen before; otherwise remember it and
# return undef.  Note the side effect: a first-time URL is recorded.
sub check_database {
	my ($url) = @_;
	return 1 if $URL_DATABASE{$url};
	$URL_DATABASE{$url} = 1;
	return undef;
}
# Persist %URL_DATABASE to $URL_DATABASE_FILE, one URL per line.
# Keys are sorted so the file is stable across runs; open/close failures
# are ignored (history is best-effort, matching load_database).
sub save_database {
	open my $fh, ">", $URL_DATABASE_FILE or return;
	foreach my $url (sort keys %URL_DATABASE) {
		print {$fh} $url, "\n";
	}
	# Buffered write errors surface at close; nothing to do but give up.
	close $fh or return;
}
# Generate a random, fixed-width numeric filename (plus extension) that
# does not already exist in the current directory.
sub Uniqname($) {
	my $ext = shift;
	my $max = 10000000000;    # 10^10 => names padded to 10 digits
	my $result;
	do {
		# BUGFIX: the old code computed log($max/$n1)/log(10) for the
		# padding width, which died with an illegal-division/log(0)
		# error whenever int(rand($max)) produced 0.  sprintf gives the
		# fixed-width padding safely for every value.
		$result = sprintf("%010d", int(rand($max))) . $ext;
	} until (! -f $result);
	return $result;
}
# Build an output filename from the full URL: drop the scheme prefix,
# then turn every path/query special character into "_".
sub GetFilename_Fullname {
	my ($name) = @_;
	$name =~ s{^.*://}{};
	$name =~ s{[/\?:\\\*&]}{_}g;
	$name =~ s/&//g;    # kept for parity with the original (already a no-op here)
	return $name;
}
# Derive an output filename from the URL itself: scheme stripped and
# special characters replaced, optionally prefixed with an index number
# and, when --directory is active, placed under the URL's directory path.
sub GetFilename_Auto {
	my ($URL, $num) = @_;
	my $name = $URL;
	$name =~ s{^.*://}{};
	$name =~ s{[/\?:\\\*&]}{_}g;
	$name =~ s/&//g;
	# Keep names well below common filesystem name-length limits.
	$name = substr($name, 0, 127) if length($name) >= 128;
	$name = "$num.$name" if defined $num;
	if ($createdir) {
		# Reuse the URL's directory part (host and final component removed).
		my $dir = $URL;
		$dir =~ s{^.*:/*[^/]*/}{};
		$dir =~ s{/[^/]*}{};
		$dir .= "/" if $dir;
		$name = $dir . $name;
	}
	return $name;
}
# Derive an output filename from a URL: its path relative to the host
# when --directory is set, otherwise just the last path component.
sub GetFilename_NoAuto {
	my ($name) = @_;
	if ($createdir) {
		$name =~ s{^.*:/*[^/]*/}{};
	}
	else {
		$name =~ s{^.*/}{};
	}
	return $name;
}
# Append the pair ($key, $value) to the given list, but only when $value
# is true -- empty/zero values mean "option not set" and are skipped.
# The (\@$$) prototype lets callers write pushArgPair(@list, $k, $v).
sub pushArgPair(\@$$) {
	my ($list, $key, $value) = @_;
	push @$list, $key, $value if $value;
}
# URLs already queued during this run; used to drop duplicates.
my %record;
# Queue one URL for download.  $index/$count/$prefix are only used to
# build the "[task n/total]" progress label.  Returns 1 when queued,
# undef when the URL was skipped.
sub inqueue {
my ($URL,$index,$count,$prefix)=@_;
my $curname= "[$prefix$index/$count]";
# Zero-padded index for --numlen naming.
# NOTE(review): "my ... if COND" leaves the variable's value
# unspecified when the condition is false -- a well-known Perl trap.
my $stridx = "0" x (length($count)-length($index)+1) . $index if($number);
# Skip URLs already queued in this run.
if($record{$URL}) {
app_warning($curname . "Duplicated, $URL [Ignored]\n");
return undef;
$record{$URL} = 1;
# Pick the output filename: an explicit mapping from the input line
# wins, then --fullname, then --autoname, then the last path component.
my $filename =
$Local_Map{$URL} ?
$Local_Map{$URL}
: $fullname ?
GetFilename_Fullname($URL)
#GetFilename_Fullname(uri_unescape($URL))
: $autoname ?
GetFilename_Auto($URL,$stridx)
#GetFilename_Auto(uri_unescape($URL),$stridx)
: GetFilename_NoAuto($URL);
# --no-clobber: never overwrite an existing target file.
if($OPTS{"no-clobber"}) {
if(-f $workdir . $filename) {
app_warning($curname . "$URL\t[Ignored, TARGET EXISTS]\n");
return undef;
# Per-URL referer defaults to the URL itself.
my $thisrefer= $refer ? $refer : $URL;
# Optional external logger hook, invoked with (filename, URL).
if($logger) {system($logger,$filename,$URL);}
# Build the argument list for the external "download" helper; empty
# values are dropped by pushArgPair.
my @DARG;
pushArgPair(@DARG,"-u",$URL);
pushArgPair(@DARG,"-s",$filename);
#pushArgPair(@DARG,"-s",$workdir . $filename);
pushArgPair(@DARG,"-n",$curname);
pushArgPair(@DARG,"-r",$thisrefer);
pushArgPair(@DARG,"-b",$cookie);
pushArgPair(@DARG,"-m",$maxtime);
push(@DARG,"-d");
# para_queue comes from MyPlace::ParallelRun -- presumably enqueues the
# command for parallel execution; verify against that module.
para_queue "download",@DARG;
return 1;
# Progress-label prefix derived from the task name.
my $prefix = $taskname ? $taskname . " " : "";
my @URLS;    # collected input lines (URLs and #BATCHGET: directives)
my $index=0; # position of the URL currently being queued
my $count=0; # number of accepted input lines (adjusted before queuing)
# Create directory $w (and parents) if needed, then chdir into it.
# Returns $w on success, undef for an empty/false argument; dies when
# the directory cannot be created or entered.
sub set_workdir {
	my $w = shift;
	return undef unless($w);
	if(! -d $w) {
		# BUGFIX: $! is not meaningful after system(); report the
		# child's real exit status from $? instead.
		system("mkdir","-p","--",$w) == 0
			or die "mkdir -p '$w' failed (exit " . ($? >> 8) . ")\n";
	}
	chdir $w or die "chdir '$w': $!\n";
	return $w;
}
use Cwd;
# Original working directory; #BATCHGET:chdir directives are resolved
# relative to it later.
my $PWD;
# Enter the global --workdir before any downloading starts.
if($workdir) {
set_workdir($workdir);
$PWD = getcwd;
# Turn a raw cookie string into a jar file via the external "mkcookie"
# helper, then point downloads at that file.
# NOTE(review): $cookie is interpolated into a shell command here; a
# value containing quotes or shell metacharacters would break the
# command or inject shell code.  Consider a list-form invocation with
# explicit redirection -- confirm mkcookie's interface first.
if($cookie) {
system("mkcookie '$cookie' >download.cookie");
$cookie="download.cookie";
238 while(<STDIN>) {
239 chomp;
240 s/^\s+//;
241 s/\s+$//;
242 if(!$_) {
243 next;
245 elsif($_ =~ m/^#BATCHGET:/) {
246 $no_urls_count++;
247 push @URLS,$_;
248 $count++;
249 next;
251 elsif($_ =~ /([^\t]+)\t+([^\t]+)/) {
252 $Local_Map{$1} = $2;
253 $_ = $1;
254 $Local_Map{$_} =~ s/^(http|ftp|https):?\/*//gi;
255 $Local_Map{$_} =~ s/[\/:,\?\*\\]/_/g;
258 if(check_database($_)) {
259 app_warning("[Ignored, In DATABASE]$_\n");
260 next;
262 #if($urlhist) {
263 # next if $is_old;
265 #my $is_old = check_database($_);
266 #if($urlhist and $is_old) {
267 # app_warning("$_\t[Ingored, IN DATABASE]\n");
268 # next;
270 #check_database($_));
271 push @URLS,$_;
272 $count++;
273 #if(0 and &para_isfree()) { # disable pre processing! (08/03/2010 xiaoranzzz@gmail.com)
274 # my $URL = shift @URLS;
275 # $index++;
276 # &inqueue($URL,$index,$count,$prefix);
278 # print STDERR ("\r$count URLS enqueued... ");
# Directives were pushed into @URLS but must not count toward the
# "n/total" progress labels.
$count = $count - $no_urls_count;
foreach(@URLS) {
# "#BATCHGET:chdir:DIR" switches the output directory mid-stream,
# always resolved from the original working directory $PWD.
if(m/^#BATCHGET:chdir:(.+)$/) {
my $w = $1;
# Strip characters that are unsafe in directory names.
$w =~ s/[:\?\*]+//g;
if($w) {
app_message('Change working directory to ' . "$w\n");
chdir $PWD or die("$!\n");
set_workdir($w);
else {
$index++;
# Throttle submissions: at most one enqueue per second.
if(inqueue($_,$index,$count,$prefix)) {
sleep 1;
# Return to the starting directory before cleanup; failure tolerated.
chdir $PWD;
#or die("$!\n");
para_cleanup();
save_database() if($urlhist);
#print STDERR ("\n");
#exit 0 unless($count);
308 __END__
310 =pod
312 =head1 NAME
314 batchget - A batch mode downloader
316 =head1 SYNOPSIS
318 batchget [options] ...
320 cat url.lst | batchget
322 cat url.lst | batchget -a -d
324 =head1 OPTIONS
326 =over 12
328 =item B<-a,--autoname>
330 Use indexing of URLs as output filename
332 =item B<-b,--cookie>
334 Use cookie jar
336 =item B<-c,--nc,--no-clobber>
338 No clobber when target exists.
340 =item B<-d,--directory>
342 Create directories
344 =item B<-e,--ext>
346 Extension name for autonaming
348 =item B<-f,--fullname>
350 Use URL as output filename
352 =item B<-i,--numlen>
354 Number length for index filename
356 =item B<-M,--maxtime>
358 Max time for a single download process
360 =item B<-m,--maxtask>
Maximum number of simultaneous downloading tasks
364 =item B<-n,--taskname>
366 Task name
368 =item B<-r,--referer>
370 Global referer URL
372 =item B<-w,--workdir>
374 Global working directory
376 =item B<-U,--urlhist>
Use URL downloading history database
380 =item B<--version>
Print version information.
384 =item B<-h>,B<--help>
386 Print a brief help message and exits.
388 =item B<--manual>,B<--man>
390 View application manual
392 =item B<--edit-me>
394 Invoke 'editor' against the source
396 =back
398 =head1 DESCRIPTION
400 A downloader which can download multiple urls at the same time and/or in queue.
402 =head1 CHANGELOG
404 2007-10-28 xiaoranzzz <xiaoranzzz@myplace.hell>
406 * file created, version 0.1
408 2010-08-03 xiaoranzzz <xiaoranzzz@myplace.hell>
410 * update to version 0.2
412 =head1 AUTHOR
414 xiaoranzzz <xiaoranzzz@myplace.hell>
416 =cut