updated git and svn scripts
[xrzperl.git] / urlrule_task
blobe3877dfa22b21f09550d22e75b222854688d2695
1 #!/usr/bin/perl
2 # $Id$
3 use Cwd qw/getcwd/;
4 use lib $ENV{XR_PERL_MODULE_DIR};
5 use MyPlace::URLRule;
6 use MyPlace::ReEnterable;
7 use MyPlace::Script::Message;
8 use Cwd;
9 use strict;
10 use warnings;
12 #use MyPlace::URLRule::HostMap qw/%HOST_MAP get_task valid_hostname add_host get_hosts/;
# Re-enterable task queue; stays undefined until init() creates it.
my $phnd;

# Files this script reads and writes by relative name.
use constant TASK_FILE_RESUME => '.urlrule_task_resume';
use constant TASK_FILE_LOG    => 'urlrule_task.log';

# Set to 1 here; nothing in the visible code consults it.
my $TASK_LOGGING = 1;

# Plain-file databases read through MyPlace::XConfig.
# NOTE(review): $URLRULE_DIRECTORY is not declared in this file -- presumably
# exported by MyPlace::URLRule; confirm.
my $TASK_DATABASE = "DATABASE.URL";
my $HOST_DATABASE = $URLRULE_DIRECTORY . "/database/HOSTS.URL";

# Downloader command lines passed to load_rule() by action_DOWNLOAD();
# the two differ only in the -U switch.
my $U_DOWNLOADER = 'batchget --maxtime 600 -U -c -m 3 -f';
my $S_DOWNLOADER = 'batchget --maxtime 600 -c -m 3 -f';

# Directory the script was started in.
my $TASK_WORKDIR = getcwd();

# Handle used by log(); opened and closed on every call.
my $TASK_FILEHANDLE_LOG;

use MyPlace::XConfig;

# Task database, loaded once at start-up.
my $Names = MyPlace::XConfig->new();
$Names->read_plainfile($TASK_DATABASE);
# Make $path the current working directory, creating it first (one level
# only, via mkdir) when it does not exist yet.
#
# Returns 1 on success or when $path is empty/false, undef when either the
# mkdir or the chdir fails. Progress is reported through app_message() and
# an "[OK]" / "[Failed]" marker on STDERR.
sub change_directory {
    my ($target) = @_;
    return 1 unless $target;

    unless (-d $target) {
        app_message("Creating directory $target...");
        my $created = mkdir $target;
        print STDERR $created ? "\t[OK]\n" : "\t[Failed]\n";
        return undef unless $created;
    }

    app_message("Changing working directory to $target...");
    my $entered = chdir $target;
    print STDERR $entered ? "\t[OK]\n" : "\t[Failed]\n";
    return $entered ? 1 : undef;
}
### NOT FINISHED
# Walk the pass-down entries of a task hash.
#
#   $task    hash ref; the keys read here are work_dir, pass_data (array ref
#            of URLs) and pass_name (array ref of directory names, read at
#            the same index as pass_data).
#   $do_dir  when true and $task->{work_dir} is set, chdir there first.
#
# Returns 1 when there is nothing to do or the walk completed, undef when
# the task's work_dir cannot be entered.
#
# The per-URL processing itself is still missing; at the moment the loop only
# enters the directory that belongs to each URL and comes back.
sub urlrule_process_task {
    my $task = shift;
    return 1 unless($task);
    return 1 unless(ref $task);
    my $do_dir = shift;
    if($do_dir and $task->{work_dir}) {
        return undef unless(change_directory($task->{work_dir}));
    }
    my $task_wd = getcwd();
    if($task->{pass_data}) {
        my $has_names = ($task->{pass_name} and @{$task->{pass_name}});
        # Iterate by index so that entry N of pass_data is paired with entry
        # N of pass_name (the old counter was never advanced, so every URL
        # used the first name).
        foreach my $idx (0 .. $#{$task->{pass_data}}) {
            my $url = $task->{pass_data}->[$idx];
            next unless($has_names);
            my $sub_wd = $task->{pass_name}->[$idx];
            next unless(change_directory($sub_wd));
            # TODO: process $url inside $sub_wd.
            # Return to the task directory so the next relative name is not
            # resolved inside the previous sub-directory.
            chdir $task_wd;
        }
    }
    return 1;
}
# Register the sub below under the name 'process_passdown'.
urlrule_set_callback('process_passdown',\&process_passdown_task);

# Hand the arguments to urlrule_process_result() and queue one 'load_rule'
# entry on $phnd for every returned pass-down entry that carries arguments.
# Each entry is an array ref whose first element is a directory and whose
# remaining elements are forwarded to load_rule unchanged.
# The working directory is restored before returning.
sub process_passdown_task {
    my ($ok, $count, @entries) = urlrule_process_result(@_);
    if ($ok and $count and $count > 0) {
        my $origin = getcwd();
        for my $entry (@entries) {
            my ($dir, @rule_args) = @{$entry};
            next unless @rule_args;
            $phnd->push($origin, 'load_rule', $dir, @rule_args);
        }
        chdir $origin;
    }
}
# Run urlrule_process_args() on the given arguments and, when it reports
# success, pass the resulting rule and result to process_passdown_task().
# Whatever happens in between, the working directory in effect on entry is
# restored; the result of that chdir is the return value.
sub load_rule {
    my $origin = getcwd();
    my ($ok, $rule, $result) = urlrule_process_args(@_);
    process_passdown_task($rule, $result) if $ok;
    return chdir $origin;
}
# Turn database records into task tuples.
#
#   $Hosts    object answering propget(); propget($hostname) is expected to
#             yield the URL template of a host and propget($hostname, $url)
#             its level.
#   @records  array refs, each holding [name, id, host] array refs.
#
# Entries with an empty name, id or host are skipped. A host starting with
# '#' is a plain property and is passed through as [name, id, host]. For any
# other host the part before the first '|' is looked up in $Hosts and the
# URL template is filled in from the id, which has the form
# "ID : TEXT1 : TEXT2 ...":
#   ###ID### / ###NAME###   -> first field of the id
#   ###TEXTn###             -> n-th following field
# giving [name, id, host, url, level]. Unknown hosts are reported with
# app_error() and dropped.
#
# Returns the list of tuples.
sub convert_records {
    my ($Hosts,@records) = @_;
    my @r;
    foreach my $record (@records) {
        foreach my $path (@{$record}) {
            my($name,$id,$host) = @{$path};
            next unless($name);
            next unless($id);
            next unless($host);
            if($host =~ m/^#/) {
                push @r,[$name,$id,$host];
                next;
            }
            # "key|dirname" hosts are looked up by the key part only.
            my $hostname = $host;
            if($hostname =~ m/^([^\|]+)\|.*$/) {
                $hostname = $1;
            }
            my ($url) = $Hosts->propget($hostname);
            if($url) {
                my ($level) = $Hosts->propget($hostname,$url);
                my ($id_name,@id_text) = split(/\s*:\s*/,$id);
                # An id made of separators only splits to nothing.
                $id_name = '' unless(defined $id_name);
                # The alternation used to read "ID|NAME]", which only matched
                # the literal text "###NAME]###" and left ###NAME### as is.
                $url =~ s/###(?:ID|NAME)###/$id_name/g;
                my $index = 0;
                foreach my $text (@id_text) {
                    $index++;
                    $url =~ s/###TEXT$index###/$text/g;
                }
                push @r,[$name,$id,$host,$url,$level];
            }
            else {
                app_error("HOST $host isn't valid, or not defined in :\n\t$HOST_DATABASE\n");
            }
        }
    }
    return @r;
}
# Print one task tuple ([name, id, host, url, level]) through app_message().
# Hosts of the form "#key" are shown as "name/key = 'id'"; everything else
# as "name/host [level]url", with a missing level shown as 0.
# Returns nothing.
sub action_ECHO {
    my ($task) = @_;
    my ($name, $id, $host, $url, $level) = @{$task};
    $level ||= 0;
    my $line = $host =~ m/^#(.*)$/
        ? "$name/$1 = \'$id\'\n"
        : "$name/$host [$level]$url\n";
    app_message($line);
    return;
}
# Run urlrule_process_args() on the url and level of a task tuple
# ([name, id, host, url, level]) and dump the result with Data::Dumper on
# STDOUT. Returns 1 on success; on failure reports everything that came back
# through app_error() and returns undef.
sub action_DUMP {
    my ($task) = @_;
    my (undef, undef, undef, $url, $level) = @{$task};
    my ($ok, $rule, $result, @msg) = urlrule_process_args(undef, $url, $level);
    unless ($ok) {
        app_error("Error:",$rule,$result,@msg,"\n");
        return undef;
    }
    use Data::Dumper;
    print Data::Dumper->Dump([$result],['*result']);
    return 1;
}
# Download one task tuple ([name, id, host, url, level]).
#
# The target directory is "name/hostname", where hostname is the part after
# the '|' of a "key|dirname" host, or the host itself. A "#key" host is a
# plain property: it is only echoed and counts as done.
# Before starting, the current time is stored in the task database under
# (name, id, host, "last") and the database is written back; the download
# itself is delegated to load_rule().
#
# Returns 1 on success, undef when the target directory cannot be created.
sub action_DOWNLOAD {
    my ($task) = @_;
    my ($name, $id, $host, $url, $level) = @{$task};

    my $hostname = $host;
    if ($host =~ /^[^\|]+\|(.+)$/) {
        $hostname = $1;
    }
    elsif ($host =~ m/^#(.+)$/) {
        app_message("$name/$1 = \'$id\'\n");
        return 1;
    }

    unless (-d "$name") {
        app_message("Create directory: $name\n");
        mkdir $name;
    }
    unless (-d "$name/$hostname") {
        app_message("Create directory: $name/$hostname\n");
        mkdir "$name/$hostname";
    }
    # The mkdir results are not checked; this test catches both failures.
    unless (-d "$name/$hostname") {
        app_error("Can't access $name/$hostname!\n");
        return undef;
    }

    # Re-read the database from disk before stamping it.
    my $db = MyPlace::XConfig->new();
    $db->read_plainfile($TASK_DATABASE);
    $db->propset(scalar(localtime), $name, $id, $host, "last");
    print STDERR "Saving $TASK_DATABASE...\n";
    $db->write_plainfile($TASK_DATABASE);

    app_message("Processing $name/$host\n");
    &log("DOWNLOAD - $name/$host $url\[$level\]");

    # Hosts matching these names get the downloader variant with -U.
    my $downloader = $host =~ /google|baidu|yahoo|sogua/
        ? $U_DOWNLOADER
        : $S_DOWNLOADER;
    load_rule("$name/$hostname", $url, $level, $downloader);
    return 1;
}
# Append one line, "<local time>: <arguments joined>", to TASK_FILE_LOG in
# the current working directory. With no arguments the file is only opened
# and closed again.
#
# Logging is best effort: when the file cannot be opened a warning is
# printed and the call returns without a value instead of printing to an
# unopened handle. Otherwise returns the result of close().
#
# The name clashes with the builtin log(), so callers must use &log(...).
sub log {
    unless (open $TASK_FILEHANDLE_LOG, ">>", TASK_FILE_LOG) {
        warn "Can't append to " . TASK_FILE_LOG . ": $!\n";
        return;
    }
    print {$TASK_FILEHANDLE_LOG} scalar(localtime), ": ", @_, "\n" if(@_);
    close $TASK_FILEHANDLE_LOG;
}
# SIGINT handler: save the unfinished tasks so a later run without
# arguments can resume them, then exit with status 1.
#
# The entries in $phnd->{lastStack} are put back at the front of the queue
# first, and the queue is written to TASK_FILE_RESUME inside the directory
# the script was started in.
sub sig_killed {
    # ECHO and DUMP runs never call init(), so there is no queue to save.
    # Dereferencing the undefined $phnd would turn it into a plain hash and
    # the saveToFile() call below would then die.
    unless ($phnd) {
        clean_up();
        exit 1;
    }
    app_message("saving remained tasks...\n");
    if($phnd->{lastStack}) {
        $phnd->unshift(@{$phnd->{lastStack}});
    }
    chdir($TASK_WORKDIR) if($TASK_WORKDIR);
    $phnd->saveToFile(TASK_FILE_RESUME);
    app_message($phnd->length," task(s) saved to " . TASK_FILE_RESUME . "\n");
    clean_up();
    exit 1;
}

$SIG{INT} = \&sig_killed;
# Create the re-enterable task queue named 'main', store it in the
# file-level $phnd and return it.
sub init {
    $phnd = MyPlace::ReEnterable->new('main');
    return $phnd;
}
# Hook called by sig_killed() just before exiting; currently does nothing
# and returns nothing.
sub clean_up {
    return;
}
# ---- main program ----
#
#   urlrule_task [DUMP|DOWNLOAD|ECHO] QUERY   act on the matching tasks
#   urlrule_task QUERY                        same as ECHO QUERY
#   urlrule_task                              resume the tasks saved in
#                                             TASK_FILE_RESUME
#
# ECHO and DUMP handle every task immediately and exit. DOWNLOAD queues the
# tasks on $phnd; the queue (new or resumed) is then run until it is empty.
if(@ARGV) {
    die("No DATA_FILE(" . $TASK_DATABASE . ") found\n") unless(-r $TASK_DATABASE);
    my ($action,$query) = @ARGV;

    # A first argument that is not a known action is the query itself.
    my %is_action = map { $_ => 1 } qw/DUMP DOWNLOAD ECHO/;
    unless($is_action{$action}) {
        $query = $action;
        $action = 'ECHO';
    }

    my $Hosts = MyPlace::XConfig->new();
    $Hosts->read_plainfile($HOST_DATABASE);
    my @targets = $Names->query($query);
    if(!@targets) {
        app_error("Nothing match query $query\n");
        exit 1;
    }
    my @records = $Names->get_records(@targets);
    my @tasks = convert_records($Hosts,\@records);

    if($action eq 'DOWNLOAD') {
        init();
        &log($action," - ",$query);
        foreach my $task (@tasks) {
            # Queued by sub name; the queue calls action_DOWNLOAD later.
            $phnd->push($TASK_WORKDIR,"action_$action",$task);
        }
    }
    else {
        # Only DUMP and ECHO can reach this branch.
        my %handler_for = (
            DUMP => \&action_DUMP,
            ECHO => \&action_ECHO,
        );
        my $handler = $handler_for{$action};
        foreach my $task (@tasks) {
            # A task that dies must not stop the remaining ones, but unlike
            # the former string eval the failure is reported.
            unless(eval { $handler->($task); 1 }) {
                app_error("action_$action failed: $@");
            }
        }
        exit 0;
    }
}
else {
    init();
    $phnd->loadFromFile(TASK_FILE_RESUME);
}

unless($phnd->length) {
    app_error("Nothing to do!\n");
    exit 0;
}

while(my $tasks = $phnd->length) {
    app_error("[$tasks] tasks remained\n");
    $phnd->run();
}
app_message("All tasks completed.\n");

# "-f TASK_FILE_RESUME" would test a filehandle of that name, not the file,
# so the constant is copied into a scalar first.
my $resume_file = TASK_FILE_RESUME;
unlink $resume_file if(-f $resume_file);
exit 0;