4 use lib
$ENV{XR_PERL_MODULE_DIR
};
6 use MyPlace
::ReEnterable
;
7 use MyPlace
::Script
::Message
;
12 #use MyPlace::URLRule::HostMap qw/%HOST_MAP get_task valid_hostname add_host get_hosts/;
# File-level settings for the task runner.
# The two entries below name the per-directory state files: the resume file
# that pending tasks are saved to / loaded from, and the task log that is
# opened in append mode.
# NOTE(review): the enclosing declaration (presumably `use constant { ... }`)
# is not visible in this excerpt -- confirm.
16 TASK_FILE_RESUME
=> '.urlrule_task_resume',
17 TASK_FILE_LOG
=> 'urlrule_task.log',
# Task database file name (no directory part, so it is resolved against the
# current working directory).
21 my $TASK_DATABASE = "DATABASE.URL";
# Host database, located under $URLRULE_DIRECTORY (defined outside this excerpt).
22 my $HOST_DATABASE = $URLRULE_DIRECTORY . "/database/HOSTS.URL";
# Downloader command lines handed to load_rule by action_DOWNLOAD; the two
# differ only in the extra -U flag.
23 my $U_DOWNLOADER = 'batchget --maxtime 600 -U -c -m 3 -f';
24 my $S_DOWNLOADER = 'batchget --maxtime 600 -c -m 3 -f';
# Working directory at start-up. Tasks are queued against it, and the
# interrupt handler changes back to it before writing the resume file.
25 my $TASK_WORKDIR = getcwd
();;
# File handle used by the logging code; it is opened and closed on every write.
26 my $TASK_FILEHANDLE_LOG;
# Load the task database into an XConfig object.
# NOTE(review): this read happens before the `-r $TASK_DATABASE` check made in
# the main section further down -- confirm read_plainfile copes with a
# missing file.
28 my $Names = MyPlace
::XConfig
->new();
29 $Names->read_plainfile($TASK_DATABASE);
# change_directory: announce and perform the switch into directory $path,
# announcing a creation step first.
#
# Returns 1 immediately when $path is false. Callers treat a false return
# value as "could not enter the directory".
# NOTE(review): the argument unpacking, the mkdir/chdir calls, the conditions
# guarding each step and the remaining return statements are on lines that
# are not visible in this excerpt -- confirm against the full file.
31 sub change_directory
{
33 return 1 unless($path);
# Directory-creation step: progress message, then the outcome on STDERR.
35 app_message
("Creating directory $path...");
37 print STDERR
"\t[OK]\n";
40 print STDERR
"\t[Failed]\n";
# Directory-change step: progress message, then the outcome on STDERR.
44 app_message
("Changing working directory to $path...");
46 print STDERR
"\t[OK]\n";
49 print STDERR
"\t[Failed]\n";
# urlrule_process_task: walk the URLs carried by a task hash, entering the
# task's working directory (and per-URL sub-directories) as needed.
#
# Task keys read in the visible lines: work_dir, pass_data (array ref of
# URLs), pass_name (array ref of sub-directory names).
# Returns 1 when $task is false or is not a reference; returns undef when the
# task's work_dir cannot be entered.
# NOTE(review): the unpacking of $task / $do_dir, the maintenance of $idx and
# the per-URL processing are on lines not visible in this excerpt.
56 sub urlrule_process_task
{
58 return 1 unless($task);
59 return 1 unless(ref $task);
# Only switch directory when the caller asked for it and the task names one.
61 if($do_dir and $task->{work_dir
}) {
62 return undef unless(change_directory
($task->{work_dir
}));
# Remember the directory the task runs in.
64 my $task_wd = getcwd
();
65 if($task->{pass_data
}) {
67 foreach my $url (@
{$task->{pass_data
}}) {
# When pass_name is a non-empty list, the entry at $idx names the
# sub-directory for this URL; the URL is skipped if it cannot be entered.
68 if($task->{pass_name
} and @
{$task->{pass_name
}}) {
69 my $sub_wd = $task->{pass_name
}->[$idx];
70 next unless(change_directory
($sub_wd));
# Register process_passdown_task under the name 'process_passdown'.
78 urlrule_set_callback
('process_passdown',\
&process_passdown_task
);
# process_passdown_task: hand its arguments to urlrule_process_result and
# queue each pass-down item as a later 'load_rule' call on $phnd.
79 sub process_passdown_task
{
80 my($status2,$pass_count,@passdown)
81 = urlrule_process_result
(@_);
# Queue follow-up work only on a true status with a positive pass count.
82 if($status2 and $pass_count and $pass_count>0) {
# Each item is an array ref: first element a directory, the rest arguments.
85 my($cwd,@args) = @
{$_};
# NOTE(review): `$CWD` (upper case) is not the `$cwd` unpacked above and is
# not declared in the visible lines -- confirm which directory is meant.
87 $phnd->push($CWD,'load_rule',$cwd,@args);
# NOTE(review): the two statements below sit at original lines 95-97 and
# presumably belong to a different sub whose header is not visible here
# (possibly load_rule). They process the incoming arguments and pass the
# resulting rule and result to process_passdown_task.
95 my ($status1,$rule,$result) = urlrule_process_args
(@_);
97 process_passdown_task
($rule,$result);
# convert_records: turn database records into task tuples.
#
# Parameters:
#   $Hosts   - host database object; queried with propget.
#   @records - the main section passes a single reference to its record
#              array, which is why two nested loops are needed to reach the
#              individual [name, id, host] entries.
# Builds @r out of [name, id, host] and [name, id, host, url, level] tuples.
# NOTE(review): the conditions choosing between the two tuple shapes, the
# declaration of @r and the return statement are not visible in this excerpt.
102 sub convert_records
{
103 my ($Hosts,@records) = @_;
105 foreach my $record (@records) {
106 foreach my $path (@
{$record}) {
107 my($name,$id,$host) = @
{$path};
# Short tuple: pushed without a URL or level.
112 push @r,[$name,$id,$host];
115 my $hostname = $host;
# A host of the form "left|right" is split at the first '|'.
116 if($hostname =~ m/^([^\|]+)\|(.*)$/) {
# Look up the URL template for the host, then the level stored for that
# host/URL pair.
119 my ($url) = $Hosts->propget($hostname);
121 my ($level) = $Hosts->propget($hostname,$url);
# The id is "name:text1:text2..." with optional blanks around each ':'.
122 my ($id_name,@id_text) = split(/\s*:\s*/,$id);
# Substitute the id name into the URL template.
# NOTE(review): the stray ']' makes this pattern match "###ID###" or
# "###NAME]###", never "###NAME###" -- almost certainly a typo for
# (?:ID|NAME).
123 $url =~ s/###(?:ID|NAME])###/$id_name/g;
# Substitute the numbered "###TEXT<index>###" placeholders with $_.
127 $url =~ s/###TEXT$index###/$_/g;
# Full tuple: includes the expanded URL and its level.
129 push @r,[$name,$id,$host,$url,$level];
# Reported when the host cannot be resolved through the host database.
132 app_error
("HOST $host isn't valid, or not defined in :\n\t$HOST_DATABASE\n");
# NOTE(review): the lines below are the bodies of two action handlers whose
# `sub` headers are not visible in this excerpt (presumably action_ECHO and
# action_DUMP, given the actions accepted by the main section -- confirm).
#
# First handler: print a one-line summary of the task tuple.
141 my($name,$id,$host,$url,$level) = @
{$task};
# A missing level is treated as 0.
142 $level=0 unless($level);
# A host beginning with '#' is printed as "name/<rest> = 'id'"; the other
# message prints the host with its level and URL.
143 if($host =~ m/^#(.*)$/) {
144 app_message
("$name/$1 = \'$id\'\n");
145 #app_message("$name/$1 $id\n");
148 app_message
("$name/$host [$level]$url\n");
# Second handler: run the task's URL through urlrule_process_args at the
# given level (first argument undef) and print the result via Data::Dumper.
156 my($name,$id,$host,$url,$level) = @
{$task};
157 my($status,$rule,$result,@msg) = urlrule_process_args
(undef,$url,$level);
160 print Data
::Dumper
->Dump([$result],['*result']);
# Error report; the condition guarding it is not visible in this excerpt.
164 app_error
("Error:",$rule,$result,@msg,"\n");
# action_DOWNLOAD: download one task tuple [name, id, host, url, level] into
# the directory "$name/$hostname".
#
# Visible steps: classify the host string, make sure the target directories
# exist, stamp the task database with the current time, write a log entry,
# then call load_rule with one of the two downloader command lines.
# NOTE(review): the unpacking of $task, the assignment of $hostname and the
# bodies of several branches are on lines not visible in this excerpt.
168 sub action_DOWNLOAD
{
170 my($name,$id,$host,$url,$level) = @
{$task};
# Host of the form "left|right" (right part non-empty).
172 if($host =~ /^[^\|]+\|(.+)$/)
# Host beginning with '#': the entry is only printed as "name/<rest> = 'id'".
176 elsif($host =~ m/^#(.+)$/) {
177 app_message
("$name/$1 = \'$id\'\n");
187 app_message
("Create directory: $name\n");
190 if(! -d
"$name/$hostname")
192 app_message
("Create directory: $name/$hostname\n");
# NOTE(review): the result of mkdir is not checked here; failure is detected
# by the second -d test below.
193 mkdir "$name/$hostname";
195 if(! -d
"$name/$hostname")
197 app_error
("Can't access $name/$hostname!\n");
# Re-read the task database into a local XConfig object (this shadows the
# file-level $Names), store the current time under name/id/host/"last",
# and write the database back.
200 my $Names = MyPlace
::XConfig
->new();
201 $Names->read_plainfile($TASK_DATABASE);
202 $Names->propset(scalar(localtime),$name,$id,$host,"last");
203 print STDERR
"Saving $TASK_DATABASE...\n";
204 $Names->write_plainfile($TASK_DATABASE);
205 app_message
("Processing $name/$host\n");
206 &log("DOWNLOAD - $name/$host $url\[$level\]");
# Hosts matching this pattern are fetched with $U_DOWNLOADER; the other call
# uses $S_DOWNLOADER (presumably the else branch -- confirm).
# NOTE(review): "sogua" may be a misspelling of another site name -- confirm.
207 if($host =~ /google|baidu|yahoo|sogua/) {
208 load_rule
("$name/$hostname",$url,$level,$U_DOWNLOADER);
211 load_rule
("$name/$hostname",$url,$level,$S_DOWNLOADER);
# Body of the logging routine (called elsewhere in the file as &log; its
# `sub` header is not visible in this excerpt).
# Opens TASK_FILE_LOG in append mode, writes the current local time followed
# by the arguments and a newline when at least one argument was given, and
# closes the handle again.
# NOTE(review): the result of open is not checked in the visible line, so a
# failure to open the log would only surface as a print on a closed handle.
216 open $TASK_FILEHANDLE_LOG,">>",TASK_FILE_LOG
;
217 print $TASK_FILEHANDLE_LOG scalar(localtime),": ",@_,"\n" if(@_);
218 close $TASK_FILEHANDLE_LOG;
# Body of the interrupt handler (installed below as sig_killed; its `sub`
# header and any exit call are not visible in this excerpt).
# Saves the remaining queue so that a later run can resume it.
223 app_message
("saving remained tasks...\n");
# Put the entries held in lastStack back at the front of the queue
# (presumably the task that was in progress -- confirm against
# MyPlace::ReEnterable).
224 if($phnd->{lastStack
}) {
225 $phnd->unshift(@
{$phnd->{lastStack
}});
# Return to the start-up directory so that the resume file, which has a
# relative name, is written there.
227 chdir($TASK_WORKDIR) if($TASK_WORKDIR);
228 $phnd->saveToFile(TASK_FILE_RESUME
);
229 app_message
($phnd->length," task(s) saved to " . TASK_FILE_RESUME
. "\n");
# Run the handler above when the script receives SIGINT.
235 $SIG{INT
} = \
&sig_killed
;
# Main section: build the task queue from the command line or from the
# resume file, then work through it.
# NOTE(review): many branch bodies and closing braces are on lines not
# visible in this excerpt; the comments below describe only visible statements.
237 $phnd= MyPlace
::ReEnterable
->new('main');
# The task database must be readable in the current directory.
244 die("No DATA_FILE(" . $TASK_DATABASE . ") found\n") unless(-r
$TASK_DATABASE);
# Command line: <action> <query>; the actions tested are DUMP, DOWNLOAD, ECHO.
245 my ($action,$query) = @ARGV;
246 if($action eq 'DUMP') {
248 elsif($action eq 'DOWNLOAD') {
250 elsif($action eq 'ECHO') {
# Load the host database and resolve the query against the task database.
256 my $Hosts = MyPlace
::XConfig
->new();
257 $Hosts->read_plainfile($HOST_DATABASE);
258 my @targets = $Names->query($query);
260 app_error
("Nothing match query $query\n");
# Expand the matching records into task tuples; the record array is passed
# as a single reference.
263 my @records = $Names->get_records(@targets);
264 my @tasks = convert_records
($Hosts,\
@records);
# DOWNLOAD is logged and its tasks are queued on $phnd against the start-up
# directory, as a call to the matching action_* sub.
265 if($action eq 'DOWNLOAD') {
267 &log($action," - ",$query);
269 $phnd->push($TASK_WORKDIR,"action_$action",$_);
# The other call form invokes the action_* sub directly through a string eval.
# NOTE(review): $action comes from @ARGV; confirm the if/elsif chain above
# rejects every value other than the three known actions before this runs.
274 eval("action_$action(\$_)");
# Load tasks saved by an earlier, interrupted run.
281 $phnd->loadFromFile(TASK_FILE_RESUME
);
284 unless($phnd->length) {
285 app_error
("Nothing to do!\n");
# Work through the queue until it is empty.
# NOTE(review): the remaining-task count is reported through app_error even
# though it reads like a progress message -- confirm this is intended.
289 while(my $tasks = $phnd->length) {
290 app_error
("[$tasks] tasks remained\n");
# The resume file is removed once everything has completed.
293 app_message
"All tasks completed.\n";
294 unlink TASK_FILE_RESUME
if(-f TASK_FILE_RESUME
);