updated git and svn scripts
[xrzperl.git] / xrbook_writeinfo
blob03369d0746bfdf0b026c784a4b33dc8f9c6c5ab9
1 #!/usr/bin/perl
2 ###APPNAME: xrbook_writeinfo.pl
3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Tue Mar 11 04:58:02 2008
5 ###APPVER: 0.1
6 ###APPDESC: create bookinfo.xml
7 ###APPUSAGE: indexURL [directory]
8 ###APPEXAMPLE: xrbook_writeinfo http://book.sina.com.cn/nzt/lit/wodemingzijiaohong/index.shtml src
9 use strict;
10 use XML::Simple;
11 use Data::Dumper;
# Command-line help handling.
#
# With no arguments, or with -h / --help anywhere on the command line, the
# external "plhelp" tool is run on this script and the program exits.
$ARGV[0] = "-h" unless (@ARGV);
foreach my $arg (@ARGV) {
    next unless ($arg eq "-h" || $arg eq "--help");

    my $status = system("plhelp", $0, @ARGV);

    # system() returns -1 when the command could not be started at all;
    # fall back to the built-in usage line in that case.
    die("Usage: $0 indexURL [directory]\n") if ($status == -1);

    # system() returns a wait status, not an exit code: the child's exit
    # code lives in the high byte.  Passing the raw status to exit() would
    # turn e.g. "exit 1" (status 256) into a successful exit 0.
    exit($status == 0 ? 0 : (($status >> 8) || 1));
}
# getTextBetween($text, $begin [, $end])
#
# Returns the run of characters (containing neither '<' nor '>') found in
# $text between the regex fragments $begin and $end, with surrounding
# whitespace removed.  Matching is case-insensitive.
#
#   $text   string to search
#   $begin  regex fragment that must precede the wanted text
#   $end    regex fragment that must follow it; when omitted (or false) the
#           text ends at the next '<' or newline
#
# Returns "" when nothing matches.
#
# NOTE: $begin must not contain capturing groups, because the result is
# taken from the first capture of the combined pattern.
sub getTextBetween {
    my $text  = shift;
    my $begin = shift;
    my $end   = shift;
    $end = '[\<\n]' unless ($end);

    # Group both fragments so an alternation inside them ("a|b") cannot
    # swallow the rest of the pattern.
    my @match = $text =~ m{(?:$begin)\s*([^\<\>]*)\s*(?:$end)}i;
    return "" unless (@match && defined $match[0]);

    # The capture is greedy, so the \s* that follows it in the pattern never
    # gets to consume anything; strip trailing whitespace explicitly.
    my $found = $match[0];
    $found =~ s/\s+\z//;
    return $found;
}
# ---------------------------------------------------------------------------
# Main script.
#
# 1. Derive {base} and {dirname} from the index URL given on the command line.
# 2. Fetch the index page (netcat | iconv) and scan it line by line for the
#    title, author, description, "about" text, cover image and chapter links.
# 3. Create the target directory, feed every URL to "batchget" and write the
#    collected data to bookinfo.xml inside that directory.
# ---------------------------------------------------------------------------

# Quote a string so /bin/sh treats it as one literal word.
sub shellQuote {
    my $word = shift;
    $word =~ s/'/'\\''/g;
    return "'$word'";
}

my %book;
$book{URL}    = $ARGV[0];
$book{saveto} = $ARGV[1];

# Two URL layouts are recognised: ".../catalog.php?book=<digits>", and a
# plain directory/index page whose last directory component names the book.
my @match = ($book{URL} =~ /^(http:\/\/|)(.*\/)catalog\.php\?book=([0-9]+)/);
if (@match) {
    $book{base}    = $match[1];
    $book{dirname} = $match[2];
}
else {
    # A URL ending in "/" is completed with the default index page.
    if ($book{URL} =~ /\/$/) {
        $book{URL} .= "index.shtml";
    }
    @match = ($book{URL} =~ /^(http:\/\/|)([^\/]+)\/.*\/([^\/]+)\/[^\/]*$/);
    if (@match) {
        $book{base}    = $match[1];
        $book{dirname} = $match[2];
    }
}

die("Invalid index URL:\n$book{URL}\n") unless (@match);

# The pipeline needs a shell, so the URL is quoted rather than pasted in raw
# (a quote character in the URL must not be able to break out of the word).
my $fetch = "netcat " . shellQuote($book{URL}) . " | iconv -f gb2312 -t utf8 -c";
open(my $source, "-|", $fetch)
    or die("Cant't fork netcat | iconv\n");

my $TEXT = '[^<>]*';    # any run of characters inside or between tags
my @pages;
while (my $line = <$source>) {
    # Each field is taken from the first line that yields a non-empty value.
    unless ($book{title}) {
        $book{title} = getTextBetween($line, '\<\s*title\s*\>');
        $book{title} =~ s/_.*$//g;
        $book{title} =~ s/.*(代表作|作品)://g;
    }
    unless ($book{author}) {
        $book{author} = getTextBetween($line, '\<span class=td_m\>作者:\<\/span\>\<a[^\>\<]*\>\<span class=td_m\>');
        $book{author} = getTextBetween($line, "\<span\>作者:") unless ($book{author});
    }
    unless ($book{description}) {
        $book{description} = getTextBetween($line,
            '\<meta\s*name=\"*description\"*\s*content=\"',
            '\"\s*\>');
    }
    unless ($book{about}) {
        $book{about} = getTextBetween($line, '\<span class=td_m\>');
    }
    unless ($book{cover}) {
        # \Q..\E: the directory name is literal text, not a pattern.
        my @cover = $line =~ /src\s*=\s*(.*\Q$book{dirname}\E.*\.jpg)/;
        if (@cover) {
            # Drop attribute quotes, exactly as is done for page URLs below.
            $cover[0] =~ s/\"//g;
            $book{cover} = $cover[0];
        }
    }

    # Chapter links: either below the book's own directory or under /longbook/.
    my @link = $line =~ /\<a[^\<\>]*href\s*=\s*($TEXT\Q$book{dirname}\E${TEXT}html)$TEXT\>($TEXT)\<\/a\>/i;
    @link = $line =~ /\<a[^\<\>]*href\s*=\s*(\/longbook\/$TEXT\.shtml)$TEXT\>($TEXT)\<\/a\>/i unless (@link);
    if (@link) {
        next if ($link[0] =~ /index/);
        # Strip a leading chapter number and the single character after it.
        # NOTE(review): no decoding layer is applied to the stream here, so
        # "." presumably removes one byte, not one UTF-8 character - confirm
        # this is intended for link texts that do not start with a number.
        $link[1] =~ s/^\s*[0-9]*//g;
        $link[1] =~ s/^.\s*//;
        $link[0] =~ s/\"//g;
        push(@pages, {"NAME" => $link[1], "URL" => $link[0]});
    }
}
close($source);

$book{pages} = \@pages;
$book{title} = $book{dirname} unless ($book{title});

# Without an explicit target directory use "<author>/<title>/src"
# (or "<title>/src" when no author was found).
if (!$book{saveto}) {
    if ($book{author}) {
        $book{saveto} = $book{author} . "/" . $book{title};
        if (!-d $book{author}) { mkdir $book{author} or die("$!\n"); }
    }
    else {
        $book{saveto} = $book{title};
    }
    if (!-d $book{saveto}) { mkdir $book{saveto} or die("$!\n"); }
    $book{saveto} .= "/src";
}

if (!-d $book{saveto}) {
    mkdir $book{saveto} or die("$!\n");
}
chdir($book{saveto}) or die("Can't chdir to \"$book{saveto}\": $!\n");

# List-form pipe open: batchget receives title and URL as separate arguments
# without any shell in between, so neither can be misparsed.
open(my $batch, "|-", "batchget", "-m", "1", "-n", $book{title}, "-r", $book{URL})
    or die("Can't run batchget: $!\n");
for my $page (@pages) {
    print {$batch} "http://$book{base}$page->{URL}\n";
}
print {$batch} $book{cover}, "\n" if ($book{cover});
print {$batch} $book{URL}, "\n";
# A failing batchget is reported but does not prevent writing bookinfo.xml.
close($batch)
    or warn("batchget did not finish cleanly" . ($! ? ": $!" : " (status $?)") . "\n");

my $xml = XMLout(\%book, SuppressEmpty => 1, KeyAttr => [], NoAttr => 1, NumericEscape => 0) or die("$!\n");
open(my $info, ">", "bookinfo.xml") or die("Can't write bookinfo.xml: $!\n");
print {$info} $xml;
close($info) or die("Can't write bookinfo.xml: $!\n");
print "Done saved in \"$book{saveto}\"\n";