updated git and svn scripts
[xrzperl.git] / tidywenku2
blobf0beeae48025a9018b7c470ac7c01a9343e16b92
1 #!/usr/bin/perl -w
2 ###APPNAME: tidywenku2
3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Fri Oct 5 12:37:53 2007
5 ###APPVER: 0.2
6 ###APPDESC: 将文心阁小说转为UTF8,并整理页面
7 ###APPUSAGE: [目录名]
8 ###APPEXAMPLE: tidywenku2 "在人间"
9 ###APPOPTION:
10 use strict;
# Help handling: with no arguments, or with -h/--help anywhere on the command
# line, hand over to the external `plhelp` tool (given this script's path and
# the full argument list) and exit with its result.
$ARGV[0] = "-h" unless (@ARGV);
foreach my $arg (@ARGV) {
    next unless ($arg eq "-h" || $arg eq "--help");

    # system() returns the raw wait status (exit code << 8), not the exit
    # code. Passing it straight to exit() truncates e.g. 256 to 0 and hides
    # a plhelp failure, so decode it first.
    my $wait = system("plhelp", $0, @ARGV);
    exit(0) if ($wait == 0);
    # -1: plhelp could not be started; low 7 bits set: killed by a signal.
    # Neither case has an exit code to forward.
    exit(1) if ($wait == -1 || ($wait & 127));
    exit($wait >> 8);
}
# Parallel rule tables: $PAT[$i] is a pattern and $REP[$i] is the text that
# replaces every match of it. Rules are stored in registration order.
my @PAT;
my @REP;

# buildRule PATTERN, REPLACEMENT
# Registers one substitution rule by appending PATTERN to @PAT and
# REPLACEMENT to @REP. The ($$) prototype is kept so existing paren-less
# calls keep parsing exactly as before.
sub buildRule($$) {
    my ($pattern, $replacement) = @_;
    push(@PAT, $pattern);
    push(@REP, $replacement);
}
# Earlier signature text, kept for reference:
#my $SIGN="<div align=right id=sign>Reproduced by <b>Xiaoranzzz<\/b><br>" . `date` . "<\/div>";

# Signature block stamped with the output of the external `date` command,
# captured once at start-up (its trailing newline is kept).
# NOTE(review): the markup has a closing </b> with no opening <b> - confirm
# whether that is intended.
my $SIGN = do {
    my $stamp = `date`;
    '<div align="right" id="sign">WenKuV2.0 Geek@MYPLACE</b><br>' . $stamp . '</div>';
};

# Substitution rules, registered in this exact order. TidyFile applies them
# in the same order to every line, so later rules see the output of earlier
# ones.
foreach my $rule (
    # NOTE(review): this pattern is empty as seen here, which makes the rule
    # a no-op; it may originally have held a non-printing character
    # (e.g. a carriage return) - confirm against the real file.
    [ qr//, '' ],
    # Cut the title off at the first "--".
    [ qr/--.*<\/title>/, '</title>' ],
    # Drop external script includes.
    [ qr/<script src=.*<\/script>/, "" ],
    # The file has been converted, so fix the declared charset too.
    [ qr/gb2312/, "utf-8" ],
    [ qr/<body .* marginwidth="20">/, '<body class="mainBody">' . "\n" ],
    [ qr/class='middle'/, 'class="mtext"' ],
    # Replace the whole line holding the site's home link with the signature.
    [ qr/.*<a href='(http\:\/\/www\.wenku\.com|\/)'>.*$/i, $SIGN ],
    # Inline scripts: the opening tag becomes an HTML comment opener ...
    [ qr/<script language='JavaScript'>/, "\n" . '<!--/' ],
    # ... and the closing tag becomes the comment closer followed by three
    # candidate locations for wenku.js.
    [
        qr/<\/script>/,
        join("\n",
            '-->',
            '<script language="JavaScript" src="wenku.js"></script>',
            '<script language="JavaScript" src="file:///c:/windows/wenku.js"></script>',
            '<script language="JavaScript" src="file:///etc/wenku.js"></script>',
            '')
    ],
    # Swap the site's stylesheet for three candidate locations of wenku.css.
    [
        qr/<link[^<>]*new.css[^<>]*>/,
        join("\n",
            '<link rel="stylesheet" href="wenku.css" type="text/css">',
            '<link rel="stylesheet" href="file:///c:/windows/wenku.css" type="text/css">',
            '<link rel="stylesheet" href="file:///etc/wenku.css" type="text/css">',
            '')
    ],
    # Replace width-based layout markup with id hooks.
    [ qr/<table[^<>]*width=[\'\"]*100%[^><]*>/, '<table id="headTable">' ],
    [ qr/<table[^<>]*width=[\'\"]*93%[^<>]*>/, '<table id="contentTable">' ],
    [ qr/<td[^<>]*width=[\"\']*80%[^<>]*>/, '<td id="contentText">' ],
    [ qr/class=\"title\"/, 'id="bookTitle"' ],
    # Remove empty <center> pairs and all horizontal rules.
    [ qr/<center><\/center>/, '' ],
    [ qr/<hr[^<>]*>/, '' ],
) {
    buildRule($rule->[0], $rule->[1]);
}
# Scratch file shared by GB2UTF (writes the converted text into it) and
# TidyFile (reads it back); created once with the external `mktemp` command.
my $TEMPFILE = `mktemp`;
die("mktemp failed: cannot create a scratch file\n")
    unless (defined($TEMPFILE) && $? == 0);
# Backticks keep mktemp's trailing newline. Without the chomp the script
# would work on a second file whose name ends in "\n" and leave the file
# mktemp actually created behind, unused.
chomp($TEMPFILE);
die("mktemp failed: cannot create a scratch file\n") unless (length($TEMPFILE));

# Remove the scratch file on every exit path (normal end, exit, die).
# END can run before the assignment above has happened, hence the
# defined() guard.
END {
    unlink($TEMPFILE) if (defined($TEMPFILE) && length($TEMPFILE));
}
# GB2UTF FILENAME
# Runs the external `iconv` to convert FILENAME from gb18030 to utf8, writing
# the result to the shared scratch file $TEMPFILE. The -c flag is passed to
# iconv, and "--" keeps a FILENAME that starts with "-" from being read as an
# option.
# Returns 1 on success. Returns 0 when FILENAME is not a regular file, or
# when iconv reports failure (a message is then printed to STDERR).
sub GB2UTF($) {
    my ($source) = @_;

    if (!-f "$source") {
        return 0;
    }

    # List form of system(): no shell is involved, so no quoting is needed.
    my @command = (
        "iconv", "-c",
        "-f", "gb18030",
        "-t", "utf8",
        "-o", "$TEMPFILE",
        "--", "$source",
    );
    my $status = system(@command);
    return 1 if ($status == 0);

    print STDERR "Error while convert $source from gb2312 to utf8\n";
    return 0;
}
# TidyFile FILENAME
# Rewrites FILENAME in place: converts it to UTF-8 through GB2UTF (which
# leaves the converted text in $TEMPFILE), then copies it back line by line,
# first applying the special book-title substitution and then every rule
# registered in @PAT/@REP, in order.
# Returns 1 when the file was rewritten and 0 when GB2UTF declined or failed
# (FILENAME is then left untouched). Dies, with the file name in the message,
# if a file cannot be opened or the output cannot be written completely.
sub TidyFile($) {
    my $filename = shift;
    return 0 unless (GB2UTF($filename));

    # Lexical handles instead of the global barewords TEMP/FH.
    open(my $in, "<", $TEMPFILE) or die("Cannot read $TEMPFILE: $!\n");
    open(my $out, ">", $filename) or die("Cannot write $filename: $!\n");

    while (my $LINE = <$in>) {
        # Turn the link to bookroom.php that precedes the closing bracket
        # character into a bold title element.
        $LINE =~ s#<a.*bookroom.php.*>(.*)<\/a>》#<b id="title">$1</b>#;
        for my $i (0 .. $#PAT) {
            $LINE =~ s/$PAT[$i]/$REP[$i]/g;
        }
        # NOTE(review): $LINE still ends with its own newline, so this writes
        # an extra blank line after each line. Kept as-is to preserve the
        # existing output format.
        print {$out} $LINE, "\n"
            or die("Cannot write $filename: $!\n");
    }

    close($in);
    # Buffered write errors (e.g. a full disk) only surface at close; an
    # unchecked close would silently leave a truncated file behind.
    close($out) or die("Cannot finish writing $filename: $!\n");
    return 1;
}
# Main loop: each argument is either a file (tidied directly), a directory
# (every regular file directly inside it is tidied; no recursion), or
# anything else (reported and skipped). Progress goes to STDERR; the "\r"
# lets the final status of an entry overwrite its progress line.
foreach my $cur (@ARGV) {
    if (-f $cur) {
        print STDERR ("$cur ... \r");
        TidyFile($cur);
        print STDERR ("$cur [Done] \n");
    }
    elsif (-d $cur) {
        # Read the directory with opendir/readdir rather than glob("$cur/*"):
        # glob splits its pattern on whitespace and expands metacharacters,
        # so a directory name containing spaces, '[', '{' or '*' would be
        # listed wrongly or not at all.
        my $dh;
        unless (opendir($dh, $cur)) {
            print STDERR ("$cur [Error: $!]\n");
            next;
        }
        # Same selection as "*": no dot-files, in sorted order.
        my @names = sort grep { !/^\./ } readdir($dh);
        closedir($dh);

        foreach my $name (@names) {
            my $sub = "$cur/$name";
            if (-f $sub) {
                print STDERR ("$sub ... \r");
                TidyFile($sub);
            }
        }
        print STDERR ("$cur [Done] \n");
    }
    else {
        # Message text (including its spelling) is kept unchanged.
        print STDERR ("$cur [Ingored]\n");
    }
}