2 ###APPNAME: sinabook_maketext
3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Tue Mar 11 05:01:19 2008
6 ###APPDESC: create plain-text from bookinfo.xml created by sinabook_download
7 ###APPUSAGE: <bookinfo directory> [text directory] [chapter directory name]\n\t"text directory" and "chapter directory name" may be specified using the following text substitutions:\n\t%t\t-\tbook title\n\t%a\t-\tbook author\n
8 ###APPEXAMPLE: sinabook_maketext booksrc "%a" "%t"
# With no command-line arguments at all, behave as if "-h" had been given.
12 $ARGV[0]="-h" unless(@ARGV);
# On -h / --help, run the external "plhelp" command on this script and exit
# with whatever system() returns.
# NOTE(review): the statement that sets $_ is not visible in this excerpt
# (presumably a loop over @ARGV) -- confirm.
14 exit(system("plhelp",$0,@ARGV)) if($_ eq "-h" || $_ eq "--help");
# Defaults: text directory "." and chapter directory name "txt".
# NOTE(review): the assignments of $WD, $TXTD and $CHAD are not visible here;
# presumably they come from the three positional arguments -- confirm.
21 $TXTD="." unless($TXTD);
24 $CHAD = "txt" unless($CHAD);
# The book description is expected at <$WD>/bookinfo.xml; abort if it is not a
# regular file.
26 my $xmlfile = "$WD/bookinfo.xml";
27 die("File not exist: $xmlfile\n") unless(-f
$xmlfile);
# Progress messages on STDERR are written through the :utf8 layer.
29 binmode STDERR
,":utf8";
# Parse the XML into a nested data structure.
# NOTE(review): XMLin is presumably XML::Simple's (the import is not visible).
# If so: SuppressEmpty=>1 skips empty elements, NoAttr=>1 ignores attributes,
# and KeyAttr=>[] disables folding of arrays into hashes -- confirm.
31 my $book = XMLin
("$xmlfile",SuppressEmpty
=>1,NoAttr
=>1,KeyAttr
=>[]);
32 #print Dumper($book),"\n";
# Expand the %a (author) and %t (title) placeholders in both directory
# settings, then re-apply the defaults in case a setting ended up empty.
34 $TXTD =~ s/%a/$book->{author}/g;
35 $TXTD =~ s/%t/$book->{title}/g;
36 $TXTD="." unless($TXTD);
37 $CHAD =~ s/%a/$book->{author}/g;
38 $CHAD =~ s/%t/$book->{title}/g;
39 $CHAD = "txt" unless($CHAD);
# NOTE(review): the enclosing definition is not visible in this excerpt;
# presumably this is the body of getTextFrom with $fn as its argument -- confirm.
# Run the external command "sinabook_h2t" on $fn and read its output through
# the :utf8 layer.
# NOTE(review): $fn is only wrapped in single quotes inside a command string,
# so a value containing a single quote would break the command line; the
# result of open() is not checked either.
45 open FI
,"-|:utf8","sinabook_h2t '$fn'";
# Collect everything the command printed into a single string.
46 my $result=join("",<FI
>);
# Create the text directory; on failure abort with the OS error message.
# NOTE(review): any existence check guarding this mkdir is not visible in this
# excerpt -- confirm it is not run unconditionally.
52 mkdir "$TXTD" or die("$!\n");
# Create the chapter directory inside the text directory when it is missing.
55 if(!-d
"$TXTD/$CHAD") {
56 mkdir "$TXTD/$CHAD" or die("$!\n");
# Build the book header: the title, then " -- <author>" when an author is
# present, then the tab-indented "about" text followed by a row of 80 "="
# characters when an "about" text is present.
59 my $header = $book->{title
} . "\n\n";
60 $header .= " -- " . $book->{author
} . "\n\n" if($book->{author
});
61 $header .= "\t" . $book->{about
} . "\n" . "=" x
80 . "\n\n" if($book->{about
});
# The combined full-text file is <text directory>/<title>.txt.
63 my $fulltxt = "$TXTD" . "/" . $book->{title
} . ".txt";
65 print STDERR
"Generating book header ...\n";
# Open the "00 - about.txt" chapter file and the combined file for writing
# (truncating), both through the :utf8 layer.
# NOTE(review): neither open() result is checked; the statements that write
# $header to these handles are not visible in this excerpt.
69 open FO
,">:utf8","$TXTD/$CHAD/00 - about.txt";
73 open FTXT
,">:utf8",$fulltxt;
# Copy the page list out of the parsed book when one is present.
# NOTE(review): this dereferences $book->{pages} as an array reference --
# confirm the parser always yields an array here, even for a single page.
77 @pages = @
{$book->{pages
}} if($book->{pages
});
# Walk the pages in order, fetching each page's text and writing it out.
81 for(my $i=0;$i<@pages;$i++) {
82 my $pagename = $pages[$i]{NAME
};
# Clean the page name: drop trailing whitespace, then a trailing "(digits)"
# suffix.
# NOTE(review): because the whitespace pass runs first, whitespace in front of
# a removed suffix survives (e.g. "Foo (2)" becomes "Foo "), which would not
# compare equal to "Foo" below -- confirm whether that is intended.
83 $pagename =~ s/\s+$//g;
84 $pagename =~ s/\(\d+\)$//g;
# The chapter number advances only when the cleaned name differs from
# $lastpage.
# NOTE(review): the initialisation of $index and the update of $lastpage are
# not visible in this excerpt.
85 $index++ unless($pagename eq $lastpage);
# Chapter number formatted with at least two digits, zero-padded.
86 my $pageindex = sprintf("%.2d",$index);
87 print STDERR
"Generating $TXTD/$CHAD/$pageindex - $pagename.txt ...\n";
# Obtain the page text from the page's URL entry via getTextFrom.
88 my $text = getTextFrom
($pages[$i]{URL
});
# Same name as $lastpage: open the existing chapter file in append mode.
89 if($pagename eq $lastpage) {
90 open FO
,">>:utf8","$TXTD/$CHAD/$pageindex - $pagename.txt";
# Open the chapter file in truncate mode and write the page name as a heading
# followed by the text.
# NOTE(review): presumably this is the else branch for a new chapter; the
# intervening lines (including any write in the append branch) are not
# visible in this excerpt -- confirm.
96 open FO
,">:utf8","$TXTD/$CHAD/$pageindex - $pagename.txt";
97 print FO
"\n$pagename\n\n\n$text";
# Write the page name and text to the combined full-text handle as well.
99 print FTXT
"\n\n\n\n$pagename\n\n\n$text";
106 print STDERR
"All done!\n\n";