updated git and svn scripts
[xrzperl.git] / sinabook_maketext
blob ab397bc4e3afe708f18def4b0a9f048381acd200
1 #!/usr/bin/perl
2 ###APPNAME: sinabook_maketext
3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Tue Mar 11 05:01:19 2008
5 ###APPVER: 0.1
6 ###APPDESC: create plain-text from bookinfo.xml created by sinabook_download
7 ###APPUSAGE: <bookinfo directory> [text directory] [chapter directory name]\n\t"text directory" and "chapter directory name" may be specified using following text substitutions:\n\t%t\t-\tbook title\n\t%a\t-\tbook author\n
8 ###APPEXAMPLE: sinabook_maketext booksrc "%a" "%t"
9 use strict;
10 use XML::Simple;
11 use Data::Dumper;
# Help handling: running the script without arguments, or with -h / --help
# anywhere on the command line, hands control to the external "plhelp"
# command (called with this script's path and the arguments) and then exits.
$ARGV[0] = "-h" unless (@ARGV);
if (grep { $_ eq "-h" || $_ eq "--help" } @ARGV) {
    # system() returns the raw wait status, not the exit code.  Passing it
    # straight to exit() keeps only the low 8 bits, so a plhelp exit code of
    # 1 (status 256) used to be reported as success.  Decode it instead.
    my $status = system("plhelp", $0, @ARGV);
    exit(1) if ($status == -1 || ($status & 127));    # not started / killed by a signal
    exit($status >> 8);
}
# Positional arguments (see the APPUSAGE header):
#   $WD   - directory that contains bookinfo.xml and the downloaded pages
#   $TXTD - directory (template) that receives the generated text
#   $CHAD - name (template) of the per-chapter sub-directory below $TXTD
# A missing or empty argument selects the default.  The test is on
# definedness and length rather than truthiness, so that a directory
# literally named "0" is not silently replaced by the default.
my $WD = shift;
$WD = "." unless (defined($WD) && length($WD));

my $TXTD = shift;
$TXTD = "." unless (defined($TXTD) && length($TXTD));

my $CHAD = shift;
$CHAD = "txt" unless (defined($CHAD) && length($CHAD));

# The book description is expected directly inside the bookinfo directory.
my $xmlfile = "$WD/bookinfo.xml";
die("File not exist: $xmlfile\n") unless (-f $xmlfile);
# Progress messages on STDERR include book and chapter titles, so write them
# through the :utf8 layer.
binmode STDERR, ":utf8";

# Parse the book description into a plain hash structure.
my $book = XMLin("$xmlfile", SuppressEmpty => 1, NoAttr => 1, KeyAttr => []);
#print Dumper($book),"\n";
#exit 0;

# Expand the %a (author) and %t (title) placeholders in both directory
# templates.  A field that is absent from the XML expands to the empty
# string.  %a is expanded before %t, exactly as before.
my $author_text = defined($book->{author}) ? $book->{author} : "";
my $title_text  = defined($book->{title})  ? $book->{title}  : "";
foreach my $template ($TXTD, $CHAD) {    # foreach aliases, so this edits $TXTD/$CHAD in place
    $template =~ s/%a/$author_text/g;
    $template =~ s/%t/$title_text/g;
}

# Fall back to the defaults only when the expansion left nothing at all; a
# name that expands to "0" is a legitimate directory name and is kept.
$TXTD = "."   unless (defined($TXTD) && length($TXTD));
$CHAD = "txt" unless (defined($CHAD) && length($CHAD));
# getTextFrom($url)
#   Converts one downloaded page to plain text.
#   Only the part of $url after the last "/" is used as a file name; that
#   file is looked up inside $WD and handed to the external "sinabook_h2t"
#   command, whose standard output is read through the :utf8 layer.
#   Returns the whole output as one string.  If the command cannot be
#   started a warning is printed and "" is returned, so a single bad page
#   does not abort the run.
sub getTextFrom($) {
    my $fn = shift;
    $fn =~ s/.*\///g;
    $fn = "$WD/$fn";

    # List-form pipe open: the file name is passed to sinabook_h2t as a
    # single argument without going through a shell, so names containing
    # quotes or other shell metacharacters can neither break nor inject
    # into the command line.
    my $fh;
    unless (open($fh, "-|", "sinabook_h2t", $fn)) {
        warn("Cannot run sinabook_h2t on $fn: $!\n");
        return "";
    }
    binmode($fh, ":utf8");

    my $result = join("", <$fh>);

    # close() on a pipe also reports a non-zero exit status of the command.
    unless (close($fh)) {
        my $reason = $! ? ": $!" : " (exit status " . ($? >> 8) . ")";
        warn("sinabook_h2t failed on $fn$reason\n");
    }
    return $result;
}
# Make sure the output directory and, below it, the chapter directory exist.
# The parent is handled first; a failing mkdir aborts with the system error.
foreach my $dir ("$TXTD", "$TXTD/$CHAD") {
    next if (-d $dir);
    mkdir $dir or die("$!\n");
}
# Text placed at the top of the book: the title, then the author line and
# the "about" paragraph (closed by a rule of 80 "=" characters) when the
# book description provides them.
my $header = "$book->{title}\n\n";
if ($book->{author}) {
    $header .= " -- $book->{author}\n\n";
}
if ($book->{about}) {
    $header .= join("", "\t", $book->{about}, "\n", "=" x 80, "\n\n");
}

# The complete book goes into "<title>.txt" inside the text directory.
my $fulltxt = "$TXTD/$book->{title}.txt";
print STDERR "Generating book header ...\n";
#Write header
#to txt\about.txt
# Both opens are checked: if the destination cannot be written there is no
# point in converting every page only to lose the output silently.
my $aboutfile = "$TXTD/$CHAD/00 - about.txt";
open(my $about_fh, ">:utf8", $aboutfile) or die("Cannot write $aboutfile: $!\n");
print {$about_fh} $header;
close($about_fh) or die("Cannot write $aboutfile: $!\n");
#to fulltxt
# FTXT stays a package filehandle and is left open on purpose: the page
# loop further down appends every chapter to it and closes it at the end.
open(FTXT, ">:utf8", $fulltxt) or die("Cannot write $fulltxt: $!\n");
print FTXT $header;
# Collect the page records.  XML::Simple folds repeated <pages> elements
# into an array reference but returns a plain hash reference when the book
# has exactly one page; accept both instead of dying on the single-page case.
my @pages;
if ($book->{pages}) {
    @pages = (ref($book->{pages}) eq 'ARRAY') ? @{$book->{pages}} : ($book->{pages});
}
my $lastpage;      # chapter name of the previous page, undefined before the first page
my $index = 0;     # running chapter number used as the file name prefix

# Each page record provides NAME (chapter title) and URL (source page).
# Consecutive pages whose names differ only by a trailing "(n)" counter
# belong to the same chapter: they share one chapter file and one heading.
foreach my $page (@pages) {
    my $pagename = defined($page->{NAME}) ? $page->{NAME} : "";
    $pagename =~ s/\s+$//g;
    $pagename =~ s/\(\d+\)$//g;

    # A page continues the previous chapter only if there WAS a previous
    # page; comparing against the undefined $lastpage made a first page
    # with an empty name look like a continuation.
    my $continued = (defined($lastpage) && $pagename eq $lastpage);
    $index++ unless ($continued);
    my $pageindex   = sprintf("%.2d", $index);
    my $chapterfile = "$TXTD/$CHAD/$pageindex - $pagename.txt";
    print STDERR "Generating $chapterfile ...\n";

    my $text = getTextFrom($page->{URL});

    # A new chapter starts a fresh file with a heading; a continuation is
    # appended to the file of the chapter it belongs to.
    my $chaptertext = $continued ? "$text"  : "\n$pagename\n\n\n$text";
    my $mode        = $continued ? ">>:utf8" : ">:utf8";

    # A chapter file that cannot be written (e.g. a "/" in the page name)
    # is reported but does not stop the run: the text still reaches the
    # full-text file below.
    if (open(FO, $mode, $chapterfile)) {
        print FO $chaptertext;
        close(FO) or warn("Cannot write $chapterfile: $!\n");
    }
    else {
        warn("Cannot write $chapterfile: $!\n");
    }

    # In the full text, new chapters are set off by three extra blank lines.
    my $booktext = $continued ? $chaptertext : "\n\n\n$chaptertext";
    print FTXT $booktext;

    $lastpage = $pagename;
}

# Buffered write errors only surface when the handle is closed.
close(FTXT) or warn("Cannot finish writing $fulltxt: $!\n");

print STDERR "All done!\n\n";