2 ###APPNAME: sinabook_h2t
4 ###APPDATE: Wed Mar 12 01:44:35 2008
6 ###APPDESC: convert HTML files downloaded from book.sina.com.cn to plain text
7 ###APPUSAGE: [html file]
8 ###APPEXAMPLE: sinabook_h2t foo.htm\n\tcat foo.htm | sinabook_h2t
# Treat an argument-less invocation as a help request by making "-h" the
# first argument.
# NOTE(review): the APPEXAMPLE header advertises `cat foo.htm | sinabook_h2t`,
# which also has no arguments -- confirm which behaviour is intended.
10 $ARGV[0]="-h" unless(@ARGV);
# Run the external "plhelp" command (given this script's path and the
# arguments) and exit with system()'s return value when $_ is a help flag.
# NOTE(review): nothing visible here assigns $_; presumably this statement
# sits inside a loop over @ARGV that is not shown -- confirm.
12 exit(system("plhelp",$0,@ARGV)) if($_ eq "-h" || $_ eq "--help");
# Name of the external command the pipelines below send the HTML through.
15 my $processor="htm2txt";
# Command-line options appended to $processor. They carry two HTML fragments;
# presumably -s/-e mark where the extracted text starts and ends -- confirm
# against htm2txt's own usage.
16 my $rootid = '-s "<font id=\"zoom\" class=f14><p>" -e "<table align=right><tr><td>"';
# Disabled alternative option set; its -e string is Chinese for "back to
# table of contents".
17 #my $rootid = '-r contTxt,artibody,article -s "<div id=article>" -e "返回目录"';
# Open FI as a read pipe from a shell pipeline: iconv converts the file named
# by $ARGV[0] from gb2312 to utf8 and feeds the result to $processor.
# NOTE(review): the file name is interpolated into the shell command inside
# single quotes, so a name containing "'" would break the command; the result
# of open is not checked either.
19 open FI
,"-|","iconv -f gb2312 -t utf8 '$ARGV[0]' | $processor $rootid";
# Open FI as a read pipe from $processor alone: no iconv stage and no input
# file named on the command line.
# NOTE(review): as shown, this runs unconditionally right after the open
# above; presumably the two opens are the branches of an if/else (file
# argument vs. standard input) that is not visible here -- confirm.
22 open FI
,"-|","$processor $rootid";
# Blank out the text of $_ when it contains any of the three Chinese markers
# in the pattern ("Sina", "related links", "reading channel"). With no /m or
# /s modifier the match cannot span an embedded newline, and a trailing
# newline, if present, is left in place.
# NOTE(review): no read loop over FI and no output statement are visible
# here; presumably this runs once per line read from FI -- confirm.
25 s/^.*(新浪|相关链接|读书频道).*$//g;