updated git and svn scripts
[xrzperl.git] / htm2txt
blobb9471dc47903cf08f99dce93e0776ad8b96b4a8c
1 #!/usr/bin/perl -w
2 ###APPNAME: htm2txt
3 ###APPAUTHOR: xiaoranzzz
4 ###APPDATE: Tue Mar 11 05:03:49 2008
5 ###APPVER: 0.1
6 ###APPDESC: convert html file downloaded from book.sina.com.cn to txt
7 ###APPUSAGE: [htmlfile] [[-r rootId,[rootId,...]] [-f filter,[filter,...]] [-s regexp -e regexp]]
8 ###APPEXAMPLE: htm2txt booksrc/1.shtml -r "content,article" -f "sina.com" \n\tcat booksrc/1.shtml | htm2txt
9 ###APPOPTION: -r:rootIds|-f:filters|-s:starting mark|-e:ending mark
10 use strict;
11 use HTML::TreeBuilder;
12 use lib $ENV{XR_PERL_MODULE_DIR};
13 use MyPlace::Script::Usage qw/help_required/;
14 use MyPlace::HTML::Convertor;
# Main script: print usage and stop when help was requested, otherwise parse
# the command line, slurp the HTML input, convert it and print the result.
exit 0 if(help_required($0,@ARGV));

my $fn;              # input file name; when absent, input is read from STDIN
my @rootid = ();     # values collected after -r (comma-separated lists)
my @filter = ();     # values collected after -f (comma-separated lists)
my $start;           # value given after -s (starting mark)
my $end;             # value given after -e (ending mark)
my $what = "";       # option flag that the next plain argument belongs to

# Option parsing is "sticky": once a flag (-r, -f, -s or -e) has been seen,
# every following plain argument is attributed to that flag until another
# flag appears.  A plain argument seen before any flag is the input file
# name, so the file name has to come first on the command line.
# An empty @ARGV simply skips the loop and leaves $fn undefined.
foreach my $opt (@ARGV) {
    if ($opt =~ /^-[rfse]$/) {
        $what = $opt;
        next;
    }

    if ($what eq "-r") {
        push @rootid, split(",", $opt);
    }
    elsif ($what eq "-f") {
        push @filter, split(",", $opt);
    }
    elsif ($what eq "-s") {
        $start = $opt;
    }
    elsif ($what eq "-e") {
        $end = $opt;
    }
    else {
        $fn = $opt;
    }
}

# Pick the input source.  Testing defined/length instead of plain truth lets
# a file literally named "0" be opened rather than silently replaced by
# standard input.  An empty string still means "no file given".
my $have_file = (defined $fn && length $fn) ? 1 : 0;

my $in;
if ($have_file) {
    open $in, "<", $fn or die("htm2txt: cannot open '$fn': $!\n");
}
else {
    # Read the already-open STDIN handle instead of re-opening /dev/stdin,
    # which does not exist on every platform.
    $in = \*STDIN;
}

# Slurp the whole input as a list of lines.
my @src = <$in>;
close $in if $have_file;

# text_from_string is not defined in this file (presumably exported by
# MyPlace::HTML::Convertor -- confirm).  It receives the source lines, the
# start/end marks and the root-id and filter lists; its return value is
# dereferenced as an array reference and printed as-is.
my @result = @{text_from_string(\@src,$start,$end,\@rootid,\@filter)};
print @result;