updated git and svn scripts
[xrzperl.git] / htmllinks
blob 02398e8df28afe9b8d9f74c9c8d2e332ba802776
#!/usr/bin/perl -w
###APPNAME: htmllinks
###APPAUTHOR: xiaoranzzz
###APPDATE: Thu Mar 27 22:43:15 2008
###APPVER: 0.1
###APPDESC: strip links from html source
###APPUSAGE: [BaseURL]
###APPEXAMPLE: cat index.htm | htmllinks www.google.com
###APPOPTION:
use strict;
use warnings;
use URI;
use HTML::TreeBuilder;

# Reads an HTML document from STDIN and prints every link it contains,
# one per line: first the "href" of each <a> and <link> element, then the
# "src" of each <img> element. When a base URL is given as the first
# command-line argument, each link is resolved against it before printing;
# otherwise links are printed exactly as they appear in the document.

# Print the value of attribute $attr for every element in $root whose tag
# is one of @tags.
#
#   $root     - parsed HTML::TreeBuilder tree to search
#   $base_url - base URL used to absolutize each link; a false value means
#               "print the attribute value unchanged"
#   $attr     - attribute name holding the link ("href", "src", ...)
#   @tags     - tag names to look for
sub print_links {
    my ($root, $base_url, $attr, @tags) = @_;

    foreach my $node ($root->find(@tags)) {
        my $link = $node->attr($attr);

        # Test for defined/non-empty rather than truth so that a relative
        # link spelled "0" is not silently dropped.
        next unless defined $link && length $link;

        my $output = $base_url ? URI->new_abs($link, $base_url) : $link;
        print "$output\n";
    }
    return;
}

# Any -h/--help argument hands control to the external "plhelp" tool
# (presumably it renders the ###APP... header above -- confirm against plhelp).
foreach my $arg (@ARGV) {
    next unless $arg eq '-h' || $arg eq '--help';

    my $status = system('plhelp', $0, @ARGV);

    # system() returns a raw wait status, not an exit code: the child's exit
    # code lives in the high byte. Passing the raw value to exit() would turn
    # a child exit code of 1 (status 256) into 0. A status of -1 means the
    # command could not be started; a non-zero low 7 bits means it was killed
    # by a signal. Both are reported as failure.
    exit 1 if $status == -1 || ($status & 127);
    exit($status >> 8);
}

my $base = shift;
if ($base) {
    $base = URI->new($base);

    # A bare host name such as "www.google.com" has no scheme; default to http.
    $base = "http://$base" unless $base->scheme;
}

# Feed the document to the parser line by line, then signal end of input.
my $tree = HTML::TreeBuilder->new();
while (my $line = <STDIN>) {
    $tree->parse($line);
}
$tree->eof();

# Keep the original output order: all hrefs first, then all image sources.
print_links($tree, $base, 'href', 'a', 'link');
print_links($tree, $base, 'src',  'img');

$tree->delete();
exit 0;