updated git and svn scripts
[xrzperl.git] / WGet_CodeProjectArticle
blobdae609e6885bedba130a37ec9d0cad6b93c687a9
1 #!/usr/bin/perl -w
3 #$0 http://www.codeproject.com/KB/cpp/SDIMultiSplit_Part1.aspx?display=Print
4 #$0 http://www.codeproject.com/KB/mobile/CHMReader.aspx
# Turn an article URL into the relative path of the saved page.
# get_article opens this path after the download to read the page title.
#
# Parameters:
#   $r - article URL, with or without a leading "scheme://"
# Returns:
#   the URL with scheme and host removed, every "?" replaced by "@",
#   and ".html" appended.
sub file_name {
    my $r = shift;
    # Strip only a *leading* "scheme://". A greedy ".*//" would also eat the
    # real path whenever the query string embeds another URL.
    $r =~ s{^[^/]*//}{};
    # Drop the host part (everything up to and including the first "/").
    $r =~ s{^[^/]*/}{};
    $r =~ s/\?/@/g;
    return $r . ".html";
}
# Derive a fallback article name from a saved file path.
#
# Parameters:
#   $r - file path as produced by file_name (either "/" or "\" separators)
# Returns:
#   the last path component with everything from ".aspx@" onwards removed.
#   A name that does not contain ".aspx@" is returned unchanged.
sub article_name {
    my $r = shift;
    # Greedy match: removes everything up to the last path separator.
    $r =~ s/.*[\\\/]//g;
    $r =~ s/\.aspx@.*//;
    return $r;
}
# Extract the article title from the <title> element of a saved HTML page.
#
# Parameters:
#   $file - path of the HTML file to scan
# Returns:
#   the title text with a leading "CodeProject" / "CodeProject:" prefix and a
#   trailing "Free source code and..." suffix removed, or undef when the file
#   cannot be opened or no single-line <title>...< is found.
sub file_title {
    my $file = shift;
    # Lexical handle so the file is always closed, whichever way we leave.
    open my $fh, "<", $file or return undef;
    my $title;
    while (my $line = <$fh>) {
        # HTML tag names are case-insensitive, hence /i.
        if ($line =~ /<\s*title\s*>\s*([^<>]+)</i) {
            $title = $1;
            $title =~ s/^CodeProject:?\s*//;
            $title =~ s/\s*Free source code and.*$//;
            # The capture is greedy and may end in blanks before "</title>".
            $title =~ s/\s+$//;
            last;
        }
    }
    close $fh;
    # Explicit undef (not an empty list) when nothing matched, as before.
    return $title;
}
# Site directories that the download must skip. start_wget joins these with
# "," and passes them as the value of wget's -X option.
# ("/script/Catalog" used to be listed twice; the duplicate is removed.)
my @exclude = (
    "/script/Jobs",
    "/script/Catalog",
    "/script/Membership",
    "/Forums",
    "/script/Forums",
    "/info",
    "/webservices",
    "/script/Ratings",
);
# Download one page with wget.
#
# Parameters:
#   $url - URL to fetch
# Returns:
#   true when system() reports a zero status for wget, false otherwise.
#
# wget options used (see the wget manual): -N timestamping, -nH no host
# directory, -p page requisites, -k convert links, -E add ".html" extension,
# -t 3 retries, -X comma-separated list of directories to exclude.
sub start_wget {
    my $url = shift;
    my @wget = ("wget", "-N", "-nH", "-p", "-k", "-E", "-t", 3);
    if (@exclude) {
        push @wget, "-X", join(",", @exclude);
    }
    # List form of system(): the URL is never interpreted by a shell.
    return (system(@wget, $url) == 0);
}
# Download the "PrintAll" version of one article and record it in files.js.
#
# Parameters:
#   $url - article URL; anything from ".aspx" onwards is replaced by
#          ".aspx?display=PrintAll"
# Returns:
#   an empty list when $url is false;
#   (1, $url, "OK") after a successful download;
#   (undef, $url, "Download failed") when start_wget reports failure.
#   $url in the result is the rewritten URL.
#
# Side effects: on success, when the URL contains at least two directory
# levels, a "files.push([...]);" line is appended to files.js in the current
# directory (the file is created with a "var files=new Array();" header when
# it does not exist yet), and "<name> ==> <file>" is printed to STDERR.
sub get_article {
    my $url = shift;
    return unless ($url);
    $url =~ s/\.aspx.*/.aspx?display=PrintAll/;
    my $file_name    = file_name($url);
    my $article_name = article_name($file_name);

    unless (start_wget($url)) {
        return undef, $url, "Download failed";
    }

    # Prefer the page's own <title> over the name derived from the path.
    my $title = file_title($file_name);
    $article_name = $title if ($title);

    if ($url =~ m{[^/]+/([^/]+)/([^/]+)/}) {
        # Copy the captures at once; later regexes would overwrite $1/$2.
        my ($top_dir, $sub_dir) = ($1, $2);
        my $is_new = !-f "files.js";
        if (open my $index, ($is_new ? ">" : ">>"), "files.js") {
            print {$index} "var files=new Array();\n" if $is_new;
            # Escape backslashes and double quotes so that a title such as
            # 'A "Hello" demo' still yields a valid JavaScript string literal.
            my @fields = map {
                my $s = $_;
                $s =~ s/([\\"])/\\$1/g;
                "\"$s\"";
            } ($top_dir, $sub_dir, $article_name, $file_name);
            print {$index} "files.push([" . join(",", @fields) . "]);\n";
            close $index or warn "Cannot close files.js: $!\n";
        }
        else {
            # Best effort: the download itself succeeded, so only report it.
            warn "Cannot write files.js: $!\n";
        }
    }

    print STDERR "$article_name ==> $file_name\n";
    return 1, $url, "OK";
}
# Entry point: fetch every URL given on the command line and report the
# outcome of each on STDERR as "<status>:<url>"; without arguments print
# a usage message instead.
if (@ARGV) {
    for my $arg (@ARGV) {
        # get_article returns (flag, url, status), or an empty list when the
        # argument is empty/false.
        my (undef, $url, $status) = get_article($arg);
        if (defined $status) {
            # Success and failure were reported identically before, too.
            print STDERR "$status:$url\n";
        }
        else {
            print STDERR "Invalid URL:$arg\n";
        }
    }
}
else {
    print STDERR $0, "\n";
    print STDERR "Usage:\n\t$0 (CodeProject URLs)\n";
}