wiki.pl: Port some fixes from upstream
[Orgmuse.git] / wikicopy
blob35a5baa32505fc3dbd51498cd205d98fcfb5a92d
1 #!/usr/bin/perl -w
3 # Copyright (C) 2004, 2005, 2006 Alex Schroeder <alex@emacswiki.org>
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the
17 # Free Software Foundation, Inc.
18 # 59 Temple Place, Suite 330
19 # Boston, MA 02111-1307 USA
21 require LWP;
22 use Getopt::Std;
24 our ($opt_v, $opt_w);
26 # We make our own specialization of LWP::UserAgent that asks for
27 # user/password if document is protected.
29 package RequestAgent;
30 @ISA = qw(LWP::UserAgent);
32 sub new {
33 my $self = LWP::UserAgent::new(@_);
34 $self;
37 sub get_basic_credentials {
38 my($self, $realm, $uri) = @_;
39 return split(':', $main::opt_w, 2);
43 my $usage = qq{$0 [-i URL] [-d STRING] [-t SECONDS]
44 \t[-u USERNAME] [-p PASSWORD] [-w USERNAME:PASSWORD]
45 \t[-q QUESTION] [-a ANSWER] [-z SECRET]
46 \t[SOURCE] TARGET
48 SOURCE and TARGET are the base URLs for the two wikis. Visiting these
49 two URLs should show you the respective homepages.
51 Provide the page names to copy on STDIN or use -i to point to a page.
52 You can use the index action with the raw parameter from the source
53 wiki to copy all the pages. See example below.
55 The list of page names should use the MIME type text/plain.
57 By default, wikicopy will copy a page every five seconds. Use -t to
58 override this. SECONDS is the number of seconds to wait between
59 requests.
61 If you use -d instead of providing a SOURCE, all the pages will be
62 replaced with STRING. This is useful when replacing the page content
63 with "DeletedPage", for example.
65 -d Delete target pages instead of providing SOURCE (default: none)
66 -s The summary for RecentChanges (default: none)
67 -u The username for RecentChanges (default: none)
68 -p The password to use for locked pages (default: none)
69 -w The username:password combo for basic authentication (default:none)
70 -q The question number to answer (default: 0, ie. the first question)
71 -a The answer to the question (default: none)
72 -z Alternatively, the secret key (default: question)
73 -v Verbose output for debugging (default: none)
75 Examples:
77 wikicopy -i 'http://www.emacswiki.org/cgi-bin/alex?action=index;raw=1' \\
78 http://www.emacswiki.org/cgi-bin/alex \\
79 http://localhost/cgi-bin/wiki.pl
81 wikicopy -d DeletedPage http://localhost/cgi-bin/wiki.pl < list.txt
83 wikicopy -v -u 'ElGordo' -w 'simple:mind' \\
84 -i 'http://www.communitywiki.org/odd/LosAngelesEcoVillage?action=index;raw=1' \\
85 'http://www.communitywiki.org/odd/LosAngelesEcoVillage' \\
86 'http://www.tentacle.net/~eeio/cgi/wiki.cgi'
89 sub UrlEncode {
90 my $str = shift;
91 return '' unless $str;
92 my @letters = split(//, $str);
93 my @safe = ('a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.', '!', '~', '*', "'", '(', ')', '#');
94 foreach my $letter (@letters) {
95 my $pattern = quotemeta($letter);
96 if (not grep(/$pattern/, @safe)) {
97 $letter = sprintf("%%%02x", ord($letter));
100 return join('', @letters);
103 sub GetRaw {
104 my ($uri) = @_;
105 my $ua = RequestAgent->new;
106 my $response = $ua->get($uri);
107 print "no response\n" unless $response->code;
108 print "GET ", $response->code, " ", $response->message, "\n" if $opt_v;
109 return $response->content if $response->is_success;
112 sub post {
113 my ($uri, $id, $data, $minor, $summary, $username, $password,
114 $question, $answer, $secret) = @_;
115 my $ua = RequestAgent->new;
116 my %params = (title=>$id, text=>$data, raw=>1,
117 username=>$username, pwd=>$password,
118 summary=>$summary, question_num=>$question,
119 answer=>$answer, $secret=>1,
120 recent_edit=>$minor);
121 if ($opt_v) {
122 foreach my $key (keys %params) {
123 my $value = $params{$key} || '(none)';
124 $value = substr($value,0,50) . '...'
125 if $key eq 'text' and length($value) > 53;
126 warn "$key: $value\n";
129 my $response = $ua->post($uri, \%params);
130 my $status = $response->code . ' ' . $response->message;
131 warn "POST $id failed: $status.\n" unless $response->is_success;
134 sub copy {
135 my ($source, $replacement, $target, $interval, $minor, $summary,
136 $username, $password, $question, $answer, $secret,
137 @pages) = @_;
138 foreach my $id (@pages) {
139 print "$id\n";
140 my $page = UrlEncode ($id);
141 # fix URL for other wikis
142 my $data = $replacement || GetRaw("$source?action=browse;id=$page;raw=1");
143 next unless $data;
144 post($target, $id, $data, $minor, $summary, $username, $password,
145 $question, $answer, $secret);
146 sleep($interval);
150 sub main {
151 our($opt_m, $opt_i, $opt_t, $opt_d, $opt_s, $opt_u, $opt_p,
152 $opt_q, $opt_a, $opt_z);
153 getopts('mi:t:d:s:u:p:q:a:z:w:v');
154 my $interval = $opt_t ? $opt_t : 5;
155 my $replacement = $opt_d;
156 my ($source, $target);
157 $source = shift(@ARGV) unless $replacement;
158 $target = shift(@ARGV);
159 die $usage if not $target or @ARGV; # not enough or too many
160 my @pages = ();
161 if ($opt_i) {
162 my $data = GetRaw($opt_i);
163 @pages = split(/\n/, $data);
164 } else {
165 print "List of pages:\n";
166 while (<STDIN>) {
167 chomp;
168 push(@pages, $_);
171 die "The list of pages is missing. Did you use -i?\n" unless @pages;
172 copy($source, $replacement, $target, $interval, $opt_m ? 'on' : '', $opt_s,
173 $opt_u, $opt_p, $opt_q, $opt_a, $opt_z||'question',
174 @pages);
177 main();