#!@PYTHON@

# translate an entire Wiki site into local html.

import re
import urllib
import sys
import getopt
import os

program_version = '@TOPLEVEL_VERSION@'
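# @TOPLEVEL_VERSION@ is normally filled in by the build system; fall
# back to a fixed version when it has not been substituted.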
if program_version == '@' + 'TOPLEVEL_VERSION' + '@':
    program_version = '1.5.69'

def help ():
    print """Usage: wiki-slurp.py [OPTION]... ENTRY-PATTERN...

Download a WikiWiki site and convert to local html.

Example: wiki-slurp.py -d /tmp/output 'http://c2.com/cgi-bin/wiki?'

Options:
  -h,--help           this help
  -m,--mangle         mangle file names to be shorter
  -d,--outdir=DIR     set output directory to DIR
  -v,--version        version information

Warning: downloading an entire Wiki site generates a huge amount of
traffic and server load. Consider asking for a copy of the database.
See also http://c2.com/cgi-bin/wiki?WikiSlurp

Report bugs to bug-lilypond@gnu.org.

Written by Han-Wen Nienhuys <hanwen@cs.uu.nl>
"""

def print_version ():
    print r"""wiki-slurp.py %s

This is free software. It is covered by the GNU General Public License,
and you are welcome to change it and/or distribute copies of it under
certain conditions. Invoke as `wiki-slurp.py --warranty' for more information.

Copyright (c) 2000-2002 by Han-Wen Nienhuys <hanwen@cs.uu.nl>
""" % program_version

(options, files) = getopt.getopt (sys.argv[1:], 'vd:hm', ['help', 'mangle', 'version', 'outdir='])
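
# file-name manglers: identity keeps the WikiWord page name as-is,
# mangle (-m/--mangle) hashes it down to a short numeric name.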
def identity (name):
    return name

def mangle (name):
    return '%d' % hash (name)

mangler = identity

outdir = '/tmp/'
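
# act on the parsed command-line options.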
for opt in options:
    o = opt[0]
    a = opt[1]
    if o == '--help' or o == '-h':
        help ()
        sys.exit (0)
    elif o == '--version' or o == '-v':
        print_version ()
        sys.exit (0)
    elif o == '--mangle' or o == '-m':
        mangler = mangle
    elif o == '--outdir' or o == '-d':
        outdir = a
    else:
        print o
        raise getopt.error

patterns = files

if not patterns:
    help ()
    sys.stderr.write ("\n")
    sys.exit (2)
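
# escape the regexp metacharacters '?' and '.' in each entry pattern
# so it can be matched literally in the downloaded html.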
re_patterns = []
for pattern in patterns:
    re_patterns.append (re.sub ('([?.])', '\\\\\\1', pattern))
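
# stack of page names still to be fetched; the crawl starts at the FrontPage.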
todo = ["FrontPage"]

print 'here!'
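
# pages already handled; 'EditText' is seeded so the wiki's edit form is never fetched.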
done = {
    'EditText': 1,
    }
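
# rewrite one html page: localize or strip the wiki links in `str'
# and report which wiki pages it linked to.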
def unwiki (str, pat, mangler):
    local = '<a href="%s([A-Za-z]+)">([A-Za-z]+)</a>' % pat

    newurls = []
    def do_replace (match, us = newurls, mangler = mangler):
        newurl = match.group (1)
        local = mangler (newurl)

        replacement = '<a href="%s.html">%s</a>' % (local, newurl)
        us.append (newurl)
        return replacement
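
    # links to WikiWord pages become links to local <name>.html files,
    # and every linked page name is collected for later download.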
    str = re.sub (local, do_replace, str)
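
    # any remaining wiki links (e.g. the '?' placeholders for missing
    # pages) are reduced to their link text.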
    otherurl = '<a href="%s[^>]*">([?A-Za-z]+)</a>' % pat
    str = re.sub (otherurl, '\\1', str)
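
    # for linked images, drop the anchor and keep only the <img> tag.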
    imagesrc = '<a href="%s[^>]*">(<img[^>]*>)</a>' % pat
    str = re.sub (imagesrc, '\\1', str)

    return (str, newurls)
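
# main loop: pop a page name, download it, rewrite its links and
# queue every page it links to, until nothing new turns up.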
while todo:
    f = todo[-1]
    todo = todo[:-1]

    if done.has_key (f):
        continue
    done[f] = 1

    mangled = mangler (f)

    sys.stderr.write ("reading `%s' ... " % f)
    sys.stderr.flush ()
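
    # patterns[0] is the base URL of the wiki; appending the page name
    # gives the full URL of that page.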
    page = urllib.urlopen (patterns[0] + f).read ()
    sys.stderr.write ('done. ')
    sys.stderr.flush ()
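
    # rewrite the page against every entry pattern and remember the
    # newly discovered page names.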
    for re_pattern in re_patterns:
        (page, nus) = unwiki (page, re_pattern, mangler)
        todo.extend (nus)

    outname = os.path.join (outdir, mangled) + '.html'
    fo = open (outname, 'w')

    sys.stderr.write ("Writing `%s'\n" % outname)
    fo.write (page)
    fo.close ()


# test
if 0:
    page = open ('/tmp/FrontPage.html').read ()
    (str, us) = unwiki (page, re_patterns[0], mangler)
    print str
    print us