Exceptions raised during renaming in rotating file handlers are now passed to handleE...
[python.git] / Doc / tools / prechm.py
blob7b2f3936cecf89c98a01157847e4dedbb1c99baf
1 """
2 Makes the necessary files to convert from plain html of
3 Python 1.5 and 1.5.x Documentation to
4 Microsoft HTML Help format version 1.1
5 Doesn't change the html's docs.
7 by hernan.foffani@iname.com
8 no copyright and no responsibilities.
10 modified by Dale Nagata for Python 1.5.2
12 Renamed from make_chm.py to prechm.py, and checked into the Python
13 project, 19-Apr-2002 by Tim Peters. Assorted modifications by Tim
14 and Fred Drake. Obtained from Robin Dunn's .chm packaging of the
15 Python 2.2 docs, at <http://alldunn.com/python/>.
16 """
18 import sys
19 import os
20 from formatter import NullWriter, AbstractFormatter
21 from htmllib import HTMLParser
22 import getopt
23 import cgi
25 usage_mode = '''
26 Usage: prechm.py [-c] [-k] [-p] [-v 1.5[.x]] filename
27 -c: does not build filename.hhc (Table of Contents)
28 -k: does not build filename.hhk (Index)
29 -p: does not build filename.hhp (Project File)
30 -v 1.5[.x]: makes help for the python 1.5[.x] docs
31 (default is python 1.5.2 docs)
32 '''
34 # Project file (*.hhp) template. 'arch' is the file basename (like
35 # the pythlp in pythlp.hhp); 'version' is the doc version number (like
36 # the 2.2 in Python 2.2).
37 # The magical numbers in the long line under [WINDOWS] set most of the
38 # user-visible features (visible buttons, tabs, etc).
39 # About 0x10384e: This defines the buttons in the help viewer. The
40 # following defns are taken from htmlhelp.h. Not all possibilities
41 # actually work, and not all those that work are available from the Help
42 # Workshop GUI. In particular, the Zoom/Font button works and is not
43 # available from the GUI. The ones we're using are marked with 'x':
45 # 0x000002 Hide/Show x
46 # 0x000004 Back x
47 # 0x000008 Forward x
48 # 0x000010 Stop
49 # 0x000020 Refresh
50 # 0x000040 Home x
51 # 0x000080 Forward
52 # 0x000100 Back
53 # 0x000200 Notes
54 # 0x000400 Contents
55 # 0x000800 Locate x
56 # 0x001000 Options x
57 # 0x002000 Print x
58 # 0x004000 Index
59 # 0x008000 Search
60 # 0x010000 History
61 # 0x020000 Favorites
62 # 0x040000 Jump 1
63 # 0x080000 Jump 2
64 # 0x100000 Zoom/Font x
65 # 0x200000 TOC Next
66 # 0x400000 TOC Prev
68 project_template = '''
69 [OPTIONS]
70 Compiled file=%(arch)s.chm
71 Contents file=%(arch)s.hhc
72 Default Window=%(arch)s
73 Default topic=index.html
74 Display compile progress=No
75 Full text search stop list file=%(arch)s.stp
76 Full-text search=Yes
77 Index file=%(arch)s.hhk
78 Language=0x409
79 Title=Python %(version)s Documentation
81 [WINDOWS]
82 %(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
83 "index.html","index.html",,,,,0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0
85 [FILES]
86 '''
88 contents_header = '''\
89 <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
90 <HTML>
91 <HEAD>
92 <meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
93 <!-- Sitemap 1.0 -->
94 </HEAD><BODY>
95 <OBJECT type="text/site properties">
96 <param name="Window Styles" value="0x801227">
97 <param name="ImageType" value="Folder">
98 </OBJECT>
99 <UL>
102 contents_footer = '''\
103 </UL></BODY></HTML>
106 object_sitemap = '''\
107 <OBJECT type="text/sitemap">
108 <param name="Name" value="%s">
109 <param name="Local" value="%s">
110 </OBJECT>
113 # List of words the full text search facility shouldn't index. This
114 # becomes file ARCH.stp. Note that this list must be pretty small!
115 # Different versions of the MS docs claim the file has a maximum size of
116 # 256 or 512 bytes (including \r\n at the end of each line).
117 # Note that "and", "or", "not" and "near" are operators in the search
118 # language, so no point indexing them even if we wanted to.
119 stop_list = '''
120 a and are as at
121 be but by
123 if in into is it
124 near no not
125 of on or
126 such
127 that the their then there these they this to
128 was will with
def addhtml(s):
    """Return s with a '.html' suffix, adding the suffix only when missing.

    s is a string or None.  A false value (None or the empty string) is
    handed back unchanged, so "no page" stays false for the caller.
    """
    if not s or s.endswith('.html'):
        return s
    return s + '.html'
class Book:
    """Info about "a book" in HTMLHelp terms == a doc directory in Python terms.

    Attributes:
        directory   -- directory holding the book's HTML files
        title       -- human-readable title of the book
        firstpage   -- first page of the book
        contentpage -- table-of-contents page, or None if not given
        indexpage   -- index page, or None if not given

    Page names are passed through addhtml(), so they may be given with or
    without the '.html' extension.
    """

    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        self.directory, self.title = directory, title
        pages = [addhtml(page)
                 for page in (firstpage, contentpage, indexpage)]
        self.firstpage, self.contentpage, self.indexpage = pages
# Library doc list of books, keyed by documentation version.  Each row
# below is the argument list for one Book:
#     (directory, title, first page[, content page[, index page]])
def _library(*rows):
    """Return the list of Book objects described by the given rows."""
    return [Book(*row) for row in rows]

supported_libraries = {
    '2.4': _library(
        ('.', 'Main page', 'index'),
        ('.', 'Global Module Index', 'modindex'),
        ('whatsnew', "What's New", 'index', 'contents'),
        ('tut', 'Tutorial', 'tut', 'node2'),
        ('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        ('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        ('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        ('ext', 'Extending and Embedding', 'ext', 'contents'),
        ('api', 'Python/C API', 'api', 'contents', 'genindex'),
        ('doc', 'Documenting Python', 'doc', 'contents'),
        ('inst', 'Installing Python Modules', 'inst', 'index'),
        ('dist', 'Distributing Python Modules', 'dist', 'index', 'genindex')
    ),

    '2.3': _library(
        ('.', 'Main page', 'index'),
        ('.', 'Global Module Index', 'modindex'),
        ('whatsnew', "What's New", 'index', 'contents'),
        ('tut', 'Tutorial', 'tut', 'node2'),
        ('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        ('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        ('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        ('ext', 'Extending and Embedding', 'ext', 'contents'),
        ('api', 'Python/C API', 'api', 'contents', 'genindex'),
        ('doc', 'Documenting Python', 'doc', 'contents'),
        ('inst', 'Installing Python Modules', 'inst', 'index'),
        ('dist', 'Distributing Python Modules', 'dist', 'index')
    ),

    '2.2': _library(
        ('.', 'Main page', 'index'),
        ('.', 'Global Module Index', 'modindex'),
        ('whatsnew', "What's New", 'index', 'contents'),
        ('tut', 'Tutorial', 'tut', 'node2'),
        ('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        ('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        ('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        ('ext', 'Extending and Embedding', 'ext', 'contents'),
        ('api', 'Python/C API', 'api', 'contents', 'genindex'),
        ('doc', 'Documenting Python', 'doc', 'contents'),
        ('inst', 'Installing Python Modules', 'inst', 'index'),
        ('dist', 'Distributing Python Modules', 'dist', 'index')
    ),

    '2.1.1': _library(
        ('.', 'Main page', 'index'),
        ('.', 'Global Module Index', 'modindex'),
        ('tut', 'Tutorial', 'tut', 'node2'),
        ('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        ('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        ('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        ('ext', 'Extending and Embedding', 'ext', 'contents'),
        ('api', 'Python/C API', 'api', 'contents', 'genindex'),
        ('doc', 'Documenting Python', 'doc', 'contents'),
        ('inst', 'Installing Python Modules', 'inst', 'index'),
        ('dist', 'Distributing Python Modules', 'dist', 'index')
    ),

    '2.0.0': _library(
        ('.', 'Global Module Index', 'modindex'),
        ('tut', 'Tutorial', 'tut', 'node2'),
        ('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        ('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        ('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        ('ext', 'Extending and Embedding', 'ext', 'contents'),
        ('api', 'Python/C API', 'api', 'contents', 'genindex'),
        ('doc', 'Documenting Python', 'doc', 'contents'),
        ('inst', 'Installing Python Modules', 'inst', 'contents'),
        ('dist', 'Distributing Python Modules', 'dist', 'contents')
    ),

    # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
    # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
    '1.5.2': _library(
        ('tut', 'Tutorial', 'tut', 'node2'),
        ('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        ('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        ('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        ('ext', 'Extending and Embedding', 'ext', 'contents'),
        ('api', 'Python/C API', 'api', 'contents', 'genindex'),
        ('doc', 'Documenting Python', 'doc', 'contents')
    ),

    # library for 1.5.1 version:
    '1.5.1': _library(
        ('tut', 'Tutorial', 'tut', 'contents'),
        ('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        ('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        ('ext', 'Extending and Embedding', 'ext', 'contents'),
        ('api', 'Python/C API', 'api', 'contents', 'genindex')
    ),

    # library for 1.5 version:
    '1.5': _library(
        ('tut', 'Tutorial', 'tut', 'node1'),
        ('lib', 'Library Reference', 'lib', 'node1', 'node268'),
        ('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        ('ext', 'Extending and Embedding', 'ext', 'node1'),
        ('api', 'Python/C API', 'api', 'node1', 'node48')
    )
}

class AlmostNullWriter(NullWriter):
    """Writer that prints nothing but remembers flowing text.

    The text handed to send_flowing_data() is collected so that the text
    between an anchor begin/end pair can be captured, e.g. for TOC entries.
    """

    def __init__(self):
        NullWriter.__init__(self)
        self.saved_clear()

    def send_flowing_data(self, data):
        """Remember data, stripped; runs of pure whitespace are dropped."""
        text = data.strip()
        if not text:
            return
        self.saved.append(text)

    def saved_clear(self):
        """Forget all saved text."""
        self.saved = []

    def saved_get(self):
        """Return all saved text as one string, pieces joined by a space."""
        return ' '.join(self.saved)

class HelpHtmlParser(HTMLParser):
    """Parser that turns anchors found inside list groups into sitemap entries.

    Processing is only active between a begin_group() and the finish_group()
    that brings the nesting level back to zero; anchors seen elsewhere
    (headers, footers, etc) are ignored.  For every anchor closed while
    active, an object_sitemap entry is written to the output file, using the
    anchor's text as the name and path + '/' + href as the location.
    """

    def __init__(self, formatter, path, output):
        HTMLParser.__init__(self, formatter)
        self.path = path        # relative path
        self.ft = output        # output file
        self.indent = 0         # number of tabs for pretty printing of files
        self.proc = False       # True when actively processing, else False
                                # (headers, footers, etc)
        # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
        # XXX See SF bug <http://www.python.org/sf/546579>.
        self.hrefstack = []     # stack of hrefs from anchor begins

    def begin_group(self):
        """Enter one more nesting level and switch processing on."""
        self.indent += 1
        self.proc = True

    def finish_group(self):
        """Leave one nesting level; processing stops at the top level."""
        self.indent -= 1
        self.proc = self.indent > 0

    def anchor_bgn(self, href, name, type):
        """Remember href; start collecting text afresh for an outermost anchor."""
        if not self.proc:
            return
        # XXX See SF bug <http://www.python.org/sf/546579>.
        # XXX index.html for the 2.2.1 language reference manual contains
        # XXX nested <a></a> tags in the entry for the section on blank
        # XXX lines.  We want to ignore the nested part completely.
        if not self.hrefstack:
            self.saved_clear()
        self.hrefstack.append(href)

    def anchor_end(self):
        """Write a sitemap entry for the most recently opened anchor."""
        # XXX The stack check guards against the nested-anchor problem
        # XXX described in anchor_bgn().
        if not (self.proc and self.hrefstack):
            return
        title = cgi.escape(self.saved_get(), True)
        target = self.path + '/' + self.hrefstack.pop()
        self.tab(object_sitemap % (title, target))

    def start_dl(self, atr_val):
        self.begin_group()

    def end_dl(self):
        self.finish_group()

    def do_dt(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")

    def write(self, text):
        """Write text to the output file."""
        self.ft.write(text)

    def tab(self, text=''):
        """Write self.indent tabs to the output file, then text (if any)."""
        self.write('\t' * self.indent)
        if text:
            self.write(text)

    def saved_clear(self):
        """Forget all text saved by the formatter's writer."""
        self.formatter.writer.saved_clear()

    def saved_get(self):
        """Return all text saved by the formatter's writer as a string."""
        return self.formatter.writer.saved_get()

class IdxHlpHtmlParser(HelpHtmlParser):
    """Parser for index pages.

    Nothing special here; the parent class's behavior seems to be enough.
    """

class TocHlpHtmlParser(HelpHtmlParser):
    """Parser for table-of-contents pages.

    Both <DL> and <UL> lists in the input are reproduced as nested <UL>
    lists in the output, and every <LI> starts a new output item.
    """

    def _open_list(self):
        # Enter the group first so the tag is indented at the new level.
        self.begin_group()
        self.tab('<UL>\n')

    def _close_list(self):
        # Leave the group first so the tag is indented at the outer level.
        self.finish_group()
        self.tab('</UL>\n')

    def start_dl(self, atr_val):
        self._open_list()

    def end_dl(self):
        self._close_list()

    def start_ul(self, atr_val):
        self._open_list()

    def end_ul(self):
        self._close_list()

    def do_li(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")

def index(path, indexpage, output):
    """Parse one book's index page and write its entries to output.

    path is the book's directory, indexpage the name of the index file
    inside it, and output the open file receiving the generated entries.
    """
    parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + indexpage)
    # try/finally so the input file is closed even if parsing fails.
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()

def content(path, contentpage, output):
    """Parse one book's contents page and write its entries to output.

    path is the book's directory, contentpage the name of the contents
    file inside it, and output the open file receiving the generated
    entries.
    """
    parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + contentpage)
    # try/finally so the input file is closed even if parsing fails.
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()

def do_index(library, output):
    """Write the index (.hhk) body for every book in library to output.

    A progress line is printed for each book; books without an index page
    contribute nothing to the output.
    """
    output.write('<UL>\n')
    for book in library:
        # Single formatted string: prints the same text as the old
        # multi-item print statement (no space is emitted after a tab).
        print('\t%s - %s' % (book.title, book.indexpage))
        if not book.indexpage:
            continue
        index(book.directory, book.indexpage, output)
    output.write('</UL>\n')

def do_content(library, version, output):
    """Write the table of contents (.hhc) for every book in library.

    Each book gets a top-level item pointing at its first page, followed
    by the entries parsed from its contents page when it has one.  A
    progress line is printed for each book.  version is accepted but not
    used here.
    """
    output.write(contents_header)
    for book in library:
        # Single formatted string: prints the same text as the old
        # multi-item print statement (no space is emitted after a tab).
        print('\t%s - %s' % (book.title, book.firstpage))
        first = book.directory + "/" + book.firstpage
        output.write('<LI>')
        output.write(object_sitemap % (book.title, first))
        if not book.contentpage:
            continue
        content(book.directory, book.contentpage, output)
    output.write(contents_footer)

def do_project(library, output, arch, version):
    """Write the project (.hhp) file, including its [FILES] section.

    library is the list of Book objects from supported_libraries for the
    version of the docs getting generated, output the open project file,
    arch the file basename and version the doc version number; the last
    two are substituted into project_template.

    Every .html and .css file found in a book's directory is listed once,
    as directory\\filename.
    """
    # Explicit mapping instead of locals(), so the template does not
    # depend on how the local variables of this function are named.
    output.write(project_template % {'arch': arch, 'version': version})
    seen = {}
    for book in library:
        directory = book.directory
        for page in os.listdir(directory):
            if not (page.endswith('.html') or page.endswith('.css')):
                continue
            # Plain concatenation: the directory name must not be used as
            # a format string, or a '%' in it would break the entry.
            entry = directory + '\\' + page + '\n'
            if entry in seen:
                continue
            output.write(entry)
            seen[entry] = True

def openfile(file):
    """Open file for writing and return the file object.

    If the file cannot be opened, print the file name and the error, then
    exit with status 1.
    """
    try:
        return open(file, "w")
    except IOError:
        # sys.exc_info() fetches the exception without version-specific
        # "except ..., name" / "except ... as name" syntax.
        print("%s : %s" % (file, sys.exc_info()[1]))
        sys.exit(1)

def usage():
    """Print the command-line help text and exit with status 0."""
    print(usage_mode)
    sys.exit(0)

def do_it(args=None):
    """Build the HTML Help input files selected on the command line.

    args is the argument list to parse; when it is None or empty,
    sys.argv[1:] is used instead.  Exactly one positional argument, the
    file basename, is required, along with a -v option naming the doc
    version.  Unless disabled by -p, -c or -k respectively, this writes
    the stop list and project file (.stp, .hhp), the table of contents
    (.hhc) and the index (.hhk).

    Exits through usage() on a malformed command line, and with status 1
    when the requested version is not in supported_libraries.
    """
    if not args:
        args = sys.argv[1:]

    if not args:
        usage()

    try:
        optlist, args = getopt.getopt(args, 'ckpv:')
    except getopt.error:
        # sys.exc_info() fetches the exception without version-specific
        # "except ..., name" / "except ... as name" syntax.
        print(sys.exc_info()[1])
        usage()

    if len(args) != 1:
        usage()
    arch = args[0]

    # The first -v option wins.
    # NOTE(review): the usage text advertises 1.5.2 as the default version,
    # but a missing -v has always ended in usage(); confirm which is meant.
    version = None
    for opt, value in optlist:
        if opt == '-v':
            version = value
            break
    if not version:
        usage()

    try:
        library = supported_libraries[version]
    except KeyError:
        # Report an unknown version instead of dying with a traceback.
        known = list(supported_libraries)
        known.sort()
        print("Unsupported documentation version %r; expected one of: %s"
              % (version, ', '.join(known)))
        sys.exit(1)

    # Each output file is closed in a finally clause so that a failure
    # part-way through does not leave it open.
    if ('-p', '') not in optlist:
        fname = arch + '.stp'
        f = openfile(fname)
        print("Building stoplist %s ..." % fname)
        try:
            words = stop_list.split()
            words.sort()
            for word in words:
                f.write(word + '\n')
        finally:
            f.close()

        f = openfile(arch + '.hhp')
        print("Building Project...")
        try:
            do_project(library, f, arch, version)
            if version == '2.0.0':
                for image in os.listdir('icons'):
                    f.write('icons' + '\\' + image + '\n')
        finally:
            f.close()

    if ('-c', '') not in optlist:
        f = openfile(arch + '.hhc')
        print("Building Table of Content...")
        try:
            do_content(library, version, f)
        finally:
            f.close()

    if ('-k', '') not in optlist:
        f = openfile(arch + '.hhk')
        print("Building Index...")
        try:
            do_index(library, f)
        finally:
            f.close()

if __name__ == '__main__':
    do_it()