Minor fix for currentframe (SF #1652788).
[python.git] / Doc / tools / prechm.py
blob57a43fd6fe03ccbfdda56389f108befe9c863063
1 """
2 Makes the necessary files to convert from plain html of
3 Python 1.5 and 1.5.x Documentation to
4 Microsoft HTML Help format version 1.1
5 Doesn't change the html's docs.
7 by hernan.foffani@iname.com
8 no copyright and no responsibilities.
10 modified by Dale Nagata for Python 1.5.2
12 Renamed from make_chm.py to prechm.py, and checked into the Python
13 project, 19-Apr-2002 by Tim Peters. Assorted modifications by Tim
14 and Fred Drake. Obtained from Robin Dunn's .chm packaging of the
15 Python 2.2 docs, at <http://alldunn.com/python/>.
16 """
18 import sys
19 import os
20 from formatter import NullWriter, AbstractFormatter
21 from htmllib import HTMLParser
22 import getopt
23 import cgi
25 usage_mode = '''
26 Usage: prechm.py [-c] [-k] [-p] [-v 1.5[.x]] filename
27 -c: does not build filename.hhc (Table of Contents)
28 -k: does not build filename.hhk (Index)
29 -p: does not build filename.hhp (Project File)
30 -v 1.5[.x]: makes help for the python 1.5[.x] docs
31 (default is python 1.5.2 docs)
32 '''
34 # Project file (*.hhp) template. 'arch' is the file basename (like
35 # the pythlp in pythlp.hhp); 'version' is the doc version number (like
36 # the 2.2 in Python 2.2).
37 # The magical numbers in the long line under [WINDOWS] set most of the
38 # user-visible features (visible buttons, tabs, etc).
39 # About 0x10384e: This defines the buttons in the help viewer. The
40 # following defns are taken from htmlhelp.h. Not all possibilities
41 # actually work, and not all those that work are available from the Help
42 # Workshop GUI. In particular, the Zoom/Font button works and is not
43 # available from the GUI. The ones we're using are marked with 'x':
45 # 0x000002 Hide/Show x
46 # 0x000004 Back x
47 # 0x000008 Forward x
48 # 0x000010 Stop
49 # 0x000020 Refresh
50 # 0x000040 Home x
51 # 0x000080 Forward
52 # 0x000100 Back
53 # 0x000200 Notes
54 # 0x000400 Contents
55 # 0x000800 Locate x
56 # 0x001000 Options x
57 # 0x002000 Print x
58 # 0x004000 Index
59 # 0x008000 Search
60 # 0x010000 History
61 # 0x020000 Favorites
62 # 0x040000 Jump 1
63 # 0x080000 Jump 2
64 # 0x100000 Zoom/Font x
65 # 0x200000 TOC Next
66 # 0x400000 TOC Prev
68 project_template = '''
69 [OPTIONS]
70 Compiled file=%(arch)s.chm
71 Contents file=%(arch)s.hhc
72 Default Window=%(arch)s
73 Default topic=index.html
74 Display compile progress=No
75 Full text search stop list file=%(arch)s.stp
76 Full-text search=Yes
77 Index file=%(arch)s.hhk
78 Language=0x409
79 Title=Python %(version)s Documentation
81 [WINDOWS]
82 %(arch)s="Python %(version)s Documentation","%(arch)s.hhc","%(arch)s.hhk",\
83 "index.html","index.html",,,,,0x63520,220,0x10384e,[0,0,1024,768],,,,,,,0
85 [FILES]
86 '''
88 contents_header = '''\
89 <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
90 <HTML>
91 <HEAD>
92 <meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
93 <!-- Sitemap 1.0 -->
94 </HEAD><BODY>
95 <OBJECT type="text/site properties">
96 <param name="Window Styles" value="0x801227">
97 <param name="ImageType" value="Folder">
98 </OBJECT>
99 <UL>
102 contents_footer = '''\
103 </UL></BODY></HTML>
106 object_sitemap = '''\
107 <OBJECT type="text/sitemap">
108 <param name="Name" value="%s">
109 <param name="Local" value="%s">
110 </OBJECT>
113 # List of words the full text search facility shouldn't index. This
114 # becomes file ARCH.stp. Note that this list must be pretty small!
115 # Different versions of the MS docs claim the file has a maximum size of
116 # 256 or 512 bytes (including \r\n at the end of each line).
117 # Note that "and", "or", "not" and "near" are operators in the search
118 # language, so no point indexing them even if we wanted to.
119 stop_list = '''
120 a and are as at
121 be but by
123 if in into is it
124 near no not
125 of on or
126 such
127 that the their then there these they this to
128 was will with
def addhtml(s):
    """Return s with a '.html' suffix, adding it only when missing.

    s is a string or None.  A false value (None or the empty string) is
    handed back unchanged, so callers can keep testing the result for
    truth to find out whether a page name was supplied at all.
    """
    if not s:
        return s
    if s.endswith('.html'):
        return s
    return s + '.html'
class Book:
    """Plain record describing one "book" in HTML Help terms, which is
    one documentation directory in Python terms.

    Attributes:
        directory    directory holding the book's files
        title        human-readable title of the book
        firstpage    first page of the book
        contentpage  table-of-contents page, or None
        indexpage    index page, or None

    The three page names are passed through addhtml(), so they may be
    given with or without the '.html' extension.
    """

    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        self.directory, self.title = directory, title
        # Normalise every page name to carry the '.html' extension.
        self.firstpage = addhtml(firstpage)
        self.contentpage = addhtml(contentpage)
        self.indexpage = addhtml(indexpage)
150 # Library Doc list of books:
151 # each 'book' : (Dir, Title, First page, Content page, Index page)
152 supported_libraries = {
153 '2.5':
155 Book('.', 'Main page', 'index'),
156 Book('.', 'Global Module Index', 'modindex'),
157 Book('whatsnew', "What's New", 'index', 'contents'),
158 Book('tut','Tutorial','tut','node2'),
159 Book('lib','Library Reference','lib','contents','genindex'),
160 Book('ref','Language Reference','ref','contents','genindex'),
161 Book('mac','Macintosh Reference','mac','contents','genindex'),
162 Book('ext','Extending and Embedding','ext','contents'),
163 Book('api','Python/C API','api','contents','genindex'),
164 Book('doc','Documenting Python','doc','contents'),
165 Book('inst','Installing Python Modules', 'inst', 'index'),
166 Book('dist','Distributing Python Modules', 'dist', 'index', 'genindex'),
169 '2.4':
171 Book('.', 'Main page', 'index'),
172 Book('.', 'Global Module Index', 'modindex'),
173 Book('whatsnew', "What's New", 'index', 'contents'),
174 Book('tut','Tutorial','tut','node2'),
175 Book('lib','Library Reference','lib','contents','genindex'),
176 Book('ref','Language Reference','ref','contents','genindex'),
177 Book('mac','Macintosh Reference','mac','contents','genindex'),
178 Book('ext','Extending and Embedding','ext','contents'),
179 Book('api','Python/C API','api','contents','genindex'),
180 Book('doc','Documenting Python','doc','contents'),
181 Book('inst','Installing Python Modules', 'inst', 'index'),
182 Book('dist','Distributing Python Modules', 'dist', 'index', 'genindex'),
185 '2.3':
187 Book('.', 'Main page', 'index'),
188 Book('.', 'Global Module Index', 'modindex'),
189 Book('whatsnew', "What's New", 'index', 'contents'),
190 Book('tut','Tutorial','tut','node2'),
191 Book('lib','Library Reference','lib','contents','genindex'),
192 Book('ref','Language Reference','ref','contents','genindex'),
193 Book('mac','Macintosh Reference','mac','contents','genindex'),
194 Book('ext','Extending and Embedding','ext','contents'),
195 Book('api','Python/C API','api','contents','genindex'),
196 Book('doc','Documenting Python','doc','contents'),
197 Book('inst','Installing Python Modules', 'inst', 'index'),
198 Book('dist','Distributing Python Modules', 'dist', 'index'),
201 '2.2':
203 Book('.', 'Main page', 'index'),
204 Book('.', 'Global Module Index', 'modindex'),
205 Book('whatsnew', "What's New", 'index', 'contents'),
206 Book('tut','Tutorial','tut','node2'),
207 Book('lib','Library Reference','lib','contents','genindex'),
208 Book('ref','Language Reference','ref','contents','genindex'),
209 Book('mac','Macintosh Reference','mac','contents','genindex'),
210 Book('ext','Extending and Embedding','ext','contents'),
211 Book('api','Python/C API','api','contents','genindex'),
212 Book('doc','Documenting Python','doc','contents'),
213 Book('inst','Installing Python Modules', 'inst', 'index'),
214 Book('dist','Distributing Python Modules', 'dist', 'index'),
217 '2.1.1':
219 Book('.', 'Main page', 'index'),
220 Book('.', 'Global Module Index', 'modindex'),
221 Book('tut','Tutorial','tut','node2'),
222 Book('lib','Library Reference','lib','contents','genindex'),
223 Book('ref','Language Reference','ref','contents','genindex'),
224 Book('mac','Macintosh Reference','mac','contents','genindex'),
225 Book('ext','Extending and Embedding','ext','contents'),
226 Book('api','Python/C API','api','contents','genindex'),
227 Book('doc','Documenting Python','doc','contents'),
228 Book('inst','Installing Python Modules', 'inst', 'index'),
229 Book('dist','Distributing Python Modules', 'dist', 'index'),
232 '2.0.0':
234 Book('.', 'Global Module Index', 'modindex'),
235 Book('tut','Tutorial','tut','node2'),
236 Book('lib','Library Reference','lib','contents','genindex'),
237 Book('ref','Language Reference','ref','contents','genindex'),
238 Book('mac','Macintosh Reference','mac','contents','genindex'),
239 Book('ext','Extending and Embedding','ext','contents'),
240 Book('api','Python/C API','api','contents','genindex'),
241 Book('doc','Documenting Python','doc','contents'),
242 Book('inst','Installing Python Modules', 'inst', 'contents'),
243 Book('dist','Distributing Python Modules', 'dist', 'contents'),
246 # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
247 # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
248 '1.5.2':
250 Book('tut','Tutorial','tut','node2'),
251 Book('lib','Library Reference','lib','contents','genindex'),
252 Book('ref','Language Reference','ref','contents','genindex'),
253 Book('mac','Macintosh Reference','mac','contents','genindex'),
254 Book('ext','Extending and Embedding','ext','contents'),
255 Book('api','Python/C API','api','contents','genindex'),
256 Book('doc','Documenting Python','doc','contents')
259 # library for 1.5.1 version:
260 '1.5.1':
262 Book('tut','Tutorial','tut','contents'),
263 Book('lib','Library Reference','lib','contents','genindex'),
264 Book('ref','Language Reference','ref-1','ref-2','ref-11'),
265 Book('ext','Extending and Embedding','ext','contents'),
266 Book('api','Python/C API','api','contents','genindex')
269 # library for 1.5 version:
270 '1.5':
272 Book('tut','Tutorial','tut','node1'),
273 Book('lib','Library Reference','lib','node1','node268'),
274 Book('ref','Language Reference','ref-1','ref-2','ref-11'),
275 Book('ext','Extending and Embedding','ext','node1'),
276 Book('api','Python/C API','api','node1','node48')
class AlmostNullWriter(NullWriter):
    """Writer that prints nothing but remembers flowing text.

    Everything sent to send_flowing_data() is collected so that the text
    found between an anchor's begin and end (e.g. a TOC entry) can be
    retrieved afterwards with saved_get().
    """

    def __init__(self):
        NullWriter.__init__(self)
        # Creates self.saved, the list of captured text fragments.
        self.saved_clear()

    def send_flowing_data(self, data):
        """Remember data, minus surrounding whitespace."""
        text = data.strip()
        if not text:
            # Runs of pure whitespace aren't worth keeping.
            return
        self.saved.append(text)

    def saved_clear(self):
        """Forget all saved text."""
        self.saved = []

    def saved_get(self):
        """Return all saved text as one string, fragments separated by
        single spaces."""
        separator = ' '
        return separator.join(self.saved)
class HelpHtmlParser(HTMLParser):
    """Parser that converts the anchors of an HTML page into HTML Help
    sitemap objects.

    Anchors are only honoured while the parser is inside at least one
    "group" (see begin_group()/finish_group()); page headers and footers
    outside any group are ignored.  Each accepted anchor is written to
    the output file as an object_sitemap entry whose name is the anchor
    text and whose target is the anchor's href prefixed with self.path.
    """

    def __init__(self, formatter, path, output):
        HTMLParser.__init__(self, formatter)
        self.path = path        # relative path put in front of every href
        self.ft = output        # output file
        self.indent = 0         # number of tabs for pretty printing of files
        self.proc = False       # True when actively processing, else False
                                # (headers, footers, etc)
        # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
        # XXX See SF bug <http://www.python.org/sf/546579>.
        self.hrefstack = []     # stack of hrefs from anchor begins

    def begin_group(self):
        """Enter one more nesting level and switch processing on."""
        self.proc = True
        self.indent += 1

    def finish_group(self):
        """Leave one nesting level; processing stops once the top level
        is reached again."""
        self.indent -= 1
        self.proc = self.indent > 0

    def anchor_bgn(self, href, name, type):
        """Note the start of an anchor (only while processing)."""
        if not self.proc:
            return
        # XXX See SF bug <http://www.python.org/sf/546579>.
        # XXX index.html for the 2.2.1 language reference manual contains
        # XXX nested <a></a> tags in the entry for the section on blank
        # XXX lines.  We want to ignore the nested part completely.
        if not self.hrefstack:
            self.saved_clear()
            self.hrefstack.append(href)

    def anchor_end(self):
        """Emit a sitemap object for the anchor that is being closed."""
        # XXX See the XXX notes in anchor_bgn(): an end tag without a
        # XXX recorded href belongs to a nested anchor and is skipped.
        if not (self.proc and self.hrefstack):
            return
        title = cgi.escape(self.saved_get(), True)
        target = self.path + '/' + self.hrefstack.pop()
        self.tab(object_sitemap % (title, target))

    def start_dl(self, atr_val):
        """<DL> opens a group."""
        self.begin_group()

    def end_dl(self):
        """</DL> closes a group."""
        self.finish_group()

    def do_dt(self, atr_val):
        """<DT> starts a list item."""
        # no trailing newline on purpose!
        self.tab("<LI>")

    def write(self, text):
        """Write text to the output file."""
        self.ft.write(text)

    def tab(self, text=''):
        """Write self.indent tabs to the output file, then text (if any)."""
        self.write('\t' * self.indent)
        if not text:
            return
        self.write(text)

    def saved_clear(self):
        """Forget all text saved by the formatter's writer."""
        self.formatter.writer.saved_clear()

    def saved_get(self):
        """Return all text saved by the formatter's writer as a string."""
        return self.formatter.writer.saved_get()
class IdxHlpHtmlParser(HelpHtmlParser):
    """Parser for index pages.

    Nothing special is needed here; the behaviour inherited from
    HelpHtmlParser is enough.
    """
class TocHlpHtmlParser(HelpHtmlParser):
    """Parser for table-of-contents pages.

    Both <DL> and <UL> lists open a group and are rendered as <UL> in
    the output; <LI> starts an entry.
    """

    def _open_list(self):
        # Enter the group first so the opening tag is written at the
        # new (deeper) indentation.
        self.begin_group()
        self.tab('<UL>\n')

    def _close_list(self):
        # Leave the group first so the closing tag is written at the
        # outer indentation.
        self.finish_group()
        self.tab('</UL>\n')

    def start_dl(self, atr_val):
        self._open_list()

    def end_dl(self):
        self._close_list()

    def start_ul(self, atr_val):
        self._open_list()

    def end_ul(self):
        self._close_list()

    def do_li(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")
def index(path, indexpage, output):
    """Parse one book's index page and write its entries to output.

    path is the book's directory, indexpage the file name of the index
    page inside it, and output the open file receiving the sitemap
    objects produced by IdxHlpHtmlParser.
    """
    parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + indexpage)
    # Close the input file even when parsing fails part-way through.
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()
def content(path, contentpage, output):
    """Parse one book's contents page and write its entries to output.

    path is the book's directory, contentpage the file name of the
    table-of-contents page inside it, and output the open file receiving
    the nested <UL> lists and sitemap objects produced by
    TocHlpHtmlParser.
    """
    parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + contentpage)
    # Close the input file even when parsing fails part-way through.
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()
413 def do_index(library, output):
414 output.write('<UL>\n')
415 for book in library:
416 print '\t', book.title, '-', book.indexpage
417 if book.indexpage:
418 index(book.directory, book.indexpage, output)
419 output.write('</UL>\n')
421 def do_content(library, version, output):
422 output.write(contents_header)
423 for book in library:
424 print '\t', book.title, '-', book.firstpage
425 path = book.directory + "/" + book.firstpage
426 output.write('<LI>')
427 output.write(object_sitemap % (book.title, path))
428 if book.contentpage:
429 content(book.directory, book.contentpage, output)
430 output.write(contents_footer)
def do_project(library, output, arch, version):
    """Write the project (.hhp) file, including its [FILES] section.

    library is the list of Book objects from supported_libraries for the
    version of the docs getting generated, output the open project file,
    arch the file basename and version the doc version number; the last
    two are substituted into project_template.

    Every '.html' and '.css' file found in a book's directory is listed
    once, as "directory\\page", even when several books share the same
    directory.
    """
    output.write(project_template % {'arch': arch, 'version': version})
    pathseen = {}
    for book in library:
        directory = book.directory
        for page in os.listdir(directory):
            if not (page.endswith('.html') or page.endswith('.css')):
                continue
            # Plain concatenation: the directory name must not be used
            # as a format string, or a '%' in it would be misread.
            fullpath = directory + '\\' + page + '\n'
            if fullpath in pathseen:
                continue
            output.write(fullpath)
            pathseen[fullpath] = True
448 def openfile(file):
449 try:
450 p = open(file, "w")
451 except IOError, msg:
452 print file, ":", msg
453 sys.exit(1)
454 return p
456 def usage():
457 print usage_mode
458 sys.exit(0)
460 def do_it(args = None):
461 if not args:
462 args = sys.argv[1:]
464 if not args:
465 usage()
467 try:
468 optlist, args = getopt.getopt(args, 'ckpv:')
469 except getopt.error, msg:
470 print msg
471 usage()
473 if not args or len(args) > 1:
474 usage()
475 arch = args[0]
477 version = None
478 for opt in optlist:
479 if opt[0] == '-v':
480 version = opt[1]
481 break
482 if not version:
483 usage()
485 library = supported_libraries[version]
487 if not (('-p','') in optlist):
488 fname = arch + '.stp'
489 f = openfile(fname)
490 print "Building stoplist", fname, "..."
491 words = stop_list.split()
492 words.sort()
493 for word in words:
494 print >> f, word
495 f.close()
497 f = openfile(arch + '.hhp')
498 print "Building Project..."
499 do_project(library, f, arch, version)
500 if version == '2.0.0':
501 for image in os.listdir('icons'):
502 f.write('icons'+ '\\' + image + '\n')
504 f.close()
506 if not (('-c','') in optlist):
507 f = openfile(arch + '.hhc')
508 print "Building Table of Content..."
509 do_content(library, version, f)
510 f.close()
512 if not (('-k','') in optlist):
513 f = openfile(arch + '.hhk')
514 print "Building Index..."
515 do_index(library, f)
516 f.close()
# Run the converter only when this file is executed as a script; importing
# it as a module has no side effects beyond the definitions above.
if __name__ == '__main__':
    do_it()