"""
rst2chm.py - Converts reStructuredText into Microsoft (TM) HTML Help.

Copyright (C) 2003 by Ollie Rutherfurd <oliver@rutherfurd.net>

License: Python license
"""
18 from docutils
import nodes
, writers
# ---------------------------------------------------------------------------
# default location of HTML Help compiler

CHM_COMPILER = 'C:\\Program Files\\HTML Help Workshop\\hhc.exe'
27 # ---------------------------------------------------------------------------
28 # template for HHC (contents file)
31 <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
34 <meta name="GENERATOR" content="rst2chm v%s">
37 <OBJECT type="text/site properties">
38 <param name="Auto Generated" value="No">
43 <LI><OBJECT type="text/sitemap">
44 <param name="Name" value="%(name)s">
45 <param name="Local" value="%(href)s">
55 Compatibility=1.1 or later
56 Compiled file=%(chm_file)s
57 Contents file=%(hhc_file)s
58 Default topic=%(default_topic)s
59 Display compile progress=%(display_compile_progress)s
60 Full-text search=%(full_text_search_on)s
# ---------------------------------------------------------------------------
# just a couple languages (those supported by docutils)
#
# mappings (from "htmlhelp\langcodes.xml" in docbook xsl stylesheets)
#
# need to add the rest, but first need to figure out how to handle
# multiple mappings for the same abbreviation

# Maps a lower-case 2-letter language code to the language string that is
# substituted into the project file template under the 'language' key.
LANGUAGES = {
    'af': '0x0436 Afrikaans',
    'de': '0x0407 German (Germany)',
    'en': '0x0409 English (United States)',
    'es': '0x040a Spanish (Traditional Sort)',
    'fr': '0x040c French (France)',
    'it': '0x0410 Italian',
    'ru': '0x0419 Russian',
    'sk': '0x041b Slovak',
    'sv': '0x041d Swedish',
}
# ---------------------------------------------------------------------------
# custom usage message (as we've got positional args)

USAGE = """usage: %prog [options] OUTPUT_FILE INPUT_FILE [INPUT_FILE2 ...]"""
95 # ---------------------------------------------------------------------------
96 # option group setting spec
99 'rst2chm-Specific Options',
102 ('HTML Help File Title. Default is name of HTML Help File.',
106 ('Default topic. Default is first given file.',
108 {'default': None, 'metavar': '<file>'}),
110 ('Path to HTML Help Compiler (hhc.exe). Required if hhc.exe not in '
111 ' PATH or `%s`.' % CHM_COMPILER
,
113 {'default': 'hhc.exe', 'dest': 'hhc', 'metavar': '<file>'}),
115 ('Disable full-text searching in generated CHM file. Full-text'
116 ' searching enabled by default.',
117 ['--no-full-text-search'],
118 {'default': 1, 'dest': 'full_text_search', 'action': 'store_false'}),
120 ('Specify the language of input text (ISO 639 2-letter identifier).'
121 ' Default is "en" (English).',
122 ['--language', '-l'],
123 {'dest': 'language_code', 'default': 'en', 'metavar': '<name>'}),
125 ("Relative path to CSS file use with HTML.",
127 {'default': None, 'metavar': 'FILENAME'}),
129 ("Don't generate HTML files from reST source files."
130 " This is useful if you wish to customize the HTML generation.",
132 {'default': '1', 'dest': 'generate_html', 'action': 'store_false'}),
134 ("Don't remove generated .hhc & .hhp files. By default these files"
137 {'default': '1', 'dest': 'clean', 'action': 'store_false'}),
139 ('No status messages. Default verbose output.',
141 {'default': 1, 'dest': 'verbose', 'action': 'store_false'}),
# ---------------------------------------------------------------------------
# overrides for HTML writer

# Passed as `settings_overrides` when the HTML files are published.  The
# 'stylesheet_path', 'embed_stylesheet' and 'language_code' entries are
# replaced at run time from the command-line options.
HTML_WRITER_OVERRIDES = {
    'xml_declaration': '',  # hhc.exe doesn't like it
    'stylesheet_path': 'default.css',
    'embed_stylesheet': '',  # will be set to "yes", if stylesheet given
    'language_code': 'en',
    'output_encoding': 'iso-8859-1',
}
# ---------------------------------------------------------------------------

class CHMCompileError(Exception):
    """
    Exception raised when chm file can't be generated.
    """
# ---------------------------------------------------------------------------
# docutils Writer for HTML Help index file

class Writer(writers.Writer):
    """
    docutils Writer producing HTML Help contents entries for one document.

    The document is walked with an `HTMLHelpContentsTranslator` and the
    text it collects becomes the writer's output.
    """

    # Name of the HTML file to be generated for the document.  It is used
    # to reference the location of each section, so it must be set before
    # the document is written.
    section_filename = ''

    def __init__(self):
        writers.Writer.__init__(self)
        self.translator_class = HTMLHelpContentsTranslator
        self.section_filename = ''

    def translate(self):
        """Walk `self.document` and store the collected text in `self.output`."""
        visitor = HTMLHelpContentsTranslator(self.document)
        # the visitor builds every link target from this filename
        visitor.section_filename = self.section_filename
        self.document.walkabout(visitor)
        self.output = visitor.astext()
# ---------------------------------------------------------------------------
# docutils NodeVisitor that writes .hhc contents for a single file

class HTMLHelpContentsTranslator(nodes.NodeVisitor):
    """
    HTMLHelpContentsTranslator collects sections and titles
    of a single document and renders them as nested ``<UL>`` lists of
    `HHC_ITEM` entries.

    `section_filename` must be set to the name of the HTML file the
    entries should link to before the document is walked.
    """

    def __init__(self, document):
        self.section_filename = ''
        self.document = document
        self.content = []           # output fragments, joined by astext()
        self.section_level = 0      # 0 while outside of any section

    def astext(self):
        """Return everything collected so far as a single string."""
        return ''.join(self.content)

    def encode(self, text):
        """Encode special characters in `text` & return."""
        # taken from htmlcss1 writer
        # @@@ A codec to do these and all other HTML entities would be nice.
        # "&" has to be replaced first, so the entities added by the
        # following replacements are not escaped a second time.
        text = text.replace("&", "&amp;")
        text = text.replace("<", "&lt;")
        text = text.replace('"', "&quot;")
        text = text.replace(">", "&gt;")
        text = text.replace("@", "&#64;")  # may thwart some address harvesters
        return text

    def visit_document(self, node):
        self.content.append('<UL>\n')

    def depart_document(self, node):
        self.content.append('</UL>\n')

    def visit_section(self, node):
        self.section_level += 1
        self.content.append('<UL>\n')

    def depart_section(self, node):
        self.content.append('</UL>\n')
        self.section_level -= 1

    def visit_title(self, node):
        """Add a contents entry for a document or section title."""
        # only want to collect document and section titles
        if not isinstance(node.parent, (nodes.document, nodes.section)):
            return

        name, href = self.encode(node.astext()), ''

        if self.section_level == 0:
            # document title: link to the file itself
            href = self.section_filename
        elif node.parent['ids']:
            # only add this to contents if we can link to it
            # and to link to it, 'id' is needed
            href = self.section_filename + '#' + node.parent['ids'][0]

        if href:
            self.content.append(HHC_ITEM % {'name': name, 'href': href})

    def depart_title(self, node):
        pass

    # every other node type is irrelevant for the contents file

    def unknown_visit(self, node):
        pass

    def unknown_departure(self, node):
        pass
def status(msg, options):
    """
    If `options.verbose`, `msg` is written to `sys.stderr`.

    `msg` is written as given; no newline is appended.
    """
    if options.verbose:
        sys.stderr.write(msg)
def txt2htmlfilename(filename):
    """
    Returns HTML filename for given txt filename.

    Backslashes are converted to forward slashes and the extension
    (if any) is replaced by ``.html``.
    """
    return os.path.splitext(filename.replace('\\', '/'))[0] + '.html'
def write_contents_file(filenames, options):
    """
    Creates .hhc (HTML Help Contents file).

    Uses a custom docutils.writer.Writer to extract
    section names and links from each reStructuredText
    file in `filenames`.

    File created is `options.hhc_file`.
    """
    status('creating contents file (%s...) \n' % options.hhc_file, options)

    contents = []
    total = len(filenames)
    for index, filename in enumerate(filenames):
        status('* %s (%d of %d)... ' % (filename, index + 1, total), options)

        # this should really be relative
        html_filename = txt2htmlfilename(filename)

        writer = Writer()
        writer.section_filename = html_filename

        pub = docutils.core.Publisher()
        pub.set_reader('standalone', None, 'restructuredtext')
        pub.writer = writer
        settings = pub.get_settings(output_encoding='iso-8859-1')
        pub.settings._destination = ''
        pub.source = docutils.io.FileInput(
            source_path=filename, encoding=settings.input_encoding)
        pub.destination = docutils.io.StringOutput(
            encoding=settings.output_encoding)
        pub.document = pub.reader.read(pub.source, pub.parser, pub.settings)
        pub.apply_transforms()
        output = pub.writer.write(pub.document, pub.destination)
        pub.writer.assemble_parts()
        contents.append(output)

        status('OK\n', options)

    f = open(options.hhc_file, 'w')
    try:
        # trailing newline matches what a `print` statement would write
        f.write(HHC_HEADER + ''.join(contents) + HHC_FOOTER + '\n')
    finally:
        f.close()
def write_project_file(html_files, options):
    """
    Creates HTML Help Project (.hhp) file.

    `options.hhp_file` is the name of the created file.

    If `options.default_topic` is not set, it is set to the first
    entry of `html_files`.  Raises `KeyError` if `options.language_code`
    is not a key of `LANGUAGES`.
    """
    if not options.default_topic:
        options.default_topic = html_files[0]

    # values substituted into HHP_TEMPLATE; the flags are indexed with a
    # boolean, so a true option selects 'Yes' and a false one 'No'
    settings = {
        'chm_file': options.chm_file,
        'hhc_file': options.hhc_file,
        'default_topic': options.default_topic,
        'display_compile_progress': ['Yes', 'No'][not options.verbose],
        'full_text_search_on': ['Yes', 'No'][not options.full_text_search],
        'language': LANGUAGES[options.language_code.lower()],
        'title': options.title,
        'files': '\n'.join(html_files),
    }

    status('creating project file (%s...) ' % options.hhp_file, options)

    f = open(options.hhp_file, 'w')
    try:
        # trailing newline matches what a `print` statement would write
        f.write(HHP_TEMPLATE % settings + '\n')
    finally:
        f.close()

    status('OK\n', options)
def create_html_files(filenames, options):
    """
    Generates HTML files from reST source files using html writer.

    Each file is written next to its source, under the name given by
    `txt2htmlfilename`, using `HTML_WRITER_OVERRIDES` as settings
    overrides.

    returns names of generated files.
    """
    html_files = []

    status('creating HTML files...\n', options)
    total = len(filenames)
    for index, filename in enumerate(filenames):
        status('* %s (%d of %d)... ' % (filename, index + 1, total), options)
        html_file = txt2htmlfilename(filename)
        docutils.core.publish_file(
            source=None, source_path=filename,
            destination_path=html_file,
            reader=None, reader_name='standalone',
            parser=None, parser_name='restructuredtext',
            writer=None, writer_name='html',
            settings=None, settings_spec=None,
            settings_overrides=HTML_WRITER_OVERRIDES)
        html_files.append(html_file)
        status('OK\n', options)

    return html_files
378 def compile_chm(options
):
380 Creates .chm file for `options.hhp_file`.
382 Looks for 'hhc.exe' using PATH and the default location.
383 If hhc.exe can't be found, or there's an error executing it
384 then an `CHMCompileError` exception is raised.
388 status('compiling %s... ' % options
.chm_file
, options
)
390 # search for hhc.exe in PATH, and append default location (for good measure)
391 PATH
= os
.environ
['PATH'].split(os
.pathsep
)
392 exes
= filter(os
.path
.isfile
, [os
.path
.join(d
,'hhc.exe') for d
in PATH
])
393 exes
.append(CHM_COMPILER
)
395 r
= 1 # it appears that hhc.exe returns 1 on success
399 hhp_file
= '"' + os
.path
.abspath(options
.hhp_file
) + '"'
400 r
= os
.spawnv(os
.P_WAIT
, exe
, [hhp_file
,hhp_file
])
404 if e
.errno
== 2: # FNF
405 # if exe not found (CHM_COMPILER) just keep going
406 # as an error will be generated later
409 raise CHMCompileError(str(e
))
412 raise CHMCompileError('hhc.exe not found')
414 status('OK\n',options
)
416 # ---------------------------------------------------------------------------
423 # create option parser, and add rst2chm options
424 parser
= optparse
.OptionParser(usage
=USAGE
)
425 name
,description
,options
= SETTINGS_SPEC
426 group
= optparse
.OptionGroup(parser
, name
, description
)
427 for help_text
, option_strings
, kwargs
in options
:
428 group
.add_option(help=help_text
, *option_strings
, **kwargs
)
429 parser
.add_option_group(group
)
431 options
,args
= parser
.parse_args(args
)
433 args
= args
[1:] # remove script name
438 options
.chm_file
= args
[0] # output file is first arg
439 # make sure user puts output file as first argument
440 if options
.chm_file
[-4:].lower() != '.chm':
441 print >> sys
.stderr
, "\nERROR: `%s` does not end with `.chm`.\n" % \
443 print >> sys
.stderr
, "use `--help` for usage instructions"
446 # get filenames from command line and expand globs
449 filenames
.extend(glob
.glob(a
))
451 # use chm filename as base for hhc & hhp filenames
452 options
.hhc_file
= os
.path
.splitext(options
.chm_file
)[0] + '.hhc'
453 options
.hhp_file
= os
.path
.splitext(options
.chm_file
)[0] + '.hhp'
455 if options
.stylesheet
:
456 HTML_WRITER_OVERRIDES
['stylesheet_path'] = options
.stylesheet
457 HTML_WRITER_OVERRIDES
['embed_stylesheet'] = 'yes'
458 HTML_WRITER_OVERRIDES
['language_code'] = options
.language_code
460 if not options
.title
:
461 options
.title
= options
.chm_file
463 write_contents_file(filenames
,options
)
464 if options
.generate_html
:
465 html_files
= create_html_files(filenames
,options
)
467 html_files
= [txt2htmlfilename(f
) for f
in filenames
]
468 write_project_file(html_files
,options
)
473 status('cleaning up... ', options
)
474 if options
.generate_html
:
475 map(os
.remove
, html_files
)
476 os
.remove(options
.hhp_file
)
477 os
.remove(options
.hhc_file
)
478 status('OK\n' ,options
)
480 # ---------------------------------------------------------------------------
482 if __name__
== '__main__':
485 except CHMCompileError
,e
:
486 print >> sys
.stderr
, """
487 Error generating chm file.
491 Please ensure you have the HTML Help workshop installed and that hhc.exe \
492 is in the PATH, or specify the location of hhc.exe using the `--compiler` \
494 except KeyboardInterrupt,e
:
498 # :indentSize=4:lineSeparator=\r\n:maxLineLen=80:noTabs=true:tabSize=4:deepIndent=true: