1 # Authors: David Goodger
2 # Contact: goodger@python.org
5 # Copyright: This module has been placed in the public domain.
8 Calling the ``publish_*`` convenience functions (or instantiating a
9 `Publisher` object) with component names will result in default
10 behavior. For custom behavior (setting component options), create
11 custom component objects first, and pass *them* to
12 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
14 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
17 __docformat__
= 'reStructuredText'
21 from docutils
import __version__
, SettingsSpec
22 from docutils
import frontend
, io
, utils
, readers
, writers
23 from docutils
.frontend
import OptionParser
29 A facade encapsulating the high-level logic of a Docutils system.
32 def __init__(self
, reader
=None, parser
=None, writer
=None,
33 source
=None, source_class
=io
.FileInput
,
34 destination
=None, destination_class
=io
.FileOutput
,
37 Initial setup. If any of `reader`, `parser`, or `writer` are not
38 specified, the corresponding ``set_...`` method should be called with
39 a component name (`set_reader` sets the parser as well).
43 """A `docutils.readers.Reader` instance."""
46 """A `docutils.parsers.Parser` instance."""
49 """A `docutils.writers.Writer` instance."""
52 """The source of input data, a `docutils.io.Input` instance."""
54 self
.source_class
= source_class
55 """The class for dynamically created source objects."""
57 self
.destination
= destination
58 """The destination for docutils output, a `docutils.io.Output`
61 self
.destination_class
= destination_class
62 """The class for dynamically created destination objects."""
64 self
.settings
= settings
65 """An object containing Docutils settings as instance attributes.
66 Set by `self.process_command_line()` or `self.get_settings()`."""
def set_reader(self, reader_name, parser, parser_name):
    """
    Instantiate the reader registered under `reader_name`.

    The reader class is looked up with `readers.get_reader_class()` and
    called with `parser` and `parser_name`.  The new instance is stored
    as `self.reader`, and its ``parser`` attribute as `self.parser`.
    """
    reader = readers.get_reader_class(reader_name)(parser, parser_name)
    self.reader = reader
    # Keep the publisher's parser in step with the one the reader holds.
    self.parser = reader.parser
def set_writer(self, writer_name):
    """
    Instantiate the writer registered under `writer_name`.

    The writer class is looked up with `writers.get_writer_class()`,
    called without arguments, and the result stored as `self.writer`.
    """
    self.writer = writers.get_writer_class(writer_name)()
def set_components(self, reader_name, parser_name, writer_name):
    """
    Fill in any component that has not been set yet, by name.

    - No reader: `self.set_reader()` is called with `reader_name`, the
      current `self.parser` and `parser_name`.
    - No parser: the reader's parser is adopted, after asking the reader
      to create one from `parser_name` if it has none.
    - No writer: `self.set_writer()` is called with `writer_name`.

    Components that are already set are left untouched.
    """
    if self.reader is None:
        self.set_reader(reader_name, self.parser, parser_name)

    if self.parser is None:
        reader = self.reader
        if reader.parser is None:
            reader.set_parser(parser_name)
        # Whatever parser the reader ended up with is the publisher's too.
        self.parser = reader.parser

    if self.writer is None:
        self.set_writer(writer_name)
89 def setup_option_parser(self
, usage
=None, description
=None,
90 settings_spec
=None, config_section
=None,
94 settings_spec
= SettingsSpec()
95 settings_spec
.config_section
= config_section
96 parts
= config_section
.split()
97 if len(parts
) > 1 and parts
[-1] == 'application':
98 settings_spec
.config_section_dependencies
= ['applications']
99 #@@@ Add self.source & self.destination to components in future?
100 option_parser
= OptionParser(
101 components
=(self
.parser
, self
.reader
, self
.writer
, settings_spec
),
102 defaults
=defaults
, read_config_files
=1,
103 usage
=usage
, description
=description
)
106 def get_settings(self
, usage
=None, description
=None,
107 settings_spec
=None, config_section
=None, **defaults
):
109 Set and return default settings (overrides in `defaults` dict).
111 Set components first (`self.set_reader` & `self.set_writer`).
112 Explicitly setting `self.settings` disables command line option
113 processing from `self.publish()`.
115 option_parser
= self
.setup_option_parser(
116 usage
, description
, settings_spec
, config_section
, **defaults
)
117 self
.settings
= option_parser
.get_default_values()
120 def process_programmatic_settings(self
, settings_spec
,
123 if self
.settings
is None:
124 defaults
= (settings_overrides
or {}).copy()
125 # Propagate exceptions by default when used programmatically:
126 defaults
.setdefault('traceback', 1)
127 self
.get_settings(settings_spec
=settings_spec
,
128 config_section
=config_section
,
131 def process_command_line(self
, argv
=None, usage
=None, description
=None,
132 settings_spec
=None, config_section
=None,
135 Pass an empty list to `argv` to avoid reading `sys.argv` (the
138 Set components first (`self.set_reader` & `self.set_writer`).
140 option_parser
= self
.setup_option_parser(
141 usage
, description
, settings_spec
, config_section
, **defaults
)
144 self
.settings
= option_parser
.parse_args(argv
)
def set_io(self, source_path=None, destination_path=None):
    """
    Create the I/O objects that have not been supplied yet.

    `self.set_source()` is called with `source_path` only when
    `self.source` is None, and `self.set_destination()` with
    `destination_path` only when `self.destination` is None.
    """
    # Source first, then destination; existing objects are never replaced.
    if self.source is None:
        self.set_source(source_path=source_path)

    if self.destination is None:
        self.set_destination(destination_path=destination_path)
def set_source(self, source=None, source_path=None):
    """
    Create `self.source` from `self.source_class`.

    An explicit `source_path` is recorded in ``self.settings._source``;
    when it is None the path already stored there is used instead.  The
    source object is built with `source`, the resulting path, and
    ``self.settings.input_encoding`` as the encoding.
    """
    settings = self.settings
    if source_path is not None:
        settings._source = source_path
    else:
        source_path = settings._source
    self.source = self.source_class(
        source=source,
        source_path=source_path,
        encoding=settings.input_encoding)
def set_destination(self, destination=None, destination_path=None):
    """
    Create `self.destination` from `self.destination_class`.

    An explicit `destination_path` is recorded in
    ``self.settings._destination``; when it is None the path already
    stored there is used instead.  The destination object is built with
    `destination`, the resulting path, and the ``output_encoding`` and
    ``output_encoding_error_handler`` settings.
    """
    settings = self.settings
    if destination_path is not None:
        settings._destination = destination_path
    else:
        destination_path = settings._destination
    self.destination = self.destination_class(
        destination=destination,
        destination_path=destination_path,
        encoding=settings.output_encoding,
        error_handler=settings.output_encoding_error_handler)
171 def apply_transforms(self
, document
):
172 document
.transformer
.populate_from_components(
173 (self
.source
, self
.reader
, self
.reader
.parser
, self
.writer
,
175 document
.transformer
.apply_transforms()
177 def publish(self
, argv
=None, usage
=None, description
=None,
178 settings_spec
=None, settings_overrides
=None,
179 config_section
=None, enable_exit_status
=None):
181 Process command line options and arguments (if `self.settings` not
182 already set), run `self.reader` and then `self.writer`. Return
183 `self.writer`'s output.
185 if self
.settings
is None:
186 self
.process_command_line(
187 argv
, usage
, description
, settings_spec
, config_section
,
188 **(settings_overrides
or {}))
193 document
= self
.reader
.read(self
.source
, self
.parser
,
195 self
.apply_transforms(document
)
196 output
= self
.writer
.write(document
, self
.destination
)
197 self
.writer
.assemble_parts()
198 except Exception, error
:
199 if self
.settings
.traceback
: # propagate exceptions?
200 self
.debugging_dumps(document
)
202 self
.report_Exception(error
)
204 self
.debugging_dumps(document
)
205 if (enable_exit_status
and document
206 and (document
.reporter
.max_level
207 >= self
.settings
.exit_status_level
)):
208 sys
.exit(document
.reporter
.max_level
+ 10)
213 def debugging_dumps(self
, document
):
216 if self
.settings
.dump_settings
:
217 print >>sys
.stderr
, '\n::: Runtime settings:'
218 print >>sys
.stderr
, pprint
.pformat(self
.settings
.__dict
__)
219 if self
.settings
.dump_internals
and document
:
220 print >>sys
.stderr
, '\n::: Document internals:'
221 print >>sys
.stderr
, pprint
.pformat(document
.__dict
__)
222 if self
.settings
.dump_transforms
and document
:
223 print >>sys
.stderr
, '\n::: Transforms applied:'
224 print >>sys
.stderr
, pprint
.pformat(document
.transformer
.applied
)
225 if self
.settings
.dump_pseudo_xml
and document
:
226 print >>sys
.stderr
, '\n::: Pseudo-XML:'
227 print >>sys
.stderr
, document
.pformat().encode(
228 'raw_unicode_escape')
230 def report_Exception(self
, error
):
231 if isinstance(error
, utils
.SystemMessage
):
232 self
.report_SystemMessage(error
)
233 elif isinstance(error
, UnicodeError):
234 self
.report_UnicodeError(error
)
236 print >>sys
.stderr
, '%s: %s' % (error
.__class
__.__name
__, error
)
237 print >>sys
.stderr
, ("""\
238 Exiting due to error. Use "--traceback" to diagnose.
239 Please report errors to <docutils-users@lists.sf.net>.
240 Include "--traceback" output, Docutils version (%s),
241 Python version (%s), your OS type & version, and the
242 command line used.""" % (__version__
, sys
.version
.split()[0]))
244 def report_SystemMessage(self
, error
):
245 print >>sys
.stderr
, ('Exiting due to level-%s (%s) system message.'
247 utils
.Reporter
.levels
[error
.level
]))
249 def report_UnicodeError(self
, error
):
253 'The specified output encoding (%s) cannot\n'
254 'handle all of the output.\n'
255 'Try setting "--output-encoding-error-handler" to\n'
257 '* "xmlcharrefreplace" (for HTML & XML output);\n'
258 % (error
.__class
__.__name
__, error
,
259 self
.settings
.output_encoding
))
261 data
= error
.object[error
.start
:error
.end
]
263 ' the output will contain "%s" and should be usable.\n'
264 '* "backslashreplace" (for other output formats, Python 2.3+);\n'
265 ' look for "%s" in the output.\n'
266 % (data
.encode('ascii', 'xmlcharrefreplace'),
267 data
.encode('ascii', 'backslashreplace')))
268 except AttributeError:
269 sys
.stderr
.write(' the output should be usable as-is.\n')
271 '* "replace"; look for "?" in the output.\n'
273 '"--output-encoding-error-handler" is currently set to "%s".\n'
275 'Exiting due to error. Use "--traceback" to diagnose.\n'
276 'If the advice above doesn\'t eliminate the error,\n'
277 'please report it to <docutils-users@lists.sf.net>.\n'
278 'Include "--traceback" output, Docutils version (%s),\n'
279 'Python version (%s), your OS type & version, and the\n'
280 'command line used.\n'
281 % (self
.settings
.output_encoding_error_handler
,
282 __version__
, sys
.version
.split()[0]))
284 default_usage
= '%prog [options] [<source> [<destination>]]'
285 default_description
= ('Reads from <source> (default is stdin) and writes to '
286 '<destination> (default is stdout).')
288 def publish_cmdline(reader
=None, reader_name
='standalone',
289 parser
=None, parser_name
='restructuredtext',
290 writer
=None, writer_name
='pseudoxml',
291 settings
=None, settings_spec
=None,
292 settings_overrides
=None, config_section
=None,
293 enable_exit_status
=1, argv
=None,
294 usage
=default_usage
, description
=default_description
):
296 Set up & run a `Publisher` for command-line-based file I/O (input and
297 output file paths taken automatically from the command line). Return the
298 encoded string output also.
300 Parameters: see `publish_programmatically` for the remainder.
302 - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
303 - `usage`: Usage string, output if there's a problem parsing the command
305 - `description`: Program description, output for the "--help" option
306 (along with command-line option descriptions).
308 pub
= Publisher(reader
, parser
, writer
, settings
=settings
)
309 pub
.set_components(reader_name
, parser_name
, writer_name
)
310 output
= pub
.publish(
311 argv
, usage
, description
, settings_spec
, settings_overrides
,
312 config_section
=config_section
, enable_exit_status
=enable_exit_status
)
315 def publish_file(source
=None, source_path
=None,
316 destination
=None, destination_path
=None,
317 reader
=None, reader_name
='standalone',
318 parser
=None, parser_name
='restructuredtext',
319 writer
=None, writer_name
='pseudoxml',
320 settings
=None, settings_spec
=None, settings_overrides
=None,
321 config_section
=None, enable_exit_status
=None):
323 Set up & run a `Publisher` for programmatic use with file-like I/O.
324 Return the encoded string output also.
326 Parameters: see `publish_programmatically`.
328 output
, pub
= publish_programmatically(
329 source_class
=io
.FileInput
, source
=source
, source_path
=source_path
,
330 destination_class
=io
.FileOutput
,
331 destination
=destination
, destination_path
=destination_path
,
332 reader
=reader
, reader_name
=reader_name
,
333 parser
=parser
, parser_name
=parser_name
,
334 writer
=writer
, writer_name
=writer_name
,
335 settings
=settings
, settings_spec
=settings_spec
,
336 settings_overrides
=settings_overrides
,
337 config_section
=config_section
,
338 enable_exit_status
=enable_exit_status
)
341 def publish_string(source
, source_path
=None, destination_path
=None,
342 reader
=None, reader_name
='standalone',
343 parser
=None, parser_name
='restructuredtext',
344 writer
=None, writer_name
='pseudoxml',
345 settings
=None, settings_spec
=None,
346 settings_overrides
=None, config_section
=None,
347 enable_exit_status
=None):
349 Set up & run a `Publisher` for programmatic use with string I/O. Return
350 the encoded string or Unicode string output.
352 For encoded string output, be sure to set the 'output_encoding' setting to
353 the desired encoding. Set it to 'unicode' for unencoded Unicode string
354 output. Here's one way::
356 publish_string(..., settings_overrides={'output_encoding': 'unicode'})
358 Similarly for Unicode string input (`source`)::
360 publish_string(..., settings_overrides={'input_encoding': 'unicode'})
362 Parameters: see `publish_programmatically`.
364 output
, pub
= publish_programmatically(
365 source_class
=io
.StringInput
, source
=source
, source_path
=source_path
,
366 destination_class
=io
.StringOutput
,
367 destination
=None, destination_path
=destination_path
,
368 reader
=reader
, reader_name
=reader_name
,
369 parser
=parser
, parser_name
=parser_name
,
370 writer
=writer
, writer_name
=writer_name
,
371 settings
=settings
, settings_spec
=settings_spec
,
372 settings_overrides
=settings_overrides
,
373 config_section
=config_section
,
374 enable_exit_status
=enable_exit_status
)
def publish_parts(source, source_path=None, destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Set up & run a `Publisher`, and return a dictionary of document parts.
    Dictionary keys are the names of parts, and values are Unicode strings;
    encoding is up to the client.  For programmatic use with string I/O.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    input.  Here's how::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # Only the Publisher is needed here: the parts are read from its writer,
    # so the assembled output string returned alongside it is discarded.
    _unused_output, pub = publish_programmatically(
        source_class=io.StringInput, source=source, source_path=source_path,
        destination_class=io.StringOutput,
        destination=None, destination_path=destination_path,
        reader=reader, reader_name=reader_name,
        parser=parser, parser_name=parser_name,
        writer=writer, writer_name=writer_name,
        settings=settings, settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return pub.writer.parts
410 def publish_programmatically(source_class
, source
, source_path
,
411 destination_class
, destination
, destination_path
,
415 settings
, settings_spec
,
416 settings_overrides
, config_section
,
419 Set up & run a `Publisher` for custom programmatic use. Return the
420 encoded string output and the Publisher object.
422 Applications should not need to call this function directly. If it does
423 seem to be necessary to call this function directly, please write to the
424 docutils-develop@lists.sourceforge.net mailing list.
428 * `source_class` **required**: The class for dynamically created source
429 objects. Typically `io.FileInput` or `io.StringInput`.
431 * `source`: Type depends on `source_class`:
433 - `io.FileInput`: Either a file-like object (must have 'read' and
434 'close' methods), or ``None`` (`source_path` is opened). If neither
435 `source` nor `source_path` are supplied, `sys.stdin` is used.
437 - `io.StringInput` **required**: The input string, either an encoded
438 8-bit string (set the 'input_encoding' setting to the correct
439 encoding) or a Unicode string (set the 'input_encoding' setting to
442 * `source_path`: Type depends on `source_class`:
444 - `io.FileInput`: Path to the input file, opened if no `source`
447 - `io.StringInput`: Optional. Path to the file or object that produced
448 `source`. Only used for diagnostic output.
450 * `destination_class` **required**: The class for dynamically created
451 destination objects. Typically `io.FileOutput` or `io.StringOutput`.
453 * `destination`: Type depends on `destination_class`:
455 - `io.FileOutput`: Either a file-like object (must have 'write' and
456 'close' methods), or ``None`` (`destination_path` is opened). If
457 neither `destination` nor `destination_path` are supplied,
458 `sys.stdout` is used.
460 - `io.StringOutput`: Not used; pass ``None``.
462 * `destination_path`: Type depends on `destination_class`:
464 - `io.FileOutput`: Path to the output file. Opened if no `destination`
467 - `io.StringOutput`: Path to the file or object which will receive the
468 output; optional. Used for determining relative paths (stylesheets,
471 * `reader`: A `docutils.readers.Reader` object.
473 * `reader_name`: Name or alias of the Reader class to be instantiated if
474 no `reader` supplied.
476 * `parser`: A `docutils.parsers.Parser` object.
478 * `parser_name`: Name or alias of the Parser class to be instantiated if
479 no `parser` supplied.
481 * `writer`: A `docutils.writers.Writer` object.
483 * `writer_name`: Name or alias of the Writer class to be instantiated if
484 no `writer` supplied.
486 * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
487 dotted-attribute access to runtime settings. It's the end result of the
488 `SettingsSpec`, config file, and option processing. If `settings` is
489 passed, it's assumed to be complete and no further setting/config/option
492 * `settings_spec`: A `docutils.SettingsSpec` subclass or object. Provides
493 extra application-specific settings definitions independently of
494 components. In other words, the application becomes a component, and
495 its settings data is processed along with that of the other components.
496 Used only if no `settings` specified.
498 * `settings_overrides`: A dictionary containing application-specific
499 settings defaults that override the defaults of other components.
500 Used only if no `settings` specified.
502 * `config_section`: A string, the name of the configuration file section
503 for this application. Overrides the ``config_section`` attribute
504 defined by `settings_spec`. Used only if no `settings` specified.
506 * `enable_exit_status`: Boolean; enable exit status at end of processing?
508 pub
= Publisher(reader
, parser
, writer
, settings
=settings
,
509 source_class
=source_class
,
510 destination_class
=destination_class
)
511 pub
.set_components(reader_name
, parser_name
, writer_name
)
512 pub
.process_programmatic_settings(
513 settings_spec
, settings_overrides
, config_section
)
514 pub
.set_source(source
, source_path
)
515 pub
.set_destination(destination
, destination_path
)
516 output
= pub
.publish(enable_exit_status
=enable_exit_status
)