Fix bug 2896512 and add some more test cases.
[docutils.git] / docutils / core.py
blob2f355a532b58cf0cd94f50b91f6b9bffd47a8cf5
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Calling the ``publish_*`` convenience functions (or instantiating a
7 `Publisher` object) with component names will result in default
8 behavior. For custom behavior (setting component options), create
9 custom component objects first, and pass *them* to
10 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
12 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
13 """
15 __docformat__ = 'reStructuredText'
17 import sys
18 import pprint
19 from docutils import __version__, __version_details__, SettingsSpec
20 from docutils import frontend, io, utils, readers, writers
21 from docutils.frontend import OptionParser
22 from docutils.transforms import Transformer
23 import docutils.readers.doctree
class Publisher:

    """
    A facade encapsulating the high-level logic of a Docutils system.

    A `Publisher` holds a reader (and its parser), a writer, an input
    source, an output destination and the runtime settings, and drives
    them through `publish()`.
    """

    def __init__(self, reader=None, parser=None, writer=None,
                 source=None, source_class=io.FileInput,
                 destination=None, destination_class=io.FileOutput,
                 settings=None):
        """
        Initial setup.  If any of `reader`, `parser`, or `writer` are not
        specified, the corresponding ``set_...`` method should be called with
        a component name (`set_reader` sets the parser as well).

        `reader`, `parser` and `writer` must be component instances (or
        ``None``); passing a component *name* here trips the assertion below.
        """

        self.document = None
        """The document tree (`docutils.nodes` objects)."""

        self.reader = reader
        """A `docutils.readers.Reader` instance."""

        self.parser = parser
        """A `docutils.parsers.Parser` instance."""

        self.writer = writer
        """A `docutils.writers.Writer` instance."""

        # Catch the common mistake of passing a name where an instance is
        # expected, and point the caller at the "*_name" parameters.
        for component in 'reader', 'parser', 'writer':
            assert not isinstance(getattr(self, component), str), (
                'passed string "%s" as "%s" parameter; pass an instance, '
                'or use the "%s_name" parameter instead (in '
                'docutils.core.publish_* convenience functions).'
                % (getattr(self, component), component, component))

        self.source = source
        """The source of input data, a `docutils.io.Input` instance."""

        self.source_class = source_class
        """The class for dynamically created source objects."""

        self.destination = destination
        """The destination for docutils output, a `docutils.io.Output`
        instance."""

        self.destination_class = destination_class
        """The class for dynamically created destination objects."""

        self.settings = settings
        """An object containing Docutils settings as instance attributes.
        Set by `self.process_command_line()` or `self.get_settings()`."""

    def set_reader(self, reader_name, parser, parser_name):
        """Set `self.reader` by name (and `self.parser` from the reader)."""
        reader_class = readers.get_reader_class(reader_name)
        self.reader = reader_class(parser, parser_name)
        self.parser = self.reader.parser

    def set_writer(self, writer_name):
        """Set `self.writer` by name."""
        writer_class = writers.get_writer_class(writer_name)
        self.writer = writer_class()

    def set_components(self, reader_name, parser_name, writer_name):
        """
        Create, by name, every component that was not supplied as an
        instance.  Components that are already set are left untouched.
        """
        if self.reader is None:
            self.set_reader(reader_name, self.parser, parser_name)
        if self.parser is None:
            # A reader instance was supplied; use its parser, creating one
            # by name first if the reader has none.
            if self.reader.parser is None:
                self.reader.set_parser(parser_name)
            self.parser = self.reader.parser
        if self.writer is None:
            self.set_writer(writer_name)

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """
        Build and return an `OptionParser` for the current components.

        If `config_section` is given it is stored on `settings_spec` (a new
        `SettingsSpec` is created when none was passed).  A multi-word
        section name ending in "application" additionally gets the
        "applications" section as a dependency.  Extra keyword arguments
        are passed to the option parser as `defaults`.
        """
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            parts = config_section.split()
            if len(parts) > 1 and parts[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        option_parser = OptionParser(
            components=(self.parser, self.reader, self.writer, settings_spec),
            defaults=defaults, read_config_files=1,
            usage=usage, description=description)
        return option_parser

    def get_settings(self, usage=None, description=None,
                     settings_spec=None, config_section=None, **defaults):
        """
        Set and return default settings (overrides in `defaults` dict).

        Set components first (`self.set_reader` & `self.set_writer`).
        Explicitly setting `self.settings` disables command line option
        processing from `self.publish()`.
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        self.settings = option_parser.get_default_values()
        return self.settings

    def process_programmatic_settings(self, settings_spec,
                                      settings_overrides,
                                      config_section):
        """
        Compute `self.settings` for programmatic use, unless already set.

        `settings_overrides` is copied, never modified in place.
        """
        if self.settings is None:
            defaults = (settings_overrides or {}).copy()
            # Propagate exceptions by default when used programmatically:
            defaults.setdefault('traceback', 1)
            self.get_settings(settings_spec=settings_spec,
                              config_section=config_section,
                              **defaults)

    def process_command_line(self, argv=None, usage=None, description=None,
                             settings_spec=None, config_section=None,
                             **defaults):
        """
        Parse the command line and store the result in `self.settings`.

        Pass an empty list to `argv` to avoid reading `sys.argv` (the
        default).

        Set components first (`self.set_reader` & `self.set_writer`).
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        if argv is None:
            argv = sys.argv[1:]
        self.settings = option_parser.parse_args(argv)

    def set_io(self, source_path=None, destination_path=None):
        """Create the source and/or destination objects if not yet set."""
        if self.source is None:
            self.set_source(source_path=source_path)
        if self.destination is None:
            self.set_destination(destination_path=destination_path)

    def set_source(self, source=None, source_path=None):
        """
        Instantiate `self.source_class` and store it in `self.source`.

        With no `source_path`, the path recorded in the settings is used;
        otherwise the given path is recorded in the settings.
        """
        if source_path is None:
            source_path = self.settings._source
        else:
            self.settings._source = source_path
        self.source = self.source_class(
            source=source, source_path=source_path,
            encoding=self.settings.input_encoding)

    def set_destination(self, destination=None, destination_path=None):
        """
        Instantiate `self.destination_class` and store it in
        `self.destination`.

        With no `destination_path`, the path recorded in the settings is
        used; otherwise the given path is recorded in the settings.
        """
        if destination_path is None:
            destination_path = self.settings._destination
        else:
            self.settings._destination = destination_path
        self.destination = self.destination_class(
            destination=destination, destination_path=destination_path,
            encoding=self.settings.output_encoding,
            error_handler=self.settings.output_encoding_error_handler)

    def apply_transforms(self):
        """Collect the components' transforms and apply them to the
        document."""
        self.document.transformer.populate_from_components(
            (self.source, self.reader, self.reader.parser, self.writer,
             self.destination))
        self.document.transformer.apply_transforms()

    def publish(self, argv=None, usage=None, description=None,
                settings_spec=None, settings_overrides=None,
                config_section=None, enable_exit_status=None):
        """
        Process command line options and arguments (if `self.settings` not
        already set), run `self.reader` and then `self.writer`.  Return
        `self.writer`'s output.

        Errors are re-raised when they happen before settings exist or when
        the ``traceback`` setting is on; otherwise they are reported on
        `sys.stderr` and the process exits with status 1.  With
        `enable_exit_status`, the process also exits (status: highest
        system-message level + 10) when that level reaches the
        ``exit_status_level`` setting.
        """
        exit_requested = None
        try:
            if self.settings is None:
                self.process_command_line(
                    argv, usage, description, settings_spec, config_section,
                    **(settings_overrides or {}))
            self.set_io()
            self.document = self.reader.read(self.source, self.parser,
                                             self.settings)
            self.apply_transforms()
            output = self.writer.write(self.document, self.destination)
            self.writer.assemble_parts()
        except SystemExit:
            # Handled before the generic clause so that a requested exit is
            # never reported as an error.  `sys.exc_info()` is used instead
            # of binding a name in the `except` clause because it is valid
            # on both old Python 2 releases and Python 3.
            error = sys.exc_info()[1]
            exit_requested = 1
            exit_status = error.code
        except Exception:
            error = sys.exc_info()[1]
            if not self.settings:       # exception too early to report nicely
                raise
            if self.settings.traceback: # Propagate exceptions?
                self.debugging_dumps()
                raise
            self.report_Exception(error)
            exit_requested = 1
            exit_status = 1
        self.debugging_dumps()
        if (enable_exit_status and self.document
            and (self.document.reporter.max_level
                 >= self.settings.exit_status_level)):
            sys.exit(self.document.reporter.max_level + 10)
        elif exit_requested:
            sys.exit(exit_status)
        return output

    def debugging_dumps(self):
        """
        Write the dumps enabled by the ``dump_*`` settings to `sys.stderr`.

        Does nothing while there is no document.
        """
        if not self.document:
            return
        write = sys.stderr.write
        if self.settings.dump_settings:
            write('\n::: Runtime settings:\n')
            write(pprint.pformat(self.settings.__dict__) + '\n')
        if self.settings.dump_internals:
            write('\n::: Document internals:\n')
            write(pprint.pformat(self.document.__dict__) + '\n')
        if self.settings.dump_transforms:
            write('\n::: Transforms applied:\n')
            write(' (priority, transform class, '
                  'pending node details, keyword args)\n')
            write(pprint.pformat(
                [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
                  pending and pending.details, kwargs)
                 for priority, xclass, pending, kwargs
                 in self.document.transformer.applied]) + '\n')
        if self.settings.dump_pseudo_xml:
            write('\n::: Pseudo-XML:\n')
            pseudo_xml = self.document.pformat().encode('raw_unicode_escape')
            if not isinstance(pseudo_xml, str):
                # Python 3: `encode` returned bytes, which a text stream
                # rejects.  raw_unicode_escape emits code points below 256
                # as single Latin-1 bytes, so Latin-1 decodes it losslessly.
                pseudo_xml = pseudo_xml.decode('latin-1')
            write(pseudo_xml + '\n')

    def report_Exception(self, error):
        """Report `error` on `sys.stderr`, dispatching on its type."""
        if isinstance(error, utils.SystemMessage):
            self.report_SystemMessage(error)
        elif isinstance(error, UnicodeEncodeError):
            self.report_UnicodeError(error)
        else:
            sys.stderr.write(
                '%s: %s\n' % (error.__class__.__name__, error))
            sys.stderr.write(
                'Exiting due to error. Use "--traceback" to diagnose.\n'
                'Please report errors to <docutils-users@lists.sf.net>.\n'
                'Include "--traceback" output, Docutils version (%s [%s]),\n'
                'Python version (%s), your OS type & version, and the\n'
                'command line used.\n'
                % (__version__, __version_details__,
                   sys.version.split()[0]))

    def report_SystemMessage(self, error):
        """Report the level of a fatal system message on `sys.stderr`."""
        sys.stderr.write(
            'Exiting due to level-%s (%s) system message.\n'
            % (error.level, utils.Reporter.levels[error.level]))

    def report_UnicodeError(self, error):
        """
        Explain an output-encoding failure on `sys.stderr` and list the
        available ``--output-encoding-error-handler`` choices.
        """
        sys.stderr.write(
            '%s: %s\n'
            '\n'
            'The specified output encoding (%s) cannot\n'
            'handle all of the output.\n'
            'Try setting "--output-encoding-error-handler" to\n'
            '\n'
            '* "xmlcharrefreplace" (for HTML & XML output);\n'
            % (error.__class__.__name__, error,
               self.settings.output_encoding))
        try:
            # Show what the offending text would look like under each
            # handler; skipped if the error object lacks these attributes.
            data = error.object[error.start:error.end]
            sys.stderr.write(
                ' the output will contain "%s" and should be usable.\n'
                '* "backslashreplace" (for other output formats, Python 2.3+);\n'
                ' look for "%s" in the output.\n'
                % (data.encode('ascii', 'xmlcharrefreplace'),
                   data.encode('ascii', 'backslashreplace')))
        except AttributeError:
            sys.stderr.write(' the output should be usable as-is.\n')
        sys.stderr.write(
            '* "replace"; look for "?" in the output.\n'
            '\n'
            '"--output-encoding-error-handler" is currently set to "%s".\n'
            '\n'
            'Exiting due to error. Use "--traceback" to diagnose.\n'
            'If the advice above doesn\'t eliminate the error,\n'
            'please report it to <docutils-users@lists.sf.net>.\n'
            'Include "--traceback" output, Docutils version (%s),\n'
            'Python version (%s), your OS type & version, and the\n'
            'command line used.\n'
            % (self.settings.output_encoding_error_handler,
               __version__, sys.version.split()[0]))


# Usage synopsis handed to the option parser by the command-line front ends.
default_usage = '%prog [options] [<source> [<destination>]]'

# Program description shown along with the "--help" option descriptions.
default_description = (
    'Reads from <source> (default is stdin) '
    'and writes to <destination> (default is stdout). '
    'See <http://docutils.sf.net/docs/user/config.html> '
    'for the full reference.')


def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line).  Return the
    encoded string output also.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader=reader, parser=parser, writer=writer,
                          settings=settings)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv=argv, usage=usage, description=description,
        settings_spec=settings_spec, settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)


def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with file-like I/O.
    Return the encoded string output also.

    Input is read through `io.FileInput` and output is written through
    `io.FileOutput`.

    Parameters: see `publish_programmatically`.
    """
    # Arguments are given in the positional order of
    # `publish_programmatically`; only the output half of its result is
    # of interest here.
    result = publish_programmatically(
        io.FileInput, source, source_path,
        io.FileOutput, destination, destination_path,
        reader, reader_name,
        parser, parser_name,
        writer, writer_name,
        settings, settings_spec,
        settings_overrides, config_section,
        enable_exit_status)
    return result[0]


def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O.  Return
    the encoded string or Unicode string output.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    output.  Here's one way::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    Similarly for Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # String I/O on both ends; no destination object is passed.
    result = publish_programmatically(
        io.StringInput, source, source_path,
        io.StringOutput, None, destination_path,
        reader, reader_name,
        parser, parser_name,
        writer, writer_name,
        settings, settings_spec,
        settings_overrides, config_section,
        enable_exit_status)
    return result[0]


def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Set up & run a `Publisher`, and return a dictionary of document parts.
    Dictionary keys are the names of parts, and values are Unicode strings;
    encoding is up to the client.  For programmatic use with string I/O.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    input.  Here's how::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # The rendered output string is not needed, only the publisher whose
    # writer holds the parts.
    publisher = publish_programmatically(
        source_class, source, source_path,
        io.StringOutput, None, destination_path,
        reader, reader_name,
        parser, parser_name,
        writer, writer_name,
        settings, settings_spec,
        settings_overrides, config_section,
        enable_exit_status)[1]
    return publisher.writer.parts


def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O.
    Return the document tree.

    The 'null' writer and `io.NullOutput` are used, so nothing is rendered;
    only the parsed and transformed document is of interest.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    input.  Here's one way::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    publisher = Publisher(reader=reader, parser=parser, writer=None,
                          source_class=source_class,
                          destination_class=io.NullOutput,
                          settings=settings)
    publisher.set_components(reader_name, parser_name, 'null')
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source=source, source_path=source_path)
    publisher.set_destination(destination=None, destination_path=None)
    # The writer's return value is discarded; the tree lives on the publisher.
    publisher.publish(enable_exit_status=enable_exit_status)
    return publisher.document


def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=None):
    """
    Set up & run a `Publisher` to render from an existing document
    tree data structure, for programmatic use with string I/O.  Return
    the encoded string output.

    Note that document.settings is overridden; if you want to use the settings
    of the original `document`, pass settings=document.settings.

    Also, new document.transformer and document.reporter objects are
    generated.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    output.  Here's one way::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an existing
    document tree.

    Other parameters: see `publish_programmatically`.
    """
    # The doctree reader takes the tree as its "source"; no parsing happens.
    doctree_reader = docutils.readers.doctree.Reader(parser_name='null')
    publisher = Publisher(reader=doctree_reader, parser=None, writer=writer,
                          source=io.DocTreeInput(document),
                          destination_class=io.StringOutput,
                          settings=settings)
    if not writer and writer_name:
        publisher.set_writer(writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_destination(destination=None,
                              destination_path=destination_path)
    return publisher.publish(enable_exit_status=enable_exit_status)


def publish_cmdline_to_binary(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description,
                    destination=None, destination_class=io.BinaryFileOutput
                    ):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line).  Return the
    encoded string output also.

    This is just like publish_cmdline, except that it uses
    io.BinaryFileOutput instead of io.FileOutput.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    - `destination`: An already constructed `docutils.io.Output` instance to
      write to.  When given, no destination object is created from
      `destination_class`.  Default ``None``.
    - `destination_class`: The class for the dynamically created destination
      object (used only when `destination` is ``None``).
    """
    # `destination` must be forwarded: the Publisher only builds a
    # destination from `destination_class` when it has none, so dropping
    # the argument here would silently ignore the caller's choice.
    pub = Publisher(reader, parser, writer, settings=settings,
                    destination=destination,
                    destination_class=destination_class)
    pub.set_components(reader_name, parser_name, writer_name)
    output = pub.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section, enable_exit_status=enable_exit_status)
    return output


def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Set up & run a `Publisher` for custom programmatic use.  Return the
    encoded string output and the Publisher object.

    Applications should not need to call this function directly.  If it does
    seem to be necessary to call this function directly, please write to the
    Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects.  Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - If `source_class` is `io.FileInput`: Either a file-like object
        (must have 'read' and 'close' methods), or ``None``
        (`source_path` is opened).  If neither `source` nor
        `source_path` are supplied, `sys.stdin` is used.

      - If `source_class` is `io.StringInput` **required**: The input
        string, either an encoded 8-bit string (set the
        'input_encoding' setting to the correct encoding) or a Unicode
        string (set the 'input_encoding' setting to 'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional.  Path to the file or object that produced
        `source`.  Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects.  Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened).  If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file.  Opened if no `destination`
        supplied.

      - `io.StringOutput`: Path to the file or object which will receive the
        output; optional.  Used for determining relative paths (stylesheets,
        source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings.  It's the end result of the
      `SettingsSpec`, config file, and option processing.  If `settings` is
      passed, it's assumed to be complete and no further setting/config/option
      processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object.  Provides
      extra application-specific settings definitions independently of
      components.  In other words, the application becomes a component, and
      its settings data is processed along with that of the other components.
      Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application.  Overrides the ``config_section`` attribute
      defined by `settings_spec`.  Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    publisher = Publisher(reader=reader, parser=parser, writer=writer,
                          source_class=source_class,
                          destination_class=destination_class,
                          settings=settings)
    # Order matters: components first, then settings (which depend on the
    # components), then the I/O objects (which depend on the settings).
    publisher.set_components(reader_name, parser_name, writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source=source, source_path=source_path)
    publisher.set_destination(destination=destination,
                              destination_path=destination_path)
    return (publisher.publish(enable_exit_status=enable_exit_status),
            publisher)