Add test case for issue 2926161 (commented out)
[docutils.git] / docutils / core.py
blob 19d57bb17e9aaf5e626c753286b0b7ef718fb0a7
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Calling the ``publish_*`` convenience functions (or instantiating a
7 `Publisher` object) with component names will result in default
8 behavior. For custom behavior (setting component options), create
9 custom component objects first, and pass *them* to
10 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
12 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
13 """
15 __docformat__ = 'reStructuredText'
17 import sys
18 import pprint
19 from docutils import __version__, __version_details__, SettingsSpec
20 from docutils import frontend, io, utils, readers, writers
21 from docutils.frontend import OptionParser
22 from docutils.transforms import Transformer
23 from docutils.error_reporting import ErrorOutput, ErrorString
24 import docutils.readers.doctree
26 class Publisher:
28 """
29 A facade encapsulating the high-level logic of a Docutils system.
30 """
32 def __init__(self, reader=None, parser=None, writer=None,
33 source=None, source_class=io.FileInput,
34 destination=None, destination_class=io.FileOutput,
35 settings=None):
36 """
37 Initial setup. If any of `reader`, `parser`, or `writer` are not
38 specified, the corresponding ``set_...`` method should be called with
39 a component name (`set_reader` sets the parser as well).
40 """
42 self.document = None
43 """The document tree (`docutils.nodes` objects)."""
45 self.reader = reader
46 """A `docutils.readers.Reader` instance."""
48 self.parser = parser
49 """A `docutils.parsers.Parser` instance."""
51 self.writer = writer
52 """A `docutils.writers.Writer` instance."""
54 for component in 'reader', 'parser', 'writer':
55 assert not isinstance(getattr(self, component), str), (
56 'passed string "%s" as "%s" parameter; pass an instance, '
57 'or use the "%s_name" parameter instead (in '
58 'docutils.core.publish_* convenience functions).'
59 % (getattr(self, component), component, component))
61 self.source = source
62 """The source of input data, a `docutils.io.Input` instance."""
64 self.source_class = source_class
65 """The class for dynamically created source objects."""
67 self.destination = destination
68 """The destination for docutils output, a `docutils.io.Output`
69 instance."""
71 self.destination_class = destination_class
72 """The class for dynamically created destination objects."""
74 self.settings = settings
75 """An object containing Docutils settings as instance attributes.
76 Set by `self.process_command_line()` or `self.get_settings()`."""
78 self._stderr = ErrorOutput()
80 def set_reader(self, reader_name, parser, parser_name):
81 """Set `self.reader` by name."""
82 reader_class = readers.get_reader_class(reader_name)
83 self.reader = reader_class(parser, parser_name)
84 self.parser = self.reader.parser
86 def set_writer(self, writer_name):
87 """Set `self.writer` by name."""
88 writer_class = writers.get_writer_class(writer_name)
89 self.writer = writer_class()
91 def set_components(self, reader_name, parser_name, writer_name):
92 if self.reader is None:
93 self.set_reader(reader_name, self.parser, parser_name)
94 if self.parser is None:
95 if self.reader.parser is None:
96 self.reader.set_parser(parser_name)
97 self.parser = self.reader.parser
98 if self.writer is None:
99 self.set_writer(writer_name)
101 def setup_option_parser(self, usage=None, description=None,
102 settings_spec=None, config_section=None,
103 **defaults):
104 if config_section:
105 if not settings_spec:
106 settings_spec = SettingsSpec()
107 settings_spec.config_section = config_section
108 parts = config_section.split()
109 if len(parts) > 1 and parts[-1] == 'application':
110 settings_spec.config_section_dependencies = ['applications']
111 #@@@ Add self.source & self.destination to components in future?
112 option_parser = OptionParser(
113 components=(self.parser, self.reader, self.writer, settings_spec),
114 defaults=defaults, read_config_files=1,
115 usage=usage, description=description)
116 return option_parser
118 def get_settings(self, usage=None, description=None,
119 settings_spec=None, config_section=None, **defaults):
121 Set and return default settings (overrides in `defaults` dict).
123 Set components first (`self.set_reader` & `self.set_writer`).
124 Explicitly setting `self.settings` disables command line option
125 processing from `self.publish()`.
127 option_parser = self.setup_option_parser(
128 usage, description, settings_spec, config_section, **defaults)
129 self.settings = option_parser.get_default_values()
130 return self.settings
132 def process_programmatic_settings(self, settings_spec,
133 settings_overrides,
134 config_section):
135 if self.settings is None:
136 defaults = (settings_overrides or {}).copy()
137 # Propagate exceptions by default when used programmatically:
138 defaults.setdefault('traceback', 1)
139 self.get_settings(settings_spec=settings_spec,
140 config_section=config_section,
141 **defaults)
143 def process_command_line(self, argv=None, usage=None, description=None,
144 settings_spec=None, config_section=None,
145 **defaults):
147 Pass an empty list to `argv` to avoid reading `sys.argv` (the
148 default).
150 Set components first (`self.set_reader` & `self.set_writer`).
152 option_parser = self.setup_option_parser(
153 usage, description, settings_spec, config_section, **defaults)
154 if argv is None:
155 argv = sys.argv[1:]
156 # converting to Unicode (Python 3 does this automatically):
157 if sys.version_info < (3,0):
158 # TODO: make this failsafe and reversible
159 argv_encoding = (sys.stdin.encoding or
160 frontend.locale_encoding or 'ascii')
161 argv = [a.decode(argv_encoding) for a in argv]
162 self.settings = option_parser.parse_args(argv)
164 def set_io(self, source_path=None, destination_path=None):
165 if self.source is None:
166 self.set_source(source_path=source_path)
167 if self.destination is None:
168 self.set_destination(destination_path=destination_path)
170 def set_source(self, source=None, source_path=None):
171 if source_path is None:
172 source_path = self.settings._source
173 else:
174 self.settings._source = source_path
175 self.source = self.source_class(
176 source=source, source_path=source_path,
177 encoding=self.settings.input_encoding)
179 def set_destination(self, destination=None, destination_path=None):
180 if destination_path is None:
181 destination_path = self.settings._destination
182 else:
183 self.settings._destination = destination_path
184 self.destination = self.destination_class(
185 destination=destination, destination_path=destination_path,
186 encoding=self.settings.output_encoding,
187 error_handler=self.settings.output_encoding_error_handler)
189 def apply_transforms(self):
190 self.document.transformer.populate_from_components(
191 (self.source, self.reader, self.reader.parser, self.writer,
192 self.destination))
193 self.document.transformer.apply_transforms()
195 def publish(self, argv=None, usage=None, description=None,
196 settings_spec=None, settings_overrides=None,
197 config_section=None, enable_exit_status=None):
199 Process command line options and arguments (if `self.settings` not
200 already set), run `self.reader` and then `self.writer`. Return
201 `self.writer`'s output.
203 exit = None
204 try:
205 if self.settings is None:
206 self.process_command_line(
207 argv, usage, description, settings_spec, config_section,
208 **(settings_overrides or {}))
209 self.set_io()
210 self.document = self.reader.read(self.source, self.parser,
211 self.settings)
212 self.apply_transforms()
213 output = self.writer.write(self.document, self.destination)
214 self.writer.assemble_parts()
215 except SystemExit, error:
216 exit = 1
217 exit_status = error.code
218 except Exception, error:
219 if not self.settings: # exception too early to report nicely
220 raise
221 if self.settings.traceback: # Propagate exceptions?
222 self.debugging_dumps()
223 raise
224 self.report_Exception(error)
225 exit = 1
226 exit_status = 1
227 self.debugging_dumps()
228 if (enable_exit_status and self.document
229 and (self.document.reporter.max_level
230 >= self.settings.exit_status_level)):
231 sys.exit(self.document.reporter.max_level + 10)
232 elif exit:
233 sys.exit(exit_status)
234 return output
236 def debugging_dumps(self):
237 if not self.document:
238 return
239 if self.settings.dump_settings:
240 print >>self._stderr, '\n::: Runtime settings:'
241 print >>self._stderr, pprint.pformat(self.settings.__dict__)
242 if self.settings.dump_internals:
243 print >>self._stderr, '\n::: Document internals:'
244 print >>self._stderr, pprint.pformat(self.document.__dict__)
245 if self.settings.dump_transforms:
246 print >>self._stderr, '\n::: Transforms applied:'
247 print >>self._stderr, (' (priority, transform class, '
248 'pending node details, keyword args)')
249 print >>self._stderr, pprint.pformat(
250 [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
251 pending and pending.details, kwargs)
252 for priority, xclass, pending, kwargs
253 in self.document.transformer.applied])
254 if self.settings.dump_pseudo_xml:
255 print >>self._stderr, '\n::: Pseudo-XML:'
256 print >>self._stderr, self.document.pformat().encode(
257 'raw_unicode_escape')
259 def report_Exception(self, error):
260 if isinstance(error, utils.SystemMessage):
261 self.report_SystemMessage(error)
262 elif isinstance(error, UnicodeEncodeError):
263 self.report_UnicodeError(error)
264 else:
265 print >>self._stderr, u'%s' % ErrorString(error)
266 print >>self._stderr, ("""\
267 Exiting due to error. Use "--traceback" to diagnose.
268 Please report errors to <docutils-users@lists.sf.net>.
269 Include "--traceback" output, Docutils version (%s [%s]),
270 Python version (%s), your OS type & version, and the
271 command line used.""" % (__version__, __version_details__,
272 sys.version.split()[0]))
274 def report_SystemMessage(self, error):
275 print >>self._stderr, ('Exiting due to level-%s (%s) system message.'
276 % (error.level,
277 utils.Reporter.levels[error.level]))
279 def report_UnicodeError(self, error):
280 data = error.object[error.start:error.end]
281 self._stderr.write(
282 '%s\n'
283 '\n'
284 'The specified output encoding (%s) cannot\n'
285 'handle all of the output.\n'
286 'Try setting "--output-encoding-error-handler" to\n'
287 '\n'
288 '* "xmlcharrefreplace" (for HTML & XML output);\n'
289 ' the output will contain "%s" and should be usable.\n'
290 '* "backslashreplace" (for other output formats);\n'
291 ' look for "%s" in the output.\n'
292 '* "replace"; look for "?" in the output.\n'
293 '\n'
294 '"--output-encoding-error-handler" is currently set to "%s".\n'
295 '\n'
296 'Exiting due to error. Use "--traceback" to diagnose.\n'
297 'If the advice above doesn\'t eliminate the error,\n'
298 'please report it to <docutils-users@lists.sf.net>.\n'
299 'Include "--traceback" output, Docutils version (%s),\n'
300 'Python version (%s), your OS type & version, and the\n'
301 'command line used.\n'
302 % (ErrorString(error),
303 self.settings.output_encoding,
304 data.encode('ascii', 'xmlcharrefreplace'),
305 data.encode('ascii', 'backslashreplace'),
306 self.settings.output_encoding_error_handler,
307 __version__, sys.version.split()[0]))
# Usage line shown by the command-line front ends ("%prog" is substituted by
# the option parser).
default_usage = '%prog [options] [<source> [<destination>]]'

# Program description shown with "--help" by the command-line front ends.
default_description = (
    'Reads from <source> (default is stdin) '
    'and writes to <destination> (default is stdout). '
    'See <http://docutils.sf.net/docs/user/config.html> '
    'for the full reference.')
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line).  Return the
    encoded string output also.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader=reader, parser=parser, writer=writer,
                          settings=settings)
    # Instantiate by name whichever components were not passed in.
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv=argv, usage=usage, description=description,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with file-like I/O.
    Return the encoded string output also.

    Parameters: see `publish_programmatically`.
    """
    # Arguments are passed positionally, in the order of the
    # `publish_programmatically` signature.
    result, _publisher = publish_programmatically(
        io.FileInput, source, source_path,
        io.FileOutput, destination, destination_path,
        reader, reader_name,
        parser, parser_name,
        writer, writer_name,
        settings, settings_spec,
        settings_overrides, config_section,
        enable_exit_status)
    return result
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O.  Return
    the encoded string or Unicode string output.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    output.  Here's one way::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    Similarly for Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # Arguments are passed positionally, in the order of the
    # `publish_programmatically` signature; string output has no
    # destination object, hence the ``None``.
    result, _publisher = publish_programmatically(
        io.StringInput, source, source_path,
        io.StringOutput, None, destination_path,
        reader, reader_name,
        parser, parser_name,
        writer, writer_name,
        settings, settings_spec,
        settings_overrides, config_section,
        enable_exit_status)
    return result
def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Set up & run a `Publisher`, and return a dictionary of document parts.
    Dictionary keys are the names of parts, and values are Unicode strings;
    encoding is up to the client.  For programmatic use with string I/O.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    input.  Here's how::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # Arguments are passed positionally, in the order of the
    # `publish_programmatically` signature.  The rendered output itself is
    # discarded; only the writer's parts are of interest here.
    _rendered, publisher = publish_programmatically(
        source_class, source, source_path,
        io.StringOutput, None, destination_path,
        reader, reader_name,
        parser, parser_name,
        writer, writer_name,
        settings, settings_spec,
        settings_overrides, config_section,
        enable_exit_status)
    return publisher.writer.parts
def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O.
    Return the document tree.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    input.  Here's one way::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    pub = Publisher(reader=reader, parser=parser, writer=None,
                    settings=settings,
                    source_class=source_class,
                    destination_class=io.NullOutput)
    # No writer is passed in; the writer named 'null' is instantiated.
    pub.set_components(reader_name, parser_name, 'null')
    pub.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    pub.set_source(source, source_path)
    pub.set_destination(None, None)
    # The writer's return value is not used; the caller wants the tree.
    pub.publish(enable_exit_status=enable_exit_status)
    return pub.document
def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=None):
    """
    Set up & run a `Publisher` to render from an existing document
    tree data structure, for programmatic use with string I/O.  Return
    the encoded string output.

    Note that document.settings is overridden; if you want to use the settings
    of the original `document`, pass settings=document.settings.

    Also, new document.transformer and document.reporter objects are
    generated.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    output.  Here's one way::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an existing
    document tree.

    Other parameters: see `publish_programmatically`.
    """
    doctree_reader = docutils.readers.doctree.Reader(parser_name='null')
    publisher = Publisher(reader=doctree_reader, parser=None, writer=writer,
                          source=io.DocTreeInput(document),
                          destination_class=io.StringOutput,
                          settings=settings)
    # Only look a writer up by name when no writer instance was supplied.
    if not writer and writer_name:
        publisher.set_writer(writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_destination(destination=None,
                              destination_path=destination_path)
    rendered = publisher.publish(enable_exit_status=enable_exit_status)
    return rendered
def publish_cmdline_to_binary(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description,
                    destination=None, destination_class=io.BinaryFileOutput
                    ):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line).  Return the
    encoded string output also.

    This is just like publish_cmdline, except that it uses
    io.BinaryFileOutput instead of io.FileOutput.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    # NOTE(review): `destination` is accepted but not forwarded to the
    # Publisher here; only `destination_class` is used.
    pub = Publisher(reader, parser, writer, settings=settings,
                    destination_class=destination_class)
    pub.set_components(reader_name, parser_name, writer_name)
    output = pub.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section, enable_exit_status=enable_exit_status)
    return output
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Set up & run a `Publisher` for custom programmatic use.  Return the
    encoded string output and the Publisher object.

    Applications should not need to call this function directly.  If it does
    seem to be necessary to call this function directly, please write to the
    Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects.  Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - If `source_class` is `io.FileInput`:  Either a file-like object
        (must have 'read' and 'close' methods), or ``None``
        (`source_path` is opened).  If neither `source` nor
        `source_path` are supplied, `sys.stdin` is used.

      - If `source_class` is `io.StringInput` **required**:  The input
        string, either an encoded 8-bit string (set the
        'input_encoding' setting to the correct encoding) or a Unicode
        string (set the 'input_encoding' setting to 'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional.  Path to the file or object that produced
        `source`.  Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects.  Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened).  If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file.  Opened if no `destination`
        supplied.

      - `io.StringOutput`: Path to the file or object which will receive the
        output; optional.  Used for determining relative paths (stylesheets,
        source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings.  It's the end result of the
      `SettingsSpec`, config file, and option processing.  If `settings` is
      passed, it's assumed to be complete and no further setting/config/option
      processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object.  Provides
      extra application-specific settings definitions independently of
      components.  In other words, the application becomes a component, and
      its settings data is processed along with that of the other components.
      Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application.  Overrides the ``config_section`` attribute
      defined by `settings_spec`.  Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    publisher = Publisher(reader=reader, parser=parser, writer=writer,
                          settings=settings,
                          source_class=source_class,
                          destination_class=destination_class)
    # Instantiate by name whichever components were not passed in.
    publisher.set_components(reader_name, parser_name, writer_name)
    # Settings must exist before the source and destination are created.
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source=source, source_path=source_path)
    publisher.set_destination(destination=destination,
                              destination_path=destination_path)
    rendered = publisher.publish(enable_exit_status=enable_exit_status)
    return rendered, publisher