add tinkerer
[docutils.git] / docutils / core.py
blob7ba37706bf45902c2063ce03c42ae5e3196b6a48
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Calling the ``publish_*`` convenience functions (or instantiating a
7 `Publisher` object) with component names will result in default
8 behavior. For custom behavior (setting component options), create
9 custom component objects first, and pass *them* to
10 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
12 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
13 """
15 __docformat__ = 'reStructuredText'
17 import sys
18 import pprint
19 from docutils import __version__, __version_details__, SettingsSpec
20 from docutils import frontend, io, utils, readers, writers
21 from docutils.frontend import OptionParser
22 from docutils.transforms import Transformer
23 from docutils.utils.error_reporting import ErrorOutput, ErrorString
24 import docutils.readers.doctree
class Publisher:

    """
    A facade encapsulating the high-level logic of a Docutils system.

    A publisher ties together a reader (with its parser), a writer, a
    source, a destination and a settings object, and drives them from
    `publish()`.
    """

    def __init__(self, reader=None, parser=None, writer=None,
                 source=None, source_class=io.FileInput,
                 destination=None, destination_class=io.FileOutput,
                 settings=None):
        """
        Initial setup.  If any of `reader`, `parser`, or `writer` are not
        specified, the corresponding ``set_...`` method should be called with
        a component name (`set_reader` sets the parser as well).

        `reader`, `parser` and `writer` must be component *instances*;
        passing a string trips the assertion below.
        """

        self.document = None
        """The document tree (`docutils.nodes` objects)."""

        self.reader = reader
        """A `docutils.readers.Reader` instance."""

        self.parser = parser
        """A `docutils.parsers.Parser` instance."""

        self.writer = writer
        """A `docutils.writers.Writer` instance."""

        # Catch the common mistake of passing a component *name* where an
        # instance is expected.
        for component in 'reader', 'parser', 'writer':
            assert not isinstance(getattr(self, component), str), (
                'passed string "%s" as "%s" parameter; pass an instance, '
                'or use the "%s_name" parameter instead (in '
                'docutils.core.publish_* convenience functions).'
                % (getattr(self, component), component, component))

        self.source = source
        """The source of input data, a `docutils.io.Input` instance."""

        self.source_class = source_class
        """The class for dynamically created source objects."""

        self.destination = destination
        """The destination for docutils output, a `docutils.io.Output`
        instance."""

        self.destination_class = destination_class
        """The class for dynamically created destination objects."""

        self.settings = settings
        """An object containing Docutils settings as instance attributes.
        Set by `self.process_command_line()` or `self.get_settings()`."""

        self._stderr = ErrorOutput()

    def _print_stderr(self, text):
        """
        Write `text` followed by a newline to `self._stderr`.

        Stands in for the Python-2-only ``print >>self._stderr, text``
        statement (one write for the text, one for the newline) so that the
        module parses on Python 2 and Python 3 alike.
        """
        self._stderr.write(text)
        self._stderr.write('\n')

    def set_reader(self, reader_name, parser, parser_name):
        """Set `self.reader` by name (and `self.parser` from the reader)."""
        reader_class = readers.get_reader_class(reader_name)
        self.reader = reader_class(parser, parser_name)
        self.parser = self.reader.parser

    def set_writer(self, writer_name):
        """Set `self.writer` by name."""
        writer_class = writers.get_writer_class(writer_name)
        self.writer = writer_class()

    def set_components(self, reader_name, parser_name, writer_name):
        """
        Instantiate, by name, every component that is still ``None``.

        Components that were passed to the constructor as instances are
        left untouched.
        """
        if self.reader is None:
            self.set_reader(reader_name, self.parser, parser_name)
        if self.parser is None:
            # A reader instance was supplied; make sure it has a parser and
            # mirror that parser on the publisher.
            if self.reader.parser is None:
                self.reader.set_parser(parser_name)
            self.parser = self.reader.parser
        if self.writer is None:
            self.set_writer(writer_name)

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """
        Build and return an `OptionParser` for the current components.

        If `config_section` is given it is stored on `settings_spec` (a
        fresh `SettingsSpec` is created when none was passed).  A section
        name of several words ending in "application" additionally depends
        on the "applications" section.
        """
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            parts = config_section.split()
            if len(parts) > 1 and parts[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        option_parser = OptionParser(
            components=(self.parser, self.reader, self.writer, settings_spec),
            defaults=defaults, read_config_files=True,
            usage=usage, description=description)
        return option_parser

    def get_settings(self, usage=None, description=None,
                     settings_spec=None, config_section=None, **defaults):
        """
        Set and return default settings (overrides in `defaults` dict).

        Set components first (`self.set_reader` & `self.set_writer`).
        Explicitly setting `self.settings` disables command line option
        processing from `self.publish()`.
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        self.settings = option_parser.get_default_values()
        return self.settings

    def process_programmatic_settings(self, settings_spec,
                                      settings_overrides,
                                      config_section):
        """
        Create default settings for programmatic use, unless
        `self.settings` is already set.

        `settings_overrides` is copied before use, so the caller's
        dictionary is not modified.
        """
        if self.settings is None:
            defaults = (settings_overrides or {}).copy()
            # Propagate exceptions by default when used programmatically:
            defaults.setdefault('traceback', True)
            self.get_settings(settings_spec=settings_spec,
                              config_section=config_section,
                              **defaults)

    def process_command_line(self, argv=None, usage=None, description=None,
                             settings_spec=None, config_section=None,
                             **defaults):
        """
        Parse command-line arguments and store the result in
        `self.settings`.

        Pass an empty list to `argv` to avoid reading `sys.argv` (the
        default).

        Set components first (`self.set_reader` & `self.set_writer`).
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        if argv is None:
            argv = sys.argv[1:]
            # converting to Unicode (Python 3 does this automatically):
            if sys.version_info < (3,0):
                # TODO: make this failsafe and reversible?
                argv_encoding = (frontend.locale_encoding or 'ascii')
                argv = [a.decode(argv_encoding) for a in argv]
        self.settings = option_parser.parse_args(argv)

    def set_io(self, source_path=None, destination_path=None):
        """Create the source and destination objects if not yet set."""
        if self.source is None:
            self.set_source(source_path=source_path)
        if self.destination is None:
            self.set_destination(destination_path=destination_path)

    def set_source(self, source=None, source_path=None):
        """
        Instantiate `self.source_class` and store it as `self.source`.

        When `source_path` is ``None`` the path is taken from
        ``self.settings._source``; otherwise the given path is recorded
        there.
        """
        if source_path is None:
            source_path = self.settings._source
        else:
            self.settings._source = source_path
        # The constructor is called exactly once.  An earlier revision
        # wrapped this call in ``try``/``except TypeError`` and retried with
        # byte-identical arguments, which could only raise the same error
        # again after running the constructor a second time.
        self.source = self.source_class(
            source=source, source_path=source_path,
            encoding=self.settings.input_encoding)

    def set_destination(self, destination=None, destination_path=None):
        """
        Instantiate `self.destination_class` and store it as
        `self.destination`.

        When `destination_path` is ``None`` the path is taken from
        ``self.settings._destination``; otherwise the given path is
        recorded there.
        """
        if destination_path is None:
            destination_path = self.settings._destination
        else:
            self.settings._destination = destination_path
        self.destination = self.destination_class(
            destination=destination, destination_path=destination_path,
            encoding=self.settings.output_encoding,
            error_handler=self.settings.output_encoding_error_handler)

    def apply_transforms(self):
        """Collect the components' transforms and apply them."""
        self.document.transformer.populate_from_components(
            (self.source, self.reader, self.reader.parser, self.writer,
             self.destination))
        self.document.transformer.apply_transforms()

    def publish(self, argv=None, usage=None, description=None,
                settings_spec=None, settings_overrides=None,
                config_section=None, enable_exit_status=False):
        """
        Process command line options and arguments (if `self.settings` not
        already set), run `self.reader` and then `self.writer`.  Return
        `self.writer`'s output.

        A `SystemExit` raised during processing is re-issued after the
        debugging dumps.  Any other exception is re-raised unchanged when no
        settings exist yet or when ``settings.traceback`` is true; otherwise
        it is reported on the error stream and the process exits with
        status 1.  With `enable_exit_status` true, a reporter level at or
        above ``settings.exit_status_level`` exits with that level + 10.
        """
        exit_requested = False
        try:
            if self.settings is None:
                self.process_command_line(
                    argv, usage, description, settings_spec, config_section,
                    **(settings_overrides or {}))
            self.set_io()
            self.document = self.reader.read(self.source, self.parser,
                                             self.settings)
            self.apply_transforms()
            output = self.writer.write(self.document, self.destination)
            self.writer.assemble_parts()
        except SystemExit:
            # ``sys.exc_info()`` instead of ``except X, e`` / ``except X as
            # e`` keeps this valid on every Python 2 and 3 release.
            error = sys.exc_info()[1]
            exit_requested = True
            exit_status = error.code
        except Exception:
            error = sys.exc_info()[1]
            if not self.settings:       # exception too early to report nicely
                raise
            if self.settings.traceback: # Propagate exceptions?
                self.debugging_dumps()
                raise
            self.report_Exception(error)
            exit_requested = True
            exit_status = 1
        self.debugging_dumps()
        if (enable_exit_status and self.document
            and (self.document.reporter.max_level
                 >= self.settings.exit_status_level)):
            sys.exit(self.document.reporter.max_level + 10)
        elif exit_requested:
            sys.exit(exit_status)
        # Only reached when the ``try`` body completed, so `output` is bound.
        return output

    def debugging_dumps(self):
        """
        Write the dumps enabled in the settings (``dump_settings``,
        ``dump_internals``, ``dump_transforms``, ``dump_pseudo_xml``) to the
        error stream.  Does nothing while there is no document.
        """
        if not self.document:
            return
        if self.settings.dump_settings:
            self._print_stderr('\n::: Runtime settings:')
            self._print_stderr(pprint.pformat(self.settings.__dict__))
        if self.settings.dump_internals:
            self._print_stderr('\n::: Document internals:')
            self._print_stderr(pprint.pformat(self.document.__dict__))
        if self.settings.dump_transforms:
            self._print_stderr('\n::: Transforms applied:')
            self._print_stderr(' (priority, transform class, '
                               'pending node details, keyword args)')
            self._print_stderr(pprint.pformat(
                [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
                  pending and pending.details, kwargs)
                 for priority, xclass, pending, kwargs
                 in self.document.transformer.applied]))
        if self.settings.dump_pseudo_xml:
            self._print_stderr('\n::: Pseudo-XML:')
            # NOTE(review): on Python 3 this hands a bytes object to
            # `self._stderr.write` -- confirm that ErrorOutput accepts bytes.
            self._print_stderr(self.document.pformat().encode(
                'raw_unicode_escape'))

    def report_Exception(self, error):
        """
        Write a user-oriented message for `error` to the error stream,
        choosing the wording by exception type.
        """
        if isinstance(error, utils.SystemMessage):
            self.report_SystemMessage(error)
        elif isinstance(error, UnicodeEncodeError):
            self.report_UnicodeError(error)
        elif isinstance(error, io.InputError):
            self._stderr.write(u'Unable to open source file for reading:\n'
                               u' %s\n' % ErrorString(error))
        elif isinstance(error, io.OutputError):
            self._stderr.write(
                u'Unable to open destination file for writing:\n'
                u' %s\n' % ErrorString(error))
        else:
            self._print_stderr(u'%s' % ErrorString(error))
            self._print_stderr(
                'Exiting due to error. Use "--traceback" to diagnose.\n'
                'Please report errors to <docutils-users@lists.sf.net>.\n'
                'Include "--traceback" output, Docutils version (%s [%s]),\n'
                'Python version (%s), your OS type & version, and the\n'
                'command line used.'
                % (__version__, __version_details__,
                   sys.version.split()[0]))

    def report_SystemMessage(self, error):
        """Report that processing stopped on a system message."""
        self._print_stderr('Exiting due to level-%s (%s) system message.'
                           % (error.level,
                              utils.Reporter.levels[error.level]))

    def report_UnicodeError(self, error):
        """
        Explain an output encoding failure and suggest error handlers.

        The offending characters (``error.object[error.start:error.end]``)
        are shown in the form each suggested handler would produce.
        """
        data = error.object[error.start:error.end]
        self._stderr.write(
            '%s\n'
            '\n'
            'The specified output encoding (%s) cannot\n'
            'handle all of the output.\n'
            'Try setting "--output-encoding-error-handler" to\n'
            '\n'
            '* "xmlcharrefreplace" (for HTML & XML output);\n'
            ' the output will contain "%s" and should be usable.\n'
            '* "backslashreplace" (for other output formats);\n'
            ' look for "%s" in the output.\n'
            '* "replace"; look for "?" in the output.\n'
            '\n'
            '"--output-encoding-error-handler" is currently set to "%s".\n'
            '\n'
            'Exiting due to error. Use "--traceback" to diagnose.\n'
            'If the advice above doesn\'t eliminate the error,\n'
            'please report it to <docutils-users@lists.sf.net>.\n'
            'Include "--traceback" output, Docutils version (%s),\n'
            'Python version (%s), your OS type & version, and the\n'
            'command line used.\n'
            % (ErrorString(error),
               self.settings.output_encoding,
               data.encode('ascii', 'xmlcharrefreplace'),
               data.encode('ascii', 'backslashreplace'),
               self.settings.output_encoding_error_handler,
               __version__, sys.version.split()[0]))
# Usage line and description used as defaults by the command-line front ends
# (`publish_cmdline` and `publish_cmdline_to_binary`).
default_usage = '%prog [options] [<source> [<destination>]]'
default_description = ' '.join((
    'Reads from <source> (default is stdin) and writes to',
    '<destination> (default is stdout). See',
    '<http://docutils.sf.net/docs/user/config.html> for',
    'the full reference.',
))
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=True, argv=None,
                    usage=default_usage, description=default_description):
    """
    Run a `Publisher` as a command-line front end: input and output file
    paths are taken from the command line.  The encoded string output is
    returned as well.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader, parser, writer, settings=settings)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=False):
    """
    Programmatic front end for file-like I/O: runs a `Publisher` with
    `io.FileInput` and `io.FileOutput`.  The encoded string output is
    returned as well.

    Parameters: see `publish_programmatically`.
    """
    # The publisher object is part of the result but is not needed here.
    encoded_output, _publisher = publish_programmatically(
        source_class=io.FileInput,
        source=source,
        source_path=source_path,
        destination_class=io.FileOutput,
        destination=destination,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return encoded_output
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=False):
    """
    Programmatic front end for string I/O: runs a `Publisher` with
    `io.StringInput` and `io.StringOutput` and returns the encoded string
    or Unicode string output.

    To get encoded string output, set the 'output_encoding' setting to the
    desired encoding; set it to 'unicode' to get an unencoded Unicode
    string.  One way to do that::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    The same applies to Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # The publisher object is part of the result but is not needed here.
    string_output, _publisher = publish_programmatically(
        source_class=io.StringInput,
        source=source,
        source_path=source_path,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return string_output
def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=False):
    """
    Run a `Publisher` and return the writer's dictionary of document parts.
    Keys are the part names and values are Unicode strings; encoding is up
    to the client.  Intended for programmatic use with string I/O.

    For encoded string input, set the 'input_encoding' setting to the
    desired encoding; set it to 'unicode' for unencoded Unicode string
    input, like this::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # Only the publisher is of interest: the parts live on its writer.
    _output, publisher = publish_programmatically(
        source_class=source_class,
        source=source,
        source_path=source_path,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return publisher.writer.parts
def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=False):
    """
    Run a `Publisher` for programmatic use with string I/O and return the
    resulting document tree.

    For encoded string input, set the 'input_encoding' setting to the
    desired encoding; set it to 'unicode' for unencoded Unicode string
    input.  One way to do that::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    publisher = Publisher(
        reader=reader,
        parser=parser,
        writer=None,
        settings=settings,
        source_class=source_class,
        destination_class=io.NullOutput)
    # The writer is always selected by the name 'null'.
    publisher.set_components(reader_name, parser_name, 'null')
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(None, None)
    # The writer's return value is not used; the tree is the result.
    publisher.publish(enable_exit_status=enable_exit_status)
    return publisher.document
def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=False):
    """
    Render an existing document tree with a `Publisher`, for programmatic
    use with string I/O.  Returns the encoded string output.

    Note that document.settings is overridden; to keep the settings of the
    original `document`, pass settings=document.settings.

    New document.transformer and document.reporter objects are generated
    as well.

    For encoded string output, set the 'output_encoding' setting to the
    desired encoding; set it to 'unicode' for unencoded Unicode string
    output.  One way to do that::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an existing
    document tree.

    Other parameters: see `publish_programmatically`.
    """
    doctree_reader = docutils.readers.doctree.Reader(parser_name='null')
    publisher = Publisher(
        reader=doctree_reader,
        parser=None,
        writer=writer,
        source=io.DocTreeInput(document),
        destination_class=io.StringOutput,
        settings=settings)
    # Look the writer up by name only when no instance was supplied.
    if not writer and writer_name:
        publisher.set_writer(writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_destination(None, destination_path)
    rendered = publisher.publish(enable_exit_status=enable_exit_status)
    return rendered
def publish_cmdline_to_binary(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=True, argv=None,
                    usage=default_usage, description=default_description,
                    destination=None, destination_class=io.BinaryFileOutput
                    ):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line).  Return the
    encoded string output also.

    This is just like publish_cmdline, except that it uses
    io.BinaryFileOutput instead of io.FileOutput.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    - `destination`: Optional ready-made destination object, handed to the
      `Publisher`.  With the default ``None`` the publisher creates one from
      `destination_class`.  (Presumably a `docutils.io.Output` instance --
      confirm against callers.)
    - `destination_class`: The class for dynamically created destination
      objects.
    """
    # `destination` used to be accepted and then silently dropped; it is now
    # forwarded.  The default (None) behaves exactly as before.
    pub = Publisher(reader, parser, writer, settings=settings,
                    destination=destination,
                    destination_class=destination_class)
    pub.set_components(reader_name, parser_name, writer_name)
    output = pub.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section, enable_exit_status=enable_exit_status)
    return output
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Run a `Publisher` for custom programmatic use and return a pair: the
    encoded string output and the Publisher object.

    Applications should not need to call this function directly.  If it does
    seem necessary, please write to the Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects.  Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - If `source_class` is `io.FileInput`:  Either a file-like object
        (must have 'read' and 'close' methods), or ``None``
        (`source_path` is opened).  If neither `source` nor
        `source_path` are supplied, `sys.stdin` is used.

      - If `source_class` is `io.StringInput` **required**:  The input
        string, either an encoded 8-bit string (set the
        'input_encoding' setting to the correct encoding) or a Unicode
        string (set the 'input_encoding' setting to 'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional.  Path to the file or object that produced
        `source`.  Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects.  Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened).  If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file.  Opened if no `destination`
        supplied.

      - `io.StringOutput`: Path to the file or object which will receive the
        output; optional.  Used for determining relative paths (stylesheets,
        source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings.  It's the end result of the
      `SettingsSpec`, config file, and option processing.  If `settings` is
      passed, it's assumed to be complete and no further setting/config/option
      processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object.  Provides
      extra application-specific settings definitions independently of
      components.  In other words, the application becomes a component, and
      its settings data is processed along with that of the other components.
      Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application.  Overrides the ``config_section`` attribute
      defined by `settings_spec`.  Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    publisher = Publisher(
        reader=reader,
        parser=parser,
        writer=writer,
        settings=settings,
        source_class=source_class,
        destination_class=destination_class)
    publisher.set_components(reader_name, parser_name, writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(destination, destination_path)
    result = publisher.publish(enable_exit_status=enable_exit_status)
    return result, publisher