math, error_reporting, and urischemes moved to the utils package.
[docutils.git] / docutils / core.py
blob03fd4e549c6380f76cb1bc2cdd9b5262ba5f5586
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Calling the ``publish_*`` convenience functions (or instantiating a
7 `Publisher` object) with component names will result in default
8 behavior. For custom behavior (setting component options), create
9 custom component objects first, and pass *them* to
10 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
12 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
13 """
15 __docformat__ = 'reStructuredText'
17 import sys
18 import pprint
19 from docutils import __version__, __version_details__, SettingsSpec
20 from docutils import frontend, io, utils, readers, writers
21 from docutils.frontend import OptionParser
22 from docutils.transforms import Transformer
23 from docutils.utils.error_reporting import ErrorOutput, ErrorString
24 import docutils.readers.doctree
class Publisher:

    """
    A facade encapsulating the high-level logic of a Docutils system.

    A `Publisher` holds a reader (and its parser), a writer, a source, a
    destination and a settings object.  `publish()` runs the reader, applies
    the transforms and runs the writer.
    """

    def __init__(self, reader=None, parser=None, writer=None,
                 source=None, source_class=io.FileInput,
                 destination=None, destination_class=io.FileOutput,
                 settings=None):
        """
        Initial setup.  If any of `reader`, `parser`, or `writer` are not
        specified, the corresponding ``set_...`` method should be called with
        a component name (`set_reader` sets the parser as well).

        `reader`, `parser` and `writer` must be component *instances*;
        passing a string triggers an assertion error that points the caller
        to the ``*_name`` parameters of the ``publish_*`` functions.
        """

        self.document = None
        """The document tree (`docutils.nodes` objects)."""

        self.reader = reader
        """A `docutils.readers.Reader` instance."""

        self.parser = parser
        """A `docutils.parsers.Parser` instance."""

        self.writer = writer
        """A `docutils.writers.Writer` instance."""

        # Catch the common mistake of passing a component *name* where a
        # component *object* is expected.
        for component in 'reader', 'parser', 'writer':
            assert not isinstance(getattr(self, component), str), (
                'passed string "%s" as "%s" parameter; pass an instance, '
                'or use the "%s_name" parameter instead (in '
                'docutils.core.publish_* convenience functions).'
                % (getattr(self, component), component, component))

        self.source = source
        """The source of input data, a `docutils.io.Input` instance."""

        self.source_class = source_class
        """The class for dynamically created source objects."""

        self.destination = destination
        """The destination for docutils output, a `docutils.io.Output`
        instance."""

        self.destination_class = destination_class
        """The class for dynamically created destination objects."""

        self.settings = settings
        """An object containing Docutils settings as instance attributes.
        Set by `self.process_command_line()` or `self.get_settings()`."""

        self._stderr = ErrorOutput()

    def _print_stderr(self, text):
        """Write `text` followed by a newline to `self._stderr`."""
        # Two separate writes (instead of ``text + '\n'``) so that `text`
        # may be either a byte string or a Unicode string.
        self._stderr.write(text)
        self._stderr.write('\n')

    def set_reader(self, reader_name, parser, parser_name):
        """Set `self.reader` by name."""
        reader_class = readers.get_reader_class(reader_name)
        self.reader = reader_class(parser, parser_name)
        # The reader owns the parser; mirror it on the publisher.
        self.parser = self.reader.parser

    def set_writer(self, writer_name):
        """Set `self.writer` by name."""
        writer_class = writers.get_writer_class(writer_name)
        self.writer = writer_class()

    def set_components(self, reader_name, parser_name, writer_name):
        """
        Instantiate by name every component that is still ``None``.

        Components that were passed to the constructor as objects are left
        untouched.
        """
        if self.reader is None:
            self.set_reader(reader_name, self.parser, parser_name)
        if self.parser is None:
            if self.reader.parser is None:
                self.reader.set_parser(parser_name)
            self.parser = self.reader.parser
        if self.writer is None:
            self.set_writer(writer_name)

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """
        Build and return an `OptionParser` for the current components.

        If `config_section` is given it is stored on `settings_spec` (a
        fresh `SettingsSpec` is created when none was passed).  A section
        name of more than one word ending in "application" additionally
        depends on the "applications" section.
        """
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            parts = config_section.split()
            if len(parts) > 1 and parts[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        option_parser = OptionParser(
            components=(self.parser, self.reader, self.writer, settings_spec),
            defaults=defaults, read_config_files=True,
            usage=usage, description=description)
        return option_parser

    def get_settings(self, usage=None, description=None,
                     settings_spec=None, config_section=None, **defaults):
        """
        Set and return default settings (overrides in `defaults` dict).

        Set components first (`self.set_reader` & `self.set_writer`).
        Explicitly setting `self.settings` disables command line option
        processing from `self.publish()`.
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        self.settings = option_parser.get_default_values()
        return self.settings

    def process_programmatic_settings(self, settings_spec,
                                      settings_overrides,
                                      config_section):
        """
        Create default settings for programmatic use.

        Does nothing if `self.settings` is already set.
        """
        if self.settings is None:
            # Copy, so the caller's dictionary is not modified.
            defaults = (settings_overrides or {}).copy()
            # Propagate exceptions by default when used programmatically:
            defaults.setdefault('traceback', True)
            self.get_settings(settings_spec=settings_spec,
                              config_section=config_section,
                              **defaults)

    def process_command_line(self, argv=None, usage=None, description=None,
                             settings_spec=None, config_section=None,
                             **defaults):
        """
        Pass an empty list to `argv` to avoid reading `sys.argv` (the
        default).

        Set components first (`self.set_reader` & `self.set_writer`).
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        if argv is None:
            argv = sys.argv[1:]
            # converting to Unicode (Python 3 does this automatically):
            if sys.version_info < (3,0):
                # TODO: make this failsafe and reversible?
                argv_encoding = (frontend.locale_encoding or 'ascii')
                argv = [a.decode(argv_encoding) for a in argv]
        self.settings = option_parser.parse_args(argv)

    def set_io(self, source_path=None, destination_path=None):
        """Create the source and/or destination object if not yet set."""
        if self.source is None:
            self.set_source(source_path=source_path)
        if self.destination is None:
            self.set_destination(destination_path=destination_path)

    def set_source(self, source=None, source_path=None):
        """
        Create `self.source` from `self.source_class`.

        When `source_path` is None the path stored in the settings is used;
        otherwise the given path is stored in the settings.
        """
        if source_path is None:
            source_path = self.settings._source
        else:
            self.settings._source = source_path
        # Raise IOError instead of system exit with `traceback == True`
        # TODO: change io.FileInput's default behaviour and remove this hack
        try:
            self.source = self.source_class(
                source=source, source_path=source_path,
                encoding=self.settings.input_encoding,
                handle_io_errors=False)
        except TypeError:
            # Source classes that do not accept `handle_io_errors`.
            self.source = self.source_class(
                source=source, source_path=source_path,
                encoding=self.settings.input_encoding)

    def set_destination(self, destination=None, destination_path=None):
        """
        Create `self.destination` from `self.destination_class`.

        When `destination_path` is None the path stored in the settings is
        used; otherwise the given path is stored in the settings.
        """
        if destination_path is None:
            destination_path = self.settings._destination
        else:
            self.settings._destination = destination_path
        self.destination = self.destination_class(
            destination=destination, destination_path=destination_path,
            encoding=self.settings.output_encoding,
            error_handler=self.settings.output_encoding_error_handler)
        # Raise IOError instead of system exit with `traceback == True`
        # TODO: change the output class's default behaviour and remove
        # this hack
        self.destination.handle_io_errors=False

    def apply_transforms(self):
        """Collect the transforms of all components and apply them."""
        self.document.transformer.populate_from_components(
            (self.source, self.reader, self.reader.parser, self.writer,
             self.destination))
        self.document.transformer.apply_transforms()

    def publish(self, argv=None, usage=None, description=None,
                settings_spec=None, settings_overrides=None,
                config_section=None, enable_exit_status=False):
        """
        Process command line options and arguments (if `self.settings` not
        already set), run `self.reader` and then `self.writer`.  Return
        `self.writer`'s output.

        If an exception is raised and the ``traceback`` setting is false,
        the error is reported on `self._stderr` and the process exits with
        status 1.  With `enable_exit_status`, the process exits with
        ``max_level + 10`` when the highest system message level reaches
        the ``exit_status_level`` setting.
        """
        must_exit = False
        try:
            if self.settings is None:
                self.process_command_line(
                    argv, usage, description, settings_spec, config_section,
                    **(settings_overrides or {}))
            self.set_io()
            self.document = self.reader.read(self.source, self.parser,
                                             self.settings)
            self.apply_transforms()
            output = self.writer.write(self.document, self.destination)
            self.writer.assemble_parts()
        except SystemExit:
            # The exception object is fetched with sys.exc_info() because
            # that form parses under both Python 2 and Python 3.
            error = sys.exc_info()[1]
            must_exit = True
            exit_status = error.code
        except Exception:
            error = sys.exc_info()[1]
            if not self.settings:       # exception too early to report nicely
                raise
            if self.settings.traceback: # Propagate exceptions?
                self.debugging_dumps()
                raise
            self.report_Exception(error)
            must_exit = True
            exit_status = 1
        self.debugging_dumps()
        if (enable_exit_status and self.document
            and (self.document.reporter.max_level
                 >= self.settings.exit_status_level)):
            sys.exit(self.document.reporter.max_level + 10)
        elif must_exit:
            sys.exit(exit_status)
        return output

    def debugging_dumps(self):
        """
        Write the dumps requested by the ``dump_*`` settings to
        `self._stderr`.  Does nothing if there is no document.
        """
        if not self.document:
            return
        if self.settings.dump_settings:
            self._print_stderr('\n::: Runtime settings:')
            self._print_stderr(pprint.pformat(self.settings.__dict__))
        if self.settings.dump_internals:
            self._print_stderr('\n::: Document internals:')
            self._print_stderr(pprint.pformat(self.document.__dict__))
        if self.settings.dump_transforms:
            self._print_stderr('\n::: Transforms applied:')
            self._print_stderr(' (priority, transform class, '
                               'pending node details, keyword args)')
            self._print_stderr(pprint.pformat(
                [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
                  pending and pending.details, kwargs)
                 for priority, xclass, pending, kwargs
                 in self.document.transformer.applied]))
        if self.settings.dump_pseudo_xml:
            self._print_stderr('\n::: Pseudo-XML:')
            self._print_stderr(self.document.pformat().encode(
                'raw_unicode_escape'))

    def report_Exception(self, error):
        """Write a message for `error` to `self._stderr`, by error type."""
        if isinstance(error, utils.SystemMessage):
            self.report_SystemMessage(error)
        elif isinstance(error, UnicodeEncodeError):
            self.report_UnicodeError(error)
        elif isinstance(error, io.InputError):
            self._stderr.write(u'Unable to open source file for reading:\n'
                               u' %s\n' % ErrorString(error))
        elif isinstance(error, io.OutputError):
            self._stderr.write(
                u'Unable to open destination file for writing:\n'
                u' %s\n' % ErrorString(error))
        else:
            self._print_stderr(u'%s' % ErrorString(error))
            self._print_stderr("""\
Exiting due to error. Use "--traceback" to diagnose.
Please report errors to <docutils-users@lists.sf.net>.
Include "--traceback" output, Docutils version (%s [%s]),
Python version (%s), your OS type & version, and the
command line used.""" % (__version__, __version_details__,
                         sys.version.split()[0]))

    def report_SystemMessage(self, error):
        """Report that processing stopped because of a system message."""
        self._print_stderr('Exiting due to level-%s (%s) system message.'
                           % (error.level,
                              utils.Reporter.levels[error.level]))

    def report_UnicodeError(self, error):
        """
        Explain an output encoding failure and list the available
        ``--output-encoding-error-handler`` values.
        """
        # The part of the text that could not be encoded.
        data = error.object[error.start:error.end]
        self._stderr.write(
            '%s\n'
            '\n'
            'The specified output encoding (%s) cannot\n'
            'handle all of the output.\n'
            'Try setting "--output-encoding-error-handler" to\n'
            '\n'
            '* "xmlcharrefreplace" (for HTML & XML output);\n'
            ' the output will contain "%s" and should be usable.\n'
            '* "backslashreplace" (for other output formats);\n'
            ' look for "%s" in the output.\n'
            '* "replace"; look for "?" in the output.\n'
            '\n'
            '"--output-encoding-error-handler" is currently set to "%s".\n'
            '\n'
            'Exiting due to error. Use "--traceback" to diagnose.\n'
            'If the advice above doesn\'t eliminate the error,\n'
            'please report it to <docutils-users@lists.sf.net>.\n'
            'Include "--traceback" output, Docutils version (%s),\n'
            'Python version (%s), your OS type & version, and the\n'
            'command line used.\n'
            % (ErrorString(error),
               self.settings.output_encoding,
               data.encode('ascii', 'xmlcharrefreplace'),
               data.encode('ascii', 'backslashreplace'),
               self.settings.output_encoding_error_handler,
               __version__, sys.version.split()[0]))
# Default usage line and description for the command-line front ends.
default_usage = '%prog [options] [<source> [<destination>]]'
default_description = (
    'Reads from <source> (default is stdin) '
    'and writes to <destination> (default is stdout). '
    'See <http://docutils.sf.net/docs/user/config.html> '
    'for the full reference.')
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=True, argv=None,
                    usage=default_usage, description=default_description):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line).  Return the
    encoded string output also.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader, parser, writer, settings=settings)
    # Fill in any component that was not passed as an object.
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=False):
    """
    Set up & run a `Publisher` for programmatic use with file-like I/O.
    Return the encoded string output also.

    Parameters: see `publish_programmatically`.
    """
    # `publish_programmatically` returns ``(output, publisher)``; only the
    # output is passed on.
    result = publish_programmatically(
        source_class=io.FileInput,
        source=source,
        source_path=source_path,
        destination_class=io.FileOutput,
        destination=destination,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    encoded, _publisher = result
    return encoded
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=False):
    """
    Set up & run a `Publisher` for programmatic use with string I/O.  Return
    the encoded string or Unicode string output.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    output.  Here's one way::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    Similarly for Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # `publish_programmatically` returns ``(output, publisher)``; only the
    # output is passed on.
    result = publish_programmatically(
        source_class=io.StringInput,
        source=source,
        source_path=source_path,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    text, _publisher = result
    return text
def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=False):
    """
    Set up & run a `Publisher`, and return a dictionary of document parts.
    Dictionary keys are the names of parts, and values are Unicode strings;
    encoding is up to the client.  For programmatic use with string I/O.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    input.  Here's how::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # The rendered output is discarded; the parts are read from the writer.
    _rendered, publisher = publish_programmatically(
        source=source,
        source_path=source_path,
        source_class=source_class,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return publisher.writer.parts
def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=False):
    """
    Set up & run a `Publisher` for programmatic use with string I/O.
    Return the document tree.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    input.  Here's one way::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    publisher = Publisher(reader=reader, parser=parser, writer=None,
                          settings=settings,
                          source_class=source_class,
                          destination_class=io.NullOutput)
    # Only the document tree is wanted, so the 'null' writer is used.
    publisher.set_components(reader_name, parser_name, 'null')
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(None, None)
    # The writer output is not used; the tree is read from the publisher.
    publisher.publish(enable_exit_status=enable_exit_status)
    return publisher.document
def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=False):
    """
    Set up & run a `Publisher` to render from an existing document
    tree data structure, for programmatic use with string I/O.  Return
    the encoded string output.

    Note that document.settings is overridden; if you want to use the settings
    of the original `document`, pass settings=document.settings.

    Also, new document.transformer and document.reporter objects are
    generated.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding.  Set it to 'unicode' for unencoded Unicode string
    output.  Here's one way::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an existing
    document tree.

    Other parameters: see `publish_programmatically`.
    """
    # The doctree reader takes its input from the wrapped `document`.
    doctree_reader = docutils.readers.doctree.Reader(parser_name='null')
    doctree_source = io.DocTreeInput(document)
    publisher = Publisher(doctree_reader, None, writer,
                          source=doctree_source,
                          destination_class=io.StringOutput,
                          settings=settings)
    if not writer and writer_name:
        publisher.set_writer(writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_destination(None, destination_path)
    rendered = publisher.publish(enable_exit_status=enable_exit_status)
    return rendered
def publish_cmdline_to_binary(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=True, argv=None,
                    usage=default_usage, description=default_description,
                    destination=None, destination_class=io.BinaryFileOutput
                    ):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line).  Return the
    encoded string output also.

    This is just like publish_cmdline, except that it uses
    io.BinaryFileOutput instead of io.FileOutput.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    # NOTE(review): `destination` is accepted but not used anywhere in this
    # function -- confirm whether it should be handed to the Publisher.
    publisher = Publisher(reader, parser, writer, settings=settings,
                          destination_class=destination_class)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Set up & run a `Publisher` for custom programmatic use.  Return the
    encoded string output and the Publisher object.

    Applications should not need to call this function directly.  If it does
    seem to be necessary to call this function directly, please write to the
    Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects.  Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - If `source_class` is `io.FileInput`:  Either a file-like object
        (must have 'read' and 'close' methods), or ``None``
        (`source_path` is opened).  If neither `source` nor
        `source_path` are supplied, `sys.stdin` is used.

      - If `source_class` is `io.StringInput` **required**:  The input
        string, either an encoded 8-bit string (set the
        'input_encoding' setting to the correct encoding) or a Unicode
        string (set the 'input_encoding' setting to 'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional.  Path to the file or object that produced
        `source`.  Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects.  Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened).  If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file.  Opened if no `destination`
        supplied.

      - `io.StringOutput`: Path to the file or object which will receive the
        output; optional.  Used for determining relative paths (stylesheets,
        source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings.  It's the end result of the
      `SettingsSpec`, config file, and option processing.  If `settings` is
      passed, it's assumed to be complete and no further setting/config/option
      processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object.  Provides
      extra application-specific settings definitions independently of
      components.  In other words, the application becomes a component, and
      its settings data is processed along with that of the other components.
      Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application.  Overrides the ``config_section`` attribute
      defined by `settings_spec`.  Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    publisher = Publisher(reader, parser, writer, settings=settings,
                          source_class=source_class,
                          destination_class=destination_class)
    # Order matters: components first, then settings (which are built from
    # the components), then the I/O objects (which read the settings).
    publisher.set_components(reader_name, parser_name, writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(destination, destination_path)
    rendered = publisher.publish(enable_exit_status=enable_exit_status)
    return rendered, publisher