Now loading the 'compile module in rst.el
[docutils.git] / docutils / core.py
blobedf2db90151c14b50b913f582bc507348cf4a889
1 # Authors: David Goodger
2 # Contact: goodger@python.org
3 # Revision: $Revision$
4 # Date: $Date$
5 # Copyright: This module has been placed in the public domain.
7 """
8 Calling the ``publish_*`` convenience functions (or instantiating a
9 `Publisher` object) with component names will result in default
10 behavior. For custom behavior (setting component options), create
11 custom component objects first, and pass *them* to
12 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
14 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
15 """
17 __docformat__ = 'reStructuredText'
19 import sys
20 import pprint
21 from types import StringType
22 from docutils import __version__, __version_details__, SettingsSpec
23 from docutils import frontend, io, utils, readers, writers
24 from docutils.frontend import OptionParser
25 from docutils.transforms import Transformer
26 import docutils.readers.doctree
29 class Publisher:
31 """
32 A facade encapsulating the high-level logic of a Docutils system.
33 """
35 def __init__(self, reader=None, parser=None, writer=None,
36 source=None, source_class=io.FileInput,
37 destination=None, destination_class=io.FileOutput,
38 settings=None):
39 """
40 Initial setup. If any of `reader`, `parser`, or `writer` are not
41 specified, the corresponding ``set_...`` method should be called with
42 a component name (`set_reader` sets the parser as well).
43 """
45 self.document = None
46 """The document tree (`docutils.nodes` objects)."""
48 self.reader = reader
49 """A `docutils.readers.Reader` instance."""
51 self.parser = parser
52 """A `docutils.parsers.Parser` instance."""
54 self.writer = writer
55 """A `docutils.writers.Writer` instance."""
57 for component in 'reader', 'parser', 'writer':
58 assert not isinstance(getattr(self, component), StringType), \
59 ('passed string as "%s" parameter; use "%s_name" instead'
60 % (getattr(self, component), component, component))
62 self.source = source
63 """The source of input data, a `docutils.io.Input` instance."""
65 self.source_class = source_class
66 """The class for dynamically created source objects."""
68 self.destination = destination
69 """The destination for docutils output, a `docutils.io.Output`
70 instance."""
72 self.destination_class = destination_class
73 """The class for dynamically created destination objects."""
75 self.settings = settings
76 """An object containing Docutils settings as instance attributes.
77 Set by `self.process_command_line()` or `self.get_settings()`."""
79 def set_reader(self, reader_name, parser, parser_name):
80 """Set `self.reader` by name."""
81 reader_class = readers.get_reader_class(reader_name)
82 self.reader = reader_class(parser, parser_name)
83 self.parser = self.reader.parser
85 def set_writer(self, writer_name):
86 """Set `self.writer` by name."""
87 writer_class = writers.get_writer_class(writer_name)
88 self.writer = writer_class()
90 def set_components(self, reader_name, parser_name, writer_name):
91 if self.reader is None:
92 self.set_reader(reader_name, self.parser, parser_name)
93 if self.parser is None:
94 if self.reader.parser is None:
95 self.reader.set_parser(parser_name)
96 self.parser = self.reader.parser
97 if self.writer is None:
98 self.set_writer(writer_name)
100 def setup_option_parser(self, usage=None, description=None,
101 settings_spec=None, config_section=None,
102 **defaults):
103 if config_section:
104 if not settings_spec:
105 settings_spec = SettingsSpec()
106 settings_spec.config_section = config_section
107 parts = config_section.split()
108 if len(parts) > 1 and parts[-1] == 'application':
109 settings_spec.config_section_dependencies = ['applications']
110 #@@@ Add self.source & self.destination to components in future?
111 option_parser = OptionParser(
112 components=(self.parser, self.reader, self.writer, settings_spec),
113 defaults=defaults, read_config_files=1,
114 usage=usage, description=description)
115 return option_parser
117 def get_settings(self, usage=None, description=None,
118 settings_spec=None, config_section=None, **defaults):
120 Set and return default settings (overrides in `defaults` dict).
122 Set components first (`self.set_reader` & `self.set_writer`).
123 Explicitly setting `self.settings` disables command line option
124 processing from `self.publish()`.
126 option_parser = self.setup_option_parser(
127 usage, description, settings_spec, config_section, **defaults)
128 self.settings = option_parser.get_default_values()
129 return self.settings
131 def process_programmatic_settings(self, settings_spec,
132 settings_overrides,
133 config_section):
134 if self.settings is None:
135 defaults = (settings_overrides or {}).copy()
136 # Propagate exceptions by default when used programmatically:
137 defaults.setdefault('traceback', 1)
138 self.get_settings(settings_spec=settings_spec,
139 config_section=config_section,
140 **defaults)
142 def process_command_line(self, argv=None, usage=None, description=None,
143 settings_spec=None, config_section=None,
144 **defaults):
146 Pass an empty list to `argv` to avoid reading `sys.argv` (the
147 default).
149 Set components first (`self.set_reader` & `self.set_writer`).
151 option_parser = self.setup_option_parser(
152 usage, description, settings_spec, config_section, **defaults)
153 if argv is None:
154 argv = sys.argv[1:]
155 self.settings = option_parser.parse_args(argv)
157 def set_io(self, source_path=None, destination_path=None):
158 if self.source is None:
159 self.set_source(source_path=source_path)
160 if self.destination is None:
161 self.set_destination(destination_path=destination_path)
163 def set_source(self, source=None, source_path=None):
164 if source_path is None:
165 source_path = self.settings._source
166 else:
167 self.settings._source = source_path
168 self.source = self.source_class(
169 source=source, source_path=source_path,
170 encoding=self.settings.input_encoding)
172 def set_destination(self, destination=None, destination_path=None):
173 if destination_path is None:
174 destination_path = self.settings._destination
175 else:
176 self.settings._destination = destination_path
177 self.destination = self.destination_class(
178 destination=destination, destination_path=destination_path,
179 encoding=self.settings.output_encoding,
180 error_handler=self.settings.output_encoding_error_handler)
182 def apply_transforms(self):
183 self.document.transformer.populate_from_components(
184 (self.source, self.reader, self.reader.parser, self.writer,
185 self.destination))
186 self.document.transformer.apply_transforms()
188 def publish(self, argv=None, usage=None, description=None,
189 settings_spec=None, settings_overrides=None,
190 config_section=None, enable_exit_status=None):
192 Process command line options and arguments (if `self.settings` not
193 already set), run `self.reader` and then `self.writer`. Return
194 `self.writer`'s output.
196 exit = None
197 try:
198 if self.settings is None:
199 self.process_command_line(
200 argv, usage, description, settings_spec, config_section,
201 **(settings_overrides or {}))
202 self.set_io()
203 self.document = self.reader.read(self.source, self.parser,
204 self.settings)
205 self.apply_transforms()
206 output = self.writer.write(self.document, self.destination)
207 self.writer.assemble_parts()
208 except SystemExit, error:
209 exit = 1
210 exit_status = error.code
211 except Exception, error:
212 if not self.settings: # exception too early to report nicely
213 raise
214 if self.settings.traceback: # Propagate exceptions?
215 self.debugging_dumps()
216 raise
217 self.report_Exception(error)
218 exit = 1
219 exit_status = 1
220 self.debugging_dumps()
221 if (enable_exit_status and self.document
222 and (self.document.reporter.max_level
223 >= self.settings.exit_status_level)):
224 sys.exit(self.document.reporter.max_level + 10)
225 elif exit:
226 sys.exit(exit_status)
227 return output
229 def debugging_dumps(self):
230 if not self.document:
231 return
232 if self.settings.dump_settings:
233 print >>sys.stderr, '\n::: Runtime settings:'
234 print >>sys.stderr, pprint.pformat(self.settings.__dict__)
235 if self.settings.dump_internals:
236 print >>sys.stderr, '\n::: Document internals:'
237 print >>sys.stderr, pprint.pformat(self.document.__dict__)
238 if self.settings.dump_transforms:
239 print >>sys.stderr, '\n::: Transforms applied:'
240 print >>sys.stderr, (' (priority, transform class, '
241 'pending node details, keyword args)')
242 print >>sys.stderr, pprint.pformat(
243 [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
244 pending and pending.details, kwargs)
245 for priority, xclass, pending, kwargs
246 in self.document.transformer.applied])
247 if self.settings.dump_pseudo_xml:
248 print >>sys.stderr, '\n::: Pseudo-XML:'
249 print >>sys.stderr, self.document.pformat().encode(
250 'raw_unicode_escape')
252 def report_Exception(self, error):
253 if isinstance(error, utils.SystemMessage):
254 self.report_SystemMessage(error)
255 elif isinstance(error, UnicodeError):
256 self.report_UnicodeError(error)
257 else:
258 print >>sys.stderr, '%s: %s' % (error.__class__.__name__, error)
259 print >>sys.stderr, ("""\
260 Exiting due to error. Use "--traceback" to diagnose.
261 Please report errors to <docutils-users@lists.sf.net>.
262 Include "--traceback" output, Docutils version (%s [%s]),
263 Python version (%s), your OS type & version, and the
264 command line used.""" % (__version__, __version_details__,
265 sys.version.split()[0]))
267 def report_SystemMessage(self, error):
268 print >>sys.stderr, ('Exiting due to level-%s (%s) system message.'
269 % (error.level,
270 utils.Reporter.levels[error.level]))
272 def report_UnicodeError(self, error):
273 sys.stderr.write(
274 '%s: %s\n'
275 '\n'
276 'The specified output encoding (%s) cannot\n'
277 'handle all of the output.\n'
278 'Try setting "--output-encoding-error-handler" to\n'
279 '\n'
280 '* "xmlcharrefreplace" (for HTML & XML output);\n'
281 % (error.__class__.__name__, error,
282 self.settings.output_encoding))
283 try:
284 data = error.object[error.start:error.end]
285 sys.stderr.write(
286 ' the output will contain "%s" and should be usable.\n'
287 '* "backslashreplace" (for other output formats, Python 2.3+);\n'
288 ' look for "%s" in the output.\n'
289 % (data.encode('ascii', 'xmlcharrefreplace'),
290 data.encode('ascii', 'backslashreplace')))
291 except AttributeError:
292 sys.stderr.write(' the output should be usable as-is.\n')
293 sys.stderr.write(
294 '* "replace"; look for "?" in the output.\n'
295 '\n'
296 '"--output-encoding-error-handler" is currently set to "%s".\n'
297 '\n'
298 'Exiting due to error. Use "--traceback" to diagnose.\n'
299 'If the advice above doesn\'t eliminate the error,\n'
300 'please report it to <docutils-users@lists.sf.net>.\n'
301 'Include "--traceback" output, Docutils version (%s),\n'
302 'Python version (%s), your OS type & version, and the\n'
303 'command line used.\n'
304 % (self.settings.output_encoding_error_handler,
305 __version__, sys.version.split()[0]))
# Usage line shown by the command-line front end; "%prog" is left for the
# option parser to fill in.
default_usage = '%prog [options] [<source> [<destination>]]'

# Program description shown along with the command-line option help.
default_description = (
    'Reads from <source> (default is stdin) '
    'and writes to <destination> (default is stdout). '
    'See <http://docutils.sf.net/docs/user/config.html> '
    'for the full reference.')
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    Run a `Publisher` driven by the command line: input and output file
    paths come from the command-line arguments. The encoded string output
    is returned as well.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader, parser, writer, settings=settings)
    # Instantiate by name whichever components were not passed as objects.
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=None):
    """
    Run a `Publisher` programmatically with file-like I/O (`io.FileInput`
    and `io.FileOutput`). The encoded string output is returned as well.

    Parameters: see `publish_programmatically`.
    """
    result = publish_programmatically(
        source_class=io.FileInput,
        destination_class=io.FileOutput,
        source=source,
        source_path=source_path,
        destination=destination,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    # `result` is (output, publisher); only the output is of interest here.
    file_output, _publisher = result
    return file_output
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=None):
    """
    Run a `Publisher` programmatically with string I/O (`io.StringInput`
    and `io.StringOutput`). Returns the encoded string or Unicode string
    output.

    To get encoded string output, set the 'output_encoding' setting to the
    desired encoding; set it to 'unicode' for unencoded Unicode string
    output. One way to do that::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    The same applies to Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    result = publish_programmatically(
        source_class=io.StringInput,
        destination_class=io.StringOutput,
        source=source,
        source_path=source_path,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    # `result` is (output, publisher); only the output is returned.
    string_output, _publisher = result
    return string_output
def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Run a `Publisher` programmatically with string I/O and return the
    writer's dictionary of document parts. Keys are the names of parts and
    values are Unicode strings; encoding is up to the client.

    For encoded string input, set the 'input_encoding' setting to the
    desired encoding; set it to 'unicode' for unencoded Unicode string
    input. Here's how::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # The string output is discarded; the parts live on the writer.
    _unused_output, publisher = publish_programmatically(
        source_class=source_class,
        source=source,
        source_path=source_path,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return publisher.writer.parts
def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=None):
    """
    Run a `Publisher` programmatically with string input and return the
    resulting document tree.

    For encoded string input, set the 'input_encoding' setting to the
    desired encoding; set it to 'unicode' for unencoded Unicode string
    input. Here's one way::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    publisher = Publisher(reader=reader, parser=parser, writer=None,
                          settings=settings,
                          source_class=source_class,
                          destination_class=io.NullOutput)
    # The writer is always the one registered under the name 'null'.
    publisher.set_components(reader_name, parser_name, 'null')
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(None, None)
    # The writer's return value is not needed; the tree is kept on the
    # publisher.
    publisher.publish(enable_exit_status=enable_exit_status)
    return publisher.document
def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=None):
    """
    Set up & run a `Publisher` to render from an existing document tree data
    structure, for programmatic use with string I/O. Return the encoded
    string output (the value returned by `Publisher.publish()`); no
    document-parts dictionary is returned.

    Note that document.settings is overridden; if you want to use the settings
    of the original `document`, pass settings=document.settings.

    Also, new document.transformer and document.reporter objects are
    generated.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    output. Here's one way::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an existing
    document tree.

    Other parameters: see `publish_programmatically`.
    """
    # The doctree reader takes the existing tree as its input, so the parser
    # is the one named 'null'.
    reader = docutils.readers.doctree.Reader(parser_name='null')
    pub = Publisher(reader, None, writer,
                    source=io.DocTreeInput(document),
                    destination_class=io.StringOutput, settings=settings)
    if not writer and writer_name:
        pub.set_writer(writer_name)
    pub.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    pub.set_destination(None, destination_path)
    return pub.publish(enable_exit_status=enable_exit_status)
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Run a `Publisher` for custom programmatic use and return a pair: the
    encoded string output and the `Publisher` object itself.

    Applications should not need to call this function directly. If it does
    seem to be necessary to call this function directly, please write to the
    Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects. Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - `io.FileInput`: Either a file-like object (must have 'read' and
        'close' methods), or ``None`` (`source_path` is opened). If neither
        `source` nor `source_path` are supplied, `sys.stdin` is used.

      - `io.StringInput` **required**: The input string, either an encoded
        8-bit string (set the 'input_encoding' setting to the correct
        encoding) or a Unicode string (set the 'input_encoding' setting to
        'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional. Path to the file or object that produced
        `source`. Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects. Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened). If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file. Opened if no `destination`
        supplied.

      - `io.StringOutput`: Path to the file or object which will receive the
        output; optional. Used for determining relative paths (stylesheets,
        source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings. It's the end result of the
      `SettingsSpec`, config file, and option processing. If `settings` is
      passed, it's assumed to be complete and no further setting/config/option
      processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object. Provides
      extra application-specific settings definitions independently of
      components. In other words, the application becomes a component, and
      its settings data is processed along with that of the other components.
      Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application. Overrides the ``config_section`` attribute
      defined by `settings_spec`. Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    publisher = Publisher(reader, parser, writer, settings=settings,
                          source_class=source_class,
                          destination_class=destination_class)
    # Components not passed as objects are instantiated by name.
    publisher.set_components(reader_name, parser_name, writer_name)
    # Settings are computed only if none were passed in.
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(destination, destination_path)
    return publisher.publish(enable_exit_status=enable_exit_status), publisher