removed CVS reference
[docutils.git] / docutils / core.py
blobf68f74cea6db79a3b7c10c4fba5bb84b981d01b8
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Calling the ``publish_*`` convenience functions (or instantiating a
7 `Publisher` object) with component names will result in default
8 behavior. For custom behavior (setting component options), create
9 custom component objects first, and pass *them* to
10 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
12 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
13 """
15 __docformat__ = 'reStructuredText'
17 import sys
18 import pprint
19 from types import StringType
20 from docutils import __version__, __version_details__, SettingsSpec
21 from docutils import frontend, io, utils, readers, writers
22 from docutils.frontend import OptionParser
23 from docutils.transforms import Transformer
24 import docutils.readers.doctree
27 class Publisher:
29 """
30 A facade encapsulating the high-level logic of a Docutils system.
31 """
33 def __init__(self, reader=None, parser=None, writer=None,
34 source=None, source_class=io.FileInput,
35 destination=None, destination_class=io.FileOutput,
36 settings=None):
37 """
38 Initial setup. If any of `reader`, `parser`, or `writer` are not
39 specified, the corresponding ``set_...`` method should be called with
40 a component name (`set_reader` sets the parser as well).
41 """
43 self.document = None
44 """The document tree (`docutils.nodes` objects)."""
46 self.reader = reader
47 """A `docutils.readers.Reader` instance."""
49 self.parser = parser
50 """A `docutils.parsers.Parser` instance."""
52 self.writer = writer
53 """A `docutils.writers.Writer` instance."""
55 for component in 'reader', 'parser', 'writer':
56 assert not isinstance(getattr(self, component), StringType), \
57 ('passed string as "%s" parameter; use "%s_name" instead'
58 % (getattr(self, component), component, component))
60 self.source = source
61 """The source of input data, a `docutils.io.Input` instance."""
63 self.source_class = source_class
64 """The class for dynamically created source objects."""
66 self.destination = destination
67 """The destination for docutils output, a `docutils.io.Output`
68 instance."""
70 self.destination_class = destination_class
71 """The class for dynamically created destination objects."""
73 self.settings = settings
74 """An object containing Docutils settings as instance attributes.
75 Set by `self.process_command_line()` or `self.get_settings()`."""
77 def set_reader(self, reader_name, parser, parser_name):
78 """Set `self.reader` by name."""
79 reader_class = readers.get_reader_class(reader_name)
80 self.reader = reader_class(parser, parser_name)
81 self.parser = self.reader.parser
83 def set_writer(self, writer_name):
84 """Set `self.writer` by name."""
85 writer_class = writers.get_writer_class(writer_name)
86 self.writer = writer_class()
88 def set_components(self, reader_name, parser_name, writer_name):
89 if self.reader is None:
90 self.set_reader(reader_name, self.parser, parser_name)
91 if self.parser is None:
92 if self.reader.parser is None:
93 self.reader.set_parser(parser_name)
94 self.parser = self.reader.parser
95 if self.writer is None:
96 self.set_writer(writer_name)
98 def setup_option_parser(self, usage=None, description=None,
99 settings_spec=None, config_section=None,
100 **defaults):
101 if config_section:
102 if not settings_spec:
103 settings_spec = SettingsSpec()
104 settings_spec.config_section = config_section
105 parts = config_section.split()
106 if len(parts) > 1 and parts[-1] == 'application':
107 settings_spec.config_section_dependencies = ['applications']
108 #@@@ Add self.source & self.destination to components in future?
109 option_parser = OptionParser(
110 components=(self.parser, self.reader, self.writer, settings_spec),
111 defaults=defaults, read_config_files=1,
112 usage=usage, description=description)
113 return option_parser
115 def get_settings(self, usage=None, description=None,
116 settings_spec=None, config_section=None, **defaults):
118 Set and return default settings (overrides in `defaults` dict).
120 Set components first (`self.set_reader` & `self.set_writer`).
121 Explicitly setting `self.settings` disables command line option
122 processing from `self.publish()`.
124 option_parser = self.setup_option_parser(
125 usage, description, settings_spec, config_section, **defaults)
126 self.settings = option_parser.get_default_values()
127 return self.settings
129 def process_programmatic_settings(self, settings_spec,
130 settings_overrides,
131 config_section):
132 if self.settings is None:
133 defaults = (settings_overrides or {}).copy()
134 # Propagate exceptions by default when used programmatically:
135 defaults.setdefault('traceback', 1)
136 self.get_settings(settings_spec=settings_spec,
137 config_section=config_section,
138 **defaults)
140 def process_command_line(self, argv=None, usage=None, description=None,
141 settings_spec=None, config_section=None,
142 **defaults):
144 Pass an empty list to `argv` to avoid reading `sys.argv` (the
145 default).
147 Set components first (`self.set_reader` & `self.set_writer`).
149 option_parser = self.setup_option_parser(
150 usage, description, settings_spec, config_section, **defaults)
151 if argv is None:
152 argv = sys.argv[1:]
153 self.settings = option_parser.parse_args(argv)
155 def set_io(self, source_path=None, destination_path=None):
156 if self.source is None:
157 self.set_source(source_path=source_path)
158 if self.destination is None:
159 self.set_destination(destination_path=destination_path)
161 def set_source(self, source=None, source_path=None):
162 if source_path is None:
163 source_path = self.settings._source
164 else:
165 self.settings._source = source_path
166 self.source = self.source_class(
167 source=source, source_path=source_path,
168 encoding=self.settings.input_encoding)
170 def set_destination(self, destination=None, destination_path=None):
171 if destination_path is None:
172 destination_path = self.settings._destination
173 else:
174 self.settings._destination = destination_path
175 self.destination = self.destination_class(
176 destination=destination, destination_path=destination_path,
177 encoding=self.settings.output_encoding,
178 error_handler=self.settings.output_encoding_error_handler)
180 def apply_transforms(self):
181 self.document.transformer.populate_from_components(
182 (self.source, self.reader, self.reader.parser, self.writer,
183 self.destination))
184 self.document.transformer.apply_transforms()
186 def publish(self, argv=None, usage=None, description=None,
187 settings_spec=None, settings_overrides=None,
188 config_section=None, enable_exit_status=None):
190 Process command line options and arguments (if `self.settings` not
191 already set), run `self.reader` and then `self.writer`. Return
192 `self.writer`'s output.
194 exit = None
195 try:
196 if self.settings is None:
197 self.process_command_line(
198 argv, usage, description, settings_spec, config_section,
199 **(settings_overrides or {}))
200 self.set_io()
201 self.document = self.reader.read(self.source, self.parser,
202 self.settings)
203 self.apply_transforms()
204 output = self.writer.write(self.document, self.destination)
205 self.writer.assemble_parts()
206 except SystemExit, error:
207 exit = 1
208 exit_status = error.code
209 except Exception, error:
210 if not self.settings: # exception too early to report nicely
211 raise
212 if self.settings.traceback: # Propagate exceptions?
213 self.debugging_dumps()
214 raise
215 self.report_Exception(error)
216 exit = 1
217 exit_status = 1
218 self.debugging_dumps()
219 if (enable_exit_status and self.document
220 and (self.document.reporter.max_level
221 >= self.settings.exit_status_level)):
222 sys.exit(self.document.reporter.max_level + 10)
223 elif exit:
224 sys.exit(exit_status)
225 return output
227 def debugging_dumps(self):
228 if not self.document:
229 return
230 if self.settings.dump_settings:
231 print >>sys.stderr, '\n::: Runtime settings:'
232 print >>sys.stderr, pprint.pformat(self.settings.__dict__)
233 if self.settings.dump_internals:
234 print >>sys.stderr, '\n::: Document internals:'
235 print >>sys.stderr, pprint.pformat(self.document.__dict__)
236 if self.settings.dump_transforms:
237 print >>sys.stderr, '\n::: Transforms applied:'
238 print >>sys.stderr, (' (priority, transform class, '
239 'pending node details, keyword args)')
240 print >>sys.stderr, pprint.pformat(
241 [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
242 pending and pending.details, kwargs)
243 for priority, xclass, pending, kwargs
244 in self.document.transformer.applied])
245 if self.settings.dump_pseudo_xml:
246 print >>sys.stderr, '\n::: Pseudo-XML:'
247 print >>sys.stderr, self.document.pformat().encode(
248 'raw_unicode_escape')
250 def report_Exception(self, error):
251 if isinstance(error, utils.SystemMessage):
252 self.report_SystemMessage(error)
253 elif isinstance(error, UnicodeError):
254 self.report_UnicodeError(error)
255 else:
256 print >>sys.stderr, '%s: %s' % (error.__class__.__name__, error)
257 print >>sys.stderr, ("""\
258 Exiting due to error. Use "--traceback" to diagnose.
259 Please report errors to <docutils-users@lists.sf.net>.
260 Include "--traceback" output, Docutils version (%s [%s]),
261 Python version (%s), your OS type & version, and the
262 command line used.""" % (__version__, __version_details__,
263 sys.version.split()[0]))
265 def report_SystemMessage(self, error):
266 print >>sys.stderr, ('Exiting due to level-%s (%s) system message.'
267 % (error.level,
268 utils.Reporter.levels[error.level]))
270 def report_UnicodeError(self, error):
271 sys.stderr.write(
272 '%s: %s\n'
273 '\n'
274 'The specified output encoding (%s) cannot\n'
275 'handle all of the output.\n'
276 'Try setting "--output-encoding-error-handler" to\n'
277 '\n'
278 '* "xmlcharrefreplace" (for HTML & XML output);\n'
279 % (error.__class__.__name__, error,
280 self.settings.output_encoding))
281 try:
282 data = error.object[error.start:error.end]
283 sys.stderr.write(
284 ' the output will contain "%s" and should be usable.\n'
285 '* "backslashreplace" (for other output formats, Python 2.3+);\n'
286 ' look for "%s" in the output.\n'
287 % (data.encode('ascii', 'xmlcharrefreplace'),
288 data.encode('ascii', 'backslashreplace')))
289 except AttributeError:
290 sys.stderr.write(' the output should be usable as-is.\n')
291 sys.stderr.write(
292 '* "replace"; look for "?" in the output.\n'
293 '\n'
294 '"--output-encoding-error-handler" is currently set to "%s".\n'
295 '\n'
296 'Exiting due to error. Use "--traceback" to diagnose.\n'
297 'If the advice above doesn\'t eliminate the error,\n'
298 'please report it to <docutils-users@lists.sf.net>.\n'
299 'Include "--traceback" output, Docutils version (%s),\n'
300 'Python version (%s), your OS type & version, and the\n'
301 'command line used.\n'
302 % (self.settings.output_encoding_error_handler,
303 __version__, sys.version.split()[0]))
# Usage line for the command-line front ends; "%prog" is left for the
# option parser to fill in.
default_usage = '%prog [options] [<source> [<destination>]]'

# Program description used by `publish_cmdline` unless overridden.
default_description = (
    'Reads from <source> (default is stdin) and writes to <destination> '
    '(default is stdout). See '
    '<http://docutils.sf.net/docs/user/config.html> for the full reference.')
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line). Return the
    encoded string output also.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader, parser, writer, settings=settings)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with file-like I/O
    (`io.FileInput` and `io.FileOutput`). Return the encoded string output
    also.

    Parameters: see `publish_programmatically`.
    """
    # publish_programmatically returns (output, publisher); only the output
    # is of interest here.
    results = publish_programmatically(
        source_class=io.FileInput,
        source=source,
        source_path=source_path,
        destination_class=io.FileOutput,
        destination=destination,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return results[0]
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O
    (`io.StringInput` and `io.StringOutput`). Return the encoded string or
    Unicode string output.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    output. Here's one way::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    Similarly for Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # publish_programmatically returns (output, publisher); only the output
    # is of interest here.
    results = publish_programmatically(
        source_class=io.StringInput,
        source=source,
        source_path=source_path,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return results[0]
def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Set up & run a `Publisher`, and return a dictionary of document parts.
    Dictionary keys are the names of parts, and values are Unicode strings;
    encoding is up to the client. For programmatic use with string I/O.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    input. Here's how::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # The rendered output itself is discarded; the parts are read from the
    # publisher's writer.
    results = publish_programmatically(
        source=source,
        source_path=source_path,
        source_class=source_class,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    publisher = results[1]
    return publisher.writer.parts
def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O.
    Return the document tree.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    input. Here's one way::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # Only the document tree is wanted, so the writer is selected by the
    # name 'null' and the destination class is `io.NullOutput`.
    publisher = Publisher(reader=reader, parser=parser, writer=None,
                          settings=settings,
                          source_class=source_class,
                          destination_class=io.NullOutput)
    publisher.set_components(reader_name, parser_name, 'null')
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(None, None)
    # The writer's return value is not used; the tree is taken from the
    # publisher afterwards.
    publisher.publish(enable_exit_status=enable_exit_status)
    return publisher.document
def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=None):
    """
    Set up & run a `Publisher` to render from an existing document
    tree data structure, for programmatic use with string I/O. Return
    the encoded string output.

    Note that document.settings is overridden; if you want to use the settings
    of the original `document`, pass settings=document.settings.

    Also, new document.transformer and document.reporter objects are
    generated.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    output. Here's one way::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an existing
    document tree.

    Other parameters: see `publish_programmatically`.
    """
    doctree_reader = docutils.readers.doctree.Reader(parser_name='null')
    doctree_source = io.DocTreeInput(document)
    publisher = Publisher(doctree_reader, None, writer,
                          source=doctree_source,
                          destination_class=io.StringOutput,
                          settings=settings)
    # Instantiate a writer by name only when no writer object was passed.
    if not writer:
        if writer_name:
            publisher.set_writer(writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_destination(None, destination_path)
    return publisher.publish(enable_exit_status=enable_exit_status)
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Set up & run a `Publisher` for custom programmatic use. Return the
    encoded string output and the Publisher object.

    Applications should not need to call this function directly. If it does
    seem to be necessary to call this function directly, please write to the
    Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects. Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - If `source_class` is `io.FileInput`: Either a file-like object
        (must have 'read' and 'close' methods), or ``None``
        (`source_path` is opened). If neither `source` nor
        `source_path` are supplied, `sys.stdin` is used.

      - If `source_class` is `io.StringInput` **required**: The input
        string, either an encoded 8-bit string (set the
        'input_encoding' setting to the correct encoding) or a Unicode
        string (set the 'input_encoding' setting to 'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional. Path to the file or object that produced
        `source`. Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects. Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened). If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file. Opened if no `destination`
        supplied.

      - `io.StringOutput`: Path to the file or object which will receive the
        output; optional. Used for determining relative paths (stylesheets,
        source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings. It's the end result of the
      `SettingsSpec`, config file, and option processing. If `settings` is
      passed, it's assumed to be complete and no further setting/config/option
      processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object. Provides
      extra application-specific settings definitions independently of
      components. In other words, the application becomes a component, and
      its settings data is processed along with that of the other components.
      Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application. Overrides the ``config_section`` attribute
      defined by `settings_spec`. Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    publisher = Publisher(reader, parser, writer, settings=settings,
                          source_class=source_class,
                          destination_class=destination_class)
    # Components and settings must be in place before the I/O objects are
    # created: set_source/set_destination read encodings from the settings.
    publisher.set_components(reader_name, parser_name, writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(destination, destination_path)
    return (publisher.publish(enable_exit_status=enable_exit_status),
            publisher)