I restructured (pun intended) the emacs support
[docutils.git] / docutils / core.py
blob4cb9d7ec3e52cf5b3c8cb5e476b2c4b27e896a18
1 # Authors: David Goodger
2 # Contact: goodger@python.org
3 # Revision: $Revision$
4 # Date: $Date$
5 # Copyright: This module has been placed in the public domain.
7 """
8 Calling the ``publish_*`` convenience functions (or instantiating a
9 `Publisher` object) with component names will result in default
10 behavior. For custom behavior (setting component options), create
11 custom component objects first, and pass *them* to
12 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
14 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
15 """
17 __docformat__ = 'reStructuredText'
19 import sys
20 import pprint
21 from docutils import __version__, SettingsSpec
22 from docutils import frontend, io, utils, readers, writers
23 from docutils.frontend import OptionParser
class Publisher:

    """
    A facade encapsulating the high-level logic of a Docutils system.
    """

    def __init__(self, reader=None, parser=None, writer=None,
                 source=None, source_class=io.FileInput,
                 destination=None, destination_class=io.FileOutput,
                 settings=None):
        """
        Initial setup. If any of `reader`, `parser`, or `writer` are not
        specified, the corresponding ``set_...`` method should be called with
        a component name (`set_reader` sets the parser as well).
        """

        self.reader = reader
        """A `docutils.readers.Reader` instance."""

        self.parser = parser
        """A `docutils.parsers.Parser` instance."""

        self.writer = writer
        """A `docutils.writers.Writer` instance."""

        self.source = source
        """The source of input data, a `docutils.io.Input` instance."""

        self.source_class = source_class
        """The class for dynamically created source objects."""

        self.destination = destination
        """The destination for docutils output, a `docutils.io.Output`
        instance."""

        self.destination_class = destination_class
        """The class for dynamically created destination objects."""

        self.settings = settings
        """An object containing Docutils settings as instance attributes.
        Set by `self.process_command_line()` or `self.get_settings()`."""

    def set_reader(self, reader_name, parser, parser_name):
        """Set `self.reader` by name."""
        reader_class = readers.get_reader_class(reader_name)
        self.reader = reader_class(parser, parser_name)
        # The reader owns the parser; mirror it here.
        self.parser = self.reader.parser

    def set_writer(self, writer_name):
        """Set `self.writer` by name."""
        writer_class = writers.get_writer_class(writer_name)
        self.writer = writer_class()

    def set_components(self, reader_name, parser_name, writer_name):
        """
        Fill in whichever of `self.reader`, `self.parser` and `self.writer`
        are still ``None``, instantiating them by name. Components that
        were passed to the constructor are left alone.
        """
        if self.reader is None:
            self.set_reader(reader_name, self.parser, parser_name)
        if self.parser is None:
            # A reader supplied by the caller may not have a parser yet.
            if self.reader.parser is None:
                self.reader.set_parser(parser_name)
            self.parser = self.reader.parser
        if self.writer is None:
            self.set_writer(writer_name)

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """
        Return an `OptionParser` built from the parser, reader, writer and
        `settings_spec` components, with `defaults` as default values.

        If `config_section` is given it is stored on `settings_spec` (a new
        `SettingsSpec` is created when none was passed). Note that a
        `settings_spec` supplied by the caller is modified in place.
        """
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            parts = config_section.split()
            # A multi-word section name ending in "application" also
            # depends on the "applications" section.
            if len(parts) > 1 and parts[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        option_parser = OptionParser(
            components=(self.parser, self.reader, self.writer, settings_spec),
            defaults=defaults, read_config_files=1,
            usage=usage, description=description)
        return option_parser

    def get_settings(self, usage=None, description=None,
                     settings_spec=None, config_section=None, **defaults):
        """
        Set and return default settings (overrides in `defaults` dict).

        Set components first (`self.set_reader` & `self.set_writer`).
        Explicitly setting `self.settings` disables command line option
        processing from `self.publish()`.
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        self.settings = option_parser.get_default_values()
        return self.settings

    def process_programmatic_settings(self, settings_spec,
                                      settings_overrides,
                                      config_section):
        """
        If `self.settings` has not been set, compute it with
        `self.get_settings()`, using a copy of `settings_overrides` as the
        defaults. Does nothing when settings are already present.
        """
        if self.settings is None:
            # Copy so the caller's dictionary is never modified.
            defaults = (settings_overrides or {}).copy()
            # Propagate exceptions by default when used programmatically:
            defaults.setdefault('traceback', 1)
            self.get_settings(settings_spec=settings_spec,
                              config_section=config_section,
                              **defaults)

    def process_command_line(self, argv=None, usage=None, description=None,
                             settings_spec=None, config_section=None,
                             **defaults):
        """
        Parse command-line arguments and store the result in
        `self.settings`.

        Pass an empty list to `argv` to avoid reading `sys.argv` (the
        default).

        Set components first (`self.set_reader` & `self.set_writer`).
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        if argv is None:
            argv = sys.argv[1:]
        self.settings = option_parser.parse_args(argv)

    def set_io(self, source_path=None, destination_path=None):
        """
        Create `self.source` and `self.destination` if they have not been
        set yet; existing objects are kept.
        """
        if self.source is None:
            self.set_source(source_path=source_path)
        if self.destination is None:
            self.set_destination(destination_path=destination_path)

    def set_source(self, source=None, source_path=None):
        """
        Set `self.source` to a new `self.source_class` instance.

        When `source_path` is ``None`` the path comes from
        `self.settings._source`; otherwise `source_path` is recorded there.
        Requires `self.settings` to be set.
        """
        if source_path is None:
            source_path = self.settings._source
        else:
            self.settings._source = source_path
        self.source = self.source_class(
            source=source, source_path=source_path,
            encoding=self.settings.input_encoding)

    def set_destination(self, destination=None, destination_path=None):
        """
        Set `self.destination` to a new `self.destination_class` instance.

        When `destination_path` is ``None`` the path comes from
        `self.settings._destination`; otherwise `destination_path` is
        recorded there. Requires `self.settings` to be set.
        """
        if destination_path is None:
            destination_path = self.settings._destination
        else:
            self.settings._destination = destination_path
        self.destination = self.destination_class(
            destination=destination, destination_path=destination_path,
            encoding=self.settings.output_encoding,
            error_handler=self.settings.output_encoding_error_handler)

    def apply_transforms(self, document):
        """
        Populate the document's transformer from the source, reader,
        parser, writer and destination components, then apply the
        transforms.
        """
        document.transformer.populate_from_components(
            (self.source, self.reader, self.reader.parser, self.writer,
             self.destination))
        document.transformer.apply_transforms()

    def publish(self, argv=None, usage=None, description=None,
                settings_spec=None, settings_overrides=None,
                config_section=None, enable_exit_status=None):
        """
        Process command line options and arguments (if `self.settings` not
        already set), run `self.reader` and then `self.writer`. Return
        `self.writer`'s output.

        If processing raises and the ``traceback`` setting is true, the
        exception propagates; otherwise it is reported on stderr and the
        process exits via `sys.exit()`. With `enable_exit_status`, the
        process also exits when the document's highest system-message
        level reaches the ``exit_status_level`` setting.
        """
        if self.settings is None:
            self.process_command_line(
                argv, usage, description, settings_spec, config_section,
                **(settings_overrides or {}))
        self.set_io()
        exit_status = None
        document = None
        try:
            document = self.reader.read(self.source, self.parser,
                                        self.settings)
            self.apply_transforms(document)
            output = self.writer.write(document, self.destination)
            self.writer.assemble_parts()
        except Exception:
            # Fetch the exception via sys.exc_info() so that the handler
            # does not rely on version-specific "except ... , name" syntax.
            error = sys.exc_info()[1]
            if self.settings.traceback: # propagate exceptions?
                self.debugging_dumps(document)
                raise
            self.report_Exception(error)
            exit_status = 1
        self.debugging_dumps(document)
        if (enable_exit_status and document
            and (document.reporter.max_level
                 >= self.settings.exit_status_level)):
            sys.exit(document.reporter.max_level + 10)
        elif exit_status:
            # `output` is unbound on this path, so we must not fall through.
            sys.exit(1)
        return output

    def debugging_dumps(self, document):
        """
        Write the debugging dumps enabled in `self.settings`
        (``dump_settings``, ``dump_internals``, ``dump_transforms``,
        ``dump_pseudo_xml``) to stderr. Nothing is written when `document`
        is false.
        """
        if not document:
            return
        settings = self.settings
        if settings.dump_settings:
            sys.stderr.write('\n::: Runtime settings:\n')
            sys.stderr.write(pprint.pformat(settings.__dict__) + '\n')
        if settings.dump_internals:
            sys.stderr.write('\n::: Document internals:\n')
            sys.stderr.write(pprint.pformat(document.__dict__) + '\n')
        if settings.dump_transforms:
            sys.stderr.write('\n::: Transforms applied:\n')
            sys.stderr.write(
                pprint.pformat(document.transformer.applied) + '\n')
        if settings.dump_pseudo_xml:
            sys.stderr.write('\n::: Pseudo-XML:\n')
            sys.stderr.write(
                document.pformat().encode('raw_unicode_escape'))
            sys.stderr.write('\n')

    def report_Exception(self, error):
        """
        Report `error` on stderr, delegating to the specialised reporters
        for `utils.SystemMessage` and `UnicodeError` instances.
        """
        if isinstance(error, utils.SystemMessage):
            self.report_SystemMessage(error)
        elif isinstance(error, UnicodeError):
            self.report_UnicodeError(error)
        else:
            sys.stderr.write('%s: %s\n' % (error.__class__.__name__, error))
            sys.stderr.write("""\
Exiting due to error. Use "--traceback" to diagnose.
Please report errors to <docutils-users@lists.sf.net>.
Include "--traceback" output, Docutils version (%s),
Python version (%s), your OS type & version, and the
command line used.
""" % (__version__, sys.version.split()[0]))

    def report_SystemMessage(self, error):
        """Report on stderr the level of the system message that halted
        processing."""
        sys.stderr.write('Exiting due to level-%s (%s) system message.\n'
                         % (error.level,
                            utils.Reporter.levels[error.level]))

    def report_UnicodeError(self, error):
        """
        Explain an output-encoding failure on stderr and suggest values for
        "--output-encoding-error-handler".
        """
        sys.stderr.write(
            '%s: %s\n'
            '\n'
            'The specified output encoding (%s) cannot\n'
            'handle all of the output.\n'
            'Try setting "--output-encoding-error-handler" to\n'
            '\n'
            '* "xmlcharrefreplace" (for HTML & XML output);\n'
            % (error.__class__.__name__, error,
               self.settings.output_encoding))
        try:
            # Show how the offending text would look under each handler.
            data = error.object[error.start:error.end]
            sys.stderr.write(
                ' the output will contain "%s" and should be usable.\n'
                '* "backslashreplace" (for other output formats, Python 2.3+);\n'
                ' look for "%s" in the output.\n'
                % (data.encode('ascii', 'xmlcharrefreplace'),
                   data.encode('ascii', 'backslashreplace')))
        except AttributeError:
            # The error object does not expose the offending data.
            sys.stderr.write(' the output should be usable as-is.\n')
        sys.stderr.write(
            '* "replace"; look for "?" in the output.\n'
            '\n'
            '"--output-encoding-error-handler" is currently set to "%s".\n'
            '\n'
            'Exiting due to error. Use "--traceback" to diagnose.\n'
            'If the advice above doesn\'t eliminate the error,\n'
            'please report it to <docutils-users@lists.sf.net>.\n'
            'Include "--traceback" output, Docutils version (%s),\n'
            'Python version (%s), your OS type & version, and the\n'
            'command line used.\n'
            % (self.settings.output_encoding_error_handler,
               __version__, sys.version.split()[0]))
# Default `usage` and `description` values for `publish_cmdline`.
default_usage = '%prog [options] [<source> [<destination>]]'
default_description = (
    'Reads from <source> (default is stdin) '
    'and writes to <destination> (default is stdout).')
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line). Return the
    encoded string output also.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader, parser, writer, settings=settings)
    # Any component not passed in as an object is created by name.
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv=argv, usage=usage, description=description,
        settings_spec=settings_spec, settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with file-like I/O.
    Return the encoded string output also.

    Parameters: see `publish_programmatically`.
    """
    # File-based I/O classes on both ends; everything else is passed through.
    result = publish_programmatically(
        source_class=io.FileInput,
        destination_class=io.FileOutput,
        source=source, source_path=source_path,
        destination=destination, destination_path=destination_path,
        reader=reader, reader_name=reader_name,
        parser=parser, parser_name=parser_name,
        writer=writer, writer_name=writer_name,
        settings=settings, settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    output, publisher = result
    return output
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O. Return
    the encoded string or Unicode string output.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    output. Here's one way::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    Similarly for Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # String-based I/O classes on both ends; no destination object is needed.
    result = publish_programmatically(
        source_class=io.StringInput,
        destination_class=io.StringOutput,
        source=source, source_path=source_path,
        destination=None, destination_path=destination_path,
        reader=reader, reader_name=reader_name,
        parser=parser, parser_name=parser_name,
        writer=writer, writer_name=writer_name,
        settings=settings, settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    output, publisher = result
    return output
def publish_parts(source, source_path=None, destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Set up & run a `Publisher`, and return a dictionary of document parts.
    Dictionary keys are the names of parts, and values are Unicode strings;
    encoding is up to the client. For programmatic use with string I/O.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    input. Here's how::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # Only the publisher is needed here: the parts come from its writer,
    # not from the string output of the run.
    unused_output, pub = publish_programmatically(
        source_class=io.StringInput, source=source, source_path=source_path,
        destination_class=io.StringOutput,
        destination=None, destination_path=destination_path,
        reader=reader, reader_name=reader_name,
        parser=parser, parser_name=parser_name,
        writer=writer, writer_name=writer_name,
        settings=settings, settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return pub.writer.parts
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Set up & run a `Publisher` for custom programmatic use. Return the
    encoded string output and the Publisher object.

    Applications should not need to call this function directly. If it does
    seem to be necessary to call this function directly, please write to the
    docutils-develop@lists.sourceforge.net mailing list.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects. Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - `io.FileInput`: Either a file-like object (must have 'read' and
        'close' methods), or ``None`` (`source_path` is opened). If neither
        `source` nor `source_path` are supplied, `sys.stdin` is used.

      - `io.StringInput` **required**: The input string, either an encoded
        8-bit string (set the 'input_encoding' setting to the correct
        encoding) or a Unicode string (set the 'input_encoding' setting to
        'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional. Path to the file or object that produced
        `source`. Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects. Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened). If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file. Opened if no `destination`
        supplied.

      - `io.StringOutput`: Path to the file or object which will receive the
        output; optional. Used for determining relative paths (stylesheets,
        source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings. It's the end result of the
      `SettingsSpec`, config file, and option processing. If `settings` is
      passed, it's assumed to be complete and no further setting/config/option
      processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object. Provides
      extra application-specific settings definitions independently of
      components. In other words, the application becomes a component, and
      its settings data is processed along with that of the other components.
      Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application. Overrides the ``config_section`` attribute
      defined by `settings_spec`. Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    publisher = Publisher(reader=reader, parser=parser, writer=writer,
                          source_class=source_class,
                          destination_class=destination_class,
                          settings=settings)
    publisher.set_components(reader_name, parser_name, writer_name)
    # Settings must exist before the I/O objects are built: set_source and
    # set_destination read paths and encodings from them.
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(destination, destination_path)
    result = publisher.publish(enable_exit_status=enable_exit_status)
    return result, publisher