argv_encoding != sys.stdin.encoding
[docutils.git] / docutils / core.py
blobcf856549a3382802b3c8915729a6b600b9146e1e
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Calling the ``publish_*`` convenience functions (or instantiating a
7 `Publisher` object) with component names will result in default
8 behavior. For custom behavior (setting component options), create
9 custom component objects first, and pass *them* to
10 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
12 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
13 """
15 __docformat__ = 'reStructuredText'
17 import sys
18 import pprint
19 from docutils import __version__, __version_details__, SettingsSpec
20 from docutils import frontend, io, utils, readers, writers
21 from docutils.frontend import OptionParser
22 from docutils.transforms import Transformer
23 from docutils.error_reporting import ErrorOutput, ErrorString
24 import docutils.readers.doctree
class Publisher:

    """
    A facade encapsulating the high-level logic of a Docutils system.
    """

    def __init__(self, reader=None, parser=None, writer=None,
                 source=None, source_class=io.FileInput,
                 destination=None, destination_class=io.FileOutput,
                 settings=None):
        """
        Initial setup.  If any of `reader`, `parser`, or `writer` are not
        specified, the corresponding ``set_...`` method should be called with
        a component name (`set_reader` sets the parser as well).
        """

        self.document = None
        """The document tree (`docutils.nodes` objects)."""

        self.reader = reader
        """A `docutils.readers.Reader` instance."""

        self.parser = parser
        """A `docutils.parsers.Parser` instance."""

        self.writer = writer
        """A `docutils.writers.Writer` instance."""

        # Components must be instances; names belong in the "*_name"
        # parameters of the publish_* convenience functions.
        for component in 'reader', 'parser', 'writer':
            assert not isinstance(getattr(self, component), str), (
                'passed string "%s" as "%s" parameter; pass an instance, '
                'or use the "%s_name" parameter instead (in '
                'docutils.core.publish_* convenience functions).'
                % (getattr(self, component), component, component))

        self.source = source
        """The source of input data, a `docutils.io.Input` instance."""

        self.source_class = source_class
        """The class for dynamically created source objects."""

        self.destination = destination
        """The destination for docutils output, a `docutils.io.Output`
        instance."""

        self.destination_class = destination_class
        """The class for dynamically created destination objects."""

        self.settings = settings
        """An object containing Docutils settings as instance attributes.
        Set by `self.process_command_line()` or `self.get_settings()`."""

        self._stderr = ErrorOutput()

    def set_reader(self, reader_name, parser, parser_name):
        """Set `self.reader` by name (and `self.parser` from the reader)."""
        reader_class = readers.get_reader_class(reader_name)
        self.reader = reader_class(parser, parser_name)
        self.parser = self.reader.parser

    def set_writer(self, writer_name):
        """Set `self.writer` by name."""
        writer_class = writers.get_writer_class(writer_name)
        self.writer = writer_class()

    def set_components(self, reader_name, parser_name, writer_name):
        """
        Create, by name, whichever of reader, parser and writer is still
        unset.  Components that were passed in as instances are kept.
        """
        if self.reader is None:
            self.set_reader(reader_name, self.parser, parser_name)
        if self.parser is None:
            if self.reader.parser is None:
                self.reader.set_parser(parser_name)
            self.parser = self.reader.parser
        if self.writer is None:
            self.set_writer(writer_name)

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """
        Build and return an `OptionParser` for the current components.

        If `config_section` is given it is stored on `settings_spec`
        (a fresh `SettingsSpec` is created when none was passed).
        """
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            parts = config_section.split()
            # A section named "<something> application" also depends on
            # the shared "applications" section.
            if len(parts) > 1 and parts[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        option_parser = OptionParser(
            components=(self.parser, self.reader, self.writer, settings_spec),
            defaults=defaults, read_config_files=1,
            usage=usage, description=description)
        return option_parser

    def get_settings(self, usage=None, description=None,
                     settings_spec=None, config_section=None, **defaults):
        """
        Set and return default settings (overrides in `defaults` dict).

        Set components first (`self.set_reader` & `self.set_writer`).
        Explicitly setting `self.settings` disables command line option
        processing from `self.publish()`.
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        self.settings = option_parser.get_default_values()
        return self.settings

    def process_programmatic_settings(self, settings_spec,
                                      settings_overrides,
                                      config_section):
        """
        Compute `self.settings` for programmatic use, unless already set.

        `settings_overrides` is copied, never modified in place.
        """
        if self.settings is None:
            defaults = (settings_overrides or {}).copy()
            # Propagate exceptions by default when used programmatically:
            defaults.setdefault('traceback', 1)
            self.get_settings(settings_spec=settings_spec,
                              config_section=config_section,
                              **defaults)

    def process_command_line(self, argv=None, usage=None, description=None,
                             settings_spec=None, config_section=None,
                             **defaults):
        """
        Pass an empty list to `argv` to avoid reading `sys.argv` (the
        default).

        Set components first (`self.set_reader` & `self.set_writer`).
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        if argv is None:
            argv = sys.argv[1:]
            # converting to Unicode (Python 3 does this automatically):
            if sys.version_info < (3,0):
                # TODO: make this failsafe and reversible?
                argv_encoding = (frontend.locale_encoding or 'ascii')
                argv = [a.decode(argv_encoding) for a in argv]
        self.settings = option_parser.parse_args(argv)

    def set_io(self, source_path=None, destination_path=None):
        """Create the source and/or destination objects if still unset."""
        if self.source is None:
            self.set_source(source_path=source_path)
        if self.destination is None:
            self.set_destination(destination_path=destination_path)

    def set_source(self, source=None, source_path=None):
        """
        Instantiate `self.source_class` as `self.source`.

        The path is read from the settings when `source_path` is None;
        otherwise it is recorded in the settings.
        """
        if source_path is None:
            source_path = self.settings._source
        else:
            self.settings._source = source_path
        self.source = self.source_class(
            source=source, source_path=source_path,
            encoding=self.settings.input_encoding)

    def set_destination(self, destination=None, destination_path=None):
        """
        Instantiate `self.destination_class` as `self.destination`.

        The path is read from the settings when `destination_path` is
        None; otherwise it is recorded in the settings.
        """
        if destination_path is None:
            destination_path = self.settings._destination
        else:
            self.settings._destination = destination_path
        self.destination = self.destination_class(
            destination=destination, destination_path=destination_path,
            encoding=self.settings.output_encoding,
            error_handler=self.settings.output_encoding_error_handler)

    def apply_transforms(self):
        """Collect transforms from all components and apply them."""
        self.document.transformer.populate_from_components(
            (self.source, self.reader, self.reader.parser, self.writer,
             self.destination))
        self.document.transformer.apply_transforms()

    def publish(self, argv=None, usage=None, description=None,
                settings_spec=None, settings_overrides=None,
                config_section=None, enable_exit_status=None):
        """
        Process command line options and arguments (if `self.settings` not
        already set), run `self.reader` and then `self.writer`.  Return
        `self.writer`'s output.
        """
        # Set when processing failed in a way that was already reported;
        # the process then terminates through `sys.exit()` below.
        exit_requested = None
        try:
            if self.settings is None:
                self.process_command_line(
                    argv, usage, description, settings_spec, config_section,
                    **(settings_overrides or {}))
            self.set_io()
            self.document = self.reader.read(self.source, self.parser,
                                             self.settings)
            self.apply_transforms()
            output = self.writer.write(self.document, self.destination)
            self.writer.assemble_parts()
        except SystemExit as error:
            exit_requested = 1
            exit_status = error.code
        except Exception as error:
            if not self.settings: # exception too early to report nicely
                raise
            if self.settings.traceback: # Propagate exceptions?
                self.debugging_dumps()
                raise
            self.report_Exception(error)
            exit_requested = 1
            exit_status = 1
        self.debugging_dumps()
        if (enable_exit_status and self.document
            and (self.document.reporter.max_level
                 >= self.settings.exit_status_level)):
            sys.exit(self.document.reporter.max_level + 10)
        elif exit_requested:
            sys.exit(exit_status)
        return output

    def _write_line(self, data):
        """
        Write `data` and a newline to `self._stderr`.

        Stands in for the Python-2-only ``print >>self._stderr, data``:
        text strings are passed through unchanged, anything else is
        converted with `str()` first, and the newline is a separate write.
        """
        if not isinstance(data, (str, type(u''))):
            data = str(data)
        self._stderr.write(data)
        self._stderr.write('\n')

    def debugging_dumps(self):
        """Write the dumps selected by the ``dump_*`` settings to stderr."""
        if not self.document:
            return
        if self.settings.dump_settings:
            self._write_line('\n::: Runtime settings:')
            self._write_line(pprint.pformat(self.settings.__dict__))
        if self.settings.dump_internals:
            self._write_line('\n::: Document internals:')
            self._write_line(pprint.pformat(self.document.__dict__))
        if self.settings.dump_transforms:
            self._write_line('\n::: Transforms applied:')
            self._write_line(' (priority, transform class, '
                             'pending node details, keyword args)')
            self._write_line(pprint.pformat(
                [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
                  pending and pending.details, kwargs)
                 for priority, xclass, pending, kwargs
                 in self.document.transformer.applied]))
        if self.settings.dump_pseudo_xml:
            self._write_line('\n::: Pseudo-XML:')
            self._write_line(self.document.pformat().encode(
                'raw_unicode_escape'))

    def report_Exception(self, error):
        """Write a message for `error` to stderr, by exception type."""
        if isinstance(error, utils.SystemMessage):
            self.report_SystemMessage(error)
        elif isinstance(error, UnicodeEncodeError):
            self.report_UnicodeError(error)
        else:
            self._write_line(u'%s' % ErrorString(error))
            self._write_line(
                'Exiting due to error. Use "--traceback" to diagnose.\n'
                'Please report errors to <docutils-users@lists.sf.net>.\n'
                'Include "--traceback" output, Docutils version (%s [%s]),\n'
                'Python version (%s), your OS type & version, and the\n'
                'command line used.' % (__version__, __version_details__,
                                        sys.version.split()[0]))

    def report_SystemMessage(self, error):
        """Write the level of the system message that stopped processing."""
        self._write_line('Exiting due to level-%s (%s) system message.'
                         % (error.level,
                            utils.Reporter.levels[error.level]))

    def report_UnicodeError(self, error):
        """Explain an output encoding failure and suggest error handlers."""
        # The slice of the output that could not be encoded.
        data = error.object[error.start:error.end]
        self._stderr.write(
            '%s\n'
            '\n'
            'The specified output encoding (%s) cannot\n'
            'handle all of the output.\n'
            'Try setting "--output-encoding-error-handler" to\n'
            '\n'
            '* "xmlcharrefreplace" (for HTML & XML output);\n'
            ' the output will contain "%s" and should be usable.\n'
            '* "backslashreplace" (for other output formats);\n'
            ' look for "%s" in the output.\n'
            '* "replace"; look for "?" in the output.\n'
            '\n'
            '"--output-encoding-error-handler" is currently set to "%s".\n'
            '\n'
            'Exiting due to error. Use "--traceback" to diagnose.\n'
            'If the advice above doesn\'t eliminate the error,\n'
            'please report it to <docutils-users@lists.sf.net>.\n'
            'Include "--traceback" output, Docutils version (%s),\n'
            'Python version (%s), your OS type & version, and the\n'
            'command line used.\n'
            % (ErrorString(error),
               self.settings.output_encoding,
               data.encode('ascii', 'xmlcharrefreplace'),
               data.encode('ascii', 'backslashreplace'),
               self.settings.output_encoding_error_handler,
               __version__, sys.version.split()[0]))
# Usage line shown by the command-line front ends.
default_usage = '%prog [options] [<source> [<destination>]]'

# Program description shown with "--help" by the command-line front ends.
default_description = (
    'Reads from <source> (default is stdin) and writes to <destination> '
    '(default is stdout). See '
    '<http://docutils.sf.net/docs/user/config.html> for the full '
    'reference.')
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    Run a `Publisher` as a command-line front end with file I/O.

    Input and output file paths are taken automatically from the command
    line.  The encoded string output is returned as well.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of
      ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the
      command line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader, parser, writer, settings=settings)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=None):
    """
    Run a `Publisher` programmatically with file-like input and output.

    The encoded string output is returned as well.

    Parameters: see `publish_programmatically`.
    """
    result, _publisher = publish_programmatically(
        source_class=io.FileInput,
        destination_class=io.FileOutput,
        source=source,
        source_path=source_path,
        destination=destination,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return result
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=None):
    """
    Run a `Publisher` programmatically with string input and output.

    Returns the encoded string or Unicode string output.

    For encoded string output, be sure to set the 'output_encoding'
    setting to the desired encoding.  Set it to 'unicode' for unencoded
    Unicode string output.  Here's one way::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    Similarly for Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    result, _publisher = publish_programmatically(
        source_class=io.StringInput,
        destination_class=io.StringOutput,
        source=source,
        source_path=source_path,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return result
def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Run a `Publisher` and return a dictionary of document parts.

    Dictionary keys are the names of parts, and values are Unicode
    strings; encoding is up to the client.  For programmatic use with
    string I/O.

    For encoded string input, be sure to set the 'input_encoding' setting
    to the desired encoding.  Set it to 'unicode' for unencoded Unicode
    string input.  Here's how::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # Only the publisher is needed here; the string output is discarded.
    publisher = publish_programmatically(
        source_class=source_class,
        destination_class=io.StringOutput,
        source=source,
        source_path=source_path,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)[1]
    return publisher.writer.parts
def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=None):
    """
    Run a `Publisher` programmatically on string input and return the
    document tree.

    For encoded string input, be sure to set the 'input_encoding' setting
    to the desired encoding.  Set it to 'unicode' for unencoded Unicode
    string input.  Here's one way::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    publisher = Publisher(reader=reader, parser=parser, writer=None,
                          settings=settings,
                          source_class=source_class,
                          destination_class=io.NullOutput)
    # The 'null' writer and `io.NullOutput` are used because only the
    # document tree is returned.
    publisher.set_components(reader_name, parser_name, 'null')
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(None, None)
    publisher.publish(enable_exit_status=enable_exit_status)
    return publisher.document
def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=None):
    """
    Render an existing document tree with a `Publisher`, for programmatic
    use with string I/O.  Return the encoded string output.

    Note that document.settings is overridden; if you want to use the
    settings of the original `document`, pass settings=document.settings.

    Also, new document.transformer and document.reporter objects are
    generated.

    For encoded string output, be sure to set the 'output_encoding'
    setting to the desired encoding.  Set it to 'unicode' for unencoded
    Unicode string output.  Here's one way::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an
    existing document tree.

    Other parameters: see `publish_programmatically`.
    """
    doctree_reader = docutils.readers.doctree.Reader(parser_name='null')
    doctree_source = io.DocTreeInput(document)
    publisher = Publisher(doctree_reader, None, writer,
                          source=doctree_source,
                          destination_class=io.StringOutput,
                          settings=settings)
    # Fall back to a writer created by name only when no instance was given.
    if not writer:
        if writer_name:
            publisher.set_writer(writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_destination(None, destination_path)
    return publisher.publish(enable_exit_status=enable_exit_status)
def publish_cmdline_to_binary(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description,
                    destination=None, destination_class=io.BinaryFileOutput
                    ):
    """
    Run a `Publisher` as a command-line front end with file I/O.

    Input and output file paths are taken automatically from the command
    line.  The encoded string output is returned as well.

    This is just like publish_cmdline, except that it uses
    io.BinaryFileOutput instead of io.FileOutput.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of
      ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the
      command line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    # NOTE(review): `destination` is accepted but never forwarded to the
    # Publisher -- confirm whether that is intentional.
    publisher = Publisher(reader, parser, writer, settings=settings,
                          destination_class=destination_class)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Run a `Publisher` for custom programmatic use.  Return a tuple of the
    encoded string output and the Publisher object.

    Applications should not need to call this function directly.  If it
    does seem to be necessary to call this function directly, please write
    to the Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created
      source objects.  Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - If `source_class` is `io.FileInput`: Either a file-like object
        (must have 'read' and 'close' methods), or ``None``
        (`source_path` is opened).  If neither `source` nor
        `source_path` are supplied, `sys.stdin` is used.

      - If `source_class` is `io.StringInput` **required**: The input
        string, either an encoded 8-bit string (set the
        'input_encoding' setting to the correct encoding) or a Unicode
        string (set the 'input_encoding' setting to 'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional.  Path to the file or object that
        produced `source`.  Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects.  Typically `io.FileOutput` or
      `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened).  If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file.  Opened if no
        `destination` supplied.

      - `io.StringOutput`: Path to the file or object which will receive
        the output; optional.  Used for determining relative paths
        (stylesheets, source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated
      if no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated
      if no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated
      if no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object,
      for dotted-attribute access to runtime settings.  It's the end
      result of the `SettingsSpec`, config file, and option processing.
      If `settings` is passed, it's assumed to be complete and no further
      setting/config/option processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object.
      Provides extra application-specific settings definitions
      independently of components.  In other words, the application
      becomes a component, and its settings data is processed along with
      that of the other components.  Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file
      section for this application.  Overrides the ``config_section``
      attribute defined by `settings_spec`.  Used only if no `settings`
      specified.

    * `enable_exit_status`: Boolean; enable exit status at end of
      processing?
    """
    publisher = Publisher(reader, parser, writer, settings=settings,
                          source_class=source_class,
                          destination_class=destination_class)
    publisher.set_components(reader_name, parser_name, writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(destination, destination_path)
    result = publisher.publish(enable_exit_status=enable_exit_status)
    return result, publisher