Decode command line arguments with the locale's preferred encoding.
[docutils.git] / docutils / core.py
blob21f8d54bebec6782de9ca78cd0f6c7e40cbd44ad
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Calling the ``publish_*`` convenience functions (or instantiating a
7 `Publisher` object) with component names will result in default
8 behavior. For custom behavior (setting component options), create
9 custom component objects first, and pass *them* to
10 ``publish_*``/`Publisher`. See `The Docutils Publisher`_.
12 .. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
13 """
15 __docformat__ = 'reStructuredText'
17 import sys
18 import pprint
19 from docutils import __version__, __version_details__, SettingsSpec
20 from docutils import frontend, io, utils, readers, writers
21 from docutils.frontend import OptionParser
22 from docutils.transforms import Transformer
23 import docutils.readers.doctree
# Encoding used to decode command-line arguments (see
# `Publisher.process_command_line`).  Falls back to ASCII whenever the
# locale cannot be queried or reports an encoding Python has no codec for.
try:
    import codecs
    import locale
    argv_encoding = locale.getpreferredencoding()
    # Validate the name now: an unknown (or empty/None) encoding would
    # otherwise only surface later as an error while decoding sys.argv.
    codecs.lookup(argv_encoding)
except Exception:
    # Deliberately broad (import failure, locale.Error, LookupError,
    # TypeError, ...), but no longer a bare ``except:`` so that
    # SystemExit and KeyboardInterrupt are not swallowed.
    argv_encoding = 'ascii'
class Publisher:

    """
    A facade encapsulating the high-level logic of a Docutils system.
    """

    def __init__(self, reader=None, parser=None, writer=None,
                 source=None, source_class=io.FileInput,
                 destination=None, destination_class=io.FileOutput,
                 settings=None):
        """
        Initial setup. If any of `reader`, `parser`, or `writer` are not
        specified, the corresponding ``set_...`` method should be called with
        a component name (`set_reader` sets the parser as well).
        """

        self.document = None
        """The document tree (`docutils.nodes` objects)."""

        self.reader = reader
        """A `docutils.readers.Reader` instance."""

        self.parser = parser
        """A `docutils.parsers.Parser` instance."""

        self.writer = writer
        """A `docutils.writers.Writer` instance."""

        # Catch the common mistake of passing a component *name* where a
        # component *instance* is expected.
        for component in 'reader', 'parser', 'writer':
            assert not isinstance(getattr(self, component), str), (
                'passed string "%s" as "%s" parameter; pass an instance, '
                'or use the "%s_name" parameter instead (in '
                'docutils.core.publish_* convenience functions).'
                % (getattr(self, component), component, component))

        self.source = source
        """The source of input data, a `docutils.io.Input` instance."""

        self.source_class = source_class
        """The class for dynamically created source objects."""

        self.destination = destination
        """The destination for docutils output, a `docutils.io.Output`
        instance."""

        self.destination_class = destination_class
        """The class for dynamically created destination objects."""

        self.settings = settings
        """An object containing Docutils settings as instance attributes.
        Set by `self.process_command_line()` or `self.get_settings()`."""

    def set_reader(self, reader_name, parser, parser_name):
        """Set `self.reader` by name (and `self.parser` from the reader)."""
        reader_class = readers.get_reader_class(reader_name)
        self.reader = reader_class(parser, parser_name)
        self.parser = self.reader.parser

    def set_writer(self, writer_name):
        """Set `self.writer` by name."""
        writer_class = writers.get_writer_class(writer_name)
        self.writer = writer_class()

    def set_components(self, reader_name, parser_name, writer_name):
        """
        Instantiate, by name, whichever of reader, parser and writer have
        not been supplied as objects already.
        """
        if self.reader is None:
            self.set_reader(reader_name, self.parser, parser_name)
        if self.parser is None:
            if self.reader.parser is None:
                self.reader.set_parser(parser_name)
            self.parser = self.reader.parser
        if self.writer is None:
            self.set_writer(writer_name)

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """
        Create and return an `OptionParser` for the current components.

        If `config_section` is given it is stored on `settings_spec` (a
        fresh `SettingsSpec` is created when none was passed).  Note that
        a passed-in `settings_spec` is modified in place.
        """
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            parts = config_section.split()
            if len(parts) > 1 and parts[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        option_parser = OptionParser(
            components=(self.parser, self.reader, self.writer, settings_spec),
            defaults=defaults, read_config_files=1,
            usage=usage, description=description)
        return option_parser

    def get_settings(self, usage=None, description=None,
                     settings_spec=None, config_section=None, **defaults):
        """
        Set and return default settings (overrides in `defaults` dict).

        Set components first (`self.set_reader` & `self.set_writer`).
        Explicitly setting `self.settings` disables command line option
        processing from `self.publish()`.
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        self.settings = option_parser.get_default_values()
        return self.settings

    def process_programmatic_settings(self, settings_spec,
                                      settings_overrides,
                                      config_section):
        """
        Compute `self.settings` for programmatic use, unless already set.

        `settings_overrides` (a dict or None) is copied, never modified.
        """
        if self.settings is None:
            defaults = (settings_overrides or {}).copy()
            # Propagate exceptions by default when used programmatically:
            defaults.setdefault('traceback', 1)
            self.get_settings(settings_spec=settings_spec,
                              config_section=config_section,
                              **defaults)

    def process_command_line(self, argv=None, usage=None, description=None,
                             settings_spec=None, config_section=None,
                             **defaults):
        """
        Pass an empty list to `argv` to avoid reading `sys.argv` (the
        default).

        Set components first (`self.set_reader` & `self.set_writer`).

        Arguments taken from `sys.argv` are decoded with the module-level
        `argv_encoding`.  An argument that cannot be decoded is passed on
        unchanged instead of aborting before any settings exist.
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        if argv is None:
            argv = []
            for arg in sys.argv[1:]:
                # Only objects offering ``decode`` need conversion; text
                # strings without it are already decoded.
                if hasattr(arg, 'decode'):
                    try:
                        arg = arg.decode(argv_encoding)
                    except (UnicodeError, LookupError):
                        # Best effort: keep the raw argument rather than
                        # crash on undecodable bytes or an unknown codec.
                        pass
                argv.append(arg)
        self.settings = option_parser.parse_args(argv)

    def set_io(self, source_path=None, destination_path=None):
        """Create source and destination objects where none are set yet."""
        if self.source is None:
            self.set_source(source_path=source_path)
        if self.destination is None:
            self.set_destination(destination_path=destination_path)

    def set_source(self, source=None, source_path=None):
        """
        Set `self.source` to a new `self.source_class` instance.

        `source_path` defaults to ``self.settings._source``; an explicit
        value is recorded there.
        """
        if source_path is None:
            source_path = self.settings._source
        else:
            self.settings._source = source_path
        self.source = self.source_class(
            source=source, source_path=source_path,
            encoding=self.settings.input_encoding)

    def set_destination(self, destination=None, destination_path=None):
        """
        Set `self.destination` to a new `self.destination_class` instance.

        `destination_path` defaults to ``self.settings._destination``; an
        explicit value is recorded there.
        """
        if destination_path is None:
            destination_path = self.settings._destination
        else:
            self.settings._destination = destination_path
        self.destination = self.destination_class(
            destination=destination, destination_path=destination_path,
            encoding=self.settings.output_encoding,
            error_handler=self.settings.output_encoding_error_handler)

    def apply_transforms(self):
        """Collect the components' transforms and apply them."""
        self.document.transformer.populate_from_components(
            (self.source, self.reader, self.reader.parser, self.writer,
             self.destination))
        self.document.transformer.apply_transforms()

    def publish(self, argv=None, usage=None, description=None,
                settings_spec=None, settings_overrides=None,
                config_section=None, enable_exit_status=None):
        """
        Process command line options and arguments (if `self.settings` not
        already set), run `self.reader` and then `self.writer`. Return
        `self.writer`'s output.

        A `SystemExit` raised during processing is re-issued via
        `sys.exit()` after the debugging dumps.  Other exceptions are
        re-raised when no settings exist yet or when the ``traceback``
        setting is on; otherwise they are reported on stderr and the
        process exits with status 1.
        """
        # The caught exception is fetched with sys.exc_info() so that the
        # handlers need no version-specific "except ..., name" syntax.
        exit_requested = None
        exit_status = None
        output = None
        try:
            if self.settings is None:
                self.process_command_line(
                    argv, usage, description, settings_spec, config_section,
                    **(settings_overrides or {}))
            self.set_io()
            self.document = self.reader.read(self.source, self.parser,
                                             self.settings)
            self.apply_transforms()
            output = self.writer.write(self.document, self.destination)
            self.writer.assemble_parts()
        except SystemExit:
            exit_requested = 1
            exit_status = sys.exc_info()[1].code
        except Exception:
            error = sys.exc_info()[1]
            if not self.settings:   # exception too early to report nicely
                raise
            if self.settings.traceback:   # Propagate exceptions?
                self.debugging_dumps()
                raise
            self.report_Exception(error)
            exit_requested = 1
            exit_status = 1
        self.debugging_dumps()
        if (enable_exit_status and self.document
            and (self.document.reporter.max_level
                 >= self.settings.exit_status_level)):
            sys.exit(self.document.reporter.max_level + 10)
        elif exit_requested:
            sys.exit(exit_status)
        return output

    def debugging_dumps(self):
        """
        Write the dumps enabled by the ``dump_*`` settings to stderr.

        Does nothing while there is no document.
        """
        if not self.document:
            return
        write = sys.stderr.write
        if self.settings.dump_settings:
            write('\n::: Runtime settings:\n')
            write(pprint.pformat(self.settings.__dict__) + '\n')
        if self.settings.dump_internals:
            write('\n::: Document internals:\n')
            write(pprint.pformat(self.document.__dict__) + '\n')
        if self.settings.dump_transforms:
            write('\n::: Transforms applied:\n')
            write(' (priority, transform class, '
                  'pending node details, keyword args)\n')
            write(pprint.pformat(
                [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
                  pending and pending.details, kwargs)
                 for priority, xclass, pending, kwargs
                 in self.document.transformer.applied]) + '\n')
        if self.settings.dump_pseudo_xml:
            write('\n::: Pseudo-XML:\n')
            dump = self.document.pformat().encode('raw_unicode_escape')
            if not isinstance(dump, str):
                # encode() produced a separate bytes type; turn it back
                # into text byte-for-byte so it can be concatenated.
                dump = dump.decode('latin-1')
            write(dump + '\n')

    def report_Exception(self, error):
        """Report `error` on stderr, dispatching on its type."""
        if isinstance(error, utils.SystemMessage):
            self.report_SystemMessage(error)
        elif isinstance(error, UnicodeEncodeError):
            self.report_UnicodeError(error)
        else:
            sys.stderr.write(
                '%s: %s\n' % (error.__class__.__name__, error))
            sys.stderr.write("""\
Exiting due to error. Use "--traceback" to diagnose.
Please report errors to <docutils-users@lists.sf.net>.
Include "--traceback" output, Docutils version (%s [%s]),
Python version (%s), your OS type & version, and the
command line used.
""" % (__version__, __version_details__, sys.version.split()[0]))

    def report_SystemMessage(self, error):
        """Report a `utils.SystemMessage` (level number and name)."""
        sys.stderr.write('Exiting due to level-%s (%s) system message.\n'
                         % (error.level,
                            utils.Reporter.levels[error.level]))

    def report_UnicodeError(self, error):
        """
        Report an output encoding failure, showing the offending data and
        suggesting alternative ``--output-encoding-error-handler`` values.
        """
        # The slice of the text that could not be encoded:
        data = error.object[error.start:error.end]
        sys.stderr.write(
            '%s: %s\n'
            '\n'
            'The specified output encoding (%s) cannot\n'
            'handle all of the output.\n'
            'Try setting "--output-encoding-error-handler" to\n'
            '\n'
            '* "xmlcharrefreplace" (for HTML & XML output);\n'
            ' the output will contain "%s" and should be usable.\n'
            '* "backslashreplace" (for other output formats);\n'
            ' look for "%s" in the output.\n'
            '* "replace"; look for "?" in the output.\n'
            '\n'
            '"--output-encoding-error-handler" is currently set to "%s".\n'
            '\n'
            'Exiting due to error. Use "--traceback" to diagnose.\n'
            'If the advice above doesn\'t eliminate the error,\n'
            'please report it to <docutils-users@lists.sf.net>.\n'
            'Include "--traceback" output, Docutils version (%s),\n'
            'Python version (%s), your OS type & version, and the\n'
            'command line used.\n'
            % (error.__class__.__name__, error,
               self.settings.output_encoding,
               data.encode('ascii', 'xmlcharrefreplace'),
               data.encode('ascii', 'backslashreplace'),
               self.settings.output_encoding_error_handler,
               __version__, sys.version.split()[0]))
# Default usage line for the command-line front ends.
default_usage = '%prog [options] [<source> [<destination>]]'

# Default program description for the command-line front ends.
default_description = (
    'Reads from <source> (default is stdin) '
    'and writes to <destination> (default is stdout). '
    'See <http://docutils.sf.net/docs/user/config.html> '
    'for the full reference.')
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    Run a `Publisher` as a command-line front end: the input and output
    file paths come from the command line.  The encoded string output is
    returned as well.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    publisher = Publisher(reader=reader, parser=parser, writer=writer,
                          settings=settings)
    publisher.set_components(reader_name, parser_name, writer_name)
    return publisher.publish(
        argv=argv, usage=usage, description=description,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=None):
    """
    Programmatic front end for file-like I/O: run a `Publisher` using
    `io.FileInput` / `io.FileOutput` and return the encoded string output.

    Parameters: see `publish_programmatically`.
    """
    # publish_programmatically returns (output, publisher); only the
    # output is of interest here.
    result = publish_programmatically(
        source_class=io.FileInput,
        source=source,
        source_path=source_path,
        destination_class=io.FileOutput,
        destination=destination,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return result[0]
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=None):
    """
    Programmatic front end for string I/O: run a `Publisher` using
    `io.StringInput` / `io.StringOutput` and return the encoded string or
    Unicode string output.

    To get an encoded string, set the 'output_encoding' setting to the
    desired encoding; set it to 'unicode' to get an unencoded Unicode
    string.  For example::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    Likewise for Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # publish_programmatically returns (output, publisher); only the
    # output is of interest here.
    result = publish_programmatically(
        source_class=io.StringInput,
        source=source,
        source_path=source_path,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return result[0]
def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Run a `Publisher` and return the writer's dictionary of document
    parts.  Keys are part names and values are Unicode strings; encoding
    them is left to the client.  Intended for programmatic use with
    string I/O.

    When passing an encoded input string, set the 'input_encoding'
    setting to its encoding; set it to 'unicode' when passing an
    unencoded Unicode string.  For example::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # The string output (first item) is not needed, only the publisher.
    publisher = publish_programmatically(
        source=source,
        source_path=source_path,
        source_class=source_class,
        destination_class=io.StringOutput,
        destination=None,
        destination_path=destination_path,
        reader=reader,
        reader_name=reader_name,
        parser=parser,
        parser_name=parser_name,
        writer=writer,
        writer_name=writer_name,
        settings=settings,
        settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)[1]
    return publisher.writer.parts
def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=None):
    """
    Programmatic front end for string input that returns the document
    tree instead of rendered output (the 'null' writer and
    `io.NullOutput` are used).

    When passing an encoded input string, set the 'input_encoding'
    setting to its encoding; set it to 'unicode' when passing an
    unencoded Unicode string.  For example::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    publisher = Publisher(reader=reader, parser=parser, writer=None,
                          settings=settings,
                          source_class=source_class,
                          destination_class=io.NullOutput)
    publisher.set_components(reader_name, parser_name, 'null')
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source, source_path)
    publisher.set_destination(None, None)
    # The writer output is irrelevant; publishing fills in the document.
    publisher.publish(enable_exit_status=enable_exit_status)
    return publisher.document
def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=None):
    """
    Render an existing document tree with a `Publisher`, for programmatic
    use with string I/O.  Return the encoded string output.

    Note that document.settings is overridden; to keep the settings of
    the original `document`, pass settings=document.settings.

    New document.transformer and document.reporter objects are generated
    as well.

    To get an encoded string, set the 'output_encoding' setting to the
    desired encoding; set it to 'unicode' to get an unencoded Unicode
    string.  For example::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an existing
    document tree.

    Other parameters: see `publish_programmatically`.
    """
    doctree_reader = docutils.readers.doctree.Reader(parser_name='null')
    publisher = Publisher(reader=doctree_reader, parser=None, writer=writer,
                          source=io.DocTreeInput(document),
                          destination_class=io.StringOutput,
                          settings=settings)
    # Only look a writer up by name when no writer object was passed.
    if not writer:
        if writer_name:
            publisher.set_writer(writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_destination(None, destination_path)
    rendered = publisher.publish(enable_exit_status=enable_exit_status)
    return rendered
def publish_cmdline_to_binary(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description,
                    destination=None, destination_class=io.BinaryFileOutput
                    ):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line). Return the
    encoded string output also.

    This is just like publish_cmdline, except that it uses
    io.BinaryFileOutput instead of io.FileOutput.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    - `destination`: Optional ready-made destination object, handed to
      `Publisher` unchanged (which expects a `docutils.io.Output`
      instance).  When ``None`` (the default), a `destination_class`
      instance is created from the command-line destination path.
    - `destination_class`: The class for the dynamically created
      destination object.
    """
    # `destination` used to be accepted but silently ignored; pass it on
    # so that an explicitly supplied destination is actually used.
    pub = Publisher(reader, parser, writer, settings=settings,
                    destination=destination,
                    destination_class=destination_class)
    pub.set_components(reader_name, parser_name, writer_name)
    output = pub.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section, enable_exit_status=enable_exit_status)
    return output
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Set up & run a `Publisher` for custom programmatic use.  Return a
    tuple of the encoded string output and the Publisher object.

    Applications should not need to call this function directly.  If
    calling it directly does seem necessary, please write to the
    Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects.  Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - If `source_class` is `io.FileInput`: Either a file-like object
        (must have 'read' and 'close' methods), or ``None``
        (`source_path` is opened).  If neither `source` nor
        `source_path` are supplied, `sys.stdin` is used.

      - If `source_class` is `io.StringInput` **required**: The input
        string, either an encoded 8-bit string (set the
        'input_encoding' setting to the correct encoding) or a Unicode
        string (set the 'input_encoding' setting to 'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional.  Path to the file or object that
        produced `source`.  Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects.  Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened).  If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file.  Opened if no
        `destination` supplied.

      - `io.StringOutput`: Path to the file or object which will receive
        the output; optional.  Used for determining relative paths
        (stylesheets, source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings.  It's the end result of
      the `SettingsSpec`, config file, and option processing.  If `settings`
      is passed, it's assumed to be complete and no further
      setting/config/option processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object.
      Provides extra application-specific settings definitions independently
      of components.  In other words, the application becomes a component,
      and its settings data is processed along with that of the other
      components.  Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application.  Overrides the ``config_section`` attribute
      defined by `settings_spec`.  Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    publisher = Publisher(reader=reader, parser=parser, writer=writer,
                          settings=settings,
                          source_class=source_class,
                          destination_class=destination_class)
    publisher.set_components(reader_name, parser_name, writer_name)
    publisher.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    publisher.set_source(source=source, source_path=source_path)
    publisher.set_destination(destination=destination,
                              destination_path=destination_path)
    rendered = publisher.publish(enable_exit_status=enable_exit_status)
    return rendered, publisher