# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.
"""
Calling the ``publish_*`` convenience functions (or instantiating a
`Publisher` object) with component names will result in default
behavior. For custom behavior (setting component options), create
custom component objects first, and pass *them* to
``publish_*``/`Publisher`. See `The Docutils Publisher`_.

.. _The Docutils Publisher: http://docutils.sf.net/docs/api/publisher.html
"""
# Markup format used by the docstrings in this module.
__docformat__ = 'reStructuredText'
import pprint
import sys

from docutils import __version__, __version_details__, SettingsSpec
from docutils import frontend, io, utils, readers, writers
from docutils.frontend import OptionParser
from docutils.transforms import Transformer
from docutils.error_reporting import ErrorOutput, ErrorString
import docutils.readers.doctree
class Publisher:

    """
    A facade encapsulating the high-level logic of a Docutils system.
    """

    def __init__(self, reader=None, parser=None, writer=None,
                 source=None, source_class=io.FileInput,
                 destination=None, destination_class=io.FileOutput,
                 settings=None):
        """
        Initial setup. If any of `reader`, `parser`, or `writer` are not
        specified, the corresponding ``set_...`` method should be called with
        a component name (`set_reader` sets the parser as well).
        """

        self.document = None
        """The document tree (`docutils.nodes` objects)."""

        self.reader = reader
        """A `docutils.readers.Reader` instance."""

        self.parser = parser
        """A `docutils.parsers.Parser` instance."""

        self.writer = writer
        """A `docutils.writers.Writer` instance."""

        # Components must be instances; names go through the ``*_name``
        # parameters of the ``publish_*`` convenience functions instead.
        for component in 'reader', 'parser', 'writer':
            assert not isinstance(getattr(self, component), str), (
                'passed string "%s" as "%s" parameter; pass an instance, '
                'or use the "%s_name" parameter instead (in '
                'docutils.core.publish_* convenience functions).'
                % (getattr(self, component), component, component))

        self.source = source
        """The source of input data, a `docutils.io.Input` instance."""

        self.source_class = source_class
        """The class for dynamically created source objects."""

        self.destination = destination
        """The destination for docutils output, a `docutils.io.Output`
        instance."""

        self.destination_class = destination_class
        """The class for dynamically created destination objects."""

        self.settings = settings
        """An object containing Docutils settings as instance attributes.
        Set by `self.process_command_line()` or `self.get_settings()`."""

        self._stderr = ErrorOutput()

    def _print_stderr(self, data):
        """
        Write `data` followed by a newline to `self._stderr`.

        Portable stand-in for the Python 2 ``print >>stream, data``
        statement, so the module parses under Python 2 and Python 3 alike.
        """
        self._stderr.write(data)
        self._stderr.write('\n')

    def set_reader(self, reader_name, parser, parser_name):
        """Set `self.reader` by name."""
        reader_class = readers.get_reader_class(reader_name)
        self.reader = reader_class(parser, parser_name)
        # The reader owns the parser; mirror it on the publisher.
        self.parser = self.reader.parser

    def set_writer(self, writer_name):
        """Set `self.writer` by name."""
        writer_class = writers.get_writer_class(writer_name)
        self.writer = writer_class()

    def set_components(self, reader_name, parser_name, writer_name):
        """
        Instantiate by name whichever of reader, parser and writer is
        still ``None``; components that are already set are left untouched.
        """
        if self.reader is None:
            self.set_reader(reader_name, self.parser, parser_name)
        if self.parser is None:
            # A reader instance was supplied; reuse its parser if it has one.
            if self.reader.parser is None:
                self.reader.set_parser(parser_name)
            self.parser = self.reader.parser
        if self.writer is None:
            self.set_writer(writer_name)

    def setup_option_parser(self, usage=None, description=None,
                            settings_spec=None, config_section=None,
                            **defaults):
        """
        Build and return an `OptionParser` covering the parser, reader,
        writer and `settings_spec` components.

        If `config_section` is given it is stored on `settings_spec`
        (a fresh `SettingsSpec` is created when none was passed).
        `defaults` are handed to the `OptionParser` as default values.
        """
        if config_section:
            if not settings_spec:
                settings_spec = SettingsSpec()
            settings_spec.config_section = config_section
            parts = config_section.split()
            if len(parts) > 1 and parts[-1] == 'application':
                settings_spec.config_section_dependencies = ['applications']
        #@@@ Add self.source & self.destination to components in future?
        option_parser = OptionParser(
            components=(self.parser, self.reader, self.writer, settings_spec),
            defaults=defaults, read_config_files=1,
            usage=usage, description=description)
        return option_parser

    def get_settings(self, usage=None, description=None,
                     settings_spec=None, config_section=None, **defaults):
        """
        Set and return default settings (overrides in `defaults` dict).

        Set components first (`self.set_reader` & `self.set_writer`).
        Explicitly setting `self.settings` disables command line option
        processing from `self.publish()`.
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        self.settings = option_parser.get_default_values()
        return self.settings

    def process_programmatic_settings(self, settings_spec,
                                      settings_overrides,
                                      config_section):
        """
        Compute `self.settings` for programmatic use, unless already set.

        `settings_overrides` (a dict or ``None``) is copied, so the
        caller's dictionary is never modified.
        """
        if self.settings is None:
            defaults = (settings_overrides or {}).copy()
            # Propagate exceptions by default when used programmatically:
            defaults.setdefault('traceback', 1)
            self.get_settings(settings_spec=settings_spec,
                              config_section=config_section,
                              **defaults)

    def process_command_line(self, argv=None, usage=None, description=None,
                             settings_spec=None, config_section=None,
                             **defaults):
        """
        Parse the command line and store the result in `self.settings`.

        Pass an empty list to `argv` to avoid reading `sys.argv` (the
        default).

        Set components first (`self.set_reader` & `self.set_writer`).
        """
        option_parser = self.setup_option_parser(
            usage, description, settings_spec, config_section, **defaults)
        if argv is None:
            argv = sys.argv[1:]
            # converting to Unicode (Python 3 does this automatically):
            if sys.version_info < (3,0):
                # TODO: make this failsafe and reversible
                argv_encoding = (sys.stdin.encoding or
                                 frontend.locale_encoding or 'ascii')
                argv = [a.decode(argv_encoding) for a in argv]
        self.settings = option_parser.parse_args(argv)

    def set_io(self, source_path=None, destination_path=None):
        """Create the source and/or destination objects if still unset."""
        if self.source is None:
            self.set_source(source_path=source_path)
        if self.destination is None:
            self.set_destination(destination_path=destination_path)

    def set_source(self, source=None, source_path=None):
        """
        Create `self.source` from `self.source_class`.

        A missing `source_path` is taken from ``self.settings._source``;
        a given one is recorded there.
        """
        if source_path is None:
            source_path = self.settings._source
        else:
            self.settings._source = source_path
        self.source = self.source_class(
            source=source, source_path=source_path,
            encoding=self.settings.input_encoding)

    def set_destination(self, destination=None, destination_path=None):
        """
        Create `self.destination` from `self.destination_class`.

        A missing `destination_path` is taken from
        ``self.settings._destination``; a given one is recorded there.
        """
        if destination_path is None:
            destination_path = self.settings._destination
        else:
            self.settings._destination = destination_path
        self.destination = self.destination_class(
            destination=destination, destination_path=destination_path,
            encoding=self.settings.output_encoding,
            error_handler=self.settings.output_encoding_error_handler)

    def apply_transforms(self):
        """Collect the components' transforms and apply them to the document."""
        self.document.transformer.populate_from_components(
            (self.source, self.reader, self.reader.parser, self.writer,
             self.destination))
        self.document.transformer.apply_transforms()

    def publish(self, argv=None, usage=None, description=None,
                settings_spec=None, settings_overrides=None,
                config_section=None, enable_exit_status=None):
        """
        Process command line options and arguments (if `self.settings` not
        already set), run `self.reader` and then `self.writer`. Return
        `self.writer`'s output.

        Exits via `sys.exit` when processing raised `SystemExit`, when an
        error was reported without ``--traceback``, or when
        `enable_exit_status` is true and the document's highest system
        message level reaches ``settings.exit_status_level``.
        """
        # Named so that the builtin `exit` is not shadowed.
        exit_requested = None
        try:
            if self.settings is None:
                self.process_command_line(
                    argv, usage, description, settings_spec, config_section,
                    **(settings_overrides or {}))
            self.set_io()
            self.document = self.reader.read(self.source, self.parser,
                                             self.settings)
            self.apply_transforms()
            output = self.writer.write(self.document, self.destination)
            self.writer.assemble_parts()
        except SystemExit as error:
            exit_requested = 1
            exit_status = error.code
        except Exception as error:
            if not self.settings:       # exception too early to report nicely
                raise
            if self.settings.traceback: # Propagate exceptions?
                self.debugging_dumps()
                raise
            self.report_Exception(error)
            exit_requested = True
            exit_status = 1
        self.debugging_dumps()
        if (enable_exit_status and self.document
            and (self.document.reporter.max_level
                 >= self.settings.exit_status_level)):
            sys.exit(self.document.reporter.max_level + 10)
        elif exit_requested:
            sys.exit(exit_status)
        return output

    def debugging_dumps(self):
        """
        Write the debugging dumps selected by the ``dump_*`` settings to
        `self._stderr`. Does nothing when there is no document.
        """
        if not self.document:
            return
        if self.settings.dump_settings:
            self._print_stderr('\n::: Runtime settings:')
            self._print_stderr(pprint.pformat(self.settings.__dict__))
        if self.settings.dump_internals:
            self._print_stderr('\n::: Document internals:')
            self._print_stderr(pprint.pformat(self.document.__dict__))
        if self.settings.dump_transforms:
            self._print_stderr('\n::: Transforms applied:')
            self._print_stderr(' (priority, transform class, '
                               'pending node details, keyword args)')
            self._print_stderr(pprint.pformat(
                [(priority, '%s.%s' % (xclass.__module__, xclass.__name__),
                  pending and pending.details, kwargs)
                 for priority, xclass, pending, kwargs
                 in self.document.transformer.applied]))
        if self.settings.dump_pseudo_xml:
            self._print_stderr('\n::: Pseudo-XML:')
            # NOTE(review): the payload is an encoded string; this assumes
            # `ErrorOutput.write` accepts encoded data -- confirm.
            self._print_stderr(self.document.pformat().encode(
                'raw_unicode_escape'))

    def report_Exception(self, error):
        """Report `error` on `self._stderr`, dispatching on its type."""
        if isinstance(error, utils.SystemMessage):
            self.report_SystemMessage(error)
        elif isinstance(error, UnicodeEncodeError):
            self.report_UnicodeError(error)
        else:
            self._print_stderr(u'%s' % ErrorString(error))
            self._print_stderr(("""\
Exiting due to error. Use "--traceback" to diagnose.
Please report errors to <docutils-users@lists.sf.net>.
Include "--traceback" output, Docutils version (%s [%s]),
Python version (%s), your OS type & version, and the
command line used.""" % (__version__, __version_details__,
                         sys.version.split()[0])))

    def report_SystemMessage(self, error):
        """Report the level of the system message that stopped processing."""
        self._print_stderr('Exiting due to level-%s (%s) system message.'
                           % (error.level,
                              utils.Reporter.levels[error.level]))

    def report_UnicodeError(self, error):
        """
        Explain an output encoding failure and suggest error handlers.

        The characters that could not be encoded are shown both as XML
        character references and as backslash escapes.
        """
        data = error.object[error.start:error.end]
        self._stderr.write(
            '%s\n'
            '\n'
            'The specified output encoding (%s) cannot\n'
            'handle all of the output.\n'
            'Try setting "--output-encoding-error-handler" to\n'
            '\n'
            '* "xmlcharrefreplace" (for HTML & XML output);\n'
            ' the output will contain "%s" and should be usable.\n'
            '* "backslashreplace" (for other output formats);\n'
            ' look for "%s" in the output.\n'
            '* "replace"; look for "?" in the output.\n'
            '\n'
            '"--output-encoding-error-handler" is currently set to "%s".\n'
            '\n'
            'Exiting due to error. Use "--traceback" to diagnose.\n'
            'If the advice above doesn\'t eliminate the error,\n'
            'please report it to <docutils-users@lists.sf.net>.\n'
            'Include "--traceback" output, Docutils version (%s),\n'
            'Python version (%s), your OS type & version, and the\n'
            'command line used.\n'
            % (ErrorString(error),
               self.settings.output_encoding,
               data.encode('ascii', 'xmlcharrefreplace'),
               data.encode('ascii', 'backslashreplace'),
               self.settings.output_encoding_error_handler,
               __version__, sys.version.split()[0]))
# Default usage line and program description for the command-line front
# ends (`publish_cmdline` and `publish_cmdline_to_binary`).
default_usage = '%prog [options] [<source> [<destination>]]'
default_description = ('Reads from <source> (default is stdin) and writes to '
                       '<destination> (default is stdout). See '
                       '<http://docutils.sf.net/docs/user/config.html> for '
                       'the full reference.')
def publish_cmdline(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line). Return the
    encoded string output also.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    pub = Publisher(reader, parser, writer, settings=settings)
    pub.set_components(reader_name, parser_name, writer_name)
    output = pub.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section, enable_exit_status=enable_exit_status)
    return output
def publish_file(source=None, source_path=None,
                 destination=None, destination_path=None,
                 reader=None, reader_name='standalone',
                 parser=None, parser_name='restructuredtext',
                 writer=None, writer_name='pseudoxml',
                 settings=None, settings_spec=None, settings_overrides=None,
                 config_section=None, enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with file-like I/O.
    Return the encoded string output also.

    Parameters: see `publish_programmatically`.
    """
    # Only the output is returned; the Publisher object is discarded.
    output, pub = publish_programmatically(
        source_class=io.FileInput, source=source, source_path=source_path,
        destination_class=io.FileOutput,
        destination=destination, destination_path=destination_path,
        reader=reader, reader_name=reader_name,
        parser=parser, parser_name=parser_name,
        writer=writer, writer_name=writer_name,
        settings=settings, settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return output
def publish_string(source, source_path=None, destination_path=None,
                   reader=None, reader_name='standalone',
                   parser=None, parser_name='restructuredtext',
                   writer=None, writer_name='pseudoxml',
                   settings=None, settings_spec=None,
                   settings_overrides=None, config_section=None,
                   enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O. Return
    the encoded string or Unicode string output.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    output. Here's one way::

        publish_string(..., settings_overrides={'output_encoding': 'unicode'})

    Similarly for Unicode string input (`source`)::

        publish_string(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # Only the output is returned; the Publisher object is discarded.
    output, pub = publish_programmatically(
        source_class=io.StringInput, source=source, source_path=source_path,
        destination_class=io.StringOutput,
        destination=None, destination_path=destination_path,
        reader=reader, reader_name=reader_name,
        parser=parser, parser_name=parser_name,
        writer=writer, writer_name=writer_name,
        settings=settings, settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return output
def publish_parts(source, source_path=None, source_class=io.StringInput,
                  destination_path=None,
                  reader=None, reader_name='standalone',
                  parser=None, parser_name='restructuredtext',
                  writer=None, writer_name='pseudoxml',
                  settings=None, settings_spec=None,
                  settings_overrides=None, config_section=None,
                  enable_exit_status=None):
    """
    Set up & run a `Publisher`, and return a dictionary of document parts.
    Dictionary keys are the names of parts, and values are Unicode strings;
    encoding is up to the client. For programmatic use with string I/O.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    input. Here's one way::

        publish_parts(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    # The rendered string is not needed here, only the writer's parts.
    _, pub = publish_programmatically(
        source=source, source_path=source_path, source_class=source_class,
        destination_class=io.StringOutput,
        destination=None, destination_path=destination_path,
        reader=reader, reader_name=reader_name,
        parser=parser, parser_name=parser_name,
        writer=writer, writer_name=writer_name,
        settings=settings, settings_spec=settings_spec,
        settings_overrides=settings_overrides,
        config_section=config_section,
        enable_exit_status=enable_exit_status)
    return pub.writer.parts
def publish_doctree(source, source_path=None,
                    source_class=io.StringInput,
                    reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=None):
    """
    Set up & run a `Publisher` for programmatic use with string I/O.
    Return the document tree.

    For encoded string input, be sure to set the 'input_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    input. Here's one way::

        publish_doctree(..., settings_overrides={'input_encoding': 'unicode'})

    Parameters: see `publish_programmatically`.
    """
    pub = Publisher(reader=reader, parser=parser, writer=None,
                    settings=settings,
                    source_class=source_class,
                    destination_class=io.NullOutput)
    # The 'null' writer is requested: only the document tree is wanted.
    pub.set_components(reader_name, parser_name, 'null')
    pub.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    pub.set_source(source, source_path)
    pub.set_destination(None, None)
    pub.publish(enable_exit_status=enable_exit_status)
    return pub.document
def publish_from_doctree(document, destination_path=None,
                         writer=None, writer_name='pseudoxml',
                         settings=None, settings_spec=None,
                         settings_overrides=None, config_section=None,
                         enable_exit_status=None):
    """
    Set up & run a `Publisher` to render from an existing document
    tree data structure, for programmatic use with string I/O. Return
    the encoded string output.

    Note that document.settings is overridden; if you want to use the settings
    of the original `document`, pass settings=document.settings.

    Also, new document.transformer and document.reporter objects are
    generated.

    For encoded string output, be sure to set the 'output_encoding' setting to
    the desired encoding. Set it to 'unicode' for unencoded Unicode string
    output. Here's one way::

        publish_from_doctree(
            ..., settings_overrides={'output_encoding': 'unicode'})

    Parameters: `document` is a `docutils.nodes.document` object, an existing
    document tree.

    Other parameters: see `publish_programmatically`.
    """
    # The doctree reader takes the existing tree as its input.
    reader = docutils.readers.doctree.Reader(parser_name='null')
    pub = Publisher(reader, None, writer,
                    source=io.DocTreeInput(document),
                    destination_class=io.StringOutput, settings=settings)
    if not writer and writer_name:
        pub.set_writer(writer_name)
    pub.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    pub.set_destination(None, destination_path)
    return pub.publish(enable_exit_status=enable_exit_status)
def publish_cmdline_to_binary(reader=None, reader_name='standalone',
                    parser=None, parser_name='restructuredtext',
                    writer=None, writer_name='pseudoxml',
                    settings=None, settings_spec=None,
                    settings_overrides=None, config_section=None,
                    enable_exit_status=1, argv=None,
                    usage=default_usage, description=default_description,
                    destination=None, destination_class=io.BinaryFileOutput
                    ):
    """
    Set up & run a `Publisher` for command-line-based file I/O (input and
    output file paths taken automatically from the command line). Return the
    encoded string output also.

    This is just like publish_cmdline, except that it uses
    io.BinaryFileOutput instead of io.FileOutput.

    Parameters: see `publish_programmatically` for the remainder.

    - `argv`: Command-line argument list to use instead of ``sys.argv[1:]``.
    - `usage`: Usage string, output if there's a problem parsing the command
      line.
    - `description`: Program description, output for the "--help" option
      (along with command-line option descriptions).
    """
    # NOTE(review): `destination` is accepted but never used below; only
    # `destination_class` reaches the Publisher.
    pub = Publisher(reader, parser, writer, settings=settings,
                    destination_class=destination_class)
    pub.set_components(reader_name, parser_name, writer_name)
    output = pub.publish(
        argv, usage, description, settings_spec, settings_overrides,
        config_section=config_section, enable_exit_status=enable_exit_status)
    return output
def publish_programmatically(source_class, source, source_path,
                             destination_class, destination, destination_path,
                             reader, reader_name,
                             parser, parser_name,
                             writer, writer_name,
                             settings, settings_spec,
                             settings_overrides, config_section,
                             enable_exit_status):
    """
    Set up & run a `Publisher` for custom programmatic use. Return the
    encoded string output and the Publisher object.

    Applications should not need to call this function directly. If it does
    seem to be necessary to call this function directly, please write to the
    Docutils-develop mailing list
    <http://docutils.sf.net/docs/user/mailing-lists.html#docutils-develop>.

    Parameters:

    * `source_class` **required**: The class for dynamically created source
      objects. Typically `io.FileInput` or `io.StringInput`.

    * `source`: Type depends on `source_class`:

      - If `source_class` is `io.FileInput`: Either a file-like object
        (must have 'read' and 'close' methods), or ``None``
        (`source_path` is opened). If neither `source` nor
        `source_path` are supplied, `sys.stdin` is used.

      - If `source_class` is `io.StringInput` **required**: The input
        string, either an encoded 8-bit string (set the
        'input_encoding' setting to the correct encoding) or a Unicode
        string (set the 'input_encoding' setting to 'unicode').

    * `source_path`: Type depends on `source_class`:

      - `io.FileInput`: Path to the input file, opened if no `source`
        supplied.

      - `io.StringInput`: Optional. Path to the file or object that produced
        `source`. Only used for diagnostic output.

    * `destination_class` **required**: The class for dynamically created
      destination objects. Typically `io.FileOutput` or `io.StringOutput`.

    * `destination`: Type depends on `destination_class`:

      - `io.FileOutput`: Either a file-like object (must have 'write' and
        'close' methods), or ``None`` (`destination_path` is opened). If
        neither `destination` nor `destination_path` are supplied,
        `sys.stdout` is used.

      - `io.StringOutput`: Not used; pass ``None``.

    * `destination_path`: Type depends on `destination_class`:

      - `io.FileOutput`: Path to the output file. Opened if no `destination`
        supplied.

      - `io.StringOutput`: Path to the file or object which will receive the
        output; optional. Used for determining relative paths (stylesheets,
        source links, etc.).

    * `reader`: A `docutils.readers.Reader` object.

    * `reader_name`: Name or alias of the Reader class to be instantiated if
      no `reader` supplied.

    * `parser`: A `docutils.parsers.Parser` object.

    * `parser_name`: Name or alias of the Parser class to be instantiated if
      no `parser` supplied.

    * `writer`: A `docutils.writers.Writer` object.

    * `writer_name`: Name or alias of the Writer class to be instantiated if
      no `writer` supplied.

    * `settings`: A runtime settings (`docutils.frontend.Values`) object, for
      dotted-attribute access to runtime settings. It's the end result of the
      `SettingsSpec`, config file, and option processing. If `settings` is
      passed, it's assumed to be complete and no further setting/config/option
      processing is done.

    * `settings_spec`: A `docutils.SettingsSpec` subclass or object. Provides
      extra application-specific settings definitions independently of
      components. In other words, the application becomes a component, and
      its settings data is processed along with that of the other components.
      Used only if no `settings` specified.

    * `settings_overrides`: A dictionary containing application-specific
      settings defaults that override the defaults of other components.
      Used only if no `settings` specified.

    * `config_section`: A string, the name of the configuration file section
      for this application. Overrides the ``config_section`` attribute
      defined by `settings_spec`. Used only if no `settings` specified.

    * `enable_exit_status`: Boolean; enable exit status at end of processing?
    """
    pub = Publisher(reader, parser, writer, settings=settings,
                    source_class=source_class,
                    destination_class=destination_class)
    pub.set_components(reader_name, parser_name, writer_name)
    # Settings must exist before the I/O objects are built: `set_source`
    # and `set_destination` read encodings and paths from `pub.settings`.
    pub.process_programmatic_settings(
        settings_spec, settings_overrides, config_section)
    pub.set_source(source, source_path)
    pub.set_destination(destination, destination_path)
    output = pub.publish(enable_exit_status=enable_exit_status)
    return output, pub