Fix [ 3602652 ] correct syntax-highlight default value in help string.
[docutils.git] / docutils / parsers / rst / __init__.py
blobaaffc7066985a444fc2d074c5f19ce9d789b724c
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 This is the ``docutils.parsers.rst`` package. It exports a single class, `Parser`,
7 the reStructuredText parser.
10 Usage
11 =====
13 1. Create a parser::
15 parser = docutils.parsers.rst.Parser()
17 Several optional arguments may be passed to modify the parser's behavior.
18 Please see `Customizing the Parser`_ below for details.
20 2. Gather input (a multi-line string), by reading a file or the standard
21 input::
23 input = sys.stdin.read()
25 3. Create a new empty `docutils.nodes.document` tree::
27 document = docutils.utils.new_document(source, settings)
29 See `docutils.utils.new_document()` for parameter details.
31 4. Run the parser, populating the document tree::
33 parser.parse(input, document)
36 Parser Overview
37 ===============
39 The reStructuredText parser is implemented as a state machine, examining its
40 input one line at a time. To understand how the parser works, please first
41 become familiar with the `docutils.statemachine` module, then see the
42 `states` module.
45 Customizing the Parser
46 ----------------------
48 Anything that isn't already customizable is that way simply because that type
49 of customizability hasn't been implemented yet. Patches welcome!
51 When instantiating an object of the `Parser` class, two parameters may be
52 passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=True`` to enable an
53 initial RFC-2822 style header block, parsed as a "field_list" element (with
54 "class" attribute set to "rfc2822"). Currently this is the only body-level
55 element which is customizable without subclassing. (Tip: subclass `Parser`
56 and change its "state_classes" and "initial_state" attributes to refer to new
57 classes. Contact the author if you need more details.)
59 The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
60 It handles inline markup recognition. A common extension is the addition of
61 further implicit hyperlinks, like "RFC 2822". This can be done by subclassing
62 `states.Inliner`, adding a new method for the implicit markup, and adding a
63 ``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
64 subclass. See `states.Inliner.implicit_inline()` for details. Explicit
65 inline markup can be customized in a `states.Inliner` subclass via the
66 ``patterns.initial`` and ``dispatch`` attributes (and new methods as
67 appropriate).
68 """
70 __docformat__ = 'reStructuredText'
73 import docutils.parsers
74 import docutils.statemachine
75 from docutils.parsers.rst import states
76 from docutils import frontend, nodes, Component
77 from docutils.transforms import universal
class Parser(docutils.parsers.Parser):

    """The reStructuredText parser."""

    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
    """Aliases this parser supports."""

    # Layout: (section title, description, option entries).  Each option
    # entry is (help text, list of option strings, dict of keyword
    # arguments).  NOTE(review): the keyword dicts look like optparse
    # option keywords plus a docutils 'validator' -- confirm against
    # `docutils.frontend`.
    settings_spec = (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "http://www.python.org/dev/peps/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.python.org/dev/peps/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Template for PEP file part of URL. (default "pep-%04d")',
          ['--pep-file-url-template'],
          {'metavar': '<URL>', 'default': 'pep-%04d'}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references (default "http://www.faqs.org/rfcs/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.faqs.org/rfcs/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
         ('Disable directives that insert the contents of external file '
          '("include" & "raw"); replaced with a "warning" system message.',
          ['--no-file-insertion'],
          {'action': 'store_false', 'default': 1,
           'dest': 'file_insertion_enabled',
           'validator': frontend.validate_boolean}),
         ('Enable directives that insert the contents of external file '
          '("include" & "raw").  Enabled by default.',
          ['--file-insertion-enabled'],
          {'action': 'store_true'}),
         ('Disable the "raw" directives; replaced with a "warning" '
          'system message.',
          ['--no-raw'],
          {'action': 'store_false', 'default': 1, 'dest': 'raw_enabled',
           'validator': frontend.validate_boolean}),
         ('Enable the "raw" directive.  Enabled by default.',
          ['--raw-enabled'],
          {'action': 'store_true'}),
         # The accepted value is the bare word ``none``; the explanatory
         # "(no parsing)" belongs outside the quotes so that the help text
         # matches the 'choices' list below.
         ('Token name set for parsing code with Pygments: one of '
          '"long", "short", or "none" (no parsing). Default is "long".',
          ['--syntax-highlight'],
          {'choices': ['long', 'short', 'none'],
           'default': 'long', 'metavar': '<format>'}),
         ('Change straight quotation marks to typographic form: '
          'one of "yes", "no", "alt[ernative]" (default "no").',
          ['--smart-quotes'],
          {'default': False, 'validator': frontend.validate_ternary}),
        ))

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=False, inliner=None):
        """
        Set up the initial state, the state classes and the inliner.

        If `rfc2822` is true, parsing starts in the 'RFC2822Body' state
        instead of 'Body'.  `inliner` is stored unchanged and handed to
        the state machine in `parse()`.
        """
        if rfc2822:
            self.initial_state = 'RFC2822Body'
        else:
            self.initial_state = 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def get_transforms(self):
        """Return `Component`'s transforms plus `universal.SmartQuotes`."""
        return Component.get_transforms(self) + [
            universal.SmartQuotes]

    def parse(self, inputstring, document):
        """Parse `inputstring` and populate `document`, a document tree."""
        self.setup_parse(inputstring, document)
        self.statemachine = states.RSTStateMachine(
              state_classes=self.state_classes,
              initial_state=self.initial_state,
              debug=document.reporter.debug_flag)
        # Split the input into lines, expanding tabs according to the
        # ``tab_width`` setting, before handing it to the state machine.
        inputlines = docutils.statemachine.string2lines(
              inputstring, tab_width=document.settings.tab_width,
              convert_whitespace=True)
        self.statemachine.run(inputlines, document, inliner=self.inliner)
        self.finish_parse()
class DirectiveError(Exception):

    """
    Carry a system message level together with its message text.

    Meant to be raised from within directive code.

    Do not instantiate directly -- use `Directive.directive_error()`
    instead!
    """

    def __init__(self, level, message):
        """Record the error `message` and its system message `level`."""
        # The base class is deliberately initialised without arguments, so
        # ``args`` stays empty; the payload lives in ``msg`` and ``level``.
        super(DirectiveError, self).__init__()
        self.msg = message
        self.level = level
class Directive(object):

    """
    Base class for reStructuredText directives.

    The following attributes may be set by subclasses.  They are
    interpreted by the directive parser (which runs the directive
    class):

    - `required_arguments`: The number of required arguments (default:
      0).

    - `optional_arguments`: The number of optional arguments (default:
      0).

    - `final_argument_whitespace`: A boolean, indicating if the final
      argument may contain whitespace (default: False).

    - `option_spec`: A dictionary, mapping known option names to
      conversion functions such as `int` or `float` (default: {}, no
      options).  Several conversion functions are defined in the
      directives/__init__.py module.

      Option conversion functions take a single parameter, the option
      argument (a string or ``None``), validate it and/or convert it
      to the appropriate form.  Conversion functions may raise
      `ValueError` and `TypeError` exceptions.

    - `has_content`: A boolean; True if content is allowed.  Client
      code must handle the case where content is required but not
      supplied (an empty content list will be supplied).

    Arguments are normally single whitespace-separated words.  The
    final argument may contain whitespace and/or newlines if
    `final_argument_whitespace` is True.

    If the form of the arguments is more complex, specify only one
    argument (either required or optional) and set
    `final_argument_whitespace` to True; the client code must do any
    context-sensitive parsing.

    When a directive implementation is being run, the directive class
    is instantiated, and the `run()` method is executed.  During
    instantiation, the following instance variables are set:

    - ``name`` is the directive type or name (string).

    - ``arguments`` is the list of positional arguments (strings).

    - ``options`` is a dictionary mapping option names (strings) to
      values (type depends on option conversion functions; see
      `option_spec` above).

    - ``content`` is a list of strings, the directive content line by line.

    - ``lineno`` is the absolute line number of the first line
      of the directive.

    - ``src`` is the name (or path) of the rst source of the directive.

    - ``srcline`` is the line number of the first line of the directive
      in its source. It may differ from ``lineno``, if the main source
      includes other sources with the ``.. include::`` directive.

    - ``content_offset`` is the line offset of the first line of the content from
      the beginning of the current input.  Used when initiating a nested parse.

    - ``block_text`` is a string containing the entire directive.

    - ``state`` is the state which called the directive function.

    - ``state_machine`` is the state machine which controls the state which called
      the directive function.

    Note: ``src`` and ``srcline`` are not assigned by `Directive.__init__()`
    itself; all other instance variables listed above are.

    Directive functions return a list of nodes which will be inserted
    into the document tree at the point where the directive was
    encountered.  This can be an empty list if there is nothing to
    insert.

    For ordinary directives, the list must contain body elements or
    structural elements.  Some directives are intended specifically
    for substitution definitions, and must return a list of `Text`
    nodes and/or inline elements (suitable for inline insertion, in
    place of the substitution reference).  Such directives must verify
    substitution definition context, typically using code like this::

        if not isinstance(state, states.SubstitutionDef):
            error = state_machine.reporter.error(
                'Invalid context: the "%s" directive can only be used '
                'within a substitution definition.' % (name),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
    """

    # There is a "Creating reStructuredText Directives" how-to at
    # <http://docutils.sf.net/docs/howto/rst-directives.html>.  If you
    # update this docstring, please update the how-to as well.

    required_arguments = 0
    """Number of required directive arguments."""

    optional_arguments = 0
    """Number of optional arguments after the required arguments."""

    final_argument_whitespace = False
    """May the final argument contain whitespace?"""

    option_spec = None
    """Mapping of option names to validator functions."""

    has_content = False
    """May the directive have content?"""

    def __init__(self, name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine):
        """Store all constructor arguments as same-named attributes."""
        self.name = name
        self.arguments = arguments
        self.options = options
        self.content = content
        self.lineno = lineno
        self.content_offset = content_offset
        self.block_text = block_text
        self.state = state
        self.state_machine = state_machine

    def run(self):
        """
        Execute the directive; must be overridden by subclasses.

        The base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError('Must override run() in subclass.')

    # Directive errors:

    def directive_error(self, level, message):
        """
        Return a DirectiveError suitable for being thrown as an exception.

        Call "raise self.directive_error(level, message)" from within
        a directive implementation to return one single system message
        at level `level`, which automatically gets the directive block
        and the line number added.

        Preferably use the `debug`, `info`, `warning`, `error`, or `severe`
        wrapper methods, e.g. ``self.error(message)`` to generate an
        ERROR-level directive error.
        """
        return DirectiveError(level, message)

    def debug(self, message):
        """Return a level-0 `DirectiveError` carrying `message`."""
        return self.directive_error(0, message)

    def info(self, message):
        """Return a level-1 `DirectiveError` carrying `message`."""
        return self.directive_error(1, message)

    def warning(self, message):
        """Return a level-2 `DirectiveError` carrying `message`."""
        return self.directive_error(2, message)

    def error(self, message):
        """Return a level-3 `DirectiveError` carrying `message`."""
        return self.directive_error(3, message)

    def severe(self, message):
        """Return a level-4 `DirectiveError` carrying `message`."""
        return self.directive_error(4, message)

    # Convenience methods:

    def assert_has_content(self):
        """
        Throw an ERROR-level DirectiveError if the directive doesn't
        have contents.
        """
        if not self.content:
            raise self.error('Content block expected for the "%s" directive; '
                             'none found.' % self.name)

    def add_name(self, node):
        """Append self.options['name'] to node['names'] if it exists.

        Also normalize the name string and register it as explicit target.
        """
        if 'name' in self.options:
            # The option is consumed (popped) so it is not processed twice.
            name = nodes.fully_normalize_name(self.options.pop('name'))
            if 'name' in node:
                del(node['name'])
            node['names'].append(name)
            self.state.document.note_explicit_target(node, node)
def convert_directive_function(directive_fn):
    """
    Build and return a `Directive` subclass that wraps `directive_fn`.

    `directive_fn` follows the old-style, functional interface: its
    optional ``options``, ``content`` and ``arguments`` attributes supply
    the class-level directive specification, and it is called with the
    instance variables of the directive when the class is run.
    """

    class FunctionalDirective(Directive):

        # Specification is read from attributes of the wrapped function,
        # falling back to "no options, no content, no arguments".
        option_spec = getattr(directive_fn, 'options', None)
        has_content = getattr(directive_fn, 'content', False)
        _argument_spec = getattr(directive_fn, 'arguments', (0, 0, False))
        (required_arguments,
         optional_arguments,
         final_argument_whitespace) = _argument_spec

        def run(self):
            # Forward the instance state positionally, in the order the
            # functional interface expects.
            call_args = (
                self.name, self.arguments, self.options, self.content,
                self.lineno, self.content_offset, self.block_text,
                self.state, self.state_machine)
            return directive_fn(*call_args)

    # Return new-style directive.
    return FunctionalDirective