Provide fallbacks for parser config settings.
[docutils.git] / docutils / docutils / parsers / rst / __init__.py
blob0ca7b34a9c18d0e32e88389cc98f15d69f537b8e
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 This is the ``docutils.parsers.rst`` package. It exports a single class, `Parser`,
7 the reStructuredText parser.
10 Usage
11 =====
13 1. Create a parser::
15 parser = docutils.parsers.rst.Parser()
17 Several optional arguments may be passed to modify the parser's behavior.
18 Please see `Customizing the Parser`_ below for details.
20 2. Gather input (a multi-line string), by reading a file or the standard
21 input::
23 input = sys.stdin.read()
25 3. Create a new empty `docutils.nodes.document` tree::
27 document = docutils.utils.new_document(source, settings)
29 See `docutils.utils.new_document()` for parameter details.
31 4. Run the parser, populating the document tree::
33 parser.parse(input, document)
36 Parser Overview
37 ===============
39 The reStructuredText parser is implemented as a state machine, examining its
40 input one line at a time. To understand how the parser works, please first
41 become familiar with the `docutils.statemachine` module, then see the
42 `states` module.
45 Customizing the Parser
46 ----------------------
48 Anything that isn't already customizable is that way simply because that type
49 of customizability hasn't been implemented yet. Patches welcome!
51 When instantiating an object of the `Parser` class, two parameters may be
52 passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=True`` to enable an
53 initial RFC-2822 style header block, parsed as a "field_list" element (with
54 "class" attribute set to "rfc2822"). Currently this is the only body-level
55 element which is customizable without subclassing. (Tip: subclass `Parser`
56 and change its "state_classes" and "initial_state" attributes to refer to new
57 classes. Contact the author if you need more details.)
59 The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
60 It handles inline markup recognition. A common extension is the addition of
61 further implicit hyperlinks, like "RFC 2822". This can be done by subclassing
62 `states.Inliner`, adding a new method for the implicit markup, and adding a
63 ``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
64 subclass. See `states.Inliner.implicit_inline()` for details. Explicit
65 inline markup can be customized in a `states.Inliner` subclass via the
66 ``patterns.initial`` and ``dispatch`` attributes (and new methods as
67 appropriate).
68 """
70 __docformat__ = 'reStructuredText'
73 import docutils.parsers
74 import docutils.statemachine
75 from docutils.parsers.rst import roles, states
76 from docutils import frontend, nodes, Component
77 from docutils.transforms import universal
class Parser(docutils.parsers.Parser):

    """
    Parser for reStructuredText input.

    Splits the input into lines and runs an `states.RSTStateMachine`
    over them to populate a document tree.
    """

    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
    """Format names (aliases) handled by this parser."""

    settings_spec = docutils.parsers.Parser.settings_spec + (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "http://www.python.org/dev/peps/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.python.org/dev/peps/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Template for PEP file part of URL. (default "pep-%04d")',
          ['--pep-file-url-template'],
          {'metavar': '<URL>', 'default': 'pep-%04d'}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references (default "http://tools.ietf.org/html/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'http://tools.ietf.org/html/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
         ('Token name set for parsing code with Pygments: one of '
          '"long", "short", or "none" (no parsing). Default is "long".',
          ['--syntax-highlight'],
          {'choices': ['long', 'short', 'none'],
           'default': 'long', 'metavar': '<format>'}),
         ('Change straight quotation marks to typographic form: '
          'one of "yes", "no", "alt[ernative]" (default "no").',
          ['--smart-quotes'],
          {'default': False, 'metavar': '<yes/no/alt>',
           'validator': frontend.validate_ternary}),
         ('Characters to use as "smart quotes" for <language>. ',
          ['--smartquotes-locales'],
          {'metavar': '<language:quotes[,language:quotes,...]>',
           'action': 'append',
           'validator': frontend.validate_smartquotes_locales}),
         ('Inline markup recognized at word boundaries only '
          '(adjacent to punctuation or whitespace). '
          'Force character-level inline markup recognition with '
          '"\\ " (backslash + space). Default.',
          ['--word-level-inline-markup'],
          {'action': 'store_false', 'dest': 'character_level_inline_markup'}),
         ('Inline markup recognized anywhere, regardless of surrounding '
          'characters. Backslash-escapes must be used to avoid unwanted '
          'markup recognition. Useful for East Asian languages. '
          'Experimental.',
          ['--character-level-inline-markup'],
          {'action': 'store_true', 'default': False,
           'dest': 'character_level_inline_markup'}),
         ))

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=False, inliner=None):
        """
        Configure the parser.

        `rfc2822`: if true, parsing starts in the 'RFC2822Body' state
        instead of 'Body'.
        `inliner`: object handed to the state machine for inline markup
        (may be None).
        """
        self.inliner = inliner
        self.state_classes = states.state_classes
        self.initial_state = 'RFC2822Body' if rfc2822 else 'Body'

    def get_transforms(self):
        """Return the `Component` transforms plus `universal.SmartQuotes`."""
        transforms = Component.get_transforms(self)
        return transforms + [universal.SmartQuotes]

    def parse(self, inputstring, document):
        """Fill `document` (a document tree) by parsing `inputstring`."""
        self.setup_parse(inputstring, document)
        # The document may carry only generic settings; supply the
        # parser-specific values this method relies on.
        for setting, fallback in (('tab_width', 8),
                                  ('syntax_highlight', 'long')):
            self.document.settings.setdefault(setting, fallback)
        self.statemachine = states.RSTStateMachine(
            state_classes=self.state_classes,
            initial_state=self.initial_state,
            debug=document.reporter.debug_flag)
        inputlines = docutils.statemachine.string2lines(
            inputstring, tab_width=document.settings.tab_width,
            convert_whitespace=True)
        # 1-based number of the first over-long line, or None.  The
        # generator is lazy, so scanning stops at the first offender.
        overlong = next(
            (number for number, text in enumerate(inputlines, 1)
             if len(text) > self.document.settings.line_length_limit),
            None)
        if overlong is None:
            self.statemachine.run(inputlines, document, inliner=self.inliner)
        else:
            # Report the offending line instead of parsing the input.
            error = self.document.reporter.error(
                'Line %d exceeds the line-length-limit.' % overlong)
            self.document.append(error)
        # restore the "default" default role after parsing a document
        roles._roles.pop('', None)
        self.finish_parse()
class DirectiveError(Exception):

    """
    Carry a message together with a system message level.

    Meant to be raised from inside directive code.

    Do not instantiate directly -- use `Directive.directive_error()`
    instead!
    """

    def __init__(self, level, message):
        """Record the error `message` and its `level`."""
        # The base exception is deliberately initialised without
        # arguments; the payload lives in `level` and `msg`.
        super(DirectiveError, self).__init__()
        self.msg = message
        self.level = level
class Directive(object):

    """
    Base class for reStructuredText directives.

    The following attributes may be set by subclasses.  They are
    interpreted by the directive parser (which runs the directive
    class):

    - `required_arguments`: The number of required arguments (default:
      0).

    - `optional_arguments`: The number of optional arguments (default:
      0).

    - `final_argument_whitespace`: A boolean, indicating if the final
      argument may contain whitespace (default: False).

    - `option_spec`: A dictionary, mapping known option names to
      conversion functions such as `int` or `float` (default: {}, no
      options).  Several conversion functions are defined in the
      directives/__init__.py module.

      Option conversion functions take a single parameter, the option
      argument (a string or ``None``), validate it and/or convert it
      to the appropriate form.  Conversion functions may raise
      `ValueError` and `TypeError` exceptions.

    - `has_content`: A boolean; True if content is allowed.  Client
      code must handle the case where content is required but not
      supplied (an empty content list will be supplied).

    Arguments are normally single whitespace-separated words.  The
    final argument may contain whitespace and/or newlines if
    `final_argument_whitespace` is True.

    If the form of the arguments is more complex, specify only one
    argument (either required or optional) and set
    `final_argument_whitespace` to True; the client code must do any
    context-sensitive parsing.

    When a directive implementation is being run, the directive class
    is instantiated, and the `run()` method is executed.  During
    instantiation, the following instance variables are set:

    - ``name`` is the directive type or name (string).

    - ``arguments`` is the list of positional arguments (strings).

    - ``options`` is a dictionary mapping option names (strings) to
      values (type depends on option conversion functions; see
      `option_spec` above).

    - ``content`` is a list of strings, the directive content line by line.

    - ``lineno`` is the absolute line number of the first line
      of the directive.

    - ``content_offset`` is the line offset of the first line of the content
      from the beginning of the current input.  Used when initiating a nested
      parse.

    - ``block_text`` is a string containing the entire directive.

    - ``state`` is the state which called the directive function.

    - ``state_machine`` is the state machine which controls the state which
      called the directive function.

    Directive functions return a list of nodes which will be inserted
    into the document tree at the point where the directive was
    encountered.  This can be an empty list if there is nothing to
    insert.

    For ordinary directives, the list must contain body elements or
    structural elements.  Some directives are intended specifically
    for substitution definitions, and must return a list of `Text`
    nodes and/or inline elements (suitable for inline insertion, in
    place of the substitution reference).  Such directives must verify
    substitution definition context, typically using code like this::

        if not isinstance(state, states.SubstitutionDef):
            error = state_machine.reporter.error(
                'Invalid context: the "%s" directive can only be used '
                'within a substitution definition.' % (name),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
    """

    # There is a "Creating reStructuredText Directives" how-to at
    # <http://docutils.sf.net/docs/howto/rst-directives.html>.  If you
    # update this docstring, please update the how-to as well.

    required_arguments = 0
    """Number of required directive arguments."""

    optional_arguments = 0
    """Number of optional arguments after the required arguments."""

    final_argument_whitespace = False
    """May the final argument contain whitespace?"""

    option_spec = None
    """Mapping of option names to validator functions."""

    has_content = False
    """May the directive have content?"""

    def __init__(self, name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine):
        """Store the parsed directive data as instance attributes.

        Each parameter is saved unchanged under the attribute of the
        same name; see the class docstring for their meaning.
        """
        self.name = name
        self.arguments = arguments
        self.options = options
        self.content = content
        self.lineno = lineno
        self.content_offset = content_offset
        self.block_text = block_text
        self.state = state
        self.state_machine = state_machine

    def run(self):
        """Execute the directive; subclasses must override this method.

        Raises `NotImplementedError` in this base class.
        """
        raise NotImplementedError('Must override run() in subclass.')

    # Directive errors:

    def directive_error(self, level, message):
        """
        Return a DirectiveError suitable for being thrown as an exception.

        Call "raise self.directive_error(level, message)" from within
        a directive implementation to return one single system message
        at level `level`, which automatically gets the directive block
        and the line number added.

        Preferably use the `debug`, `info`, `warning`, `error`, or `severe`
        wrapper methods, e.g. ``self.error(message)`` to generate an
        ERROR-level directive error.
        """
        return DirectiveError(level, message)

    def debug(self, message):
        """Return a level-0 `DirectiveError` carrying `message`."""
        return self.directive_error(0, message)

    def info(self, message):
        """Return a level-1 `DirectiveError` carrying `message`."""
        return self.directive_error(1, message)

    def warning(self, message):
        """Return a level-2 `DirectiveError` carrying `message`."""
        return self.directive_error(2, message)

    def error(self, message):
        """Return a level-3 `DirectiveError` carrying `message`."""
        return self.directive_error(3, message)

    def severe(self, message):
        """Return a level-4 `DirectiveError` carrying `message`."""
        return self.directive_error(4, message)

    # Convenience methods:

    def assert_has_content(self):
        """
        Throw an ERROR-level DirectiveError if the directive doesn't
        have contents.
        """
        if not self.content:
            raise self.error('Content block expected for the "%s" directive; '
                             'none found.' % self.name)

    def add_name(self, node):
        """Append self.options['name'] to node['names'] if it exists.

        Also normalize the name string and register it as explicit target.
        Does nothing when the directive has no 'name' option.
        """
        if 'name' in self.options:
            # pop() removes the option so it is consumed exactly once.
            name = nodes.fully_normalize_name(self.options.pop('name'))
            if 'name' in node:
                del node['name']
            node['names'].append(name)
            self.state.document.note_explicit_target(node, node)
def convert_directive_function(directive_fn):
    """
    Build and return a `Directive` subclass wrapping `directive_fn`.

    `directive_fn` follows the old-style, functional interface.  Its
    optional `options`, `content` and `arguments` attributes provide the
    generated class's `option_spec`, `has_content` and argument
    specification.
    """

    class FunctionalDirective(Directive):

        option_spec = getattr(directive_fn, 'options', None)
        has_content = getattr(directive_fn, 'content', False)
        # Three-item spec: (required, optional, final-argument-whitespace).
        _argument_spec = getattr(directive_fn, 'arguments', (0, 0, False))
        (required_arguments,
         optional_arguments,
         final_argument_whitespace) = _argument_spec

        def run(self):
            # Pass the instance data on positionally, in the order the
            # functional interface expects.
            return directive_fn(
                self.name, self.arguments, self.options, self.content,
                self.lineno, self.content_offset, self.block_text,
                self.state, self.state_machine)

    # Return new-style directive.
    return FunctionalDirective