Clean up system message (source, line) reporting.
[docutils.git] / docutils / parsers / rst / __init__.py
blob0e064677df49c00f06e5588e84b6f43270b25119
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 This is the ``docutils.parsers.rst`` package. It exports a single class, `Parser`,
7 the reStructuredText parser.
10 Usage
11 =====
13 1. Create a parser::
15 parser = docutils.parsers.rst.Parser()
17 Several optional arguments may be passed to modify the parser's behavior.
18 Please see `Customizing the Parser`_ below for details.
20 2. Gather input (a multi-line string), by reading a file or the standard
21 input::
23 input = sys.stdin.read()
25 3. Create a new empty `docutils.nodes.document` tree::
27 document = docutils.utils.new_document(source, settings)
29 See `docutils.utils.new_document()` for parameter details.
31 4. Run the parser, populating the document tree::
33 parser.parse(input, document)
36 Parser Overview
37 ===============
39 The reStructuredText parser is implemented as a state machine, examining its
40 input one line at a time. To understand how the parser works, please first
41 become familiar with the `docutils.statemachine` module, then see the
42 `states` module.
45 Customizing the Parser
46 ----------------------
48 Anything that isn't already customizable is that way simply because that type
49 of customizability hasn't been implemented yet. Patches welcome!
51 When instantiating an object of the `Parser` class, two parameters may be
52 passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=1`` to enable an initial
53 RFC-2822 style header block, parsed as a "field_list" element (with "class"
54 attribute set to "rfc2822"). Currently this is the only body-level element
55 which is customizable without subclassing. (Tip: subclass `Parser` and change
56 its "state_classes" and "initial_state" attributes to refer to new classes.
57 Contact the author if you need more details.)
59 The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
60 It handles inline markup recognition. A common extension is the addition of
61 further implicit hyperlinks, like "RFC 2822". This can be done by subclassing
62 `states.Inliner`, adding a new method for the implicit markup, and adding a
63 ``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
64 subclass. See `states.Inliner.implicit_inline()` for details. Explicit
65 inline markup can be customized in a `states.Inliner` subclass via the
66 ``patterns.initial`` and ``dispatch`` attributes (and new methods as
67 appropriate).
68 """
70 __docformat__ = 'reStructuredText'
73 import docutils.parsers
74 import docutils.statemachine
75 from docutils.parsers.rst import states
76 from docutils import frontend, nodes
class Parser(docutils.parsers.Parser):

    """The reStructuredText parser."""

    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
    """Aliases this parser supports."""

    # Layout: (option group title, group description, option tuples), where
    # each option tuple is (help text, option strings, keyword arguments).
    settings_spec = (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "http://www.python.org/dev/peps/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.python.org/dev/peps/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Template for PEP file part of URL. (default "pep-%04d")',
          ['--pep-file-url-template'],
          {'metavar': '<URL>', 'default': 'pep-%04d'}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references (default "http://www.faqs.org/rfcs/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.faqs.org/rfcs/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
         ('Disable directives that insert the contents of external file '
          '("include" & "raw"); replaced with a "warning" system message.',
          ['--no-file-insertion'],
          {'action': 'store_false', 'default': 1,
           'dest': 'file_insertion_enabled',
           'validator': frontend.validate_boolean}),
         ('Enable directives that insert the contents of external file '
          '("include" & "raw"). Enabled by default.',
          ['--file-insertion-enabled'],
          {'action': 'store_true'}),
         ('Disable the "raw" directives; replaced with a "warning" '
          'system message.',
          ['--no-raw'],
          {'action': 'store_false', 'default': 1, 'dest': 'raw_enabled',
           'validator': frontend.validate_boolean}),
         ('Enable the "raw" directive. Enabled by default.',
          ['--raw-enabled'],
          {'action': 'store_true'}),
         # The closing quote belongs right after the value: the accepted
         # choice is the bare word ``none`` (see 'choices' below).
         ('Token name set for parsing code with Pygments: one of '
          '"long", "short", or "none" (no parsing). Default is "short".',
          ['--syntax-highlight'],
          {'choices': ['long', 'short', 'none'],
           'default': 'short', 'metavar': '<format>'}),))

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=None, inliner=None):
        """
        Select the initial parser state and remember the inliner.

        If `rfc2822` is true, parsing starts in the 'RFC2822Body' state,
        otherwise in 'Body'.  `inliner` is stored unchanged and handed to
        the state machine by `parse()`.
        """
        if rfc2822:
            self.initial_state = 'RFC2822Body'
        else:
            self.initial_state = 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def parse(self, inputstring, document):
        """Parse `inputstring` and populate `document`, a document tree."""
        self.setup_parse(inputstring, document)
        self.statemachine = states.RSTStateMachine(
              state_classes=self.state_classes,
              initial_state=self.initial_state,
              debug=document.reporter.debug_flag)
        # Split the input into lines, using the configured tab width.
        inputlines = docutils.statemachine.string2lines(
              inputstring, tab_width=document.settings.tab_width,
              convert_whitespace=True)
        self.statemachine.run(inputlines, document, inliner=self.inliner)
        self.finish_parse()
class DirectiveError(Exception):

    """
    Store a message and a system message level.

    To be thrown from inside directive code.

    Do not instantiate directly -- use `Directive.directive_error()`
    instead!
    """

    def __init__(self, level, message):
        """Set error `message` and `level`"""
        # Forward both values to the base class so that ``args`` mirrors the
        # constructor signature.  With an empty ``args`` the exception could
        # not be copied or unpickled (both re-call the constructor with
        # ``args``) and carried no text at all.
        Exception.__init__(self, level, message)
        self.level = level
        self.msg = message

    def __str__(self):
        """Return the message text only (the level is in ``self.level``)."""
        return str(self.msg)
class Directive(object):

    """
    Base class for reStructuredText directives.

    The following attributes may be set by subclasses.  They are
    interpreted by the directive parser (which runs the directive
    class):

    - `required_arguments`: The number of required arguments (default:
      0).

    - `optional_arguments`: The number of optional arguments (default:
      0).

    - `final_argument_whitespace`: A boolean, indicating if the final
      argument may contain whitespace (default: False).

    - `option_spec`: A dictionary, mapping known option names to
      conversion functions such as `int` or `float` (default: ``None``,
      no options).  Several conversion functions are defined in the
      directives/__init__.py module.

      Option conversion functions take a single parameter, the option
      argument (a string or ``None``), validate it and/or convert it
      to the appropriate form.  Conversion functions may raise
      `ValueError` and `TypeError` exceptions.

    - `has_content`: A boolean; True if content is allowed.  Client
      code must handle the case where content is required but not
      supplied (an empty content list will be supplied).

    Arguments are normally single whitespace-separated words.  The
    final argument may contain whitespace and/or newlines if
    `final_argument_whitespace` is True.

    If the form of the arguments is more complex, specify only one
    argument (either required or optional) and set
    `final_argument_whitespace` to True; the client code must do any
    context-sensitive parsing.

    When a directive implementation is being run, the directive class
    is instantiated, and the `run()` method is executed.  During
    instantiation, the following instance variables are set:

    - ``name`` is the directive type or name (string).

    - ``arguments`` is the list of positional arguments (strings).

    - ``options`` is a dictionary mapping option names (strings) to
      values (type depends on option conversion functions; see
      `option_spec` above).

    - ``content`` is a list of strings, the directive content line by line.

    - ``lineno`` is the absolute line number of the first line
      of the directive.

    - ``src`` is the name (or path) of the rst source of the directive.

    - ``srcline`` is the line number of the first line of the directive
      in its source. It may differ from ``lineno``, if the main source
      includes other sources with the ``.. include::`` directive.

      (``src`` and ``srcline`` are not assigned by `__init__()` of this
      class; presumably the calling code supplies them -- TODO confirm.)

    - ``content_offset`` is the line offset of the first line of the content from
      the beginning of the current input.  Used when initiating a nested parse.

    - ``block_text`` is a string containing the entire directive.

    - ``state`` is the state which called the directive function.

    - ``state_machine`` is the state machine which controls the state which called
      the directive function.

    Directive functions return a list of nodes which will be inserted
    into the document tree at the point where the directive was
    encountered.  This can be an empty list if there is nothing to
    insert.

    For ordinary directives, the list must contain body elements or
    structural elements.  Some directives are intended specifically
    for substitution definitions, and must return a list of `Text`
    nodes and/or inline elements (suitable for inline insertion, in
    place of the substitution reference).  Such directives must verify
    substitution definition context, typically using code like this::

        if not isinstance(state, states.SubstitutionDef):
            error = state_machine.reporter.error(
                'Invalid context: the "%s" directive can only be used '
                'within a substitution definition.' % (name),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
    """

    # There is a "Creating reStructuredText Directives" how-to at
    # <http://docutils.sf.net/docs/howto/rst-directives.html>.  If you
    # update this docstring, please update the how-to as well.

    required_arguments = 0
    """Number of required directive arguments."""

    optional_arguments = 0
    """Number of optional arguments after the required arguments."""

    final_argument_whitespace = False
    """May the final argument contain whitespace?"""

    option_spec = None
    """Mapping of option names to validator functions."""

    has_content = False
    """May the directive have content?"""

    def __init__(self, name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine):
        """Store every constructor argument on the instance unchanged."""
        self.name = name
        self.arguments = arguments
        self.options = options
        self.content = content
        self.lineno = lineno
        self.content_offset = content_offset
        self.block_text = block_text
        self.state = state
        self.state_machine = state_machine

    def run(self):
        """Execute the directive; subclasses must override this method."""
        raise NotImplementedError('Must override run() in subclass.')

    # Directive errors:

    def directive_error(self, level, message):
        """
        Return a DirectiveError suitable for being thrown as an exception.

        Call "raise self.directive_error(level, message)" from within
        a directive implementation to return one single system message
        at level `level`, which automatically gets the directive block
        and the line number added.

        Preferably use the `debug`, `info`, `warning`, `error`, or `severe`
        wrapper methods, e.g. ``self.error(message)`` to generate an
        ERROR-level directive error.
        """
        return DirectiveError(level, message)

    def debug(self, message):
        """Return a level-0 `DirectiveError` carrying `message`."""
        return self.directive_error(0, message)

    def info(self, message):
        """Return a level-1 `DirectiveError` carrying `message`."""
        return self.directive_error(1, message)

    def warning(self, message):
        """Return a level-2 `DirectiveError` carrying `message`."""
        return self.directive_error(2, message)

    def error(self, message):
        """Return a level-3 `DirectiveError` carrying `message`."""
        return self.directive_error(3, message)

    def severe(self, message):
        """Return a level-4 `DirectiveError` carrying `message`."""
        return self.directive_error(4, message)

    # Convenience methods:

    def assert_has_content(self):
        """
        Throw an ERROR-level DirectiveError if the directive doesn't
        have contents.
        """
        if not self.content:
            raise self.error('Content block expected for the "%s" directive; '
                             'none found.' % self.name)

    def add_name(self, node):
        """Append self.options['name'] to node['names'] if it exists.

        Also normalize the name string and register it as explicit target.
        The 'name' entry is removed from ``self.options`` in the process.
        """
        if 'name' in self.options:
            name = nodes.fully_normalize_name(self.options.pop('name'))
            if 'name' in node:
                del node['name']
            node['names'].append(name)
            self.state.document.note_explicit_target(node, node)
def convert_directive_function(directive_fn):
    """
    Wrap the old-style directive function `directive_fn` in a class.

    The returned `Directive` subclass takes its settings from optional
    attributes of `directive_fn`: ``options`` (default ``None``),
    ``content`` (default ``False``) and ``arguments``, a 3-tuple of
    (required, optional, final-argument-whitespace) that defaults to
    ``(0, 0, False)``.  Its `run()` calls `directive_fn` with the nine
    instance attributes stored by `Directive.__init__()`.
    """
    # Collect the function's declarations first, then publish them as
    # class attributes below.
    fn_options = getattr(directive_fn, 'options', None)
    fn_content = getattr(directive_fn, 'content', False)
    fn_arguments = getattr(directive_fn, 'arguments', (0, 0, False))
    (n_required, n_optional, final_whitespace) = fn_arguments

    class FunctionalDirective(Directive):

        option_spec = fn_options
        has_content = fn_content
        _argument_spec = fn_arguments
        required_arguments = n_required
        optional_arguments = n_optional
        final_argument_whitespace = final_whitespace

        def run(self):
            # Positional order expected by the functional interface.
            call_args = (
                self.name, self.arguments, self.options, self.content,
                self.lineno, self.content_offset, self.block_text,
                self.state, self.state_machine)
            return directive_fn(*call_args)

    # Return new-style directive.
    return FunctionalDirective