docutils/parsers/rst/states.py
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
9 :Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items.
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
31 :Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
36 :Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
40 :Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
43 Parser Overview
44 ===============
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
101 """
103 __docformat__ = 'reStructuredText'
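# Illustrative usage sketch (editorial addition, not part of the original
# module).  The state machine below is normally driven through the public
# docutils API, roughly:
#
#     >>> from docutils.frontend import OptionParser
#     >>> from docutils.parsers.rst import Parser
#     >>> from docutils.utils import new_document
#     >>> parser = Parser()
#     >>> settings = OptionParser(components=(Parser,)).get_default_values()
#     >>> document = new_document('<usage sketch>', settings)
#     >>> parser.parse('One *emphasized* word.\n', document)
#     >>> print document.pformat()   # doctree containing an <emphasis> node
#
# `Parser.parse()` instantiates `RSTStateMachine` (defined below) and calls
# its `run()` method with the input lines and the empty `document` node.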
106 import sys
107 import re
108 import roman
109 from types import FunctionType, MethodType
110 from docutils import nodes, statemachine, utils, urischemes
111 from docutils import ApplicationError, DataError
112 from docutils.statemachine import StateMachineWS, StateWS
113 from docutils.nodes import fully_normalize_name as normalize_name
114 from docutils.nodes import whitespace_normalize_name
115 from docutils.utils import escape2null, unescape, column_width
116 import docutils.parsers.rst
117 from docutils.parsers.rst import directives, languages, tableparser, roles
118 from docutils.parsers.rst.languages import en as _fallback_language_module
121 class MarkupError(DataError): pass
122 class UnknownInterpretedRoleError(DataError): pass
123 class InterpretedRoleNotImplementedError(DataError): pass
124 class ParserError(ApplicationError): pass
125 class MarkupMismatch(Exception): pass
128 class Struct:
130 """Stores data attributes for dotted-attribute access."""
132 def __init__(self, **keywordargs):
133 self.__dict__.update(keywordargs)
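# Editorial example (not in the original source): `Struct` simply exposes
# its keyword arguments as attributes, e.g.
#     Struct(section_level=0, title_styles=[]).section_level  ->  0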
136 class RSTStateMachine(StateMachineWS):
139 reStructuredText's master StateMachine.
141 The entry point to reStructuredText parsing is the `run()` method.
144 def run(self, input_lines, document, input_offset=0, match_titles=1,
145 inliner=None):
147 Parse `input_lines` and modify the `document` node in place.
149 Extend `StateMachineWS.run()`: set up parse-global data and
150 run the StateMachine.
152 self.language = languages.get_language(
153 document.settings.language_code)
154 self.match_titles = match_titles
155 if inliner is None:
156 inliner = Inliner()
157 inliner.init_customizations(document.settings)
158 self.memo = Struct(document=document,
159 reporter=document.reporter,
160 language=self.language,
161 title_styles=[],
162 section_level=0,
163 section_bubble_up_kludge=0,
164 inliner=inliner)
165 self.document = document
166 self.attach_observer(document.note_source)
167 self.reporter = self.memo.reporter
168 self.node = document
169 results = StateMachineWS.run(self, input_lines, input_offset,
170 input_source=document['source'])
171 assert results == [], 'RSTStateMachine.run() results should be empty!'
172 self.node = self.memo = None # remove unneeded references
175 class NestedStateMachine(StateMachineWS):
178 StateMachine run from within other StateMachine runs, to parse nested
179 document structures.
182 def run(self, input_lines, input_offset, memo, node, match_titles=1):
184 Parse `input_lines` and populate a `docutils.nodes.document` instance.
186 Extend `StateMachineWS.run()`: set up document-wide data.
188 self.match_titles = match_titles
189 self.memo = memo
190 self.document = memo.document
191 self.attach_observer(self.document.note_source)
192 self.reporter = memo.reporter
193 self.language = memo.language
194 self.node = node
195 results = StateMachineWS.run(self, input_lines, input_offset)
196 assert results == [], ('NestedStateMachine.run() results should be '
197 'empty!')
198 return results
201 class RSTState(StateWS):
204 reStructuredText State superclass.
206 Contains methods used by all State subclasses.
209 nested_sm = NestedStateMachine
211 def __init__(self, state_machine, debug=0):
212 self.nested_sm_kwargs = {'state_classes': state_classes,
213 'initial_state': 'Body'}
214 StateWS.__init__(self, state_machine, debug)
216 def runtime_init(self):
217 StateWS.runtime_init(self)
218 memo = self.state_machine.memo
219 self.memo = memo
220 self.reporter = memo.reporter
221 self.inliner = memo.inliner
222 self.document = memo.document
223 self.parent = self.state_machine.node
225 def goto_line(self, abs_line_offset):
227 Jump to input line `abs_line_offset`, ignoring jumps past the end.
229 try:
230 self.state_machine.goto_line(abs_line_offset)
231 except EOFError:
232 pass
234 def no_match(self, context, transitions):
236 Override `StateWS.no_match` to generate a system message.
238 This code should never be run.
240 self.reporter.severe(
241 'Internal error: no transition pattern match. State: "%s"; '
242 'transitions: %s; context: %s; current line: %r.'
243 % (self.__class__.__name__, transitions, context,
244 self.state_machine.line),
245 line=self.state_machine.abs_line_number())
246 return context, None, []
248 def bof(self, context):
249 """Called at beginning of file."""
250 return [], []
252 def nested_parse(self, block, input_offset, node, match_titles=0,
253 state_machine_class=None, state_machine_kwargs=None):
255 Create a new StateMachine rooted at `node` and run it over the input
256 `block`.
258 if state_machine_class is None:
259 state_machine_class = self.nested_sm
260 if state_machine_kwargs is None:
261 state_machine_kwargs = self.nested_sm_kwargs
262 block_length = len(block)
263 state_machine = state_machine_class(debug=self.debug,
264 **state_machine_kwargs)
265 state_machine.run(block, input_offset, memo=self.memo,
266 node=node, match_titles=match_titles)
267 state_machine.unlink()
268 new_offset = state_machine.abs_line_offset()
269 # No `block.parent` implies disconnected -- lines aren't in sync:
270 if block.parent and (len(block) - block_length) != 0:
271 # Adjustment for block if modified in nested parse:
272 self.state_machine.next_line(len(block) - block_length)
273 return new_offset
275 def nested_list_parse(self, block, input_offset, node, initial_state,
276 blank_finish,
277 blank_finish_state=None,
278 extra_settings={},
279 match_titles=0,
280 state_machine_class=None,
281 state_machine_kwargs=None):
283 Create a new StateMachine rooted at `node` and run it over the input
284 `block`. Also keep track of optional intermediate blank lines and the
285 required final one.
287 if state_machine_class is None:
288 state_machine_class = self.nested_sm
289 if state_machine_kwargs is None:
290 state_machine_kwargs = self.nested_sm_kwargs.copy()
291 state_machine_kwargs['initial_state'] = initial_state
292 state_machine = state_machine_class(debug=self.debug,
293 **state_machine_kwargs)
294 if blank_finish_state is None:
295 blank_finish_state = initial_state
296 state_machine.states[blank_finish_state].blank_finish = blank_finish
297 for key, value in extra_settings.items():
298 setattr(state_machine.states[initial_state], key, value)
299 state_machine.run(block, input_offset, memo=self.memo,
300 node=node, match_titles=match_titles)
301 blank_finish = state_machine.states[blank_finish_state].blank_finish
302 state_machine.unlink()
303 return state_machine.abs_line_offset(), blank_finish
305 def section(self, title, source, style, lineno, messages):
306 """Check for a valid subsection and create one if it checks out."""
307 if self.check_subsection(source, style, lineno):
308 self.new_subsection(title, lineno, messages)
310 def check_subsection(self, source, style, lineno):
312 Check for a valid subsection header. Return 1 (true) or None (false).
314 When a new section is reached that isn't a subsection of the current
315 section, back up the line count (use ``previous_line(-x)``), then
316 ``raise EOFError``. The current StateMachine will finish, then the
317 calling StateMachine can re-examine the title. This will work its way
318 back up the calling chain until the correct section level is reached.
320 @@@ Alternative: Evaluate the title, store the title info & level, and
321 back up the chain until that level is reached. Store in memo? Or
322 return in results?
324 :Exception: `EOFError` when a sibling or supersection encountered.
326 memo = self.memo
327 title_styles = memo.title_styles
328 mylevel = memo.section_level
329 try: # check for existing title style
330 level = title_styles.index(style) + 1
331 except ValueError: # new title style
332 if len(title_styles) == memo.section_level: # new subsection
333 title_styles.append(style)
334 return 1
335 else: # not at lowest level
336 self.parent += self.title_inconsistent(source, lineno)
337 return None
338 if level <= mylevel: # sibling or supersection
339 memo.section_level = level # bubble up to parent section
340 if len(style) == 2:
341 memo.section_bubble_up_kludge = 1
342 # back up 2 lines for underline title, 3 for overline title
343 self.state_machine.previous_line(len(style) + 1)
344 raise EOFError # let parent section re-evaluate
345 if level == mylevel + 1: # immediate subsection
346 return 1
347 else: # invalid subsection
348 self.parent += self.title_inconsistent(source, lineno)
349 return None
351 def title_inconsistent(self, sourcetext, lineno):
352 error = self.reporter.severe(
353 'Title level inconsistent:', nodes.literal_block('', sourcetext),
354 line=lineno)
355 return error
357 def new_subsection(self, title, lineno, messages):
358 """Append new subsection to document tree. On return, check level."""
359 memo = self.memo
360 mylevel = memo.section_level
361 memo.section_level += 1
362 section_node = nodes.section()
363 self.parent += section_node
364 textnodes, title_messages = self.inline_text(title, lineno)
365 titlenode = nodes.title(title, '', *textnodes)
366 name = normalize_name(titlenode.astext())
367 section_node['names'].append(name)
368 section_node += titlenode
369 section_node += messages
370 section_node += title_messages
371 self.document.note_implicit_target(section_node, section_node)
372 offset = self.state_machine.line_offset + 1
373 absoffset = self.state_machine.abs_line_offset() + 1
374 newabsoffset = self.nested_parse(
375 self.state_machine.input_lines[offset:], input_offset=absoffset,
376 node=section_node, match_titles=1)
377 self.goto_line(newabsoffset)
378 if memo.section_level <= mylevel: # can't handle next section?
379 raise EOFError # bubble up to supersection
380 # reset section_level; next pass will detect it properly
381 memo.section_level = mylevel
383 def paragraph(self, lines, lineno):
385 Return a list (paragraph & messages) & a boolean: literal_block next?
387 data = '\n'.join(lines).rstrip()
388 if re.search(r'(?<!\\)(\\\\)*::$', data):
389 if len(data) == 2:
390 return [], 1
391 elif data[-3] in ' \n':
392 text = data[:-3].rstrip()
393 else:
394 text = data[:-1]
395 literalnext = 1
396 else:
397 text = data
398 literalnext = 0
399 textnodes, messages = self.inline_text(text, lineno)
400 p = nodes.paragraph(data, '', *textnodes)
401 p.line = lineno
402 return [p] + messages, literalnext
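# Editorial sketch of the trailing-"::" handling above, assuming `lines`
# joins to the strings shown on the left:
#     'Paragraph::'   ->  paragraph text 'Paragraph:',  literalnext = 1
#     'Paragraph ::'  ->  paragraph text 'Paragraph',   literalnext = 1
#     '::'            ->  no paragraph ([]),            literalnext = 1
#     'Paragraph.'    ->  paragraph text 'Paragraph.',  literalnext = 0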
404 def inline_text(self, text, lineno):
406 Return 2 lists: nodes (text and inline elements), and system_messages.
408 return self.inliner.parse(text, lineno, self.memo, self.parent)
410 def unindent_warning(self, node_name):
411 return self.reporter.warning(
412 '%s ends without a blank line; unexpected unindent.' % node_name,
413 line=(self.state_machine.abs_line_number() + 1))
416 def build_regexp(definition, compile=1):
418 Build, compile and return a regular expression based on `definition`.
420 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
421 where "parts" is a list of regular expressions and/or regular
422 expression definitions to be joined into an or-group.
424 name, prefix, suffix, parts = definition
425 part_strings = []
426 for part in parts:
427 if type(part) is tuple:
428 part_strings.append(build_regexp(part, None))
429 else:
430 part_strings.append(part)
431 or_group = '|'.join(part_strings)
432 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
433 if compile:
434 return re.compile(regexp, re.UNICODE)
435 else:
436 return regexp
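# Editorial sketch: a hypothetical definition tuple and the pattern built
# from it ('marker' and the parts below are examples only):
#     build_regexp(('marker', '', '', [r'\*\*', r'\*(?!\*)']), compile=0)
#     ->  r'(?P<marker>\*\*|\*(?!\*))'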
439 class Inliner:
442 Parse inline markup; call the `parse()` method.
445 def __init__(self):
446 self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
447 """List of (pattern, bound method) tuples, used by
448 `self.implicit_inline`."""
450 def init_customizations(self, settings):
451 """Setting-based customizations; run when parsing begins."""
452 if settings.pep_references:
453 self.implicit_dispatch.append((self.patterns.pep,
454 self.pep_reference))
455 if settings.rfc_references:
456 self.implicit_dispatch.append((self.patterns.rfc,
457 self.rfc_reference))
459 def parse(self, text, lineno, memo, parent):
460 # Needs to be refactored for nested inline markup.
461 # Add nested_parse() method?
463 Return 2 lists: nodes (text and inline elements), and system_messages.
465 Using `self.patterns.initial`, a pattern which matches start-strings
466 (emphasis, strong, interpreted, phrase reference, literal,
467 substitution reference, and inline target) and complete constructs
468 (simple reference, footnote reference), search for a candidate. When
469 one is found, check for validity (e.g., not a quoted '*' character).
470 If valid, search for the corresponding end string if applicable, and
471 check it for validity. If not found or invalid, generate a warning
472 and ignore the start-string. Implicit inline markup (e.g. standalone
473 URIs) is found last.
475 self.reporter = memo.reporter
476 self.document = memo.document
477 self.language = memo.language
478 self.parent = parent
479 pattern_search = self.patterns.initial.search
480 dispatch = self.dispatch
481 remaining = escape2null(text)
482 processed = []
483 unprocessed = []
484 messages = []
485 while remaining:
486 match = pattern_search(remaining)
487 if match:
488 groups = match.groupdict()
489 method = dispatch[groups['start'] or groups['backquote']
490 or groups['refend'] or groups['fnend']]
491 before, inlines, remaining, sysmessages = method(self, match,
492 lineno)
493 unprocessed.append(before)
494 messages += sysmessages
495 if inlines:
496 processed += self.implicit_inline(''.join(unprocessed),
497 lineno)
498 processed += inlines
499 unprocessed = []
500 else:
501 break
502 remaining = ''.join(unprocessed) + remaining
503 if remaining:
504 processed += self.implicit_inline(remaining, lineno)
505 return processed, messages
507 openers = u'\'"([{<\u2018\u201c\xab\u00a1\u00bf' # see quoted_start below
508 closers = u'\'")]}>\u2019\u201d\xbb!?'
509 unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
510 start_string_prefix = (ur'((?<=^)|(?<=[-/: \n\u2019%s%s]))'
511 % (re.escape(unicode_delimiters),
512 re.escape(openers)))
513 end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
514 % (re.escape(unicode_delimiters),
515 re.escape(closers)))
516 non_whitespace_before = r'(?<![ \n])'
517 non_whitespace_escape_before = r'(?<![ \n\x00])'
518 non_whitespace_after = r'(?![ \n])'
519 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
520 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
521 # Valid URI characters (see RFC 2396 & RFC 2732);
522 # final \x00 allows backslash escapes in URIs:
523 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
524 # Delimiter indicating the end of a URI (not part of the URI):
525 uri_end_delim = r"""[>]"""
526 # Last URI character; same as uric but no punctuation:
527 urilast = r"""[_~*/=+a-zA-Z0-9]"""
528 # End of a URI (either 'urilast' or 'uric followed by a
529 # uri_end_delim'):
530 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
531 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
532 email_pattern = r"""
533 %(emailc)s+(?:\.%(emailc)s+)* # name
534 (?<!\x00)@ # at
535 %(emailc)s+(?:\.%(emailc)s*)* # host
536 %(uri_end)s # final URI char
538 parts = ('initial_inline', start_string_prefix, '',
539 [('start', '', non_whitespace_after, # simple start-strings
540 [r'\*\*', # strong
541 r'\*(?!\*)', # emphasis but not strong
542 r'``', # literal
543 r'_`', # inline internal target
544 r'\|(?!\|)'] # substitution reference
546 ('whole', '', end_string_suffix, # whole constructs
547 [# reference name & end-string
548 r'(?P<refname>%s)(?P<refend>__?)' % simplename,
549 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
550 [r'[0-9]+', # manually numbered
551 r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
552 r'\*', # auto-symbol
553 r'(?P<citationlabel>%s)' % simplename] # citation reference
557 ('backquote', # interpreted text or phrase reference
558 '(?P<role>(:%s:)?)' % simplename, # optional role
559 non_whitespace_after,
560 ['`(?!`)'] # but not literal
564 patterns = Struct(
565 initial=build_regexp(parts),
566 emphasis=re.compile(non_whitespace_escape_before
567 + r'(\*)' + end_string_suffix),
568 strong=re.compile(non_whitespace_escape_before
569 + r'(\*\*)' + end_string_suffix),
570 interpreted_or_phrase_ref=re.compile(
571 r"""
572 %(non_whitespace_escape_before)s
575 (?P<suffix>
576 (?P<role>:%(simplename)s:)?
577 (?P<refend>__?)?
580 %(end_string_suffix)s
581 """ % locals(), re.VERBOSE | re.UNICODE),
582 embedded_uri=re.compile(
583 r"""
585 (?:[ \n]+|^) # spaces or beginning of line/string
586 < # open bracket
587 %(non_whitespace_after)s
588 ([^<>\x00]+) # anything but angle brackets & nulls
589 %(non_whitespace_before)s
590 > # close bracket w/o whitespace before
592 $ # end of string
593 """ % locals(), re.VERBOSE),
594 literal=re.compile(non_whitespace_before + '(``)'
595 + end_string_suffix),
596 target=re.compile(non_whitespace_escape_before
597 + r'(`)' + end_string_suffix),
598 substitution_ref=re.compile(non_whitespace_escape_before
599 + r'(\|_{0,2})'
600 + end_string_suffix),
601 email=re.compile(email_pattern % locals() + '$', re.VERBOSE),
602 uri=re.compile(
603 (r"""
604 %(start_string_prefix)s
605 (?P<whole>
606 (?P<absolute> # absolute URI
607 (?P<scheme> # scheme (http, ftp, mailto)
608 [a-zA-Z][a-zA-Z0-9.+-]*
612 ( # either:
613 (//?)? # hierarchical URI
614 %(uric)s* # URI characters
615 %(uri_end)s # final URI char
617 ( # optional query
618 \?%(uric)s*
619 %(uri_end)s
621 ( # optional fragment
622 \#%(uric)s*
623 %(uri_end)s
627 | # *OR*
628 (?P<email> # email address
629 """ + email_pattern + r"""
632 %(end_string_suffix)s
633 """) % locals(), re.VERBOSE),
634 pep=re.compile(
635 r"""
636 %(start_string_prefix)s
638 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
640 (PEP\s+(?P<pepnum2>\d+)) # reference by name
642 %(end_string_suffix)s""" % locals(), re.VERBOSE),
643 rfc=re.compile(
644 r"""
645 %(start_string_prefix)s
646 (RFC(-|\s+)?(?P<rfcnum>\d+))
647 %(end_string_suffix)s""" % locals(), re.VERBOSE))
649 def quoted_start(self, match):
650 """Return 1 if inline markup start-string is 'quoted', 0 if not."""
651 string = match.string
652 start = match.start()
653 end = match.end()
654 if start == 0: # start-string at beginning of text
655 return 0
656 prestart = string[start - 1]
657 try:
658 poststart = string[end]
659 if self.openers.index(prestart) \
660 == self.closers.index(poststart): # quoted
661 return 1
662 except IndexError: # start-string at end of text
663 return 1
664 except ValueError: # not quoted
665 pass
666 return 0
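# Editorial example: a start-string wrapped in a matching opener/closer
# pair is considered quoted and is not treated as markup, e.g. the '*' in
#     He wrote "*" on the board.
# Here prestart '"' and poststart '"' share the same index in `openers`
# and `closers`, so quoted_start() returns 1.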
668 def inline_obj(self, match, lineno, end_pattern, nodeclass,
669 restore_backslashes=0):
670 string = match.string
671 matchstart = match.start('start')
672 matchend = match.end('start')
673 if self.quoted_start(match):
674 return (string[:matchend], [], string[matchend:], [], '')
675 endmatch = end_pattern.search(string[matchend:])
676 if endmatch and endmatch.start(1): # 1 or more chars
677 text = unescape(endmatch.string[:endmatch.start(1)],
678 restore_backslashes)
679 textend = matchend + endmatch.end(1)
680 rawsource = unescape(string[matchstart:textend], 1)
681 return (string[:matchstart], [nodeclass(rawsource, text)],
682 string[textend:], [], endmatch.group(1))
683 msg = self.reporter.warning(
684 'Inline %s start-string without end-string.'
685 % nodeclass.__name__, line=lineno)
686 text = unescape(string[matchstart:matchend], 1)
687 rawsource = unescape(string[matchstart:matchend], 1)
688 prb = self.problematic(text, rawsource, msg)
689 return string[:matchstart], [prb], string[matchend:], [msg], ''
691 def problematic(self, text, rawsource, message):
692 msgid = self.document.set_id(message, self.parent)
693 problematic = nodes.problematic(rawsource, text, refid=msgid)
694 prbid = self.document.set_id(problematic)
695 message.add_backref(prbid)
696 return problematic
698 def emphasis(self, match, lineno):
699 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
700 match, lineno, self.patterns.emphasis, nodes.emphasis)
701 return before, inlines, remaining, sysmessages
703 def strong(self, match, lineno):
704 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
705 match, lineno, self.patterns.strong, nodes.strong)
706 return before, inlines, remaining, sysmessages
708 def interpreted_or_phrase_ref(self, match, lineno):
709 end_pattern = self.patterns.interpreted_or_phrase_ref
710 string = match.string
711 matchstart = match.start('backquote')
712 matchend = match.end('backquote')
713 rolestart = match.start('role')
714 role = match.group('role')
715 position = ''
716 if role:
717 role = role[1:-1]
718 position = 'prefix'
719 elif self.quoted_start(match):
720 return (string[:matchend], [], string[matchend:], [])
721 endmatch = end_pattern.search(string[matchend:])
722 if endmatch and endmatch.start(1): # 1 or more chars
723 textend = matchend + endmatch.end()
724 if endmatch.group('role'):
725 if role:
726 msg = self.reporter.warning(
727 'Multiple roles in interpreted text (both '
728 'prefix and suffix present; only one allowed).',
729 line=lineno)
730 text = unescape(string[rolestart:textend], 1)
731 prb = self.problematic(text, text, msg)
732 return string[:rolestart], [prb], string[textend:], [msg]
733 role = endmatch.group('suffix')[1:-1]
734 position = 'suffix'
735 escaped = endmatch.string[:endmatch.start(1)]
736 rawsource = unescape(string[matchstart:textend], 1)
737 if rawsource[-1:] == '_':
738 if role:
739 msg = self.reporter.warning(
740 'Mismatch: both interpreted text role %s and '
741 'reference suffix.' % position, line=lineno)
742 text = unescape(string[rolestart:textend], 1)
743 prb = self.problematic(text, text, msg)
744 return string[:rolestart], [prb], string[textend:], [msg]
745 return self.phrase_ref(string[:matchstart], string[textend:],
746 rawsource, escaped, unescape(escaped))
747 else:
748 rawsource = unescape(string[rolestart:textend], 1)
749 nodelist, messages = self.interpreted(rawsource, escaped, role,
750 lineno)
751 return (string[:rolestart], nodelist,
752 string[textend:], messages)
753 msg = self.reporter.warning(
754 'Inline interpreted text or phrase reference start-string '
755 'without end-string.', line=lineno)
756 text = unescape(string[matchstart:matchend], 1)
757 prb = self.problematic(text, text, msg)
758 return string[:matchstart], [prb], string[matchend:], [msg]
760 def phrase_ref(self, before, after, rawsource, escaped, text):
761 match = self.patterns.embedded_uri.search(escaped)
762 if match:
763 text = unescape(escaped[:match.start(0)])
764 uri_text = match.group(2)
765 uri = ''.join(uri_text.split())
766 uri = self.adjust_uri(uri)
767 if uri:
768 target = nodes.target(match.group(1), refuri=uri)
769 else:
770 raise ApplicationError('problem with URI: %r' % uri_text)
771 if not text:
772 text = uri
773 else:
774 target = None
775 refname = normalize_name(text)
776 reference = nodes.reference(rawsource, text,
777 name=whitespace_normalize_name(text))
778 node_list = [reference]
779 if rawsource[-2:] == '__':
780 if target:
781 reference['refuri'] = uri
782 else:
783 reference['anonymous'] = 1
784 else:
785 if target:
786 reference['refuri'] = uri
787 target['names'].append(refname)
788 self.document.note_explicit_target(target, self.parent)
789 node_list.append(target)
790 else:
791 reference['refname'] = refname
792 self.document.note_refname(reference)
793 return before, node_list, after, []
795 def adjust_uri(self, uri):
796 match = self.patterns.email.match(uri)
797 if match:
798 return 'mailto:' + uri
799 else:
800 return uri
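# Editorial sketch of adjust_uri():
#     adjust_uri('someone@example.org')  ->  'mailto:someone@example.org'
#     adjust_uri('http://example.org/')  ->  'http://example.org/' (unchanged)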
802 def interpreted(self, rawsource, text, role, lineno):
803 role_fn, messages = roles.role(role, self.language, lineno,
804 self.reporter)
805 if role_fn:
806 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
807 return nodes, messages + messages2
808 else:
809 msg = self.reporter.error(
810 'Unknown interpreted text role "%s".' % role,
811 line=lineno)
812 return ([self.problematic(rawsource, rawsource, msg)],
813 messages + [msg])
815 def literal(self, match, lineno):
816 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
817 match, lineno, self.patterns.literal, nodes.literal,
818 restore_backslashes=1)
819 return before, inlines, remaining, sysmessages
821 def inline_internal_target(self, match, lineno):
822 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
823 match, lineno, self.patterns.target, nodes.target)
824 if inlines and isinstance(inlines[0], nodes.target):
825 assert len(inlines) == 1
826 target = inlines[0]
827 name = normalize_name(target.astext())
828 target['names'].append(name)
829 self.document.note_explicit_target(target, self.parent)
830 return before, inlines, remaining, sysmessages
832 def substitution_reference(self, match, lineno):
833 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
834 match, lineno, self.patterns.substitution_ref,
835 nodes.substitution_reference)
836 if len(inlines) == 1:
837 subref_node = inlines[0]
838 if isinstance(subref_node, nodes.substitution_reference):
839 subref_text = subref_node.astext()
840 self.document.note_substitution_ref(subref_node, subref_text)
841 if endstring[-1:] == '_':
842 reference_node = nodes.reference(
843 '|%s%s' % (subref_text, endstring), '')
844 if endstring[-2:] == '__':
845 reference_node['anonymous'] = 1
846 else:
847 reference_node['refname'] = normalize_name(subref_text)
848 self.document.note_refname(reference_node)
849 reference_node += subref_node
850 inlines = [reference_node]
851 return before, inlines, remaining, sysmessages
853 def footnote_reference(self, match, lineno):
855 Handles `nodes.footnote_reference` and `nodes.citation_reference`
856 elements.
858 label = match.group('footnotelabel')
859 refname = normalize_name(label)
860 string = match.string
861 before = string[:match.start('whole')]
862 remaining = string[match.end('whole'):]
863 if match.group('citationlabel'):
864 refnode = nodes.citation_reference('[%s]_' % label,
865 refname=refname)
866 refnode += nodes.Text(label)
867 self.document.note_citation_ref(refnode)
868 else:
869 refnode = nodes.footnote_reference('[%s]_' % label)
870 if refname[0] == '#':
871 refname = refname[1:]
872 refnode['auto'] = 1
873 self.document.note_autofootnote_ref(refnode)
874 elif refname == '*':
875 refname = ''
876 refnode['auto'] = '*'
877 self.document.note_symbol_footnote_ref(
878 refnode)
879 else:
880 refnode += nodes.Text(label)
881 if refname:
882 refnode['refname'] = refname
883 self.document.note_footnote_ref(refnode)
884 if utils.get_trim_footnote_ref_space(self.document.settings):
885 before = before.rstrip()
886 return (before, [refnode], remaining, [])
888 def reference(self, match, lineno, anonymous=None):
889 referencename = match.group('refname')
890 refname = normalize_name(referencename)
891 referencenode = nodes.reference(
892 referencename + match.group('refend'), referencename,
893 name=whitespace_normalize_name(referencename))
894 if anonymous:
895 referencenode['anonymous'] = 1
896 else:
897 referencenode['refname'] = refname
898 self.document.note_refname(referencenode)
899 string = match.string
900 matchstart = match.start('whole')
901 matchend = match.end('whole')
902 return (string[:matchstart], [referencenode], string[matchend:], [])
904 def anonymous_reference(self, match, lineno):
905 return self.reference(match, lineno, anonymous=1)
907 def standalone_uri(self, match, lineno):
908 if (not match.group('scheme')
909 or match.group('scheme').lower() in urischemes.schemes):
910 if match.group('email'):
911 addscheme = 'mailto:'
912 else:
913 addscheme = ''
914 text = match.group('whole')
915 unescaped = unescape(text, 0)
916 return [nodes.reference(unescape(text, 1), unescaped,
917 refuri=addscheme + unescaped)]
918 else: # not a valid scheme
919 raise MarkupMismatch
921 def pep_reference(self, match, lineno):
922 text = match.group(0)
923 if text.startswith('pep-'):
924 pepnum = int(match.group('pepnum1'))
925 elif text.startswith('PEP'):
926 pepnum = int(match.group('pepnum2'))
927 else:
928 raise MarkupMismatch
929 ref = (self.document.settings.pep_base_url
930 + self.document.settings.pep_file_url_template % pepnum)
931 unescaped = unescape(text, 0)
932 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
934 rfc_url = 'rfc%d.html'
936 def rfc_reference(self, match, lineno):
937 text = match.group(0)
938 if text.startswith('RFC'):
939 rfcnum = int(match.group('rfcnum'))
940 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
941 else:
942 raise MarkupMismatch
943 unescaped = unescape(text, 0)
944 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
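# Editorial note: for the text 'RFC 2822', rfcnum is 2822 and the target
# becomes settings.rfc_base_url + 'rfc2822.html'; pep_reference() composes
# its target the same way from pep_base_url and pep_file_url_template.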
946 def implicit_inline(self, text, lineno):
948 Check each of the patterns in `self.implicit_dispatch` for a match,
949 and dispatch to the stored method for the pattern. Recursively check
950 the text before and after the match. Return a list of `nodes.Text`
951 and inline element nodes.
953 if not text:
954 return []
955 for pattern, method in self.implicit_dispatch:
956 match = pattern.search(text)
957 if match:
958 try:
959 # Must recurse on strings before *and* after the match;
960 # there may be multiple patterns.
961 return (self.implicit_inline(text[:match.start()], lineno)
962 + method(match, lineno) +
963 self.implicit_inline(text[match.end():], lineno))
964 except MarkupMismatch:
965 pass
966 return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
968 dispatch = {'*': emphasis,
969 '**': strong,
970 '`': interpreted_or_phrase_ref,
971 '``': literal,
972 '_`': inline_internal_target,
973 ']_': footnote_reference,
974 '|': substitution_reference,
975 '_': reference,
976 '__': anonymous_reference}
979 def _loweralpha_to_int(s, _zero=(ord('a')-1)):
980 return ord(s) - _zero
982 def _upperalpha_to_int(s, _zero=(ord('A')-1)):
983 return ord(s) - _zero
985 def _lowerroman_to_int(s):
986 return roman.fromRoman(s.upper())
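# Editorial examples of the ordinal helpers above (using the `roman`
# module shipped with docutils):
#     _loweralpha_to_int('c')   ->  3
#     _upperalpha_to_int('D')   ->  4
#     _lowerroman_to_int('iv')  ->  4
#     roman.fromRoman('XIV')    ->  14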
989 class Body(RSTState):
992 Generic classifier of the first line of a block.
995 double_width_pad_char = tableparser.TableParser.double_width_pad_char
996 """Padding character for East Asian double-width text."""
998 enum = Struct()
999 """Enumerated list parsing information."""
1001 enum.formatinfo = {
1002 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1003 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1004 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1005 enum.formats = enum.formatinfo.keys()
1006 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1007 'lowerroman', 'upperroman'] # ORDERED!
1008 enum.sequencepats = {'arabic': '[0-9]+',
1009 'loweralpha': '[a-z]',
1010 'upperalpha': '[A-Z]',
1011 'lowerroman': '[ivxlcdm]+',
1012 'upperroman': '[IVXLCDM]+',}
1013 enum.converters = {'arabic': int,
1014 'loweralpha': _loweralpha_to_int,
1015 'upperalpha': _upperalpha_to_int,
1016 'lowerroman': _lowerroman_to_int,
1017 'upperroman': roman.fromRoman}
1019 enum.sequenceregexps = {}
1020 for sequence in enum.sequences:
1021 enum.sequenceregexps[sequence] = re.compile(
1022 enum.sequencepats[sequence] + '$')
1024 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1025 """Matches the top (& bottom) of a full table)."""
1027 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1028 """Matches the top of a simple table."""
1030 simple_table_border_pat = re.compile('=+[ =]*$')
1031 """Matches the bottom & header bottom of a simple table."""
1033 pats = {}
1034 """Fragments of patterns used by transitions."""
1036 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1037 pats['alpha'] = '[a-zA-Z]'
1038 pats['alphanum'] = '[a-zA-Z0-9]'
1039 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1040 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1041 '|%(upperroman)s|#)' % enum.sequencepats)
1042 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1043 # @@@ Loosen up the pattern? Allow Unicode?
1044 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1045 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1046 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1047 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1049 for format in enum.formats:
1050 pats[format] = '(?P<%s>%s%s%s)' % (
1051 format, re.escape(enum.formatinfo[format].prefix),
1052 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1054 patterns = {
1055 'bullet': ur'[-+*\u2022\u2023\u2043]( +|$)',
1056 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1057 'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1058 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1059 'doctest': r'>>>( +|$)',
1060 'line_block': r'\|( +|$)',
1061 'grid_table_top': grid_table_top_pat,
1062 'simple_table_top': simple_table_top_pat,
1063 'explicit_markup': r'\.\.( +|$)',
1064 'anonymous': r'__( +|$)',
1065 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1066 'text': r''}
1067 initial_transitions = (
1068 'bullet',
1069 'enumerator',
1070 'field_marker',
1071 'option_marker',
1072 'doctest',
1073 'line_block',
1074 'grid_table_top',
1075 'simple_table_top',
1076 'explicit_markup',
1077 'anonymous',
1078 'line',
1079 'text')
1081 def indent(self, match, context, next_state):
1082 """Block quote."""
1083 indented, indent, line_offset, blank_finish = \
1084 self.state_machine.get_indented()
1085 elements = self.block_quote(indented, line_offset)
1086 self.parent += elements
1087 if not blank_finish:
1088 self.parent += self.unindent_warning('Block quote')
1089 return context, next_state, []
1091 def block_quote(self, indented, line_offset):
1092 elements = []
1093 while indented:
1094 (blockquote_lines,
1095 attribution_lines,
1096 attribution_offset,
1097 indented,
1098 new_line_offset) = self.split_attribution(indented, line_offset)
1099 blockquote = nodes.block_quote()
1100 self.nested_parse(blockquote_lines, line_offset, blockquote)
1101 elements.append(blockquote)
1102 if attribution_lines:
1103 attribution, messages = self.parse_attribution(
1104 attribution_lines, attribution_offset)
1105 blockquote += attribution
1106 elements += messages
1107 line_offset = new_line_offset
1108 while indented and not indented[0]:
1109 indented = indented[1:]
1110 line_offset += 1
1111 return elements
1113 # U+2014 is an em-dash:
1114 attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')
1116 def split_attribution(self, indented, line_offset):
1118 Check for a block quote attribution and split it off:
1120 * First line after a blank line must begin with a dash ("--", "---",
1121 em-dash; matches `self.attribution_pattern`).
1122 * Every line after that must have consistent indentation.
1123 * Attributions must be preceded by block quote content.
1125 Return a tuple of: (block quote content lines, attribution lines,
1126 attribution offset, remaining indented lines, new line offset).
1128 blank = None
1129 nonblank_seen = False
1130 for i in range(len(indented)):
1131 line = indented[i].rstrip()
1132 if line:
1133 if nonblank_seen and blank == i - 1: # last line blank
1134 match = self.attribution_pattern.match(line)
1135 if match:
1136 attribution_end, indent = self.check_attribution(
1137 indented, i)
1138 if attribution_end:
1139 a_lines = indented[i:attribution_end]
1140 a_lines.trim_left(match.end(), end=1)
1141 a_lines.trim_left(indent, start=1)
1142 return (indented[:i], a_lines,
1143 i, indented[attribution_end:],
1144 line_offset + attribution_end)
1145 nonblank_seen = True
1146 else:
1147 blank = i
1148 else:
1149 return (indented, None, None, None, None)
1151 def check_attribution(self, indented, attribution_start):
1153 Check attribution shape.
1154 Return the index past the end of the attribution, and the indent.
1156 indent = None
1157 i = attribution_start + 1
1158 for i in range(attribution_start + 1, len(indented)):
1159 line = indented[i].rstrip()
1160 if not line:
1161 break
1162 if indent is None:
1163 indent = len(line) - len(line.lstrip())
1164 elif len(line) - len(line.lstrip()) != indent:
1165 return None, None # bad shape; not an attribution
1166 else:
1167 # return index of line after last attribution line:
1168 i += 1
1169 return i, (indent or 0)
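# Editorial example of the shape recognized by split_attribution() and
# check_attribution(): in a block quote such as
#
#     Quoted paragraph.
#
#     -- An Attribution,
#        continued with consistent indentation
#
# the lines after the blank line form the attribution; a continuation line
# with a different indent disqualifies the candidate.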
1171 def parse_attribution(self, indented, line_offset):
1172 text = '\n'.join(indented).rstrip()
1173 lineno = self.state_machine.abs_line_number() + line_offset
1174 textnodes, messages = self.inline_text(text, lineno)
1175 node = nodes.attribution(text, '', *textnodes)
1176 node.line = lineno
1177 return node, messages
1179 def bullet(self, match, context, next_state):
1180 """Bullet list item."""
1181 bulletlist = nodes.bullet_list()
1182 self.parent += bulletlist
1183 bulletlist['bullet'] = match.string[0]
1184 i, blank_finish = self.list_item(match.end())
1185 bulletlist += i
1186 offset = self.state_machine.line_offset + 1 # next line
1187 new_line_offset, blank_finish = self.nested_list_parse(
1188 self.state_machine.input_lines[offset:],
1189 input_offset=self.state_machine.abs_line_offset() + 1,
1190 node=bulletlist, initial_state='BulletList',
1191 blank_finish=blank_finish)
1192 self.goto_line(new_line_offset)
1193 if not blank_finish:
1194 self.parent += self.unindent_warning('Bullet list')
1195 return [], next_state, []
1197 def list_item(self, indent):
1198 if self.state_machine.line[indent:]:
1199 indented, line_offset, blank_finish = (
1200 self.state_machine.get_known_indented(indent))
1201 else:
1202 indented, indent, line_offset, blank_finish = (
1203 self.state_machine.get_first_known_indented(indent))
1204 listitem = nodes.list_item('\n'.join(indented))
1205 if indented:
1206 self.nested_parse(indented, input_offset=line_offset,
1207 node=listitem)
1208 return listitem, blank_finish
1210 def enumerator(self, match, context, next_state):
1211 """Enumerated List Item"""
1212 format, sequence, text, ordinal = self.parse_enumerator(match)
1213 if not self.is_enumerated_list_item(ordinal, sequence, format):
1214 raise statemachine.TransitionCorrection('text')
1215 enumlist = nodes.enumerated_list()
1216 self.parent += enumlist
1217 if sequence == '#':
1218 enumlist['enumtype'] = 'arabic'
1219 else:
1220 enumlist['enumtype'] = sequence
1221 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1222 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1223 if ordinal != 1:
1224 enumlist['start'] = ordinal
1225 msg = self.reporter.info(
1226 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1227 % (text, ordinal), line=self.state_machine.abs_line_number())
1228 self.parent += msg
1229 listitem, blank_finish = self.list_item(match.end())
1230 enumlist += listitem
1231 offset = self.state_machine.line_offset + 1 # next line
1232 newline_offset, blank_finish = self.nested_list_parse(
1233 self.state_machine.input_lines[offset:],
1234 input_offset=self.state_machine.abs_line_offset() + 1,
1235 node=enumlist, initial_state='EnumeratedList',
1236 blank_finish=blank_finish,
1237 extra_settings={'lastordinal': ordinal,
1238 'format': format,
1239 'auto': sequence == '#'})
1240 self.goto_line(newline_offset)
1241 if not blank_finish:
1242 self.parent += self.unindent_warning('Enumerated list')
1243 return [], next_state, []
1245 def parse_enumerator(self, match, expected_sequence=None):
1247 Analyze an enumerator and return the results.
1249 :Return:
1250 - the enumerator format ('period', 'parens', or 'rparen'),
1251 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1252 - the text of the enumerator, stripped of formatting, and
1253 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1254 ``None`` is returned for invalid enumerator text).
1256 The enumerator format has already been determined by the regular
1257 expression match. If `expected_sequence` is given, that sequence is
1258 tried first. If not, we check for Roman numeral 1. This way,
1259 single-character Roman numerals (which are also alphabetical) can be
1260 matched. If no sequence has been matched, all sequences are checked in
1261 order.
1263 groupdict = match.groupdict()
1264 sequence = ''
1265 for format in self.enum.formats:
1266 if groupdict[format]: # was this the format matched?
1267 break # yes; keep `format`
1268 else: # shouldn't happen
1269 raise ParserError('enumerator format not matched')
1270 text = groupdict[format][self.enum.formatinfo[format].start
1271 :self.enum.formatinfo[format].end]
1272 if text == '#':
1273 sequence = '#'
1274 elif expected_sequence:
1275 try:
1276 if self.enum.sequenceregexps[expected_sequence].match(text):
1277 sequence = expected_sequence
1278 except KeyError: # shouldn't happen
1279 raise ParserError('unknown enumerator sequence: %s'
1280 % sequence)
1281 elif text == 'i':
1282 sequence = 'lowerroman'
1283 elif text == 'I':
1284 sequence = 'upperroman'
1285 if not sequence:
1286 for sequence in self.enum.sequences:
1287 if self.enum.sequenceregexps[sequence].match(text):
1288 break
1289 else: # shouldn't happen
1290 raise ParserError('enumerator sequence not matched')
1291 if sequence == '#':
1292 ordinal = 1
1293 else:
1294 try:
1295 ordinal = self.enum.converters[sequence](text)
1296 except roman.InvalidRomanNumeralError:
1297 ordinal = None
1298 return format, sequence, text, ordinal
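# Editorial examples of parse_enumerator() results, as
# (format, sequence, text, ordinal), for a few enumerator matches:
#     '3.'   ->  ('period', 'arabic',     '3',  3)
#     '(b)'  ->  ('parens', 'loweralpha', 'b',  2)
#     'iv)'  ->  ('rparen', 'lowerroman', 'iv', 4)
#     '#.'   ->  ('period', '#',          '#',  1)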
1300 def is_enumerated_list_item(self, ordinal, sequence, format):
1302 Check validity based on the ordinal value and the second line.
1304 Return true iff the ordinal is valid and the second line is blank,
1305 indented, or starts with the next enumerator or an auto-enumerator.
1307 if ordinal is None:
1308 return None
1309 try:
1310 next_line = self.state_machine.next_line()
1311 except EOFError: # end of input lines
1312 self.state_machine.previous_line()
1313 return 1
1314 else:
1315 self.state_machine.previous_line()
1316 if not next_line[:1].strip(): # blank or indented
1317 return 1
1318 result = self.make_enumerator(ordinal + 1, sequence, format)
1319 if result:
1320 next_enumerator, auto_enumerator = result
1321 try:
1322 if ( next_line.startswith(next_enumerator) or
1323 next_line.startswith(auto_enumerator) ):
1324 return 1
1325 except TypeError:
1326 pass
1327 return None
1329 def make_enumerator(self, ordinal, sequence, format):
1331 Construct and return the next enumerated list item marker, and an
1332 auto-enumerator ("#" instead of the regular enumerator).
1334 Return ``None`` for invalid (out of range) ordinals.
1335 """ #"
1336 if sequence == '#':
1337 enumerator = '#'
1338 elif sequence == 'arabic':
1339 enumerator = str(ordinal)
1340 else:
1341 if sequence.endswith('alpha'):
1342 if ordinal > 26:
1343 return None
1344 enumerator = chr(ordinal + ord('a') - 1)
1345 elif sequence.endswith('roman'):
1346 try:
1347 enumerator = roman.toRoman(ordinal)
1348 except roman.RomanError:
1349 return None
1350 else: # shouldn't happen
1351 raise ParserError('unknown enumerator sequence: "%s"'
1352 % sequence)
1353 if sequence.startswith('lower'):
1354 enumerator = enumerator.lower()
1355 elif sequence.startswith('upper'):
1356 enumerator = enumerator.upper()
1357 else: # shouldn't happen
1358 raise ParserError('unknown enumerator sequence: "%s"'
1359 % sequence)
1360 formatinfo = self.enum.formatinfo[format]
1361 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1362 + ' ')
1363 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1364 return next_enumerator, auto_enumerator
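# Editorial examples of make_enumerator(ordinal, sequence, format):
#     (3, 'loweralpha', 'parens')   ->  ('(c) ', '(#) ')
#     (4, 'upperroman', 'period')   ->  ('IV. ', '#. ')
#     (27, 'loweralpha', 'period')  ->  None  (out of range)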
1366 def field_marker(self, match, context, next_state):
1367 """Field list item."""
1368 field_list = nodes.field_list()
1369 self.parent += field_list
1370 field, blank_finish = self.field(match)
1371 field_list += field
1372 offset = self.state_machine.line_offset + 1 # next line
1373 newline_offset, blank_finish = self.nested_list_parse(
1374 self.state_machine.input_lines[offset:],
1375 input_offset=self.state_machine.abs_line_offset() + 1,
1376 node=field_list, initial_state='FieldList',
1377 blank_finish=blank_finish)
1378 self.goto_line(newline_offset)
1379 if not blank_finish:
1380 self.parent += self.unindent_warning('Field list')
1381 return [], next_state, []
1383 def field(self, match):
1384 name = self.parse_field_marker(match)
1385 lineno = self.state_machine.abs_line_number()
1386 indented, indent, line_offset, blank_finish = \
1387 self.state_machine.get_first_known_indented(match.end())
1388 field_node = nodes.field()
1389 field_node.line = lineno
1390 name_nodes, name_messages = self.inline_text(name, lineno)
1391 field_node += nodes.field_name(name, '', *name_nodes)
1392 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1393 field_node += field_body
1394 if indented:
1395 self.parse_field_body(indented, line_offset, field_body)
1396 return field_node, blank_finish
1398 def parse_field_marker(self, match):
1399 """Extract & return field name from a field marker match."""
1400 field = match.group()[1:] # strip off leading ':'
1401 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1402 return field
1404 def parse_field_body(self, indented, offset, node):
1405 self.nested_parse(indented, input_offset=offset, node=node)
1407 def option_marker(self, match, context, next_state):
1408 """Option list item."""
1409 optionlist = nodes.option_list()
1410 try:
1411 listitem, blank_finish = self.option_list_item(match)
1412 except MarkupError, (message, lineno):
1413 # This shouldn't happen; pattern won't match.
1414 msg = self.reporter.error(
1415 'Invalid option list marker: %s' % message, line=lineno)
1416 self.parent += msg
1417 indented, indent, line_offset, blank_finish = \
1418 self.state_machine.get_first_known_indented(match.end())
1419 elements = self.block_quote(indented, line_offset)
1420 self.parent += elements
1421 if not blank_finish:
1422 self.parent += self.unindent_warning('Option list')
1423 return [], next_state, []
1424 self.parent += optionlist
1425 optionlist += listitem
1426 offset = self.state_machine.line_offset + 1 # next line
1427 newline_offset, blank_finish = self.nested_list_parse(
1428 self.state_machine.input_lines[offset:],
1429 input_offset=self.state_machine.abs_line_offset() + 1,
1430 node=optionlist, initial_state='OptionList',
1431 blank_finish=blank_finish)
1432 self.goto_line(newline_offset)
1433 if not blank_finish:
1434 self.parent += self.unindent_warning('Option list')
1435 return [], next_state, []
1437 def option_list_item(self, match):
1438 offset = self.state_machine.abs_line_offset()
1439 options = self.parse_option_marker(match)
1440 indented, indent, line_offset, blank_finish = \
1441 self.state_machine.get_first_known_indented(match.end())
1442 if not indented: # not an option list item
1443 self.goto_line(offset)
1444 raise statemachine.TransitionCorrection('text')
1445 option_group = nodes.option_group('', *options)
1446 description = nodes.description('\n'.join(indented))
1447 option_list_item = nodes.option_list_item('', option_group,
1448 description)
1449 if indented:
1450 self.nested_parse(indented, input_offset=line_offset,
1451 node=description)
1452 return option_list_item, blank_finish
1454 def parse_option_marker(self, match):
1456 Return a list of `node.option` and `node.option_argument` objects,
1457 parsed from an option marker match.
1459 :Exception: `MarkupError` for invalid option markers.
1461 optlist = []
1462 optionstrings = match.group().rstrip().split(', ')
1463 for optionstring in optionstrings:
1464 tokens = optionstring.split()
1465 delimiter = ' '
1466 firstopt = tokens[0].split('=')
1467 if len(firstopt) > 1:
1468 # "--opt=value" form
1469 tokens[:1] = firstopt
1470 delimiter = '='
1471 elif (len(tokens[0]) > 2
1472 and ((tokens[0].startswith('-')
1473 and not tokens[0].startswith('--'))
1474 or tokens[0].startswith('+'))):
1475 # "-ovalue" form
1476 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1477 delimiter = ''
1478 if len(tokens) > 1 and (tokens[1].startswith('<')
1479 and tokens[-1].endswith('>')):
1480 # "-o <value1 value2>" form; join all values into one token
1481 tokens[1:] = [' '.join(tokens[1:])]
1482 if 0 < len(tokens) <= 2:
1483 option = nodes.option(optionstring)
1484 option += nodes.option_string(tokens[0], tokens[0])
1485 if len(tokens) > 1:
1486 option += nodes.option_argument(tokens[1], tokens[1],
1487 delimiter=delimiter)
1488 optlist.append(option)
1489 else:
1490 raise MarkupError(
1491 'wrong number of option tokens (=%s), should be 1 or 2: '
1492 '"%s"' % (len(tokens), optionstring),
1493 self.state_machine.abs_line_number() + 1)
1494 return optlist
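# Editorial examples of option markers handled above; each marker is split
# on ', ' and yields option_string / option_argument pairs:
#     '-a FILE, --all=FILE'  ->  ('-a', 'FILE', delimiter ' ') and
#                                ('--all', 'FILE', delimiter '=')
#     '-ofile'               ->  ('-o', 'file', delimiter '')
#     '/V'                   ->  ('/V',)  (no argument)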
1496 def doctest(self, match, context, next_state):
1497 data = '\n'.join(self.state_machine.get_text_block())
1498 self.parent += nodes.doctest_block(data, data)
1499 return [], next_state, []
1501 def line_block(self, match, context, next_state):
1502 """First line of a line block."""
1503 block = nodes.line_block()
1504 self.parent += block
1505 lineno = self.state_machine.abs_line_number()
1506 line, messages, blank_finish = self.line_block_line(match, lineno)
1507 block += line
1508 self.parent += messages
1509 if not blank_finish:
1510 offset = self.state_machine.line_offset + 1 # next line
1511 new_line_offset, blank_finish = self.nested_list_parse(
1512 self.state_machine.input_lines[offset:],
1513 input_offset=self.state_machine.abs_line_offset() + 1,
1514 node=block, initial_state='LineBlock',
1515 blank_finish=0)
1516 self.goto_line(new_line_offset)
1517 if not blank_finish:
1518 self.parent += self.reporter.warning(
1519 'Line block ends without a blank line.',
1520 line=(self.state_machine.abs_line_number() + 1))
1521 if len(block):
1522 if block[0].indent is None:
1523 block[0].indent = 0
1524 self.nest_line_block_lines(block)
1525 return [], next_state, []
1527 def line_block_line(self, match, lineno):
1528 """Return one line element of a line_block."""
1529 indented, indent, line_offset, blank_finish = \
1530 self.state_machine.get_first_known_indented(match.end(),
1531 until_blank=1)
1532 text = u'\n'.join(indented)
1533 text_nodes, messages = self.inline_text(text, lineno)
1534 line = nodes.line(text, '', *text_nodes)
1535 if match.string.rstrip() != '|': # not empty
1536 line.indent = len(match.group(1)) - 1
1537 return line, messages, blank_finish
1539 def nest_line_block_lines(self, block):
1540 for index in range(1, len(block)):
1541 if block[index].indent is None:
1542 block[index].indent = block[index - 1].indent
1543 self.nest_line_block_segment(block)
1545 def nest_line_block_segment(self, block):
1546 indents = [item.indent for item in block]
1547 least = min(indents)
1548 new_items = []
1549 new_block = nodes.line_block()
1550 for item in block:
1551 if item.indent > least:
1552 new_block.append(item)
1553 else:
1554 if len(new_block):
1555 self.nest_line_block_segment(new_block)
1556 new_items.append(new_block)
1557 new_block = nodes.line_block()
1558 new_items.append(item)
1559 if len(new_block):
1560 self.nest_line_block_segment(new_block)
1561 new_items.append(new_block)
1562 block[:] = new_items
1564 def grid_table_top(self, match, context, next_state):
1565 """Top border of a full table."""
1566 return self.table_top(match, context, next_state,
1567 self.isolate_grid_table,
1568 tableparser.GridTableParser)
1570 def simple_table_top(self, match, context, next_state):
1571 """Top border of a simple table."""
1572 return self.table_top(match, context, next_state,
1573 self.isolate_simple_table,
1574 tableparser.SimpleTableParser)
1576 def table_top(self, match, context, next_state,
1577 isolate_function, parser_class):
1578 """Top border of a generic table."""
1579 nodelist, blank_finish = self.table(isolate_function, parser_class)
1580 self.parent += nodelist
1581 if not blank_finish:
1582 msg = self.reporter.warning(
1583 'Blank line required after table.',
1584 line=self.state_machine.abs_line_number() + 1)
1585 self.parent += msg
1586 return [], next_state, []
1588 def table(self, isolate_function, parser_class):
1589 """Parse a table."""
1590 block, messages, blank_finish = isolate_function()
1591 if block:
1592 try:
1593 parser = parser_class()
1594 tabledata = parser.parse(block)
1595 tableline = (self.state_machine.abs_line_number() - len(block)
1596 + 1)
1597 table = self.build_table(tabledata, tableline)
1598 nodelist = [table] + messages
1599 except tableparser.TableMarkupError, detail:
1600 nodelist = self.malformed_table(
1601 block, ' '.join(detail.args)) + messages
1602 else:
1603 nodelist = messages
1604 return nodelist, blank_finish
1606 def isolate_grid_table(self):
1607 messages = []
1608 blank_finish = 1
1609 try:
1610 block = self.state_machine.get_text_block(flush_left=1)
1611 except statemachine.UnexpectedIndentationError, instance:
1612 block, source, lineno = instance.args
1613 messages.append(self.reporter.error('Unexpected indentation.',
1614 source=source, line=lineno))
1615 blank_finish = 0
1616 block.disconnect()
1617 # for East Asian chars:
1618 block.pad_double_width(self.double_width_pad_char)
1619 width = len(block[0].strip())
1620 for i in range(len(block)):
1621 block[i] = block[i].strip()
1622 if block[i][0] not in '+|': # check left edge
1623 blank_finish = 0
1624 self.state_machine.previous_line(len(block) - i)
1625 del block[i:]
1626 break
1627 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1628 blank_finish = 0
1629 # from second-last to third line of table:
1630 for i in range(len(block) - 2, 1, -1):
1631 if self.grid_table_top_pat.match(block[i]):
1632 self.state_machine.previous_line(len(block) - i + 1)
1633 del block[i+1:]
1634 break
1635 else:
1636 messages.extend(self.malformed_table(block))
1637 return [], messages, blank_finish
1638 for i in range(len(block)): # check right edge
1639 if len(block[i]) != width or block[i][-1] not in '+|':
1640 messages.extend(self.malformed_table(block))
1641 return [], messages, blank_finish
1642 return block, messages, blank_finish
1644 def isolate_simple_table(self):
1645 start = self.state_machine.line_offset
1646 lines = self.state_machine.input_lines
1647 limit = len(lines) - 1
1648 toplen = len(lines[start].strip())
1649 pattern_match = self.simple_table_border_pat.match
1650 found = 0
1651 found_at = None
1652 i = start + 1
1653 while i <= limit:
1654 line = lines[i]
1655 match = pattern_match(line)
1656 if match:
1657 if len(line.strip()) != toplen:
1658 self.state_machine.next_line(i - start)
1659 messages = self.malformed_table(
1660 lines[start:i+1], 'Bottom/header table border does '
1661 'not match top border.')
1662 return [], messages, i == limit or not lines[i+1].strip()
1663 found += 1
1664 found_at = i
1665 if found == 2 or i == limit or not lines[i+1].strip():
1666 end = i
1667 break
1668 i += 1
1669 else: # reached end of input_lines
1670 if found:
1671 extra = ' or no blank line after table bottom'
1672 self.state_machine.next_line(found_at - start)
1673 block = lines[start:found_at+1]
1674 else:
1675 extra = ''
1676 self.state_machine.next_line(i - start - 1)
1677 block = lines[start:]
1678 messages = self.malformed_table(
1679 block, 'No bottom table border found%s.' % extra)
1680 return [], messages, not extra
1681 self.state_machine.next_line(end - start)
1682 block = lines[start:end+1]
1683 # for East Asian chars:
1684 block.pad_double_width(self.double_width_pad_char)
1685 return block, [], end == limit or not lines[end+1].strip()
1687 def malformed_table(self, block, detail=''):
1688 block.replace(self.double_width_pad_char, '')
1689 data = '\n'.join(block)
1690 message = 'Malformed table.'
1691 lineno = self.state_machine.abs_line_number() - len(block) + 1
1692 if detail:
1693 message += '\n' + detail
1694 error = self.reporter.error(message, nodes.literal_block(data, data),
1695 line=lineno)
1696 return [error]
1698 def build_table(self, tabledata, tableline, stub_columns=0):
1699 colwidths, headrows, bodyrows = tabledata
1700 table = nodes.table()
1701 tgroup = nodes.tgroup(cols=len(colwidths))
1702 table += tgroup
1703 for colwidth in colwidths:
1704 colspec = nodes.colspec(colwidth=colwidth)
1705 if stub_columns:
1706 colspec.attributes['stub'] = 1
1707 stub_columns -= 1
1708 tgroup += colspec
1709 if headrows:
1710 thead = nodes.thead()
1711 tgroup += thead
1712 for row in headrows:
1713 thead += self.build_table_row(row, tableline)
1714 tbody = nodes.tbody()
1715 tgroup += tbody
1716 for row in bodyrows:
1717 tbody += self.build_table_row(row, tableline)
1718 return table
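# Sketch of the expected `tabledata` layout (illustrative values): a
# triple (colwidths, headrows, bodyrows) as produced by the table
# parsers, where each row is a list of cells and each cell is either
# None (covered by a span) or a tuple
# (morerows, morecols, offset, cellblock), e.g. roughly
# ([9, 14], [[(0, 0, 1, ['Header 1']), (0, 0, 1, ['Header 2'])]],
#  [[(0, 0, 3, ['body row']), (0, 0, 3, ['more text'])]]).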
1720 def build_table_row(self, rowdata, tableline):
1721 row = nodes.row()
1722 for cell in rowdata:
1723 if cell is None:
1724 continue
1725 morerows, morecols, offset, cellblock = cell
1726 attributes = {}
1727 if morerows:
1728 attributes['morerows'] = morerows
1729 if morecols:
1730 attributes['morecols'] = morecols
1731 entry = nodes.entry(**attributes)
1732 row += entry
1733 if ''.join(cellblock):
1734 self.nested_parse(cellblock, input_offset=tableline+offset,
1735 node=entry)
1736 return row
1739 explicit = Struct()
1740 """Patterns and constants used for explicit markup recognition."""
1742 explicit.patterns = Struct(
1743 target=re.compile(r"""
1744 (
1745 _ # anonymous target
1746 | # *OR*
1747 (?!_) # no underscore at the beginning
1748 (?P<quote>`?) # optional open quote
1749 (?![ `]) # first char. not space or
1750 # backquote
1751 (?P<name> # reference name
1752 .+?
1753 )
1754 %(non_whitespace_escape_before)s
1755 (?P=quote) # close quote if open quote used
1756 )
1757 (?<!(?<!\x00):) # no unescaped colon at end
1758 %(non_whitespace_escape_before)s
1759 [ ]? # optional space
1760 : # end of reference name
1761 ([ ]+|$) # followed by whitespace
1762 """ % vars(Inliner), re.VERBOSE),
1763 reference=re.compile(r"""
1764 (
1765 (?P<simple>%(simplename)s)_
1766 | # *OR*
1767 ` # open backquote
1768 (?![ ]) # not space
1769 (?P<phrase>.+?) # hyperlink phrase
1770 %(non_whitespace_escape_before)s
1771 `_ # close backquote,
1772 # reference mark
1773 )
1774 $ # end of string
1775 """ % vars(Inliner), re.VERBOSE | re.UNICODE),
1776 substitution=re.compile(r"""
1777 (
1778 (?![ ]) # first char. not space
1779 (?P<name>.+?) # substitution text
1780 %(non_whitespace_escape_before)s
1781 \| # close delimiter
1782 )
1783 ([ ]+|$) # followed by whitespace
1784 """ % vars(Inliner), re.VERBOSE),)
1786 def footnote(self, match):
1787 lineno = self.state_machine.abs_line_number()
1788 indented, indent, offset, blank_finish = \
1789 self.state_machine.get_first_known_indented(match.end())
1790 label = match.group(1)
1791 name = normalize_name(label)
1792 footnote = nodes.footnote('\n'.join(indented))
1793 footnote.line = lineno
1794 if name[0] == '#': # auto-numbered
1795 name = name[1:] # autonumber label
1796 footnote['auto'] = 1
1797 if name:
1798 footnote['names'].append(name)
1799 self.document.note_autofootnote(footnote)
1800 elif name == '*': # auto-symbol
1801 name = ''
1802 footnote['auto'] = '*'
1803 self.document.note_symbol_footnote(footnote)
1804 else: # manually numbered
1805 footnote += nodes.label('', label)
1806 footnote['names'].append(name)
1807 self.document.note_footnote(footnote)
1808 if name:
1809 self.document.note_explicit_target(footnote, footnote)
1810 else:
1811 self.document.set_id(footnote, footnote)
1812 if indented:
1813 self.nested_parse(indented, input_offset=offset, node=footnote)
1814 return [footnote], blank_finish
1816 def citation(self, match):
1817 lineno = self.state_machine.abs_line_number()
1818 indented, indent, offset, blank_finish = \
1819 self.state_machine.get_first_known_indented(match.end())
1820 label = match.group(1)
1821 name = normalize_name(label)
1822 citation = nodes.citation('\n'.join(indented))
1823 citation.line = lineno
1824 citation += nodes.label('', label)
1825 citation['names'].append(name)
1826 self.document.note_citation(citation)
1827 self.document.note_explicit_target(citation, citation)
1828 if indented:
1829 self.nested_parse(indented, input_offset=offset, node=citation)
1830 return [citation], blank_finish
1832 def hyperlink_target(self, match):
1833 pattern = self.explicit.patterns.target
1834 lineno = self.state_machine.abs_line_number()
1835 block, indent, offset, blank_finish = \
1836 self.state_machine.get_first_known_indented(
1837 match.end(), until_blank=1, strip_indent=0)
1838 blocktext = match.string[:match.end()] + '\n'.join(block)
1839 block = [escape2null(line) for line in block]
1840 escaped = block[0]
1841 blockindex = 0
1842 while 1:
1843 targetmatch = pattern.match(escaped)
1844 if targetmatch:
1845 break
1846 blockindex += 1
1847 try:
1848 escaped += block[blockindex]
1849 except IndexError:
1850 raise MarkupError('malformed hyperlink target.', lineno)
1851 del block[:blockindex]
1852 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
1853 target = self.make_target(block, blocktext, lineno,
1854 targetmatch.group('name'))
1855 return [target], blank_finish
1857 def make_target(self, block, block_text, lineno, target_name):
1858 target_type, data = self.parse_target(block, block_text, lineno)
1859 if target_type == 'refname':
1860 target = nodes.target(block_text, '', refname=normalize_name(data))
1861 target.indirect_reference_name = data
1862 self.add_target(target_name, '', target, lineno)
1863 self.document.note_indirect_target(target)
1864 return target
1865 elif target_type == 'refuri':
1866 target = nodes.target(block_text, '')
1867 self.add_target(target_name, data, target, lineno)
1868 return target
1869 else:
1870 return data
1872 def parse_target(self, block, block_text, lineno):
1873 """
1874 Determine the type of reference of a target.
1876 :Return: A 2-tuple, one of:
1878 - 'refname' and the indirect reference name
1879 - 'refuri' and the URI
1880 - 'malformed' and a system_message node
1881 """
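# For example (hypothetical targets): given the body of
# ".. _Docutils: http://docutils.sourceforge.net/", i.e. the block
# ['http://docutils.sourceforge.net/'], this returns
# ('refuri', 'http://docutils.sourceforge.net/'); given the body of the
# indirect target ".. _second: first_", i.e. ['first_'], it returns
# ('refname', 'first').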
1882 if block and block[-1].strip()[-1:] == '_': # possible indirect target
1883 reference = ' '.join([line.strip() for line in block])
1884 refname = self.is_reference(reference)
1885 if refname:
1886 return 'refname', refname
1887 reference = ''.join([''.join(line.split()) for line in block])
1888 return 'refuri', unescape(reference)
1890 def is_reference(self, reference):
1891 match = self.explicit.patterns.reference.match(
1892 whitespace_normalize_name(reference))
1893 if not match:
1894 return None
1895 return unescape(match.group('simple') or match.group('phrase'))
1897 def add_target(self, targetname, refuri, target, lineno):
1898 target.line = lineno
1899 if targetname:
1900 name = normalize_name(unescape(targetname))
1901 target['names'].append(name)
1902 if refuri:
1903 uri = self.inliner.adjust_uri(refuri)
1904 if uri:
1905 target['refuri'] = uri
1906 else:
1907 raise ApplicationError('problem with URI: %r' % refuri)
1908 self.document.note_explicit_target(target, self.parent)
1909 else: # anonymous target
1910 if refuri:
1911 target['refuri'] = refuri
1912 target['anonymous'] = 1
1913 self.document.note_anonymous_target(target)
1915 def substitution_def(self, match):
1916 pattern = self.explicit.patterns.substitution
1917 lineno = self.state_machine.abs_line_number()
1918 block, indent, offset, blank_finish = \
1919 self.state_machine.get_first_known_indented(match.end(),
1920 strip_indent=0)
1921 blocktext = (match.string[:match.end()] + '\n'.join(block))
1922 block.disconnect()
1923 escaped = escape2null(block[0].rstrip())
1924 blockindex = 0
1925 while 1:
1926 subdefmatch = pattern.match(escaped)
1927 if subdefmatch:
1928 break
1929 blockindex += 1
1930 try:
1931 escaped = escaped + ' ' + escape2null(block[blockindex].strip())
1932 except IndexError:
1933 raise MarkupError('malformed substitution definition.',
1934 lineno)
1935 del block[:blockindex] # strip out the substitution marker
1936 block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
1937 if not block[0]:
1938 del block[0]
1939 offset += 1
1940 while block and not block[-1].strip():
1941 block.pop()
1942 subname = subdefmatch.group('name')
1943 substitution_node = nodes.substitution_definition(blocktext)
1944 substitution_node.line = lineno
1945 if not block:
1946 msg = self.reporter.warning(
1947 'Substitution definition "%s" missing contents.' % subname,
1948 nodes.literal_block(blocktext, blocktext), line=lineno)
1949 return [msg], blank_finish
1950 block[0] = block[0].strip()
1951 substitution_node['names'].append(
1952 nodes.whitespace_normalize_name(subname))
1953 new_abs_offset, blank_finish = self.nested_list_parse(
1954 block, input_offset=offset, node=substitution_node,
1955 initial_state='SubstitutionDef', blank_finish=blank_finish)
1956 i = 0
1957 for node in substitution_node[:]:
1958 if not (isinstance(node, nodes.Inline) or
1959 isinstance(node, nodes.Text)):
1960 self.parent += substitution_node[i]
1961 del substitution_node[i]
1962 else:
1963 i += 1
1964 for node in substitution_node.traverse(nodes.Element):
1965 if self.disallowed_inside_substitution_definitions(node):
1966 pformat = nodes.literal_block('', node.pformat().rstrip())
1967 msg = self.reporter.error(
1968 'Substitution definition contains illegal element:',
1969 pformat, nodes.literal_block(blocktext, blocktext),
1970 line=lineno)
1971 return [msg], blank_finish
1972 if len(substitution_node) == 0:
1973 msg = self.reporter.warning(
1974 'Substitution definition "%s" empty or invalid.'
1975 % subname,
1976 nodes.literal_block(blocktext, blocktext), line=lineno)
1977 return [msg], blank_finish
1978 self.document.note_substitution_def(
1979 substitution_node, subname, self.parent)
1980 return [substitution_node], blank_finish
1982 def disallowed_inside_substitution_definitions(self, node):
1983 if (node['ids'] or
1984 isinstance(node, nodes.reference) and node.get('anonymous') or
1985 isinstance(node, nodes.footnote_reference) and node.get('auto')):
1986 return 1
1987 else:
1988 return 0
1990 def directive(self, match, **option_presets):
1991 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
1992 type_name = match.group(1)
1993 directive_class, messages = directives.directive(
1994 type_name, self.memo.language, self.document)
1995 self.parent += messages
1996 if directive_class:
1997 return self.run_directive(
1998 directive_class, match, type_name, option_presets)
1999 else:
2000 return self.unknown_directive(type_name)
2002 def run_directive(self, directive, match, type_name, option_presets):
2003 """
2004 Parse a directive then run its directive function.
2006 Parameters:
2008 - `directive`: The class implementing the directive. Must be
2009 a subclass of `rst.Directive`.
2011 - `match`: A regular expression match object which matched the first
2012 line of the directive.
2014 - `type_name`: The directive name, as used in the source text.
2016 - `option_presets`: A dictionary of preset options, defaults for the
2017 directive options. Currently, only an "alt" option is passed by
2018 substitution definitions (value: the substitution name), which may
2019 be used by an embedded image directive.
2021 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2022 """
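# For instance (hypothetical directive), the source
#
#     .. image:: picture.png
#        :alt: A picture
#
# reaches this method with type_name 'image'; parse_directive_block()
# yields arguments ['picture.png'], options {'alt': 'A picture'} and no
# content, and the nodes returned by the directive instance's run()
# become the return value.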
2023 if isinstance(directive, (FunctionType, MethodType)):
2024 from docutils.parsers.rst import convert_directive_function
2025 directive = convert_directive_function(directive)
2026 lineno = self.state_machine.abs_line_number()
2027 initial_line_offset = self.state_machine.line_offset
2028 indented, indent, line_offset, blank_finish \
2029 = self.state_machine.get_first_known_indented(match.end(),
2030 strip_top=0)
2031 block_text = '\n'.join(self.state_machine.input_lines[
2032 initial_line_offset : self.state_machine.line_offset + 1])
2033 try:
2034 arguments, options, content, content_offset = (
2035 self.parse_directive_block(indented, line_offset,
2036 directive, option_presets))
2037 except MarkupError, detail:
2038 error = self.reporter.error(
2039 'Error in "%s" directive:\n%s.' % (type_name,
2040 ' '.join(detail.args)),
2041 nodes.literal_block(block_text, block_text), line=lineno)
2042 return [error], blank_finish
2043 directive_instance = directive(
2044 type_name, arguments, options, content, lineno,
2045 content_offset, block_text, self, self.state_machine)
2046 try:
2047 result = directive_instance.run()
2048 except docutils.parsers.rst.DirectiveError, directive_error:
2049 msg_node = self.reporter.system_message(directive_error.level,
2050 directive_error.message)
2051 msg_node += nodes.literal_block(block_text, block_text)
2052 msg_node['line'] = lineno
2053 result = [msg_node]
2054 assert isinstance(result, list), \
2055 'Directive "%s" must return a list of nodes.' % type_name
2056 for i in range(len(result)):
2057 assert isinstance(result[i], nodes.Node), \
2058 ('Directive "%s" returned non-Node object (index %s): %r'
2059 % (type_name, i, result[i]))
2060 return (result,
2061 blank_finish or self.state_machine.is_next_line_blank())
2063 def parse_directive_block(self, indented, line_offset, directive,
2064 option_presets):
2065 option_spec = directive.option_spec
2066 has_content = directive.has_content
2067 if indented and not indented[0].strip():
2068 indented.trim_start()
2069 line_offset += 1
2070 while indented and not indented[-1].strip():
2071 indented.trim_end()
2072 if indented and (directive.required_arguments
2073 or directive.optional_arguments
2074 or option_spec):
2075 for i in range(len(indented)):
2076 if not indented[i].strip():
2077 break
2078 else:
2079 i += 1
2080 arg_block = indented[:i]
2081 content = indented[i+1:]
2082 content_offset = line_offset + i + 1
2083 else:
2084 content = indented
2085 content_offset = line_offset
2086 arg_block = []
2087 while content and not content[0].strip():
2088 content.trim_start()
2089 content_offset += 1
2090 if option_spec:
2091 options, arg_block = self.parse_directive_options(
2092 option_presets, option_spec, arg_block)
2093 if arg_block and not (directive.required_arguments
2094 or directive.optional_arguments):
2095 raise MarkupError('no arguments permitted; blank line '
2096 'required before content block')
2097 else:
2098 options = {}
2099 if directive.required_arguments or directive.optional_arguments:
2100 arguments = self.parse_directive_arguments(
2101 directive, arg_block)
2102 else:
2103 arguments = []
2104 if content and not has_content:
2105 raise MarkupError('no content permitted')
2106 return (arguments, options, content, content_offset)
2108 def parse_directive_options(self, option_presets, option_spec, arg_block):
2109 options = option_presets.copy()
2110 for i in range(len(arg_block)):
2111 if arg_block[i][:1] == ':':
2112 opt_block = arg_block[i:]
2113 arg_block = arg_block[:i]
2114 break
2115 else:
2116 opt_block = []
2117 if opt_block:
2118 success, data = self.parse_extension_options(option_spec,
2119 opt_block)
2120 if success: # data is a dict of options
2121 options.update(data)
2122 else: # data is an error string
2123 raise MarkupError(data)
2124 return options, arg_block
2126 def parse_directive_arguments(self, directive, arg_block):
2127 required = directive.required_arguments
2128 optional = directive.optional_arguments
2129 arg_text = '\n'.join(arg_block)
2130 arguments = arg_text.split()
2131 if len(arguments) < required:
2132 raise MarkupError('%s argument(s) required, %s supplied'
2133 % (required, len(arguments)))
2134 elif len(arguments) > required + optional:
2135 if directive.final_argument_whitespace:
2136 arguments = arg_text.split(None, required + optional - 1)
2137 else:
2138 raise MarkupError(
2139 'maximum %s argument(s) allowed, %s supplied'
2140 % (required + optional, len(arguments)))
2141 return arguments
2143 def parse_extension_options(self, option_spec, datalines):
2144 """
2145 Parse `datalines` for a field list containing extension options
2146 matching `option_spec`.
2148 :Parameters:
2149 - `option_spec`: a mapping of option name to conversion
2150 function, which should raise an exception on bad input.
2151 - `datalines`: a list of input strings.
2153 :Return:
2154 - Success value, 1 or 0.
2155 - An option dictionary on success, an error string on failure.
2156 """
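# Example (hypothetical spec): with option_spec
# {'alt': directives.unchanged, 'scale': directives.nonnegative_int}
# and datalines [':alt: alternate text', ':scale: 50'], this should
# return (1, {'alt': 'alternate text', 'scale': 50}); an unknown or
# badly formed field yields (0, <error string>) instead.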
2157 node = nodes.field_list()
2158 newline_offset, blank_finish = self.nested_list_parse(
2159 datalines, 0, node, initial_state='ExtensionOptions',
2160 blank_finish=1)
2161 if newline_offset != len(datalines): # incomplete parse of block
2162 return 0, 'invalid option block'
2163 try:
2164 options = utils.extract_extension_options(node, option_spec)
2165 except KeyError, detail:
2166 return 0, ('unknown option: "%s"' % detail.args[0])
2167 except (ValueError, TypeError), detail:
2168 return 0, ('invalid option value: %s' % ' '.join(detail.args))
2169 except utils.ExtensionOptionError, detail:
2170 return 0, ('invalid option data: %s' % ' '.join(detail.args))
2171 if blank_finish:
2172 return 1, options
2173 else:
2174 return 0, 'option data incompletely parsed'
2176 def unknown_directive(self, type_name):
2177 lineno = self.state_machine.abs_line_number()
2178 indented, indent, offset, blank_finish = \
2179 self.state_machine.get_first_known_indented(0, strip_indent=0)
2180 text = '\n'.join(indented)
2181 error = self.reporter.error(
2182 'Unknown directive type "%s".' % type_name,
2183 nodes.literal_block(text, text), line=lineno)
2184 return [error], blank_finish
2186 def comment(self, match):
2187 if not match.string[match.end():].strip() \
2188 and self.state_machine.is_next_line_blank(): # an empty comment?
2189 return [nodes.comment()], 1 # "A tiny but practical wart."
2190 indented, indent, offset, blank_finish = \
2191 self.state_machine.get_first_known_indented(match.end())
2192 while indented and not indented[-1].strip():
2193 indented.trim_end()
2194 text = '\n'.join(indented)
2195 return [nodes.comment(text, text)], blank_finish
2197 explicit.constructs = [
2198 (footnote,
2199 re.compile(r"""
2200 \.\.[ ]+ # explicit markup start
2201 \[
2202 ( # footnote label:
2203 [0-9]+ # manually numbered footnote
2204 | # *OR*
2205 \# # anonymous auto-numbered footnote
2206 | # *OR*
2207 \#%s # auto-numbered footnote with a label
2208 | # *OR*
2209 \* # auto-symbol footnote
2210 )
2211 \]
2212 ([ ]+|$) # whitespace or end of line
2213 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2214 (citation,
2215 re.compile(r"""
2216 \.\.[ ]+ # explicit markup start
2217 \[(%s)\] # citation label
2218 ([ ]+|$) # whitespace or end of line
2219 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2220 (hyperlink_target,
2221 re.compile(r"""
2222 \.\.[ ]+ # explicit markup start
2223 _ # target indicator
2224 (?![ ]|$) # first char. not space or EOL
2225 """, re.VERBOSE)),
2226 (substitution_def,
2227 re.compile(r"""
2228 \.\.[ ]+ # explicit markup start
2229 \| # substitution indicator
2230 (?![ ]|$) # first char. not space or EOL
2231 """, re.VERBOSE)),
2232 (directive,
2233 re.compile(r"""
2234 \.\.[ ]+ # explicit markup start
2235 (%s) # directive name
2236 [ ]? # optional space
2237 :: # directive delimiter
2238 ([ ]+|$) # whitespace or end of line
2239 """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
2241 def explicit_markup(self, match, context, next_state):
2242 """Footnotes, hyperlink targets, directives, comments."""
2243 nodelist, blank_finish = self.explicit_construct(match)
2244 self.parent += nodelist
2245 self.explicit_list(blank_finish)
2246 return [], next_state, []
2248 def explicit_construct(self, match):
2249 """Determine which explicit construct this is, parse & return it."""
2250 errors = []
2251 for method, pattern in self.explicit.constructs:
2252 expmatch = pattern.match(match.string)
2253 if expmatch:
2254 try:
2255 return method(self, expmatch)
2256 except MarkupError, error: # never reached?
2257 message, lineno = error.args
2258 errors.append(self.reporter.warning(message, line=lineno))
2259 break
2260 nodelist, blank_finish = self.comment(match)
2261 return nodelist + errors, blank_finish
2263 def explicit_list(self, blank_finish):
2264 """
2265 Create a nested state machine for a series of explicit markup
2266 constructs (including anonymous hyperlink targets).
2267 """
2268 offset = self.state_machine.line_offset + 1 # next line
2269 newline_offset, blank_finish = self.nested_list_parse(
2270 self.state_machine.input_lines[offset:],
2271 input_offset=self.state_machine.abs_line_offset() + 1,
2272 node=self.parent, initial_state='Explicit',
2273 blank_finish=blank_finish,
2274 match_titles=self.state_machine.match_titles)
2275 self.goto_line(newline_offset)
2276 if not blank_finish:
2277 self.parent += self.unindent_warning('Explicit markup')
2279 def anonymous(self, match, context, next_state):
2280 """Anonymous hyperlink targets."""
2281 nodelist, blank_finish = self.anonymous_target(match)
2282 self.parent += nodelist
2283 self.explicit_list(blank_finish)
2284 return [], next_state, []
2286 def anonymous_target(self, match):
2287 lineno = self.state_machine.abs_line_number()
2288 block, indent, offset, blank_finish \
2289 = self.state_machine.get_first_known_indented(match.end(),
2290 until_blank=1)
2291 blocktext = match.string[:match.end()] + '\n'.join(block)
2292 block = [escape2null(line) for line in block]
2293 target = self.make_target(block, blocktext, lineno, '')
2294 return [target], blank_finish
2296 def line(self, match, context, next_state):
2297 """Section title overline or transition marker."""
2298 if self.state_machine.match_titles:
2299 return [match.string], 'Line', []
2300 elif match.string.strip() == '::':
2301 raise statemachine.TransitionCorrection('text')
2302 elif len(match.string.strip()) < 4:
2303 msg = self.reporter.info(
2304 'Unexpected possible title overline or transition.\n'
2305 "Treating it as ordinary text because it's so short.",
2306 line=self.state_machine.abs_line_number())
2307 self.parent += msg
2308 raise statemachine.TransitionCorrection('text')
2309 else:
2310 blocktext = self.state_machine.line
2311 msg = self.reporter.severe(
2312 'Unexpected section title or transition.',
2313 nodes.literal_block(blocktext, blocktext),
2314 line=self.state_machine.abs_line_number())
2315 self.parent += msg
2316 return [], next_state, []
2318 def text(self, match, context, next_state):
2319 """Titles, definition lists, paragraphs."""
2320 return [match.string], 'Text', []
2323 class RFC2822Body(Body):
2325 """
2326 RFC2822 headers are only valid as the first constructs in documents. As
2327 soon as anything else appears, the `Body` state should take over.
2328 """
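# The 'rfc2822' transition recognizes header-style fields such as
#
#     Author: J. Random Hacker
#     Contact: jrh@example.org
#
# (illustrative values); they are collected into a field_list with the
# "rfc2822" class by the rfc2822() method below.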
2330 patterns = Body.patterns.copy() # can't modify the original
2331 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2332 initial_transitions = [(name, 'Body')
2333 for name in Body.initial_transitions]
2334 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2336 def rfc2822(self, match, context, next_state):
2337 """RFC2822-style field list item."""
2338 fieldlist = nodes.field_list(classes=['rfc2822'])
2339 self.parent += fieldlist
2340 field, blank_finish = self.rfc2822_field(match)
2341 fieldlist += field
2342 offset = self.state_machine.line_offset + 1 # next line
2343 newline_offset, blank_finish = self.nested_list_parse(
2344 self.state_machine.input_lines[offset:],
2345 input_offset=self.state_machine.abs_line_offset() + 1,
2346 node=fieldlist, initial_state='RFC2822List',
2347 blank_finish=blank_finish)
2348 self.goto_line(newline_offset)
2349 if not blank_finish:
2350 self.parent += self.unindent_warning(
2351 'RFC2822-style field list')
2352 return [], next_state, []
2354 def rfc2822_field(self, match):
2355 name = match.string[:match.string.find(':')]
2356 indented, indent, line_offset, blank_finish = \
2357 self.state_machine.get_first_known_indented(match.end(),
2358 until_blank=1)
2359 fieldnode = nodes.field()
2360 fieldnode += nodes.field_name(name, name)
2361 fieldbody = nodes.field_body('\n'.join(indented))
2362 fieldnode += fieldbody
2363 if indented:
2364 self.nested_parse(indented, input_offset=line_offset,
2365 node=fieldbody)
2366 return fieldnode, blank_finish
2369 class SpecializedBody(Body):
2371 """
2372 Superclass for second and subsequent compound element members. Compound
2373 elements are lists and list-like constructs.
2375 All transition methods are disabled (redefined as `invalid_input`).
2376 Override individual methods in subclasses to re-enable.
2378 For example, once an initial bullet list item, say, is recognized, the
2379 `BulletList` subclass takes over, with a "bullet_list" node as its
2380 container. Upon encountering the initial bullet list item, `Body.bullet`
2381 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2382 starts up a nested parsing session with `BulletList` as the initial state.
2383 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2384 as only bullet list items are encountered, they are parsed and inserted
2385 into the container. The first construct which is *not* a bullet list item
2386 triggers the `invalid_input` method, which ends the nested parse and
2387 closes the container. `BulletList` needs to recognize input that is
2388 invalid in the context of a bullet list, which means everything *other
2389 than* bullet list items, so it inherits the transition list created in
2390 `Body`.
2391 """
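# For example (illustrative input), while parsing
#
#     - first item
#     - second item
#     not a list item
#
# the nested `BulletList` state consumes the second item; the last line
# triggers `invalid_input`, which backs up one line and ends the nested
# state machine, so the parent `Body` state sees that line again as
# ordinary text.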
2393 def invalid_input(self, match=None, context=None, next_state=None):
2394 """Not a compound element member. Abort this state machine."""
2395 self.state_machine.previous_line() # back up so parent SM can reassess
2396 raise EOFError
2398 indent = invalid_input
2399 bullet = invalid_input
2400 enumerator = invalid_input
2401 field_marker = invalid_input
2402 option_marker = invalid_input
2403 doctest = invalid_input
2404 line_block = invalid_input
2405 grid_table_top = invalid_input
2406 simple_table_top = invalid_input
2407 explicit_markup = invalid_input
2408 anonymous = invalid_input
2409 line = invalid_input
2410 text = invalid_input
2413 class BulletList(SpecializedBody):
2415 """Second and subsequent bullet_list list_items."""
2417 def bullet(self, match, context, next_state):
2418 """Bullet list item."""
2419 if match.string[0] != self.parent['bullet']:
2420 # different bullet: new list
2421 self.invalid_input()
2422 listitem, blank_finish = self.list_item(match.end())
2423 self.parent += listitem
2424 self.blank_finish = blank_finish
2425 return [], next_state, []
2428 class DefinitionList(SpecializedBody):
2430 """Second and subsequent definition_list_items."""
2432 def text(self, match, context, next_state):
2433 """Definition lists."""
2434 return [match.string], 'Definition', []
2437 class EnumeratedList(SpecializedBody):
2439 """Second and subsequent enumerated_list list_items."""
2441 def enumerator(self, match, context, next_state):
2442 """Enumerated list item."""
2443 format, sequence, text, ordinal = self.parse_enumerator(
2444 match, self.parent['enumtype'])
2445 if ( format != self.format
2446 or (sequence != '#' and (sequence != self.parent['enumtype']
2447 or self.auto
2448 or ordinal != (self.lastordinal + 1)))
2449 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2450 # different enumeration: new list
2451 self.invalid_input()
2452 if sequence == '#':
2453 self.auto = 1
2454 listitem, blank_finish = self.list_item(match.end())
2455 self.parent += listitem
2456 self.blank_finish = blank_finish
2457 self.lastordinal = ordinal
2458 return [], next_state, []
2461 class FieldList(SpecializedBody):
2463 """Second and subsequent field_list fields."""
2465 def field_marker(self, match, context, next_state):
2466 """Field list field."""
2467 field, blank_finish = self.field(match)
2468 self.parent += field
2469 self.blank_finish = blank_finish
2470 return [], next_state, []
2473 class OptionList(SpecializedBody):
2475 """Second and subsequent option_list option_list_items."""
2477 def option_marker(self, match, context, next_state):
2478 """Option list item."""
2479 try:
2480 option_list_item, blank_finish = self.option_list_item(match)
2481 except MarkupError, (message, lineno):
2482 self.invalid_input()
2483 self.parent += option_list_item
2484 self.blank_finish = blank_finish
2485 return [], next_state, []
2488 class RFC2822List(SpecializedBody, RFC2822Body):
2490 """Second and subsequent RFC2822-style field_list fields."""
2492 patterns = RFC2822Body.patterns
2493 initial_transitions = RFC2822Body.initial_transitions
2495 def rfc2822(self, match, context, next_state):
2496 """RFC2822-style field list item."""
2497 field, blank_finish = self.rfc2822_field(match)
2498 self.parent += field
2499 self.blank_finish = blank_finish
2500 return [], 'RFC2822List', []
2502 blank = SpecializedBody.invalid_input
2505 class ExtensionOptions(FieldList):
2507 """
2508 Parse field_list fields for extension options.
2510 No nested parsing is done (including inline markup parsing).
2511 """
2513 def parse_field_body(self, indented, offset, node):
2514 """Override `Body.parse_field_body` for simpler parsing."""
2515 lines = []
2516 for line in list(indented) + ['']:
2517 if line.strip():
2518 lines.append(line)
2519 elif lines:
2520 text = '\n'.join(lines)
2521 node += nodes.paragraph(text, text)
2522 lines = []
2525 class LineBlock(SpecializedBody):
2527 """Second and subsequent lines of a line_block."""
2529 blank = SpecializedBody.invalid_input
2531 def line_block(self, match, context, next_state):
2532 """New line of line block."""
2533 lineno = self.state_machine.abs_line_number()
2534 line, messages, blank_finish = self.line_block_line(match, lineno)
2535 self.parent += line
2536 self.parent.parent += messages
2537 self.blank_finish = blank_finish
2538 return [], next_state, []
2541 class Explicit(SpecializedBody):
2543 """Second and subsequent explicit markup construct."""
2545 def explicit_markup(self, match, context, next_state):
2546 """Footnotes, hyperlink targets, directives, comments."""
2547 nodelist, blank_finish = self.explicit_construct(match)
2548 self.parent += nodelist
2549 self.blank_finish = blank_finish
2550 return [], next_state, []
2552 def anonymous(self, match, context, next_state):
2553 """Anonymous hyperlink targets."""
2554 nodelist, blank_finish = self.anonymous_target(match)
2555 self.parent += nodelist
2556 self.blank_finish = blank_finish
2557 return [], next_state, []
2559 blank = SpecializedBody.invalid_input
2562 class SubstitutionDef(Body):
2564 """
2565 Parser for the contents of a substitution_definition element.
2566 """
2568 patterns = {
2569 'embedded_directive': re.compile(r'(%s)::( +|$)'
2570 % Inliner.simplename, re.UNICODE),
2571 'text': r''}
2572 initial_transitions = ['embedded_directive', 'text']
2574 def embedded_directive(self, match, context, next_state):
2575 nodelist, blank_finish = self.directive(match,
2576 alt=self.parent['names'][0])
2577 self.parent += nodelist
2578 if not self.state_machine.at_eof():
2579 self.blank_finish = blank_finish
2580 raise EOFError
2582 def text(self, match, context, next_state):
2583 if not self.state_machine.at_eof():
2584 self.blank_finish = self.state_machine.is_next_line_blank()
2585 raise EOFError
2588 class Text(RSTState):
2590 """
2591 Classifier of second line of a text block.
2593 Could be a paragraph, a definition list item, or a title.
2594 """
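# Illustration of the three outcomes (hypothetical input): a line of
# punctuation below "A Title" makes it a section title (`underline`);
# an indented line below "a term" makes a definition list item
# (`indent`); any other following text simply continues the paragraph
# (`text`).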
2596 patterns = {'underline': Body.patterns['line'],
2597 'text': r''}
2598 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2600 def blank(self, match, context, next_state):
2601 """End of paragraph."""
2602 paragraph, literalnext = self.paragraph(
2603 context, self.state_machine.abs_line_number() - 1)
2604 self.parent += paragraph
2605 if literalnext:
2606 self.parent += self.literal_block()
2607 return [], 'Body', []
2609 def eof(self, context):
2610 if context:
2611 self.blank(None, context, None)
2612 return []
2614 def indent(self, match, context, next_state):
2615 """Definition list item."""
2616 definitionlist = nodes.definition_list()
2617 definitionlistitem, blank_finish = self.definition_list_item(context)
2618 definitionlist += definitionlistitem
2619 self.parent += definitionlist
2620 offset = self.state_machine.line_offset + 1 # next line
2621 newline_offset, blank_finish = self.nested_list_parse(
2622 self.state_machine.input_lines[offset:],
2623 input_offset=self.state_machine.abs_line_offset() + 1,
2624 node=definitionlist, initial_state='DefinitionList',
2625 blank_finish=blank_finish, blank_finish_state='Definition')
2626 self.goto_line(newline_offset)
2627 if not blank_finish:
2628 self.parent += self.unindent_warning('Definition list')
2629 return [], 'Body', []
2631 def underline(self, match, context, next_state):
2632 """Section title."""
2633 lineno = self.state_machine.abs_line_number()
2634 title = context[0].rstrip()
2635 underline = match.string.rstrip()
2636 source = title + '\n' + underline
2637 messages = []
2638 if column_width(title) > len(underline):
2639 if len(underline) < 4:
2640 if self.state_machine.match_titles:
2641 msg = self.reporter.info(
2642 'Possible title underline, too short for the title.\n'
2643 "Treating it as ordinary text because it's so short.",
2644 line=lineno)
2645 self.parent += msg
2646 raise statemachine.TransitionCorrection('text')
2647 else:
2648 blocktext = context[0] + '\n' + self.state_machine.line
2649 msg = self.reporter.warning(
2650 'Title underline too short.',
2651 nodes.literal_block(blocktext, blocktext), line=lineno)
2652 messages.append(msg)
2653 if not self.state_machine.match_titles:
2654 blocktext = context[0] + '\n' + self.state_machine.line
2655 msg = self.reporter.severe(
2656 'Unexpected section title.',
2657 nodes.literal_block(blocktext, blocktext), line=lineno)
2658 self.parent += messages
2659 self.parent += msg
2660 return [], next_state, []
2661 style = underline[0]
2662 context[:] = []
2663 self.section(title, source, style, lineno - 1, messages)
2664 return [], next_state, []
2666 def text(self, match, context, next_state):
2667 """Paragraph."""
2668 startline = self.state_machine.abs_line_number() - 1
2669 msg = None
2670 try:
2671 block = self.state_machine.get_text_block(flush_left=1)
2672 except statemachine.UnexpectedIndentationError, instance:
2673 block, source, lineno = instance.args
2674 msg = self.reporter.error('Unexpected indentation.',
2675 source=source, line=lineno)
2676 lines = context + list(block)
2677 paragraph, literalnext = self.paragraph(lines, startline)
2678 self.parent += paragraph
2679 self.parent += msg
2680 if literalnext:
2681 try:
2682 self.state_machine.next_line()
2683 except EOFError:
2684 pass
2685 self.parent += self.literal_block()
2686 return [], next_state, []
2688 def literal_block(self):
2689 """Return a list of nodes."""
2690 indented, indent, offset, blank_finish = \
2691 self.state_machine.get_indented()
2692 while indented and not indented[-1].strip():
2693 indented.trim_end()
2694 if not indented:
2695 return self.quoted_literal_block()
2696 data = '\n'.join(indented)
2697 literal_block = nodes.literal_block(data, data)
2698 literal_block.line = offset + 1
2699 nodelist = [literal_block]
2700 if not blank_finish:
2701 nodelist.append(self.unindent_warning('Literal block'))
2702 return nodelist
2704 def quoted_literal_block(self):
2705 abs_line_offset = self.state_machine.abs_line_offset()
2706 offset = self.state_machine.line_offset
2707 parent_node = nodes.Element()
2708 new_abs_offset = self.nested_parse(
2709 self.state_machine.input_lines[offset:],
2710 input_offset=abs_line_offset, node=parent_node, match_titles=0,
2711 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2712 'initial_state': 'QuotedLiteralBlock'})
2713 self.goto_line(new_abs_offset)
2714 return parent_node.children
2716 def definition_list_item(self, termline):
2717 indented, indent, line_offset, blank_finish = \
2718 self.state_machine.get_indented()
2719 definitionlistitem = nodes.definition_list_item(
2720 '\n'.join(termline + list(indented)))
2721 lineno = self.state_machine.abs_line_number() - 1
2722 definitionlistitem.line = lineno
2723 termlist, messages = self.term(termline, lineno)
2724 definitionlistitem += termlist
2725 definition = nodes.definition('', *messages)
2726 definitionlistitem += definition
2727 if termline[0][-2:] == '::':
2728 definition += self.reporter.info(
2729 'Blank line missing before literal block (after the "::")? '
2730 'Interpreted as a definition list item.', line=line_offset+1)
2731 self.nested_parse(indented, input_offset=line_offset, node=definition)
2732 return definitionlistitem, blank_finish
2734 classifier_delimiter = re.compile(' +: +')
2736 def term(self, lines, lineno):
2737 """Return a definition_list's term and optional classifiers."""
2738 assert len(lines) == 1
2739 text_nodes, messages = self.inline_text(lines[0], lineno)
2740 term_node = nodes.term()
2741 node_list = [term_node]
2742 for i in range(len(text_nodes)):
2743 node = text_nodes[i]
2744 if isinstance(node, nodes.Text):
2745 parts = self.classifier_delimiter.split(node.rawsource)
2746 if len(parts) == 1:
2747 node_list[-1] += node
2748 else:
2750 node_list[-1] += nodes.Text(parts[0].rstrip())
2751 for part in parts[1:]:
2752 classifier_node = nodes.classifier('', part)
2753 node_list.append(classifier_node)
2754 else:
2755 node_list[-1] += node
2756 return node_list, messages
2759 class SpecializedText(Text):
2761 """
2762 Superclass for second and subsequent lines of Text-variants.
2764 All transition methods are disabled. Override individual methods in
2765 subclasses to re-enable.
2766 """
2768 def eof(self, context):
2769 """Incomplete construct."""
2770 return []
2772 def invalid_input(self, match=None, context=None, next_state=None):
2773 """Not a compound element member. Abort this state machine."""
2774 raise EOFError
2776 blank = invalid_input
2777 indent = invalid_input
2778 underline = invalid_input
2779 text = invalid_input
2782 class Definition(SpecializedText):
2784 """Second line of potential definition_list_item."""
2786 def eof(self, context):
2787 """Not a definition."""
2788 self.state_machine.previous_line(2) # so parent SM can reassess
2789 return []
2791 def indent(self, match, context, next_state):
2792 """Definition list item."""
2793 definitionlistitem, blank_finish = self.definition_list_item(context)
2794 self.parent += definitionlistitem
2795 self.blank_finish = blank_finish
2796 return [], 'DefinitionList', []
2799 class Line(SpecializedText):
2801 """
2802 Second line of over- & underlined section title or transition marker.
2803 """
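# Illustration (hypothetical input): after an initial line of
# punctuation, this state distinguishes a transition marker,
#
#     ----------
#
# followed by a blank line, from an overlined section title such as
#
#     ==========
#      A Title
#     ==========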
2805 eofcheck = 1 # @@@ ???
2806 """Set to 0 while parsing sections, so that we don't catch the EOF."""
2808 def eof(self, context):
2809 """Transition marker at end of section or document."""
2810 marker = context[0].strip()
2811 if self.memo.section_bubble_up_kludge:
2812 self.memo.section_bubble_up_kludge = 0
2813 elif len(marker) < 4:
2814 self.state_correction(context)
2815 if self.eofcheck: # ignore EOFError with sections
2816 lineno = self.state_machine.abs_line_number() - 1
2817 transition = nodes.transition(rawsource=context[0])
2818 transition.line = lineno
2819 self.parent += transition
2820 self.eofcheck = 1
2821 return []
2823 def blank(self, match, context, next_state):
2824 """Transition marker."""
2825 lineno = self.state_machine.abs_line_number() - 1
2826 marker = context[0].strip()
2827 if len(marker) < 4:
2828 self.state_correction(context)
2829 transition = nodes.transition(rawsource=marker)
2830 transition.line = lineno
2831 self.parent += transition
2832 return [], 'Body', []
2834 def text(self, match, context, next_state):
2835 """Potential over- & underlined title."""
2836 lineno = self.state_machine.abs_line_number() - 1
2837 overline = context[0]
2838 title = match.string
2839 underline = ''
2840 try:
2841 underline = self.state_machine.next_line()
2842 except EOFError:
2843 blocktext = overline + '\n' + title
2844 if len(overline.rstrip()) < 4:
2845 self.short_overline(context, blocktext, lineno, 2)
2846 else:
2847 msg = self.reporter.severe(
2848 'Incomplete section title.',
2849 nodes.literal_block(blocktext, blocktext), line=lineno)
2850 self.parent += msg
2851 return [], 'Body', []
2852 source = '%s\n%s\n%s' % (overline, title, underline)
2853 overline = overline.rstrip()
2854 underline = underline.rstrip()
2855 if not self.transitions['underline'][0].match(underline):
2856 blocktext = overline + '\n' + title + '\n' + underline
2857 if len(overline.rstrip()) < 4:
2858 self.short_overline(context, blocktext, lineno, 2)
2859 else:
2860 msg = self.reporter.severe(
2861 'Missing matching underline for section title overline.',
2862 nodes.literal_block(source, source), line=lineno)
2863 self.parent += msg
2864 return [], 'Body', []
2865 elif overline != underline:
2866 blocktext = overline + '\n' + title + '\n' + underline
2867 if len(overline.rstrip()) < 4:
2868 self.short_overline(context, blocktext, lineno, 2)
2869 else:
2870 msg = self.reporter.severe(
2871 'Title overline & underline mismatch.',
2872 nodes.literal_block(source, source), line=lineno)
2873 self.parent += msg
2874 return [], 'Body', []
2875 title = title.rstrip()
2876 messages = []
2877 if column_width(title) > len(overline):
2878 blocktext = overline + '\n' + title + '\n' + underline
2879 if len(overline.rstrip()) < 4:
2880 self.short_overline(context, blocktext, lineno, 2)
2881 else:
2882 msg = self.reporter.warning(
2883 'Title overline too short.',
2884 nodes.literal_block(source, source), line=lineno)
2885 messages.append(msg)
2886 style = (overline[0], underline[0])
2887 self.eofcheck = 0 # @@@ not sure this is correct
2888 self.section(title.lstrip(), source, style, lineno + 1, messages)
2889 self.eofcheck = 1
2890 return [], 'Body', []
2892 indent = text # indented title
2894 def underline(self, match, context, next_state):
2895 overline = context[0]
2896 blocktext = overline + '\n' + self.state_machine.line
2897 lineno = self.state_machine.abs_line_number() - 1
2898 if len(overline.rstrip()) < 4:
2899 self.short_overline(context, blocktext, lineno, 1)
2900 msg = self.reporter.error(
2901 'Invalid section title or transition marker.',
2902 nodes.literal_block(blocktext, blocktext), line=lineno)
2903 self.parent += msg
2904 return [], 'Body', []
2906 def short_overline(self, context, blocktext, lineno, lines=1):
2907 msg = self.reporter.info(
2908 'Possible incomplete section title.\nTreating the overline as '
2909 "ordinary text because it's so short.", line=lineno)
2910 self.parent += msg
2911 self.state_correction(context, lines)
2913 def state_correction(self, context, lines=1):
2914 self.state_machine.previous_line(lines)
2915 context[:] = []
2916 raise statemachine.StateCorrection('Body', 'text')
2919 class QuotedLiteralBlock(RSTState):
2921 """
2922 Nested parse handler for quoted (unindented) literal blocks.
2924 Special-purpose. Not for inclusion in `state_classes`.
2925 """
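# Example of a quoted (unindented) literal block (illustrative input):
#
#     ::
#
#     > print 'quoted'
#     > print 'literal'
#
# Every line must start with the same quoting character; an indented or
# inconsistently quoted line ends the block with an error message.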
2927 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
2928 'text': r''}
2929 initial_transitions = ('initial_quoted', 'text')
2931 def __init__(self, state_machine, debug=0):
2932 RSTState.__init__(self, state_machine, debug)
2933 self.messages = []
2934 self.initial_lineno = None
2936 def blank(self, match, context, next_state):
2937 if context:
2938 raise EOFError
2939 else:
2940 return context, next_state, []
2942 def eof(self, context):
2943 if context:
2944 text = '\n'.join(context)
2945 literal_block = nodes.literal_block(text, text)
2946 literal_block.line = self.initial_lineno
2947 self.parent += literal_block
2948 else:
2949 self.parent += self.reporter.warning(
2950 'Literal block expected; none found.',
2951 line=self.state_machine.abs_line_number())
2952 self.state_machine.previous_line()
2953 self.parent += self.messages
2954 return []
2956 def indent(self, match, context, next_state):
2957 assert context, ('QuotedLiteralBlock.indent: context should not '
2958 'be empty!')
2959 self.messages.append(
2960 self.reporter.error('Unexpected indentation.',
2961 line=self.state_machine.abs_line_number()))
2962 self.state_machine.previous_line()
2963 raise EOFError
2965 def initial_quoted(self, match, context, next_state):
2966 """Match arbitrary quote character on the first line only."""
2967 self.remove_transition('initial_quoted')
2968 quote = match.string[0]
2969 pattern = re.compile(re.escape(quote))
2970 # New transition matches consistent quotes only:
2971 self.add_transition('quoted',
2972 (pattern, self.quoted, self.__class__.__name__))
2973 self.initial_lineno = self.state_machine.abs_line_number()
2974 return [match.string], next_state, []
2976 def quoted(self, match, context, next_state):
2977 """Match consistent quotes on subsequent lines."""
2978 context.append(match.string)
2979 return context, next_state, []
2981 def text(self, match, context, next_state):
2982 if context:
2983 self.messages.append(
2984 self.reporter.error('Inconsistent literal block quoting.',
2985 line=self.state_machine.abs_line_number()))
2986 self.state_machine.previous_line()
2987 raise EOFError
2990 state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
2991 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
2992 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
2993 """Standard set of State classes used to start `RSTStateMachine`."""