Store source and line in the "raw" node generated by raw-derived roles.
docutils/parsers/rst/states.py
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
9 :Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
31 :Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
36 :Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
40 :Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
43 Parser Overview
44 ===============
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
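A minimal usage sketch (an editorial illustration, not part of the original
module): applications normally drive this machinery through
`docutils.parsers.rst.Parser`, which creates an `RSTStateMachine` and calls
its `run()` method.  The source name ``<sketch>`` and the sample text below
are arbitrary::

    from docutils.frontend import OptionParser
    from docutils.parsers.rst import Parser
    from docutils.utils import new_document

    parser = Parser()
    settings = OptionParser(components=(Parser,)).get_default_values()
    document = new_document('<sketch>', settings)
    parser.parse('A paragraph with *emphasis*.\n', document)
    print(document.pformat())   # pseudo-XML rendering of the document tree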
103 __docformat__ = 'reStructuredText'
106 import sys
107 import re
108 try:
109 import roman
110 except ImportError:
111 import docutils.utils.roman as roman
112 from types import FunctionType, MethodType
114 from docutils import nodes, statemachine, utils, urischemes
115 from docutils import ApplicationError, DataError
116 from docutils.statemachine import StateMachineWS, StateWS
117 from docutils.nodes import fully_normalize_name as normalize_name
118 from docutils.nodes import whitespace_normalize_name
119 import docutils.parsers.rst
120 from docutils.parsers.rst import directives, languages, tableparser, roles
121 from docutils.parsers.rst.languages import en as _fallback_language_module
122 from docutils.utils import escape2null, unescape, column_width
123 from docutils.utils import punctuation_chars
125 class MarkupError(DataError): pass
126 class UnknownInterpretedRoleError(DataError): pass
127 class InterpretedRoleNotImplementedError(DataError): pass
128 class ParserError(ApplicationError): pass
129 class MarkupMismatch(Exception): pass
132 class Struct:
134 """Stores data attributes for dotted-attribute access."""
136 def __init__(self, **keywordargs):
137 self.__dict__.update(keywordargs)
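# Editorial example (not part of the module): `Struct` is just an attribute
# namespace; the parse-global "memo" built in RSTStateMachine.run() below is
# the real instance used by the parser.
_example_memo = Struct(section_level=0, title_styles=[])
_example_memo.section_level += 1
assert _example_memo.section_level == 1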
140 class RSTStateMachine(StateMachineWS):
143 reStructuredText's master StateMachine.
145 The entry point to reStructuredText parsing is the `run()` method.
148 def run(self, input_lines, document, input_offset=0, match_titles=1,
149 inliner=None):
151 Parse `input_lines` and modify the `document` node in place.
153 Extend `StateMachineWS.run()`: set up parse-global data and
154 run the StateMachine.
156 self.language = languages.get_language(
157 document.settings.language_code)
158 self.match_titles = match_titles
159 if inliner is None:
160 inliner = Inliner()
161 inliner.init_customizations(document.settings)
162 self.memo = Struct(document=document,
163 reporter=document.reporter,
164 language=self.language,
165 title_styles=[],
166 section_level=0,
167 section_bubble_up_kludge=0,
168 inliner=inliner)
169 self.document = document
170 self.attach_observer(document.note_source)
171 self.reporter = self.memo.reporter
172 self.node = document
173 results = StateMachineWS.run(self, input_lines, input_offset,
174 input_source=document['source'])
175 assert results == [], 'RSTStateMachine.run() results should be empty!'
176 self.node = self.memo = None # remove unneeded references
179 class NestedStateMachine(StateMachineWS):
182 StateMachine run from within other StateMachine runs, to parse nested
183 document structures.
186 def run(self, input_lines, input_offset, memo, node, match_titles=1):
188 Parse `input_lines` and populate a `docutils.nodes.document` instance.
190 Extend `StateMachineWS.run()`: set up document-wide data.
192 self.match_titles = match_titles
193 self.memo = memo
194 self.document = memo.document
195 self.attach_observer(self.document.note_source)
196 self.reporter = memo.reporter
197 self.language = memo.language
198 self.node = node
199 results = StateMachineWS.run(self, input_lines, input_offset)
200 assert results == [], ('NestedStateMachine.run() results should be '
201 'empty!')
202 return results
205 class RSTState(StateWS):
208 reStructuredText State superclass.
210 Contains methods used by all State subclasses.
213 nested_sm = NestedStateMachine
214 nested_sm_cache = []
216 def __init__(self, state_machine, debug=0):
217 self.nested_sm_kwargs = {'state_classes': state_classes,
218 'initial_state': 'Body'}
219 StateWS.__init__(self, state_machine, debug)
221 def runtime_init(self):
222 StateWS.runtime_init(self)
223 memo = self.state_machine.memo
224 self.memo = memo
225 self.reporter = memo.reporter
226 self.inliner = memo.inliner
227 self.document = memo.document
228 self.parent = self.state_machine.node
229 # enable the reporter to determine source and source-line
230 if not hasattr(self.reporter, 'get_source_and_line'):
231 self.reporter.get_source_and_line = self.state_machine.get_source_and_line
232 # print "adding get_source_and_line to reporter", self.state_machine.input_offset
235 def goto_line(self, abs_line_offset):
237 Jump to input line `abs_line_offset`, ignoring jumps past the end.
239 try:
240 self.state_machine.goto_line(abs_line_offset)
241 except EOFError:
242 pass
244 def no_match(self, context, transitions):
246 Override `StateWS.no_match` to generate a system message.
248 This code should never be run.
250 src, srcline = self.state_machine.get_source_and_line()
251 self.reporter.severe(
252 'Internal error: no transition pattern match. State: "%s"; '
253 'transitions: %s; context: %s; current line: %r.'
254 % (self.__class__.__name__, transitions, context,
255 self.state_machine.line),
256 source=src, line=srcline)
257 return context, None, []
259 def bof(self, context):
260 """Called at beginning of file."""
261 return [], []
263 def nested_parse(self, block, input_offset, node, match_titles=0,
264 state_machine_class=None, state_machine_kwargs=None):
266 Create a new StateMachine rooted at `node` and run it over the input
267 `block`.
269 use_default = 0
270 if state_machine_class is None:
271 state_machine_class = self.nested_sm
272 use_default += 1
273 if state_machine_kwargs is None:
274 state_machine_kwargs = self.nested_sm_kwargs
275 use_default += 1
276 block_length = len(block)
278 state_machine = None
279 if use_default == 2:
280 try:
281 state_machine = self.nested_sm_cache.pop()
282 except IndexError:
283 pass
284 if not state_machine:
285 state_machine = state_machine_class(debug=self.debug,
286 **state_machine_kwargs)
287 state_machine.run(block, input_offset, memo=self.memo,
288 node=node, match_titles=match_titles)
289 if use_default == 2:
290 self.nested_sm_cache.append(state_machine)
291 else:
292 state_machine.unlink()
293 new_offset = state_machine.abs_line_offset()
294 # No `block.parent` implies disconnected -- lines aren't in sync:
295 if block.parent and (len(block) - block_length) != 0:
296 # Adjustment for block if modified in nested parse:
297 self.state_machine.next_line(len(block) - block_length)
298 return new_offset
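# Editorial sketch of a typical nested_parse() caller (not part of this
# module): a custom directive re-parses its own content into a container
# node.  The directive name and class are hypothetical; Directive and
# register_directive() are the standard docutils extension APIs.
from docutils import nodes
from docutils.parsers.rst import Directive, directives

class ExampleBox(Directive):
    has_content = True

    def run(self):
        node = nodes.container()
        # self.state is an RSTState; reuse the body parser on the content:
        self.state.nested_parse(self.content, self.content_offset, node)
        return [node]

directives.register_directive('example-box', ExampleBox)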
300 def nested_list_parse(self, block, input_offset, node, initial_state,
301 blank_finish,
302 blank_finish_state=None,
303 extra_settings={},
304 match_titles=0,
305 state_machine_class=None,
306 state_machine_kwargs=None):
308 Create a new StateMachine rooted at `node` and run it over the input
309 `block`. Also keep track of optional intermediate blank lines and the
310 required final one.
312 if state_machine_class is None:
313 state_machine_class = self.nested_sm
314 if state_machine_kwargs is None:
315 state_machine_kwargs = self.nested_sm_kwargs.copy()
316 state_machine_kwargs['initial_state'] = initial_state
317 state_machine = state_machine_class(debug=self.debug,
318 **state_machine_kwargs)
319 if blank_finish_state is None:
320 blank_finish_state = initial_state
321 state_machine.states[blank_finish_state].blank_finish = blank_finish
322 for key, value in extra_settings.items():
323 setattr(state_machine.states[initial_state], key, value)
324 state_machine.run(block, input_offset, memo=self.memo,
325 node=node, match_titles=match_titles)
326 blank_finish = state_machine.states[blank_finish_state].blank_finish
327 state_machine.unlink()
328 return state_machine.abs_line_offset(), blank_finish
330 def section(self, title, source, style, lineno, messages):
331 """Check for a valid subsection and create one if it checks out."""
332 if self.check_subsection(source, style, lineno):
333 self.new_subsection(title, lineno, messages)
335 def check_subsection(self, source, style, lineno):
337 Check for a valid subsection header. Return 1 (true) or None (false).
339 When a new section is reached that isn't a subsection of the current
340 section, back up the line count (use ``previous_line(-x)``), then
341 ``raise EOFError``. The current StateMachine will finish, then the
342 calling StateMachine can re-examine the title. This will work its way
343 back up the calling chain until the correct section level is reached.
345 @@@ Alternative: Evaluate the title, store the title info & level, and
346 back up the chain until that level is reached. Store in memo? Or
347 return in results?
349 :Exception: `EOFError` when a sibling or supersection encountered.
351 memo = self.memo
352 title_styles = memo.title_styles
353 mylevel = memo.section_level
354 try: # check for existing title style
355 level = title_styles.index(style) + 1
356 except ValueError: # new title style
357 if len(title_styles) == memo.section_level: # new subsection
358 title_styles.append(style)
359 return 1
360 else: # not at lowest level
361 self.parent += self.title_inconsistent(source, lineno)
362 return None
363 if level <= mylevel: # sibling or supersection
364 memo.section_level = level # bubble up to parent section
365 if len(style) == 2:
366 memo.section_bubble_up_kludge = 1
367 # back up 2 lines for underline title, 3 for overline title
368 self.state_machine.previous_line(len(style) + 1)
369 raise EOFError # let parent section re-evaluate
370 if level == mylevel + 1: # immediate subsection
371 return 1
372 else: # invalid subsection
373 self.parent += self.title_inconsistent(source, lineno)
374 return None
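# Standalone editorial sketch of the title-style bookkeeping above
# (`style_level` is a hypothetical helper, not part of docutils; real styles
# may also be two-character over/underline pairs):
def style_level(title_styles, style):
    """Return the 1-based level for `style`, registering new styles."""
    try:
        return title_styles.index(style) + 1
    except ValueError:
        title_styles.append(style)
        return len(title_styles)

_styles = []
assert style_level(_styles, '=') == 1   # first underline style seen
assert style_level(_styles, '-') == 2   # second style: one level deeper
assert style_level(_styles, '=') == 1   # a known style keeps its level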
376 def title_inconsistent(self, sourcetext, lineno):
377 src, srcline = self.state_machine.get_source_and_line(lineno)
378 error = self.reporter.severe(
379 'Title level inconsistent:', nodes.literal_block('', sourcetext),
380 source=src, line=srcline)
381 return error
383 def new_subsection(self, title, lineno, messages):
384 """Append new subsection to document tree. On return, check level."""
385 memo = self.memo
386 mylevel = memo.section_level
387 memo.section_level += 1
388 section_node = nodes.section()
389 self.parent += section_node
390 textnodes, title_messages = self.inline_text(title, lineno)
391 titlenode = nodes.title(title, '', *textnodes)
392 name = normalize_name(titlenode.astext())
393 section_node['names'].append(name)
394 section_node += titlenode
395 section_node += messages
396 section_node += title_messages
397 self.document.note_implicit_target(section_node, section_node)
398 offset = self.state_machine.line_offset + 1
399 absoffset = self.state_machine.abs_line_offset() + 1
400 newabsoffset = self.nested_parse(
401 self.state_machine.input_lines[offset:], input_offset=absoffset,
402 node=section_node, match_titles=1)
403 self.goto_line(newabsoffset)
404 if memo.section_level <= mylevel: # can't handle next section?
405 raise EOFError # bubble up to supersection
406 # reset section_level; next pass will detect it properly
407 memo.section_level = mylevel
409 def paragraph(self, lines, lineno):
411 Return a list (paragraph & messages) & a boolean: literal_block next?
413 data = '\n'.join(lines).rstrip()
414 if re.search(r'(?<!\\)(\\\\)*::$', data):
415 if len(data) == 2:
416 return [], 1
417 elif data[-3] in ' \n':
418 text = data[:-3].rstrip()
419 else:
420 text = data[:-1]
421 literalnext = 1
422 else:
423 text = data
424 literalnext = 0
425 textnodes, messages = self.inline_text(text, lineno)
426 p = nodes.paragraph(data, '', *textnodes)
427 p.source, p.line = self.state_machine.get_source_and_line(lineno)
428 return [p] + messages, literalnext
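# Editorial examples of the trailing-"::" check above:
#   'Example::'    -> paragraph text 'Example:', literal block expected
#   'Example: ::'  -> paragraph text 'Example:', literal block expected
#   '::'           -> no paragraph at all, literal block expected
#   r'esc\::'      -> backslash-escaped '::'; no literal block is signalled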
430 def inline_text(self, text, lineno):
432 Return 2 lists: nodes (text and inline elements), and system_messages.
434 return self.inliner.parse(text, lineno, self.memo, self.parent)
436 def unindent_warning(self, node_name):
437 # the actual problem is one line below the current line
438 src, srcline = self.state_machine.get_source_and_line()
439 return self.reporter.warning('%s ends without a blank line; '
440 'unexpected unindent.' % node_name,
441 source=src, line=srcline+1)
444 def build_regexp(definition, compile=1):
446 Build, compile and return a regular expression based on `definition`.
448 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
449 where "parts" is a list of regular expressions and/or regular
450 expression definitions to be joined into an or-group.
452 name, prefix, suffix, parts = definition
453 part_strings = []
454 for part in parts:
455 if type(part) is tuple:
456 part_strings.append(build_regexp(part, None))
457 else:
458 part_strings.append(part)
459 or_group = '|'.join(part_strings)
460 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
461 if compile:
462 return re.compile(regexp, re.UNICODE)
463 else:
464 return regexp
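# Editorial example of build_regexp() with a made-up definition (the real
# definitions appear in the Inliner class below):
_example_def = ('num', '#', r'\b', [r'[0-9]+', r'[ivxlc]+'])
assert build_regexp(_example_def, compile=0) == r'#(?P<num>[0-9]+|[ivxlc]+)\b'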
467 class Inliner:
470 Parse inline markup; call the `parse()` method.
473 def __init__(self):
474 self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
475 """List of (pattern, bound method) tuples, used by
476 `self.implicit_inline`."""
478 def init_customizations(self, settings):
479 """Setting-based customizations; run when parsing begins."""
480 if settings.pep_references:
481 self.implicit_dispatch.append((self.patterns.pep,
482 self.pep_reference))
483 if settings.rfc_references:
484 self.implicit_dispatch.append((self.patterns.rfc,
485 self.rfc_reference))
487 def parse(self, text, lineno, memo, parent):
488 # Needs to be refactored for nested inline markup.
489 # Add nested_parse() method?
491 Return 2 lists: nodes (text and inline elements), and system_messages.
493 Using `self.patterns.initial`, a pattern which matches start-strings
494 (emphasis, strong, interpreted, phrase reference, literal,
495 substitution reference, and inline target) and complete constructs
496 (simple reference, footnote reference), search for a candidate. When
497 one is found, check for validity (e.g., not a quoted '*' character).
498 If valid, search for the corresponding end string if applicable, and
499 check it for validity. If not found or invalid, generate a warning
500 and ignore the start-string. Implicit inline markup (e.g. standalone
501 URIs) is found last.
503 self.reporter = memo.reporter
504 self.document = memo.document
505 self.language = memo.language
506 self.parent = parent
507 pattern_search = self.patterns.initial.search
508 dispatch = self.dispatch
509 remaining = escape2null(text)
510 processed = []
511 unprocessed = []
512 messages = []
513 while remaining:
514 match = pattern_search(remaining)
515 if match:
516 groups = match.groupdict()
517 method = dispatch[groups['start'] or groups['backquote']
518 or groups['refend'] or groups['fnend']]
519 before, inlines, remaining, sysmessages = method(self, match,
520 lineno)
521 unprocessed.append(before)
522 messages += sysmessages
523 if inlines:
524 processed += self.implicit_inline(''.join(unprocessed),
525 lineno)
526 processed += inlines
527 unprocessed = []
528 else:
529 break
530 remaining = ''.join(unprocessed) + remaining
531 if remaining:
532 processed += self.implicit_inline(remaining, lineno)
533 return processed, messages
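# Editorial example of the NUL-escaping round trip used by parse() above
# (escape2null/unescape are the real docutils.utils helpers imported at the
# top of this module):
from docutils.utils import escape2null, unescape
_s = escape2null(r'\*not emphasis*')
assert _s == '\x00*not emphasis*'              # backslash-escape -> NUL
assert unescape(_s) == '*not emphasis*'        # NULs dropped
assert unescape(_s, 1) == r'\*not emphasis*'   # or restored to backslashes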
535 # Inline object recognition
536 # -------------------------
537 # lookahead and look-behind expressions for inline markup rules
538 start_string_prefix = (u'(^|(?<=\\s|[%s%s]))' %
539 (punctuation_chars.openers,
540 punctuation_chars.delimiters))
541 end_string_suffix = (u'($|(?=\\s|[\x00%s%s%s]))' %
542 (punctuation_chars.closing_delimiters,
543 punctuation_chars.delimiters,
544 punctuation_chars.closers))
545 # print start_string_prefix.encode('utf8')
546 # TODO: support non-ASCII whitespace in the following 4 patterns?
547 non_whitespace_before = r'(?<![ \n])'
548 non_whitespace_escape_before = r'(?<![ \n\x00])'
549 non_unescaped_whitespace_escape_before = r'(?<!(?<!\x00)[ \n\x00])'
550 non_whitespace_after = r'(?![ \n])'
551 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
552 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
553 # Valid URI characters (see RFC 2396 & RFC 2732);
554 # final \x00 allows backslash escapes in URIs:
555 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
556 # Delimiter indicating the end of a URI (not part of the URI):
557 uri_end_delim = r"""[>]"""
558 # Last URI character; same as uric but no punctuation:
559 urilast = r"""[_~*/=+a-zA-Z0-9]"""
560 # End of a URI (either 'urilast' or 'uric followed by a
561 # uri_end_delim'):
562 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
563 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
564 email_pattern = r"""
565 %(emailc)s+(?:\.%(emailc)s+)* # name
566 (?<!\x00)@ # at
567 %(emailc)s+(?:\.%(emailc)s*)* # host
568 %(uri_end)s # final URI char
570 parts = ('initial_inline', start_string_prefix, '',
571 [('start', '', non_whitespace_after, # simple start-strings
572 [r'\*\*', # strong
573 r'\*(?!\*)', # emphasis but not strong
574 r'``', # literal
575 r'_`', # inline internal target
576 r'\|(?!\|)'] # substitution reference
578 ('whole', '', end_string_suffix, # whole constructs
579 [# reference name & end-string
580 r'(?P<refname>%s)(?P<refend>__?)' % simplename,
581 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
582 [r'[0-9]+', # manually numbered
583 r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
584 r'\*', # auto-symbol
585 r'(?P<citationlabel>%s)' % simplename] # citation reference
589 ('backquote', # interpreted text or phrase reference
590 '(?P<role>(:%s:)?)' % simplename, # optional role
591 non_whitespace_after,
592 ['`(?!`)'] # but not literal
596 patterns = Struct(
597 initial=build_regexp(parts),
598 emphasis=re.compile(non_whitespace_escape_before
599 + r'(\*)' + end_string_suffix, re.UNICODE),
600 strong=re.compile(non_whitespace_escape_before
601 + r'(\*\*)' + end_string_suffix, re.UNICODE),
602 interpreted_or_phrase_ref=re.compile(
603 r"""
604 %(non_unescaped_whitespace_escape_before)s
607 (?P<suffix>
608 (?P<role>:%(simplename)s:)?
609 (?P<refend>__?)?
612 %(end_string_suffix)s
613 """ % locals(), re.VERBOSE | re.UNICODE),
614 embedded_uri=re.compile(
615 r"""
617 (?:[ \n]+|^) # spaces or beginning of line/string
618 < # open bracket
619 %(non_whitespace_after)s
620 ([^<>\x00]+) # anything but angle brackets & nulls
621 %(non_whitespace_before)s
622 > # close bracket w/o whitespace before
624 $ # end of string
625 """ % locals(), re.VERBOSE | re.UNICODE),
626 literal=re.compile(non_whitespace_before + '(``)'
627 + end_string_suffix),
628 target=re.compile(non_whitespace_escape_before
629 + r'(`)' + end_string_suffix),
630 substitution_ref=re.compile(non_whitespace_escape_before
631 + r'(\|_{0,2})'
632 + end_string_suffix),
633 email=re.compile(email_pattern % locals() + '$',
634 re.VERBOSE | re.UNICODE),
635 uri=re.compile(
636 (r"""
637 %(start_string_prefix)s
638 (?P<whole>
639 (?P<absolute> # absolute URI
640 (?P<scheme> # scheme (http, ftp, mailto)
641 [a-zA-Z][a-zA-Z0-9.+-]*
645 ( # either:
646 (//?)? # hierarchical URI
647 %(uric)s* # URI characters
648 %(uri_end)s # final URI char
650 ( # optional query
651 \?%(uric)s*
652 %(uri_end)s
654 ( # optional fragment
655 \#%(uric)s*
656 %(uri_end)s
660 | # *OR*
661 (?P<email> # email address
662 """ + email_pattern + r"""
665 %(end_string_suffix)s
666 """) % locals(), re.VERBOSE | re.UNICODE),
667 pep=re.compile(
668 r"""
669 %(start_string_prefix)s
671 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
673 (PEP\s+(?P<pepnum2>\d+)) # reference by name
675 %(end_string_suffix)s""" % locals(), re.VERBOSE | re.UNICODE),
676 rfc=re.compile(
677 r"""
678 %(start_string_prefix)s
679 (RFC(-|\s+)?(?P<rfcnum>\d+))
680 %(end_string_suffix)s""" % locals(), re.VERBOSE | re.UNICODE))
682 def quoted_start(self, match):
683 """Test if inline markup start-string is 'quoted'.
685 'Quoted' in this context means the start-string is enclosed in a pair
686 of matching opening/closing delimiters (not necessarily quotes)
687 or at the end of the match.
689 string = match.string
690 start = match.start()
691 if start == 0: # start-string at beginning of text
692 return False
693 prestart = string[start - 1]
694 try:
695 poststart = string[match.end()]
696 except IndexError: # start-string at end of text
697 return True # not "quoted" but no markup start-string either
698 return punctuation_chars.match_chars(prestart, poststart)
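# Editorial illustration of the "quoted" rule above: a start-string enclosed
# in matching delimiters, as in "(*)", is not treated as inline markup.
# match_chars() is the helper from docutils.utils.punctuation_chars used in
# the return statement above (its matching-pairs table is assumed here).
from docutils.utils import punctuation_chars
assert punctuation_chars.match_chars('(', ')')       # matching pair
assert not punctuation_chars.match_chars('(', ']')   # mismatched pair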
700 def inline_obj(self, match, lineno, end_pattern, nodeclass,
701 restore_backslashes=0):
702 string = match.string
703 matchstart = match.start('start')
704 matchend = match.end('start')
705 if self.quoted_start(match):
706 return (string[:matchend], [], string[matchend:], [], '')
707 endmatch = end_pattern.search(string[matchend:])
708 if endmatch and endmatch.start(1): # 1 or more chars
709 text = unescape(endmatch.string[:endmatch.start(1)],
710 restore_backslashes)
711 textend = matchend + endmatch.end(1)
712 rawsource = unescape(string[matchstart:textend], 1)
713 return (string[:matchstart], [nodeclass(rawsource, text)],
714 string[textend:], [], endmatch.group(1))
715 msg = self.reporter.warning(
716 'Inline %s start-string without end-string.'
717 % nodeclass.__name__, line=lineno)
718 text = unescape(string[matchstart:matchend], 1)
719 rawsource = unescape(string[matchstart:matchend], 1)
720 prb = self.problematic(text, rawsource, msg)
721 return string[:matchstart], [prb], string[matchend:], [msg], ''
723 def problematic(self, text, rawsource, message):
724 msgid = self.document.set_id(message, self.parent)
725 problematic = nodes.problematic(rawsource, text, refid=msgid)
726 prbid = self.document.set_id(problematic)
727 message.add_backref(prbid)
728 return problematic
730 def emphasis(self, match, lineno):
731 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
732 match, lineno, self.patterns.emphasis, nodes.emphasis)
733 return before, inlines, remaining, sysmessages
735 def strong(self, match, lineno):
736 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
737 match, lineno, self.patterns.strong, nodes.strong)
738 return before, inlines, remaining, sysmessages
740 def interpreted_or_phrase_ref(self, match, lineno):
741 end_pattern = self.patterns.interpreted_or_phrase_ref
742 string = match.string
743 matchstart = match.start('backquote')
744 matchend = match.end('backquote')
745 rolestart = match.start('role')
746 role = match.group('role')
747 position = ''
748 if role:
749 role = role[1:-1]
750 position = 'prefix'
751 elif self.quoted_start(match):
752 return (string[:matchend], [], string[matchend:], [])
753 endmatch = end_pattern.search(string[matchend:])
754 if endmatch and endmatch.start(1): # 1 or more chars
755 textend = matchend + endmatch.end()
756 if endmatch.group('role'):
757 if role:
758 msg = self.reporter.warning(
759 'Multiple roles in interpreted text (both '
760 'prefix and suffix present; only one allowed).',
761 line=lineno)
762 text = unescape(string[rolestart:textend], 1)
763 prb = self.problematic(text, text, msg)
764 return string[:rolestart], [prb], string[textend:], [msg]
765 role = endmatch.group('suffix')[1:-1]
766 position = 'suffix'
767 escaped = endmatch.string[:endmatch.start(1)]
768 rawsource = unescape(string[matchstart:textend], 1)
769 if rawsource[-1:] == '_':
770 if role:
771 msg = self.reporter.warning(
772 'Mismatch: both interpreted text role %s and '
773 'reference suffix.' % position, line=lineno)
774 text = unescape(string[rolestart:textend], 1)
775 prb = self.problematic(text, text, msg)
776 return string[:rolestart], [prb], string[textend:], [msg]
777 return self.phrase_ref(string[:matchstart], string[textend:],
778 rawsource, escaped, unescape(escaped))
779 else:
780 rawsource = unescape(string[rolestart:textend], 1)
781 nodelist, messages = self.interpreted(rawsource, escaped, role,
782 lineno)
783 return (string[:rolestart], nodelist,
784 string[textend:], messages)
785 msg = self.reporter.warning(
786 'Inline interpreted text or phrase reference start-string '
787 'without end-string.', line=lineno)
788 text = unescape(string[matchstart:matchend], 1)
789 prb = self.problematic(text, text, msg)
790 return string[:matchstart], [prb], string[matchend:], [msg]
792 def phrase_ref(self, before, after, rawsource, escaped, text):
793 match = self.patterns.embedded_uri.search(escaped)
794 if match:
795 text = unescape(escaped[:match.start(0)])
796 uri_text = match.group(2)
797 uri = ''.join(uri_text.split())
798 uri = self.adjust_uri(uri)
799 if uri:
800 target = nodes.target(match.group(1), refuri=uri)
801 else:
802 raise ApplicationError('problem with URI: %r' % uri_text)
803 if not text:
804 text = uri
805 else:
806 target = None
807 refname = normalize_name(text)
808 reference = nodes.reference(rawsource, text,
809 name=whitespace_normalize_name(text))
810 node_list = [reference]
811 if rawsource[-2:] == '__':
812 if target:
813 reference['refuri'] = uri
814 else:
815 reference['anonymous'] = 1
816 else:
817 if target:
818 reference['refuri'] = uri
819 target['names'].append(refname)
820 self.document.note_explicit_target(target, self.parent)
821 node_list.append(target)
822 else:
823 reference['refname'] = refname
824 self.document.note_refname(reference)
825 return before, node_list, after, []
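# reStructuredText forms handled by phrase_ref() above (editorial examples,
# written as they would appear in a source document):
#   `Docutils <http://docutils.sourceforge.net/>`_    named ref + target
#   `Docutils <http://docutils.sourceforge.net/>`__   anonymous reference
#   `some phrase`_                                    reference by name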
827 def adjust_uri(self, uri):
828 match = self.patterns.email.match(uri)
829 if match:
830 return 'mailto:' + uri
831 else:
832 return uri
834 def interpreted(self, rawsource, text, role, lineno):
835 role_fn, messages = roles.role(role, self.language, lineno,
836 self.reporter)
837 if role_fn:
838 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
839 return nodes, messages + messages2
840 else:
841 msg = self.reporter.error(
842 'Unknown interpreted text role "%s".' % role,
843 line=lineno)
844 return ([self.problematic(rawsource, rawsource, msg)],
845 messages + [msg])
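# Editorial sketch of how a role function reaches interpreted() above: roles
# are plain callables registered with docutils.parsers.rst.roles.  The role
# name 'kbd' and its behaviour are hypothetical examples.
from docutils import nodes
from docutils.parsers.rst import roles

def kbd_role(role, rawtext, text, lineno, inliner, options={}, content=[]):
    node = nodes.literal(rawtext, text, classes=['kbd'])
    return [node], []

roles.register_local_role('kbd', kbd_role)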
847 def literal(self, match, lineno):
848 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
849 match, lineno, self.patterns.literal, nodes.literal,
850 restore_backslashes=1)
851 return before, inlines, remaining, sysmessages
853 def inline_internal_target(self, match, lineno):
854 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
855 match, lineno, self.patterns.target, nodes.target)
856 if inlines and isinstance(inlines[0], nodes.target):
857 assert len(inlines) == 1
858 target = inlines[0]
859 name = normalize_name(target.astext())
860 target['names'].append(name)
861 self.document.note_explicit_target(target, self.parent)
862 return before, inlines, remaining, sysmessages
864 def substitution_reference(self, match, lineno):
865 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
866 match, lineno, self.patterns.substitution_ref,
867 nodes.substitution_reference)
868 if len(inlines) == 1:
869 subref_node = inlines[0]
870 if isinstance(subref_node, nodes.substitution_reference):
871 subref_text = subref_node.astext()
872 self.document.note_substitution_ref(subref_node, subref_text)
873 if endstring[-1:] == '_':
874 reference_node = nodes.reference(
875 '|%s%s' % (subref_text, endstring), '')
876 if endstring[-2:] == '__':
877 reference_node['anonymous'] = 1
878 else:
879 reference_node['refname'] = normalize_name(subref_text)
880 self.document.note_refname(reference_node)
881 reference_node += subref_node
882 inlines = [reference_node]
883 return before, inlines, remaining, sysmessages
885 def footnote_reference(self, match, lineno):
887 Handles `nodes.footnote_reference` and `nodes.citation_reference`
888 elements.
890 label = match.group('footnotelabel')
891 refname = normalize_name(label)
892 string = match.string
893 before = string[:match.start('whole')]
894 remaining = string[match.end('whole'):]
895 if match.group('citationlabel'):
896 refnode = nodes.citation_reference('[%s]_' % label,
897 refname=refname)
898 refnode += nodes.Text(label)
899 self.document.note_citation_ref(refnode)
900 else:
901 refnode = nodes.footnote_reference('[%s]_' % label)
902 if refname[0] == '#':
903 refname = refname[1:]
904 refnode['auto'] = 1
905 self.document.note_autofootnote_ref(refnode)
906 elif refname == '*':
907 refname = ''
908 refnode['auto'] = '*'
909 self.document.note_symbol_footnote_ref(
910 refnode)
911 else:
912 refnode += nodes.Text(label)
913 if refname:
914 refnode['refname'] = refname
915 self.document.note_footnote_ref(refnode)
916 if utils.get_trim_footnote_ref_space(self.document.settings):
917 before = before.rstrip()
918 return (before, [refnode], remaining, [])
920 def reference(self, match, lineno, anonymous=None):
921 referencename = match.group('refname')
922 refname = normalize_name(referencename)
923 referencenode = nodes.reference(
924 referencename + match.group('refend'), referencename,
925 name=whitespace_normalize_name(referencename))
926 if anonymous:
927 referencenode['anonymous'] = 1
928 else:
929 referencenode['refname'] = refname
930 self.document.note_refname(referencenode)
931 string = match.string
932 matchstart = match.start('whole')
933 matchend = match.end('whole')
934 return (string[:matchstart], [referencenode], string[matchend:], [])
936 def anonymous_reference(self, match, lineno):
937 return self.reference(match, lineno, anonymous=1)
939 def standalone_uri(self, match, lineno):
940 if (not match.group('scheme')
941 or match.group('scheme').lower() in urischemes.schemes):
942 if match.group('email'):
943 addscheme = 'mailto:'
944 else:
945 addscheme = ''
946 text = match.group('whole')
947 unescaped = unescape(text, 0)
948 return [nodes.reference(unescape(text, 1), unescaped,
949 refuri=addscheme + unescaped)]
950 else: # not a valid scheme
951 raise MarkupMismatch
953 def pep_reference(self, match, lineno):
954 text = match.group(0)
955 if text.startswith('pep-'):
956 pepnum = int(match.group('pepnum1'))
957 elif text.startswith('PEP'):
958 pepnum = int(match.group('pepnum2'))
959 else:
960 raise MarkupMismatch
961 ref = (self.document.settings.pep_base_url
962 + self.document.settings.pep_file_url_template % pepnum)
963 unescaped = unescape(text, 0)
964 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
966 rfc_url = 'rfc%d.html'
968 def rfc_reference(self, match, lineno):
969 text = match.group(0)
970 if text.startswith('RFC'):
971 rfcnum = int(match.group('rfcnum'))
972 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
973 else:
974 raise MarkupMismatch
975 unescaped = unescape(text, 0)
976 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
978 def implicit_inline(self, text, lineno):
980 Check each of the patterns in `self.implicit_dispatch` for a match,
981 and dispatch to the stored method for the pattern. Recursively check
982 the text before and after the match. Return a list of `nodes.Text`
983 and inline element nodes.
985 if not text:
986 return []
987 for pattern, method in self.implicit_dispatch:
988 match = pattern.search(text)
989 if match:
990 try:
991 # Must recurse on strings before *and* after the match;
992 # there may be multiple patterns.
993 return (self.implicit_inline(text[:match.start()], lineno)
994 + method(match, lineno) +
995 self.implicit_inline(text[match.end():], lineno))
996 except MarkupMismatch:
997 pass
998 return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
1000 dispatch = {'*': emphasis,
1001 '**': strong,
1002 '`': interpreted_or_phrase_ref,
1003 '``': literal,
1004 '_`': inline_internal_target,
1005 ']_': footnote_reference,
1006 '|': substitution_reference,
1007 '_': reference,
1008 '__': anonymous_reference}
1011 def _loweralpha_to_int(s, _zero=(ord('a')-1)):
1012 return ord(s) - _zero
1014 def _upperalpha_to_int(s, _zero=(ord('A')-1)):
1015 return ord(s) - _zero
1017 def _lowerroman_to_int(s):
1018 return roman.fromRoman(s.upper())
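# Quick editorial checks of the enumerator converters defined above:
assert _loweralpha_to_int('c') == 3
assert _upperalpha_to_int('B') == 2
assert _lowerroman_to_int('iv') == 4
assert roman.fromRoman('MMXII') == 2012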
1021 class Body(RSTState):
1024 Generic classifier of the first line of a block.
1027 double_width_pad_char = tableparser.TableParser.double_width_pad_char
1028 """Padding character for East Asian double-width text."""
1030 enum = Struct()
1031 """Enumerated list parsing information."""
1033 enum.formatinfo = {
1034 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1035 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1036 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1037 enum.formats = enum.formatinfo.keys()
1038 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1039 'lowerroman', 'upperroman'] # ORDERED!
1040 enum.sequencepats = {'arabic': '[0-9]+',
1041 'loweralpha': '[a-z]',
1042 'upperalpha': '[A-Z]',
1043 'lowerroman': '[ivxlcdm]+',
1044 'upperroman': '[IVXLCDM]+',}
1045 enum.converters = {'arabic': int,
1046 'loweralpha': _loweralpha_to_int,
1047 'upperalpha': _upperalpha_to_int,
1048 'lowerroman': _lowerroman_to_int,
1049 'upperroman': roman.fromRoman}
1051 enum.sequenceregexps = {}
1052 for sequence in enum.sequences:
1053 enum.sequenceregexps[sequence] = re.compile(
1054 enum.sequencepats[sequence] + '$', re.UNICODE)
1056 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1057 """Matches the top (& bottom) of a full table)."""
1059 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1060 """Matches the top of a simple table."""
1062 simple_table_border_pat = re.compile('=+[ =]*$')
1063 """Matches the bottom & header bottom of a simple table."""
1065 pats = {}
1066 """Fragments of patterns used by transitions."""
1068 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1069 pats['alpha'] = '[a-zA-Z]'
1070 pats['alphanum'] = '[a-zA-Z0-9]'
1071 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1072 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1073 '|%(upperroman)s|#)' % enum.sequencepats)
1074 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1075 # @@@ Loosen up the pattern? Allow Unicode?
1076 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1077 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1078 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1079 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1081 for format in enum.formats:
1082 pats[format] = '(?P<%s>%s%s%s)' % (
1083 format, re.escape(enum.formatinfo[format].prefix),
1084 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1086 patterns = {
1087 'bullet': u'[-+*\u2022\u2023\u2043]( +|$)',
1088 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1089 'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1090 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1091 'doctest': r'>>>( +|$)',
1092 'line_block': r'\|( +|$)',
1093 'grid_table_top': grid_table_top_pat,
1094 'simple_table_top': simple_table_top_pat,
1095 'explicit_markup': r'\.\.( +|$)',
1096 'anonymous': r'__( +|$)',
1097 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1098 'text': r''}
1099 initial_transitions = (
1100 'bullet',
1101 'enumerator',
1102 'field_marker',
1103 'option_marker',
1104 'doctest',
1105 'line_block',
1106 'grid_table_top',
1107 'simple_table_top',
1108 'explicit_markup',
1109 'anonymous',
1110 'line',
1111 'text')
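# Editorial examples of first lines and the Body transition they match:
#   '- item'          -> 'bullet'            '3. item'     -> 'enumerator'
#   ':field: body'    -> 'field_marker'      '-a, --all'   -> 'option_marker'
#   '>>> 1 + 1'       -> 'doctest'           '| a line'    -> 'line_block'
#   '.. note:: text'  -> 'explicit_markup'   '----'        -> 'line'
#   '__ anonymous'    -> 'anonymous'         anything else -> 'text'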
1113 def indent(self, match, context, next_state):
1114 """Block quote."""
1115 indented, indent, line_offset, blank_finish = \
1116 self.state_machine.get_indented()
1117 elements = self.block_quote(indented, line_offset)
1118 self.parent += elements
1119 if not blank_finish:
1120 self.parent += self.unindent_warning('Block quote')
1121 return context, next_state, []
1123 def block_quote(self, indented, line_offset):
1124 elements = []
1125 while indented:
1126 (blockquote_lines,
1127 attribution_lines,
1128 attribution_offset,
1129 indented,
1130 new_line_offset) = self.split_attribution(indented, line_offset)
1131 blockquote = nodes.block_quote()
1132 self.nested_parse(blockquote_lines, line_offset, blockquote)
1133 elements.append(blockquote)
1134 if attribution_lines:
1135 attribution, messages = self.parse_attribution(
1136 attribution_lines, attribution_offset)
1137 blockquote += attribution
1138 elements += messages
1139 line_offset = new_line_offset
1140 while indented and not indented[0]:
1141 indented = indented[1:]
1142 line_offset += 1
1143 return elements
1145 # U+2014 is an em-dash:
1146 attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \\n])',
1147 re.UNICODE)
1149 def split_attribution(self, indented, line_offset):
1151 Check for a block quote attribution and split it off:
1153 * First line after a blank line must begin with a dash ("--", "---",
1154 em-dash; matches `self.attribution_pattern`).
1155 * Every line after that must have consistent indentation.
1156 * Attributions must be preceded by block quote content.
1158 Return a tuple of: (block quote content lines, attribution lines,
1159 attribution offset, remaining indented lines, new line offset).
1161 blank = None
1162 nonblank_seen = False
1163 for i in range(len(indented)):
1164 line = indented[i].rstrip()
1165 if line:
1166 if nonblank_seen and blank == i - 1: # last line blank
1167 match = self.attribution_pattern.match(line)
1168 if match:
1169 attribution_end, indent = self.check_attribution(
1170 indented, i)
1171 if attribution_end:
1172 a_lines = indented[i:attribution_end]
1173 a_lines.trim_left(match.end(), end=1)
1174 a_lines.trim_left(indent, start=1)
1175 return (indented[:i], a_lines,
1176 i, indented[attribution_end:],
1177 line_offset + attribution_end)
1178 nonblank_seen = True
1179 else:
1180 blank = i
1181 else:
1182 return (indented, None, None, None, None)
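# Editorial example of a block quote with an attribution, in the shape split
# off above:
#
#     Just like an ordinary block quote,
#     indented relative to the surrounding text.
#
#     -- Attribution Name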
1184 def check_attribution(self, indented, attribution_start):
1186 Check attribution shape.
1187 Return the index past the end of the attribution, and the indent.
1189 indent = None
1190 i = attribution_start + 1
1191 for i in range(attribution_start + 1, len(indented)):
1192 line = indented[i].rstrip()
1193 if not line:
1194 break
1195 if indent is None:
1196 indent = len(line) - len(line.lstrip())
1197 elif len(line) - len(line.lstrip()) != indent:
1198 return None, None # bad shape; not an attribution
1199 else:
1200 # return index of line after last attribution line:
1201 i += 1
1202 return i, (indent or 0)
1204 def parse_attribution(self, indented, line_offset):
1205 text = '\n'.join(indented).rstrip()
1206 lineno = self.state_machine.abs_line_number() + line_offset
1207 textnodes, messages = self.inline_text(text, lineno)
1208 node = nodes.attribution(text, '', *textnodes)
1209 node.line = lineno
1210 # report with source and source-line results in
1211 # ``IndexError: list index out of range``
1212 # node.source, node.line = self.state_machine.get_source_and_line(lineno)
1213 return node, messages
1215 def bullet(self, match, context, next_state):
1216 """Bullet list item."""
1217 bulletlist = nodes.bullet_list()
1218 self.parent += bulletlist
1219 bulletlist['bullet'] = match.string[0]
1220 i, blank_finish = self.list_item(match.end())
1221 bulletlist += i
1222 offset = self.state_machine.line_offset + 1 # next line
1223 new_line_offset, blank_finish = self.nested_list_parse(
1224 self.state_machine.input_lines[offset:],
1225 input_offset=self.state_machine.abs_line_offset() + 1,
1226 node=bulletlist, initial_state='BulletList',
1227 blank_finish=blank_finish)
1228 self.goto_line(new_line_offset)
1229 if not blank_finish:
1230 self.parent += self.unindent_warning('Bullet list')
1231 return [], next_state, []
1233 def list_item(self, indent):
1234 if self.state_machine.line[indent:]:
1235 indented, line_offset, blank_finish = (
1236 self.state_machine.get_known_indented(indent))
1237 else:
1238 indented, indent, line_offset, blank_finish = (
1239 self.state_machine.get_first_known_indented(indent))
1240 listitem = nodes.list_item('\n'.join(indented))
1241 if indented:
1242 self.nested_parse(indented, input_offset=line_offset,
1243 node=listitem)
1244 return listitem, blank_finish
1246 def enumerator(self, match, context, next_state):
1247 """Enumerated List Item"""
1248 format, sequence, text, ordinal = self.parse_enumerator(match)
1249 if not self.is_enumerated_list_item(ordinal, sequence, format):
1250 raise statemachine.TransitionCorrection('text')
1251 enumlist = nodes.enumerated_list()
1252 self.parent += enumlist
1253 if sequence == '#':
1254 enumlist['enumtype'] = 'arabic'
1255 else:
1256 enumlist['enumtype'] = sequence
1257 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1258 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1259 if ordinal != 1:
1260 enumlist['start'] = ordinal
1261 src, srcline = self.state_machine.get_source_and_line()
1262 msg = self.reporter.info(
1263 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1264 % (text, ordinal), source=src, line=srcline)
1265 self.parent += msg
1266 listitem, blank_finish = self.list_item(match.end())
1267 enumlist += listitem
1268 offset = self.state_machine.line_offset + 1 # next line
1269 newline_offset, blank_finish = self.nested_list_parse(
1270 self.state_machine.input_lines[offset:],
1271 input_offset=self.state_machine.abs_line_offset() + 1,
1272 node=enumlist, initial_state='EnumeratedList',
1273 blank_finish=blank_finish,
1274 extra_settings={'lastordinal': ordinal,
1275 'format': format,
1276 'auto': sequence == '#'})
1277 self.goto_line(newline_offset)
1278 if not blank_finish:
1279 self.parent += self.unindent_warning('Enumerated list')
1280 return [], next_state, []
1282 def parse_enumerator(self, match, expected_sequence=None):
1284 Analyze an enumerator and return the results.
1286 :Return:
1287 - the enumerator format ('period', 'parens', or 'rparen'),
1288 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1289 - the text of the enumerator, stripped of formatting, and
1290 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1291 ``None`` is returned for invalid enumerator text).
1293 The enumerator format has already been determined by the regular
1294 expression match. If `expected_sequence` is given, that sequence is
1295 tried first. If not, we check for Roman numeral 1. This way,
1296 single-character Roman numerals (which are also alphabetical) can be
1297 matched. If no sequence has been matched, all sequences are checked in
1298 order.
1300 groupdict = match.groupdict()
1301 sequence = ''
1302 for format in self.enum.formats:
1303 if groupdict[format]: # was this the format matched?
1304 break # yes; keep `format`
1305 else: # shouldn't happen
1306 raise ParserError('enumerator format not matched')
1307 text = groupdict[format][self.enum.formatinfo[format].start
1308 :self.enum.formatinfo[format].end]
1309 if text == '#':
1310 sequence = '#'
1311 elif expected_sequence:
1312 try:
1313 if self.enum.sequenceregexps[expected_sequence].match(text):
1314 sequence = expected_sequence
1315 except KeyError: # shouldn't happen
1316 raise ParserError('unknown enumerator sequence: %s'
1317 % sequence)
1318 elif text == 'i':
1319 sequence = 'lowerroman'
1320 elif text == 'I':
1321 sequence = 'upperroman'
1322 if not sequence:
1323 for sequence in self.enum.sequences:
1324 if self.enum.sequenceregexps[sequence].match(text):
1325 break
1326 else: # shouldn't happen
1327 raise ParserError('enumerator sequence not matched')
1328 if sequence == '#':
1329 ordinal = 1
1330 else:
1331 try:
1332 ordinal = self.enum.converters[sequence](text)
1333 except roman.InvalidRomanNumeralError:
1334 ordinal = None
1335 return format, sequence, text, ordinal
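# Editorial examples of the values returned above:
#   enumerator '(3)'  -> ('parens', 'arabic',     '3',  3)
#   enumerator 'iv.'  -> ('period', 'lowerroman', 'iv', 4)
#   enumerator 'C)'   -> ('rparen', 'upperalpha', 'C',  3)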
1337 def is_enumerated_list_item(self, ordinal, sequence, format):
1339 Check validity based on the ordinal value and the second line.
1341 Return true if the ordinal is valid and the second line is blank,
1342 indented, or starts with the next enumerator or an auto-enumerator.
1344 if ordinal is None:
1345 return None
1346 try:
1347 next_line = self.state_machine.next_line()
1348 except EOFError: # end of input lines
1349 self.state_machine.previous_line()
1350 return 1
1351 else:
1352 self.state_machine.previous_line()
1353 if not next_line[:1].strip(): # blank or indented
1354 return 1
1355 result = self.make_enumerator(ordinal + 1, sequence, format)
1356 if result:
1357 next_enumerator, auto_enumerator = result
1358 try:
1359 if ( next_line.startswith(next_enumerator) or
1360 next_line.startswith(auto_enumerator) ):
1361 return 1
1362 except TypeError:
1363 pass
1364 return None
1366 def make_enumerator(self, ordinal, sequence, format):
1368 Construct and return the next enumerated list item marker, and an
1369 auto-enumerator ("#" instead of the regular enumerator).
1371 Return ``None`` for invalid (out of range) ordinals.
1372 """ #"
1373 if sequence == '#':
1374 enumerator = '#'
1375 elif sequence == 'arabic':
1376 enumerator = str(ordinal)
1377 else:
1378 if sequence.endswith('alpha'):
1379 if ordinal > 26:
1380 return None
1381 enumerator = chr(ordinal + ord('a') - 1)
1382 elif sequence.endswith('roman'):
1383 try:
1384 enumerator = roman.toRoman(ordinal)
1385 except roman.RomanError:
1386 return None
1387 else: # shouldn't happen
1388 raise ParserError('unknown enumerator sequence: "%s"'
1389 % sequence)
1390 if sequence.startswith('lower'):
1391 enumerator = enumerator.lower()
1392 elif sequence.startswith('upper'):
1393 enumerator = enumerator.upper()
1394 else: # shouldn't happen
1395 raise ParserError('unknown enumerator sequence: "%s"'
1396 % sequence)
1397 formatinfo = self.enum.formatinfo[format]
1398 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1399 + ' ')
1400 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1401 return next_enumerator, auto_enumerator
1403 def field_marker(self, match, context, next_state):
1404 """Field list item."""
1405 field_list = nodes.field_list()
1406 self.parent += field_list
1407 field, blank_finish = self.field(match)
1408 field_list += field
1409 offset = self.state_machine.line_offset + 1 # next line
1410 newline_offset, blank_finish = self.nested_list_parse(
1411 self.state_machine.input_lines[offset:],
1412 input_offset=self.state_machine.abs_line_offset() + 1,
1413 node=field_list, initial_state='FieldList',
1414 blank_finish=blank_finish)
1415 self.goto_line(newline_offset)
1416 if not blank_finish:
1417 self.parent += self.unindent_warning('Field list')
1418 return [], next_state, []
1420 def field(self, match):
1421 name = self.parse_field_marker(match)
1422 src, srcline = self.state_machine.get_source_and_line()
1423 lineno = self.state_machine.abs_line_number()
1424 indented, indent, line_offset, blank_finish = \
1425 self.state_machine.get_first_known_indented(match.end())
1426 field_node = nodes.field()
1427 field_node.source = src
1428 field_node.line = srcline
1429 name_nodes, name_messages = self.inline_text(name, lineno)
1430 field_node += nodes.field_name(name, '', *name_nodes)
1431 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1432 field_node += field_body
1433 if indented:
1434 self.parse_field_body(indented, line_offset, field_body)
1435 return field_node, blank_finish
1437 def parse_field_marker(self, match):
1438 """Extract & return field name from a field marker match."""
1439 field = match.group()[1:] # strip off leading ':'
1440 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1441 return field
1443 def parse_field_body(self, indented, offset, node):
1444 self.nested_parse(indented, input_offset=offset, node=node)
1446 def option_marker(self, match, context, next_state):
1447 """Option list item."""
1448 optionlist = nodes.option_list()
1449 try:
1450 listitem, blank_finish = self.option_list_item(match)
1451 except MarkupError, error:
1452 # This shouldn't happen; pattern won't match.
1453 src, srcline = self.state_machine.get_source_and_line()
1454 msg = self.reporter.error(u'Invalid option list marker: %s' %
1455 error, source=src, line=srcline)
1456 self.parent += msg
1457 indented, indent, line_offset, blank_finish = \
1458 self.state_machine.get_first_known_indented(match.end())
1459 elements = self.block_quote(indented, line_offset)
1460 self.parent += elements
1461 if not blank_finish:
1462 self.parent += self.unindent_warning('Option list')
1463 return [], next_state, []
1464 self.parent += optionlist
1465 optionlist += listitem
1466 offset = self.state_machine.line_offset + 1 # next line
1467 newline_offset, blank_finish = self.nested_list_parse(
1468 self.state_machine.input_lines[offset:],
1469 input_offset=self.state_machine.abs_line_offset() + 1,
1470 node=optionlist, initial_state='OptionList',
1471 blank_finish=blank_finish)
1472 self.goto_line(newline_offset)
1473 if not blank_finish:
1474 self.parent += self.unindent_warning('Option list')
1475 return [], next_state, []
1477 def option_list_item(self, match):
1478 offset = self.state_machine.abs_line_offset()
1479 options = self.parse_option_marker(match)
1480 indented, indent, line_offset, blank_finish = \
1481 self.state_machine.get_first_known_indented(match.end())
1482 if not indented: # not an option list item
1483 self.goto_line(offset)
1484 raise statemachine.TransitionCorrection('text')
1485 option_group = nodes.option_group('', *options)
1486 description = nodes.description('\n'.join(indented))
1487 option_list_item = nodes.option_list_item('', option_group,
1488 description)
1489 if indented:
1490 self.nested_parse(indented, input_offset=line_offset,
1491 node=description)
1492 return option_list_item, blank_finish
1494 def parse_option_marker(self, match):
1496 Return a list of `node.option` and `node.option_argument` objects,
1497 parsed from an option marker match.
1499 :Exception: `MarkupError` for invalid option markers.
1501 optlist = []
1502 optionstrings = match.group().rstrip().split(', ')
1503 for optionstring in optionstrings:
1504 tokens = optionstring.split()
1505 delimiter = ' '
1506 firstopt = tokens[0].split('=', 1)
1507 if len(firstopt) > 1:
1508 # "--opt=value" form
1509 tokens[:1] = firstopt
1510 delimiter = '='
1511 elif (len(tokens[0]) > 2
1512 and ((tokens[0].startswith('-')
1513 and not tokens[0].startswith('--'))
1514 or tokens[0].startswith('+'))):
1515 # "-ovalue" form
1516 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1517 delimiter = ''
1518 if len(tokens) > 1 and (tokens[1].startswith('<')
1519 and tokens[-1].endswith('>')):
1520 # "-o <value1 value2>" form; join all values into one token
1521 tokens[1:] = [' '.join(tokens[1:])]
1522 if 0 < len(tokens) <= 2:
1523 option = nodes.option(optionstring)
1524 option += nodes.option_string(tokens[0], tokens[0])
1525 if len(tokens) > 1:
1526 option += nodes.option_argument(tokens[1], tokens[1],
1527 delimiter=delimiter)
1528 optlist.append(option)
1529 else:
1530 raise MarkupError(
1531 'wrong number of option tokens (=%s), should be 1 or 2: '
1532 '"%s"' % (len(tokens), optionstring))
1533 return optlist
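# Editorial examples of option markers accepted above:
#   '-o FILE'        -> option '-o'       + argument 'FILE' (delimiter ' ')
#   '--output=FILE'  -> option '--output' + argument 'FILE' (delimiter '=')
#   '-o, --output'   -> two option nodes in a single option_group
#   '/V'             -> a single DOS/VMS-style option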
1535 def doctest(self, match, context, next_state):
1536 data = '\n'.join(self.state_machine.get_text_block())
1537 self.parent += nodes.doctest_block(data, data)
1538 return [], next_state, []
1540 def line_block(self, match, context, next_state):
1541 """First line of a line block."""
1542 block = nodes.line_block()
1543 self.parent += block
1544 lineno = self.state_machine.abs_line_number()
1545 line, messages, blank_finish = self.line_block_line(match, lineno)
1546 block += line
1547 self.parent += messages
1548 if not blank_finish:
1549 offset = self.state_machine.line_offset + 1 # next line
1550 new_line_offset, blank_finish = self.nested_list_parse(
1551 self.state_machine.input_lines[offset:],
1552 input_offset=self.state_machine.abs_line_offset() + 1,
1553 node=block, initial_state='LineBlock',
1554 blank_finish=0)
1555 self.goto_line(new_line_offset)
1556 if not blank_finish:
1557 src, srcline = self.state_machine.get_source_and_line()
1558 self.parent += self.reporter.warning(
1559 'Line block ends without a blank line.',
1560 source=src, line=srcline+1)
1561 if len(block):
1562 if block[0].indent is None:
1563 block[0].indent = 0
1564 self.nest_line_block_lines(block)
1565 return [], next_state, []
1567 def line_block_line(self, match, lineno):
1568 """Return one line element of a line_block."""
1569 indented, indent, line_offset, blank_finish = \
1570 self.state_machine.get_first_known_indented(match.end(),
1571 until_blank=1)
1572 text = u'\n'.join(indented)
1573 text_nodes, messages = self.inline_text(text, lineno)
1574 line = nodes.line(text, '', *text_nodes)
1575 if match.string.rstrip() != '|': # not empty
1576 line.indent = len(match.group(1)) - 1
1577 return line, messages, blank_finish
1579 def nest_line_block_lines(self, block):
1580 for index in range(1, len(block)):
1581 if block[index].indent is None:
1582 block[index].indent = block[index - 1].indent
1583 self.nest_line_block_segment(block)
1585 def nest_line_block_segment(self, block):
1586 indents = [item.indent for item in block]
1587 least = min(indents)
1588 new_items = []
1589 new_block = nodes.line_block()
1590 for item in block:
1591 if item.indent > least:
1592 new_block.append(item)
1593 else:
1594 if len(new_block):
1595 self.nest_line_block_segment(new_block)
1596 new_items.append(new_block)
1597 new_block = nodes.line_block()
1598 new_items.append(item)
1599 if len(new_block):
1600 self.nest_line_block_segment(new_block)
1601 new_items.append(new_block)
1602 block[:] = new_items
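An illustrative sketch (not in the original source) of the indent-based nesting performed by nest_line_block_lines()/nest_line_block_segment(), assuming docutils' public API:

# A deeper-indented line becomes a child line_block of its neighbours.
from docutils.core import publish_doctree

source = """\
| outer line
|    nested line (larger indent after the '|')
| back at the outer level
"""
print(publish_doctree(source).pformat())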
1604 def grid_table_top(self, match, context, next_state):
1605 """Top border of a full table."""
1606 return self.table_top(match, context, next_state,
1607 self.isolate_grid_table,
1608 tableparser.GridTableParser)
1610 def simple_table_top(self, match, context, next_state):
1611 """Top border of a simple table."""
1612 return self.table_top(match, context, next_state,
1613 self.isolate_simple_table,
1614 tableparser.SimpleTableParser)
1616 def table_top(self, match, context, next_state,
1617 isolate_function, parser_class):
1618 """Top border of a generic table."""
1619 nodelist, blank_finish = self.table(isolate_function, parser_class)
1620 self.parent += nodelist
1621 if not blank_finish:
1622 src, srcline = self.state_machine.get_source_and_line()
1623 msg = self.reporter.warning(
1624 'Blank line required after table.',
1625 source=src, line=srcline+1)
1626 self.parent += msg
1627 return [], next_state, []
1629 def table(self, isolate_function, parser_class):
1630 """Parse a table."""
1631 block, messages, blank_finish = isolate_function()
1632 if block:
1633 try:
1634 parser = parser_class()
1635 tabledata = parser.parse(block)
1636 tableline = (self.state_machine.abs_line_number() - len(block)
1637 + 1)
1638 table = self.build_table(tabledata, tableline)
1639 nodelist = [table] + messages
1640 except tableparser.TableMarkupError, detail:
1641 nodelist = self.malformed_table(
1642 block, ' '.join(detail.args)) + messages
1643 else:
1644 nodelist = messages
1645 return nodelist, blank_finish
1647 def isolate_grid_table(self):
1648 messages = []
1649 blank_finish = 1
1650 try:
1651 block = self.state_machine.get_text_block(flush_left=1)
1652 except statemachine.UnexpectedIndentationError, instance:
1653 block, src, srcline = instance.args
1654 messages.append(self.reporter.error('Unexpected indentation.',
1655 source=src, line=srcline))
1656 blank_finish = 0
1657 block.disconnect()
1658 # for East Asian chars:
1659 block.pad_double_width(self.double_width_pad_char)
1660 width = len(block[0].strip())
1661 for i in range(len(block)):
1662 block[i] = block[i].strip()
1663 if block[i][0] not in '+|': # check left edge
1664 blank_finish = 0
1665 self.state_machine.previous_line(len(block) - i)
1666 del block[i:]
1667 break
1668 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1669 blank_finish = 0
1670 # from second-last to third line of table:
1671 for i in range(len(block) - 2, 1, -1):
1672 if self.grid_table_top_pat.match(block[i]):
1673 self.state_machine.previous_line(len(block) - i + 1)
1674 del block[i+1:]
1675 break
1676 else:
1677 messages.extend(self.malformed_table(block))
1678 return [], messages, blank_finish
1679 for i in range(len(block)): # check right edge
1680 if len(block[i]) != width or block[i][-1] not in '+|':
1681 messages.extend(self.malformed_table(block))
1682 return [], messages, blank_finish
1683 return block, messages, blank_finish
1685 def isolate_simple_table(self):
1686 start = self.state_machine.line_offset
1687 lines = self.state_machine.input_lines
1688 limit = len(lines) - 1
1689 toplen = len(lines[start].strip())
1690 pattern_match = self.simple_table_border_pat.match
1691 found = 0
1692 found_at = None
1693 i = start + 1
1694 while i <= limit:
1695 line = lines[i]
1696 match = pattern_match(line)
1697 if match:
1698 if len(line.strip()) != toplen:
1699 self.state_machine.next_line(i - start)
1700 messages = self.malformed_table(
1701 lines[start:i+1], 'Bottom/header table border does '
1702 'not match top border.')
1703 return [], messages, i == limit or not lines[i+1].strip()
1704 found += 1
1705 found_at = i
1706 if found == 2 or i == limit or not lines[i+1].strip():
1707 end = i
1708 break
1709 i += 1
1710 else: # reached end of input_lines
1711 if found:
1712 extra = ' or no blank line after table bottom'
1713 self.state_machine.next_line(found_at - start)
1714 block = lines[start:found_at+1]
1715 else:
1716 extra = ''
1717 self.state_machine.next_line(i - start - 1)
1718 block = lines[start:]
1719 messages = self.malformed_table(
1720 block, 'No bottom table border found%s.' % extra)
1721 return [], messages, not extra
1722 self.state_machine.next_line(end - start)
1723 block = lines[start:end+1]
1724 # for East Asian chars:
1725 block.pad_double_width(self.double_width_pad_char)
1726 return block, [], end == limit or not lines[end+1].strip()
1728 def malformed_table(self, block, detail=''):
1729 block.replace(self.double_width_pad_char, '')
1730 data = '\n'.join(block)
1731 message = 'Malformed table.'
1732 startline = self.state_machine.abs_line_number() - len(block) + 1
1733 src, srcline = self.state_machine.get_source_and_line(startline)
1734 if detail:
1735 message += '\n' + detail
1736 error = self.reporter.error(message, nodes.literal_block(data, data),
1737 source=src, line=srcline)
1738 return [error]
1740 def build_table(self, tabledata, tableline, stub_columns=0):
1741 colwidths, headrows, bodyrows = tabledata
1742 table = nodes.table()
1743 tgroup = nodes.tgroup(cols=len(colwidths))
1744 table += tgroup
1745 for colwidth in colwidths:
1746 colspec = nodes.colspec(colwidth=colwidth)
1747 if stub_columns:
1748 colspec.attributes['stub'] = 1
1749 stub_columns -= 1
1750 tgroup += colspec
1751 if headrows:
1752 thead = nodes.thead()
1753 tgroup += thead
1754 for row in headrows:
1755 thead += self.build_table_row(row, tableline)
1756 tbody = nodes.tbody()
1757 tgroup += tbody
1758 for row in bodyrows:
1759 tbody += self.build_table_row(row, tableline)
1760 return table
1762 def build_table_row(self, rowdata, tableline):
1763 row = nodes.row()
1764 for cell in rowdata:
1765 if cell is None:
1766 continue
1767 morerows, morecols, offset, cellblock = cell
1768 attributes = {}
1769 if morerows:
1770 attributes['morerows'] = morerows
1771 if morecols:
1772 attributes['morecols'] = morecols
1773 entry = nodes.entry(**attributes)
1774 row += entry
1775 if ''.join(cellblock):
1776 self.nested_parse(cellblock, input_offset=tableline+offset,
1777 node=entry)
1778 return row
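A small sketch, not part of the original module, showing the two table forms that reach build_table()/build_table_row() via isolate_grid_table() and isolate_simple_table() respectively (public API assumed):

# Grid table and simple table; both produce table/tgroup/thead/tbody nodes.
from docutils.core import publish_doctree

grid = """\
+------+------+
| head | head |
+======+======+
| a    | b    |
+------+------+
"""
simple = """\
=====  =====
col 1  col 2
=====  =====
a      b
=====  =====
"""
for source in (grid, simple):
    print(publish_doctree(source).pformat())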
1781 explicit = Struct()
1782 """Patterns and constants used for explicit markup recognition."""
1784 explicit.patterns = Struct(
1785 target=re.compile(r"""
1787 _ # anonymous target
1788 | # *OR*
1789 (?!_) # no underscore at the beginning
1790 (?P<quote>`?) # optional open quote
1791 (?![ `]) # first char. not space or
1792 # backquote
1793 (?P<name> # reference name
1796 %(non_whitespace_escape_before)s
1797 (?P=quote) # close quote if open quote used
1799 (?<!(?<!\x00):) # no unescaped colon at end
1800 %(non_whitespace_escape_before)s
1801 [ ]? # optional space
1802 : # end of reference name
1803 ([ ]+|$) # followed by whitespace
1804 """ % vars(Inliner), re.VERBOSE | re.UNICODE),
1805 reference=re.compile(r"""
1807 (?P<simple>%(simplename)s)_
1808 | # *OR*
1809 ` # open backquote
1810 (?![ ]) # not space
1811 (?P<phrase>.+?) # hyperlink phrase
1812 %(non_whitespace_escape_before)s
1813 `_ # close backquote,
1814 # reference mark
1815 )
1816 $ # end of string
1817 """ % vars(Inliner), re.VERBOSE | re.UNICODE),
1818 substitution=re.compile(r"""
1820 (?![ ]) # first char. not space
1821 (?P<name>.+?) # substitution text
1822 %(non_whitespace_escape_before)s
1823 \| # close delimiter
1824 )
1825 ([ ]+|$) # followed by whitespace
1826 """ % vars(Inliner),
1827 re.VERBOSE | re.UNICODE),)
1829 def footnote(self, match):
1830 src, srcline = self.state_machine.get_source_and_line()
1831 indented, indent, offset, blank_finish = \
1832 self.state_machine.get_first_known_indented(match.end())
1833 label = match.group(1)
1834 name = normalize_name(label)
1835 footnote = nodes.footnote('\n'.join(indented))
1836 footnote.source = src
1837 footnote.line = srcline
1838 if name[0] == '#': # auto-numbered
1839 name = name[1:] # autonumber label
1840 footnote['auto'] = 1
1841 if name:
1842 footnote['names'].append(name)
1843 self.document.note_autofootnote(footnote)
1844 elif name == '*': # auto-symbol
1845 name = ''
1846 footnote['auto'] = '*'
1847 self.document.note_symbol_footnote(footnote)
1848 else: # manually numbered
1849 footnote += nodes.label('', label)
1850 footnote['names'].append(name)
1851 self.document.note_footnote(footnote)
1852 if name:
1853 self.document.note_explicit_target(footnote, footnote)
1854 else:
1855 self.document.set_id(footnote, footnote)
1856 if indented:
1857 self.nested_parse(indented, input_offset=offset, node=footnote)
1858 return [footnote], blank_finish
1860 def citation(self, match):
1861 src, srcline = self.state_machine.get_source_and_line()
1862 indented, indent, offset, blank_finish = \
1863 self.state_machine.get_first_known_indented(match.end())
1864 label = match.group(1)
1865 name = normalize_name(label)
1866 citation = nodes.citation('\n'.join(indented))
1867 citation.source = src
1868 citation.line = srcline
1869 citation += nodes.label('', label)
1870 citation['names'].append(name)
1871 self.document.note_citation(citation)
1872 self.document.note_explicit_target(citation, citation)
1873 if indented:
1874 self.nested_parse(indented, input_offset=offset, node=citation)
1875 return [citation], blank_finish
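An illustrative sketch (not in the original source) of the footnote label forms distinguished above plus a citation, using the public API:

# Manual, auto-numbered, named auto-numbered, auto-symbol footnotes; citation.
from docutils.core import publish_doctree

source = """\
Text [1]_ [#]_ [#named]_ [*]_ [CIT2002]_.

.. [1] manually numbered footnote
.. [#] auto-numbered footnote
.. [#named] named auto-numbered footnote
.. [*] auto-symbol footnote
.. [CIT2002] citation
"""
print(publish_doctree(source).pformat())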
1877 def hyperlink_target(self, match):
1878 pattern = self.explicit.patterns.target
1879 lineno = self.state_machine.abs_line_number()
1880 src, srcline = self.state_machine.get_source_and_line()
1881 block, indent, offset, blank_finish = \
1882 self.state_machine.get_first_known_indented(
1883 match.end(), until_blank=1, strip_indent=0)
1884 blocktext = match.string[:match.end()] + '\n'.join(block)
1885 block = [escape2null(line) for line in block]
1886 escaped = block[0]
1887 blockindex = 0
1888 while 1:
1889 targetmatch = pattern.match(escaped)
1890 if targetmatch:
1891 break
1892 blockindex += 1
1893 try:
1894 escaped += block[blockindex]
1895 except IndexError:
1896 raise MarkupError('malformed hyperlink target.')
1897 del block[:blockindex]
1898 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
1899 target = self.make_target(block, blocktext, lineno,
1900 targetmatch.group('name'))
1901 return [target], blank_finish
1903 def make_target(self, block, block_text, lineno, target_name):
1904 target_type, data = self.parse_target(block, block_text, lineno)
1905 if target_type == 'refname':
1906 target = nodes.target(block_text, '', refname=normalize_name(data))
1907 target.indirect_reference_name = data
1908 self.add_target(target_name, '', target, lineno)
1909 self.document.note_indirect_target(target)
1910 return target
1911 elif target_type == 'refuri':
1912 target = nodes.target(block_text, '')
1913 self.add_target(target_name, data, target, lineno)
1914 return target
1915 else:
1916 return data
1918 def parse_target(self, block, block_text, lineno):
1919 """
1920 Determine the type of reference of a target.
1922 :Return: A 2-tuple, one of:
1924 - 'refname' and the indirect reference name
1925 - 'refuri' and the URI
1926 - 'malformed' and a system_message node
1927 """
1928 if block and block[-1].strip()[-1:] == '_': # possible indirect target
1929 reference = ' '.join([line.strip() for line in block])
1930 refname = self.is_reference(reference)
1931 if refname:
1932 return 'refname', refname
1933 reference = ''.join([''.join(line.split()) for line in block])
1934 return 'refuri', unescape(reference)
1936 def is_reference(self, reference):
1937 match = self.explicit.patterns.reference.match(
1938 whitespace_normalize_name(reference))
1939 if not match:
1940 return None
1941 return unescape(match.group('simple') or match.group('phrase'))
1943 def add_target(self, targetname, refuri, target, lineno):
1944 target.line = lineno
1945 if targetname:
1946 name = normalize_name(unescape(targetname))
1947 target['names'].append(name)
1948 if refuri:
1949 uri = self.inliner.adjust_uri(refuri)
1950 if uri:
1951 target['refuri'] = uri
1952 else:
1953 raise ApplicationError('problem with URI: %r' % refuri)
1954 self.document.note_explicit_target(target, self.parent)
1955 else: # anonymous target
1956 if refuri:
1957 target['refuri'] = refuri
1958 target['anonymous'] = 1
1959 self.document.note_anonymous_target(target)
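A minimal sketch, not part of the original module, of the target kinds that hyperlink_target() routes through make_target()/parse_target()/add_target(): an external (refuri) target, an indirect (refname) target, and an anonymous target (public API assumed; the URIs are placeholders):

from docutils.core import publish_doctree

source = """\
.. _docutils: http://docutils.sourceforge.net/
.. _home page: docutils_
__ http://example.org/

See the `home page`_, docutils_, or an `anonymous reference`__.
"""
print(publish_doctree(source).pformat())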
1961 def substitution_def(self, match):
1962 pattern = self.explicit.patterns.substitution
1963 src, srcline = self.state_machine.get_source_and_line()
1964 block, indent, offset, blank_finish = \
1965 self.state_machine.get_first_known_indented(match.end(),
1966 strip_indent=0)
1967 blocktext = (match.string[:match.end()] + '\n'.join(block))
1968 block.disconnect()
1969 escaped = escape2null(block[0].rstrip())
1970 blockindex = 0
1971 while 1:
1972 subdefmatch = pattern.match(escaped)
1973 if subdefmatch:
1974 break
1975 blockindex += 1
1976 try:
1977 escaped = escaped + ' ' + escape2null(block[blockindex].strip())
1978 except IndexError:
1979 raise MarkupError('malformed substitution definition.')
1980 del block[:blockindex] # strip out the substitution marker
1981 block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
1982 if not block[0]:
1983 del block[0]
1984 offset += 1
1985 while block and not block[-1].strip():
1986 block.pop()
1987 subname = subdefmatch.group('name')
1988 substitution_node = nodes.substitution_definition(blocktext)
1989 substitution_node.source = src
1990 substitution_node.line = srcline
1991 if not block:
1992 msg = self.reporter.warning(
1993 'Substitution definition "%s" missing contents.' % subname,
1994 nodes.literal_block(blocktext, blocktext),
1995 source=src, line=srcline)
1996 return [msg], blank_finish
1997 block[0] = block[0].strip()
1998 substitution_node['names'].append(
1999 nodes.whitespace_normalize_name(subname))
2000 new_abs_offset, blank_finish = self.nested_list_parse(
2001 block, input_offset=offset, node=substitution_node,
2002 initial_state='SubstitutionDef', blank_finish=blank_finish)
2003 i = 0
2004 for node in substitution_node[:]:
2005 if not (isinstance(node, nodes.Inline) or
2006 isinstance(node, nodes.Text)):
2007 self.parent += substitution_node[i]
2008 del substitution_node[i]
2009 else:
2010 i += 1
2011 for node in substitution_node.traverse(nodes.Element):
2012 if self.disallowed_inside_substitution_definitions(node):
2013 pformat = nodes.literal_block('', node.pformat().rstrip())
2014 msg = self.reporter.error(
2015 'Substitution definition contains illegal element:',
2016 pformat, nodes.literal_block(blocktext, blocktext),
2017 source=src, line=srcline)
2018 return [msg], blank_finish
2019 if len(substitution_node) == 0:
2020 msg = self.reporter.warning(
2021 'Substitution definition "%s" empty or invalid.' % subname,
2022 nodes.literal_block(blocktext, blocktext),
2023 source=src, line=srcline)
2024 return [msg], blank_finish
2025 self.document.note_substitution_def(
2026 substitution_node, subname, self.parent)
2027 return [substitution_node], blank_finish
2029 def disallowed_inside_substitution_definitions(self, node):
2030 if (node['ids'] or
2031 isinstance(node, nodes.reference) and node.get('anonymous') or
2032 isinstance(node, nodes.footnote_reference) and node.get('auto')):
2033 return 1
2034 else:
2035 return 0
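An illustrative sketch (not in the original source) of a substitution definition with an embedded directive; substitution_def() hands the body to the SubstitutionDef state, and the substitution name is passed to the image directive as the "alt" option preset (the file name is a placeholder):

from docutils.core import publish_doctree

source = """\
.. |logo| image:: logo.png

Paragraph using the |logo| substitution.
"""
print(publish_doctree(source).pformat())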
2037 def directive(self, match, **option_presets):
2038 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2039 type_name = match.group(1)
2040 directive_class, messages = directives.directive(
2041 type_name, self.memo.language, self.document)
2042 self.parent += messages
2043 if directive_class:
2044 return self.run_directive(
2045 directive_class, match, type_name, option_presets)
2046 else:
2047 return self.unknown_directive(type_name)
2049 def run_directive(self, directive, match, type_name, option_presets):
2050 """
2051 Parse a directive then run its directive function.
2053 Parameters:
2055 - `directive`: The class implementing the directive. Must be
2056 a subclass of `rst.Directive`.
2058 - `match`: A regular expression match object which matched the first
2059 line of the directive.
2061 - `type_name`: The directive name, as used in the source text.
2063 - `option_presets`: A dictionary of preset options, defaults for the
2064 directive options. Currently, only an "alt" option is passed by
2065 substitution definitions (value: the substitution name), which may
2066 be used by an embedded image directive.
2068 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2069 """
2070 if isinstance(directive, (FunctionType, MethodType)):
2071 from docutils.parsers.rst import convert_directive_function
2072 directive = convert_directive_function(directive)
2073 lineno = self.state_machine.abs_line_number()
2074 src, srcline = self.state_machine.get_source_and_line()
2075 initial_line_offset = self.state_machine.line_offset
2076 indented, indent, line_offset, blank_finish \
2077 = self.state_machine.get_first_known_indented(match.end(),
2078 strip_top=0)
2079 block_text = '\n'.join(self.state_machine.input_lines[
2080 initial_line_offset : self.state_machine.line_offset + 1])
2081 try:
2082 arguments, options, content, content_offset = (
2083 self.parse_directive_block(indented, line_offset,
2084 directive, option_presets))
2085 except MarkupError, detail:
2086 error = self.reporter.error(
2087 'Error in "%s" directive:\n%s.' % (type_name,
2088 ' '.join(detail.args)),
2089 nodes.literal_block(block_text, block_text),
2090 source=src, line=srcline)
2091 return [error], blank_finish
2092 directive_instance = directive(
2093 type_name, arguments, options, content, lineno,
2094 content_offset, block_text, self, self.state_machine)
2095 try:
2096 result = directive_instance.run()
2097 except docutils.parsers.rst.DirectiveError, error:
2098 msg_node = self.reporter.system_message(error.level, error.msg,
2099 source=src, line=srcline)
2100 msg_node += nodes.literal_block(block_text, block_text)
2101 result = [msg_node]
2102 assert isinstance(result, list), \
2103 'Directive "%s" must return a list of nodes.' % type_name
2104 for i in range(len(result)):
2105 assert isinstance(result[i], nodes.Node), \
2106 ('Directive "%s" returned non-Node object (index %s): %r'
2107 % (type_name, i, result[i]))
2108 return (result,
2109 blank_finish or self.state_machine.is_next_line_blank())
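A small sketch, not part of the original module, of a directive invocation as seen by run_directive()/parse_directive_block(): an argument, an option block, and directive content (the file name is a placeholder; public API assumed):

from docutils.core import publish_doctree

source = """\
.. image:: picture.png
   :alt: a picture
   :width: 200px

.. note::
   Directive content is parsed as a nested block.
"""
print(publish_doctree(source).pformat())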
2111 def parse_directive_block(self, indented, line_offset, directive,
2112 option_presets):
2113 option_spec = directive.option_spec
2114 has_content = directive.has_content
2115 if indented and not indented[0].strip():
2116 indented.trim_start()
2117 line_offset += 1
2118 while indented and not indented[-1].strip():
2119 indented.trim_end()
2120 if indented and (directive.required_arguments
2121 or directive.optional_arguments
2122 or option_spec):
2123 for i, line in enumerate(indented):
2124 if not line.strip():
2125 break
2126 else:
2127 i += 1
2128 arg_block = indented[:i]
2129 content = indented[i+1:]
2130 content_offset = line_offset + i + 1
2131 else:
2132 content = indented
2133 content_offset = line_offset
2134 arg_block = []
2135 if option_spec:
2136 options, arg_block = self.parse_directive_options(
2137 option_presets, option_spec, arg_block)
2138 else:
2139 options = {}
2140 if arg_block and not (directive.required_arguments
2141 or directive.optional_arguments):
2142 content = arg_block + indented[i:]
2143 content_offset = line_offset
2144 arg_block = []
2145 while content and not content[0].strip():
2146 content.trim_start()
2147 content_offset += 1
2148 if directive.required_arguments or directive.optional_arguments:
2149 arguments = self.parse_directive_arguments(
2150 directive, arg_block)
2151 else:
2152 arguments = []
2153 if content and not has_content:
2154 raise MarkupError('no content permitted')
2155 return (arguments, options, content, content_offset)
2157 def parse_directive_options(self, option_presets, option_spec, arg_block):
2158 options = option_presets.copy()
2159 for i in range(len(arg_block)):
2160 if arg_block[i][:1] == ':':
2161 opt_block = arg_block[i:]
2162 arg_block = arg_block[:i]
2163 break
2164 else:
2165 opt_block = []
2166 if opt_block:
2167 success, data = self.parse_extension_options(option_spec,
2168 opt_block)
2169 if success: # data is a dict of options
2170 options.update(data)
2171 else: # data is an error string
2172 raise MarkupError(data)
2173 return options, arg_block
2175 def parse_directive_arguments(self, directive, arg_block):
2176 required = directive.required_arguments
2177 optional = directive.optional_arguments
2178 arg_text = '\n'.join(arg_block)
2179 arguments = arg_text.split()
2180 if len(arguments) < required:
2181 raise MarkupError('%s argument(s) required, %s supplied'
2182 % (required, len(arguments)))
2183 elif len(arguments) > required + optional:
2184 if directive.final_argument_whitespace:
2185 arguments = arg_text.split(None, required + optional - 1)
2186 else:
2187 raise MarkupError(
2188 'maximum %s argument(s) allowed, %s supplied'
2189 % (required + optional, len(arguments)))
2190 return arguments
2192 def parse_extension_options(self, option_spec, datalines):
2193 """
2194 Parse `datalines` for a field list containing extension options
2195 matching `option_spec`.
2197 :Parameters:
2198 - `option_spec`: a mapping of option name to conversion
2199 function, which should raise an exception on bad input.
2200 - `datalines`: a list of input strings.
2202 :Return:
2203 - Success value, 1 or 0.
2204 - An option dictionary on success, an error string on failure.
2205 """
2206 node = nodes.field_list()
2207 newline_offset, blank_finish = self.nested_list_parse(
2208 datalines, 0, node, initial_state='ExtensionOptions',
2209 blank_finish=1)
2210 if newline_offset != len(datalines): # incomplete parse of block
2211 return 0, 'invalid option block'
2212 try:
2213 options = utils.extract_extension_options(node, option_spec)
2214 except KeyError, detail:
2215 return 0, ('unknown option: "%s"' % detail.args[0])
2216 except (ValueError, TypeError), detail:
2217 return 0, ('invalid option value: %s' % ' '.join(detail.args))
2218 except utils.ExtensionOptionError, detail:
2219 return 0, ('invalid option data: %s' % ' '.join(detail.args))
2220 if blank_finish:
2221 return 1, options
2222 else:
2223 return 0, 'option data incompletely parsed'
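An illustrative sketch (not in the original source) of the option_spec shape consumed by parse_extension_options(): a mapping from option name to a conversion function that raises on bad input. The converters shown are existing helpers from docutils.parsers.rst.directives:

from docutils.parsers.rst import directives

option_spec = {
    'name': directives.unchanged_required,
    'width': directives.nonnegative_int,
    'align': lambda arg: directives.choice(arg, ('left', 'center', 'right')),
}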
2225 def unknown_directive(self, type_name):
2226 src, srcline = self.state_machine.get_source_and_line()
2227 indented, indent, offset, blank_finish = \
2228 self.state_machine.get_first_known_indented(0, strip_indent=0)
2229 text = '\n'.join(indented)
2230 error = self.reporter.error(
2231 'Unknown directive type "%s".' % type_name,
2232 nodes.literal_block(text, text), source=src, line=srcline)
2233 return [error], blank_finish
2235 def comment(self, match):
2236 if not match.string[match.end():].strip() \
2237 and self.state_machine.is_next_line_blank(): # an empty comment?
2238 return [nodes.comment()], 1 # "A tiny but practical wart."
2239 indented, indent, offset, blank_finish = \
2240 self.state_machine.get_first_known_indented(match.end())
2241 while indented and not indented[-1].strip():
2242 indented.trim_end()
2243 text = '\n'.join(indented)
2244 return [nodes.comment(text, text)], blank_finish
2246 explicit.constructs = [
2247 (footnote,
2248 re.compile(r"""
2249 \.\.[ ]+ # explicit markup start
2250 \[
2251 ( # footnote label:
2252 [0-9]+ # manually numbered footnote
2253 | # *OR*
2254 \# # anonymous auto-numbered footnote
2255 | # *OR*
2256 \#%s # auto-number ed?) footnote label
2257 | # *OR*
2258 \* # auto-symbol footnote
2259 )
2260 \]
2261 ([ ]+|$) # whitespace or end of line
2262 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2263 (citation,
2264 re.compile(r"""
2265 \.\.[ ]+ # explicit markup start
2266 \[(%s)\] # citation label
2267 ([ ]+|$) # whitespace or end of line
2268 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2269 (hyperlink_target,
2270 re.compile(r"""
2271 \.\.[ ]+ # explicit markup start
2272 _ # target indicator
2273 (?![ ]|$) # first char. not space or EOL
2274 """, re.VERBOSE | re.UNICODE)),
2275 (substitution_def,
2276 re.compile(r"""
2277 \.\.[ ]+ # explicit markup start
2278 \| # substitution indicator
2279 (?![ ]|$) # first char. not space or EOL
2280 """, re.VERBOSE | re.UNICODE)),
2281 (directive,
2282 re.compile(r"""
2283 \.\.[ ]+ # explicit markup start
2284 (%s) # directive name
2285 [ ]? # optional space
2286 :: # directive delimiter
2287 ([ ]+|$) # whitespace or end of line
2288 """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
2290 def explicit_markup(self, match, context, next_state):
2291 """Footnotes, hyperlink targets, directives, comments."""
2292 nodelist, blank_finish = self.explicit_construct(match)
2293 self.parent += nodelist
2294 self.explicit_list(blank_finish)
2295 return [], next_state, []
2297 def explicit_construct(self, match):
2298 """Determine which explicit construct this is, parse & return it."""
2299 errors = []
2300 for method, pattern in self.explicit.constructs:
2301 expmatch = pattern.match(match.string)
2302 if expmatch:
2303 try:
2304 return method(self, expmatch)
2305 except MarkupError, error: # never reached?
2306 message = ' '.join(error.args)
2307 src, srcline = self.state_machine.get_source_and_line()
2308 errors.append(self.reporter.warning(
2309 message, source=src, line=srcline))
2310 break
2311 nodelist, blank_finish = self.comment(match)
2312 return nodelist + errors, blank_finish
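A minimal sketch, not part of the original module, of the comment fallback in explicit_construct(): explicit markup that matches none of the construct patterns becomes a comment node (public API assumed):

from docutils.core import publish_doctree

source = ".. just a comment, not a footnote, citation, target, or directive\n"
print(publish_doctree(source).pformat())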
2314 def explicit_list(self, blank_finish):
2315 """
2316 Create a nested state machine for a series of explicit markup
2317 constructs (including anonymous hyperlink targets).
2318 """
2319 offset = self.state_machine.line_offset + 1 # next line
2320 newline_offset, blank_finish = self.nested_list_parse(
2321 self.state_machine.input_lines[offset:],
2322 input_offset=self.state_machine.abs_line_offset() + 1,
2323 node=self.parent, initial_state='Explicit',
2324 blank_finish=blank_finish,
2325 match_titles=self.state_machine.match_titles)
2326 self.goto_line(newline_offset)
2327 if not blank_finish:
2328 self.parent += self.unindent_warning('Explicit markup')
2330 def anonymous(self, match, context, next_state):
2331 """Anonymous hyperlink targets."""
2332 nodelist, blank_finish = self.anonymous_target(match)
2333 self.parent += nodelist
2334 self.explicit_list(blank_finish)
2335 return [], next_state, []
2337 def anonymous_target(self, match):
2338 lineno = self.state_machine.abs_line_number()
2339 block, indent, offset, blank_finish \
2340 = self.state_machine.get_first_known_indented(match.end(),
2341 until_blank=1)
2342 blocktext = match.string[:match.end()] + '\n'.join(block)
2343 block = [escape2null(line) for line in block]
2344 target = self.make_target(block, blocktext, lineno, '')
2345 return [target], blank_finish
2347 def line(self, match, context, next_state):
2348 """Section title overline or transition marker."""
2349 if self.state_machine.match_titles:
2350 return [match.string], 'Line', []
2351 elif match.string.strip() == '::':
2352 raise statemachine.TransitionCorrection('text')
2353 elif len(match.string.strip()) < 4:
2354 msg = self.reporter.info(
2355 'Unexpected possible title overline or transition.\n'
2356 "Treating it as ordinary text because it's so short.",
2357 line=self.state_machine.abs_line_number())
2358 self.parent += msg
2359 raise statemachine.TransitionCorrection('text')
2360 else:
2361 blocktext = self.state_machine.line
2362 msg = self.reporter.severe(
2363 'Unexpected section title or transition.',
2364 nodes.literal_block(blocktext, blocktext),
2365 line=self.state_machine.abs_line_number())
2366 self.parent += msg
2367 return [], next_state, []
2369 def text(self, match, context, next_state):
2370 """Titles, definition lists, paragraphs."""
2371 return [match.string], 'Text', []
2374 class RFC2822Body(Body):
2376 """
2377 RFC2822 headers are only valid as the first constructs in documents. As
2378 soon as anything else appears, the `Body` state should take over.
2379 """
2381 patterns = Body.patterns.copy() # can't modify the original
2382 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2383 initial_transitions = [(name, 'Body')
2384 for name in Body.initial_transitions]
2385 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2387 def rfc2822(self, match, context, next_state):
2388 """RFC2822-style field list item."""
2389 fieldlist = nodes.field_list(classes=['rfc2822'])
2390 self.parent += fieldlist
2391 field, blank_finish = self.rfc2822_field(match)
2392 fieldlist += field
2393 offset = self.state_machine.line_offset + 1 # next line
2394 newline_offset, blank_finish = self.nested_list_parse(
2395 self.state_machine.input_lines[offset:],
2396 input_offset=self.state_machine.abs_line_offset() + 1,
2397 node=fieldlist, initial_state='RFC2822List',
2398 blank_finish=blank_finish)
2399 self.goto_line(newline_offset)
2400 if not blank_finish:
2401 self.parent += self.unindent_warning(
2402 'RFC2822-style field list')
2403 return [], next_state, []
2405 def rfc2822_field(self, match):
2406 name = match.string[:match.string.find(':')]
2407 indented, indent, line_offset, blank_finish = \
2408 self.state_machine.get_first_known_indented(match.end(),
2409 until_blank=1)
2410 fieldnode = nodes.field()
2411 fieldnode += nodes.field_name(name, name)
2412 fieldbody = nodes.field_body('\n'.join(indented))
2413 fieldnode += fieldbody
2414 if indented:
2415 self.nested_parse(indented, input_offset=line_offset,
2416 node=fieldbody)
2417 return fieldnode, blank_finish
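An illustrative sketch (not in the original source) of RFC2822-style headers; they are only recognized when the parser starts in the RFC2822Body state, for example via Parser(rfc2822=True) as the PEP reader does:

from docutils.frontend import OptionParser
from docutils.parsers.rst import Parser
from docutils.utils import new_document

parser = Parser(rfc2822=True)
settings = OptionParser(components=(Parser,)).get_default_values()
document = new_document('<rfc2822 example>', settings)
parser.parse("Author: A. Name\nStatus: Draft\n\nBody text.\n", document)
print(document.pformat())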
2420 class SpecializedBody(Body):
2422 """
2423 Superclass for second and subsequent compound element members. Compound
2424 elements are lists and list-like constructs.
2426 All transition methods are disabled (redefined as `invalid_input`).
2427 Override individual methods in subclasses to re-enable.
2429 For example, once an initial bullet list item, say, is recognized, the
2430 `BulletList` subclass takes over, with a "bullet_list" node as its
2431 container. Upon encountering the initial bullet list item, `Body.bullet`
2432 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2433 starts up a nested parsing session with `BulletList` as the initial state.
2434 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2435 as only bullet list items are encountered, they are parsed and inserted
2436 into the container. The first construct which is *not* a bullet list item
2437 triggers the `invalid_input` method, which ends the nested parse and
2438 closes the container. `BulletList` needs to recognize input that is
2439 invalid in the context of a bullet list, which means everything *other
2440 than* bullet list items, so it inherits the transition list created in
2441 `Body`.
2442 """
2444 def invalid_input(self, match=None, context=None, next_state=None):
2445 """Not a compound element member. Abort this state machine."""
2446 self.state_machine.previous_line() # back up so parent SM can reassess
2447 raise EOFError
2449 indent = invalid_input
2450 bullet = invalid_input
2451 enumerator = invalid_input
2452 field_marker = invalid_input
2453 option_marker = invalid_input
2454 doctest = invalid_input
2455 line_block = invalid_input
2456 grid_table_top = invalid_input
2457 simple_table_top = invalid_input
2458 explicit_markup = invalid_input
2459 anonymous = invalid_input
2460 line = invalid_input
2461 text = invalid_input
2464 class BulletList(SpecializedBody):
2466 """Second and subsequent bullet_list list_items."""
2468 def bullet(self, match, context, next_state):
2469 """Bullet list item."""
2470 if match.string[0] != self.parent['bullet']:
2471 # different bullet: new list
2472 self.invalid_input()
2473 listitem, blank_finish = self.list_item(match.end())
2474 self.parent += listitem
2475 self.blank_finish = blank_finish
2476 return [], next_state, []
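A small sketch, not part of the original module, of the hand-off described in the SpecializedBody docstring: a change of bullet character makes BulletList.bullet() call invalid_input(), ending the nested parse, so a second bullet_list node is started:

from docutils.core import publish_doctree

source = """\
- item one
- item two

* a different bullet starts a new bullet_list
"""
print(publish_doctree(source).pformat())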
2479 class DefinitionList(SpecializedBody):
2481 """Second and subsequent definition_list_items."""
2483 def text(self, match, context, next_state):
2484 """Definition lists."""
2485 return [match.string], 'Definition', []
2488 class EnumeratedList(SpecializedBody):
2490 """Second and subsequent enumerated_list list_items."""
2492 def enumerator(self, match, context, next_state):
2493 """Enumerated list item."""
2494 format, sequence, text, ordinal = self.parse_enumerator(
2495 match, self.parent['enumtype'])
2496 if ( format != self.format
2497 or (sequence != '#' and (sequence != self.parent['enumtype']
2498 or self.auto
2499 or ordinal != (self.lastordinal + 1)))
2500 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2501 # different enumeration: new list
2502 self.invalid_input()
2503 if sequence == '#':
2504 self.auto = 1
2505 listitem, blank_finish = self.list_item(match.end())
2506 self.parent += listitem
2507 self.blank_finish = blank_finish
2508 self.lastordinal = ordinal
2509 return [], next_state, []
2512 class FieldList(SpecializedBody):
2514 """Second and subsequent field_list fields."""
2516 def field_marker(self, match, context, next_state):
2517 """Field list field."""
2518 field, blank_finish = self.field(match)
2519 self.parent += field
2520 self.blank_finish = blank_finish
2521 return [], next_state, []
2524 class OptionList(SpecializedBody):
2526 """Second and subsequent option_list option_list_items."""
2528 def option_marker(self, match, context, next_state):
2529 """Option list item."""
2530 try:
2531 option_list_item, blank_finish = self.option_list_item(match)
2532 except MarkupError:
2533 self.invalid_input()
2534 self.parent += option_list_item
2535 self.blank_finish = blank_finish
2536 return [], next_state, []
2539 class RFC2822List(SpecializedBody, RFC2822Body):
2541 """Second and subsequent RFC2822-style field_list fields."""
2543 patterns = RFC2822Body.patterns
2544 initial_transitions = RFC2822Body.initial_transitions
2546 def rfc2822(self, match, context, next_state):
2547 """RFC2822-style field list item."""
2548 field, blank_finish = self.rfc2822_field(match)
2549 self.parent += field
2550 self.blank_finish = blank_finish
2551 return [], 'RFC2822List', []
2553 blank = SpecializedBody.invalid_input
2556 class ExtensionOptions(FieldList):
2558 """
2559 Parse field_list fields for extension options.
2561 No nested parsing is done (including inline markup parsing).
2562 """
2564 def parse_field_body(self, indented, offset, node):
2565 """Override `Body.parse_field_body` for simpler parsing."""
2566 lines = []
2567 for line in list(indented) + ['']:
2568 if line.strip():
2569 lines.append(line)
2570 elif lines:
2571 text = '\n'.join(lines)
2572 node += nodes.paragraph(text, text)
2573 lines = []
2576 class LineBlock(SpecializedBody):
2578 """Second and subsequent lines of a line_block."""
2580 blank = SpecializedBody.invalid_input
2582 def line_block(self, match, context, next_state):
2583 """New line of line block."""
2584 lineno = self.state_machine.abs_line_number()
2585 line, messages, blank_finish = self.line_block_line(match, lineno)
2586 self.parent += line
2587 self.parent.parent += messages
2588 self.blank_finish = blank_finish
2589 return [], next_state, []
2592 class Explicit(SpecializedBody):
2594 """Second and subsequent explicit markup construct."""
2596 def explicit_markup(self, match, context, next_state):
2597 """Footnotes, hyperlink targets, directives, comments."""
2598 nodelist, blank_finish = self.explicit_construct(match)
2599 self.parent += nodelist
2600 self.blank_finish = blank_finish
2601 return [], next_state, []
2603 def anonymous(self, match, context, next_state):
2604 """Anonymous hyperlink targets."""
2605 nodelist, blank_finish = self.anonymous_target(match)
2606 self.parent += nodelist
2607 self.blank_finish = blank_finish
2608 return [], next_state, []
2610 blank = SpecializedBody.invalid_input
2613 class SubstitutionDef(Body):
2615 """
2616 Parser for the contents of a substitution_definition element.
2617 """
2619 patterns = {
2620 'embedded_directive': re.compile(r'(%s)::( +|$)'
2621 % Inliner.simplename, re.UNICODE),
2622 'text': r''}
2623 initial_transitions = ['embedded_directive', 'text']
2625 def embedded_directive(self, match, context, next_state):
2626 nodelist, blank_finish = self.directive(match,
2627 alt=self.parent['names'][0])
2628 self.parent += nodelist
2629 if not self.state_machine.at_eof():
2630 self.blank_finish = blank_finish
2631 raise EOFError
2633 def text(self, match, context, next_state):
2634 if not self.state_machine.at_eof():
2635 self.blank_finish = self.state_machine.is_next_line_blank()
2636 raise EOFError
2639 class Text(RSTState):
2641 """
2642 Classifier of second line of a text block.
2644 Could be a paragraph, a definition list item, or a title.
2645 """
2647 patterns = {'underline': Body.patterns['line'],
2648 'text': r''}
2649 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2651 def blank(self, match, context, next_state):
2652 """End of paragraph."""
2653 # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext
2654 paragraph, literalnext = self.paragraph(
2655 context, self.state_machine.abs_line_number() - 1)
2656 self.parent += paragraph
2657 if literalnext:
2658 self.parent += self.literal_block()
2659 return [], 'Body', []
2661 def eof(self, context):
2662 if context:
2663 self.blank(None, context, None)
2664 return []
2666 def indent(self, match, context, next_state):
2667 """Definition list item."""
2668 definitionlist = nodes.definition_list()
2669 definitionlistitem, blank_finish = self.definition_list_item(context)
2670 definitionlist += definitionlistitem
2671 self.parent += definitionlist
2672 offset = self.state_machine.line_offset + 1 # next line
2673 newline_offset, blank_finish = self.nested_list_parse(
2674 self.state_machine.input_lines[offset:],
2675 input_offset=self.state_machine.abs_line_offset() + 1,
2676 node=definitionlist, initial_state='DefinitionList',
2677 blank_finish=blank_finish, blank_finish_state='Definition')
2678 self.goto_line(newline_offset)
2679 if not blank_finish:
2680 self.parent += self.unindent_warning('Definition list')
2681 return [], 'Body', []
2683 def underline(self, match, context, next_state):
2684 """Section title."""
2685 lineno = self.state_machine.abs_line_number()
2686 src, srcline = self.state_machine.get_source_and_line()
2687 title = context[0].rstrip()
2688 underline = match.string.rstrip()
2689 source = title + '\n' + underline
2690 messages = []
2691 if column_width(title) > len(underline):
2692 if len(underline) < 4:
2693 if self.state_machine.match_titles:
2694 msg = self.reporter.info(
2695 'Possible title underline, too short for the title.\n'
2696 "Treating it as ordinary text because it's so short.",
2697 source=src, line=srcline)
2698 self.parent += msg
2699 raise statemachine.TransitionCorrection('text')
2700 else:
2701 blocktext = context[0] + '\n' + self.state_machine.line
2702 msg = self.reporter.warning(
2703 'Title underline too short.',
2704 nodes.literal_block(blocktext, blocktext),
2705 source=src, line=srcline)
2706 messages.append(msg)
2707 if not self.state_machine.match_titles:
2708 blocktext = context[0] + '\n' + self.state_machine.line
2709 msg = self.reporter.severe(
2710 'Unexpected section title.',
2711 nodes.literal_block(blocktext, blocktext),
2712 source=src, line=srcline)
2713 self.parent += messages
2714 self.parent += msg
2715 return [], next_state, []
2716 style = underline[0]
2717 context[:] = []
2718 self.section(title, source, style, lineno - 1, messages)
2719 return [], next_state, []
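An illustrative sketch (not in the original source) of underlined section titles as handled by Text.underline(), which checks the underline length against the title's column width before calling section():

from docutils.core import publish_doctree

source = """\
First Section
=============

Body text.

Second Section
==============

More text.
"""
print(publish_doctree(source).pformat())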
2721 def text(self, match, context, next_state):
2722 """Paragraph."""
2723 startline = self.state_machine.abs_line_number() - 1
2724 msg = None
2725 try:
2726 block = self.state_machine.get_text_block(flush_left=1)
2727 except statemachine.UnexpectedIndentationError, instance:
2728 block, src, srcline = instance.args
2729 msg = self.reporter.error('Unexpected indentation.',
2730 source=src, line=srcline)
2731 lines = context + list(block)
2732 paragraph, literalnext = self.paragraph(lines, startline)
2733 self.parent += paragraph
2734 self.parent += msg
2735 if literalnext:
2736 try:
2737 self.state_machine.next_line()
2738 except EOFError:
2739 pass
2740 self.parent += self.literal_block()
2741 return [], next_state, []
2743 def literal_block(self):
2744 """Return a list of nodes."""
2745 indented, indent, offset, blank_finish = \
2746 self.state_machine.get_indented()
2747 while indented and not indented[-1].strip():
2748 indented.trim_end()
2749 if not indented:
2750 return self.quoted_literal_block()
2751 data = '\n'.join(indented)
2752 literal_block = nodes.literal_block(data, data)
2753 literal_block.line = offset + 1
2754 nodelist = [literal_block]
2755 if not blank_finish:
2756 nodelist.append(self.unindent_warning('Literal block'))
2757 return nodelist
2759 def quoted_literal_block(self):
2760 abs_line_offset = self.state_machine.abs_line_offset()
2761 offset = self.state_machine.line_offset
2762 parent_node = nodes.Element()
2763 new_abs_offset = self.nested_parse(
2764 self.state_machine.input_lines[offset:],
2765 input_offset=abs_line_offset, node=parent_node, match_titles=0,
2766 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2767 'initial_state': 'QuotedLiteralBlock'})
2768 self.goto_line(new_abs_offset)
2769 return parent_node.children
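A minimal sketch, not part of the original module, of a quoted (unindented) literal block; quoted_literal_block() re-parses the following lines with the QuotedLiteralBlock state defined near the end of this module:

from docutils.core import publish_doctree

source = """\
A paragraph introducing a quoted literal block::

> consistently quoted line one
> consistently quoted line two
"""
print(publish_doctree(source).pformat())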
2771 def definition_list_item(self, termline):
2772 indented, indent, line_offset, blank_finish = \
2773 self.state_machine.get_indented()
2774 definitionlistitem = nodes.definition_list_item(
2775 '\n'.join(termline + list(indented)))
2776 lineno = self.state_machine.abs_line_number() - 1
2777 src, srcline = self.state_machine.get_source_and_line()
2778 definitionlistitem.source = src
2779 definitionlistitem.line = srcline - 1
2780 termlist, messages = self.term(termline, lineno)
2781 definitionlistitem += termlist
2782 definition = nodes.definition('', *messages)
2783 definitionlistitem += definition
2784 if termline[0][-2:] == '::':
2785 definition += self.reporter.info(
2786 'Blank line missing before literal block (after the "::")? '
2787 'Interpreted as a definition list item.',
2788 source=src, line=srcline)
2789 self.nested_parse(indented, input_offset=line_offset, node=definition)
2790 return definitionlistitem, blank_finish
2792 classifier_delimiter = re.compile(' +: +')
2794 def term(self, lines, lineno):
2795 """Return a definition_list's term and optional classifiers."""
2796 assert len(lines) == 1
2797 text_nodes, messages = self.inline_text(lines[0], lineno)
2798 term_node = nodes.term()
2799 node_list = [term_node]
2800 for i in range(len(text_nodes)):
2801 node = text_nodes[i]
2802 if isinstance(node, nodes.Text):
2803 parts = self.classifier_delimiter.split(node.rawsource)
2804 if len(parts) == 1:
2805 node_list[-1] += node
2806 else:
2808 node_list[-1] += nodes.Text(parts[0].rstrip())
2809 for part in parts[1:]:
2810 classifier_node = nodes.classifier('', part)
2811 node_list.append(classifier_node)
2812 else:
2813 node_list[-1] += node
2814 return node_list, messages
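An illustrative sketch (not in the original source) of a definition list term with classifiers, split on the ' : ' classifier_delimiter by term():

from docutils.core import publish_doctree

source = """\
term : classifier one : classifier two
    The definition body.
"""
print(publish_doctree(source).pformat())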
2817 class SpecializedText(Text):
2819 """
2820 Superclass for second and subsequent lines of Text-variants.
2822 All transition methods are disabled. Override individual methods in
2823 subclasses to re-enable.
2824 """
2826 def eof(self, context):
2827 """Incomplete construct."""
2828 return []
2830 def invalid_input(self, match=None, context=None, next_state=None):
2831 """Not a compound element member. Abort this state machine."""
2832 raise EOFError
2834 blank = invalid_input
2835 indent = invalid_input
2836 underline = invalid_input
2837 text = invalid_input
2840 class Definition(SpecializedText):
2842 """Second line of potential definition_list_item."""
2844 def eof(self, context):
2845 """Not a definition."""
2846 self.state_machine.previous_line(2) # so parent SM can reassess
2847 return []
2849 def indent(self, match, context, next_state):
2850 """Definition list item."""
2851 definitionlistitem, blank_finish = self.definition_list_item(context)
2852 self.parent += definitionlistitem
2853 self.blank_finish = blank_finish
2854 return [], 'DefinitionList', []
2857 class Line(SpecializedText):
2859 """
2860 Second line of over- & underlined section title or transition marker.
2861 """
2863 eofcheck = 1 # @@@ ???
2864 """Set to 0 while parsing sections, so that we don't catch the EOF."""
2866 def eof(self, context):
2867 """Transition marker at end of section or document."""
2868 marker = context[0].strip()
2869 if self.memo.section_bubble_up_kludge:
2870 self.memo.section_bubble_up_kludge = 0
2871 elif len(marker) < 4:
2872 self.state_correction(context)
2873 if self.eofcheck: # ignore EOFError with sections
2874 lineno = self.state_machine.abs_line_number() - 1
2875 transition = nodes.transition(rawsource=context[0])
2876 transition.line = lineno
2877 self.parent += transition
2878 self.eofcheck = 1
2879 return []
2881 def blank(self, match, context, next_state):
2882 """Transition marker."""
2883 src, srcline = self.state_machine.get_source_and_line()
2884 marker = context[0].strip()
2885 if len(marker) < 4:
2886 self.state_correction(context)
2887 transition = nodes.transition(rawsource=marker)
2888 transition.source = src
2889 transition.line = srcline - 1
2890 self.parent += transition
2891 return [], 'Body', []
2893 def text(self, match, context, next_state):
2894 """Potential over- & underlined title."""
2895 lineno = self.state_machine.abs_line_number() - 1
2896 src, srcline = self.state_machine.get_source_and_line()
2897 overline = context[0]
2898 title = match.string
2899 underline = ''
2900 try:
2901 underline = self.state_machine.next_line()
2902 except EOFError:
2903 blocktext = overline + '\n' + title
2904 if len(overline.rstrip()) < 4:
2905 self.short_overline(context, blocktext, lineno, 2)
2906 else:
2907 msg = self.reporter.severe(
2908 'Incomplete section title.',
2909 nodes.literal_block(blocktext, blocktext),
2910 source=src, line=srcline-1)
2911 self.parent += msg
2912 return [], 'Body', []
2913 source = '%s\n%s\n%s' % (overline, title, underline)
2914 overline = overline.rstrip()
2915 underline = underline.rstrip()
2916 if not self.transitions['underline'][0].match(underline):
2917 blocktext = overline + '\n' + title + '\n' + underline
2918 if len(overline.rstrip()) < 4:
2919 self.short_overline(context, blocktext, lineno, 2)
2920 else:
2921 msg = self.reporter.severe(
2922 'Missing matching underline for section title overline.',
2923 nodes.literal_block(source, source),
2924 source=src, line=srcline-1)
2925 self.parent += msg
2926 return [], 'Body', []
2927 elif overline != underline:
2928 blocktext = overline + '\n' + title + '\n' + underline
2929 if len(overline.rstrip()) < 4:
2930 self.short_overline(context, blocktext, lineno, 2)
2931 else:
2932 msg = self.reporter.severe(
2933 'Title overline & underline mismatch.',
2934 nodes.literal_block(source, source),
2935 source=src, line=srcline-1)
2936 self.parent += msg
2937 return [], 'Body', []
2938 title = title.rstrip()
2939 messages = []
2940 if column_width(title) > len(overline):
2941 blocktext = overline + '\n' + title + '\n' + underline
2942 if len(overline.rstrip()) < 4:
2943 self.short_overline(context, blocktext, lineno, 2)
2944 else:
2945 msg = self.reporter.warning(
2946 'Title overline too short.',
2947 nodes.literal_block(source, source),
2948 source=src, line=srcline-1)
2949 messages.append(msg)
2950 style = (overline[0], underline[0])
2951 self.eofcheck = 0 # @@@ not sure this is correct
2952 self.section(title.lstrip(), source, style, lineno + 1, messages)
2953 self.eofcheck = 1
2954 return [], 'Body', []
2956 indent = text # indented title
2958 def underline(self, match, context, next_state):
2959 overline = context[0]
2960 blocktext = overline + '\n' + self.state_machine.line
2961 lineno = self.state_machine.abs_line_number() - 1
2962 src, srcline = self.state_machine.get_source_and_line()
2963 if len(overline.rstrip()) < 4:
2964 self.short_overline(context, blocktext, lineno, 1)
2965 msg = self.reporter.error(
2966 'Invalid section title or transition marker.',
2967 nodes.literal_block(blocktext, blocktext),
2968 source=src, line=srcline-1)
2969 self.parent += msg
2970 return [], 'Body', []
2972 def short_overline(self, context, blocktext, lineno, lines=1):
2973 src, srcline = self.state_machine.get_source_and_line(lineno)
2974 msg = self.reporter.info(
2975 'Possible incomplete section title.\nTreating the overline as '
2976 "ordinary text because it's so short.",
2977 source=src, line=srcline)
2978 self.parent += msg
2979 self.state_correction(context, lines)
2981 def state_correction(self, context, lines=1):
2982 self.state_machine.previous_line(lines)
2983 context[:] = []
2984 raise statemachine.StateCorrection('Body', 'text')
2987 class QuotedLiteralBlock(RSTState):
2989 """
2990 Nested parse handler for quoted (unindented) literal blocks.
2992 Special-purpose. Not for inclusion in `state_classes`.
2993 """
2995 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
2996 'text': r''}
2997 initial_transitions = ('initial_quoted', 'text')
2999 def __init__(self, state_machine, debug=0):
3000 RSTState.__init__(self, state_machine, debug)
3001 self.messages = []
3002 self.initial_lineno = None
3004 def blank(self, match, context, next_state):
3005 if context:
3006 raise EOFError
3007 else:
3008 return context, next_state, []
3010 def eof(self, context):
3011 if context:
3012 src, srcline = self.state_machine.get_source_and_line(
3013 self.initial_lineno)
3014 text = '\n'.join(context)
3015 literal_block = nodes.literal_block(text, text)
3016 literal_block.source = src
3017 literal_block.line = srcline
3018 self.parent += literal_block
3019 else:
3020 self.parent += self.reporter.warning(
3021 'Literal block expected; none found.',
3022 line=self.state_machine.abs_line_number())
3023 # src not available, because statemachine.input_lines is empty
3024 self.state_machine.previous_line()
3025 self.parent += self.messages
3026 return []
3028 def indent(self, match, context, next_state):
3029 assert context, ('QuotedLiteralBlock.indent: context should not '
3030 'be empty!')
3031 self.messages.append(
3032 self.reporter.error('Unexpected indentation.',
3033 line=self.state_machine.abs_line_number()))
3034 self.state_machine.previous_line()
3035 raise EOFError
3037 def initial_quoted(self, match, context, next_state):
3038 """Match arbitrary quote character on the first line only."""
3039 self.remove_transition('initial_quoted')
3040 quote = match.string[0]
3041 pattern = re.compile(re.escape(quote), re.UNICODE)
3042 # New transition matches consistent quotes only:
3043 self.add_transition('quoted',
3044 (pattern, self.quoted, self.__class__.__name__))
3045 self.initial_lineno = self.state_machine.abs_line_number()
3046 return [match.string], next_state, []
3048 def quoted(self, match, context, next_state):
3049 """Match consistent quotes on subsequent lines."""
3050 context.append(match.string)
3051 return context, next_state, []
3053 def text(self, match, context, next_state):
3054 if context:
3055 src, srcline = self.state_machine.get_source_and_line()
3056 self.messages.append(
3057 self.reporter.error('Inconsistent literal block quoting.',
3058 source=src, line=srcline))
3059 self.state_machine.previous_line()
3060 raise EOFError
3063 state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
3064 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
3065 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
3066 """Standard set of State classes used to start `RSTStateMachine`."""