docutils/parsers/rst/states.py
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
9 :Classes:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items.
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
31 :Exception classes:
32 - `MarkupError`
33 - `ParserError`
34 - `MarkupMismatch`
36 :Functions:
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
40 :Attributes:
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
43 Parser Overview
44 ===============
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
96 used.
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
101 """
103 __docformat__ = 'reStructuredText'
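# Illustrative usage sketch (editorial addition, not part of the original
# module).  The state machine below is normally driven through the public
# docutils API, roughly:
#
#     >>> from docutils.frontend import OptionParser
#     >>> from docutils.parsers.rst import Parser
#     >>> from docutils.utils import new_document
#     >>> parser = Parser()
#     >>> settings = OptionParser(components=(Parser,)).get_default_values()
#     >>> document = new_document('<usage sketch>', settings)
#     >>> parser.parse('One *emphasized* word.\n', document)
#     >>> print document.pformat()   # doctree containing an <emphasis> node
#
# `Parser.parse()` instantiates `RSTStateMachine` (defined below) and calls
# its `run()` method with the input lines and the empty `document` node.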
106 import sys
107 import re
108 import roman
109 from types import FunctionType, MethodType
110 from docutils import nodes, statemachine, utils, urischemes
111 from docutils import ApplicationError, DataError
112 from docutils.statemachine import StateMachineWS, StateWS
113 from docutils.nodes import fully_normalize_name as normalize_name
114 from docutils.nodes import whitespace_normalize_name
115 from docutils.utils import escape2null, unescape, column_width
116 import docutils.parsers.rst
117 from docutils.parsers.rst import directives, languages, tableparser, roles
118 from docutils.parsers.rst.languages import en as _fallback_language_module
121 class MarkupError(DataError): pass
122 class UnknownInterpretedRoleError(DataError): pass
123 class InterpretedRoleNotImplementedError(DataError): pass
124 class ParserError(ApplicationError): pass
125 class MarkupMismatch(Exception): pass
128 class Struct:
130 """Stores data attributes for dotted-attribute access."""
132 def __init__(self, **keywordargs):
133 self.__dict__.update(keywordargs)
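# Editorial example (not in the original source): `Struct` simply exposes
# its keyword arguments as attributes, e.g.
#     Struct(section_level=0, title_styles=[]).section_level  ->  0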
136 class RSTStateMachine(StateMachineWS):
139 reStructuredText's master StateMachine.
141 The entry point to reStructuredText parsing is the `run()` method.
144 def run(self, input_lines, document, input_offset=0, match_titles=1,
145 inliner=None):
147 Parse `input_lines` and modify the `document` node in place.
149 Extend `StateMachineWS.run()`: set up parse-global data and
150 run the StateMachine.
152 self.language = languages.get_language(
153 document.settings.language_code)
154 self.match_titles = match_titles
155 if inliner is None:
156 inliner = Inliner()
157 inliner.init_customizations(document.settings)
158 self.memo = Struct(document=document,
159 reporter=document.reporter,
160 language=self.language,
161 title_styles=[],
162 section_level=0,
163 section_bubble_up_kludge=0,
164 inliner=inliner)
165 self.document = document
166 self.attach_observer(document.note_source)
167 self.reporter = self.memo.reporter
168 self.node = document
169 results = StateMachineWS.run(self, input_lines, input_offset,
170 input_source=document['source'])
171 assert results == [], 'RSTStateMachine.run() results should be empty!'
172 self.node = self.memo = None # remove unneeded references
175 class NestedStateMachine(StateMachineWS):
178 StateMachine run from within other StateMachine runs, to parse nested
179 document structures.
182 def run(self, input_lines, input_offset, memo, node, match_titles=1):
184 Parse `input_lines` and populate a `docutils.nodes.document` instance.
186 Extend `StateMachineWS.run()`: set up document-wide data.
188 self.match_titles = match_titles
189 self.memo = memo
190 self.document = memo.document
191 self.attach_observer(self.document.note_source)
192 self.reporter = memo.reporter
193 self.language = memo.language
194 self.node = node
195 results = StateMachineWS.run(self, input_lines, input_offset)
196 assert results == [], ('NestedStateMachine.run() results should be '
197 'empty!')
198 return results
201 class RSTState(StateWS):
204 reStructuredText State superclass.
206 Contains methods used by all State subclasses.
209 nested_sm = NestedStateMachine
211 def __init__(self, state_machine, debug=0):
212 self.nested_sm_kwargs = {'state_classes': state_classes,
213 'initial_state': 'Body'}
214 StateWS.__init__(self, state_machine, debug)
216 def runtime_init(self):
217 StateWS.runtime_init(self)
218 memo = self.state_machine.memo
219 self.memo = memo
220 self.reporter = memo.reporter
221 self.inliner = memo.inliner
222 self.document = memo.document
223 self.parent = self.state_machine.node
225 def goto_line(self, abs_line_offset):
227 Jump to input line `abs_line_offset`, ignoring jumps past the end.
229 try:
230 self.state_machine.goto_line(abs_line_offset)
231 except EOFError:
232 pass
234 def no_match(self, context, transitions):
236 Override `StateWS.no_match` to generate a system message.
238 This code should never be run.
240 self.reporter.severe(
241 'Internal error: no transition pattern match. State: "%s"; '
242 'transitions: %s; context: %s; current line: %r.'
243 % (self.__class__.__name__, transitions, context,
244 self.state_machine.line),
245 line=self.state_machine.abs_line_number())
246 return context, None, []
248 def bof(self, context):
249 """Called at beginning of file."""
250 return [], []
252 def nested_parse(self, block, input_offset, node, match_titles=0,
253 state_machine_class=None, state_machine_kwargs=None):
255 Create a new StateMachine rooted at `node` and run it over the input
256 `block`.
258 if state_machine_class is None:
259 state_machine_class = self.nested_sm
260 if state_machine_kwargs is None:
261 state_machine_kwargs = self.nested_sm_kwargs
262 block_length = len(block)
263 state_machine = state_machine_class(debug=self.debug,
264 **state_machine_kwargs)
265 state_machine.run(block, input_offset, memo=self.memo,
266 node=node, match_titles=match_titles)
267 state_machine.unlink()
268 new_offset = state_machine.abs_line_offset()
269 # No `block.parent` implies disconnected -- lines aren't in sync:
270 if block.parent and (len(block) - block_length) != 0:
271 # Adjustment for block if modified in nested parse:
272 self.state_machine.next_line(len(block) - block_length)
273 return new_offset
275 def nested_list_parse(self, block, input_offset, node, initial_state,
276 blank_finish,
277 blank_finish_state=None,
278 extra_settings={},
279 match_titles=0,
280 state_machine_class=None,
281 state_machine_kwargs=None):
283 Create a new StateMachine rooted at `node` and run it over the input
284 `block`. Also keep track of optional intermediate blank lines and the
285 required final one.
287 if state_machine_class is None:
288 state_machine_class = self.nested_sm
289 if state_machine_kwargs is None:
290 state_machine_kwargs = self.nested_sm_kwargs.copy()
291 state_machine_kwargs['initial_state'] = initial_state
292 state_machine = state_machine_class(debug=self.debug,
293 **state_machine_kwargs)
294 if blank_finish_state is None:
295 blank_finish_state = initial_state
296 state_machine.states[blank_finish_state].blank_finish = blank_finish
297 for key, value in extra_settings.items():
298 setattr(state_machine.states[initial_state], key, value)
299 state_machine.run(block, input_offset, memo=self.memo,
300 node=node, match_titles=match_titles)
301 blank_finish = state_machine.states[blank_finish_state].blank_finish
302 state_machine.unlink()
303 return state_machine.abs_line_offset(), blank_finish
305 def section(self, title, source, style, lineno, messages):
306 """Check for a valid subsection and create one if it checks out."""
307 if self.check_subsection(source, style, lineno):
308 self.new_subsection(title, lineno, messages)
310 def check_subsection(self, source, style, lineno):
312 Check for a valid subsection header. Return 1 (true) or None (false).
314 When a new section is reached that isn't a subsection of the current
315 section, back up the line count (use ``previous_line(-x)``), then
316 ``raise EOFError``. The current StateMachine will finish, then the
317 calling StateMachine can re-examine the title. This will work its way
318 back up the calling chain until the correct section level is reached.
320 @@@ Alternative: Evaluate the title, store the title info & level, and
321 back up the chain until that level is reached. Store in memo? Or
322 return in results?
324 :Exception: `EOFError` when a sibling or supersection encountered.
326 memo = self.memo
327 title_styles = memo.title_styles
328 mylevel = memo.section_level
329 try: # check for existing title style
330 level = title_styles.index(style) + 1
331 except ValueError: # new title style
332 if len(title_styles) == memo.section_level: # new subsection
333 title_styles.append(style)
334 return 1
335 else: # not at lowest level
336 self.parent += self.title_inconsistent(source, lineno)
337 return None
338 if level <= mylevel: # sibling or supersection
339 memo.section_level = level # bubble up to parent section
340 if len(style) == 2:
341 memo.section_bubble_up_kludge = 1
342 # back up 2 lines for underline title, 3 for overline title
343 self.state_machine.previous_line(len(style) + 1)
344 raise EOFError # let parent section re-evaluate
345 if level == mylevel + 1: # immediate subsection
346 return 1
347 else: # invalid subsection
348 self.parent += self.title_inconsistent(source, lineno)
349 return None
351 def title_inconsistent(self, sourcetext, lineno):
352 error = self.reporter.severe(
353 'Title level inconsistent:', nodes.literal_block('', sourcetext),
354 line=lineno)
355 return error
357 def new_subsection(self, title, lineno, messages):
358 """Append new subsection to document tree. On return, check level."""
359 memo = self.memo
360 mylevel = memo.section_level
361 memo.section_level += 1
362 section_node = nodes.section()
363 self.parent += section_node
364 textnodes, title_messages = self.inline_text(title, lineno)
365 titlenode = nodes.title(title, '', *textnodes)
366 name = normalize_name(titlenode.astext())
367 section_node['names'].append(name)
368 section_node += titlenode
369 section_node += messages
370 section_node += title_messages
371 self.document.note_implicit_target(section_node, section_node)
372 offset = self.state_machine.line_offset + 1
373 absoffset = self.state_machine.abs_line_offset() + 1
374 newabsoffset = self.nested_parse(
375 self.state_machine.input_lines[offset:], input_offset=absoffset,
376 node=section_node, match_titles=1)
377 self.goto_line(newabsoffset)
378 if memo.section_level <= mylevel: # can't handle next section?
379 raise EOFError # bubble up to supersection
380 # reset section_level; next pass will detect it properly
381 memo.section_level = mylevel
383 def paragraph(self, lines, lineno):
385 Return a list (paragraph & messages) & a boolean: literal_block next?
387 data = '\n'.join(lines).rstrip()
388 if re.search(r'(?<!\\)(\\\\)*::$', data):
389 if len(data) == 2:
390 return [], 1
391 elif data[-3] in ' \n':
392 text = data[:-3].rstrip()
393 else:
394 text = data[:-1]
395 literalnext = 1
396 else:
397 text = data
398 literalnext = 0
399 textnodes, messages = self.inline_text(text, lineno)
400 p = nodes.paragraph(data, '', *textnodes)
401 p.line = lineno
402 return [p] + messages, literalnext
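# Editorial sketch of the trailing-"::" handling above, assuming `lines`
# joins to the strings shown on the left:
#     'Paragraph::'   ->  paragraph text 'Paragraph:',  literalnext = 1
#     'Paragraph ::'  ->  paragraph text 'Paragraph',   literalnext = 1
#     '::'            ->  no paragraph ([]),            literalnext = 1
#     'Paragraph.'    ->  paragraph text 'Paragraph.',  literalnext = 0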
404 def inline_text(self, text, lineno):
406 Return 2 lists: nodes (text and inline elements), and system_messages.
408 return self.inliner.parse(text, lineno, self.memo, self.parent)
410 def unindent_warning(self, node_name):
411 return self.reporter.warning(
412 '%s ends without a blank line; unexpected unindent.' % node_name,
413 line=(self.state_machine.abs_line_number() + 1))
416 def build_regexp(definition, compile=1):
418 Build, compile and return a regular expression based on `definition`.
420 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
421 where "parts" is a list of regular expressions and/or regular
422 expression definitions to be joined into an or-group.
424 name, prefix, suffix, parts = definition
425 part_strings = []
426 for part in parts:
427 if type(part) is tuple:
428 part_strings.append(build_regexp(part, None))
429 else:
430 part_strings.append(part)
431 or_group = '|'.join(part_strings)
432 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
433 if compile:
434 return re.compile(regexp, re.UNICODE)
435 else:
436 return regexp
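# Editorial sketch: a hypothetical definition tuple and the pattern built
# from it ('marker' and the parts below are examples only):
#     build_regexp(('marker', '', '', [r'\*\*', r'\*(?!\*)']), compile=0)
#     ->  r'(?P<marker>\*\*|\*(?!\*))'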
439 class Inliner:
442 Parse inline markup; call the `parse()` method.
445 def __init__(self):
446 self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
447 """List of (pattern, bound method) tuples, used by
448 `self.implicit_inline`."""
450 def init_customizations(self, settings):
451 """Setting-based customizations; run when parsing begins."""
452 if settings.pep_references:
453 self.implicit_dispatch.append((self.patterns.pep,
454 self.pep_reference))
455 if settings.rfc_references:
456 self.implicit_dispatch.append((self.patterns.rfc,
457 self.rfc_reference))
459 def parse(self, text, lineno, memo, parent):
460 # Needs to be refactored for nested inline markup.
461 # Add nested_parse() method?
463 Return 2 lists: nodes (text and inline elements), and system_messages.
465 Using `self.patterns.initial`, a pattern which matches start-strings
466 (emphasis, strong, interpreted, phrase reference, literal,
467 substitution reference, and inline target) and complete constructs
468 (simple reference, footnote reference), search for a candidate. When
469 one is found, check for validity (e.g., not a quoted '*' character).
470 If valid, search for the corresponding end string if applicable, and
471 check it for validity. If not found or invalid, generate a warning
472 and ignore the start-string. Implicit inline markup (e.g. standalone
473 URIs) is found last.
475 self.reporter = memo.reporter
476 self.document = memo.document
477 self.language = memo.language
478 self.parent = parent
479 pattern_search = self.patterns.initial.search
480 dispatch = self.dispatch
481 remaining = escape2null(text)
482 processed = []
483 unprocessed = []
484 messages = []
485 while remaining:
486 match = pattern_search(remaining)
487 if match:
488 groups = match.groupdict()
489 method = dispatch[groups['start'] or groups['backquote']
490 or groups['refend'] or groups['fnend']]
491 before, inlines, remaining, sysmessages = method(self, match,
492 lineno)
493 unprocessed.append(before)
494 messages += sysmessages
495 if inlines:
496 processed += self.implicit_inline(''.join(unprocessed),
497 lineno)
498 processed += inlines
499 unprocessed = []
500 else:
501 break
502 remaining = ''.join(unprocessed) + remaining
503 if remaining:
504 processed += self.implicit_inline(remaining, lineno)
505 return processed, messages
507 openers = u'\'"([{<\u2018\u201c\xab\u00a1\u00bf' # see quoted_start below
508 closers = u'\'")]}>\u2019\u201d\xbb!?'
509 unicode_delimiters = u'\u2010\u2011\u2012\u2013\u2014\u00a0'
510 start_string_prefix = (ur'((?<=^)|(?<=[-/: \n\u2019%s%s]))'
511 % (re.escape(unicode_delimiters),
512 re.escape(openers)))
513 end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
514 % (re.escape(unicode_delimiters),
515 re.escape(closers)))
516 non_whitespace_before = r'(?<![ \n])'
517 non_whitespace_escape_before = r'(?<![ \n\x00])'
518 non_whitespace_after = r'(?![ \n])'
519 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
520 simplename = r'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
521 # Valid URI characters (see RFC 2396 & RFC 2732);
522 # final \x00 allows backslash escapes in URIs:
523 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
524 # Delimiter indicating the end of a URI (not part of the URI):
525 uri_end_delim = r"""[>]"""
526 # Last URI character; same as uric but no punctuation:
527 urilast = r"""[_~*/=+a-zA-Z0-9]"""
528 # End of a URI (either 'urilast' or 'uric followed by a
529 # uri_end_delim'):
530 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
531 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
532 email_pattern = r"""
533 %(emailc)s+(?:\.%(emailc)s+)* # name
534 (?<!\x00)@ # at
535 %(emailc)s+(?:\.%(emailc)s*)* # host
536 %(uri_end)s # final URI char
538 parts = ('initial_inline', start_string_prefix, '',
539 [('start', '', non_whitespace_after, # simple start-strings
540 [r'\*\*', # strong
541 r'\*(?!\*)', # emphasis but not strong
542 r'``', # literal
543 r'_`', # inline internal target
544 r'\|(?!\|)'] # substitution reference
546 ('whole', '', end_string_suffix, # whole constructs
547 [# reference name & end-string
548 r'(?P<refname>%s)(?P<refend>__?)' % simplename,
549 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
550 [r'[0-9]+', # manually numbered
551 r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
552 r'\*', # auto-symbol
553 r'(?P<citationlabel>%s)' % simplename] # citation reference
557 ('backquote', # interpreted text or phrase reference
558 '(?P<role>(:%s:)?)' % simplename, # optional role
559 non_whitespace_after,
560 ['`(?!`)'] # but not literal
564 patterns = Struct(
565 initial=build_regexp(parts),
566 emphasis=re.compile(non_whitespace_escape_before
567 + r'(\*)' + end_string_suffix),
568 strong=re.compile(non_whitespace_escape_before
569 + r'(\*\*)' + end_string_suffix),
570 interpreted_or_phrase_ref=re.compile(
571 r"""
572 %(non_whitespace_escape_before)s
575 (?P<suffix>
576 (?P<role>:%(simplename)s:)?
577 (?P<refend>__?)?
580 %(end_string_suffix)s
581 """ % locals(), re.VERBOSE | re.UNICODE),
582 embedded_uri=re.compile(
583 r"""
585 (?:[ \n]+|^) # spaces or beginning of line/string
586 < # open bracket
587 %(non_whitespace_after)s
588 ([^<>\x00]+) # anything but angle brackets & nulls
589 %(non_whitespace_before)s
590 > # close bracket w/o whitespace before
592 $ # end of string
593 """ % locals(), re.VERBOSE),
594 literal=re.compile(non_whitespace_before + '(``)'
595 + end_string_suffix),
596 target=re.compile(non_whitespace_escape_before
597 + r'(`)' + end_string_suffix),
598 substitution_ref=re.compile(non_whitespace_escape_before
599 + r'(\|_{0,2})'
600 + end_string_suffix),
601 email=re.compile(email_pattern % locals() + '$', re.VERBOSE),
602 uri=re.compile(
603 (r"""
604 %(start_string_prefix)s
605 (?P<whole>
606 (?P<absolute> # absolute URI
607 (?P<scheme> # scheme (http, ftp, mailto)
608 [a-zA-Z][a-zA-Z0-9.+-]*
612 ( # either:
613 (//?)? # hierarchical URI
614 %(uric)s* # URI characters
615 %(uri_end)s # final URI char
617 ( # optional query
618 \?%(uric)s*
619 %(uri_end)s
621 ( # optional fragment
622 \#%(uric)s*
623 %(uri_end)s
627 | # *OR*
628 (?P<email> # email address
629 """ + email_pattern + r"""
632 %(end_string_suffix)s
633 """) % locals(), re.VERBOSE),
634 pep=re.compile(
635 r"""
636 %(start_string_prefix)s
638 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
640 (PEP\s+(?P<pepnum2>\d+)) # reference by name
642 %(end_string_suffix)s""" % locals(), re.VERBOSE),
643 rfc=re.compile(
644 r"""
645 %(start_string_prefix)s
646 (RFC(-|\s+)?(?P<rfcnum>\d+))
647 %(end_string_suffix)s""" % locals(), re.VERBOSE))
649 def quoted_start(self, match):
650 """Return 1 if inline markup start-string is 'quoted', 0 if not."""
651 string = match.string
652 start = match.start()
653 end = match.end()
654 if start == 0: # start-string at beginning of text
655 return 0
656 prestart = string[start - 1]
657 try:
658 poststart = string[end]
659 if self.openers.index(prestart) \
660 == self.closers.index(poststart): # quoted
661 return 1
662 except IndexError: # start-string at end of text
663 return 1
664 except ValueError: # not quoted
665 pass
666 return 0
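# Editorial example: a start-string wrapped in a matching opener/closer
# pair is considered quoted and is not treated as markup, e.g. the '*' in
#     He wrote "*" on the board.
# Here prestart '"' and poststart '"' share the same index in `openers`
# and `closers`, so quoted_start() returns 1.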
668 def inline_obj(self, match, lineno, end_pattern, nodeclass,
669 restore_backslashes=0):
670 string = match.string
671 matchstart = match.start('start')
672 matchend = match.end('start')
673 if self.quoted_start(match):
674 return (string[:matchend], [], string[matchend:], [], '')
675 endmatch = end_pattern.search(string[matchend:])
676 if endmatch and endmatch.start(1): # 1 or more chars
677 text = unescape(endmatch.string[:endmatch.start(1)],
678 restore_backslashes)
679 textend = matchend + endmatch.end(1)
680 rawsource = unescape(string[matchstart:textend], 1)
681 return (string[:matchstart], [nodeclass(rawsource, text)],
682 string[textend:], [], endmatch.group(1))
683 msg = self.reporter.warning(
684 'Inline %s start-string without end-string.'
685 % nodeclass.__name__, line=lineno)
686 text = unescape(string[matchstart:matchend], 1)
687 rawsource = unescape(string[matchstart:matchend], 1)
688 prb = self.problematic(text, rawsource, msg)
689 return string[:matchstart], [prb], string[matchend:], [msg], ''
691 def problematic(self, text, rawsource, message):
692 msgid = self.document.set_id(message, self.parent)
693 problematic = nodes.problematic(rawsource, text, refid=msgid)
694 prbid = self.document.set_id(problematic)
695 message.add_backref(prbid)
696 return problematic
698 def emphasis(self, match, lineno):
699 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
700 match, lineno, self.patterns.emphasis, nodes.emphasis)
701 return before, inlines, remaining, sysmessages
703 def strong(self, match, lineno):
704 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
705 match, lineno, self.patterns.strong, nodes.strong)
706 return before, inlines, remaining, sysmessages
708 def interpreted_or_phrase_ref(self, match, lineno):
709 end_pattern = self.patterns.interpreted_or_phrase_ref
710 string = match.string
711 matchstart = match.start('backquote')
712 matchend = match.end('backquote')
713 rolestart = match.start('role')
714 role = match.group('role')
715 position = ''
716 if role:
717 role = role[1:-1]
718 position = 'prefix'
719 elif self.quoted_start(match):
720 return (string[:matchend], [], string[matchend:], [])
721 endmatch = end_pattern.search(string[matchend:])
722 if endmatch and endmatch.start(1): # 1 or more chars
723 textend = matchend + endmatch.end()
724 if endmatch.group('role'):
725 if role:
726 msg = self.reporter.warning(
727 'Multiple roles in interpreted text (both '
728 'prefix and suffix present; only one allowed).',
729 line=lineno)
730 text = unescape(string[rolestart:textend], 1)
731 prb = self.problematic(text, text, msg)
732 return string[:rolestart], [prb], string[textend:], [msg]
733 role = endmatch.group('suffix')[1:-1]
734 position = 'suffix'
735 escaped = endmatch.string[:endmatch.start(1)]
736 rawsource = unescape(string[matchstart:textend], 1)
737 if rawsource[-1:] == '_':
738 if role:
739 msg = self.reporter.warning(
740 'Mismatch: both interpreted text role %s and '
741 'reference suffix.' % position, line=lineno)
742 text = unescape(string[rolestart:textend], 1)
743 prb = self.problematic(text, text, msg)
744 return string[:rolestart], [prb], string[textend:], [msg]
745 return self.phrase_ref(string[:matchstart], string[textend:],
746 rawsource, escaped, unescape(escaped))
747 else:
748 rawsource = unescape(string[rolestart:textend], 1)
749 nodelist, messages = self.interpreted(rawsource, escaped, role,
750 lineno)
751 return (string[:rolestart], nodelist,
752 string[textend:], messages)
753 msg = self.reporter.warning(
754 'Inline interpreted text or phrase reference start-string '
755 'without end-string.', line=lineno)
756 text = unescape(string[matchstart:matchend], 1)
757 prb = self.problematic(text, text, msg)
758 return string[:matchstart], [prb], string[matchend:], [msg]
760 def phrase_ref(self, before, after, rawsource, escaped, text):
761 match = self.patterns.embedded_uri.search(escaped)
762 if match:
763 text = unescape(escaped[:match.start(0)])
764 uri_text = match.group(2)
765 uri = ''.join(uri_text.split())
766 uri = self.adjust_uri(uri)
767 if uri:
768 target = nodes.target(match.group(1), refuri=uri)
769 else:
770 raise ApplicationError('problem with URI: %r' % uri_text)
771 if not text:
772 text = uri
773 else:
774 target = None
775 refname = normalize_name(text)
776 reference = nodes.reference(rawsource, text,
777 name=whitespace_normalize_name(text))
778 node_list = [reference]
779 if rawsource[-2:] == '__':
780 if target:
781 reference['refuri'] = uri
782 else:
783 reference['anonymous'] = 1
784 else:
785 if target:
786 reference['refuri'] = uri
787 target['names'].append(refname)
788 self.document.note_explicit_target(target, self.parent)
789 node_list.append(target)
790 else:
791 reference['refname'] = refname
792 self.document.note_refname(reference)
793 return before, node_list, after, []
795 def adjust_uri(self, uri):
796 match = self.patterns.email.match(uri)
797 if match:
798 return 'mailto:' + uri
799 else:
800 return uri
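# Editorial sketch of adjust_uri():
#     adjust_uri('someone@example.org')  ->  'mailto:someone@example.org'
#     adjust_uri('http://example.org/')  ->  'http://example.org/' (unchanged)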
802 def interpreted(self, rawsource, text, role, lineno):
803 role_fn, messages = roles.role(role, self.language, lineno,
804 self.reporter)
805 if role_fn:
806 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
807 return nodes, messages + messages2
808 else:
809 msg = self.reporter.error(
810 'Unknown interpreted text role "%s".' % role,
811 line=lineno)
812 return ([self.problematic(rawsource, rawsource, msg)],
813 messages + [msg])
815 def literal(self, match, lineno):
816 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
817 match, lineno, self.patterns.literal, nodes.literal,
818 restore_backslashes=1)
819 return before, inlines, remaining, sysmessages
821 def inline_internal_target(self, match, lineno):
822 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
823 match, lineno, self.patterns.target, nodes.target)
824 if inlines and isinstance(inlines[0], nodes.target):
825 assert len(inlines) == 1
826 target = inlines[0]
827 name = normalize_name(target.astext())
828 target['names'].append(name)
829 self.document.note_explicit_target(target, self.parent)
830 return before, inlines, remaining, sysmessages
832 def substitution_reference(self, match, lineno):
833 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
834 match, lineno, self.patterns.substitution_ref,
835 nodes.substitution_reference)
836 if len(inlines) == 1:
837 subref_node = inlines[0]
838 if isinstance(subref_node, nodes.substitution_reference):
839 subref_text = subref_node.astext()
840 self.document.note_substitution_ref(subref_node, subref_text)
841 if endstring[-1:] == '_':
842 reference_node = nodes.reference(
843 '|%s%s' % (subref_text, endstring), '')
844 if endstring[-2:] == '__':
845 reference_node['anonymous'] = 1
846 else:
847 reference_node['refname'] = normalize_name(subref_text)
848 self.document.note_refname(reference_node)
849 reference_node += subref_node
850 inlines = [reference_node]
851 return before, inlines, remaining, sysmessages
853 def footnote_reference(self, match, lineno):
855 Handles `nodes.footnote_reference` and `nodes.citation_reference`
856 elements.
858 label = match.group('footnotelabel')
859 refname = normalize_name(label)
860 string = match.string
861 before = string[:match.start('whole')]
862 remaining = string[match.end('whole'):]
863 if match.group('citationlabel'):
864 refnode = nodes.citation_reference('[%s]_' % label,
865 refname=refname)
866 refnode += nodes.Text(label)
867 self.document.note_citation_ref(refnode)
868 else:
869 refnode = nodes.footnote_reference('[%s]_' % label)
870 if refname[0] == '#':
871 refname = refname[1:]
872 refnode['auto'] = 1
873 self.document.note_autofootnote_ref(refnode)
874 elif refname == '*':
875 refname = ''
876 refnode['auto'] = '*'
877 self.document.note_symbol_footnote_ref(
878 refnode)
879 else:
880 refnode += nodes.Text(label)
881 if refname:
882 refnode['refname'] = refname
883 self.document.note_footnote_ref(refnode)
884 if utils.get_trim_footnote_ref_space(self.document.settings):
885 before = before.rstrip()
886 return (before, [refnode], remaining, [])
888 def reference(self, match, lineno, anonymous=None):
889 referencename = match.group('refname')
890 refname = normalize_name(referencename)
891 referencenode = nodes.reference(
892 referencename + match.group('refend'), referencename,
893 name=whitespace_normalize_name(referencename))
894 if anonymous:
895 referencenode['anonymous'] = 1
896 else:
897 referencenode['refname'] = refname
898 self.document.note_refname(referencenode)
899 string = match.string
900 matchstart = match.start('whole')
901 matchend = match.end('whole')
902 return (string[:matchstart], [referencenode], string[matchend:], [])
904 def anonymous_reference(self, match, lineno):
905 return self.reference(match, lineno, anonymous=1)
907 def standalone_uri(self, match, lineno):
908 if (not match.group('scheme')
909 or match.group('scheme').lower() in urischemes.schemes):
910 if match.group('email'):
911 addscheme = 'mailto:'
912 else:
913 addscheme = ''
914 text = match.group('whole')
915 unescaped = unescape(text, 0)
916 return [nodes.reference(unescape(text, 1), unescaped,
917 refuri=addscheme + unescaped)]
918 else: # not a valid scheme
919 raise MarkupMismatch
921 def pep_reference(self, match, lineno):
922 text = match.group(0)
923 if text.startswith('pep-'):
924 pepnum = int(match.group('pepnum1'))
925 elif text.startswith('PEP'):
926 pepnum = int(match.group('pepnum2'))
927 else:
928 raise MarkupMismatch
929 ref = (self.document.settings.pep_base_url
930 + self.document.settings.pep_file_url_template % pepnum)
931 unescaped = unescape(text, 0)
932 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
934 rfc_url = 'rfc%d.html'
936 def rfc_reference(self, match, lineno):
937 text = match.group(0)
938 if text.startswith('RFC'):
939 rfcnum = int(match.group('rfcnum'))
940 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
941 else:
942 raise MarkupMismatch
943 unescaped = unescape(text, 0)
944 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
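# Editorial note: for the text 'RFC 2822', rfcnum is 2822 and the target
# becomes settings.rfc_base_url + 'rfc2822.html'; pep_reference() composes
# its target the same way from pep_base_url and pep_file_url_template.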
946 def implicit_inline(self, text, lineno):
948 Check each of the patterns in `self.implicit_dispatch` for a match,
949 and dispatch to the stored method for the pattern. Recursively check
950 the text before and after the match. Return a list of `nodes.Text`
951 and inline element nodes.
953 if not text:
954 return []
955 for pattern, method in self.implicit_dispatch:
956 match = pattern.search(text)
957 if match:
958 try:
959 # Must recurse on strings before *and* after the match;
960 # there may be multiple patterns.
961 return (self.implicit_inline(text[:match.start()], lineno)
962 + method(match, lineno) +
963 self.implicit_inline(text[match.end():], lineno))
964 except MarkupMismatch:
965 pass
966 return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
968 dispatch = {'*': emphasis,
969 '**': strong,
970 '`': interpreted_or_phrase_ref,
971 '``': literal,
972 '_`': inline_internal_target,
973 ']_': footnote_reference,
974 '|': substitution_reference,
975 '_': reference,
976 '__': anonymous_reference}
979 def _loweralpha_to_int(s, _zero=(ord('a')-1)):
980 return ord(s) - _zero
982 def _upperalpha_to_int(s, _zero=(ord('A')-1)):
983 return ord(s) - _zero
985 def _lowerroman_to_int(s):
986 return roman.fromRoman(s.upper())
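# Editorial examples of the ordinal helpers above (using the `roman`
# module shipped with docutils):
#     _loweralpha_to_int('c')   ->  3
#     _upperalpha_to_int('D')   ->  4
#     _lowerroman_to_int('iv')  ->  4
#     roman.fromRoman('XIV')    ->  14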
989 class Body(RSTState):
992 Generic classifier of the first line of a block.
995 double_width_pad_char = tableparser.TableParser.double_width_pad_char
996 """Padding character for East Asian double-width text."""
998 enum = Struct()
999 """Enumerated list parsing information."""
1001 enum.formatinfo = {
1002 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1003 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1004 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1005 enum.formats = enum.formatinfo.keys()
1006 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1007 'lowerroman', 'upperroman'] # ORDERED!
1008 enum.sequencepats = {'arabic': '[0-9]+',
1009 'loweralpha': '[a-z]',
1010 'upperalpha': '[A-Z]',
1011 'lowerroman': '[ivxlcdm]+',
1012 'upperroman': '[IVXLCDM]+',}
1013 enum.converters = {'arabic': int,
1014 'loweralpha': _loweralpha_to_int,
1015 'upperalpha': _upperalpha_to_int,
1016 'lowerroman': _lowerroman_to_int,
1017 'upperroman': roman.fromRoman}
1019 enum.sequenceregexps = {}
1020 for sequence in enum.sequences:
1021 enum.sequenceregexps[sequence] = re.compile(
1022 enum.sequencepats[sequence] + '$')
1024 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1025 """Matches the top (& bottom) of a full table)."""
1027 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1028 """Matches the top of a simple table."""
1030 simple_table_border_pat = re.compile('=+[ =]*$')
1031 """Matches the bottom & header bottom of a simple table."""
1033 pats = {}
1034 """Fragments of patterns used by transitions."""
1036 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1037 pats['alpha'] = '[a-zA-Z]'
1038 pats['alphanum'] = '[a-zA-Z0-9]'
1039 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1040 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1041 '|%(upperroman)s|#)' % enum.sequencepats)
1042 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1043 # @@@ Loosen up the pattern? Allow Unicode?
1044 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1045 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1046 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1047 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1049 for format in enum.formats:
1050 pats[format] = '(?P<%s>%s%s%s)' % (
1051 format, re.escape(enum.formatinfo[format].prefix),
1052 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1054 patterns = {
1055 'bullet': ur'[-+*\u2022\u2023\u2043]( +|$)',
1056 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1057 'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1058 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1059 'doctest': r'>>>( +|$)',
1060 'line_block': r'\|( +|$)',
1061 'grid_table_top': grid_table_top_pat,
1062 'simple_table_top': simple_table_top_pat,
1063 'explicit_markup': r'\.\.( +|$)',
1064 'anonymous': r'__( +|$)',
1065 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1066 'text': r''}
1067 initial_transitions = (
1068 'bullet',
1069 'enumerator',
1070 'field_marker',
1071 'option_marker',
1072 'doctest',
1073 'line_block',
1074 'grid_table_top',
1075 'simple_table_top',
1076 'explicit_markup',
1077 'anonymous',
1078 'line',
1079 'text')
1081 def indent(self, match, context, next_state):
1082 """Block quote."""
1083 indented, indent, line_offset, blank_finish = \
1084 self.state_machine.get_indented()
1085 elements = self.block_quote(indented, line_offset)
1086 self.parent += elements
1087 if not blank_finish:
1088 self.parent += self.unindent_warning('Block quote')
1089 return context, next_state, []
1091 def block_quote(self, indented, line_offset):
1092 elements = []
1093 while indented:
1094 (blockquote_lines,
1095 attribution_lines,
1096 attribution_offset,
1097 indented,
1098 new_line_offset) = self.split_attribution(indented, line_offset)
1099 blockquote = nodes.block_quote()
1100 self.nested_parse(blockquote_lines, line_offset, blockquote)
1101 elements.append(blockquote)
1102 if attribution_lines:
1103 attribution, messages = self.parse_attribution(
1104 attribution_lines, attribution_offset)
1105 blockquote += attribution
1106 elements += messages
1107 line_offset = new_line_offset
1108 while indented and not indented[0]:
1109 indented = indented[1:]
1110 line_offset += 1
1111 return elements
1113 # U+2014 is an em-dash:
1114 attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')
1116 def split_attribution(self, indented, line_offset):
1118 Check for a block quote attribution and split it off:
1120 * First line after a blank line must begin with a dash ("--", "---",
1121 em-dash; matches `self.attribution_pattern`).
1122 * Every line after that must have consistent indentation.
1123 * Attributions must be preceded by block quote content.
1125 Return a tuple of: (block quote content lines, attribution lines,
1126 attribution offset, remaining indented lines, new line offset).
1128 blank = None
1129 nonblank_seen = False
1130 for i in range(len(indented)):
1131 line = indented[i].rstrip()
1132 if line:
1133 if nonblank_seen and blank == i - 1: # last line blank
1134 match = self.attribution_pattern.match(line)
1135 if match:
1136 attribution_end, indent = self.check_attribution(
1137 indented, i)
1138 if attribution_end:
1139 a_lines = indented[i:attribution_end]
1140 a_lines.trim_left(match.end(), end=1)
1141 a_lines.trim_left(indent, start=1)
1142 return (indented[:i], a_lines,
1143 i, indented[attribution_end:],
1144 line_offset + attribution_end)
1145 nonblank_seen = True
1146 else:
1147 blank = i
1148 else:
1149 return (indented, None, None, None, None)
1151 def check_attribution(self, indented, attribution_start):
1153 Check attribution shape.
1154 Return the index past the end of the attribution, and the indent.
1156 indent = None
1157 i = attribution_start + 1
1158 for i in range(attribution_start + 1, len(indented)):
1159 line = indented[i].rstrip()
1160 if not line:
1161 break
1162 if indent is None:
1163 indent = len(line) - len(line.lstrip())
1164 elif len(line) - len(line.lstrip()) != indent:
1165 return None, None # bad shape; not an attribution
1166 else:
1167 # return index of line after last attribution line:
1168 i += 1
1169 return i, (indent or 0)
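# Editorial example of the shape recognized by split_attribution() and
# check_attribution(): in a block quote such as
#
#     Quoted paragraph.
#
#     -- An Attribution,
#        continued with consistent indentation
#
# the lines after the blank line form the attribution; a continuation line
# with a different indent disqualifies the candidate.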
1171 def parse_attribution(self, indented, line_offset):
1172 text = '\n'.join(indented).rstrip()
1173 lineno = self.state_machine.abs_line_number() + line_offset
1174 textnodes, messages = self.inline_text(text, lineno)
1175 node = nodes.attribution(text, '', *textnodes)
1176 node.line = lineno
1177 return node, messages
1179 def bullet(self, match, context, next_state):
1180 """Bullet list item."""
1181 bulletlist = nodes.bullet_list()
1182 self.parent += bulletlist
1183 bulletlist['bullet'] = match.string[0]
1184 i, blank_finish = self.list_item(match.end())
1185 bulletlist += i
1186 offset = self.state_machine.line_offset + 1 # next line
1187 new_line_offset, blank_finish = self.nested_list_parse(
1188 self.state_machine.input_lines[offset:],
1189 input_offset=self.state_machine.abs_line_offset() + 1,
1190 node=bulletlist, initial_state='BulletList',
1191 blank_finish=blank_finish)
1192 self.goto_line(new_line_offset)
1193 if not blank_finish:
1194 self.parent += self.unindent_warning('Bullet list')
1195 return [], next_state, []
1197 def list_item(self, indent):
1198 if self.state_machine.line[indent:]:
1199 indented, line_offset, blank_finish = (
1200 self.state_machine.get_known_indented(indent))
1201 else:
1202 indented, indent, line_offset, blank_finish = (
1203 self.state_machine.get_first_known_indented(indent))
1204 listitem = nodes.list_item('\n'.join(indented))
1205 if indented:
1206 self.nested_parse(indented, input_offset=line_offset,
1207 node=listitem)
1208 return listitem, blank_finish
1210 def enumerator(self, match, context, next_state):
1211 """Enumerated List Item"""
1212 format, sequence, text, ordinal = self.parse_enumerator(match)
1213 if not self.is_enumerated_list_item(ordinal, sequence, format):
1214 raise statemachine.TransitionCorrection('text')
1215 enumlist = nodes.enumerated_list()
1216 self.parent += enumlist
1217 if sequence == '#':
1218 enumlist['enumtype'] = 'arabic'
1219 else:
1220 enumlist['enumtype'] = sequence
1221 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1222 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1223 if ordinal != 1:
1224 enumlist['start'] = ordinal
1225 msg = self.reporter.info(
1226 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1227 % (text, ordinal), line=self.state_machine.abs_line_number())
1228 self.parent += msg
1229 listitem, blank_finish = self.list_item(match.end())
1230 enumlist += listitem
1231 offset = self.state_machine.line_offset + 1 # next line
1232 newline_offset, blank_finish = self.nested_list_parse(
1233 self.state_machine.input_lines[offset:],
1234 input_offset=self.state_machine.abs_line_offset() + 1,
1235 node=enumlist, initial_state='EnumeratedList',
1236 blank_finish=blank_finish,
1237 extra_settings={'lastordinal': ordinal,
1238 'format': format,
1239 'auto': sequence == '#'})
1240 self.goto_line(newline_offset)
1241 if not blank_finish:
1242 self.parent += self.unindent_warning('Enumerated list')
1243 return [], next_state, []
1245 def parse_enumerator(self, match, expected_sequence=None):
1247 Analyze an enumerator and return the results.
1249 :Return:
1250 - the enumerator format ('period', 'parens', or 'rparen'),
1251 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1252 - the text of the enumerator, stripped of formatting, and
1253 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1254 ``None`` is returned for invalid enumerator text).
1256 The enumerator format has already been determined by the regular
1257 expression match. If `expected_sequence` is given, that sequence is
1258 tried first. If not, we check for Roman numeral 1. This way,
1259 single-character Roman numerals (which are also alphabetical) can be
1260 matched. If no sequence has been matched, all sequences are checked in
1261 order.
1263 groupdict = match.groupdict()
1264 sequence = ''
1265 for format in self.enum.formats:
1266 if groupdict[format]: # was this the format matched?
1267 break # yes; keep `format`
1268 else: # shouldn't happen
1269 raise ParserError('enumerator format not matched')
1270 text = groupdict[format][self.enum.formatinfo[format].start
1271 :self.enum.formatinfo[format].end]
1272 if text == '#':
1273 sequence = '#'
1274 elif expected_sequence:
1275 try:
1276 if self.enum.sequenceregexps[expected_sequence].match(text):
1277 sequence = expected_sequence
1278 except KeyError: # shouldn't happen
1279 raise ParserError('unknown enumerator sequence: %s'
1280 % sequence)
1281 elif text == 'i':
1282 sequence = 'lowerroman'
1283 elif text == 'I':
1284 sequence = 'upperroman'
1285 if not sequence:
1286 for sequence in self.enum.sequences:
1287 if self.enum.sequenceregexps[sequence].match(text):
1288 break
1289 else: # shouldn't happen
1290 raise ParserError('enumerator sequence not matched')
1291 if sequence == '#':
1292 ordinal = 1
1293 else:
1294 try:
1295 ordinal = self.enum.converters[sequence](text)
1296 except roman.InvalidRomanNumeralError:
1297 ordinal = None
1298 return format, sequence, text, ordinal
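# Editorial examples of parse_enumerator() results, as
# (format, sequence, text, ordinal), for a few enumerator matches:
#     '3.'   ->  ('period', 'arabic',     '3',  3)
#     '(b)'  ->  ('parens', 'loweralpha', 'b',  2)
#     'iv)'  ->  ('rparen', 'lowerroman', 'iv', 4)
#     '#.'   ->  ('period', '#',          '#',  1)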
1300 def is_enumerated_list_item(self, ordinal, sequence, format):
1302 Check validity based on the ordinal value and the second line.
1304 Return true iff the ordinal is valid and the second line is blank,
1305 indented, or starts with the next enumerator or an auto-enumerator.
1307 if ordinal is None:
1308 return None
1309 try:
1310 next_line = self.state_machine.next_line()
1311 except EOFError: # end of input lines
1312 self.state_machine.previous_line()
1313 return 1
1314 else:
1315 self.state_machine.previous_line()
1316 if not next_line[:1].strip(): # blank or indented
1317 return 1
1318 result = self.make_enumerator(ordinal + 1, sequence, format)
1319 if result:
1320 next_enumerator, auto_enumerator = result
1321 try:
1322 if ( next_line.startswith(next_enumerator) or
1323 next_line.startswith(auto_enumerator) ):
1324 return 1
1325 except TypeError:
1326 pass
1327 return None
1329 def make_enumerator(self, ordinal, sequence, format):
1331 Construct and return the next enumerated list item marker, and an
1332 auto-enumerator ("#" instead of the regular enumerator).
1334 Return ``None`` for invalid (out of range) ordinals.
1335 """ #"
1336 if sequence == '#':
1337 enumerator = '#'
1338 elif sequence == 'arabic':
1339 enumerator = str(ordinal)
1340 else:
1341 if sequence.endswith('alpha'):
1342 if ordinal > 26:
1343 return None
1344 enumerator = chr(ordinal + ord('a') - 1)
1345 elif sequence.endswith('roman'):
1346 try:
1347 enumerator = roman.toRoman(ordinal)
1348 except roman.RomanError:
1349 return None
1350 else: # shouldn't happen
1351 raise ParserError('unknown enumerator sequence: "%s"'
1352 % sequence)
1353 if sequence.startswith('lower'):
1354 enumerator = enumerator.lower()
1355 elif sequence.startswith('upper'):
1356 enumerator = enumerator.upper()
1357 else: # shouldn't happen
1358 raise ParserError('unknown enumerator sequence: "%s"'
1359 % sequence)
1360 formatinfo = self.enum.formatinfo[format]
1361 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1362 + ' ')
1363 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1364 return next_enumerator, auto_enumerator
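# Editorial examples of make_enumerator(ordinal, sequence, format):
#     (3, 'loweralpha', 'parens')   ->  ('(c) ', '(#) ')
#     (4, 'upperroman', 'period')   ->  ('IV. ', '#. ')
#     (27, 'loweralpha', 'period')  ->  None  (out of range)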
1366 def field_marker(self, match, context, next_state):
1367 """Field list item."""
1368 field_list = nodes.field_list()
1369 self.parent += field_list
1370 field, blank_finish = self.field(match)
1371 field_list += field
1372 offset = self.state_machine.line_offset + 1 # next line
1373 newline_offset, blank_finish = self.nested_list_parse(
1374 self.state_machine.input_lines[offset:],
1375 input_offset=self.state_machine.abs_line_offset() + 1,
1376 node=field_list, initial_state='FieldList',
1377 blank_finish=blank_finish)
1378 self.goto_line(newline_offset)
1379 if not blank_finish:
1380 self.parent += self.unindent_warning('Field list')
1381 return [], next_state, []
1383 def field(self, match):
1384 name = self.parse_field_marker(match)
1385 lineno = self.state_machine.abs_line_number()
1386 indented, indent, line_offset, blank_finish = \
1387 self.state_machine.get_first_known_indented(match.end())
1388 field_node = nodes.field()
1389 field_node.line = lineno
1390 name_nodes, name_messages = self.inline_text(name, lineno)
1391 field_node += nodes.field_name(name, '', *name_nodes)
1392 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1393 field_node += field_body
1394 if indented:
1395 self.parse_field_body(indented, line_offset, field_body)
1396 return field_node, blank_finish
1398 def parse_field_marker(self, match):
1399 """Extract & return field name from a field marker match."""
1400 field = match.group()[1:] # strip off leading ':'
1401 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1402 return field
1404 def parse_field_body(self, indented, offset, node):
1405 self.nested_parse(indented, input_offset=offset, node=node)
1407 def option_marker(self, match, context, next_state):
1408 """Option list item."""
1409 optionlist = nodes.option_list()
1410 try:
1411 listitem, blank_finish = self.option_list_item(match)
1412 except MarkupError, (message, lineno):
1413 # This shouldn't happen; pattern won't match.
1414 msg = self.reporter.error(
1415 'Invalid option list marker: %s' % message, line=lineno)
1416 self.parent += msg
1417 indented, indent, line_offset, blank_finish = \
1418 self.state_machine.get_first_known_indented(match.end())
1419 elements = self.block_quote(indented, line_offset)
1420 self.parent += elements
1421 if not blank_finish:
1422 self.parent += self.unindent_warning('Option list')
1423 return [], next_state, []
1424 self.parent += optionlist
1425 optionlist += listitem
1426 offset = self.state_machine.line_offset + 1 # next line
1427 newline_offset, blank_finish = self.nested_list_parse(
1428 self.state_machine.input_lines[offset:],
1429 input_offset=self.state_machine.abs_line_offset() + 1,
1430 node=optionlist, initial_state='OptionList',
1431 blank_finish=blank_finish)
1432 self.goto_line(newline_offset)
1433 if not blank_finish:
1434 self.parent += self.unindent_warning('Option list')
1435 return [], next_state, []
1437 def option_list_item(self, match):
1438 offset = self.state_machine.abs_line_offset()
1439 options = self.parse_option_marker(match)
1440 indented, indent, line_offset, blank_finish = \
1441 self.state_machine.get_first_known_indented(match.end())
1442 if not indented: # not an option list item
1443 self.goto_line(offset)
1444 raise statemachine.TransitionCorrection('text')
1445 option_group = nodes.option_group('', *options)
1446 description = nodes.description('\n'.join(indented))
1447 option_list_item = nodes.option_list_item('', option_group,
1448 description)
1449 if indented:
1450 self.nested_parse(indented, input_offset=line_offset,
1451 node=description)
1452 return option_list_item, blank_finish
1454 def parse_option_marker(self, match):
1456 Return a list of `node.option` and `node.option_argument` objects,
1457 parsed from an option marker match.
1459 :Exception: `MarkupError` for invalid option markers.
1461 optlist = []
1462 optionstrings = match.group().rstrip().split(', ')
1463 for optionstring in optionstrings:
1464 tokens = optionstring.split()
1465 delimiter = ' '
1466 firstopt = tokens[0].split('=')
1467 if len(firstopt) > 1:
1468 # "--opt=value" form
1469 tokens[:1] = firstopt
1470 delimiter = '='
1471 elif (len(tokens[0]) > 2
1472 and ((tokens[0].startswith('-')
1473 and not tokens[0].startswith('--'))
1474 or tokens[0].startswith('+'))):
1475 # "-ovalue" form
1476 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1477 delimiter = ''
1478 if len(tokens) > 1 and (tokens[1].startswith('<')
1479 and tokens[-1].endswith('>')):
1480 # "-o <value1 value2>" form; join all values into one token
1481 tokens[1:] = [' '.join(tokens[1:])]
1482 if 0 < len(tokens) <= 2:
1483 option = nodes.option(optionstring)
1484 option += nodes.option_string(tokens[0], tokens[0])
1485 if len(tokens) > 1:
1486 option += nodes.option_argument(tokens[1], tokens[1],
1487 delimiter=delimiter)
1488 optlist.append(option)
1489 else:
1490 raise MarkupError(
1491 'wrong number of option tokens (=%s), should be 1 or 2: '
1492 '"%s"' % (len(tokens), optionstring),
1493 self.state_machine.abs_line_number() + 1)
1494 return optlist
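# Editorial examples of option markers handled above; each marker is split
# on ', ' and yields option_string / option_argument pairs:
#     '-a FILE, --all=FILE'  ->  ('-a', 'FILE', delimiter ' ') and
#                                ('--all', 'FILE', delimiter '=')
#     '-ofile'               ->  ('-o', 'file', delimiter '')
#     '/V'                   ->  ('/V',)  (no argument)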
1496 def doctest(self, match, context, next_state):
1497 data = '\n'.join(self.state_machine.get_text_block())
1498 self.parent += nodes.doctest_block(data, data)
1499 return [], next_state, []
1501 def line_block(self, match, context, next_state):
1502 """First line of a line block."""
1503 block = nodes.line_block()
1504 self.parent += block
1505 lineno = self.state_machine.abs_line_number()
1506 line, messages, blank_finish = self.line_block_line(match, lineno)
1507 block += line
1508 self.parent += messages
1509 if not blank_finish:
1510 offset = self.state_machine.line_offset + 1 # next line
1511 new_line_offset, blank_finish = self.nested_list_parse(
1512 self.state_machine.input_lines[offset:],
1513 input_offset=self.state_machine.abs_line_offset() + 1,
1514 node=block, initial_state='LineBlock',
1515 blank_finish=0)
1516 self.goto_line(new_line_offset)
1517 if not blank_finish:
1518 self.parent += self.reporter.warning(
1519 'Line block ends without a blank line.',
1520 line=(self.state_machine.abs_line_number() + 1))
1521 if len(block):
1522 if block[0].indent is None:
1523 block[0].indent = 0
1524 self.nest_line_block_lines(block)
1525 return [], next_state, []
1527 def line_block_line(self, match, lineno):
1528 """Return one line element of a line_block."""
1529 indented, indent, line_offset, blank_finish = \
1530 self.state_machine.get_first_known_indented(match.end(),
1531 until_blank=1)
1532 text = u'\n'.join(indented)
1533 text_nodes, messages = self.inline_text(text, lineno)
1534 line = nodes.line(text, '', *text_nodes)
1535 if match.string.rstrip() != '|': # not empty
1536 line.indent = len(match.group(1)) - 1
1537 return line, messages, blank_finish
1539 def nest_line_block_lines(self, block):
1540 for index in range(1, len(block)):
1541 if block[index].indent is None:
1542 block[index].indent = block[index - 1].indent
1543 self.nest_line_block_segment(block)
1545 def nest_line_block_segment(self, block):
1546 indents = [item.indent for item in block]
1547 least = min(indents)
1548 new_items = []
1549 new_block = nodes.line_block()
1550 for item in block:
1551 if item.indent > least:
1552 new_block.append(item)
1553 else:
1554 if len(new_block):
1555 self.nest_line_block_segment(new_block)
1556 new_items.append(new_block)
1557 new_block = nodes.line_block()
1558 new_items.append(item)
1559 if len(new_block):
1560 self.nest_line_block_segment(new_block)
1561 new_items.append(new_block)
1562 block[:] = new_items
1564 def grid_table_top(self, match, context, next_state):
1565 """Top border of a full table."""
1566 return self.table_top(match, context, next_state,
1567 self.isolate_grid_table,
1568 tableparser.GridTableParser)
1570 def simple_table_top(self, match, context, next_state):
1571 """Top border of a simple table."""
1572 return self.table_top(match, context, next_state,
1573 self.isolate_simple_table,
1574 tableparser.SimpleTableParser)
1576 def table_top(self, match, context, next_state,
1577 isolate_function, parser_class):
1578 """Top border of a generic table."""
1579 nodelist, blank_finish = self.table(isolate_function, parser_class)
1580 self.parent += nodelist
1581 if not blank_finish:
1582 msg = self.reporter.warning(
1583 'Blank line required after table.',
1584 line=self.state_machine.abs_line_number() + 1)
1585 self.parent += msg
1586 return [], next_state, []
1588 def table(self, isolate_function, parser_class):
1589 """Parse a table."""
1590 block, messages, blank_finish = isolate_function()
1591 if block:
1592 try:
1593 parser = parser_class()
1594 tabledata = parser.parse(block)
1595 tableline = (self.state_machine.abs_line_number() - len(block)
1596 + 1)
1597 table = self.build_table(tabledata, tableline)
1598 nodelist = [table] + messages
1599 except tableparser.TableMarkupError, detail:
1600 nodelist = self.malformed_table(
1601 block, ' '.join(detail.args)) + messages
1602 else:
1603 nodelist = messages
1604 return nodelist, blank_finish
1606 def isolate_grid_table(self):
1607 messages = []
1608 blank_finish = 1
1609 try:
1610 block = self.state_machine.get_text_block(flush_left=1)
1611 except statemachine.UnexpectedIndentationError, instance:
1612 block, source, lineno = instance.args
1613 messages.append(self.reporter.error('Unexpected indentation.',
1614 source=source, line=lineno))
1615 blank_finish = 0
1616 block.disconnect()
1617 # for East Asian chars:
1618 block.pad_double_width(self.double_width_pad_char)
1619 width = len(block[0].strip())
1620 for i in range(len(block)):
1621 block[i] = block[i].strip()
1622 if block[i][0] not in '+|': # check left edge
1623 blank_finish = 0
1624 self.state_machine.previous_line(len(block) - i)
1625 del block[i:]
1626 break
1627 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1628 blank_finish = 0
1629 # from second-last to third line of table:
1630 for i in range(len(block) - 2, 1, -1):
1631 if self.grid_table_top_pat.match(block[i]):
1632 self.state_machine.previous_line(len(block) - i + 1)
1633 del block[i+1:]
1634 break
1635 else:
1636 messages.extend(self.malformed_table(block))
1637 return [], messages, blank_finish
1638 for i in range(len(block)): # check right edge
1639 if len(block[i]) != width or block[i][-1] not in '+|':
1640 messages.extend(self.malformed_table(block))
1641 return [], messages, blank_finish
1642 return block, messages, blank_finish
1644 def isolate_simple_table(self):
1645 start = self.state_machine.line_offset
1646 lines = self.state_machine.input_lines
1647 limit = len(lines) - 1
1648 toplen = len(lines[start].strip())
1649 pattern_match = self.simple_table_border_pat.match
1650 found = 0
1651 found_at = None
1652 i = start + 1
1653 while i <= limit:
1654 line = lines[i]
1655 match = pattern_match(line)
1656 if match:
1657 if len(line.strip()) != toplen:
1658 self.state_machine.next_line(i - start)
1659 messages = self.malformed_table(
1660 lines[start:i+1], 'Bottom/header table border does '
1661 'not match top border.')
1662 return [], messages, i == limit or not lines[i+1].strip()
1663 found += 1
1664 found_at = i
1665 if found == 2 or i == limit or not lines[i+1].strip():
1666 end = i
1667 break
1668 i += 1
1669 else: # reached end of input_lines
1670 if found:
1671 extra = ' or no blank line after table bottom'
1672 self.state_machine.next_line(found_at - start)
1673 block = lines[start:found_at+1]
1674 else:
1675 extra = ''
1676 self.state_machine.next_line(i - start - 1)
1677 block = lines[start:]
1678 messages = self.malformed_table(
1679 block, 'No bottom table border found%s.' % extra)
1680 return [], messages, not extra
1681 self.state_machine.next_line(end - start)
1682 block = lines[start:end+1]
1683 # for East Asian chars:
1684 block.pad_double_width(self.double_width_pad_char)
1685 return block, [], end == limit or not lines[end+1].strip()
1687 def malformed_table(self, block, detail=''):
1688 block.replace(self.double_width_pad_char, '')
1689 data = '\n'.join(block)
1690 message = 'Malformed table.'
1691 lineno = self.state_machine.abs_line_number() - len(block) + 1
1692 if detail:
1693 message += '\n' + detail
1694 error = self.reporter.error(message, nodes.literal_block(data, data),
1695 line=lineno)
1696 return [error]
1698 def build_table(self, tabledata, tableline, stub_columns=0):
1699 colwidths, headrows, bodyrows = tabledata
1700 table = nodes.table()
1701 tgroup = nodes.tgroup(cols=len(colwidths))
1702 table += tgroup
1703 for colwidth in colwidths:
1704 colspec = nodes.colspec(colwidth=colwidth)
1705 if stub_columns:
1706 colspec.attributes['stub'] = 1
1707 stub_columns -= 1
1708 tgroup += colspec
1709 if headrows:
1710 thead = nodes.thead()
1711 tgroup += thead
1712 for row in headrows:
1713 thead += self.build_table_row(row, tableline)
1714 tbody = nodes.tbody()
1715 tgroup += tbody
1716 for row in bodyrows:
1717 tbody += self.build_table_row(row, tableline)
1718 return table
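# Sketch of the expected `tabledata` layout (illustrative values): a
# triple (colwidths, headrows, bodyrows) as produced by the table
# parsers, where each row is a list of cells and each cell is either
# None (covered by a span) or a tuple
# (morerows, morecols, offset, cellblock), e.g. roughly
# ([9, 14], [[(0, 0, 1, ['Header 1']), (0, 0, 1, ['Header 2'])]],
#  [[(0, 0, 3, ['body row']), (0, 0, 3, ['more text'])]]).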
1720 def build_table_row(self, rowdata, tableline):
1721 row = nodes.row()
1722 for cell in rowdata:
1723 if cell is None:
1724 continue
1725 morerows, morecols, offset, cellblock = cell
1726 attributes = {}
1727 if morerows:
1728 attributes['morerows'] = morerows
1729 if morecols:
1730 attributes['morecols'] = morecols
1731 entry = nodes.entry(**attributes)
1732 row += entry
1733 if ''.join(cellblock):
1734 self.nested_parse(cellblock, input_offset=tableline+offset,
1735 node=entry)
1736 return row
1739 explicit = Struct()
1740 """Patterns and constants used for explicit markup recognition."""
1742 explicit.patterns = Struct(
1743 target=re.compile(r"""
1744 (
1745 _ # anonymous target
1746 | # *OR*
1747 (?!_) # no underscore at the beginning
1748 (?P<quote>`?) # optional open quote
1749 (?![ `]) # first char. not space or
1750 # backquote
1751 (?P<name> # reference name
1752 .+?
1753 )
1754 %(non_whitespace_escape_before)s
1755 (?P=quote) # close quote if open quote used
1756 )
1757 (?<!(?<!\x00):) # no unescaped colon at end
1758 %(non_whitespace_escape_before)s
1759 [ ]? # optional space
1760 : # end of reference name
1761 ([ ]+|$) # followed by whitespace
1762 """ % vars(Inliner), re.VERBOSE),
1763 reference=re.compile(r"""
1764 (
1765 (?P<simple>%(simplename)s)_
1766 | # *OR*
1767 ` # open backquote
1768 (?![ ]) # not space
1769 (?P<phrase>.+?) # hyperlink phrase
1770 %(non_whitespace_escape_before)s
1771 `_ # close backquote,
1772 # reference mark
1773 )
1774 $ # end of string
1775 """ % vars(Inliner), re.VERBOSE | re.UNICODE),
1776 substitution=re.compile(r"""
1777 (
1778 (?![ ]) # first char. not space
1779 (?P<name>.+?) # substitution text
1780 %(non_whitespace_escape_before)s
1781 \| # close delimiter
1782 )
1783 ([ ]+|$) # followed by whitespace
1784 """ % vars(Inliner), re.VERBOSE),)
1786 def footnote(self, match):
1787 lineno = self.state_machine.abs_line_number()
1788 indented, indent, offset, blank_finish = \
1789 self.state_machine.get_first_known_indented(match.end())
1790 label = match.group(1)
1791 name = normalize_name(label)
1792 footnote = nodes.footnote('\n'.join(indented))
1793 footnote.line = lineno
1794 if name[0] == '#': # auto-numbered
1795 name = name[1:] # autonumber label
1796 footnote['auto'] = 1
1797 if name:
1798 footnote['names'].append(name)
1799 self.document.note_autofootnote(footnote)
1800 elif name == '*': # auto-symbol
1801 name = ''
1802 footnote['auto'] = '*'
1803 self.document.note_symbol_footnote(footnote)
1804 else: # manually numbered
1805 footnote += nodes.label('', label)
1806 footnote['names'].append(name)
1807 self.document.note_footnote(footnote)
1808 if name:
1809 self.document.note_explicit_target(footnote, footnote)
1810 else:
1811 self.document.set_id(footnote, footnote)
1812 if indented:
1813 self.nested_parse(indented, input_offset=offset, node=footnote)
1814 return [footnote], blank_finish
1816 def citation(self, match):
1817 lineno = self.state_machine.abs_line_number()
1818 indented, indent, offset, blank_finish = \
1819 self.state_machine.get_first_known_indented(match.end())
1820 label = match.group(1)
1821 name = normalize_name(label)
1822 citation = nodes.citation('\n'.join(indented))
1823 citation.line = lineno
1824 citation += nodes.label('', label)
1825 citation['names'].append(name)
1826 self.document.note_citation(citation)
1827 self.document.note_explicit_target(citation, citation)
1828 if indented:
1829 self.nested_parse(indented, input_offset=offset, node=citation)
1830 return [citation], blank_finish
1832 def hyperlink_target(self, match):
1833 pattern = self.explicit.patterns.target
1834 lineno = self.state_machine.abs_line_number()
1835 block, indent, offset, blank_finish = \
1836 self.state_machine.get_first_known_indented(
1837 match.end(), until_blank=1, strip_indent=0)
1838 blocktext = match.string[:match.end()] + '\n'.join(block)
1839 block = [escape2null(line) for line in block]
1840 escaped = block[0]
1841 blockindex = 0
1842 while 1:
1843 targetmatch = pattern.match(escaped)
1844 if targetmatch:
1845 break
1846 blockindex += 1
1847 try:
1848 escaped += block[blockindex]
1849 except IndexError:
1850 raise MarkupError('malformed hyperlink target.', lineno)
1851 del block[:blockindex]
1852 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
1853 target = self.make_target(block, blocktext, lineno,
1854 targetmatch.group('name'))
1855 return [target], blank_finish
1857 def make_target(self, block, block_text, lineno, target_name):
1858 target_type, data = self.parse_target(block, block_text, lineno)
1859 if target_type == 'refname':
1860 target = nodes.target(block_text, '', refname=normalize_name(data))
1861 target.indirect_reference_name = data
1862 self.add_target(target_name, '', target, lineno)
1863 self.document.note_indirect_target(target)
1864 return target
1865 elif target_type == 'refuri':
1866 target = nodes.target(block_text, '')
1867 self.add_target(target_name, data, target, lineno)
1868 return target
1869 else:
1870 return data
1872 def parse_target(self, block, block_text, lineno):
1873 """
1874 Determine the type of reference of a target.
1876 :Return: A 2-tuple, one of:
1878 - 'refname' and the indirect reference name
1879 - 'refuri' and the URI
1880 - 'malformed' and a system_message node
1881 """
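# For example (hypothetical targets): given the body of
# ".. _Docutils: http://docutils.sourceforge.net/", i.e. the block
# ['http://docutils.sourceforge.net/'], this returns
# ('refuri', 'http://docutils.sourceforge.net/'); given the body of the
# indirect target ".. _second: first_", i.e. ['first_'], it returns
# ('refname', 'first').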
1882 if block and block[-1].strip()[-1:] == '_': # possible indirect target
1883 reference = ' '.join([line.strip() for line in block])
1884 refname = self.is_reference(reference)
1885 if refname:
1886 return 'refname', refname
1887 reference = ''.join([''.join(line.split()) for line in block])
1888 return 'refuri', unescape(reference)
1890 def is_reference(self, reference):
1891 match = self.explicit.patterns.reference.match(
1892 whitespace_normalize_name(reference))
1893 if not match:
1894 return None
1895 return unescape(match.group('simple') or match.group('phrase'))
1897 def add_target(self, targetname, refuri, target, lineno):
1898 target.line = lineno
1899 if targetname:
1900 name = normalize_name(unescape(targetname))
1901 target['names'].append(name)
1902 if refuri:
1903 uri = self.inliner.adjust_uri(refuri)
1904 if uri:
1905 target['refuri'] = uri
1906 else:
1907 raise ApplicationError('problem with URI: %r' % refuri)
1908 self.document.note_explicit_target(target, self.parent)
1909 else: # anonymous target
1910 if refuri:
1911 target['refuri'] = refuri
1912 target['anonymous'] = 1
1913 self.document.note_anonymous_target(target)
1915 def substitution_def(self, match):
1916 pattern = self.explicit.patterns.substitution
1917 lineno = self.state_machine.abs_line_number()
1918 block, indent, offset, blank_finish = \
1919 self.state_machine.get_first_known_indented(match.end(),
1920 strip_indent=0)
1921 blocktext = (match.string[:match.end()] + '\n'.join(block))
1922 block.disconnect()
1923 escaped = escape2null(block[0].rstrip())
1924 blockindex = 0
1925 while 1:
1926 subdefmatch = pattern.match(escaped)
1927 if subdefmatch:
1928 break
1929 blockindex += 1
1930 try:
1931 escaped = escaped + ' ' + escape2null(block[blockindex].strip())
1932 except IndexError:
1933 raise MarkupError('malformed substitution definition.',
1934 lineno)
1935 del block[:blockindex] # strip out the substitution marker
1936 block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
1937 if not block[0]:
1938 del block[0]
1939 offset += 1
1940 while block and not block[-1].strip():
1941 block.pop()
1942 subname = subdefmatch.group('name')
1943 substitution_node = nodes.substitution_definition(blocktext)
1944 substitution_node.line = lineno
1945 if not block:
1946 msg = self.reporter.warning(
1947 'Substitution definition "%s" missing contents.' % subname,
1948 nodes.literal_block(blocktext, blocktext), line=lineno)
1949 return [msg], blank_finish
1950 block[0] = block[0].strip()
1951 substitution_node['names'].append(
1952 nodes.whitespace_normalize_name(subname))
1953 new_abs_offset, blank_finish = self.nested_list_parse(
1954 block, input_offset=offset, node=substitution_node,
1955 initial_state='SubstitutionDef', blank_finish=blank_finish)
1956 i = 0
1957 for node in substitution_node[:]:
1958 if not (isinstance(node, nodes.Inline) or
1959 isinstance(node, nodes.Text)):
1960 self.parent += substitution_node[i]
1961 del substitution_node[i]
1962 else:
1963 i += 1
1964 for node in substitution_node.traverse(nodes.Element):
1965 if self.disallowed_inside_substitution_definitions(node):
1966 pformat = nodes.literal_block('', node.pformat().rstrip())
1967 msg = self.reporter.error(
1968 'Substitution definition contains illegal element:',
1969 pformat, nodes.literal_block(blocktext, blocktext),
1970 line=lineno)
1971 return [msg], blank_finish
1972 if len(substitution_node) == 0:
1973 msg = self.reporter.warning(
1974 'Substitution definition "%s" empty or invalid.'
1975 % subname,
1976 nodes.literal_block(blocktext, blocktext), line=lineno)
1977 return [msg], blank_finish
1978 self.document.note_substitution_def(
1979 substitution_node, subname, self.parent)
1980 return [substitution_node], blank_finish
1982 def disallowed_inside_substitution_definitions(self, node):
1983 if (node['ids'] or
1984 isinstance(node, nodes.reference) and node.get('anonymous') or
1985 isinstance(node, nodes.footnote_reference) and node.get('auto')):
1986 return 1
1987 else:
1988 return 0
1990 def directive(self, match, **option_presets):
1991 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
1992 type_name = match.group(1)
1993 directive_class, messages = directives.directive(
1994 type_name, self.memo.language, self.document)
1995 self.parent += messages
1996 if directive_class:
1997 return self.run_directive(
1998 directive_class, match, type_name, option_presets)
1999 else:
2000 return self.unknown_directive(type_name)
2002 def run_directive(self, directive, match, type_name, option_presets):
2003 """
2004 Parse a directive then run its directive function.
2006 Parameters:
2008 - `directive`: The class implementing the directive. Must be
2009 a subclass of `rst.Directive`.
2011 - `match`: A regular expression match object which matched the first
2012 line of the directive.
2014 - `type_name`: The directive name, as used in the source text.
2016 - `option_presets`: A dictionary of preset options, defaults for the
2017 directive options. Currently, only an "alt" option is passed by
2018 substitution definitions (value: the substitution name), which may
2019 be used by an embedded image directive.
2021 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2022 """
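# For instance (hypothetical directive), the source
#
#     .. image:: picture.png
#        :alt: A picture
#
# reaches this method with type_name 'image'; parse_directive_block()
# yields arguments ['picture.png'], options {'alt': 'A picture'} and no
# content, and the nodes returned by the directive instance's run()
# become the return value.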
2023 if isinstance(directive, (FunctionType, MethodType)):
2024 from docutils.parsers.rst import convert_directive_function
2025 directive = convert_directive_function(directive)
2026 lineno = self.state_machine.abs_line_number()
2027 initial_line_offset = self.state_machine.line_offset
2028 indented, indent, line_offset, blank_finish \
2029 = self.state_machine.get_first_known_indented(match.end(),
2030 strip_top=0)
2031 block_text = '\n'.join(self.state_machine.input_lines[
2032 initial_line_offset : self.state_machine.line_offset + 1])
2033 try:
2034 arguments, options, content, content_offset = (
2035 self.parse_directive_block(indented, line_offset,
2036 directive, option_presets))
2037 except MarkupError, detail:
2038 error = self.reporter.error(
2039 'Error in "%s" directive:\n%s.' % (type_name,
2040 ' '.join(detail.args)),
2041 nodes.literal_block(block_text, block_text), line=lineno)
2042 return [error], blank_finish
2043 directive_instance = directive(
2044 type_name, arguments, options, content, lineno,
2045 content_offset, block_text, self, self.state_machine)
2046 try:
2047 result = directive_instance.run()
2048 except docutils.parsers.rst.DirectiveError, directive_error:
2049 msg_node = self.reporter.system_message(directive_error.level,
2050 directive_error.message)
2051 msg_node += nodes.literal_block(block_text, block_text)
2052 msg_node['line'] = lineno
2053 result = [msg_node]
2054 assert isinstance(result, list), \
2055 'Directive "%s" must return a list of nodes.' % type_name
2056 for i in range(len(result)):
2057 assert isinstance(result[i], nodes.Node), \
2058 ('Directive "%s" returned non-Node object (index %s): %r'
2059 % (type_name, i, result[i]))
2060 return (result,
2061 blank_finish or self.state_machine.is_next_line_blank())
2063 def parse_directive_block(self, indented, line_offset, directive,
2064 option_presets):
2065 option_spec = directive.option_spec
2066 has_content = directive.has_content
2067 if indented and not indented[0].strip():
2068 indented.trim_start()
2069 line_offset += 1
2070 while indented and not indented[-1].strip():
2071 indented.trim_end()
2072 if indented and (directive.required_arguments
2073 or directive.optional_arguments
2074 or option_spec):
2075 for i in range(len(indented)):
2076 if not indented[i].strip():
2077 break
2078 else:
2079 i += 1
2080 arg_block = indented[:i]
2081 content = indented[i+1:]
2082 content_offset = line_offset + i + 1
2083 else:
2084 content = indented
2085 content_offset = line_offset
2086 arg_block = []
2087 while content and not content[0].strip():
2088 content.trim_start()
2089 content_offset += 1
2090 if option_spec:
2091 options, arg_block = self.parse_directive_options(
2092 option_presets, option_spec, arg_block)
2093 if arg_block and not (directive.required_arguments
2094 or directive.optional_arguments):
2095 raise MarkupError('no arguments permitted; blank line '
2096 'required before content block')
2097 else:
2098 options = {}
2099 if directive.required_arguments or directive.optional_arguments:
2100 arguments = self.parse_directive_arguments(
2101 directive, arg_block)
2102 else:
2103 arguments = []
2104 if content and not has_content:
2105 raise MarkupError('no content permitted')
2106 return (arguments, options, content, content_offset)
2108 def parse_directive_options(self, option_presets, option_spec, arg_block):
2109 options = option_presets.copy()
2110 for i in range(len(arg_block)):
2111 if arg_block[i][:1] == ':':
2112 opt_block = arg_block[i:]
2113 arg_block = arg_block[:i]
2114 break
2115 else:
2116 opt_block = []
2117 if opt_block:
2118 success, data = self.parse_extension_options(option_spec,
2119 opt_block)
2120 if success: # data is a dict of options
2121 options.update(data)
2122 else: # data is an error string
2123 raise MarkupError(data)
2124 return options, arg_block
2126 def parse_directive_arguments(self, directive, arg_block):
2127 required = directive.required_arguments
2128 optional = directive.optional_arguments
2129 arg_text = '\n'.join(arg_block)
2130 arguments = arg_text.split()
2131 if len(arguments) < required:
2132 raise MarkupError('%s argument(s) required, %s supplied'
2133 % (required, len(arguments)))
2134 elif len(arguments) > required + optional:
2135 if directive.final_argument_whitespace:
2136 arguments = arg_text.split(None, required + optional - 1)
2137 else:
2138 raise MarkupError(
2139 'maximum %s argument(s) allowed, %s supplied'
2140 % (required + optional, len(arguments)))
2141 return arguments
2143 def parse_extension_options(self, option_spec, datalines):
2144 """
2145 Parse `datalines` for a field list containing extension options
2146 matching `option_spec`.
2148 :Parameters:
2149 - `option_spec`: a mapping of option name to conversion
2150 function, which should raise an exception on bad input.
2151 - `datalines`: a list of input strings.
2153 :Return:
2154 - Success value, 1 or 0.
2155 - An option dictionary on success, an error string on failure.
2156 """
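# Example (hypothetical spec): with option_spec
# {'alt': directives.unchanged, 'scale': directives.nonnegative_int}
# and datalines [':alt: alternate text', ':scale: 50'], this should
# return (1, {'alt': 'alternate text', 'scale': 50}); an unknown or
# badly formed field yields (0, <error string>) instead.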
2157 node = nodes.field_list()
2158 newline_offset, blank_finish = self.nested_list_parse(
2159 datalines, 0, node, initial_state='ExtensionOptions',
2160 blank_finish=1)
2161 if newline_offset != len(datalines): # incomplete parse of block
2162 return 0, 'invalid option block'
2163 try:
2164 options = utils.extract_extension_options(node, option_spec)
2165 except KeyError, detail:
2166 return 0, ('unknown option: "%s"' % detail.args[0])
2167 except (ValueError, TypeError), detail:
2168 return 0, ('invalid option value: %s' % ' '.join(detail.args))
2169 except utils.ExtensionOptionError, detail:
2170 return 0, ('invalid option data: %s' % ' '.join(detail.args))
2171 if blank_finish:
2172 return 1, options
2173 else:
2174 return 0, 'option data incompletely parsed'
2176 def unknown_directive(self, type_name):
2177 lineno = self.state_machine.abs_line_number()
2178 indented, indent, offset, blank_finish = \
2179 self.state_machine.get_first_known_indented(0, strip_indent=0)
2180 text = '\n'.join(indented)
2181 error = self.reporter.error(
2182 'Unknown directive type "%s".' % type_name,
2183 nodes.literal_block(text, text), line=lineno)
2184 return [error], blank_finish
2186 def comment(self, match):
2187 if not match.string[match.end():].strip() \
2188 and self.state_machine.is_next_line_blank(): # an empty comment?
2189 return [nodes.comment()], 1 # "A tiny but practical wart."
2190 indented, indent, offset, blank_finish = \
2191 self.state_machine.get_first_known_indented(match.end())
2192 while indented and not indented[-1].strip():
2193 indented.trim_end()
2194 text = '\n'.join(indented)
2195 return [nodes.comment(text, text)], blank_finish
2197 explicit.constructs = [
2198 (footnote,
2199 re.compile(r"""
2200 \.\.[ ]+ # explicit markup start
2201 \[
2202 ( # footnote label:
2203 [0-9]+ # manually numbered footnote
2204 | # *OR*
2205 \# # anonymous auto-numbered footnote
2206 | # *OR*
2207 \#%s # auto-numbered footnote with a label
2208 | # *OR*
2209 \* # auto-symbol footnote
2210 )
2211 \]
2212 ([ ]+|$) # whitespace or end of line
2213 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2214 (citation,
2215 re.compile(r"""
2216 \.\.[ ]+ # explicit markup start
2217 \[(%s)\] # citation label
2218 ([ ]+|$) # whitespace or end of line
2219 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2220 (hyperlink_target,
2221 re.compile(r"""
2222 \.\.[ ]+ # explicit markup start
2223 _ # target indicator
2224 (?![ ]|$) # first char. not space or EOL
2225 """, re.VERBOSE)),
2226 (substitution_def,
2227 re.compile(r"""
2228 \.\.[ ]+ # explicit markup start
2229 \| # substitution indicator
2230 (?![ ]|$) # first char. not space or EOL
2231 """, re.VERBOSE)),
2232 (directive,
2233 re.compile(r"""
2234 \.\.[ ]+ # explicit markup start
2235 (%s) # directive name
2236 [ ]? # optional space
2237 :: # directive delimiter
2238 ([ ]+|$) # whitespace or end of line
2239 """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
2241 def explicit_markup(self, match, context, next_state):
2242 """Footnotes, hyperlink targets, directives, comments."""
2243 nodelist, blank_finish = self.explicit_construct(match)
2244 self.parent += nodelist
2245 self.explicit_list(blank_finish)
2246 return [], next_state, []
2248 def explicit_construct(self, match):
2249 """Determine which explicit construct this is, parse & return it."""
2250 errors = []
2251 for method, pattern in self.explicit.constructs:
2252 expmatch = pattern.match(match.string)
2253 if expmatch:
2254 try:
2255 return method(self, expmatch)
2256 except MarkupError, error: # never reached?
2257 message, lineno = error.args
2258 errors.append(self.reporter.warning(message, line=lineno))
2259 break
2260 nodelist, blank_finish = self.comment(match)
2261 return nodelist + errors, blank_finish
2263 def explicit_list(self, blank_finish):
2264 """
2265 Create a nested state machine for a series of explicit markup
2266 constructs (including anonymous hyperlink targets).
2267 """
2268 offset = self.state_machine.line_offset + 1 # next line
2269 newline_offset, blank_finish = self.nested_list_parse(
2270 self.state_machine.input_lines[offset:],
2271 input_offset=self.state_machine.abs_line_offset() + 1,
2272 node=self.parent, initial_state='Explicit',
2273 blank_finish=blank_finish,
2274 match_titles=self.state_machine.match_titles)
2275 self.goto_line(newline_offset)
2276 if not blank_finish:
2277 self.parent += self.unindent_warning('Explicit markup')
2279 def anonymous(self, match, context, next_state):
2280 """Anonymous hyperlink targets."""
2281 nodelist, blank_finish = self.anonymous_target(match)
2282 self.parent += nodelist
2283 self.explicit_list(blank_finish)
2284 return [], next_state, []
2286 def anonymous_target(self, match):
2287 lineno = self.state_machine.abs_line_number()
2288 block, indent, offset, blank_finish \
2289 = self.state_machine.get_first_known_indented(match.end(),
2290 until_blank=1)
2291 blocktext = match.string[:match.end()] + '\n'.join(block)
2292 block = [escape2null(line) for line in block]
2293 target = self.make_target(block, blocktext, lineno, '')
2294 return [target], blank_finish
2296 def line(self, match, context, next_state):
2297 """Section title overline or transition marker."""
2298 if self.state_machine.match_titles:
2299 return [match.string], 'Line', []
2300 elif match.string.strip() == '::':
2301 raise statemachine.TransitionCorrection('text')
2302 elif len(match.string.strip()) < 4:
2303 msg = self.reporter.info(
2304 'Unexpected possible title overline or transition.\n'
2305 "Treating it as ordinary text because it's so short.",
2306 line=self.state_machine.abs_line_number())
2307 self.parent += msg
2308 raise statemachine.TransitionCorrection('text')
2309 else:
2310 blocktext = self.state_machine.line
2311 msg = self.reporter.severe(
2312 'Unexpected section title or transition.',
2313 nodes.literal_block(blocktext, blocktext),
2314 line=self.state_machine.abs_line_number())
2315 self.parent += msg
2316 return [], next_state, []
2318 def text(self, match, context, next_state):
2319 """Titles, definition lists, paragraphs."""
2320 return [match.string], 'Text', []
2323 class RFC2822Body(Body):
2325 """
2326 RFC2822 headers are only valid as the first constructs in documents. As
2327 soon as anything else appears, the `Body` state should take over.
2328 """
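# The 'rfc2822' transition recognizes header-style fields such as
#
#     Author: J. Random Hacker
#     Contact: jrh@example.org
#
# (illustrative values); they are collected into a field_list with the
# "rfc2822" class by the rfc2822() method below.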
2330 patterns = Body.patterns.copy() # can't modify the original
2331 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2332 initial_transitions = [(name, 'Body')
2333 for name in Body.initial_transitions]
2334 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2336 def rfc2822(self, match, context, next_state):
2337 """RFC2822-style field list item."""
2338 fieldlist = nodes.field_list(classes=['rfc2822'])
2339 self.parent += fieldlist
2340 field, blank_finish = self.rfc2822_field(match)
2341 fieldlist += field
2342 offset = self.state_machine.line_offset + 1 # next line
2343 newline_offset, blank_finish = self.nested_list_parse(
2344 self.state_machine.input_lines[offset:],
2345 input_offset=self.state_machine.abs_line_offset() + 1,
2346 node=fieldlist, initial_state='RFC2822List',
2347 blank_finish=blank_finish)
2348 self.goto_line(newline_offset)
2349 if not blank_finish:
2350 self.parent += self.unindent_warning(
2351 'RFC2822-style field list')
2352 return [], next_state, []
2354 def rfc2822_field(self, match):
2355 name = match.string[:match.string.find(':')]
2356 indented, indent, line_offset, blank_finish = \
2357 self.state_machine.get_first_known_indented(match.end(),
2358 until_blank=1)
2359 fieldnode = nodes.field()
2360 fieldnode += nodes.field_name(name, name)
2361 fieldbody = nodes.field_body('\n'.join(indented))
2362 fieldnode += fieldbody
2363 if indented:
2364 self.nested_parse(indented, input_offset=line_offset,
2365 node=fieldbody)
2366 return fieldnode, blank_finish
2369 class SpecializedBody(Body):
2371 """
2372 Superclass for second and subsequent compound element members. Compound
2373 elements are lists and list-like constructs.
2375 All transition methods are disabled (redefined as `invalid_input`).
2376 Override individual methods in subclasses to re-enable.
2378 For example, once an initial bullet list item, say, is recognized, the
2379 `BulletList` subclass takes over, with a "bullet_list" node as its
2380 container. Upon encountering the initial bullet list item, `Body.bullet`
2381 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2382 starts up a nested parsing session with `BulletList` as the initial state.
2383 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2384 as only bullet list items are encountered, they are parsed and inserted
2385 into the container. The first construct which is *not* a bullet list item
2386 triggers the `invalid_input` method, which ends the nested parse and
2387 closes the container. `BulletList` needs to recognize input that is
2388 invalid in the context of a bullet list, which means everything *other
2389 than* bullet list items, so it inherits the transition list created in
2390 `Body`.
2391 """
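# For example (illustrative input), while parsing
#
#     - first item
#     - second item
#     not a list item
#
# the nested `BulletList` state consumes the second item; the last line
# triggers `invalid_input`, which backs up one line and ends the nested
# state machine, so the parent `Body` state sees that line again as
# ordinary text.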
2393 def invalid_input(self, match=None, context=None, next_state=None):
2394 """Not a compound element member. Abort this state machine."""
2395 self.state_machine.previous_line() # back up so parent SM can reassess
2396 raise EOFError
2398 indent = invalid_input
2399 bullet = invalid_input
2400 enumerator = invalid_input
2401 field_marker = invalid_input
2402 option_marker = invalid_input
2403 doctest = invalid_input
2404 line_block = invalid_input
2405 grid_table_top = invalid_input
2406 simple_table_top = invalid_input
2407 explicit_markup = invalid_input
2408 anonymous = invalid_input
2409 line = invalid_input
2410 text = invalid_input
2413 class BulletList(SpecializedBody):
2415 """Second and subsequent bullet_list list_items."""
2417 def bullet(self, match, context, next_state):
2418 """Bullet list item."""
2419 if match.string[0] != self.parent['bullet']:
2420 # different bullet: new list
2421 self.invalid_input()
2422 listitem, blank_finish = self.list_item(match.end())
2423 self.parent += listitem
2424 self.blank_finish = blank_finish
2425 return [], next_state, []
2428 class DefinitionList(SpecializedBody):
2430 """Second and subsequent definition_list_items."""
2432 def text(self, match, context, next_state):
2433 """Definition lists."""
2434 return [match.string], 'Definition', []
2437 class EnumeratedList(SpecializedBody):
2439 """Second and subsequent enumerated_list list_items."""
2441 def enumerator(self, match, context, next_state):
2442 """Enumerated list item."""
2443 format, sequence, text, ordinal = self.parse_enumerator(
2444 match, self.parent['enumtype'])
2445 if ( format != self.format
2446 or (sequence != '#' and (sequence != self.parent['enumtype']
2447 or self.auto
2448 or ordinal != (self.lastordinal + 1)))
2449 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2450 # different enumeration: new list
2451 self.invalid_input()
2452 if sequence == '#':
2453 self.auto = 1
2454 listitem, blank_finish = self.list_item(match.end())
2455 self.parent += listitem
2456 self.blank_finish = blank_finish
2457 self.lastordinal = ordinal
2458 return [], next_state, []
2461 class FieldList(SpecializedBody):
2463 """Second and subsequent field_list fields."""
2465 def field_marker(self, match, context, next_state):
2466 """Field list field."""
2467 field, blank_finish = self.field(match)
2468 self.parent += field
2469 self.blank_finish = blank_finish
2470 return [], next_state, []
2473 class OptionList(SpecializedBody):
2475 """Second and subsequent option_list option_list_items."""
2477 def option_marker(self, match, context, next_state):
2478 """Option list item."""
2479 try:
2480 option_list_item, blank_finish = self.option_list_item(match)
2481 except MarkupError, (message, lineno):
2482 self.invalid_input()
2483 self.parent += option_list_item
2484 self.blank_finish = blank_finish
2485 return [], next_state, []
2488 class RFC2822List(SpecializedBody, RFC2822Body):
2490 """Second and subsequent RFC2822-style field_list fields."""
2492 patterns = RFC2822Body.patterns
2493 initial_transitions = RFC2822Body.initial_transitions
2495 def rfc2822(self, match, context, next_state):
2496 """RFC2822-style field list item."""
2497 field, blank_finish = self.rfc2822_field(match)
2498 self.parent += field
2499 self.blank_finish = blank_finish
2500 return [], 'RFC2822List', []
2502 blank = SpecializedBody.invalid_input
2505 class ExtensionOptions(FieldList):
2507 """
2508 Parse field_list fields for extension options.
2510 No nested parsing is done (including inline markup parsing).
2511 """
2513 def parse_field_body(self, indented, offset, node):
2514 """Override `Body.parse_field_body` for simpler parsing."""
2515 lines = []
2516 for line in list(indented) + ['']:
2517 if line.strip():
2518 lines.append(line)
2519 elif lines:
2520 text = '\n'.join(lines)
2521 node += nodes.paragraph(text, text)
2522 lines = []
2525 class LineBlock(SpecializedBody):
2527 """Second and subsequent lines of a line_block."""
2529 blank = SpecializedBody.invalid_input
2531 def line_block(self, match, context, next_state):
2532 """New line of line block."""
2533 lineno = self.state_machine.abs_line_number()
2534 line, messages, blank_finish = self.line_block_line(match, lineno)
2535 self.parent += line
2536 self.parent.parent += messages
2537 self.blank_finish = blank_finish
2538 return [], next_state, []
2541 class Explicit(SpecializedBody):
2543 """Second and subsequent explicit markup construct."""
2545 def explicit_markup(self, match, context, next_state):
2546 """Footnotes, hyperlink targets, directives, comments."""
2547 nodelist, blank_finish = self.explicit_construct(match)
2548 self.parent += nodelist
2549 self.blank_finish = blank_finish
2550 return [], next_state, []
2552 def anonymous(self, match, context, next_state):
2553 """Anonymous hyperlink targets."""
2554 nodelist, blank_finish = self.anonymous_target(match)
2555 self.parent += nodelist
2556 self.blank_finish = blank_finish
2557 return [], next_state, []
2559 blank = SpecializedBody.invalid_input
2562 class SubstitutionDef(Body):
2564 """
2565 Parser for the contents of a substitution_definition element.
2566 """
2568 patterns = {
2569 'embedded_directive': re.compile(r'(%s)::( +|$)'
2570 % Inliner.simplename, re.UNICODE),
2571 'text': r''}
2572 initial_transitions = ['embedded_directive', 'text']
2574 def embedded_directive(self, match, context, next_state):
2575 nodelist, blank_finish = self.directive(match,
2576 alt=self.parent['names'][0])
2577 self.parent += nodelist
2578 if not self.state_machine.at_eof():
2579 self.blank_finish = blank_finish
2580 raise EOFError
2582 def text(self, match, context, next_state):
2583 if not self.state_machine.at_eof():
2584 self.blank_finish = self.state_machine.is_next_line_blank()
2585 raise EOFError
2588 class Text(RSTState):
2590 """
2591 Classifier of second line of a text block.
2593 Could be a paragraph, a definition list item, or a title.
2594 """
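# Illustration of the three outcomes (hypothetical input): a line of
# punctuation below "A Title" makes it a section title (`underline`);
# an indented line below "a term" makes a definition list item
# (`indent`); any other following text simply continues the paragraph
# (`text`).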
2596 patterns = {'underline': Body.patterns['line'],
2597 'text': r''}
2598 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2600 def blank(self, match, context, next_state):
2601 """End of paragraph."""
2602 paragraph, literalnext = self.paragraph(
2603 context, self.state_machine.abs_line_number() - 1)
2604 self.parent += paragraph
2605 if literalnext:
2606 self.parent += self.literal_block()
2607 return [], 'Body', []
2609 def eof(self, context):
2610 if context:
2611 self.blank(None, context, None)
2612 return []
2614 def indent(self, match, context, next_state):
2615 """Definition list item."""
2616 definitionlist = nodes.definition_list()
2617 definitionlistitem, blank_finish = self.definition_list_item(context)
2618 definitionlist += definitionlistitem
2619 self.parent += definitionlist
2620 offset = self.state_machine.line_offset + 1 # next line
2621 newline_offset, blank_finish = self.nested_list_parse(
2622 self.state_machine.input_lines[offset:],
2623 input_offset=self.state_machine.abs_line_offset() + 1,
2624 node=definitionlist, initial_state='DefinitionList',
2625 blank_finish=blank_finish, blank_finish_state='Definition')
2626 self.goto_line(newline_offset)
2627 if not blank_finish:
2628 self.parent += self.unindent_warning('Definition list')
2629 return [], 'Body', []
2631 def underline(self, match, context, next_state):
2632 """Section title."""
2633 lineno = self.state_machine.abs_line_number()
2634 title = context[0].rstrip()
2635 underline = match.string.rstrip()
2636 source = title + '\n' + underline
2637 messages = []
2638 if column_width(title) > len(underline):
2639 if len(underline) < 4:
2640 if self.state_machine.match_titles:
2641 msg = self.reporter.info(
2642 'Possible title underline, too short for the title.\n'
2643 "Treating it as ordinary text because it's so short.",
2644 line=lineno)
2645 self.parent += msg
2646 raise statemachine.TransitionCorrection('text')
2647 else:
2648 blocktext = context[0] + '\n' + self.state_machine.line
2649 msg = self.reporter.warning(
2650 'Title underline too short.',
2651 nodes.literal_block(blocktext, blocktext), line=lineno)
2652 messages.append(msg)
2653 if not self.state_machine.match_titles:
2654 blocktext = context[0] + '\n' + self.state_machine.line
2655 msg = self.reporter.severe(
2656 'Unexpected section title.',
2657 nodes.literal_block(blocktext, blocktext), line=lineno)
2658 self.parent += messages
2659 self.parent += msg
2660 return [], next_state, []
2661 style = underline[0]
2662 context[:] = []
2663 self.section(title, source, style, lineno - 1, messages)
2664 return [], next_state, []
2666 def text(self, match, context, next_state):
2667 """Paragraph."""
2668 startline = self.state_machine.abs_line_number() - 1
2669 msg = None
2670 try:
2671 block = self.state_machine.get_text_block(flush_left=1)
2672 except statemachine.UnexpectedIndentationError, instance:
2673 block, source, lineno = instance.args
2674 msg = self.reporter.error('Unexpected indentation.',
2675 source=source, line=lineno)
2676 lines = context + list(block)
2677 paragraph, literalnext = self.paragraph(lines, startline)
2678 self.parent += paragraph
2679 self.parent += msg
2680 if literalnext:
2681 try:
2682 self.state_machine.next_line()
2683 except EOFError:
2684 pass
2685 self.parent += self.literal_block()
2686 return [], next_state, []
2688 def literal_block(self):
2689 """Return a list of nodes."""
2690 indented, indent, offset, blank_finish = \
2691 self.state_machine.get_indented()
2692 while indented and not indented[-1].strip():
2693 indented.trim_end()
2694 if not indented:
2695 return self.quoted_literal_block()
2696 data = '\n'.join(indented)
2697 literal_block = nodes.literal_block(data, data)
2698 literal_block.line = offset + 1
2699 nodelist = [literal_block]
2700 if not blank_finish:
2701 nodelist.append(self.unindent_warning('Literal block'))
2702 return nodelist
2704 def quoted_literal_block(self):
2705 abs_line_offset = self.state_machine.abs_line_offset()
2706 offset = self.state_machine.line_offset
2707 parent_node = nodes.Element()
2708 new_abs_offset = self.nested_parse(
2709 self.state_machine.input_lines[offset:],
2710 input_offset=abs_line_offset, node=parent_node, match_titles=0,
2711 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2712 'initial_state': 'QuotedLiteralBlock'})
2713 self.goto_line(new_abs_offset)
2714 return parent_node.children
2716 def definition_list_item(self, termline):
2717 indented, indent, line_offset, blank_finish = \
2718 self.state_machine.get_indented()
2719 definitionlistitem = nodes.definition_list_item(
2720 '\n'.join(termline + list(indented)))
2721 lineno = self.state_machine.abs_line_number() - 1
2722 definitionlistitem.line = lineno
2723 termlist, messages = self.term(termline, lineno)
2724 definitionlistitem += termlist
2725 definition = nodes.definition('', *messages)
2726 definitionlistitem += definition
2727 if termline[0][-2:] == '::':
2728 definition += self.reporter.info(
2729 'Blank line missing before literal block (after the "::")? '
2730 'Interpreted as a definition list item.', line=line_offset+1)
2731 self.nested_parse(indented, input_offset=line_offset, node=definition)
2732 return definitionlistitem, blank_finish
2734 classifier_delimiter = re.compile(' +: +')
2736 def term(self, lines, lineno):
2737 """Return a definition_list's term and optional classifiers."""
2738 assert len(lines) == 1
2739 text_nodes, messages = self.inline_text(lines[0], lineno)
2740 term_node = nodes.term()
2741 node_list = [term_node]
2742 for i in range(len(text_nodes)):
2743 node = text_nodes[i]
2744 if isinstance(node, nodes.Text):
2745 parts = self.classifier_delimiter.split(node.rawsource)
2746 if len(parts) == 1:
2747 node_list[-1] += node
2748 else:
2750 node_list[-1] += nodes.Text(parts[0].rstrip())
2751 for part in parts[1:]:
2752 classifier_node = nodes.classifier('', part)
2753 node_list.append(classifier_node)
2754 else:
2755 node_list[-1] += node
2756 return node_list, messages
2759 class SpecializedText(Text):
2761 """
2762 Superclass for second and subsequent lines of Text-variants.
2764 All transition methods are disabled. Override individual methods in
2765 subclasses to re-enable.
2766 """
2768 def eof(self, context):
2769 """Incomplete construct."""
2770 return []
2772 def invalid_input(self, match=None, context=None, next_state=None):
2773 """Not a compound element member. Abort this state machine."""
2774 raise EOFError
2776 blank = invalid_input
2777 indent = invalid_input
2778 underline = invalid_input
2779 text = invalid_input
2782 class Definition(SpecializedText):
2784 """Second line of potential definition_list_item."""
2786 def eof(self, context):
2787 """Not a definition."""
2788 self.state_machine.previous_line(2) # so parent SM can reassess
2789 return []
2791 def indent(self, match, context, next_state):
2792 """Definition list item."""
2793 definitionlistitem, blank_finish = self.definition_list_item(context)
2794 self.parent += definitionlistitem
2795 self.blank_finish = blank_finish
2796 return [], 'DefinitionList', []
2799 class Line(SpecializedText):
2801 """
2802 Second line of over- & underlined section title or transition marker.
2803 """
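# Illustration (hypothetical input): after an initial line of
# punctuation, this state distinguishes a transition marker,
#
#     ----------
#
# followed by a blank line, from an overlined section title such as
#
#     ==========
#      A Title
#     ==========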
2805 eofcheck = 1 # @@@ ???
2806 """Set to 0 while parsing sections, so that we don't catch the EOF."""
2808 def eof(self, context):
2809 """Transition marker at end of section or document."""
2810 marker = context[0].strip()
2811 if self.memo.section_bubble_up_kludge:
2812 self.memo.section_bubble_up_kludge = 0
2813 elif len(marker) < 4:
2814 self.state_correction(context)
2815 if self.eofcheck: # ignore EOFError with sections
2816 lineno = self.state_machine.abs_line_number() - 1
2817 transition = nodes.transition(rawsource=context[0])
2818 transition.line = lineno
2819 self.parent += transition
2820 self.eofcheck = 1
2821 return []
2823 def blank(self, match, context, next_state):
2824 """Transition marker."""
2825 lineno = self.state_machine.abs_line_number() - 1
2826 marker = context[0].strip()
2827 if len(marker) < 4:
2828 self.state_correction(context)
2829 transition = nodes.transition(rawsource=marker)
2830 transition.line = lineno
2831 self.parent += transition
2832 return [], 'Body', []
2834 def text(self, match, context, next_state):
2835 """Potential over- & underlined title."""
2836 lineno = self.state_machine.abs_line_number() - 1
2837 overline = context[0]
2838 title = match.string
2839 underline = ''
2840 try:
2841 underline = self.state_machine.next_line()
2842 except EOFError:
2843 blocktext = overline + '\n' + title
2844 if len(overline.rstrip()) < 4:
2845 self.short_overline(context, blocktext, lineno, 2)
2846 else:
2847 msg = self.reporter.severe(
2848 'Incomplete section title.',
2849 nodes.literal_block(blocktext, blocktext), line=lineno)
2850 self.parent += msg
2851 return [], 'Body', []
2852 source = '%s\n%s\n%s' % (overline, title, underline)
2853 overline = overline.rstrip()
2854 underline = underline.rstrip()
2855 if not self.transitions['underline'][0].match(underline):
2856 blocktext = overline + '\n' + title + '\n' + underline
2857 if len(overline.rstrip()) < 4:
2858 self.short_overline(context, blocktext, lineno, 2)
2859 else:
2860 msg = self.reporter.severe(
2861 'Missing matching underline for section title overline.',
2862 nodes.literal_block(source, source), line=lineno)
2863 self.parent += msg
2864 return [], 'Body', []
2865 elif overline != underline:
2866 blocktext = overline + '\n' + title + '\n' + underline
2867 if len(overline.rstrip()) < 4:
2868 self.short_overline(context, blocktext, lineno, 2)
2869 else:
2870 msg = self.reporter.severe(
2871 'Title overline & underline mismatch.',
2872 nodes.literal_block(source, source), line=lineno)
2873 self.parent += msg
2874 return [], 'Body', []
2875 title = title.rstrip()
2876 messages = []
2877 if column_width(title) > len(overline):
2878 blocktext = overline + '\n' + title + '\n' + underline
2879 if len(overline.rstrip()) < 4:
2880 self.short_overline(context, blocktext, lineno, 2)
2881 else:
2882 msg = self.reporter.warning(
2883 'Title overline too short.',
2884 nodes.literal_block(source, source), line=lineno)
2885 messages.append(msg)
2886 style = (overline[0], underline[0])
2887 self.eofcheck = 0 # @@@ not sure this is correct
2888 self.section(title.lstrip(), source, style, lineno + 1, messages)
2889 self.eofcheck = 1
2890 return [], 'Body', []
2892 indent = text # indented title
2894 def underline(self, match, context, next_state):
2895 overline = context[0]
2896 blocktext = overline + '\n' + self.state_machine.line
2897 lineno = self.state_machine.abs_line_number() - 1
2898 if len(overline.rstrip()) < 4:
2899 self.short_overline(context, blocktext, lineno, 1)
2900 msg = self.reporter.error(
2901 'Invalid section title or transition marker.',
2902 nodes.literal_block(blocktext, blocktext), line=lineno)
2903 self.parent += msg
2904 return [], 'Body', []
2906 def short_overline(self, context, blocktext, lineno, lines=1):
2907 msg = self.reporter.info(
2908 'Possible incomplete section title.\nTreating the overline as '
2909 "ordinary text because it's so short.", line=lineno)
2910 self.parent += msg
2911 self.state_correction(context, lines)
2913 def state_correction(self, context, lines=1):
2914 self.state_machine.previous_line(lines)
2915 context[:] = []
2916 raise statemachine.StateCorrection('Body', 'text')
2919 class QuotedLiteralBlock(RSTState):
2921 """
2922 Nested parse handler for quoted (unindented) literal blocks.
2924 Special-purpose. Not for inclusion in `state_classes`.
2925 """
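# Example of a quoted (unindented) literal block (illustrative input):
#
#     ::
#
#     > print 'quoted'
#     > print 'literal'
#
# Every line must start with the same quoting character; an indented or
# inconsistently quoted line ends the block with an error message.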
2927 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
2928 'text': r''}
2929 initial_transitions = ('initial_quoted', 'text')
2931 def __init__(self, state_machine, debug=0):
2932 RSTState.__init__(self, state_machine, debug)
2933 self.messages = []
2934 self.initial_lineno = None
2936 def blank(self, match, context, next_state):
2937 if context:
2938 raise EOFError
2939 else:
2940 return context, next_state, []
2942 def eof(self, context):
2943 if context:
2944 text = '\n'.join(context)
2945 literal_block = nodes.literal_block(text, text)
2946 literal_block.line = self.initial_lineno
2947 self.parent += literal_block
2948 else:
2949 self.parent += self.reporter.warning(
2950 'Literal block expected; none found.',
2951 line=self.state_machine.abs_line_number())
2952 self.state_machine.previous_line()
2953 self.parent += self.messages
2954 return []
2956 def indent(self, match, context, next_state):
2957 assert context, ('QuotedLiteralBlock.indent: context should not '
2958 'be empty!')
2959 self.messages.append(
2960 self.reporter.error('Unexpected indentation.',
2961 line=self.state_machine.abs_line_number()))
2962 self.state_machine.previous_line()
2963 raise EOFError
2965 def initial_quoted(self, match, context, next_state):
2966 """Match arbitrary quote character on the first line only."""
2967 self.remove_transition('initial_quoted')
2968 quote = match.string[0]
2969 pattern = re.compile(re.escape(quote))
2970 # New transition matches consistent quotes only:
2971 self.add_transition('quoted',
2972 (pattern, self.quoted, self.__class__.__name__))
2973 self.initial_lineno = self.state_machine.abs_line_number()
2974 return [match.string], next_state, []
2976 def quoted(self, match, context, next_state):
2977 """Match consistent quotes on subsequent lines."""
2978 context.append(match.string)
2979 return context, next_state, []
2981 def text(self, match, context, next_state):
2982 if context:
2983 self.messages.append(
2984 self.reporter.error('Inconsistent literal block quoting.',
2985 line=self.state_machine.abs_line_number()))
2986 self.state_machine.previous_line()
2987 raise EOFError
2990 state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
2991 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
2992 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
2993 """Standard set of State classes used to start `RSTStateMachine`."""