1 # Author: David Goodger
2 # Contact: goodger@users.sourceforge.net
3 # Revision: $Revision$
4 # Date: $Date$
5 # Copyright: This module has been placed in the public domain.
7 """
8 This is the ``docutils.parsers.rst.states`` module, the core of
9 the reStructuredText parser. It defines the following:
11 :Classes:
12 - `RSTStateMachine`: reStructuredText parser's entry point.
13 - `NestedStateMachine`: recursive StateMachine.
14 - `RSTState`: reStructuredText State superclass.
15 - `Inliner`: For parsing inline markup.
16 - `Body`: Generic classifier of the first line of a block.
17 - `SpecializedBody`: Superclass for compound element members.
18 - `BulletList`: Second and subsequent bullet_list list_items.
19 - `DefinitionList`: Second+ definition_list_items.
20 - `EnumeratedList`: Second+ enumerated_list list_items.
21 - `FieldList`: Second+ fields.
22 - `OptionList`: Second+ option_list_items.
23 - `RFC2822List`: Second+ RFC2822-style fields.
24 - `ExtensionOptions`: Parses directive option fields.
25 - `Explicit`: Second+ explicit markup constructs.
26 - `SubstitutionDef`: For embedded directives in substitution definitions.
27 - `Text`: Classifier of second line of a text block.
28 - `SpecializedText`: Superclass for continuation lines of Text-variants.
29 - `Definition`: Second line of potential definition_list_item.
30 - `Line`: Second line of overlined section title or transition marker.
31 - `Struct`: An auxiliary collection class.
33 :Exception classes:
34 - `MarkupError`
35 - `ParserError`
36 - `MarkupMismatch`
38 :Functions:
39 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
40 - `unescape()`: Return a string, nulls removed or restored to backslashes.
42 :Attributes:
43 - `state_classes`: set of State classes used with `RSTStateMachine`.
45 Parser Overview
46 ===============
48 The reStructuredText parser is implemented as a recursive state machine,
49 examining its input one line at a time. To understand how the parser works,
50 please first become familiar with the `docutils.statemachine` module. In the
51 description below, references are made to classes defined in this module;
52 please see the individual classes for details.
54 Parsing proceeds as follows:
56 1. The state machine examines each line of input, checking each of the
57 transition patterns of the state `Body`, in order, looking for a match.
58 The implicit transitions (blank lines and indentation) are checked before
59 any others. The 'text' transition is a catch-all (matches anything).
61 2. The method associated with the matched transition pattern is called.
63 A. Some transition methods are self-contained, appending elements to the
64 document tree (`Body.doctest` parses a doctest block). The parser's
65 current line index is advanced to the end of the element, and parsing
66 continues with step 1.
68 B. Other transition methods trigger the creation of a nested state machine,
69 whose job is to parse a compound construct ('indent' does a block quote,
70 'bullet' does a bullet list, 'overline' does a section [first checking
71 for a valid section header], etc.).
73 - In the case of lists and explicit markup, a one-off state machine is
74 created and run to parse contents of the first item.
76 - A new state machine is created and its initial state is set to the
77 appropriate specialized state (`BulletList` in the case of the
78 'bullet' transition; see `SpecializedBody` for more detail). This
79 state machine is run to parse the compound element (or series of
80 explicit markup elements), and returns as soon as a non-member element
81 is encountered. For example, the `BulletList` state machine ends as
82 soon as it encounters an element which is not a list item of that
83 bullet list. The optional omission of inter-element blank lines is
84 enabled by this nested state machine.
86 - The current line index is advanced to the end of the elements parsed,
87 and parsing continues with step 1.
89 C. The result of the 'text' transition depends on the next line of text.
90 The current state is changed to `Text`, under which the second line is
91 examined. If the second line is:
93 - Indented: The element is a definition list item, and parsing proceeds
94 similarly to step 2.B, using the `DefinitionList` state.
96 - A line of uniform punctuation characters: The element is a section
97 header; again, parsing proceeds as in step 2.B, and `Body` is still
98 used.
100 - Anything else: The element is a paragraph, which is examined for
101 inline markup and appended to the parent element. Processing
102 continues with step 1.
103 """
105 __docformat__ = 'reStructuredText'
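# ---------------------------------------------------------------------------
# Editorial sketch (not part of the original module): the overview above
# describes how `RSTStateMachine` drives parsing.  A minimal way to exercise
# it end to end, assuming only the standard docutils helpers
# (`docutils.frontend.OptionParser`, `docutils.utils.new_document`), is the
# sketch below; the source name '<example>' is just a placeholder.

def _example_parse_rst_source():
    from docutils.frontend import OptionParser
    from docutils.parsers.rst import Parser
    from docutils import utils
    parser = Parser()
    # Default runtime settings for a parser-only component set:
    settings = OptionParser(components=(Parser,)).get_default_values()
    document = utils.new_document('<example>', settings)
    # Parser.parse() creates an RSTStateMachine and calls its run() method
    # with the input lines and this document node:
    parser.parse('A paragraph with *emphasis* and ``literal`` text.\n',
                 document)
    return document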
108 import sys
109 import re
110 import roman
111 from types import TupleType
112 from docutils import nodes, statemachine, utils, urischemes
113 from docutils import ApplicationError, DataError
114 from docutils.statemachine import StateMachineWS, StateWS
115 from docutils.nodes import fully_normalize_name as normalize_name
116 from docutils.nodes import whitespace_normalize_name
117 from docutils.utils import escape2null, unescape, column_width
118 from docutils.parsers.rst import directives, languages, tableparser, roles
119 from docutils.parsers.rst.languages import en as _fallback_language_module
122 class MarkupError(DataError): pass
123 class UnknownInterpretedRoleError(DataError): pass
124 class InterpretedRoleNotImplementedError(DataError): pass
125 class ParserError(ApplicationError): pass
126 class MarkupMismatch(Exception): pass
129 class Struct:
131 """Stores data attributes for dotted-attribute access."""
133 def __init__(self, **keywordargs):
134 self.__dict__.update(keywordargs)
137 class RSTStateMachine(StateMachineWS):
140 reStructuredText's master StateMachine.
142 The entry point to reStructuredText parsing is the `run()` method.
145 def run(self, input_lines, document, input_offset=0, match_titles=1,
146 inliner=None):
148 Parse `input_lines` and modify the `document` node in place.
150 Extend `StateMachineWS.run()`: set up parse-global data and
151 run the StateMachine.
153 self.language = languages.get_language(
154 document.settings.language_code)
155 self.match_titles = match_titles
156 if inliner is None:
157 inliner = Inliner()
158 inliner.init_customizations(document.settings)
159 self.memo = Struct(document=document,
160 reporter=document.reporter,
161 language=self.language,
162 title_styles=[],
163 section_level=0,
164 section_bubble_up_kludge=0,
165 inliner=inliner)
166 self.document = document
167 self.attach_observer(document.note_source)
168 self.reporter = self.memo.reporter
169 self.node = document
170 results = StateMachineWS.run(self, input_lines, input_offset,
171 input_source=document['source'])
172 assert results == [], 'RSTStateMachine.run() results should be empty!'
173 self.node = self.memo = None # remove unneeded references
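# Editorial sketch (not in the original source): RSTStateMachine can also be
# driven directly; this is roughly what ``docutils.parsers.rst.Parser.parse()``
# does.  `document` is assumed to come from `docutils.utils.new_document()`,
# `rst_source` is a reStructuredText string, and tab_width=8 is just a
# convenient default here.

def _example_run_state_machine(document, rst_source):
    from docutils.statemachine import string2lines
    state_machine = RSTStateMachine(state_classes=state_classes,
                                    initial_state='Body')
    # Split the source into a list of lines, expanding tabs:
    input_lines = string2lines(rst_source, tab_width=8, convert_whitespace=1)
    # run() modifies `document` in place; it returns nothing of interest.
    state_machine.run(input_lines, document)
    return document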
176 class NestedStateMachine(StateMachineWS):
179 StateMachine run from within other StateMachine runs, to parse nested
180 document structures.
183 def run(self, input_lines, input_offset, memo, node, match_titles=1):
185 Parse `input_lines` and populate a `docutils.nodes.document` instance.
187 Extend `StateMachineWS.run()`: set up document-wide data.
189 self.match_titles = match_titles
190 self.memo = memo
191 self.document = memo.document
192 self.attach_observer(self.document.note_source)
193 self.reporter = memo.reporter
194 self.language = memo.language
195 self.node = node
196 results = StateMachineWS.run(self, input_lines, input_offset)
197 assert results == [], ('NestedStateMachine.run() results should be '
198 'empty!')
199 return results
202 class RSTState(StateWS):
205 reStructuredText State superclass.
207 Contains methods used by all State subclasses.
210 nested_sm = NestedStateMachine
212 def __init__(self, state_machine, debug=0):
213 self.nested_sm_kwargs = {'state_classes': state_classes,
214 'initial_state': 'Body'}
215 StateWS.__init__(self, state_machine, debug)
217 def runtime_init(self):
218 StateWS.runtime_init(self)
219 memo = self.state_machine.memo
220 self.memo = memo
221 self.reporter = memo.reporter
222 self.inliner = memo.inliner
223 self.document = memo.document
224 self.parent = self.state_machine.node
226 def goto_line(self, abs_line_offset):
228 Jump to input line `abs_line_offset`, ignoring jumps past the end.
230 try:
231 self.state_machine.goto_line(abs_line_offset)
232 except EOFError:
233 pass
235 def no_match(self, context, transitions):
237 Override `StateWS.no_match` to generate a system message.
239 This code should never be run.
241 self.reporter.severe(
242 'Internal error: no transition pattern match. State: "%s"; '
243 'transitions: %s; context: %s; current line: %r.'
244 % (self.__class__.__name__, transitions, context,
245 self.state_machine.line),
246 line=self.state_machine.abs_line_number())
247 return context, None, []
249 def bof(self, context):
250 """Called at beginning of file."""
251 return [], []
253 def nested_parse(self, block, input_offset, node, match_titles=0,
254 state_machine_class=None, state_machine_kwargs=None):
256 Create a new StateMachine rooted at `node` and run it over the input
257 `block`.
259 if state_machine_class is None:
260 state_machine_class = self.nested_sm
261 if state_machine_kwargs is None:
262 state_machine_kwargs = self.nested_sm_kwargs
263 block_length = len(block)
264 state_machine = state_machine_class(debug=self.debug,
265 **state_machine_kwargs)
266 state_machine.run(block, input_offset, memo=self.memo,
267 node=node, match_titles=match_titles)
268 state_machine.unlink()
269 new_offset = state_machine.abs_line_offset()
270 # No `block.parent` implies disconnected -- lines aren't in sync:
271 if block.parent and (len(block) - block_length) != 0:
272 # Adjustment for block if modified in nested parse:
273 self.state_machine.next_line(len(block) - block_length)
274 return new_offset
276 def nested_list_parse(self, block, input_offset, node, initial_state,
277 blank_finish,
278 blank_finish_state=None,
279 extra_settings={},
280 match_titles=0,
281 state_machine_class=None,
282 state_machine_kwargs=None):
284 Create a new StateMachine rooted at `node` and run it over the input
285 `block`. Also keep track of optional intermediate blank lines and the
286 required final one.
288 if state_machine_class is None:
289 state_machine_class = self.nested_sm
290 if state_machine_kwargs is None:
291 state_machine_kwargs = self.nested_sm_kwargs.copy()
292 state_machine_kwargs['initial_state'] = initial_state
293 state_machine = state_machine_class(debug=self.debug,
294 **state_machine_kwargs)
295 if blank_finish_state is None:
296 blank_finish_state = initial_state
297 state_machine.states[blank_finish_state].blank_finish = blank_finish
298 for key, value in extra_settings.items():
299 setattr(state_machine.states[initial_state], key, value)
300 state_machine.run(block, input_offset, memo=self.memo,
301 node=node, match_titles=match_titles)
302 blank_finish = state_machine.states[blank_finish_state].blank_finish
303 state_machine.unlink()
304 return state_machine.abs_line_offset(), blank_finish
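    # Editorial example (hedged): a transition method for a compound
    # construct typically drives nested_list_parse() along these lines
    # (compare Body.bullet() further below); `some_list_node` is a
    # placeholder for the container node being built:
    #
    #     offset = self.state_machine.line_offset + 1
    #     new_offset, blank_finish = self.nested_list_parse(
    #         self.state_machine.input_lines[offset:],
    #         input_offset=self.state_machine.abs_line_offset() + 1,
    #         node=some_list_node, initial_state='BulletList',
    #         blank_finish=blank_finish)
    #     self.goto_line(new_offset)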
306 def section(self, title, source, style, lineno, messages):
307 """Check for a valid subsection and create one if it checks out."""
308 if self.check_subsection(source, style, lineno):
309 self.new_subsection(title, lineno, messages)
311 def check_subsection(self, source, style, lineno):
313 Check for a valid subsection header. Return 1 (true) or None (false).
315 When a new section is reached that isn't a subsection of the current
316 section, back up the line count (use ``previous_line(-x)``), then
317 ``raise EOFError``. The current StateMachine will finish, then the
318 calling StateMachine can re-examine the title. This will work its way
319 back up the calling chain until the correct section level is reached.
321 @@@ Alternative: Evaluate the title, store the title info & level, and
322 back up the chain until that level is reached. Store in memo? Or
323 return in results?
325 :Exception: `EOFError` when a sibling or supersection encountered.
327 memo = self.memo
328 title_styles = memo.title_styles
329 mylevel = memo.section_level
330 try: # check for existing title style
331 level = title_styles.index(style) + 1
332 except ValueError: # new title style
333 if len(title_styles) == memo.section_level: # new subsection
334 title_styles.append(style)
335 return 1
336 else: # not at lowest level
337 self.parent += self.title_inconsistent(source, lineno)
338 return None
339 if level <= mylevel: # sibling or supersection
340 memo.section_level = level # bubble up to parent section
341 if len(style) == 2:
342 memo.section_bubble_up_kludge = 1
343 # back up 2 lines for underline title, 3 for overline title
344 self.state_machine.previous_line(len(style) + 1)
345 raise EOFError # let parent section re-evaluate
346 if level == mylevel + 1: # immediate subsection
347 return 1
348 else: # invalid subsection
349 self.parent += self.title_inconsistent(source, lineno)
350 return None
352 def title_inconsistent(self, sourcetext, lineno):
353 error = self.reporter.severe(
354 'Title level inconsistent:', nodes.literal_block('', sourcetext),
355 line=lineno)
356 return error
358 def new_subsection(self, title, lineno, messages):
359 """Append new subsection to document tree. On return, check level."""
360 memo = self.memo
361 mylevel = memo.section_level
362 memo.section_level += 1
363 section_node = nodes.section()
364 self.parent += section_node
365 textnodes, title_messages = self.inline_text(title, lineno)
366 titlenode = nodes.title(title, '', *textnodes)
367 name = normalize_name(titlenode.astext())
368 section_node['names'].append(name)
369 section_node += titlenode
370 section_node += messages
371 section_node += title_messages
372 self.document.note_implicit_target(section_node, section_node)
373 offset = self.state_machine.line_offset + 1
374 absoffset = self.state_machine.abs_line_offset() + 1
375 newabsoffset = self.nested_parse(
376 self.state_machine.input_lines[offset:], input_offset=absoffset,
377 node=section_node, match_titles=1)
378 self.goto_line(newabsoffset)
379 if memo.section_level <= mylevel: # can't handle next section?
380 raise EOFError # bubble up to supersection
381 # reset section_level; next pass will detect it properly
382 memo.section_level = mylevel
384 def paragraph(self, lines, lineno):
386 Return a list (paragraph & messages) & a boolean: literal_block next?
388 data = '\n'.join(lines).rstrip()
389 if re.search(r'(?<!\\)(\\\\)*::$', data):
390 if len(data) == 2:
391 return [], 1
392 elif data[-3] in ' \n':
393 text = data[:-3].rstrip()
394 else:
395 text = data[:-1]
396 literalnext = 1
397 else:
398 text = data
399 literalnext = 0
400 textnodes, messages = self.inline_text(text, lineno)
401 p = nodes.paragraph(data, '', *textnodes)
402 p.line = lineno
403 return [p] + messages, literalnext
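    # Editorial examples (hedged) of the '::' handling above:
    #
    #     'Paragraph::'   -> paragraph text 'Paragraph:'; literal block next
    #     'Paragraph ::'  -> paragraph text 'Paragraph';  literal block next
    #     '::'            -> no paragraph at all;         literal block next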
405 def inline_text(self, text, lineno):
407 Return 2 lists: nodes (text and inline elements), and system_messages.
409 return self.inliner.parse(text, lineno, self.memo, self.parent)
411 def unindent_warning(self, node_name):
412 return self.reporter.warning(
413 '%s ends without a blank line; unexpected unindent.' % node_name,
414 line=(self.state_machine.abs_line_number() + 1))
417 def build_regexp(definition, compile=1):
419 Build, compile and return a regular expression based on `definition`.
421 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
422 where "parts" is a list of regular expressions and/or regular
423 expression definitions to be joined into an or-group.
425 name, prefix, suffix, parts = definition
426 part_strings = []
427 for part in parts:
428 if type(part) is TupleType:
429 part_strings.append(build_regexp(part, None))
430 else:
431 part_strings.append(part)
432 or_group = '|'.join(part_strings)
433 regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
434 if compile:
435 return re.compile(regexp, re.UNICODE)
436 else:
437 return regexp
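# Editorial sketch (not in the original source): a small build_regexp()
# definition and what it produces.  The group names and alternatives
# ('example', 'nested', 'foo', ...) are made up purely for illustration.

def _example_build_regexp():
    definition = ('example', r'\b', r'\b',      # name, prefix, suffix
                  [r'foo', r'bar',              # plain alternatives
                   ('nested', r'\(', r'\)',     # nested definition
                    [r'x', r'y'])])
    pattern = build_regexp(definition)
    # pattern.pattern == r'\b(?P<example>foo|bar|\((?P<nested>x|y)\))\b'
    return pattern.match('foo') is not None     # True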
440 class Inliner:
443 Parse inline markup; call the `parse()` method.
446 def __init__(self):
447 self.implicit_dispatch = [(self.patterns.uri, self.standalone_uri),]
448 """List of (pattern, bound method) tuples, used by
449 `self.implicit_inline`."""
451 def init_customizations(self, settings):
452 """Setting-based customizations; run when parsing begins."""
453 if settings.pep_references:
454 self.implicit_dispatch.append((self.patterns.pep,
455 self.pep_reference))
456 if settings.rfc_references:
457 self.implicit_dispatch.append((self.patterns.rfc,
458 self.rfc_reference))
460 def parse(self, text, lineno, memo, parent):
461 # Needs to be refactored for nested inline markup.
462 # Add nested_parse() method?
464 Return 2 lists: nodes (text and inline elements), and system_messages.
466 Using `self.patterns.initial`, a pattern which matches start-strings
467 (emphasis, strong, interpreted, phrase reference, literal,
468 substitution reference, and inline target) and complete constructs
469 (simple reference, footnote reference), search for a candidate. When
470 one is found, check for validity (e.g., not a quoted '*' character).
471 If valid, search for the corresponding end string if applicable, and
472 check it for validity. If not found or invalid, generate a warning
473 and ignore the start-string. Implicit inline markup (e.g. standalone
474 URIs) is found last.
476 self.reporter = memo.reporter
477 self.document = memo.document
478 self.language = memo.language
479 self.parent = parent
480 pattern_search = self.patterns.initial.search
481 dispatch = self.dispatch
482 remaining = escape2null(text)
483 processed = []
484 unprocessed = []
485 messages = []
486 while remaining:
487 match = pattern_search(remaining)
488 if match:
489 groups = match.groupdict()
490 method = dispatch[groups['start'] or groups['backquote']
491 or groups['refend'] or groups['fnend']]
492 before, inlines, remaining, sysmessages = method(self, match,
493 lineno)
494 unprocessed.append(before)
495 messages += sysmessages
496 if inlines:
497 processed += self.implicit_inline(''.join(unprocessed),
498 lineno)
499 processed += inlines
500 unprocessed = []
501 else:
502 break
503 remaining = ''.join(unprocessed) + remaining
504 if remaining:
505 processed += self.implicit_inline(remaining, lineno)
506 return processed, messages
508 openers = '\'"([{<'
509 closers = '\'")]}>'
510 start_string_prefix = (r'((?<=^)|(?<=[-/: \n%s]))' % re.escape(openers))
511 end_string_suffix = (r'((?=$)|(?=[-/:.,;!? \n\x00%s]))'
512 % re.escape(closers))
513 non_whitespace_before = r'(?<![ \n])'
514 non_whitespace_escape_before = r'(?<![ \n\x00])'
515 non_whitespace_after = r'(?![ \n])'
516 # Alphanumerics with isolated internal [-._] chars (i.e. not 2 together):
517 simplename = r'(?:(?!_)\w)+(?:[-._](?:(?!_)\w)+)*'
518 # Valid URI characters (see RFC 2396 & RFC 2732);
519 # final \x00 allows backslash escapes in URIs:
520 uric = r"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
521 # Delimiter indicating the end of a URI (not part of the URI):
522 uri_end_delim = r"""[>]"""
523 # Last URI character; same as uric but no punctuation:
524 urilast = r"""[_~*/=+a-zA-Z0-9]"""
525 # End of a URI (either 'urilast' or 'uric followed by a
526 # uri_end_delim'):
527 uri_end = r"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
528 emailc = r"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
529 email_pattern = r"""
530 %(emailc)s+(?:\.%(emailc)s+)* # name
531 (?<!\x00)@ # at
532 %(emailc)s+(?:\.%(emailc)s*)* # host
533 %(uri_end)s # final URI char
535 parts = ('initial_inline', start_string_prefix, '',
536 [('start', '', non_whitespace_after, # simple start-strings
537 [r'\*\*', # strong
538 r'\*(?!\*)', # emphasis but not strong
539 r'``', # literal
540 r'_`', # inline internal target
541 r'\|(?!\|)'] # substitution reference
543 ('whole', '', end_string_suffix, # whole constructs
544 [# reference name & end-string
545 r'(?P<refname>%s)(?P<refend>__?)' % simplename,
546 ('footnotelabel', r'\[', r'(?P<fnend>\]_)',
547 [r'[0-9]+', # manually numbered
548 r'\#(%s)?' % simplename, # auto-numbered (w/ label?)
549 r'\*', # auto-symbol
550 r'(?P<citationlabel>%s)' % simplename] # citation reference
554 ('backquote', # interpreted text or phrase reference
555 '(?P<role>(:%s:)?)' % simplename, # optional role
556 non_whitespace_after,
557 ['`(?!`)'] # but not literal
561 patterns = Struct(
562 initial=build_regexp(parts),
563 emphasis=re.compile(non_whitespace_escape_before
564 + r'(\*)' + end_string_suffix),
565 strong=re.compile(non_whitespace_escape_before
566 + r'(\*\*)' + end_string_suffix),
567 interpreted_or_phrase_ref=re.compile(
568 r"""
569 %(non_whitespace_escape_before)s
572 (?P<suffix>
573 (?P<role>:%(simplename)s:)?
574 (?P<refend>__?)?
577 %(end_string_suffix)s
578 """ % locals(), re.VERBOSE | re.UNICODE),
579 embedded_uri=re.compile(
580 r"""
582 (?:[ \n]+|^) # spaces or beginning of line/string
583 < # open bracket
584 %(non_whitespace_after)s
585 ([^<>\x00]+) # anything but angle brackets & nulls
586 %(non_whitespace_before)s
587 > # close bracket w/o whitespace before
589 $ # end of string
590 """ % locals(), re.VERBOSE),
591 literal=re.compile(non_whitespace_before + '(``)'
592 + end_string_suffix),
593 target=re.compile(non_whitespace_escape_before
594 + r'(`)' + end_string_suffix),
595 substitution_ref=re.compile(non_whitespace_escape_before
596 + r'(\|_{0,2})'
597 + end_string_suffix),
598 email=re.compile(email_pattern % locals() + '$', re.VERBOSE),
599 uri=re.compile(
600 (r"""
601 %(start_string_prefix)s
602 (?P<whole>
603 (?P<absolute> # absolute URI
604 (?P<scheme> # scheme (http, ftp, mailto)
605 [a-zA-Z][a-zA-Z0-9.+-]*
609 ( # either:
610 (//?)? # hierarchical URI
611 %(uric)s* # URI characters
612 %(uri_end)s # final URI char
614 ( # optional query
615 \?%(uric)s*
616 %(uri_end)s
618 ( # optional fragment
619 \#%(uric)s*
620 %(uri_end)s
624 | # *OR*
625 (?P<email> # email address
626 """ + email_pattern + r"""
629 %(end_string_suffix)s
630 """) % locals(), re.VERBOSE),
631 pep=re.compile(
632 r"""
633 %(start_string_prefix)s
635 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
637 (PEP\s+(?P<pepnum2>\d+)) # reference by name
639 %(end_string_suffix)s""" % locals(), re.VERBOSE),
640 rfc=re.compile(
641 r"""
642 %(start_string_prefix)s
643 (RFC(-|\s+)?(?P<rfcnum>\d+))
644 %(end_string_suffix)s""" % locals(), re.VERBOSE))
646 def quoted_start(self, match):
647 """Return 1 if inline markup start-string is 'quoted', 0 if not."""
648 string = match.string
649 start = match.start()
650 end = match.end()
651 if start == 0: # start-string at beginning of text
652 return 0
653 prestart = string[start - 1]
654 try:
655 poststart = string[end]
656 if self.openers.index(prestart) \
657 == self.closers.index(poststart): # quoted
658 return 1
659 except IndexError: # start-string at end of text
660 return 1
661 except ValueError: # not quoted
662 pass
663 return 0
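    # Editorial example (hedged): in the text  '"*" is not emphasis'  the '*'
    # start-string is preceded by '"' and followed by '"'.  Because those two
    # characters sit at the same index in self.openers and self.closers,
    # quoted_start() returns 1 and no emphasis start-string is recognized.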
665 def inline_obj(self, match, lineno, end_pattern, nodeclass,
666 restore_backslashes=0):
667 string = match.string
668 matchstart = match.start('start')
669 matchend = match.end('start')
670 if self.quoted_start(match):
671 return (string[:matchend], [], string[matchend:], [], '')
672 endmatch = end_pattern.search(string[matchend:])
673 if endmatch and endmatch.start(1): # 1 or more chars
674 text = unescape(endmatch.string[:endmatch.start(1)],
675 restore_backslashes)
676 textend = matchend + endmatch.end(1)
677 rawsource = unescape(string[matchstart:textend], 1)
678 return (string[:matchstart], [nodeclass(rawsource, text)],
679 string[textend:], [], endmatch.group(1))
680 msg = self.reporter.warning(
681 'Inline %s start-string without end-string.'
682 % nodeclass.__name__, line=lineno)
683 text = unescape(string[matchstart:matchend], 1)
684 rawsource = unescape(string[matchstart:matchend], 1)
685 prb = self.problematic(text, rawsource, msg)
686 return string[:matchstart], [prb], string[matchend:], [msg], ''
688 def problematic(self, text, rawsource, message):
689 msgid = self.document.set_id(message, self.parent)
690 problematic = nodes.problematic(rawsource, text, refid=msgid)
691 prbid = self.document.set_id(problematic)
692 message.add_backref(prbid)
693 return problematic
695 def emphasis(self, match, lineno):
696 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
697 match, lineno, self.patterns.emphasis, nodes.emphasis)
698 return before, inlines, remaining, sysmessages
700 def strong(self, match, lineno):
701 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
702 match, lineno, self.patterns.strong, nodes.strong)
703 return before, inlines, remaining, sysmessages
705 def interpreted_or_phrase_ref(self, match, lineno):
706 end_pattern = self.patterns.interpreted_or_phrase_ref
707 string = match.string
708 matchstart = match.start('backquote')
709 matchend = match.end('backquote')
710 rolestart = match.start('role')
711 role = match.group('role')
712 position = ''
713 if role:
714 role = role[1:-1]
715 position = 'prefix'
716 elif self.quoted_start(match):
717 return (string[:matchend], [], string[matchend:], [])
718 endmatch = end_pattern.search(string[matchend:])
719 if endmatch and endmatch.start(1): # 1 or more chars
720 textend = matchend + endmatch.end()
721 if endmatch.group('role'):
722 if role:
723 msg = self.reporter.warning(
724 'Multiple roles in interpreted text (both '
725 'prefix and suffix present; only one allowed).',
726 line=lineno)
727 text = unescape(string[rolestart:textend], 1)
728 prb = self.problematic(text, text, msg)
729 return string[:rolestart], [prb], string[textend:], [msg]
730 role = endmatch.group('suffix')[1:-1]
731 position = 'suffix'
732 escaped = endmatch.string[:endmatch.start(1)]
733 rawsource = unescape(string[matchstart:textend], 1)
734 if rawsource[-1:] == '_':
735 if role:
736 msg = self.reporter.warning(
737 'Mismatch: both interpreted text role %s and '
738 'reference suffix.' % position, line=lineno)
739 text = unescape(string[rolestart:textend], 1)
740 prb = self.problematic(text, text, msg)
741 return string[:rolestart], [prb], string[textend:], [msg]
742 return self.phrase_ref(string[:matchstart], string[textend:],
743 rawsource, escaped, unescape(escaped))
744 else:
745 rawsource = unescape(string[rolestart:textend], 1)
746 nodelist, messages = self.interpreted(rawsource, escaped, role,
747 lineno)
748 return (string[:rolestart], nodelist,
749 string[textend:], messages)
750 msg = self.reporter.warning(
751 'Inline interpreted text or phrase reference start-string '
752 'without end-string.', line=lineno)
753 text = unescape(string[matchstart:matchend], 1)
754 prb = self.problematic(text, text, msg)
755 return string[:matchstart], [prb], string[matchend:], [msg]
757 def phrase_ref(self, before, after, rawsource, escaped, text):
758 match = self.patterns.embedded_uri.search(escaped)
759 if match:
760 text = unescape(escaped[:match.start(0)])
761 uri_text = match.group(2)
762 uri = ''.join(uri_text.split())
763 uri = self.adjust_uri(uri)
764 if uri:
765 target = nodes.target(match.group(1), refuri=uri)
766 else:
767 raise ApplicationError('problem with URI: %r' % uri_text)
768 if not text:
769 text = uri
770 else:
771 target = None
772 refname = normalize_name(text)
773 reference = nodes.reference(rawsource, text,
774 name=whitespace_normalize_name(text))
775 node_list = [reference]
776 if rawsource[-2:] == '__':
777 if target:
778 reference['refuri'] = uri
779 else:
780 reference['anonymous'] = 1
781 else:
782 if target:
783 reference['refuri'] = uri
784 target['names'].append(refname)
785 self.document.note_explicit_target(target, self.parent)
786 node_list.append(target)
787 else:
788 reference['refname'] = refname
789 self.document.note_refname(reference)
790 return before, node_list, after, []
792 def adjust_uri(self, uri):
793 match = self.patterns.email.match(uri)
794 if match:
795 return 'mailto:' + uri
796 else:
797 return uri
799 def interpreted(self, rawsource, text, role, lineno):
800 role_fn, messages = roles.role(role, self.language, lineno,
801 self.reporter)
802 if role_fn:
803 nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
804 return nodes, messages + messages2
805 else:
806 msg = self.reporter.error(
807 'Unknown interpreted text role "%s".' % role,
808 line=lineno)
809 return ([self.problematic(rawsource, rawsource, msg)],
810 messages + [msg])
812 def literal(self, match, lineno):
813 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
814 match, lineno, self.patterns.literal, nodes.literal,
815 restore_backslashes=1)
816 return before, inlines, remaining, sysmessages
818 def inline_internal_target(self, match, lineno):
819 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
820 match, lineno, self.patterns.target, nodes.target)
821 if inlines and isinstance(inlines[0], nodes.target):
822 assert len(inlines) == 1
823 target = inlines[0]
824 name = normalize_name(target.astext())
825 target['names'].append(name)
826 self.document.note_explicit_target(target, self.parent)
827 return before, inlines, remaining, sysmessages
829 def substitution_reference(self, match, lineno):
830 before, inlines, remaining, sysmessages, endstring = self.inline_obj(
831 match, lineno, self.patterns.substitution_ref,
832 nodes.substitution_reference)
833 if len(inlines) == 1:
834 subref_node = inlines[0]
835 if isinstance(subref_node, nodes.substitution_reference):
836 subref_text = subref_node.astext()
837 self.document.note_substitution_ref(subref_node, subref_text)
838 if endstring[-1:] == '_':
839 reference_node = nodes.reference(
840 '|%s%s' % (subref_text, endstring), '')
841 if endstring[-2:] == '__':
842 reference_node['anonymous'] = 1
843 else:
844 reference_node['refname'] = normalize_name(subref_text)
845 self.document.note_refname(reference_node)
846 reference_node += subref_node
847 inlines = [reference_node]
848 return before, inlines, remaining, sysmessages
850 def footnote_reference(self, match, lineno):
852 Handles `nodes.footnote_reference` and `nodes.citation_reference`
853 elements.
855 label = match.group('footnotelabel')
856 refname = normalize_name(label)
857 string = match.string
858 before = string[:match.start('whole')]
859 remaining = string[match.end('whole'):]
860 if match.group('citationlabel'):
861 refnode = nodes.citation_reference('[%s]_' % label,
862 refname=refname)
863 refnode += nodes.Text(label)
864 self.document.note_citation_ref(refnode)
865 else:
866 refnode = nodes.footnote_reference('[%s]_' % label)
867 if refname[0] == '#':
868 refname = refname[1:]
869 refnode['auto'] = 1
870 self.document.note_autofootnote_ref(refnode)
871 elif refname == '*':
872 refname = ''
873 refnode['auto'] = '*'
874 self.document.note_symbol_footnote_ref(
875 refnode)
876 else:
877 refnode += nodes.Text(label)
878 if refname:
879 refnode['refname'] = refname
880 self.document.note_footnote_ref(refnode)
881 if utils.get_trim_footnote_ref_space(self.document.settings):
882 before = before.rstrip()
883 return (before, [refnode], remaining, [])
885 def reference(self, match, lineno, anonymous=None):
886 referencename = match.group('refname')
887 refname = normalize_name(referencename)
888 referencenode = nodes.reference(
889 referencename + match.group('refend'), referencename,
890 name=whitespace_normalize_name(referencename))
891 if anonymous:
892 referencenode['anonymous'] = 1
893 else:
894 referencenode['refname'] = refname
895 self.document.note_refname(referencenode)
896 string = match.string
897 matchstart = match.start('whole')
898 matchend = match.end('whole')
899 return (string[:matchstart], [referencenode], string[matchend:], [])
901 def anonymous_reference(self, match, lineno):
902 return self.reference(match, lineno, anonymous=1)
904 def standalone_uri(self, match, lineno):
905 if not match.group('scheme') or urischemes.schemes.has_key(
906 match.group('scheme').lower()):
907 if match.group('email'):
908 addscheme = 'mailto:'
909 else:
910 addscheme = ''
911 text = match.group('whole')
912 unescaped = unescape(text, 0)
913 return [nodes.reference(unescape(text, 1), unescaped,
914 refuri=addscheme + unescaped)]
915 else: # not a valid scheme
916 raise MarkupMismatch
918 def pep_reference(self, match, lineno):
919 text = match.group(0)
920 if text.startswith('pep-'):
921 pepnum = int(match.group('pepnum1'))
922 elif text.startswith('PEP'):
923 pepnum = int(match.group('pepnum2'))
924 else:
925 raise MarkupMismatch
926 ref = (self.document.settings.pep_base_url
927 + self.document.settings.pep_file_url_template % pepnum)
928 unescaped = unescape(text, 0)
929 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
931 rfc_url = 'rfc%d.html'
933 def rfc_reference(self, match, lineno):
934 text = match.group(0)
935 if text.startswith('RFC'):
936 rfcnum = int(match.group('rfcnum'))
937 ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
938 else:
939 raise MarkupMismatch
940 unescaped = unescape(text, 0)
941 return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
943 def implicit_inline(self, text, lineno):
945 Check each of the patterns in `self.implicit_dispatch` for a match,
946 and dispatch to the stored method for the pattern. Recursively check
947 the text before and after the match. Return a list of `nodes.Text`
948 and inline element nodes.
950 if not text:
951 return []
952 for pattern, method in self.implicit_dispatch:
953 match = pattern.search(text)
954 if match:
955 try:
956 # Must recurse on strings before *and* after the match;
957 # there may be multiple patterns.
958 return (self.implicit_inline(text[:match.start()], lineno)
959 + method(match, lineno) +
960 self.implicit_inline(text[match.end():], lineno))
961 except MarkupMismatch:
962 pass
963 return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
965 dispatch = {'*': emphasis,
966 '**': strong,
967 '`': interpreted_or_phrase_ref,
968 '``': literal,
969 '_`': inline_internal_target,
970 ']_': footnote_reference,
971 '|': substitution_reference,
972 '_': reference,
973 '__': anonymous_reference}
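    # Editorial examples (hedged): which dispatch entry handles a construct,
    # keyed by the group matched by `patterns.initial`:
    #
    #     '*emphasis*'   -> dispatch['*']   (emphasis)
    #     '**strong**'   -> dispatch['**']  (strong)
    #     '``literal``'  -> dispatch['``']  (literal)
    #     '|sub|'        -> dispatch['|']   (substitution_reference)
    #     'name_'        -> dispatch['_']   (reference)
    #     '[1]_'         -> dispatch[']_']  (footnote_reference)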
976 def _loweralpha_to_int(s, _zero=(ord('a')-1)):
977 return ord(s) - _zero
979 def _upperalpha_to_int(s, _zero=(ord('A')-1)):
980 return ord(s) - _zero
982 def _lowerroman_to_int(s):
983 return roman.fromRoman(s.upper())
986 class Body(RSTState):
989 Generic classifier of the first line of a block.
992 double_width_pad_char = tableparser.TableParser.double_width_pad_char
993 """Padding character for East Asian double-width text."""
995 enum = Struct()
996 """Enumerated list parsing information."""
998 enum.formatinfo = {
999 'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
1000 'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
1001 'period': Struct(prefix='', suffix='.', start=0, end=-1)}
1002 enum.formats = enum.formatinfo.keys()
1003 enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
1004 'lowerroman', 'upperroman'] # ORDERED!
1005 enum.sequencepats = {'arabic': '[0-9]+',
1006 'loweralpha': '[a-z]',
1007 'upperalpha': '[A-Z]',
1008 'lowerroman': '[ivxlcdm]+',
1009 'upperroman': '[IVXLCDM]+',}
1010 enum.converters = {'arabic': int,
1011 'loweralpha': _loweralpha_to_int,
1012 'upperalpha': _upperalpha_to_int,
1013 'lowerroman': _lowerroman_to_int,
1014 'upperroman': roman.fromRoman}
1016 enum.sequenceregexps = {}
1017 for sequence in enum.sequences:
1018 enum.sequenceregexps[sequence] = re.compile(
1019 enum.sequencepats[sequence] + '$')
1021 grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
1022 """Matches the top (& bottom) of a full table)."""
1024 simple_table_top_pat = re.compile('=+( +=+)+ *$')
1025 """Matches the top of a simple table."""
1027 simple_table_border_pat = re.compile('=+[ =]*$')
1028 """Matches the bottom & header bottom of a simple table."""
1030 pats = {}
1031 """Fragments of patterns used by transitions."""
1033 pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1034 pats['alpha'] = '[a-zA-Z]'
1035 pats['alphanum'] = '[a-zA-Z0-9]'
1036 pats['alphanumplus'] = '[a-zA-Z0-9_-]'
1037 pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1038 '|%(upperroman)s|#)' % enum.sequencepats)
1039 pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1040 # @@@ Loosen up the pattern? Allow Unicode?
1041 pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1042 pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1043 pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1044 pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats
1046 for format in enum.formats:
1047 pats[format] = '(?P<%s>%s%s%s)' % (
1048 format, re.escape(enum.formatinfo[format].prefix),
1049 pats['enum'], re.escape(enum.formatinfo[format].suffix))
1051 patterns = {
1052 'bullet': r'[-+*]( +|$)',
1053 'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
1054 'field_marker': r':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1055 'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
1056 'doctest': r'>>>( +|$)',
1057 'line_block': r'\|( +|$)',
1058 'grid_table_top': grid_table_top_pat,
1059 'simple_table_top': simple_table_top_pat,
1060 'explicit_markup': r'\.\.( +|$)',
1061 'anonymous': r'__( +|$)',
1062 'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
1063 'text': r''}
1064 initial_transitions = (
1065 'bullet',
1066 'enumerator',
1067 'field_marker',
1068 'option_marker',
1069 'doctest',
1070 'line_block',
1071 'grid_table_top',
1072 'simple_table_top',
1073 'explicit_markup',
1074 'anonymous',
1075 'line',
1076 'text')
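    # Editorial examples (hedged): sample first lines and the transition
    # (checked in `initial_transitions` order) that claims them:
    #
    #     '- item'           -> 'bullet'
    #     '3. item'          -> 'enumerator'
    #     ':field: body'     -> 'field_marker'
    #     '-a, --all  desc'  -> 'option_marker'
    #     '>>> 1 + 1'        -> 'doctest'
    #     '| a line'         -> 'line_block'
    #     '.. a comment'     -> 'explicit_markup'
    #     '-----'            -> 'line' (transition or section title line)
    #     'Plain words'      -> 'text' (the catch-all)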
1078 def indent(self, match, context, next_state):
1079 """Block quote."""
1080 indented, indent, line_offset, blank_finish = \
1081 self.state_machine.get_indented()
1082 elements = self.block_quote(indented, line_offset)
1083 self.parent += elements
1084 if not blank_finish:
1085 self.parent += self.unindent_warning('Block quote')
1086 return context, next_state, []
1088 def block_quote(self, indented, line_offset):
1089 elements = []
1090 while indented:
1091 (blockquote_lines,
1092 attribution_lines,
1093 attribution_offset,
1094 indented,
1095 new_line_offset) = self.split_attribution(indented, line_offset)
1096 blockquote = nodes.block_quote()
1097 self.nested_parse(blockquote_lines, line_offset, blockquote)
1098 elements.append(blockquote)
1099 if attribution_lines:
1100 attribution, messages = self.parse_attribution(
1101 attribution_lines, attribution_offset)
1102 blockquote += attribution
1103 elements += messages
1104 line_offset = new_line_offset
1105 while indented and not indented[0]:
1106 indented = indented[1:]
1107 line_offset += 1
1108 return elements
1110 # U+2014 is an em-dash:
1111 attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])')
1113 def split_attribution(self, indented, line_offset):
1115 Check for a block quote attribution and split it off:
1117 * First line after a blank line must begin with a dash ("--", "---",
1118 em-dash; matches `self.attribution_pattern`).
1119 * Every line after that must have consistent indentation.
1120 * Attributions must be preceded by block quote content.
1122 Return a tuple of: (block quote content lines, content offset,
1123 attribution lines, attribution offset, remaining indented lines).
1125 blank = None
1126 nonblank_seen = False
1127 for i in range(len(indented)):
1128 line = indented[i].rstrip()
1129 if line:
1130 if nonblank_seen and blank == i - 1: # last line blank
1131 match = self.attribution_pattern.match(line)
1132 if match:
1133 attribution_end, indent = self.check_attribution(
1134 indented, i)
1135 if attribution_end:
1136 a_lines = indented[i:attribution_end]
1137 a_lines.trim_left(match.end(), end=1)
1138 a_lines.trim_left(indent, start=1)
1139 return (indented[:i], a_lines,
1140 i, indented[attribution_end:],
1141 line_offset + attribution_end)
1142 nonblank_seen = True
1143 else:
1144 blank = i
1145 else:
1146 return (indented, None, None, None, None)
1148 def check_attribution(self, indented, attribution_start):
1149 """Check attribution shape
1151 indent = None
1152 i = attribution_start + 1
1153 for i in range(attribution_start + 1, len(indented)):
1154 line = indented[i].rstrip()
1155 if not line:
1156 break
1157 if indent is None:
1158 indent = len(line) - len(line.lstrip())
1159 elif len(line) - len(line.lstrip()) != indent:
1160 return None, None # bad shape; not an attribution
1161 return i, (indent or 0)
1163 def parse_attribution(self, indented, line_offset):
1164 text = '\n'.join(indented).rstrip()
1165 lineno = self.state_machine.abs_line_number() + line_offset
1166 textnodes, messages = self.inline_text(text, lineno)
1167 node = nodes.attribution(text, '', *textnodes)
1168 node.line = lineno
1169 return node, messages
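    # Editorial example (hedged): reStructuredText input whose indented block
    # exercises split_attribution()/parse_attribution() above:
    #
    #     An ordinary paragraph.
    #
    #         "A quoted passage inside a block quote."
    #
    #         -- Attribution line, possibly
    #            continued with consistent indentation.
    #
    # The "--" (or em-dash) line becomes a `nodes.attribution` child of the
    # block_quote; after another blank line, a second quote and attribution
    # may follow within the same indented block.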
1171 def bullet(self, match, context, next_state):
1172 """Bullet list item."""
1173 bulletlist = nodes.bullet_list()
1174 self.parent += bulletlist
1175 bulletlist['bullet'] = match.string[0]
1176 i, blank_finish = self.list_item(match.end())
1177 bulletlist += i
1178 offset = self.state_machine.line_offset + 1 # next line
1179 new_line_offset, blank_finish = self.nested_list_parse(
1180 self.state_machine.input_lines[offset:],
1181 input_offset=self.state_machine.abs_line_offset() + 1,
1182 node=bulletlist, initial_state='BulletList',
1183 blank_finish=blank_finish)
1184 self.goto_line(new_line_offset)
1185 if not blank_finish:
1186 self.parent += self.unindent_warning('Bullet list')
1187 return [], next_state, []
1189 def list_item(self, indent):
1190 if self.state_machine.line[indent:]:
1191 indented, line_offset, blank_finish = (
1192 self.state_machine.get_known_indented(indent))
1193 else:
1194 indented, indent, line_offset, blank_finish = (
1195 self.state_machine.get_first_known_indented(indent))
1196 listitem = nodes.list_item('\n'.join(indented))
1197 if indented:
1198 self.nested_parse(indented, input_offset=line_offset,
1199 node=listitem)
1200 return listitem, blank_finish
1202 def enumerator(self, match, context, next_state):
1203 """Enumerated List Item"""
1204 format, sequence, text, ordinal = self.parse_enumerator(match)
1205 if not self.is_enumerated_list_item(ordinal, sequence, format):
1206 raise statemachine.TransitionCorrection('text')
1207 enumlist = nodes.enumerated_list()
1208 self.parent += enumlist
1209 if sequence == '#':
1210 enumlist['enumtype'] = 'arabic'
1211 else:
1212 enumlist['enumtype'] = sequence
1213 enumlist['prefix'] = self.enum.formatinfo[format].prefix
1214 enumlist['suffix'] = self.enum.formatinfo[format].suffix
1215 if ordinal != 1:
1216 enumlist['start'] = ordinal
1217 msg = self.reporter.info(
1218 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1219 % (text, ordinal), line=self.state_machine.abs_line_number())
1220 self.parent += msg
1221 listitem, blank_finish = self.list_item(match.end())
1222 enumlist += listitem
1223 offset = self.state_machine.line_offset + 1 # next line
1224 newline_offset, blank_finish = self.nested_list_parse(
1225 self.state_machine.input_lines[offset:],
1226 input_offset=self.state_machine.abs_line_offset() + 1,
1227 node=enumlist, initial_state='EnumeratedList',
1228 blank_finish=blank_finish,
1229 extra_settings={'lastordinal': ordinal,
1230 'format': format,
1231 'auto': sequence == '#'})
1232 self.goto_line(newline_offset)
1233 if not blank_finish:
1234 self.parent += self.unindent_warning('Enumerated list')
1235 return [], next_state, []
1237 def parse_enumerator(self, match, expected_sequence=None):
1239 Analyze an enumerator and return the results.
1241 :Return:
1242 - the enumerator format ('period', 'parens', or 'rparen'),
1243 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1244 - the text of the enumerator, stripped of formatting, and
1245 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1246 ``None`` is returned for invalid enumerator text).
1248 The enumerator format has already been determined by the regular
1249 expression match. If `expected_sequence` is given, that sequence is
1250 tried first. If not, we check for Roman numeral 1. This way,
1251 single-character Roman numerals (which are also alphabetical) can be
1252 matched. If no sequence has been matched, all sequences are checked in
1253 order.
1255 groupdict = match.groupdict()
1256 sequence = ''
1257 for format in self.enum.formats:
1258 if groupdict[format]: # was this the format matched?
1259 break # yes; keep `format`
1260 else: # shouldn't happen
1261 raise ParserError('enumerator format not matched')
1262 text = groupdict[format][self.enum.formatinfo[format].start
1263 :self.enum.formatinfo[format].end]
1264 if text == '#':
1265 sequence = '#'
1266 elif expected_sequence:
1267 try:
1268 if self.enum.sequenceregexps[expected_sequence].match(text):
1269 sequence = expected_sequence
1270 except KeyError: # shouldn't happen
1271 raise ParserError('unknown enumerator sequence: %s'
1272 % sequence)
1273 elif text == 'i':
1274 sequence = 'lowerroman'
1275 elif text == 'I':
1276 sequence = 'upperroman'
1277 if not sequence:
1278 for sequence in self.enum.sequences:
1279 if self.enum.sequenceregexps[sequence].match(text):
1280 break
1281 else: # shouldn't happen
1282 raise ParserError('enumerator sequence not matched')
1283 if sequence == '#':
1284 ordinal = 1
1285 else:
1286 try:
1287 ordinal = self.enum.converters[sequence](text)
1288 except roman.InvalidRomanNumeralError:
1289 ordinal = None
1290 return format, sequence, text, ordinal
1292 def is_enumerated_list_item(self, ordinal, sequence, format):
1294 Check validity based on the ordinal value and the second line.
1296 Return true iff the ordinal is valid and the second line is blank,
1297 indented, or starts with the next enumerator or an auto-enumerator.
1299 if ordinal is None:
1300 return None
1301 try:
1302 next_line = self.state_machine.next_line()
1303 except EOFError: # end of input lines
1304 self.state_machine.previous_line()
1305 return 1
1306 else:
1307 self.state_machine.previous_line()
1308 if not next_line[:1].strip(): # blank or indented
1309 return 1
1310 result = self.make_enumerator(ordinal + 1, sequence, format)
1311 if result:
1312 next_enumerator, auto_enumerator = result
1313 try:
1314 if ( next_line.startswith(next_enumerator) or
1315 next_line.startswith(auto_enumerator) ):
1316 return 1
1317 except TypeError:
1318 pass
1319 return None
1321 def make_enumerator(self, ordinal, sequence, format):
1323 Construct and return the next enumerated list item marker, and an
1324 auto-enumerator ("#" instead of the regular enumerator).
1326 Return ``None`` for invalid (out of range) ordinals.
1327 """ #"
1328 if sequence == '#':
1329 enumerator = '#'
1330 elif sequence == 'arabic':
1331 enumerator = str(ordinal)
1332 else:
1333 if sequence.endswith('alpha'):
1334 if ordinal > 26:
1335 return None
1336 enumerator = chr(ordinal + ord('a') - 1)
1337 elif sequence.endswith('roman'):
1338 try:
1339 enumerator = roman.toRoman(ordinal)
1340 except roman.RomanError:
1341 return None
1342 else: # shouldn't happen
1343 raise ParserError('unknown enumerator sequence: "%s"'
1344 % sequence)
1345 if sequence.startswith('lower'):
1346 enumerator = enumerator.lower()
1347 elif sequence.startswith('upper'):
1348 enumerator = enumerator.upper()
1349 else: # shouldn't happen
1350 raise ParserError('unknown enumerator sequence: "%s"'
1351 % sequence)
1352 formatinfo = self.enum.formatinfo[format]
1353 next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
1354 + ' ')
1355 auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
1356 return next_enumerator, auto_enumerator
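    # Editorial examples (hedged) of make_enumerator() results, as
    # (next enumerator, auto-enumerator) pairs:
    #
    #     make_enumerator(3, 'loweralpha', 'parens')   -> ('(c) ', '(#) ')
    #     make_enumerator(4, 'upperroman', 'period')   -> ('IV. ', '#. ')
    #     make_enumerator(27, 'loweralpha', 'period')  -> None  (out of range)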
1358 def field_marker(self, match, context, next_state):
1359 """Field list item."""
1360 field_list = nodes.field_list()
1361 self.parent += field_list
1362 field, blank_finish = self.field(match)
1363 field_list += field
1364 offset = self.state_machine.line_offset + 1 # next line
1365 newline_offset, blank_finish = self.nested_list_parse(
1366 self.state_machine.input_lines[offset:],
1367 input_offset=self.state_machine.abs_line_offset() + 1,
1368 node=field_list, initial_state='FieldList',
1369 blank_finish=blank_finish)
1370 self.goto_line(newline_offset)
1371 if not blank_finish:
1372 self.parent += self.unindent_warning('Field list')
1373 return [], next_state, []
1375 def field(self, match):
1376 name = self.parse_field_marker(match)
1377 lineno = self.state_machine.abs_line_number()
1378 indented, indent, line_offset, blank_finish = \
1379 self.state_machine.get_first_known_indented(match.end())
1380 field_node = nodes.field()
1381 field_node.line = lineno
1382 name_nodes, name_messages = self.inline_text(name, lineno)
1383 field_node += nodes.field_name(name, '', *name_nodes)
1384 field_body = nodes.field_body('\n'.join(indented), *name_messages)
1385 field_node += field_body
1386 if indented:
1387 self.parse_field_body(indented, line_offset, field_body)
1388 return field_node, blank_finish
1390 def parse_field_marker(self, match):
1391 """Extract & return field name from a field marker match."""
1392 field = match.group()[1:] # strip off leading ':'
1393 field = field[:field.rfind(':')] # strip off trailing ':' etc.
1394 return field
1396 def parse_field_body(self, indented, offset, node):
1397 self.nested_parse(indented, input_offset=offset, node=node)
1399 def option_marker(self, match, context, next_state):
1400 """Option list item."""
1401 optionlist = nodes.option_list()
1402 try:
1403 listitem, blank_finish = self.option_list_item(match)
1404 except MarkupError, (message, lineno):
1405 # This shouldn't happen; pattern won't match.
1406 msg = self.reporter.error(
1407 'Invalid option list marker: %s' % message, line=lineno)
1408 self.parent += msg
1409 indented, indent, line_offset, blank_finish = \
1410 self.state_machine.get_first_known_indented(match.end())
1411 blockquote, messages = self.block_quote(indented, line_offset)
1412 self.parent += blockquote
1413 self.parent += messages
1414 if not blank_finish:
1415 self.parent += self.unindent_warning('Option list')
1416 return [], next_state, []
1417 self.parent += optionlist
1418 optionlist += listitem
1419 offset = self.state_machine.line_offset + 1 # next line
1420 newline_offset, blank_finish = self.nested_list_parse(
1421 self.state_machine.input_lines[offset:],
1422 input_offset=self.state_machine.abs_line_offset() + 1,
1423 node=optionlist, initial_state='OptionList',
1424 blank_finish=blank_finish)
1425 self.goto_line(newline_offset)
1426 if not blank_finish:
1427 self.parent += self.unindent_warning('Option list')
1428 return [], next_state, []
1430 def option_list_item(self, match):
1431 offset = self.state_machine.abs_line_offset()
1432 options = self.parse_option_marker(match)
1433 indented, indent, line_offset, blank_finish = \
1434 self.state_machine.get_first_known_indented(match.end())
1435 if not indented: # not an option list item
1436 self.goto_line(offset)
1437 raise statemachine.TransitionCorrection('text')
1438 option_group = nodes.option_group('', *options)
1439 description = nodes.description('\n'.join(indented))
1440 option_list_item = nodes.option_list_item('', option_group,
1441 description)
1442 if indented:
1443 self.nested_parse(indented, input_offset=line_offset,
1444 node=description)
1445 return option_list_item, blank_finish
1447 def parse_option_marker(self, match):
1449 Return a list of `nodes.option` and `nodes.option_argument` objects,
1450 parsed from an option marker match.
1452 :Exception: `MarkupError` for invalid option markers.
1454 optlist = []
1455 optionstrings = match.group().rstrip().split(', ')
1456 for optionstring in optionstrings:
1457 tokens = optionstring.split()
1458 delimiter = ' '
1459 firstopt = tokens[0].split('=')
1460 if len(firstopt) > 1:
1461 # "--opt=value" form
1462 tokens[:1] = firstopt
1463 delimiter = '='
1464 elif (len(tokens[0]) > 2
1465 and ((tokens[0].startswith('-')
1466 and not tokens[0].startswith('--'))
1467 or tokens[0].startswith('+'))):
1468 # "-ovalue" form
1469 tokens[:1] = [tokens[0][:2], tokens[0][2:]]
1470 delimiter = ''
1471 if len(tokens) > 1 and (tokens[1].startswith('<')
1472 and tokens[-1].endswith('>')):
1473 # "-o <value1 value2>" form; join all values into one token
1474 tokens[1:] = [' '.join(tokens[1:])]
1475 if 0 < len(tokens) <= 2:
1476 option = nodes.option(optionstring)
1477 option += nodes.option_string(tokens[0], tokens[0])
1478 if len(tokens) > 1:
1479 option += nodes.option_argument(tokens[1], tokens[1],
1480 delimiter=delimiter)
1481 optlist.append(option)
1482 else:
1483 raise MarkupError(
1484 'wrong number of option tokens (=%s), should be 1 or 2: '
1485 '"%s"' % (len(tokens), optionstring),
1486 self.state_machine.abs_line_number() + 1)
1487 return optlist
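    # Editorial example (hedged): for the option marker '-a FILE, --all=FILE'
    # this returns two `option` nodes:
    #
    #     option_string '-a',    option_argument 'FILE'  (delimiter ' ')
    #     option_string '--all', option_argument 'FILE'  (delimiter '=')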
1489 def doctest(self, match, context, next_state):
1490 data = '\n'.join(self.state_machine.get_text_block())
1491 self.parent += nodes.doctest_block(data, data)
1492 return [], next_state, []
1494 def line_block(self, match, context, next_state):
1495 """First line of a line block."""
1496 block = nodes.line_block()
1497 self.parent += block
1498 lineno = self.state_machine.abs_line_number()
1499 line, messages, blank_finish = self.line_block_line(match, lineno)
1500 block += line
1501 self.parent += messages
1502 if not blank_finish:
1503 offset = self.state_machine.line_offset + 1 # next line
1504 new_line_offset, blank_finish = self.nested_list_parse(
1505 self.state_machine.input_lines[offset:],
1506 input_offset=self.state_machine.abs_line_offset() + 1,
1507 node=block, initial_state='LineBlock',
1508 blank_finish=0)
1509 self.goto_line(new_line_offset)
1510 if not blank_finish:
1511 self.parent += self.reporter.warning(
1512 'Line block ends without a blank line.',
1513 line=(self.state_machine.abs_line_number() + 1))
1514 if len(block):
1515 if block[0].indent is None:
1516 block[0].indent = 0
1517 self.nest_line_block_lines(block)
1518 return [], next_state, []
1520 def line_block_line(self, match, lineno):
1521 """Return one line element of a line_block."""
1522 indented, indent, line_offset, blank_finish = \
1523 self.state_machine.get_first_known_indented(match.end(),
1524 until_blank=1)
1525 text = u'\n'.join(indented)
1526 text_nodes, messages = self.inline_text(text, lineno)
1527 line = nodes.line(text, '', *text_nodes)
1528 if match.string.rstrip() != '|': # not empty
1529 line.indent = len(match.group(1)) - 1
1530 return line, messages, blank_finish
1532 def nest_line_block_lines(self, block):
1533 for index in range(1, len(block)):
1534 if block[index].indent is None:
1535 block[index].indent = block[index - 1].indent
1536 self.nest_line_block_segment(block)
1538 def nest_line_block_segment(self, block):
1539 indents = [item.indent for item in block]
1540 least = min(indents)
1541 new_items = []
1542 new_block = nodes.line_block()
1543 for item in block:
1544 if item.indent > least:
1545 new_block.append(item)
1546 else:
1547 if len(new_block):
1548 self.nest_line_block_segment(new_block)
1549 new_items.append(new_block)
1550 new_block = nodes.line_block()
1551 new_items.append(item)
1552 if len(new_block):
1553 self.nest_line_block_segment(new_block)
1554 new_items.append(new_block)
1555 block[:] = new_items
1557 def grid_table_top(self, match, context, next_state):
1558 """Top border of a full table."""
1559 return self.table_top(match, context, next_state,
1560 self.isolate_grid_table,
1561 tableparser.GridTableParser)
1563 def simple_table_top(self, match, context, next_state):
1564 """Top border of a simple table."""
1565 return self.table_top(match, context, next_state,
1566 self.isolate_simple_table,
1567 tableparser.SimpleTableParser)
1569 def table_top(self, match, context, next_state,
1570 isolate_function, parser_class):
1571 """Top border of a generic table."""
1572 nodelist, blank_finish = self.table(isolate_function, parser_class)
1573 self.parent += nodelist
1574 if not blank_finish:
1575 msg = self.reporter.warning(
1576 'Blank line required after table.',
1577 line=self.state_machine.abs_line_number() + 1)
1578 self.parent += msg
1579 return [], next_state, []
1581 def table(self, isolate_function, parser_class):
1582 """Parse a table."""
1583 block, messages, blank_finish = isolate_function()
1584 if block:
1585 try:
1586 parser = parser_class()
1587 tabledata = parser.parse(block)
1588 tableline = (self.state_machine.abs_line_number() - len(block)
1589 + 1)
1590 table = self.build_table(tabledata, tableline)
1591 nodelist = [table] + messages
1592 except tableparser.TableMarkupError, detail:
1593 nodelist = self.malformed_table(
1594 block, ' '.join(detail.args)) + messages
1595 else:
1596 nodelist = messages
1597 return nodelist, blank_finish
1599 def isolate_grid_table(self):
1600 messages = []
1601 blank_finish = 1
1602 try:
1603 block = self.state_machine.get_text_block(flush_left=1)
1604 except statemachine.UnexpectedIndentationError, instance:
1605 block, source, lineno = instance.args
1606 messages.append(self.reporter.error('Unexpected indentation.',
1607 source=source, line=lineno))
1608 blank_finish = 0
1609 block.disconnect()
1610 # for East Asian chars:
1611 block.pad_double_width(self.double_width_pad_char)
1612 width = len(block[0].strip())
1613 for i in range(len(block)):
1614 block[i] = block[i].strip()
1615 if block[i][0] not in '+|': # check left edge
1616 blank_finish = 0
1617 self.state_machine.previous_line(len(block) - i)
1618 del block[i:]
1619 break
1620 if not self.grid_table_top_pat.match(block[-1]): # find bottom
1621 blank_finish = 0
1622 # from second-last to third line of table:
1623 for i in range(len(block) - 2, 1, -1):
1624 if self.grid_table_top_pat.match(block[i]):
1625 self.state_machine.previous_line(len(block) - i + 1)
1626 del block[i+1:]
1627 break
1628 else:
1629 messages.extend(self.malformed_table(block))
1630 return [], messages, blank_finish
1631 for i in range(len(block)): # check right edge
1632 if len(block[i]) != width or block[i][-1] not in '+|':
1633 messages.extend(self.malformed_table(block))
1634 return [], messages, blank_finish
1635 return block, messages, blank_finish
1637 def isolate_simple_table(self):
1638 start = self.state_machine.line_offset
1639 lines = self.state_machine.input_lines
1640 limit = len(lines) - 1
1641 toplen = len(lines[start].strip())
1642 pattern_match = self.simple_table_border_pat.match
1643 found = 0
1644 found_at = None
1645 i = start + 1
1646 while i <= limit:
1647 line = lines[i]
1648 match = pattern_match(line)
1649 if match:
1650 if len(line.strip()) != toplen:
1651 self.state_machine.next_line(i - start)
1652 messages = self.malformed_table(
1653 lines[start:i+1], 'Bottom/header table border does '
1654 'not match top border.')
1655 return [], messages, i == limit or not lines[i+1].strip()
1656 found += 1
1657 found_at = i
1658 if found == 2 or i == limit or not lines[i+1].strip():
1659 end = i
1660 break
1661 i += 1
1662 else: # reached end of input_lines
1663 if found:
1664 extra = ' or no blank line after table bottom'
1665 self.state_machine.next_line(found_at - start)
1666 block = lines[start:found_at+1]
1667 else:
1668 extra = ''
1669 self.state_machine.next_line(i - start - 1)
1670 block = lines[start:]
1671 messages = self.malformed_table(
1672 block, 'No bottom table border found%s.' % extra)
1673 return [], messages, not extra
1674 self.state_machine.next_line(end - start)
1675 block = lines[start:end+1]
1676 # for East Asian chars:
1677 block.pad_double_width(self.double_width_pad_char)
1678 return block, [], end == limit or not lines[end+1].strip()
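# For reference, the borders this method looks for in a simple table:
#
#     =====  =====      <- top border; its length sets the expected width
#     col 1  col 2
#     =====  =====      <- first full-width border (header separator)
#     a      b
#     =====  =====      <- second full-width border: the table ends here
#                          (or at a border followed by a blank line / EOF)
#
# A border of a different width is reported via malformed_table() as
# "Bottom/header table border does not match top border."; a missing bottom
# border is reported as "No bottom table border found".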
1680 def malformed_table(self, block, detail=''):
1681 block.replace(self.double_width_pad_char, '')
1682 data = '\n'.join(block)
1683 message = 'Malformed table.'
1684 lineno = self.state_machine.abs_line_number() - len(block) + 1
1685 if detail:
1686 message += '\n' + detail
1687 error = self.reporter.error(message, nodes.literal_block(data, data),
1688 line=lineno)
1689 return [error]
1691 def build_table(self, tabledata, tableline, stub_columns=0):
1692 colwidths, headrows, bodyrows = tabledata
1693 table = nodes.table()
1694 tgroup = nodes.tgroup(cols=len(colwidths))
1695 table += tgroup
1696 for colwidth in colwidths:
1697 colspec = nodes.colspec(colwidth=colwidth)
1698 if stub_columns:
1699 colspec.attributes['stub'] = 1
1700 stub_columns -= 1
1701 tgroup += colspec
1702 if headrows:
1703 thead = nodes.thead()
1704 tgroup += thead
1705 for row in headrows:
1706 thead += self.build_table_row(row, tableline)
1707 tbody = nodes.tbody()
1708 tgroup += tbody
1709 for row in bodyrows:
1710 tbody += self.build_table_row(row, tableline)
1711 return table
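# Sketch of the node structure produced for a 2-column table with one header
# row (pseudo-XML; attribute values are illustrative):
#
#     <table>
#         <tgroup cols="2">
#             <colspec colwidth="5">
#             <colspec colwidth="7">
#             <thead>
#                 <row> ... one entry per column ...
#             <tbody>
#                 <row> ...
#
# The <thead> element is only emitted when `headrows` is non-empty; stub
# columns get a "stub" attribute on their <colspec>.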
1713 def build_table_row(self, rowdata, tableline):
1714 row = nodes.row()
1715 for cell in rowdata:
1716 if cell is None:
1717 continue
1718 morerows, morecols, offset, cellblock = cell
1719 attributes = {}
1720 if morerows:
1721 attributes['morerows'] = morerows
1722 if morecols:
1723 attributes['morecols'] = morecols
1724 entry = nodes.entry(**attributes)
1725 row += entry
1726 if ''.join(cellblock):
1727 self.nested_parse(cellblock, input_offset=tableline+offset,
1728 node=entry)
1729 return row
1732 explicit = Struct()
1733 """Patterns and constants used for explicit markup recognition."""
1735 explicit.patterns = Struct(
1736 target=re.compile(r"""
1737 (
1738 _ # anonymous target
1739 | # *OR*
1740 (?!_) # no underscore at the beginning
1741 (?P<quote>`?) # optional open quote
1742 (?![ `]) # first char. not space or
1743 # backquote
1744 (?P<name> # reference name
1745 .+?
1746 )
1747 %(non_whitespace_escape_before)s
1748 (?P=quote) # close quote if open quote used
1749 )
1750 (?<!(?<!\x00):) # no unescaped colon at end
1751 %(non_whitespace_escape_before)s
1752 [ ]? # optional space
1753 : # end of reference name
1754 ([ ]+|$) # followed by whitespace
1755 """ % vars(Inliner), re.VERBOSE),
1756 reference=re.compile(r"""
1757 (
1758 (?P<simple>%(simplename)s)_
1759 | # *OR*
1760 ` # open backquote
1761 (?![ ]) # not space
1762 (?P<phrase>.+?) # hyperlink phrase
1763 %(non_whitespace_escape_before)s
1764 `_ # close backquote,
1765 # reference mark
1766 )
1767 $ # end of string
1768 """ % vars(Inliner), re.VERBOSE | re.UNICODE),
1769 substitution=re.compile(r"""
1770 (
1771 (?![ ]) # first char. not space
1772 (?P<name>.+?) # substitution text
1773 %(non_whitespace_escape_before)s
1774 \| # close delimiter
1775 )
1776 ([ ]+|$) # followed by whitespace
1777 """ % vars(Inliner), re.VERBOSE),)
1779 def footnote(self, match):
1780 lineno = self.state_machine.abs_line_number()
1781 indented, indent, offset, blank_finish = \
1782 self.state_machine.get_first_known_indented(match.end())
1783 label = match.group(1)
1784 name = normalize_name(label)
1785 footnote = nodes.footnote('\n'.join(indented))
1786 footnote.line = lineno
1787 if name[0] == '#': # auto-numbered
1788 name = name[1:] # autonumber label
1789 footnote['auto'] = 1
1790 if name:
1791 footnote['names'].append(name)
1792 self.document.note_autofootnote(footnote)
1793 elif name == '*': # auto-symbol
1794 name = ''
1795 footnote['auto'] = '*'
1796 self.document.note_symbol_footnote(footnote)
1797 else: # manually numbered
1798 footnote += nodes.label('', label)
1799 footnote['names'].append(name)
1800 self.document.note_footnote(footnote)
1801 if name:
1802 self.document.note_explicit_target(footnote, footnote)
1803 else:
1804 self.document.set_id(footnote, footnote)
1805 if indented:
1806 self.nested_parse(indented, input_offset=offset, node=footnote)
1807 return [footnote], blank_finish
1809 def citation(self, match):
1810 lineno = self.state_machine.abs_line_number()
1811 indented, indent, offset, blank_finish = \
1812 self.state_machine.get_first_known_indented(match.end())
1813 label = match.group(1)
1814 name = normalize_name(label)
1815 citation = nodes.citation('\n'.join(indented))
1816 citation.line = lineno
1817 citation += nodes.label('', label)
1818 citation['names'].append(name)
1819 self.document.note_citation(citation)
1820 self.document.note_explicit_target(citation, citation)
1821 if indented:
1822 self.nested_parse(indented, input_offset=offset, node=citation)
1823 return [citation], blank_finish
1825 def hyperlink_target(self, match):
1826 pattern = self.explicit.patterns.target
1827 lineno = self.state_machine.abs_line_number()
1828 block, indent, offset, blank_finish = \
1829 self.state_machine.get_first_known_indented(
1830 match.end(), until_blank=1, strip_indent=0)
1831 blocktext = match.string[:match.end()] + '\n'.join(block)
1832 block = [escape2null(line) for line in block]
1833 escaped = block[0]
1834 blockindex = 0
1835 while 1:
1836 targetmatch = pattern.match(escaped)
1837 if targetmatch:
1838 break
1839 blockindex += 1
1840 try:
1841 escaped += block[blockindex]
1842 except IndexError:
1843 raise MarkupError('malformed hyperlink target.', lineno)
1844 del block[:blockindex]
1845 block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
1846 target = self.make_target(block, blocktext, lineno,
1847 targetmatch.group('name'))
1848 return [target], blank_finish
1850 def make_target(self, block, block_text, lineno, target_name):
1851 target_type, data = self.parse_target(block, block_text, lineno)
1852 if target_type == 'refname':
1853 target = nodes.target(block_text, '', refname=normalize_name(data))
1854 target.indirect_reference_name = data
1855 self.add_target(target_name, '', target, lineno)
1856 self.document.note_indirect_target(target)
1857 return target
1858 elif target_type == 'refuri':
1859 target = nodes.target(block_text, '')
1860 self.add_target(target_name, data, target, lineno)
1861 return target
1862 else:
1863 return data
1865 def parse_target(self, block, block_text, lineno):
1866 """
1867 Determine the type of reference of a target.
1869 :Return: A 2-tuple, one of:
1871 - 'refname' and the indirect reference name
1872 - 'refuri' and the URI
1873 - 'malformed' and a system_message node
1874 """
1875 if block and block[-1].strip()[-1:] == '_': # possible indirect target
1876 reference = ' '.join([line.strip() for line in block])
1877 refname = self.is_reference(reference)
1878 if refname:
1879 return 'refname', refname
1880 reference = ''.join([''.join(line.split()) for line in block])
1881 return 'refuri', unescape(reference)
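# Classification sketch (names and URIs are illustrative):
#
#     .. _alias: other-target_        ->  ('refname', 'other-target')
#     .. _home: http://example.org/
#        very/long/path               ->  ('refuri', 'http://example.org/very/long/path')
#
# A block whose last line ends with "_" and parses as a reference becomes an
# indirect target (refname); anything else is joined with all whitespace
# removed and treated as a URI (refuri), which is why URIs may wrap over
# several lines.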
1883 def is_reference(self, reference):
1884 match = self.explicit.patterns.reference.match(
1885 whitespace_normalize_name(reference))
1886 if not match:
1887 return None
1888 return unescape(match.group('simple') or match.group('phrase'))
1890 def add_target(self, targetname, refuri, target, lineno):
1891 target.line = lineno
1892 if targetname:
1893 name = normalize_name(unescape(targetname))
1894 target['names'].append(name)
1895 if refuri:
1896 uri = self.inliner.adjust_uri(refuri)
1897 if uri:
1898 target['refuri'] = uri
1899 else:
1900 raise ApplicationError('problem with URI: %r' % refuri)
1901 self.document.note_explicit_target(target, self.parent)
1902 else: # anonymous target
1903 if refuri:
1904 target['refuri'] = refuri
1905 target['anonymous'] = 1
1906 self.document.note_anonymous_target(target)
1908 def substitution_def(self, match):
1909 pattern = self.explicit.patterns.substitution
1910 lineno = self.state_machine.abs_line_number()
1911 block, indent, offset, blank_finish = \
1912 self.state_machine.get_first_known_indented(match.end(),
1913 strip_indent=0)
1914 blocktext = (match.string[:match.end()] + '\n'.join(block))
1915 block.disconnect()
1916 escaped = escape2null(block[0].rstrip())
1917 blockindex = 0
1918 while 1:
1919 subdefmatch = pattern.match(escaped)
1920 if subdefmatch:
1921 break
1922 blockindex += 1
1923 try:
1924 escaped = escaped + ' ' + escape2null(block[blockindex].strip())
1925 except IndexError:
1926 raise MarkupError('malformed substitution definition.',
1927 lineno)
1928 del block[:blockindex] # strip out the substitution marker
1929 block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
1930 if not block[0]:
1931 del block[0]
1932 offset += 1
1933 while block and not block[-1].strip():
1934 block.pop()
1935 subname = subdefmatch.group('name')
1936 substitution_node = nodes.substitution_definition(blocktext)
1937 substitution_node.line = lineno
1938 if not block:
1939 msg = self.reporter.warning(
1940 'Substitution definition "%s" missing contents.' % subname,
1941 nodes.literal_block(blocktext, blocktext), line=lineno)
1942 return [msg], blank_finish
1943 block[0] = block[0].strip()
1944 substitution_node['names'].append(
1945 nodes.whitespace_normalize_name(subname))
1946 new_abs_offset, blank_finish = self.nested_list_parse(
1947 block, input_offset=offset, node=substitution_node,
1948 initial_state='SubstitutionDef', blank_finish=blank_finish)
1949 i = 0
1950 for node in substitution_node[:]:
1951 if not (isinstance(node, nodes.Inline) or
1952 isinstance(node, nodes.Text)):
1953 self.parent += substitution_node[i]
1954 del substitution_node[i]
1955 else:
1956 i += 1
1957 for node in substitution_node.traverse(nodes.Element):
1958 if self.disallowed_inside_substitution_definitions(node):
1959 pformat = nodes.literal_block('', node.pformat().rstrip())
1960 msg = self.reporter.error(
1961 'Substitution definition contains illegal element:',
1962 pformat, nodes.literal_block(blocktext, blocktext),
1963 line=lineno)
1964 return [msg], blank_finish
1965 if len(substitution_node) == 0:
1966 msg = self.reporter.warning(
1967 'Substitution definition "%s" empty or invalid.'
1968 % subname,
1969 nodes.literal_block(blocktext, blocktext), line=lineno)
1970 return [msg], blank_finish
1971 self.document.note_substitution_def(
1972 substitution_node, subname, self.parent)
1973 return [substitution_node], blank_finish
1975 def disallowed_inside_substitution_definitions(self, node):
1976 if (node['ids'] or
1977 isinstance(node, nodes.reference) and node.get('anonymous') or
1978 isinstance(node, nodes.footnote_reference) and node.get('auto')):
1979 return 1
1980 else:
1981 return 0
1983 def directive(self, match, **option_presets):
1984 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
1985 type_name = match.group(1)
1986 directive_function, messages = directives.directive(
1987 type_name, self.memo.language, self.document)
1988 self.parent += messages
1989 if directive_function:
1990 return self.run_directive(
1991 directive_function, match, type_name, option_presets)
1992 else:
1993 return self.unknown_directive(type_name)
1995 def run_directive(self, directive_fn, match, type_name, option_presets):
1996 """
1997 Parse a directive then run its directive function.
1999 Parameters:
2001 - `directive_fn`: The function implementing the directive. Uses
2002 function attributes ``arguments``, ``options``, and/or ``content``
2003 if present.
2005 - `match`: A regular expression match object which matched the first
2006 line of the directive.
2008 - `type_name`: The directive name, as used in the source text.
2010 - `option_presets`: A dictionary of preset options, defaults for the
2011 directive options. Currently, only an "alt" option is passed by
2012 substitution definitions (value: the substitution name), which may
2013 be used by an embedded image directive.
2015 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2016 """
2017 lineno = self.state_machine.abs_line_number()
2018 initial_line_offset = self.state_machine.line_offset
2019 indented, indent, line_offset, blank_finish \
2020 = self.state_machine.get_first_known_indented(match.end(),
2021 strip_top=0)
2022 block_text = '\n'.join(self.state_machine.input_lines[
2023 initial_line_offset : self.state_machine.line_offset + 1])
2024 try:
2025 arguments, options, content, content_offset = (
2026 self.parse_directive_block(indented, line_offset,
2027 directive_fn, option_presets))
2028 except MarkupError, detail:
2029 error = self.reporter.error(
2030 'Error in "%s" directive:\n%s.' % (type_name,
2031 ' '.join(detail.args)),
2032 nodes.literal_block(block_text, block_text), line=lineno)
2033 return [error], blank_finish
2034 result = directive_fn(type_name, arguments, options, content, lineno,
2035 content_offset, block_text, self,
2036 self.state_machine)
2037 return (result,
2038 blank_finish or self.state_machine.is_next_line_blank())
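# Sketch of the function-based directive interface consumed here (the name
# `my_directive` and its specs are illustrative, not part of this module):
#
#     def my_directive(name, arguments, options, content, lineno,
#                      content_offset, block_text, state, state_machine):
#         node = nodes.paragraph(block_text, '\n'.join(content))
#         return [node]
#
#     my_directive.arguments = (1, 0, 1)  # required, optional,
#                                         # last arg may contain whitespace
#     my_directive.options = {'class': str}  # option name -> conversion fn
#     my_directive.content = 1               # content block allowed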
2040 def parse_directive_block(self, indented, line_offset, directive_fn,
2041 option_presets):
2042 arguments = []
2043 options = {}
2044 argument_spec = getattr(directive_fn, 'arguments', None)
2045 if argument_spec and argument_spec[:2] == (0, 0):
2046 argument_spec = None
2047 option_spec = getattr(directive_fn, 'options', None)
2048 content_spec = getattr(directive_fn, 'content', None)
2049 if indented and not indented[0].strip():
2050 indented.trim_start()
2051 line_offset += 1
2052 while indented and not indented[-1].strip():
2053 indented.trim_end()
2054 if indented and (argument_spec or option_spec):
2055 for i in range(len(indented)):
2056 if not indented[i].strip():
2057 break
2058 else:
2059 i += 1
2060 arg_block = indented[:i]
2061 content = indented[i+1:]
2062 content_offset = line_offset + i + 1
2063 else:
2064 content = indented
2065 content_offset = line_offset
2066 arg_block = []
2067 while content and not content[0].strip():
2068 content.trim_start()
2069 content_offset += 1
2070 if option_spec:
2071 options, arg_block = self.parse_directive_options(
2072 option_presets, option_spec, arg_block)
2073 if arg_block and not argument_spec:
2074 raise MarkupError('no arguments permitted; blank line '
2075 'required before content block')
2076 if argument_spec:
2077 arguments = self.parse_directive_arguments(
2078 argument_spec, arg_block)
2079 if content and not content_spec:
2080 raise MarkupError('no content permitted')
2081 return (arguments, options, content, content_offset)
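# How an indented directive block is split up -- for a hypothetical directive
#
#     .. some-directive:: argument one
#        :option-name: value
#
#        First line of content.
#
# the indented block is roughly ['argument one', ':option-name: value', '',
# 'First line of content.']; arg_block is the part before the first blank
# line (its ':'-prefixed tail goes to parse_directive_options()), and the
# remainder after the blank line becomes `content`, with `content_offset`
# giving the offset of its first line.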
2083 def parse_directive_options(self, option_presets, option_spec, arg_block):
2084 options = option_presets.copy()
2085 for i in range(len(arg_block)):
2086 if arg_block[i][:1] == ':':
2087 opt_block = arg_block[i:]
2088 arg_block = arg_block[:i]
2089 break
2090 else:
2091 opt_block = []
2092 if opt_block:
2093 success, data = self.parse_extension_options(option_spec,
2094 opt_block)
2095 if success: # data is a dict of options
2096 options.update(data)
2097 else: # data is an error string
2098 raise MarkupError(data)
2099 return options, arg_block
2101 def parse_directive_arguments(self, argument_spec, arg_block):
2102 required, optional, last_whitespace = argument_spec
2103 arg_text = '\n'.join(arg_block)
2104 arguments = arg_text.split()
2105 if len(arguments) < required:
2106 raise MarkupError('%s argument(s) required, %s supplied'
2107 % (required, len(arguments)))
2108 elif len(arguments) > required + optional:
2109 if last_whitespace:
2110 arguments = arg_text.split(None, required + optional - 1)
2111 else:
2112 raise MarkupError(
2113 'maximum %s argument(s) allowed, %s supplied'
2114 % (required + optional, len(arguments)))
2115 return arguments
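# Example: with argument_spec == (1, 1, 1) -- one required argument, one
# optional, final argument may contain whitespace -- the text "one two three"
# splits into ['one', 'two three']; with a final 0 instead, the same text
# raises MarkupError ("maximum 2 argument(s) allowed, 3 supplied").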
2117 def parse_extension_options(self, option_spec, datalines):
2118 """
2119 Parse `datalines` for a field list containing extension options
2120 matching `option_spec`.
2122 :Parameters:
2123 - `option_spec`: a mapping of option name to conversion
2124 function, which should raise an exception on bad input.
2125 - `datalines`: a list of input strings.
2127 :Return:
2128 - Success value, 1 or 0.
2129 - An option dictionary on success, an error string on failure.
2130 """
2131 node = nodes.field_list()
2132 newline_offset, blank_finish = self.nested_list_parse(
2133 datalines, 0, node, initial_state='ExtensionOptions',
2134 blank_finish=1)
2135 if newline_offset != len(datalines): # incomplete parse of block
2136 return 0, 'invalid option block'
2137 try:
2138 options = utils.extract_extension_options(node, option_spec)
2139 except KeyError, detail:
2140 return 0, ('unknown option: "%s"' % detail.args[0])
2141 except (ValueError, TypeError), detail:
2142 return 0, ('invalid option value: %s' % ' '.join(detail.args))
2143 except utils.ExtensionOptionError, detail:
2144 return 0, ('invalid option data: %s' % ' '.join(detail.args))
2145 if blank_finish:
2146 return 1, options
2147 else:
2148 return 0, 'option data incompletely parsed'
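# Usage sketch (option names and conversion functions are illustrative):
#
#     option_spec = {'scale': int, 'alt': str}
#     success, data = self.parse_extension_options(
#         option_spec, [':scale: 50', ':alt: a caption'])
#     # expected: success == 1, data == {'scale': 50, 'alt': 'a caption'};
#     # on failure the second item is an error string instead.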
2150 def unknown_directive(self, type_name):
2151 lineno = self.state_machine.abs_line_number()
2152 indented, indent, offset, blank_finish = \
2153 self.state_machine.get_first_known_indented(0, strip_indent=0)
2154 text = '\n'.join(indented)
2155 error = self.reporter.error(
2156 'Unknown directive type "%s".' % type_name,
2157 nodes.literal_block(text, text), line=lineno)
2158 return [error], blank_finish
2160 def comment(self, match):
2161 if not match.string[match.end():].strip() \
2162 and self.state_machine.is_next_line_blank(): # an empty comment?
2163 return [nodes.comment()], 1 # "A tiny but practical wart."
2164 indented, indent, offset, blank_finish = \
2165 self.state_machine.get_first_known_indented(match.end())
2166 while indented and not indented[-1].strip():
2167 indented.trim_end()
2168 text = '\n'.join(indented)
2169 return [nodes.comment(text, text)], blank_finish
2171 explicit.constructs = [
2172 (footnote,
2173 re.compile(r"""
2174 \.\.[ ]+ # explicit markup start
2175 \[
2176 ( # footnote label:
2177 [0-9]+ # manually numbered footnote
2178 | # *OR*
2179 \# # anonymous auto-numbered footnote
2180 | # *OR*
2181 \#%s # auto-numbered footnote with label
2182 | # *OR*
2183 \* # auto-symbol footnote
2184 )
2185 \]
2186 ([ ]+|$) # whitespace or end of line
2187 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2188 (citation,
2189 re.compile(r"""
2190 \.\.[ ]+ # explicit markup start
2191 \[(%s)\] # citation label
2192 ([ ]+|$) # whitespace or end of line
2193 """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
2194 (hyperlink_target,
2195 re.compile(r"""
2196 \.\.[ ]+ # explicit markup start
2197 _ # target indicator
2198 (?![ ]|$) # first char. not space or EOL
2199 """, re.VERBOSE)),
2200 (substitution_def,
2201 re.compile(r"""
2202 \.\.[ ]+ # explicit markup start
2203 \| # substitution indicator
2204 (?![ ]|$) # first char. not space or EOL
2205 """, re.VERBOSE)),
2206 (directive,
2207 re.compile(r"""
2208 \.\.[ ]+ # explicit markup start
2209 (%s) # directive name
2210 [ ]? # optional space
2211 :: # directive delimiter
2212 ([ ]+|$) # whitespace or end of line
2213 """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
2215 def explicit_markup(self, match, context, next_state):
2216 """Footnotes, hyperlink targets, directives, comments."""
2217 nodelist, blank_finish = self.explicit_construct(match)
2218 self.parent += nodelist
2219 self.explicit_list(blank_finish)
2220 return [], next_state, []
2222 def explicit_construct(self, match):
2223 """Determine which explicit construct this is, parse & return it."""
2224 errors = []
2225 for method, pattern in self.explicit.constructs:
2226 expmatch = pattern.match(match.string)
2227 if expmatch:
2228 try:
2229 return method(self, expmatch)
2230 except MarkupError, (message, lineno): # never reached?
2231 errors.append(self.reporter.warning(message, line=lineno))
2232 break
2233 nodelist, blank_finish = self.comment(match)
2234 return nodelist + errors, blank_finish
2236 def explicit_list(self, blank_finish):
2237 """
2238 Create a nested state machine for a series of explicit markup
2239 constructs (including anonymous hyperlink targets).
2240 """
2241 offset = self.state_machine.line_offset + 1 # next line
2242 newline_offset, blank_finish = self.nested_list_parse(
2243 self.state_machine.input_lines[offset:],
2244 input_offset=self.state_machine.abs_line_offset() + 1,
2245 node=self.parent, initial_state='Explicit',
2246 blank_finish=blank_finish,
2247 match_titles=self.state_machine.match_titles)
2248 self.goto_line(newline_offset)
2249 if not blank_finish:
2250 self.parent += self.unindent_warning('Explicit markup')
2252 def anonymous(self, match, context, next_state):
2253 """Anonymous hyperlink targets."""
2254 nodelist, blank_finish = self.anonymous_target(match)
2255 self.parent += nodelist
2256 self.explicit_list(blank_finish)
2257 return [], next_state, []
2259 def anonymous_target(self, match):
2260 lineno = self.state_machine.abs_line_number()
2261 block, indent, offset, blank_finish \
2262 = self.state_machine.get_first_known_indented(match.end(),
2263 until_blank=1)
2264 blocktext = match.string[:match.end()] + '\n'.join(block)
2265 block = [escape2null(line) for line in block]
2266 target = self.make_target(block, blocktext, lineno, '')
2267 return [target], blank_finish
2269 def line(self, match, context, next_state):
2270 """Section title overline or transition marker."""
2271 if self.state_machine.match_titles:
2272 return [match.string], 'Line', []
2273 elif match.string.strip() == '::':
2274 raise statemachine.TransitionCorrection('text')
2275 elif len(match.string.strip()) < 4:
2276 msg = self.reporter.info(
2277 'Unexpected possible title overline or transition.\n'
2278 "Treating it as ordinary text because it's so short.",
2279 line=self.state_machine.abs_line_number())
2280 self.parent += msg
2281 raise statemachine.TransitionCorrection('text')
2282 else:
2283 blocktext = self.state_machine.line
2284 msg = self.reporter.severe(
2285 'Unexpected section title or transition.',
2286 nodes.literal_block(blocktext, blocktext),
2287 line=self.state_machine.abs_line_number())
2288 self.parent += msg
2289 return [], next_state, []
2291 def text(self, match, context, next_state):
2292 """Titles, definition lists, paragraphs."""
2293 return [match.string], 'Text', []
2296 class RFC2822Body(Body):
2298 """
2299 RFC2822 headers are only valid as the first constructs in documents. As
2300 soon as anything else appears, the `Body` state should take over.
2301 """
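# For example, a document starting with
#
#     Author: A. Person
#     Date: 2004-06-05
#     Version: 1
#
# yields a field_list with class "rfc2822", one field per header line; the
# "rfc2822" pattern below matches a run of printable non-colon characters
# followed by a colon and a space (or end of line).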
2303 patterns = Body.patterns.copy() # can't modify the original
2304 patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
2305 initial_transitions = [(name, 'Body')
2306 for name in Body.initial_transitions]
2307 initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2309 def rfc2822(self, match, context, next_state):
2310 """RFC2822-style field list item."""
2311 fieldlist = nodes.field_list(classes=['rfc2822'])
2312 self.parent += fieldlist
2313 field, blank_finish = self.rfc2822_field(match)
2314 fieldlist += field
2315 offset = self.state_machine.line_offset + 1 # next line
2316 newline_offset, blank_finish = self.nested_list_parse(
2317 self.state_machine.input_lines[offset:],
2318 input_offset=self.state_machine.abs_line_offset() + 1,
2319 node=fieldlist, initial_state='RFC2822List',
2320 blank_finish=blank_finish)
2321 self.goto_line(newline_offset)
2322 if not blank_finish:
2323 self.parent += self.unindent_warning(
2324 'RFC2822-style field list')
2325 return [], next_state, []
2327 def rfc2822_field(self, match):
2328 name = match.string[:match.string.find(':')]
2329 indented, indent, line_offset, blank_finish = \
2330 self.state_machine.get_first_known_indented(match.end(),
2331 until_blank=1)
2332 fieldnode = nodes.field()
2333 fieldnode += nodes.field_name(name, name)
2334 fieldbody = nodes.field_body('\n'.join(indented))
2335 fieldnode += fieldbody
2336 if indented:
2337 self.nested_parse(indented, input_offset=line_offset,
2338 node=fieldbody)
2339 return fieldnode, blank_finish
2342 class SpecializedBody(Body):
2344 """
2345 Superclass for second and subsequent compound element members. Compound
2346 elements are lists and list-like constructs.
2348 All transition methods are disabled (redefined as `invalid_input`).
2349 Override individual methods in subclasses to re-enable.
2351 For example, once an initial bullet list item, say, is recognized, the
2352 `BulletList` subclass takes over, with a "bullet_list" node as its
2353 container. Upon encountering the initial bullet list item, `Body.bullet`
2354 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2355 starts up a nested parsing session with `BulletList` as the initial state.
2356 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2357 as only bullet list items are encountered, they are parsed and inserted
2358 into the container. The first construct which is *not* a bullet list item
2359 triggers the `invalid_input` method, which ends the nested parse and
2360 closes the container. `BulletList` needs to recognize input that is
2361 invalid in the context of a bullet list, which means everything *other
2362 than* bullet list items, so it inherits the transition list created in
2363 `Body`.
2364 """
2366 def invalid_input(self, match=None, context=None, next_state=None):
2367 """Not a compound element member. Abort this state machine."""
2368 self.state_machine.previous_line() # back up so parent SM can reassess
2369 raise EOFError
2371 indent = invalid_input
2372 bullet = invalid_input
2373 enumerator = invalid_input
2374 field_marker = invalid_input
2375 option_marker = invalid_input
2376 doctest = invalid_input
2377 line_block = invalid_input
2378 grid_table_top = invalid_input
2379 simple_table_top = invalid_input
2380 explicit_markup = invalid_input
2381 anonymous = invalid_input
2382 line = invalid_input
2383 text = invalid_input
2386 class BulletList(SpecializedBody):
2388 """Second and subsequent bullet_list list_items."""
2390 def bullet(self, match, context, next_state):
2391 """Bullet list item."""
2392 if match.string[0] != self.parent['bullet']:
2393 # different bullet: new list
2394 self.invalid_input()
2395 listitem, blank_finish = self.list_item(match.end())
2396 self.parent += listitem
2397 self.blank_finish = blank_finish
2398 return [], next_state, []
2401 class DefinitionList(SpecializedBody):
2403 """Second and subsequent definition_list_items."""
2405 def text(self, match, context, next_state):
2406 """Definition lists."""
2407 return [match.string], 'Definition', []
2410 class EnumeratedList(SpecializedBody):
2412 """Second and subsequent enumerated_list list_items."""
2414 def enumerator(self, match, context, next_state):
2415 """Enumerated list item."""
2416 format, sequence, text, ordinal = self.parse_enumerator(
2417 match, self.parent['enumtype'])
2418 if ( format != self.format
2419 or (sequence != '#' and (sequence != self.parent['enumtype']
2420 or self.auto
2421 or ordinal != (self.lastordinal + 1)))
2422 or not self.is_enumerated_list_item(ordinal, sequence, format)):
2423 # different enumeration: new list
2424 self.invalid_input()
2425 if sequence == '#':
2426 self.auto = 1
2427 listitem, blank_finish = self.list_item(match.end())
2428 self.parent += listitem
2429 self.blank_finish = blank_finish
2430 self.lastordinal = ordinal
2431 return [], next_state, []
2434 class FieldList(SpecializedBody):
2436 """Second and subsequent field_list fields."""
2438 def field_marker(self, match, context, next_state):
2439 """Field list field."""
2440 field, blank_finish = self.field(match)
2441 self.parent += field
2442 self.blank_finish = blank_finish
2443 return [], next_state, []
2446 class OptionList(SpecializedBody):
2448 """Second and subsequent option_list option_list_items."""
2450 def option_marker(self, match, context, next_state):
2451 """Option list item."""
2452 try:
2453 option_list_item, blank_finish = self.option_list_item(match)
2454 except MarkupError, (message, lineno):
2455 self.invalid_input()
2456 self.parent += option_list_item
2457 self.blank_finish = blank_finish
2458 return [], next_state, []
2461 class RFC2822List(SpecializedBody, RFC2822Body):
2463 """Second and subsequent RFC2822-style field_list fields."""
2465 patterns = RFC2822Body.patterns
2466 initial_transitions = RFC2822Body.initial_transitions
2468 def rfc2822(self, match, context, next_state):
2469 """RFC2822-style field list item."""
2470 field, blank_finish = self.rfc2822_field(match)
2471 self.parent += field
2472 self.blank_finish = blank_finish
2473 return [], 'RFC2822List', []
2475 blank = SpecializedBody.invalid_input
2478 class ExtensionOptions(FieldList):
2480 """
2481 Parse field_list fields for extension options.
2483 No nested parsing is done (including inline markup parsing).
2484 """
2486 def parse_field_body(self, indented, offset, node):
2487 """Override `Body.parse_field_body` for simpler parsing."""
2488 lines = []
2489 for line in list(indented) + ['']:
2490 if line.strip():
2491 lines.append(line)
2492 elif lines:
2493 text = '\n'.join(lines)
2494 node += nodes.paragraph(text, text)
2495 lines = []
2498 class LineBlock(SpecializedBody):
2500 """Second and subsequent lines of a line_block."""
2502 blank = SpecializedBody.invalid_input
2504 def line_block(self, match, context, next_state):
2505 """New line of line block."""
2506 lineno = self.state_machine.abs_line_number()
2507 line, messages, blank_finish = self.line_block_line(match, lineno)
2508 self.parent += line
2509 self.parent.parent += messages
2510 self.blank_finish = blank_finish
2511 return [], next_state, []
2514 class Explicit(SpecializedBody):
2516 """Second and subsequent explicit markup construct."""
2518 def explicit_markup(self, match, context, next_state):
2519 """Footnotes, hyperlink targets, directives, comments."""
2520 nodelist, blank_finish = self.explicit_construct(match)
2521 self.parent += nodelist
2522 self.blank_finish = blank_finish
2523 return [], next_state, []
2525 def anonymous(self, match, context, next_state):
2526 """Anonymous hyperlink targets."""
2527 nodelist, blank_finish = self.anonymous_target(match)
2528 self.parent += nodelist
2529 self.blank_finish = blank_finish
2530 return [], next_state, []
2532 blank = SpecializedBody.invalid_input
2535 class SubstitutionDef(Body):
2537 """
2538 Parser for the contents of a substitution_definition element.
2539 """
2541 patterns = {
2542 'embedded_directive': re.compile(r'(%s)::( +|$)'
2543 % Inliner.simplename, re.UNICODE),
2544 'text': r''}
2545 initial_transitions = ['embedded_directive', 'text']
2547 def embedded_directive(self, match, context, next_state):
2548 nodelist, blank_finish = self.directive(match,
2549 alt=self.parent['names'][0])
2550 self.parent += nodelist
2551 if not self.state_machine.at_eof():
2552 self.blank_finish = blank_finish
2553 raise EOFError
2555 def text(self, match, context, next_state):
2556 if not self.state_machine.at_eof():
2557 self.blank_finish = self.state_machine.is_next_line_blank()
2558 raise EOFError
2561 class Text(RSTState):
2563 """
2564 Classifier of second line of a text block.
2566 Could be a paragraph, a definition list item, or a title.
2567 """
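# Classification of the second line, roughly:
#
#     "====="   (an underline)   ->  section title           (underline())
#     "    x"   (indented text)  ->  definition list item    (indent())
#     "more"    (ordinary text)  ->  the paragraph continues (text())
#     ""        (a blank line)   ->  the paragraph ends      (blank())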
2569 patterns = {'underline': Body.patterns['line'],
2570 'text': r''}
2571 initial_transitions = [('underline', 'Body'), ('text', 'Body')]
2573 def blank(self, match, context, next_state):
2574 """End of paragraph."""
2575 paragraph, literalnext = self.paragraph(
2576 context, self.state_machine.abs_line_number() - 1)
2577 self.parent += paragraph
2578 if literalnext:
2579 self.parent += self.literal_block()
2580 return [], 'Body', []
2582 def eof(self, context):
2583 if context:
2584 self.blank(None, context, None)
2585 return []
2587 def indent(self, match, context, next_state):
2588 """Definition list item."""
2589 definitionlist = nodes.definition_list()
2590 definitionlistitem, blank_finish = self.definition_list_item(context)
2591 definitionlist += definitionlistitem
2592 self.parent += definitionlist
2593 offset = self.state_machine.line_offset + 1 # next line
2594 newline_offset, blank_finish = self.nested_list_parse(
2595 self.state_machine.input_lines[offset:],
2596 input_offset=self.state_machine.abs_line_offset() + 1,
2597 node=definitionlist, initial_state='DefinitionList',
2598 blank_finish=blank_finish, blank_finish_state='Definition')
2599 self.goto_line(newline_offset)
2600 if not blank_finish:
2601 self.parent += self.unindent_warning('Definition list')
2602 return [], 'Body', []
2604 def underline(self, match, context, next_state):
2605 """Section title."""
2606 lineno = self.state_machine.abs_line_number()
2607 title = context[0].rstrip()
2608 underline = match.string.rstrip()
2609 source = title + '\n' + underline
2610 messages = []
2611 if column_width(title) > len(underline):
2612 if len(underline) < 4:
2613 if self.state_machine.match_titles:
2614 msg = self.reporter.info(
2615 'Possible title underline, too short for the title.\n'
2616 "Treating it as ordinary text because it's so short.",
2617 line=lineno)
2618 self.parent += msg
2619 raise statemachine.TransitionCorrection('text')
2620 else:
2621 blocktext = context[0] + '\n' + self.state_machine.line
2622 msg = self.reporter.warning(
2623 'Title underline too short.',
2624 nodes.literal_block(blocktext, blocktext), line=lineno)
2625 messages.append(msg)
2626 if not self.state_machine.match_titles:
2627 blocktext = context[0] + '\n' + self.state_machine.line
2628 msg = self.reporter.severe(
2629 'Unexpected section title.',
2630 nodes.literal_block(blocktext, blocktext), line=lineno)
2631 self.parent += messages
2632 self.parent += msg
2633 return [], next_state, []
2634 style = underline[0]
2635 context[:] = []
2636 self.section(title, source, style, lineno - 1, messages)
2637 return [], next_state, []
2639 def text(self, match, context, next_state):
2640 """Paragraph."""
2641 startline = self.state_machine.abs_line_number() - 1
2642 msg = None
2643 try:
2644 block = self.state_machine.get_text_block(flush_left=1)
2645 except statemachine.UnexpectedIndentationError, instance:
2646 block, source, lineno = instance.args
2647 msg = self.reporter.error('Unexpected indentation.',
2648 source=source, line=lineno)
2649 lines = context + list(block)
2650 paragraph, literalnext = self.paragraph(lines, startline)
2651 self.parent += paragraph
2652 self.parent += msg
2653 if literalnext:
2654 try:
2655 self.state_machine.next_line()
2656 except EOFError:
2657 pass
2658 self.parent += self.literal_block()
2659 return [], next_state, []
2661 def literal_block(self):
2662 """Return a list of nodes."""
2663 indented, indent, offset, blank_finish = \
2664 self.state_machine.get_indented()
2665 while indented and not indented[-1].strip():
2666 indented.trim_end()
2667 if not indented:
2668 return self.quoted_literal_block()
2669 data = '\n'.join(indented)
2670 literal_block = nodes.literal_block(data, data)
2671 literal_block.line = offset + 1
2672 nodelist = [literal_block]
2673 if not blank_finish:
2674 nodelist.append(self.unindent_warning('Literal block'))
2675 return nodelist
2677 def quoted_literal_block(self):
2678 abs_line_offset = self.state_machine.abs_line_offset()
2679 offset = self.state_machine.line_offset
2680 parent_node = nodes.Element()
2681 new_abs_offset = self.nested_parse(
2682 self.state_machine.input_lines[offset:],
2683 input_offset=abs_line_offset, node=parent_node, match_titles=0,
2684 state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
2685 'initial_state': 'QuotedLiteralBlock'})
2686 self.goto_line(new_abs_offset)
2687 return parent_node.children
2689 def definition_list_item(self, termline):
2690 indented, indent, line_offset, blank_finish = \
2691 self.state_machine.get_indented()
2692 definitionlistitem = nodes.definition_list_item(
2693 '\n'.join(termline + list(indented)))
2694 lineno = self.state_machine.abs_line_number() - 1
2695 definitionlistitem.line = lineno
2696 termlist, messages = self.term(termline, lineno)
2697 definitionlistitem += termlist
2698 definition = nodes.definition('', *messages)
2699 definitionlistitem += definition
2700 if termline[0][-2:] == '::':
2701 definition += self.reporter.info(
2702 'Blank line missing before literal block (after the "::")? '
2703 'Interpreted as a definition list item.', line=line_offset+1)
2704 self.nested_parse(indented, input_offset=line_offset, node=definition)
2705 return definitionlistitem, blank_finish
2707 classifier_delimiter = re.compile(' +: +')
2709 def term(self, lines, lineno):
2710 """Return a definition_list's term and optional classifiers."""
2711 assert len(lines) == 1
2712 text_nodes, messages = self.inline_text(lines[0], lineno)
2713 term_node = nodes.term()
2714 node_list = [term_node]
2715 for i in range(len(text_nodes)):
2716 node = text_nodes[i]
2717 if isinstance(node, nodes.Text):
2718 parts = self.classifier_delimiter.split(node.rawsource)
2719 if len(parts) == 1:
2720 node_list[-1] += node
2721 else:
2723 node_list[-1] += nodes.Text(parts[0].rstrip())
2724 for part in parts[1:]:
2725 classifier_node = nodes.classifier('', part)
2726 node_list.append(classifier_node)
2727 else:
2728 node_list[-1] += node
2729 return node_list, messages
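# For example, the term line
#
#     term 1 : classifier one : classifier two
#
# is split on " +: +" into a term node containing "term 1" plus two
# classifier nodes; a colon without spaces on both sides (e.g. inside a URI)
# is not treated as a classifier delimiter.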
2732 class SpecializedText(Text):
2734 """
2735 Superclass for second and subsequent lines of Text-variants.
2737 All transition methods are disabled. Override individual methods in
2738 subclasses to re-enable.
2739 """
2741 def eof(self, context):
2742 """Incomplete construct."""
2743 return []
2745 def invalid_input(self, match=None, context=None, next_state=None):
2746 """Not a compound element member. Abort this state machine."""
2747 raise EOFError
2749 blank = invalid_input
2750 indent = invalid_input
2751 underline = invalid_input
2752 text = invalid_input
2755 class Definition(SpecializedText):
2757 """Second line of potential definition_list_item."""
2759 def eof(self, context):
2760 """Not a definition."""
2761 self.state_machine.previous_line(2) # so parent SM can reassess
2762 return []
2764 def indent(self, match, context, next_state):
2765 """Definition list item."""
2766 definitionlistitem, blank_finish = self.definition_list_item(context)
2767 self.parent += definitionlistitem
2768 self.blank_finish = blank_finish
2769 return [], 'DefinitionList', []
2772 class Line(SpecializedText):
2774 """
2775 Second line of over- & underlined section title or transition marker.
2776 """
2778 eofcheck = 1 # @@@ ???
2779 """Set to 0 while parsing sections, so that we don't catch the EOF."""
2781 def eof(self, context):
2782 """Transition marker at end of section or document."""
2783 marker = context[0].strip()
2784 if self.memo.section_bubble_up_kludge:
2785 self.memo.section_bubble_up_kludge = 0
2786 elif len(marker) < 4:
2787 self.state_correction(context)
2788 if self.eofcheck: # ignore EOFError with sections
2789 lineno = self.state_machine.abs_line_number() - 1
2790 transition = nodes.transition(rawsource=context[0])
2791 transition.line = lineno
2792 self.parent += transition
2793 self.eofcheck = 1
2794 return []
2796 def blank(self, match, context, next_state):
2797 """Transition marker."""
2798 lineno = self.state_machine.abs_line_number() - 1
2799 marker = context[0].strip()
2800 if len(marker) < 4:
2801 self.state_correction(context)
2802 transition = nodes.transition(rawsource=marker)
2803 transition.line = lineno
2804 self.parent += transition
2805 return [], 'Body', []
2807 def text(self, match, context, next_state):
2808 """Potential over- & underlined title."""
2809 lineno = self.state_machine.abs_line_number() - 1
2810 overline = context[0]
2811 title = match.string
2812 underline = ''
2813 try:
2814 underline = self.state_machine.next_line()
2815 except EOFError:
2816 blocktext = overline + '\n' + title
2817 if len(overline.rstrip()) < 4:
2818 self.short_overline(context, blocktext, lineno, 2)
2819 else:
2820 msg = self.reporter.severe(
2821 'Incomplete section title.',
2822 nodes.literal_block(blocktext, blocktext), line=lineno)
2823 self.parent += msg
2824 return [], 'Body', []
2825 source = '%s\n%s\n%s' % (overline, title, underline)
2826 overline = overline.rstrip()
2827 underline = underline.rstrip()
2828 if not self.transitions['underline'][0].match(underline):
2829 blocktext = overline + '\n' + title + '\n' + underline
2830 if len(overline.rstrip()) < 4:
2831 self.short_overline(context, blocktext, lineno, 2)
2832 else:
2833 msg = self.reporter.severe(
2834 'Missing matching underline for section title overline.',
2835 nodes.literal_block(source, source), line=lineno)
2836 self.parent += msg
2837 return [], 'Body', []
2838 elif overline != underline:
2839 blocktext = overline + '\n' + title + '\n' + underline
2840 if len(overline.rstrip()) < 4:
2841 self.short_overline(context, blocktext, lineno, 2)
2842 else:
2843 msg = self.reporter.severe(
2844 'Title overline & underline mismatch.',
2845 nodes.literal_block(source, source), line=lineno)
2846 self.parent += msg
2847 return [], 'Body', []
2848 title = title.rstrip()
2849 messages = []
2850 if column_width(title) > len(overline):
2851 blocktext = overline + '\n' + title + '\n' + underline
2852 if len(overline.rstrip()) < 4:
2853 self.short_overline(context, blocktext, lineno, 2)
2854 else:
2855 msg = self.reporter.warning(
2856 'Title overline too short.',
2857 nodes.literal_block(source, source), line=lineno)
2858 messages.append(msg)
2859 style = (overline[0], underline[0])
2860 self.eofcheck = 0 # @@@ not sure this is correct
2861 self.section(title.lstrip(), source, style, lineno + 1, messages)
2862 self.eofcheck = 1
2863 return [], 'Body', []
2865 indent = text # indented title
2867 def underline(self, match, context, next_state):
2868 overline = context[0]
2869 blocktext = overline + '\n' + self.state_machine.line
2870 lineno = self.state_machine.abs_line_number() - 1
2871 if len(overline.rstrip()) < 4:
2872 self.short_overline(context, blocktext, lineno, 1)
2873 msg = self.reporter.error(
2874 'Invalid section title or transition marker.',
2875 nodes.literal_block(blocktext, blocktext), line=lineno)
2876 self.parent += msg
2877 return [], 'Body', []
2879 def short_overline(self, context, blocktext, lineno, lines=1):
2880 msg = self.reporter.info(
2881 'Possible incomplete section title.\nTreating the overline as '
2882 "ordinary text because it's so short.", line=lineno)
2883 self.parent += msg
2884 self.state_correction(context, lines)
2886 def state_correction(self, context, lines=1):
2887 self.state_machine.previous_line(lines)
2888 context[:] = []
2889 raise statemachine.StateCorrection('Body', 'text')
2892 class QuotedLiteralBlock(RSTState):
2894 """
2895 Nested parse handler for quoted (unindented) literal blocks.
2897 Special-purpose. Not for inclusion in `state_classes`.
2898 """
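# A quoted (unindented) literal block looks like
#
#     ::
#
#     > line one
#     > line two
#
# Every line must begin with the same initial punctuation character (">" in
# this sketch); an inconsistent quote or unexpected indentation ends the
# block with an error message.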
2900 patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
2901 'text': r''}
2902 initial_transitions = ('initial_quoted', 'text')
2904 def __init__(self, state_machine, debug=0):
2905 RSTState.__init__(self, state_machine, debug)
2906 self.messages = []
2907 self.initial_lineno = None
2909 def blank(self, match, context, next_state):
2910 if context:
2911 raise EOFError
2912 else:
2913 return context, next_state, []
2915 def eof(self, context):
2916 if context:
2917 text = '\n'.join(context)
2918 literal_block = nodes.literal_block(text, text)
2919 literal_block.line = self.initial_lineno
2920 self.parent += literal_block
2921 else:
2922 self.parent += self.reporter.warning(
2923 'Literal block expected; none found.',
2924 line=self.state_machine.abs_line_number())
2925 self.state_machine.previous_line()
2926 self.parent += self.messages
2927 return []
2929 def indent(self, match, context, next_state):
2930 assert context, ('QuotedLiteralBlock.indent: context should not '
2931 'be empty!')
2932 self.messages.append(
2933 self.reporter.error('Unexpected indentation.',
2934 line=self.state_machine.abs_line_number()))
2935 self.state_machine.previous_line()
2936 raise EOFError
2938 def initial_quoted(self, match, context, next_state):
2939 """Match arbitrary quote character on the first line only."""
2940 self.remove_transition('initial_quoted')
2941 quote = match.string[0]
2942 pattern = re.compile(re.escape(quote))
2943 # New transition matches consistent quotes only:
2944 self.add_transition('quoted',
2945 (pattern, self.quoted, self.__class__.__name__))
2946 self.initial_lineno = self.state_machine.abs_line_number()
2947 return [match.string], next_state, []
2949 def quoted(self, match, context, next_state):
2950 """Match consistent quotes on subsequent lines."""
2951 context.append(match.string)
2952 return context, next_state, []
2954 def text(self, match, context, next_state):
2955 if context:
2956 self.messages.append(
2957 self.reporter.error('Inconsistent literal block quoting.',
2958 line=self.state_machine.abs_line_number()))
2959 self.state_machine.previous_line()
2960 raise EOFError
2963 state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
2964 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
2965 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
2966 """Standard set of State classes used to start `RSTStateMachine`."""