2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
103 __docformat__
= 'reStructuredText'
import re
from types import FunctionType, MethodType

from docutils import nodes, statemachine, utils
from docutils import ApplicationError, DataError
from docutils.statemachine import StateMachineWS, StateWS
from docutils.nodes import fully_normalize_name as normalize_name
from docutils.nodes import whitespace_normalize_name
import docutils.parsers.rst
from docutils.parsers.rst import directives, languages, tableparser, roles
from docutils.parsers.rst.languages import en as _fallback_language_module
from docutils.utils import escape2null, unescape, column_width
from docutils.utils import punctuation_chars, roman, urischemes
from docutils.utils import split_escaped_whitespace
class MarkupError(DataError): pass
class UnknownInterpretedRoleError(DataError): pass
class InterpretedRoleNotImplementedError(DataError): pass
class ParserError(ApplicationError): pass
126 class MarkupMismatch(Exception): pass
131 """Stores data attributes for dotted-attribute access."""
133 def __init__(self
, **keywordargs
):
134 self
.__dict
__.update(keywordargs
)
137 class RSTStateMachine(StateMachineWS
):
140 reStructuredText's master StateMachine.
142 The entry point to reStructuredText parsing is the `run()` method.
145 def run(self
, input_lines
, document
, input_offset
=0, match_titles
=True,
148 Parse `input_lines` and modify the `document` node in place.
150 Extend `StateMachineWS.run()`: set up parse-global data and
151 run the StateMachine.
153 self
.language
= languages
.get_language(
154 document
.settings
.language_code
)
155 self
.match_titles
= match_titles
158 inliner
.init_customizations(document
.settings
)
159 self
.memo
= Struct(document
=document
,
160 reporter
=document
.reporter
,
161 language
=self
.language
,
164 section_bubble_up_kludge
=False,
166 self
.document
= document
167 self
.attach_observer(document
.note_source
)
168 self
.reporter
= self
.memo
.reporter
170 results
= StateMachineWS
.run(self
, input_lines
, input_offset
,
171 input_source
=document
['source'])
172 assert results
== [], 'RSTStateMachine.run() results should be empty!'
173 self
.node
= self
.memo
= None # remove unneeded references
176 class NestedStateMachine(StateMachineWS
):
179 StateMachine run from within other StateMachine runs, to parse nested
183 def run(self
, input_lines
, input_offset
, memo
, node
, match_titles
=True):
185 Parse `input_lines` and populate a `docutils.nodes.document` instance.
187 Extend `StateMachineWS.run()`: set up document-wide data.
189 self
.match_titles
= match_titles
191 self
.document
= memo
.document
192 self
.attach_observer(self
.document
.note_source
)
193 self
.reporter
= memo
.reporter
194 self
.language
= memo
.language
196 results
= StateMachineWS
.run(self
, input_lines
, input_offset
)
197 assert results
== [], ('NestedStateMachine.run() results should be '
202 class RSTState(StateWS
):
205 reStructuredText State superclass.
207 Contains methods used by all State subclasses.
210 nested_sm
= NestedStateMachine
213 def __init__(self
, state_machine
, debug
=False):
214 self
.nested_sm_kwargs
= {'state_classes': state_classes
,
215 'initial_state': 'Body'}
216 StateWS
.__init
__(self
, state_machine
, debug
)
218 def runtime_init(self
):
219 StateWS
.runtime_init(self
)
220 memo
= self
.state_machine
.memo
222 self
.reporter
= memo
.reporter
223 self
.inliner
= memo
.inliner
224 self
.document
= memo
.document
225 self
.parent
= self
.state_machine
.node
226 # enable the reporter to determine source and source-line
227 if not hasattr(self
.reporter
, 'get_source_and_line'):
228 self
.reporter
.get_source_and_line
= self
.state_machine
.get_source_and_line
231 def goto_line(self
, abs_line_offset
):
233 Jump to input line `abs_line_offset`, ignoring jumps past the end.
236 self
.state_machine
.goto_line(abs_line_offset
)
240 def no_match(self
, context
, transitions
):
242 Override `StateWS.no_match` to generate a system message.
244 This code should never be run.
246 self
.reporter
.severe(
247 'Internal error: no transition pattern match. State: "%s"; '
248 'transitions: %s; context: %s; current line: %r.'
249 % (self
.__class
__.__name
__, transitions
, context
,
250 self
.state_machine
.line
))
251 return context
, None, []
253 def bof(self
, context
):
254 """Called at beginning of file."""
257 def nested_parse(self
, block
, input_offset
, node
, match_titles
=False,
258 state_machine_class
=None, state_machine_kwargs
=None):
260 Create a new StateMachine rooted at `node` and run it over the input
264 if state_machine_class
is None:
265 state_machine_class
= self
.nested_sm
267 if state_machine_kwargs
is None:
268 state_machine_kwargs
= self
.nested_sm_kwargs
270 block_length
= len(block
)
275 state_machine
= self
.nested_sm_cache
.pop()
278 if not state_machine
:
279 state_machine
= state_machine_class(debug
=self
.debug
,
280 **state_machine_kwargs
)
281 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
282 node
=node
, match_titles
=match_titles
)
284 self
.nested_sm_cache
.append(state_machine
)
286 state_machine
.unlink()
287 new_offset
= state_machine
.abs_line_offset()
288 # No `block.parent` implies disconnected -- lines aren't in sync:
289 if block
.parent
and (len(block
) - block_length
) != 0:
290 # Adjustment for block if modified in nested parse:
291 self
.state_machine
.next_line(len(block
) - block_length
)
294 def nested_list_parse(self
, block
, input_offset
, node
, initial_state
,
296 blank_finish_state
=None,
299 state_machine_class
=None,
300 state_machine_kwargs
=None):
302 Create a new StateMachine rooted at `node` and run it over the input
303 `block`. Also keep track of optional intermediate blank lines and the
306 if state_machine_class
is None:
307 state_machine_class
= self
.nested_sm
308 if state_machine_kwargs
is None:
309 state_machine_kwargs
= self
.nested_sm_kwargs
.copy()
310 state_machine_kwargs
['initial_state'] = initial_state
311 state_machine
= state_machine_class(debug
=self
.debug
,
312 **state_machine_kwargs
)
313 if blank_finish_state
is None:
314 blank_finish_state
= initial_state
315 state_machine
.states
[blank_finish_state
].blank_finish
= blank_finish
316 for key
, value
in extra_settings
.items():
317 setattr(state_machine
.states
[initial_state
], key
, value
)
318 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
319 node
=node
, match_titles
=match_titles
)
320 blank_finish
= state_machine
.states
[blank_finish_state
].blank_finish
321 state_machine
.unlink()
322 return state_machine
.abs_line_offset(), blank_finish
324 def section(self
, title
, source
, style
, lineno
, messages
):
325 """Check for a valid subsection and create one if it checks out."""
326 if self
.check_subsection(source
, style
, lineno
):
327 self
.new_subsection(title
, lineno
, messages
)
329 def check_subsection(self
, source
, style
, lineno
):
331 Check for a valid subsection header. Return 1 (true) or None (false).
333 When a new section is reached that isn't a subsection of the current
334 section, back up the line count (use ``previous_line(-x)``), then
335 ``raise EOFError``. The current StateMachine will finish, then the
336 calling StateMachine can re-examine the title. This will work its way
337 back up the calling chain until the correct section level is reached.
339 @@@ Alternative: Evaluate the title, store the title info & level, and
340 back up the chain until that level is reached. Store in memo? Or
343 :Exception: `EOFError` when a sibling or supersection encountered.
346 title_styles
= memo
.title_styles
347 mylevel
= memo
.section_level
348 try: # check for existing title style
349 level
= title_styles
.index(style
) + 1
350 except ValueError: # new title style
351 if len(title_styles
) == memo
.section_level
: # new subsection
352 title_styles
.append(style
)
354 else: # not at lowest level
355 self
.parent
+= self
.title_inconsistent(source
, lineno
)
357 if level
<= mylevel
: # sibling or supersection
358 memo
.section_level
= level
# bubble up to parent section
360 memo
.section_bubble_up_kludge
= True
361 # back up 2 lines for underline title, 3 for overline title
362 self
.state_machine
.previous_line(len(style
) + 1)
363 raise EOFError # let parent section re-evaluate
364 if level
== mylevel
+ 1: # immediate subsection
366 else: # invalid subsection
367 self
.parent
+= self
.title_inconsistent(source
, lineno
)
370 def title_inconsistent(self
, sourcetext
, lineno
):
371 error
= self
.reporter
.severe(
372 'Title level inconsistent:', nodes
.literal_block('', sourcetext
),
376 def new_subsection(self
, title
, lineno
, messages
):
377 """Append new subsection to document tree. On return, check level."""
379 mylevel
= memo
.section_level
380 memo
.section_level
+= 1
381 section_node
= nodes
.section()
382 self
.parent
+= section_node
383 textnodes
, title_messages
= self
.inline_text(title
, lineno
)
384 titlenode
= nodes
.title(title
, '', *textnodes
)
385 name
= normalize_name(titlenode
.astext())
386 section_node
['names'].append(name
)
387 section_node
+= titlenode
388 section_node
+= messages
389 section_node
+= title_messages
390 self
.document
.note_implicit_target(section_node
, section_node
)
391 offset
= self
.state_machine
.line_offset
+ 1
392 absoffset
= self
.state_machine
.abs_line_offset() + 1
393 newabsoffset
= self
.nested_parse(
394 self
.state_machine
.input_lines
[offset
:], input_offset
=absoffset
,
395 node
=section_node
, match_titles
=True)
396 self
.goto_line(newabsoffset
)
397 if memo
.section_level
<= mylevel
: # can't handle next section?
398 raise EOFError # bubble up to supersection
399 # reset section_level; next pass will detect it properly
400 memo
.section_level
= mylevel
402 def paragraph(self
, lines
, lineno
):
404 Return a list (paragraph & messages) & a boolean: literal_block next?
406 data
= '\n'.join(lines
).rstrip()
407 if re
.search(r
'(?<!\\)(\\\\)*::$', data
):
410 elif data
[-3] in ' \n':
411 text
= data
[:-3].rstrip()
418 textnodes
, messages
= self
.inline_text(text
, lineno
)
419 p
= nodes
.paragraph(data
, '', *textnodes
)
420 p
.source
, p
.line
= self
.state_machine
.get_source_and_line(lineno
)
421 return [p
] + messages
, literalnext
423 def inline_text(self
, text
, lineno
):
425 Return 2 lists: nodes (text and inline elements), and system_messages.
427 nodes
, messages
= self
.inliner
.parse(text
, lineno
,
428 self
.memo
, self
.parent
)
429 return nodes
, messages
431 def unindent_warning(self
, node_name
):
432 # the actual problem is one line below the current line
433 lineno
= self
.state_machine
.abs_line_number()+1
434 return self
.reporter
.warning('%s ends without a blank line; '
435 'unexpected unindent.' % node_name
,
def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    When `compile` is false, return the pattern string instead of a
    compiled pattern object (used for the recursive calls below).
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        # A tuple is a nested definition; recurse and splice in its pattern.
        if type(part) is tuple:
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
465 Parse inline markup; call the `parse()` method.
469 self
.implicit_dispatch
= []
470 """List of (pattern, bound method) tuples, used by
471 `self.implicit_inline`."""
473 def init_customizations(self
, settings
):
474 # lookahead and look-behind expressions for inline markup rules
475 if getattr(settings
, 'character_level_inline_markup', False):
476 start_string_prefix
= u
'(^|(?<!\x00))'
477 end_string_suffix
= u
''
479 start_string_prefix
= (u
'(^|(?<=\\s|[%s%s]))' %
480 (punctuation_chars
.openers
,
481 punctuation_chars
.delimiters
))
482 end_string_suffix
= (u
'($|(?=\\s|[\x00%s%s%s]))' %
483 (punctuation_chars
.closing_delimiters
,
484 punctuation_chars
.delimiters
,
485 punctuation_chars
.closers
))
486 args
= locals().copy()
487 args
.update(vars(self
.__class
__))
489 parts
= ('initial_inline', start_string_prefix
, '',
490 [('start', '', self
.non_whitespace_after
, # simple start-strings
492 r
'\*(?!\*)', # emphasis but not strong
494 r
'_`', # inline internal target
495 r
'\|(?!\|)'] # substitution reference
497 ('whole', '', end_string_suffix
, # whole constructs
498 [# reference name & end-string
499 r
'(?P<refname>%s)(?P<refend>__?)' % self
.simplename
,
500 ('footnotelabel', r
'\[', r
'(?P<fnend>\]_)',
501 [r
'[0-9]+', # manually numbered
502 r
'\#(%s)?' % self
.simplename
, # auto-numbered (w/ label?)
504 r
'(?P<citationlabel>%s)' % self
.simplename
] # citation reference
508 ('backquote', # interpreted text or phrase reference
509 '(?P<role>(:%s:)?)' % self
.simplename
, # optional role
510 self
.non_whitespace_after
,
511 ['`(?!`)'] # but not literal
515 self
.start_string_prefix
= start_string_prefix
516 self
.end_string_suffix
= end_string_suffix
519 self
.patterns
= Struct(
520 initial
=build_regexp(parts
),
521 emphasis
=re
.compile(self
.non_whitespace_escape_before
522 + r
'(\*)' + end_string_suffix
, re
.UNICODE
),
523 strong
=re
.compile(self
.non_whitespace_escape_before
524 + r
'(\*\*)' + end_string_suffix
, re
.UNICODE
),
525 interpreted_or_phrase_ref
=re
.compile(
527 %(non_unescaped_whitespace_escape_before)s
531 (?P<role>:%(simplename)s:)?
535 %(end_string_suffix)s
536 """ % args
, re
.VERBOSE | re
.UNICODE
),
537 embedded_link
=re
.compile(
540 (?:[ \n]+|^) # spaces or beginning of line/string
542 %(non_whitespace_after)s
543 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
544 %(non_whitespace_escape_before)s
548 """ % args
, re
.VERBOSE | re
.UNICODE
),
549 literal
=re
.compile(self
.non_whitespace_before
+ '(``)'
550 + end_string_suffix
, re
.UNICODE
),
551 target
=re
.compile(self
.non_whitespace_escape_before
552 + r
'(`)' + end_string_suffix
, re
.UNICODE
),
553 substitution_ref
=re
.compile(self
.non_whitespace_escape_before
555 + end_string_suffix
, re
.UNICODE
),
556 email
=re
.compile(self
.email_pattern
% args
+ '$',
557 re
.VERBOSE | re
.UNICODE
),
560 %(start_string_prefix)s
562 (?P<absolute> # absolute URI
563 (?P<scheme> # scheme (http, ftp, mailto)
564 [a-zA-Z][a-zA-Z0-9.+-]*
569 (//?)? # hierarchical URI
570 %(uric)s* # URI characters
571 %(uri_end)s # final URI char
577 ( # optional fragment
584 (?P<email> # email address
585 """ + self
.email_pattern
+ r
"""
588 %(end_string_suffix)s
589 """) % args
, re
.VERBOSE | re
.UNICODE
),
592 %(start_string_prefix)s
594 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
596 (PEP\s+(?P<pepnum2>\d+)) # reference by name
598 %(end_string_suffix)s""" % args
, re
.VERBOSE | re
.UNICODE
),
601 %(start_string_prefix)s
602 (RFC(-|\s+)?(?P<rfcnum>\d+))
603 %(end_string_suffix)s""" % args
, re
.VERBOSE | re
.UNICODE
))
605 self
.implicit_dispatch
.append((self
.patterns
.uri
,
606 self
.standalone_uri
))
607 if settings
.pep_references
:
608 self
.implicit_dispatch
.append((self
.patterns
.pep
,
610 if settings
.rfc_references
:
611 self
.implicit_dispatch
.append((self
.patterns
.rfc
,
614 def parse(self
, text
, lineno
, memo
, parent
):
615 # Needs to be refactored for nested inline markup.
616 # Add nested_parse() method?
618 Return 2 lists: nodes (text and inline elements), and system_messages.
620 Using `self.patterns.initial`, a pattern which matches start-strings
621 (emphasis, strong, interpreted, phrase reference, literal,
622 substitution reference, and inline target) and complete constructs
623 (simple reference, footnote reference), search for a candidate. When
624 one is found, check for validity (e.g., not a quoted '*' character).
625 If valid, search for the corresponding end string if applicable, and
626 check it for validity. If not found or invalid, generate a warning
627 and ignore the start-string. Implicit inline markup (e.g. standalone
630 self
.reporter
= memo
.reporter
631 self
.document
= memo
.document
632 self
.language
= memo
.language
634 pattern_search
= self
.patterns
.initial
.search
635 dispatch
= self
.dispatch
636 remaining
= escape2null(text
)
641 match
= pattern_search(remaining
)
643 groups
= match
.groupdict()
644 method
= dispatch
[groups
['start'] or groups
['backquote']
645 or groups
['refend'] or groups
['fnend']]
646 before
, inlines
, remaining
, sysmessages
= method(self
, match
,
648 unprocessed
.append(before
)
649 messages
+= sysmessages
651 processed
+= self
.implicit_inline(''.join(unprocessed
),
657 remaining
= ''.join(unprocessed
) + remaining
659 processed
+= self
.implicit_inline(remaining
, lineno
)
660 return processed
, messages
662 # Inline object recognition
663 # -------------------------
664 # See also init_customizations().
665 non_whitespace_before
= r
'(?<!\s)'
666 non_whitespace_escape_before
= r
'(?<![\s\x00])'
667 non_unescaped_whitespace_escape_before
= r
'(?<!(?<!\x00)[\s\x00])'
668 non_whitespace_after
= r
'(?!\s)'
669 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
670 simplename
= r
'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
671 # Valid URI characters (see RFC 2396 & RFC 2732);
672 # final \x00 allows backslash escapes in URIs:
673 uric
= r
"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
674 # Delimiter indicating the end of a URI (not part of the URI):
675 uri_end_delim
= r
"""[>]"""
676 # Last URI character; same as uric but no punctuation:
677 urilast
= r
"""[_~*/=+a-zA-Z0-9]"""
678 # End of a URI (either 'urilast' or 'uric followed by a
680 uri_end
= r
"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
681 emailc
= r
"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
683 %(emailc)s+(?:\.%(emailc)s+)* # name
685 %(emailc)s+(?:\.%(emailc)s*)* # host
686 %(uri_end)s # final URI char
689 def quoted_start(self
, match
):
690 """Test if inline markup start-string is 'quoted'.
692 'Quoted' in this context means the start-string is enclosed in a pair
693 of matching opening/closing delimiters (not necessarily quotes)
694 or at the end of the match.
696 string
= match
.string
697 start
= match
.start()
698 if start
== 0: # start-string at beginning of text
700 prestart
= string
[start
- 1]
702 poststart
= string
[match
.end()]
703 except IndexError: # start-string at end of text
704 return True # not "quoted" but no markup start-string either
705 return punctuation_chars
.match_chars(prestart
, poststart
)
707 def inline_obj(self
, match
, lineno
, end_pattern
, nodeclass
,
708 restore_backslashes
=False):
709 string
= match
.string
710 matchstart
= match
.start('start')
711 matchend
= match
.end('start')
712 if self
.quoted_start(match
):
713 return (string
[:matchend
], [], string
[matchend
:], [], '')
714 endmatch
= end_pattern
.search(string
[matchend
:])
715 if endmatch
and endmatch
.start(1): # 1 or more chars
716 _text
= endmatch
.string
[:endmatch
.start(1)]
717 text
= unescape(_text
, restore_backslashes
)
718 textend
= matchend
+ endmatch
.end(1)
719 rawsource
= unescape(string
[matchstart
:textend
], True)
720 node
= nodeclass(rawsource
, text
)
721 node
[0].rawsource
= unescape(_text
, True)
722 return (string
[:matchstart
], [node
],
723 string
[textend
:], [], endmatch
.group(1))
724 msg
= self
.reporter
.warning(
725 'Inline %s start-string without end-string.'
726 % nodeclass
.__name
__, line
=lineno
)
727 text
= unescape(string
[matchstart
:matchend
], True)
728 rawsource
= unescape(string
[matchstart
:matchend
], True)
729 prb
= self
.problematic(text
, rawsource
, msg
)
730 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
], ''
732 def problematic(self
, text
, rawsource
, message
):
733 msgid
= self
.document
.set_id(message
, self
.parent
)
734 problematic
= nodes
.problematic(rawsource
, text
, refid
=msgid
)
735 prbid
= self
.document
.set_id(problematic
)
736 message
.add_backref(prbid
)
739 def emphasis(self
, match
, lineno
):
740 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
741 match
, lineno
, self
.patterns
.emphasis
, nodes
.emphasis
)
742 return before
, inlines
, remaining
, sysmessages
744 def strong(self
, match
, lineno
):
745 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
746 match
, lineno
, self
.patterns
.strong
, nodes
.strong
)
747 return before
, inlines
, remaining
, sysmessages
749 def interpreted_or_phrase_ref(self
, match
, lineno
):
750 end_pattern
= self
.patterns
.interpreted_or_phrase_ref
751 string
= match
.string
752 matchstart
= match
.start('backquote')
753 matchend
= match
.end('backquote')
754 rolestart
= match
.start('role')
755 role
= match
.group('role')
760 elif self
.quoted_start(match
):
761 return (string
[:matchend
], [], string
[matchend
:], [])
762 endmatch
= end_pattern
.search(string
[matchend
:])
763 if endmatch
and endmatch
.start(1): # 1 or more chars
764 textend
= matchend
+ endmatch
.end()
765 if endmatch
.group('role'):
767 msg
= self
.reporter
.warning(
768 'Multiple roles in interpreted text (both '
769 'prefix and suffix present; only one allowed).',
771 text
= unescape(string
[rolestart
:textend
], True)
772 prb
= self
.problematic(text
, text
, msg
)
773 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
774 role
= endmatch
.group('suffix')[1:-1]
776 escaped
= endmatch
.string
[:endmatch
.start(1)]
777 rawsource
= unescape(string
[matchstart
:textend
], True)
778 if rawsource
[-1:] == '_':
780 msg
= self
.reporter
.warning(
781 'Mismatch: both interpreted text role %s and '
782 'reference suffix.' % position
, line
=lineno
)
783 text
= unescape(string
[rolestart
:textend
], True)
784 prb
= self
.problematic(text
, text
, msg
)
785 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
786 return self
.phrase_ref(string
[:matchstart
], string
[textend
:],
787 rawsource
, escaped
, unescape(escaped
))
789 rawsource
= unescape(string
[rolestart
:textend
], True)
790 nodelist
, messages
= self
.interpreted(rawsource
, escaped
, role
,
792 return (string
[:rolestart
], nodelist
,
793 string
[textend
:], messages
)
794 msg
= self
.reporter
.warning(
795 'Inline interpreted text or phrase reference start-string '
796 'without end-string.', line
=lineno
)
797 text
= unescape(string
[matchstart
:matchend
], True)
798 prb
= self
.problematic(text
, text
, msg
)
799 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
]
801 def phrase_ref(self
, before
, after
, rawsource
, escaped
, text
):
802 match
= self
.patterns
.embedded_link
.search(escaped
)
803 if match
: # embedded <URI> or <alias_>
804 text
= unescape(escaped
[:match
.start(0)])
805 rawtext
= unescape(escaped
[:match
.start(0)], True)
806 aliastext
= unescape(match
.group(2))
807 rawaliastext
= unescape(match
.group(2), True)
808 underscore_escaped
= rawaliastext
.endswith(r
'\_')
809 if aliastext
.endswith('_') and not (underscore_escaped
810 or self
.patterns
.uri
.match(aliastext
)):
812 alias
= normalize_name(aliastext
[:-1])
813 target
= nodes
.target(match
.group(1), refname
=alias
)
814 target
.indirect_reference_name
= aliastext
[:-1]
817 alias_parts
= split_escaped_whitespace(match
.group(2))
818 alias
= ' '.join(''.join(unescape(part
).split())
819 for part
in alias_parts
)
820 alias
= self
.adjust_uri(alias
)
821 if alias
.endswith(r
'\_'):
822 alias
= alias
[:-2] + '_'
823 target
= nodes
.target(match
.group(1), refuri
=alias
)
824 target
.referenced
= 1
826 raise ApplicationError('problem with embedded link: %r'
830 rawtext
= rawaliastext
833 rawtext
= unescape(escaped
, True)
835 refname
= normalize_name(text
)
836 reference
= nodes
.reference(rawsource
, text
,
837 name
=whitespace_normalize_name(text
))
838 reference
[0].rawsource
= rawtext
840 node_list
= [reference
]
842 if rawsource
[-2:] == '__':
843 if target
and (aliastype
== 'name'):
844 reference
['refname'] = alias
845 self
.document
.note_refname(reference
)
846 # self.document.note_indirect_target(target) # required?
847 elif target
and (aliastype
== 'uri'):
848 reference
['refuri'] = alias
850 reference
['anonymous'] = 1
853 target
['names'].append(refname
)
854 if aliastype
== 'name':
855 reference
['refname'] = alias
856 self
.document
.note_indirect_target(target
)
857 self
.document
.note_refname(reference
)
859 reference
['refuri'] = alias
860 self
.document
.note_explicit_target(target
, self
.parent
)
861 # target.note_referenced_by(name=refname)
862 node_list
.append(target
)
864 reference
['refname'] = refname
865 self
.document
.note_refname(reference
)
866 return before
, node_list
, after
, []
869 def adjust_uri(self
, uri
):
870 match
= self
.patterns
.email
.match(uri
)
872 return 'mailto:' + uri
876 def interpreted(self
, rawsource
, text
, role
, lineno
):
877 role_fn
, messages
= roles
.role(role
, self
.language
, lineno
,
880 nodes
, messages2
= role_fn(role
, rawsource
, text
, lineno
, self
)
882 nodes
[0][0].rawsource
= unescape(text
, True)
885 return nodes
, messages
+ messages2
887 msg
= self
.reporter
.error(
888 'Unknown interpreted text role "%s".' % role
,
890 return ([self
.problematic(rawsource
, rawsource
, msg
)],
893 def literal(self
, match
, lineno
):
894 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
895 match
, lineno
, self
.patterns
.literal
, nodes
.literal
,
896 restore_backslashes
=True)
897 return before
, inlines
, remaining
, sysmessages
899 def inline_internal_target(self
, match
, lineno
):
900 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
901 match
, lineno
, self
.patterns
.target
, nodes
.target
)
902 if inlines
and isinstance(inlines
[0], nodes
.target
):
903 assert len(inlines
) == 1
905 name
= normalize_name(target
.astext())
906 target
['names'].append(name
)
907 self
.document
.note_explicit_target(target
, self
.parent
)
908 return before
, inlines
, remaining
, sysmessages
910 def substitution_reference(self
, match
, lineno
):
911 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
912 match
, lineno
, self
.patterns
.substitution_ref
,
913 nodes
.substitution_reference
)
914 if len(inlines
) == 1:
915 subref_node
= inlines
[0]
916 if isinstance(subref_node
, nodes
.substitution_reference
):
917 subref_text
= subref_node
.astext()
918 self
.document
.note_substitution_ref(subref_node
, subref_text
)
919 if endstring
[-1:] == '_':
920 reference_node
= nodes
.reference(
921 '|%s%s' % (subref_text
, endstring
), '')
922 if endstring
[-2:] == '__':
923 reference_node
['anonymous'] = 1
925 reference_node
['refname'] = normalize_name(subref_text
)
926 self
.document
.note_refname(reference_node
)
927 reference_node
+= subref_node
928 inlines
= [reference_node
]
929 return before
, inlines
, remaining
, sysmessages
def footnote_reference(self, match, lineno):
    """
    Handles `nodes.footnote_reference` and `nodes.citation_reference`
    elements.

    NOTE(review): several lines of this method appear elided in this
    excerpt (orig. 943, 946, 950, 952-953, 956-957, 959) — the branch
    structure below is incomplete; verify against upstream docutils.
    """
    label = match.group('footnotelabel')
    refname = normalize_name(label)
    string = match.string
    before = string[:match.start('whole')]
    remaining = string[match.end('whole'):]
    if match.group('citationlabel'):
        # citation reference, e.g. ``[CIT2002]_``
        refnode = nodes.citation_reference('[%s]_' % label,
        # (orig. 943, the refname keyword argument, elided)
        refnode += nodes.Text(label)
        self.document.note_citation_ref(refnode)
    # (orig. 946 ``else:`` elided — footnote branch follows)
        refnode = nodes.footnote_reference('[%s]_' % label)
        if refname[0] == '#':           # auto-numbered footnote
            refname = refname[1:]
            self.document.note_autofootnote_ref(refnode)
        # (orig. 952-953 elided — auto-symbol branch follows)
            refnode['auto'] = '*'
            self.document.note_symbol_footnote_ref(
        # (orig. 956-957 elided)
            refnode += nodes.Text(label)
        # (orig. 959 elided)
            refnode['refname'] = refname
            self.document.note_footnote_ref(refnode)
    if utils.get_trim_footnote_ref_space(self.document.settings):
        # drop the space before the reference per user setting
        before = before.rstrip()
    return (before, [refnode], remaining, [])
def reference(self, match, lineno, anonymous=False):
    """Parse a named or anonymous hyperlink reference (``name_`` /
    ``name__``); return (text-before, [node], text-after, messages)."""
    referencename = match.group('refname')
    refname = normalize_name(referencename)
    referencenode = nodes.reference(
        referencename + match.group('refend'), referencename,
        name=whitespace_normalize_name(referencename))
    referencenode[0].rawsource = referencename
    # NOTE(review): ``if anonymous:`` / ``else:`` lines appear elided
    # here (orig. 973 and 975) — upstream applies 'anonymous' only when
    # the flag is set, and refname handling otherwise; verify.
    referencenode['anonymous'] = 1
    referencenode['refname'] = refname
    self.document.note_refname(referencenode)
    string = match.string
    matchstart = match.start('whole')
    matchend = match.end('whole')
    return (string[:matchstart], [referencenode], string[matchend:], [])
def anonymous_reference(self, match, lineno):
    """Handle an anonymous hyperlink reference by delegating to
    `reference` with the anonymous flag set."""
    outcome = self.reference(match, lineno, anonymous=1)
    return outcome
def standalone_uri(self, match, lineno):
    """Recognize a bare URI or email address in running text and wrap
    it in a `reference` node.  Raises MarkupMismatch for an unknown
    URI scheme."""
    if (not match.group('scheme')
            or match.group('scheme').lower() in urischemes.schemes):
        if match.group('email'):
            # bare email address: prepend mailto: for the refuri
            addscheme = 'mailto:'
        # NOTE(review): the ``else: addscheme = ''`` branch appears
        # elided here (orig. 991-992) — verify.
        text = match.group('whole')
        unescaped = unescape(text)
        rawsource = unescape(text, True)
        reference = nodes.reference(rawsource, unescaped,
                                    refuri=addscheme + unescaped)
        reference[0].rawsource = rawsource
        # NOTE(review): the ``return [reference]`` line appears elided
        # here (orig. 999).
    else:                   # not a valid scheme
        raise MarkupMismatch
def pep_reference(self, match, lineno):
    """Turn a ``PEP nnn`` / ``pep-nnn`` match into a reference node
    pointing at the configured PEP base URL."""
    text = match.group(0)
    if text.startswith('pep-'):
        pepnum = int(match.group('pepnum1'))
    elif text.startswith('PEP'):
        pepnum = int(match.group('pepnum2'))
    # NOTE(review): an ``else:`` line appears elided here (orig. 1009);
    # upstream raises only when neither prefix matched — verify.
        raise MarkupMismatch
    ref = (self.document.settings.pep_base_url
           + self.document.settings.pep_file_url_template % pepnum)
    unescaped = unescape(text)
    return [nodes.reference(unescape(text, True), unescaped, refuri=ref)]
# Filename template appended to settings.rfc_base_url in rfc_reference().
rfc_url = 'rfc%d.html'
def rfc_reference(self, match, lineno):
    """Turn an ``RFC nnn`` match into a reference node pointing at the
    configured RFC base URL."""
    text = match.group(0)
    if text.startswith('RFC'):
        rfcnum = int(match.group('rfcnum'))
        ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
    # NOTE(review): an ``else:`` line appears elided here (orig. 1023);
    # upstream raises only when the prefix did not match — verify.
        raise MarkupMismatch
    unescaped = unescape(text)
    return [nodes.reference(unescape(text, True), unescaped, refuri=ref)]
def implicit_inline(self, text, lineno):
    """
    Check each of the patterns in `self.implicit_dispatch` for a match,
    and dispatch to the stored method for the pattern.  Recursively check
    the text before and after the match.  Return a list of `nodes.Text`
    and inline element nodes.

    NOTE(review): lines appear elided in this excerpt (orig. 1035-1036,
    1039-1040, 1047) — the ``if match:``/``try:`` guards around the
    recursive return and the ``except`` body are missing; verify
    against upstream docutils.
    """
    for pattern, method in self.implicit_dispatch:
        match = pattern.search(text)
            # Must recurse on strings before *and* after the match;
            # there may be multiple patterns.
            return (self.implicit_inline(text[:match.start()], lineno)
                    + method(match, lineno) +
                    self.implicit_inline(text[match.end():], lineno))
        except MarkupMismatch:
    return [nodes.Text(unescape(text), rawsource=unescape(text, True))]
# Map of inline markup start-strings to handler methods.
# NOTE(review): several entries appear elided in this excerpt (orig.
# 1051, 1053, 1057 — upstream also maps '**', '``' and '_'); verify.
dispatch = {'*': emphasis,
            '`': interpreted_or_phrase_ref,
            '_`': inline_internal_target,
            ']_': footnote_reference,
            '|': substitution_reference,
            '__': anonymous_reference}
1061 def _loweralpha_to_int(s
, _zero
=(ord('a')-1)):
1062 return ord(s
) - _zero
1064 def _upperalpha_to_int(s
, _zero
=(ord('A')-1)):
1065 return ord(s
) - _zero
def _lowerroman_to_int(s):
    """Convert a lowercase Roman numeral to an int via the `roman` module."""
    normalized = s.upper()
    return roman.fromRoman(normalized)
class Body(RSTState):

    """
    Generic classifier of the first line of a block.

    NOTE(review): several class-attribute initializer lines appear
    elided in this excerpt (orig. 1080 ``enum = Struct()``, 1083
    ``enum.formatinfo = {``, 1115 ``pats = {}``, 1136 ``patterns = {``,
    1148 closing entries, 1150-1161 initial_transitions entries) —
    verify against upstream docutils before relying on the literals
    below.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    """Enumerated list parsing information."""
    # (enum.formatinfo dict header elided)
    'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
    'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
    'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman'] # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+',}
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    # Pre-compile one anchored regexp per enumerator sequence.
    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$', re.UNICODE)

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    """Fragments of patterns used by transitions."""
    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern?  Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    # One named group per enumerator format (period/parens/rparen).
    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    # (``patterns = {`` dict header elided, orig. 1136)
    'bullet': u'[-+*\u2022\u2023\u2043]( +|$)',
    'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
    'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
    'option_marker': r'%(option)s(, %(option)s)*( +| ?$)' % pats,
    'doctest': r'>>>( +|$)',
    'line_block': r'\|( +|$)',
    'grid_table_top': grid_table_top_pat,
    'simple_table_top': simple_table_top_pat,
    'explicit_markup': r'\.\.( +|$)',
    'anonymous': r'__( +|$)',
    'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,

    # (transition names elided, orig. 1150-1161)
    initial_transitions = (
def indent(self, match, context, next_state):
    """Handle an unexpectedly indented block by parsing it as a
    block quote; warn when the quote does not end with a blank line."""
    quote_lines, _indent, quote_offset, ended_with_blank = (
        self.state_machine.get_indented())
    quote_elements = self.block_quote(quote_lines, quote_offset)
    self.parent += quote_elements
    if not ended_with_blank:
        self.parent += self.unindent_warning('Block quote')
    return context, next_state, []
def block_quote(self, indented, line_offset):
    """Parse indented lines as one or more block quotes with optional
    attributions; return a list of elements.

    NOTE(review): the start of this method appears elided in this
    excerpt (orig. 1174-1179: ``elements = []``, the loop header and
    the start of the split_attribution tuple unpack) and the final
    ``return elements`` (orig. 1192-1193); verify against upstream.
    """
            new_line_offset) = self.split_attribution(indented, line_offset)
        blockquote = nodes.block_quote()
        self.nested_parse(blockquote_lines, line_offset, blockquote)
        elements.append(blockquote)
        if attribution_lines:
            attribution, messages = self.parse_attribution(
                attribution_lines, attribution_offset)
            blockquote += attribution
            elements += messages
        line_offset = new_line_offset
        # skip leading blank lines before the next quote segment
        while indented and not indented[0]:
            indented = indented[1:]
# U+2014 is an em-dash:
# NOTE(review): the flags argument of this re.compile call (orig. line
# 1197, presumably re.UNICODE) appears elided in this excerpt — verify.
attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \\n])',
def split_attribution(self, indented, line_offset):
    """
    Check for a block quote attribution and split it off:

    * First line after a blank line must begin with a dash ("--", "---",
      em-dash; matches `self.attribution_pattern`).
    * Every line after that must have consistent indentation.
    * Attributions must be preceded by block quote content.

    Return a tuple of: (block quote content lines, content offset,
    attribution lines, attribution offset, remaining indented lines).

    NOTE(review): lines appear elided in this excerpt (orig. 1211,
    1215, 1218, 1220-1221, 1229-1231) — the blank-line bookkeeping and
    the guards around check_attribution are incomplete; verify.
    """
    nonblank_seen = False
    for i in range(len(indented)):
        line = indented[i].rstrip()
        if nonblank_seen and blank == i - 1: # last line blank
            match = self.attribution_pattern.match(line)
            attribution_end, indent = self.check_attribution(
            a_lines = indented[i:attribution_end]
            a_lines.trim_left(match.end(), end=1)
            a_lines.trim_left(indent, start=1)
            return (indented[:i], a_lines,
                    i, indented[attribution_end:],
                    line_offset + attribution_end)
        nonblank_seen = True
    # no attribution found
    return (indented, None, None, None, None)
def check_attribution(self, indented, attribution_start):
    """
    Check attribution shape.
    Return the index past the end of the attribution, and the indent.

    NOTE(review): lines appear elided in this excerpt (orig. 1239,
    1243-1245, 1249, 1251) — the ``indent = None`` initializer and the
    blank-line/first-line guards are missing, leaving a dangling
    ``elif``; verify against upstream.
    """
    i = attribution_start + 1
    for i in range(attribution_start + 1, len(indented)):
        line = indented[i].rstrip()
        indent = len(line) - len(line.lstrip())
        elif len(line) - len(line.lstrip()) != indent:
            return None, None       # bad shape; not an attribution
    # return index of line after last attribution line:
    return i, (indent or 0)
def parse_attribution(self, indented, line_offset):
    """Build an `attribution` node (with inline markup parsed) from the
    attribution lines; return (node, system messages)."""
    raw_text = '\n'.join(indented).rstrip()
    abs_lineno = self.state_machine.abs_line_number() + line_offset
    children, messages = self.inline_text(raw_text, abs_lineno)
    attribution_node = nodes.attribution(raw_text, '', *children)
    source, line = self.state_machine.get_source_and_line(abs_lineno)
    attribution_node.source = source
    attribution_node.line = line
    return attribution_node, messages
def bullet(self, match, context, next_state):
    """Bullet list item."""
    bulletlist = nodes.bullet_list()
    # NOTE(review): the first half of this tuple assignment (orig. 1265,
    # ``(bulletlist.source,``) and ``bulletlist += i`` (orig. 1270)
    # appear elided in this excerpt — verify against upstream.
     bulletlist.line) = self.state_machine.get_source_and_line()
    self.parent += bulletlist
    bulletlist['bullet'] = match.string[0]
    i, blank_finish = self.list_item(match.end())
    offset = self.state_machine.line_offset + 1   # next line
    # parse any further items of the same list with a nested machine
    new_line_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=bulletlist, initial_state='BulletList',
        blank_finish=blank_finish)
    self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Bullet list')
    return [], next_state, []
def list_item(self, indent):
    """Parse one list item body; return (list_item node, blank_finish).

    NOTE(review): lines appear elided in this excerpt (orig. 1286
    ``else:``, 1290, 1292 ``node=listitem)``) — verify against
    upstream docutils.
    """
    if self.state_machine.line[indent:]:
        # text on the marker line: indent is already known
        indented, line_offset, blank_finish = (
            self.state_machine.get_known_indented(indent))
        indented, indent, line_offset, blank_finish = (
            self.state_machine.get_first_known_indented(indent))
    listitem = nodes.list_item('\n'.join(indented))
    self.nested_parse(indented, input_offset=line_offset,
    return listitem, blank_finish
def enumerator(self, match, context, next_state):
    """Enumerated List Item"""
    # NOTE(review): lines appear elided in this excerpt (orig. 1302,
    # 1304, 1308, 1312-1313, 1323) — the '#'/ordinal-1 guards around
    # the enumtype/start assignments are incomplete; verify.
    format, sequence, text, ordinal = self.parse_enumerator(match)
    if not self.is_enumerated_list_item(ordinal, sequence, format):
        # not a valid list item after all; reparse as ordinary text
        raise statemachine.TransitionCorrection('text')
    enumlist = nodes.enumerated_list()
    self.parent += enumlist
    enumlist['enumtype'] = 'arabic'
    enumlist['enumtype'] = sequence
    enumlist['prefix'] = self.enum.formatinfo[format].prefix
    enumlist['suffix'] = self.enum.formatinfo[format].suffix
    enumlist['start'] = ordinal
    msg = self.reporter.info(
        'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
    listitem, blank_finish = self.list_item(match.end())
    enumlist += listitem
    offset = self.state_machine.line_offset + 1   # next line
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=enumlist, initial_state='EnumeratedList',
        blank_finish=blank_finish,
        extra_settings={'lastordinal': ordinal,
                        'auto': sequence == '#'})
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Enumerated list')
    return [], next_state, []
def parse_enumerator(self, match, expected_sequence=None):
    """
    Analyze an enumerator and return the results.

    :Return:
        - the enumerator format ('period', 'parens', or 'rparen'),
        - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
        - the text of the enumerator, stripped of formatting, and
        - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
          ``None`` is returned for invalid enumerator text).

    The enumerator format has already been determined by the regular
    expression match. If `expected_sequence` is given, that sequence is
    tried first. If not, we check for Roman numeral 1. This way,
    single-character Roman numerals (which are also alphabetical) can be
    matched. If no sequence has been matched, all sequences are checked in
    order.

    NOTE(review): many lines appear elided in this excerpt (orig. 1349,
    1357-1358, 1360, 1365-1366, 1368, 1370, 1373, 1376-1379, 1382) —
    the branch structure below is incomplete; verify against upstream.
    """
    groupdict = match.groupdict()
    for format in self.enum.formats:
        if groupdict[format]:       # was this the format matched?
            break                   # yes; keep `format`
    else:                           # shouldn't happen
        raise ParserError('enumerator format not matched')
    # strip the format's prefix/suffix to get the bare enumerator text
    text = groupdict[format][self.enum.formatinfo[format].start
                             :self.enum.formatinfo[format].end]
    elif expected_sequence:
        if self.enum.sequenceregexps[expected_sequence].match(text):
            sequence = expected_sequence
        except KeyError:            # shouldn't happen
            raise ParserError('unknown enumerator sequence: %s'
        sequence = 'lowerroman'
        sequence = 'upperroman'
    for sequence in self.enum.sequences:
        if self.enum.sequenceregexps[sequence].match(text):
    else:                           # shouldn't happen
        raise ParserError('enumerator sequence not matched')
    ordinal = self.enum.converters[sequence](text)
    except roman.InvalidRomanNumeralError:
    return format, sequence, text, ordinal
def is_enumerated_list_item(self, ordinal, sequence, format):
    """
    Check validity based on the ordinal value and the second line.

    Return true if the ordinal is valid and the second line is blank,
    indented, or starts with the next enumerator or an auto-enumerator.

    NOTE(review): lines appear elided in this excerpt (orig. 1391-1394,
    1398-1399, 1402, 1404, 1406, 1409-1412) — the early returns and the
    ``try:`` guards are missing; verify against upstream docutils.
    """
    next_line = self.state_machine.next_line()
    except EOFError:                # end of input lines
        self.state_machine.previous_line()
        self.state_machine.previous_line()
    if not next_line[:1].strip():   # blank or indented
    result = self.make_enumerator(ordinal + 1, sequence, format)
    next_enumerator, auto_enumerator = result
    if ( next_line.startswith(next_enumerator) or
         next_line.startswith(auto_enumerator) ):
def make_enumerator(self, ordinal, sequence, format):
    """
    Construct and return the next enumerated list item marker, and an
    auto-enumerator ("#" instead of the regular enumerator).

    Return ``None`` for invalid (out of range) ordinals.

    NOTE(review): lines appear elided in this excerpt (orig. 1421-1422,
    1425, 1427-1428, 1431, 1434, 1437, 1444, 1447) — the '#' branch,
    range guards, ``try:`` and some continuation lines are missing;
    verify against upstream docutils.
    """
    elif sequence == 'arabic':
        enumerator = str(ordinal)
        if sequence.endswith('alpha'):
            enumerator = chr(ordinal + ord('a') - 1)
        elif sequence.endswith('roman'):
            enumerator = roman.toRoman(ordinal)
            except roman.RomanError:
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
        if sequence.startswith('lower'):
            enumerator = enumerator.lower()
        elif sequence.startswith('upper'):
            enumerator = enumerator.upper()
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
    formatinfo = self.enum.formatinfo[format]
    next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
    auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
    return next_enumerator, auto_enumerator
def field_marker(self, match, context, next_state):
    """Field list item."""
    # NOTE(review): ``field_list += field`` (orig. 1456) appears elided
    # in this excerpt — verify against upstream docutils.
    field_list = nodes.field_list()
    self.parent += field_list
    field, blank_finish = self.field(match)
    offset = self.state_machine.line_offset + 1   # next line
    # parse any further fields of the same list with a nested machine
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=field_list, initial_state='FieldList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Field list')
    return [], next_state, []
def field(self, match):
    """Parse one field of a field list; return (field node, blank_finish)."""
    name = self.parse_field_marker(match)
    src, srcline = self.state_machine.get_source_and_line()
    lineno = self.state_machine.abs_line_number()
    indented, indent, line_offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end())
    field_node = nodes.field()
    field_node.source = src
    field_node.line = srcline
    # the field name may itself contain inline markup
    name_nodes, name_messages = self.inline_text(name, lineno)
    field_node += nodes.field_name(name, '', *name_nodes)
    field_body = nodes.field_body('\n'.join(indented), *name_messages)
    field_node += field_body
    # NOTE(review): an ``if indented:`` guard (orig. 1481) appears
    # elided here — verify against upstream docutils.
    self.parse_field_body(indented, line_offset, field_body)
    return field_node, blank_finish
def parse_field_marker(self, match):
    """Extract & return field name from a field marker match."""
    field = match.group()[1:]           # strip off leading ':'
    field = field[:field.rfind(':')]    # strip off trailing ':' etc.
    # NOTE(review): the final ``return field`` line (orig. 1489)
    # appears elided in this excerpt.
def parse_field_body(self, indented, offset, node):
    """Run a nested parse of the field body lines into *node*."""
    self.nested_parse(indented, node=node, input_offset=offset)
def option_marker(self, match, context, next_state):
    """Option list item."""
    # NOTE(review): lines appear elided in this excerpt (orig. 1498
    # ``try:``, 1503-1504) — the except block below is incomplete;
    # verify against upstream docutils.  (Python 2 ``except E, name:``
    # syntax is original to this file.)
    optionlist = nodes.option_list()
    (optionlist.source, optionlist.line) = self.state_machine.get_source_and_line()
    listitem, blank_finish = self.option_list_item(match)
    except MarkupError, error:
        # This shouldn't happen; pattern won't match.
        msg = self.reporter.error(u'Invalid option list marker: %s' %
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        # fall back to a block quote for the malformed item
        elements = self.block_quote(indented, line_offset)
        self.parent += elements
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
    self.parent += optionlist
    optionlist += listitem
    offset = self.state_machine.line_offset + 1   # next line
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=optionlist, initial_state='OptionList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Option list')
    return [], next_state, []
def option_list_item(self, match):
    """Parse one option-list item; return (option_list_item node,
    blank_finish).  Raises TransitionCorrection when no description
    follows the option marker."""
    offset = self.state_machine.abs_line_offset()
    options = self.parse_option_marker(match)
    indented, indent, line_offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end())
    if not indented:                # not an option list item
        self.goto_line(offset)
        raise statemachine.TransitionCorrection('text')
    option_group = nodes.option_group('', *options)
    description = nodes.description('\n'.join(indented))
    # NOTE(review): continuation lines appear elided in this excerpt
    # (orig. 1536-1537 and 1539, e.g. ``node=description)``) — verify.
    option_list_item = nodes.option_list_item('', option_group,
    self.nested_parse(indented, input_offset=line_offset,
    return option_list_item, blank_finish
def parse_option_marker(self, match):
    """
    Return a list of `node.option` and `node.option_argument` objects,
    parsed from an option marker match.

    :Exception: `MarkupError` for invalid option markers.

    NOTE(review): lines appear elided in this excerpt (orig. 1549
    ``optlist = []``, 1553/1558/1565 delimiter bookkeeping, 1573,
    1577-1578, 1581 ``return optlist``) — verify against upstream.
    """
    optionstrings = match.group().rstrip().split(', ')
    for optionstring in optionstrings:
        tokens = optionstring.split()
        firstopt = tokens[0].split('=', 1)
        if len(firstopt) > 1:
            # "--opt=value" form
            tokens[:1] = firstopt
        elif (len(tokens[0]) > 2
              and ((tokens[0].startswith('-')
                    and not tokens[0].startswith('--'))
                   or tokens[0].startswith('+'))):
            # short option with attached value, e.g. "-ovalue"
            tokens[:1] = [tokens[0][:2], tokens[0][2:]]
        if len(tokens) > 1 and (tokens[1].startswith('<')
                                and tokens[-1].endswith('>')):
            # "-o <value1 value2>" form; join all values into one token
            tokens[1:] = [' '.join(tokens[1:])]
        if 0 < len(tokens) <= 2:
            option = nodes.option(optionstring)
            option += nodes.option_string(tokens[0], tokens[0])
            option += nodes.option_argument(tokens[1], tokens[1],
                                            delimiter=delimiter)
            optlist.append(option)
            'wrong number of option tokens (=%s), should be 1 or 2: '
            '"%s"' % (len(tokens), optionstring))
def doctest(self, match, context, next_state):
    """Collect the current text block and append it verbatim as a
    doctest_block node."""
    block_lines = self.state_machine.get_text_block()
    text = '\n'.join(block_lines)
    # TODO: prepend class value ['pycon'] (Python Console)
    # parse with `directives.body.CodeBlock` (returns literal-block
    # with class "code" and syntax highlight markup).
    self.parent += nodes.doctest_block(text, text)
    return [], next_state, []
def line_block(self, match, context, next_state):
    """First line of a line block."""
    # NOTE(review): lines appear elided in this excerpt (orig. 1597
    # ``block += line``, 1605, 1610-1611, 1613) — some continuation
    # lines and the first-indent default are missing; verify.
    block = nodes.line_block()
    self.parent += block
    lineno = self.state_machine.abs_line_number()
    line, messages, blank_finish = self.line_block_line(match, lineno)
    self.parent += messages
    if not blank_finish:
        offset = self.state_machine.line_offset + 1   # next line
        # parse the remaining lines of the block with a nested machine
        new_line_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=block, initial_state='LineBlock',
        self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.reporter.warning(
            'Line block ends without a blank line.',
        if block[0].indent is None:
        self.nest_line_block_lines(block)
    return [], next_state, []
def line_block_line(self, match, lineno):
    """Return one line element of a line_block."""
    # NOTE(review): a continuation line of this call (orig. 1621,
    # presumably ``until_blank=True)``) appears elided — verify.
    indented, indent, line_offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end(),
    text = u'\n'.join(indented)
    text_nodes, messages = self.inline_text(text, lineno)
    line = nodes.line(text, '', *text_nodes)
    if match.string.rstrip() != '|':    # not empty
        # remember the indent after the '|' for later nesting
        line.indent = len(match.group(1)) - 1
    return line, messages, blank_finish
def nest_line_block_lines(self, block):
    """Fill in missing `indent` attributes from the preceding line,
    then nest the whole block by indentation."""
    for idx in range(1, len(block)):
        current = block[idx]
        if getattr(current, 'indent', None) is not None:
            continue
        current.indent = block[idx - 1].indent
    self.nest_line_block_segment(block)
def nest_line_block_segment(self, block):
    """Recursively wrap runs of more-indented lines in nested
    line_block nodes.

    NOTE(review): lines appear elided in this excerpt (orig. 1638
    ``new_items = []``, 1640 loop header, 1643-1644, 1649 guards) —
    the loop structure below is incomplete; verify against upstream.
    """
    indents = [item.indent for item in block]
    least = min(indents)
    new_block = nodes.line_block()
    if item.indent > least:
        # deeper-indented line: collect into the pending sub-block
        new_block.append(item)
    self.nest_line_block_segment(new_block)
    new_items.append(new_block)
    new_block = nodes.line_block()
    new_items.append(item)
    self.nest_line_block_segment(new_block)
    new_items.append(new_block)
    # replace the block's children in place
    block[:] = new_items
def grid_table_top(self, match, context, next_state):
    """Top border of a full (grid) table."""
    return self.table_top(
        match, context, next_state,
        self.isolate_grid_table, tableparser.GridTableParser)
def simple_table_top(self, match, context, next_state):
    """Top border of a simple table."""
    return self.table_top(
        match, context, next_state,
        self.isolate_simple_table, tableparser.SimpleTableParser)
def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """Top border of a generic table."""
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if not blank_finish:
        msg = self.reporter.warning(
            'Blank line required after table.',
            line=self.state_machine.abs_line_number()+1)
        # NOTE(review): ``self.parent += msg`` (orig. 1675) appears
        # elided in this excerpt — verify.
    return [], next_state, []
def table(self, isolate_function, parser_class):
    """Parse a table."""
    # NOTE(review): lines appear elided in this excerpt (orig.
    # 1681-1682 ``if block:``/``try:``, 1686, 1692-1693 ``else:``
    # branch) — the try/except structure below is incomplete; verify.
    # (Python 2 ``except E, name:`` syntax is original to this file.)
    block, messages, blank_finish = isolate_function()
    parser = parser_class()
    tabledata = parser.parse(block)
    tableline = (self.state_machine.abs_line_number() - len(block)
    table = self.build_table(tabledata, tableline)
    nodelist = [table] + messages
    except tableparser.TableMarkupError, err:
        nodelist = self.malformed_table(block, ' '.join(err.args),
                                        offset=err.offset) + messages
    return nodelist, blank_finish
def isolate_grid_table(self):
    """Extract the lines of a grid table from the input; return
    (block, messages, blank_finish).

    NOTE(review): lines appear elided in this excerpt (orig. 1697-1699
    initializers/``try:``, 1705-1706, 1713, 1715-1716, 1718,
    1723-1725) — several guards, ``del``/``break`` statements and the
    ``blank_finish`` bookkeeping are missing; verify against upstream.
    """
    block = self.state_machine.get_text_block(flush_left=True)
    except statemachine.UnexpectedIndentationError, err:
        block, src, srcline = err.args
        messages.append(self.reporter.error('Unexpected indentation.',
                                            source=src, line=srcline))
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    width = len(block[0].strip())
    for i in range(len(block)):
        block[i] = block[i].strip()
        if block[i][0] not in '+|': # check left edge
            self.state_machine.previous_line(len(block) - i)
    if not self.grid_table_top_pat.match(block[-1]): # find bottom
        # from second-last to third line of table:
        for i in range(len(block) - 2, 1, -1):
            if self.grid_table_top_pat.match(block[i]):
                self.state_machine.previous_line(len(block) - i + 1)
        messages.extend(self.malformed_table(block))
        return [], messages, blank_finish
    for i in range(len(block)): # check right edge
        if len(block[i]) != width or block[i][-1] not in '+|':
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    return block, messages, blank_finish
def isolate_simple_table(self):
    """Extract the lines of a simple table from the input; return
    (block, messages, blank_finish).

    NOTE(review): lines appear elided in this excerpt (orig.
    1740-1744 loop setup, 1746, 1753-1754, 1756-1758, 1760,
    1764-1765) — the border-scanning loop structure below is
    incomplete; verify against upstream docutils.
    """
    start = self.state_machine.line_offset
    lines = self.state_machine.input_lines
    limit = len(lines) - 1
    toplen = len(lines[start].strip())
    pattern_match = self.simple_table_border_pat.match
    match = pattern_match(line)
    if len(line.strip()) != toplen:
        self.state_machine.next_line(i - start)
        messages = self.malformed_table(
            lines[start:i+1], 'Bottom/header table border does '
            'not match top border.')
        return [], messages, i == limit or not lines[i+1].strip()
    if found == 2 or i == limit or not lines[i+1].strip():
    else:                           # reached end of input_lines
        extra = ' or no blank line after table bottom'
        self.state_machine.next_line(found_at - start)
        block = lines[start:found_at+1]
        self.state_machine.next_line(i - start - 1)
        block = lines[start:]
        messages = self.malformed_table(
            block, 'No bottom table border found%s.' % extra)
        return [], messages, not extra
    self.state_machine.next_line(end - start)
    block = lines[start:end+1]
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    return block, [], end == limit or not lines[end+1].strip()
def malformed_table(self, block, detail='', offset=0):
    """Build a 'Malformed table.' error (with the table text as a
    literal block) for reporting."""
    block.replace(self.double_width_pad_char, '')
    data = '\n'.join(block)
    message = 'Malformed table.'
    startline = self.state_machine.abs_line_number() - len(block) + 1
    # NOTE(review): an ``if detail:`` guard (orig. 1782) and the final
    # ``return [error]`` (orig. 1786) appear elided — verify.
    message += '\n' + detail
    error = self.reporter.error(message, nodes.literal_block(data, data),
                                line=startline+offset)
def build_table(self, tabledata, tableline, stub_columns=0, widths=None):
    """Assemble a `table` node tree from parsed table data.

    NOTE(review): lines appear elided in this excerpt (orig. 1796,
    1799, 1801-1803, 1805, 1809, 1812-1813 ``return table``) — the
    tgroup/thead/tbody attachment statements are incomplete; verify
    against upstream docutils.
    """
    colwidths, headrows, bodyrows = tabledata
    table = nodes.table()
    if widths == 'auto':
        table['classes'] += ['colwidths-auto']
    elif widths: # "grid" or list of integers
        table['classes'] += ['colwidths-given']
    tgroup = nodes.tgroup(cols=len(colwidths))
    for colwidth in colwidths:
        colspec = nodes.colspec(colwidth=colwidth)
        colspec.attributes['stub'] = 1
    thead = nodes.thead()
    for row in headrows:
        thead += self.build_table_row(row, tableline)
    tbody = nodes.tbody()
    for row in bodyrows:
        tbody += self.build_table_row(row, tableline)
def build_table_row(self, rowdata, tableline):
    """Assemble one `row` node from parsed row data.

    NOTE(review): lines appear elided in this excerpt (orig. 1815
    ``row = nodes.row()``, 1817-1818, 1820-1821, 1823, 1826, and the
    tail including ``node=entry)`` / ``return row``) — verify against
    upstream docutils.
    """
    for cell in rowdata:
        morerows, morecols, offset, cellblock = cell
        attributes['morerows'] = morerows
        attributes['morecols'] = morecols
        entry = nodes.entry(**attributes)
        if ''.join(cellblock):
            # parse non-empty cell contents with a nested parse
            self.nested_parse(cellblock, input_offset=tableline+offset,
1834 """Patterns and constants used for explicit markup recognition."""
1836 explicit
.patterns
= Struct(
1837 target
=re
.compile(r
"""
1839 _ # anonymous target
1841 (?!_) # no underscore at the beginning
1842 (?P<quote>`?) # optional open quote
1843 (?![ `]) # first char. not space or
1845 (?P<name> # reference name
1848 %(non_whitespace_escape_before)s
1849 (?P=quote) # close quote if open quote used
1851 (?<!(?<!\x00):) # no unescaped colon at end
1852 %(non_whitespace_escape_before)s
1853 [ ]? # optional space
1854 : # end of reference name
1855 ([ ]+|$) # followed by whitespace
1856 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1857 reference
=re
.compile(r
"""
1859 (?P<simple>%(simplename)s)_
1863 (?P<phrase>.+?) # hyperlink phrase
1864 %(non_whitespace_escape_before)s
1865 `_ # close backquote,
1869 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1870 substitution
=re
.compile(r
"""
1872 (?![ ]) # first char. not space
1873 (?P<name>.+?) # substitution text
1874 %(non_whitespace_escape_before)s
1875 \| # close delimiter
1877 ([ ]+|$) # followed by whitespace
1878 """ % vars(Inliner
),
1879 re
.VERBOSE | re
.UNICODE
),)
def footnote(self, match):
    """Parse an explicit footnote; return ([footnote node],
    blank_finish).

    NOTE(review): guard lines appear elided in this excerpt (orig.
    1893, 1897, 1904, 1906, 1908) — the auto-symbol/explicit-target
    branch structure below is incomplete; verify against upstream.
    """
    src, srcline = self.state_machine.get_source_and_line()
    indented, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end())
    label = match.group(1)
    name = normalize_name(label)
    footnote = nodes.footnote('\n'.join(indented))
    footnote.source = src
    footnote.line = srcline
    if name[0] == '#':              # auto-numbered
        name = name[1:]             # autonumber label
        footnote['auto'] = 1
        footnote['names'].append(name)
        self.document.note_autofootnote(footnote)
    elif name == '*':               # auto-symbol
        footnote['auto'] = '*'
        self.document.note_symbol_footnote(footnote)
    else:                           # manually numbered
        footnote += nodes.label('', label)
        footnote['names'].append(name)
        self.document.note_footnote(footnote)
    self.document.note_explicit_target(footnote, footnote)
    self.document.set_id(footnote, footnote)
    self.nested_parse(indented, input_offset=offset, node=footnote)
    return [footnote], blank_finish
def citation(self, match):
    """Parse an explicit citation; return ([citation node],
    blank_finish)."""
    src, srcline = self.state_machine.get_source_and_line()
    indented, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end())
    label = match.group(1)
    name = normalize_name(label)
    citation = nodes.citation('\n'.join(indented))
    citation.source = src
    citation.line = srcline
    citation += nodes.label('', label)
    citation['names'].append(name)
    self.document.note_citation(citation)
    self.document.note_explicit_target(citation, citation)
    # NOTE(review): a line appears elided here (orig. 1925, presumably
    # ``if indented:``) — verify against upstream docutils.
    self.nested_parse(indented, input_offset=offset, node=citation)
    return [citation], blank_finish
def hyperlink_target(self, match):
    """Parse an explicit hyperlink target; return ([target node],
    blank_finish).

    NOTE(review): lines appear elided in this excerpt (orig.
    1937-1939, 1941-1944, 1946) — the loop that accumulates escaped
    block lines into `escaped` and the IndexError handling are
    incomplete; verify against upstream docutils.
    """
    pattern = self.explicit.patterns.target
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(
              match.end(), until_blank=True, strip_indent=False)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    # backslash-escapes become nulls so the pattern can't be fooled
    block = [escape2null(line) for line in block]
    targetmatch = pattern.match(escaped)
    escaped += block[blockindex]
    raise MarkupError('malformed hyperlink target.')
    del block[:blockindex]
    block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish
1954 def make_target(self
, block
, block_text
, lineno
, target_name
):
1955 target_type
, data
= self
.parse_target(block
, block_text
, lineno
)
1956 if target_type
== 'refname':
1957 target
= nodes
.target(block_text
, '', refname
=normalize_name(data
))
1958 target
.indirect_reference_name
= data
1959 self
.add_target(target_name
, '', target
, lineno
)
1960 self
.document
.note_indirect_target(target
)
1962 elif target_type
== 'refuri':
1963 target
= nodes
.target(block_text
, '')
1964 self
.add_target(target_name
, data
, target
, lineno
)
1969 def parse_target(self
, block
, block_text
, lineno
):
1971 Determine the type of reference of a target.
1973 :Return: A 2-tuple, one of:
1975 - 'refname' and the indirect reference name
1976 - 'refuri' and the URI
1977 - 'malformed' and a system_message node
1979 if block
and block
[-1].strip()[-1:] == '_': # possible indirect target
1980 reference
= ' '.join([line
.strip() for line
in block
])
1981 refname
= self
.is_reference(reference
)
1983 return 'refname', refname
1984 ref_parts
= split_escaped_whitespace(' '.join(block
))
1985 reference
= ' '.join(''.join(unescape(part
).split())
1986 for part
in ref_parts
)
1987 return 'refuri', reference
1989 def is_reference(self
, reference
):
1990 match
= self
.explicit
.patterns
.reference
.match(
1991 whitespace_normalize_name(reference
))
1994 return unescape(match
.group('simple') or match
.group('phrase'))
1996 def add_target(self
, targetname
, refuri
, target
, lineno
):
1997 target
.line
= lineno
1999 name
= normalize_name(unescape(targetname
))
2000 target
['names'].append(name
)
2002 uri
= self
.inliner
.adjust_uri(refuri
)
2004 target
['refuri'] = uri
2006 raise ApplicationError('problem with URI: %r' % refuri
)
2007 self
.document
.note_explicit_target(target
, self
.parent
)
2008 else: # anonymous target
2010 target
['refuri'] = refuri
2011 target
['anonymous'] = 1
2012 self
.document
.note_anonymous_target(target
)
2014 def substitution_def(self
, match
):
2015 pattern
= self
.explicit
.patterns
.substitution
2016 src
, srcline
= self
.state_machine
.get_source_and_line()
2017 block
, indent
, offset
, blank_finish
= \
2018 self
.state_machine
.get_first_known_indented(match
.end(),
2020 blocktext
= (match
.string
[:match
.end()] + '\n'.join(block
))
2022 escaped
= escape2null(block
[0].rstrip())
2025 subdefmatch
= pattern
.match(escaped
)
2030 escaped
= escaped
+ ' ' + escape2null(block
[blockindex
].strip())
2032 raise MarkupError('malformed substitution definition.')
2033 del block
[:blockindex
] # strip out the substitution marker
2034 block
[0] = (block
[0].strip() + ' ')[subdefmatch
.end()-len(escaped
)-1:-1]
2038 while block
and not block
[-1].strip():
2040 subname
= subdefmatch
.group('name')
2041 substitution_node
= nodes
.substitution_definition(blocktext
)
2042 substitution_node
.source
= src
2043 substitution_node
.line
= srcline
2045 msg
= self
.reporter
.warning(
2046 'Substitution definition "%s" missing contents.' % subname
,
2047 nodes
.literal_block(blocktext
, blocktext
),
2048 source
=src
, line
=srcline
)
2049 return [msg
], blank_finish
2050 block
[0] = block
[0].strip()
2051 substitution_node
['names'].append(
2052 nodes
.whitespace_normalize_name(subname
))
2053 new_abs_offset
, blank_finish
= self
.nested_list_parse(
2054 block
, input_offset
=offset
, node
=substitution_node
,
2055 initial_state
='SubstitutionDef', blank_finish
=blank_finish
)
2057 for node
in substitution_node
[:]:
2058 if not (isinstance(node
, nodes
.Inline
) or
2059 isinstance(node
, nodes
.Text
)):
2060 self
.parent
+= substitution_node
[i
]
2061 del substitution_node
[i
]
2064 for node
in substitution_node
.traverse(nodes
.Element
):
2065 if self
.disallowed_inside_substitution_definitions(node
):
2066 pformat
= nodes
.literal_block('', node
.pformat().rstrip())
2067 msg
= self
.reporter
.error(
2068 'Substitution definition contains illegal element:',
2069 pformat
, nodes
.literal_block(blocktext
, blocktext
),
2070 source
=src
, line
=srcline
)
2071 return [msg
], blank_finish
2072 if len(substitution_node
) == 0:
2073 msg
= self
.reporter
.warning(
2074 'Substitution definition "%s" empty or invalid.' % subname
,
2075 nodes
.literal_block(blocktext
, blocktext
),
2076 source
=src
, line
=srcline
)
2077 return [msg
], blank_finish
2078 self
.document
.note_substitution_def(
2079 substitution_node
, subname
, self
.parent
)
2080 return [substitution_node
], blank_finish
2082 def disallowed_inside_substitution_definitions(self
, node
):
2084 isinstance(node
, nodes
.reference
) and node
.get('anonymous') or
2085 isinstance(node
, nodes
.footnote_reference
) and node
.get('auto')):
2090 def directive(self
, match
, **option_presets
):
2091 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2092 type_name
= match
.group(1)
2093 directive_class
, messages
= directives
.directive(
2094 type_name
, self
.memo
.language
, self
.document
)
2095 self
.parent
+= messages
2097 return self
.run_directive(
2098 directive_class
, match
, type_name
, option_presets
)
2100 return self
.unknown_directive(type_name
)
2102 def run_directive(self
, directive
, match
, type_name
, option_presets
):
2104 Parse a directive then run its directive function.
2108 - `directive`: The class implementing the directive. Must be
2109 a subclass of `rst.Directive`.
2111 - `match`: A regular expression match object which matched the first
2112 line of the directive.
2114 - `type_name`: The directive name, as used in the source text.
2116 - `option_presets`: A dictionary of preset options, defaults for the
2117 directive options. Currently, only an "alt" option is passed by
2118 substitution definitions (value: the substitution name), which may
2119 be used by an embedded image directive.
2121 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2123 if isinstance(directive
, (FunctionType
, MethodType
)):
2124 from docutils
.parsers
.rst
import convert_directive_function
2125 directive
= convert_directive_function(directive
)
2126 lineno
= self
.state_machine
.abs_line_number()
2127 initial_line_offset
= self
.state_machine
.line_offset
2128 indented
, indent
, line_offset
, blank_finish \
2129 = self
.state_machine
.get_first_known_indented(match
.end(),
2131 block_text
= '\n'.join(self
.state_machine
.input_lines
[
2132 initial_line_offset
: self
.state_machine
.line_offset
+ 1])
2134 arguments
, options
, content
, content_offset
= (
2135 self
.parse_directive_block(indented
, line_offset
,
2136 directive
, option_presets
))
2137 except MarkupError
, detail
:
2138 error
= self
.reporter
.error(
2139 'Error in "%s" directive:\n%s.' % (type_name
,
2140 ' '.join(detail
.args
)),
2141 nodes
.literal_block(block_text
, block_text
), line
=lineno
)
2142 return [error
], blank_finish
2143 directive_instance
= directive(
2144 type_name
, arguments
, options
, content
, lineno
,
2145 content_offset
, block_text
, self
, self
.state_machine
)
2147 result
= directive_instance
.run()
2148 except docutils
.parsers
.rst
.DirectiveError
, error
:
2149 msg_node
= self
.reporter
.system_message(error
.level
, error
.msg
,
2151 msg_node
+= nodes
.literal_block(block_text
, block_text
)
2153 assert isinstance(result
, list), \
2154 'Directive "%s" must return a list of nodes.' % type_name
2155 for i
in range(len(result
)):
2156 assert isinstance(result
[i
], nodes
.Node
), \
2157 ('Directive "%s" returned non-Node object (index %s): %r'
2158 % (type_name
, i
, result
[i
]))
2160 blank_finish
or self
.state_machine
.is_next_line_blank())
2162 def parse_directive_block(self
, indented
, line_offset
, directive
,
2164 option_spec
= directive
.option_spec
2165 has_content
= directive
.has_content
2166 if indented
and not indented
[0].strip():
2167 indented
.trim_start()
2169 while indented
and not indented
[-1].strip():
2171 if indented
and (directive
.required_arguments
2172 or directive
.optional_arguments
2174 for i
, line
in enumerate(indented
):
2175 if not line
.strip():
2179 arg_block
= indented
[:i
]
2180 content
= indented
[i
+1:]
2181 content_offset
= line_offset
+ i
+ 1
2184 content_offset
= line_offset
2187 options
, arg_block
= self
.parse_directive_options(
2188 option_presets
, option_spec
, arg_block
)
2191 if arg_block
and not (directive
.required_arguments
2192 or directive
.optional_arguments
):
2193 content
= arg_block
+ indented
[i
:]
2194 content_offset
= line_offset
2196 while content
and not content
[0].strip():
2197 content
.trim_start()
2199 if directive
.required_arguments
or directive
.optional_arguments
:
2200 arguments
= self
.parse_directive_arguments(
2201 directive
, arg_block
)
2204 if content
and not has_content
:
2205 raise MarkupError('no content permitted')
2206 return (arguments
, options
, content
, content_offset
)
2208 def parse_directive_options(self
, option_presets
, option_spec
, arg_block
):
2209 options
= option_presets
.copy()
2210 for i
, line
in enumerate(arg_block
):
2211 if re
.match(Body
.patterns
['field_marker'], line
):
2212 opt_block
= arg_block
[i
:]
2213 arg_block
= arg_block
[:i
]
2218 success
, data
= self
.parse_extension_options(option_spec
,
2220 if success
: # data is a dict of options
2221 options
.update(data
)
2222 else: # data is an error string
2223 raise MarkupError(data
)
2224 return options
, arg_block
2226 def parse_directive_arguments(self
, directive
, arg_block
):
2227 required
= directive
.required_arguments
2228 optional
= directive
.optional_arguments
2229 arg_text
= '\n'.join(arg_block
)
2230 arguments
= arg_text
.split()
2231 if len(arguments
) < required
:
2232 raise MarkupError('%s argument(s) required, %s supplied'
2233 % (required
, len(arguments
)))
2234 elif len(arguments
) > required
+ optional
:
2235 if directive
.final_argument_whitespace
:
2236 arguments
= arg_text
.split(None, required
+ optional
- 1)
2239 'maximum %s argument(s) allowed, %s supplied'
2240 % (required
+ optional
, len(arguments
)))
2243 def parse_extension_options(self
, option_spec
, datalines
):
2245 Parse `datalines` for a field list containing extension options
2246 matching `option_spec`.
2249 - `option_spec`: a mapping of option name to conversion
2250 function, which should raise an exception on bad input.
2251 - `datalines`: a list of input strings.
2254 - Success value, 1 or 0.
2255 - An option dictionary on success, an error string on failure.
2257 node
= nodes
.field_list()
2258 newline_offset
, blank_finish
= self
.nested_list_parse(
2259 datalines
, 0, node
, initial_state
='ExtensionOptions',
2261 if newline_offset
!= len(datalines
): # incomplete parse of block
2262 return 0, 'invalid option block'
2264 options
= utils
.extract_extension_options(node
, option_spec
)
2265 except KeyError, detail
:
2266 return 0, ('unknown option: "%s"' % detail
.args
[0])
2267 except (ValueError, TypeError), detail
:
2268 return 0, ('invalid option value: %s' % ' '.join(detail
.args
))
2269 except utils
.ExtensionOptionError
, detail
:
2270 return 0, ('invalid option data: %s' % ' '.join(detail
.args
))
2274 return 0, 'option data incompletely parsed'
2276 def unknown_directive(self
, type_name
):
2277 lineno
= self
.state_machine
.abs_line_number()
2278 indented
, indent
, offset
, blank_finish
= \
2279 self
.state_machine
.get_first_known_indented(0, strip_indent
=False)
2280 text
= '\n'.join(indented
)
2281 error
= self
.reporter
.error(
2282 'Unknown directive type "%s".' % type_name
,
2283 nodes
.literal_block(text
, text
), line
=lineno
)
2284 return [error
], blank_finish
2286 def comment(self
, match
):
2287 if not match
.string
[match
.end():].strip() \
2288 and self
.state_machine
.is_next_line_blank(): # an empty comment?
2289 return [nodes
.comment()], 1 # "A tiny but practical wart."
2290 indented
, indent
, offset
, blank_finish
= \
2291 self
.state_machine
.get_first_known_indented(match
.end())
2292 while indented
and not indented
[-1].strip():
2294 text
= '\n'.join(indented
)
2295 return [nodes
.comment(text
, text
)], blank_finish
2297 explicit
.constructs
= [
2300 \.\.[ ]+ # explicit markup start
2303 [0-9]+ # manually numbered footnote
2305 \# # anonymous auto-numbered footnote
2307 \#%s # auto-number ed?) footnote label
2309 \* # auto-symbol footnote
2312 ([ ]+|$) # whitespace or end of line
2313 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
)),
2316 \.\.[ ]+ # explicit markup start
2317 \[(%s)\] # citation label
2318 ([ ]+|$) # whitespace or end of line
2319 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
)),
2322 \.\.[ ]+ # explicit markup start
2323 _ # target indicator
2324 (?![ ]|$) # first char. not space or EOL
2325 """, re
.VERBOSE | re
.UNICODE
)),
2328 \.\.[ ]+ # explicit markup start
2329 \| # substitution indicator
2330 (?![ ]|$) # first char. not space or EOL
2331 """, re
.VERBOSE | re
.UNICODE
)),
2334 \.\.[ ]+ # explicit markup start
2335 (%s) # directive name
2336 [ ]? # optional space
2337 :: # directive delimiter
2338 ([ ]+|$) # whitespace or end of line
2339 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
))]
2341 def explicit_markup(self
, match
, context
, next_state
):
2342 """Footnotes, hyperlink targets, directives, comments."""
2343 nodelist
, blank_finish
= self
.explicit_construct(match
)
2344 self
.parent
+= nodelist
2345 self
.explicit_list(blank_finish
)
2346 return [], next_state
, []
2348 def explicit_construct(self
, match
):
2349 """Determine which explicit construct this is, parse & return it."""
2351 for method
, pattern
in self
.explicit
.constructs
:
2352 expmatch
= pattern
.match(match
.string
)
2355 return method(self
, expmatch
)
2356 except MarkupError
, error
:
2357 lineno
= self
.state_machine
.abs_line_number()
2358 message
= ' '.join(error
.args
)
2359 errors
.append(self
.reporter
.warning(message
, line
=lineno
))
2361 nodelist
, blank_finish
= self
.comment(match
)
2362 return nodelist
+ errors
, blank_finish
2364 def explicit_list(self
, blank_finish
):
2366 Create a nested state machine for a series of explicit markup
2367 constructs (including anonymous hyperlink targets).
2369 offset
= self
.state_machine
.line_offset
+ 1 # next line
2370 newline_offset
, blank_finish
= self
.nested_list_parse(
2371 self
.state_machine
.input_lines
[offset
:],
2372 input_offset
=self
.state_machine
.abs_line_offset() + 1,
2373 node
=self
.parent
, initial_state
='Explicit',
2374 blank_finish
=blank_finish
,
2375 match_titles
=self
.state_machine
.match_titles
)
2376 self
.goto_line(newline_offset
)
2377 if not blank_finish
:
2378 self
.parent
+= self
.unindent_warning('Explicit markup')
2380 def anonymous(self
, match
, context
, next_state
):
2381 """Anonymous hyperlink targets."""
2382 nodelist
, blank_finish
= self
.anonymous_target(match
)
2383 self
.parent
+= nodelist
2384 self
.explicit_list(blank_finish
)
2385 return [], next_state
, []
2387 def anonymous_target(self
, match
):
2388 lineno
= self
.state_machine
.abs_line_number()
2389 block
, indent
, offset
, blank_finish \
2390 = self
.state_machine
.get_first_known_indented(match
.end(),
2392 blocktext
= match
.string
[:match
.end()] + '\n'.join(block
)
2393 block
= [escape2null(line
) for line
in block
]
2394 target
= self
.make_target(block
, blocktext
, lineno
, '')
2395 return [target
], blank_finish
2397 def line(self
, match
, context
, next_state
):
2398 """Section title overline or transition marker."""
2399 if self
.state_machine
.match_titles
:
2400 return [match
.string
], 'Line', []
2401 elif match
.string
.strip() == '::':
2402 raise statemachine
.TransitionCorrection('text')
2403 elif len(match
.string
.strip()) < 4:
2404 msg
= self
.reporter
.info(
2405 'Unexpected possible title overline or transition.\n'
2406 "Treating it as ordinary text because it's so short.",
2407 line
=self
.state_machine
.abs_line_number())
2409 raise statemachine
.TransitionCorrection('text')
2411 blocktext
= self
.state_machine
.line
2412 msg
= self
.reporter
.severe(
2413 'Unexpected section title or transition.',
2414 nodes
.literal_block(blocktext
, blocktext
),
2415 line
=self
.state_machine
.abs_line_number())
2417 return [], next_state
, []
2419 def text(self
, match
, context
, next_state
):
2420 """Titles, definition lists, paragraphs."""
2421 return [match
.string
], 'Text', []
class RFC2822Body(Body):

    """
    RFC2822 headers are only valid as the first constructs in documents.  As
    soon as anything else appears, the `Body` state should take over.
    """

    patterns = Body.patterns.copy() # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        field, blank_finish = self.rfc2822_field(match)
        fieldlist += field
        offset = self.state_machine.line_offset + 1 # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """Parse one ``Name: value`` field; returns (field_node, blank_finish)."""
        name = match.string[:match.string.find(':')]
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end(),
                                                          until_blank=True)
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.  Compound
    elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts up a nested parsing session with `BulletList` as the initial state.
    Only the ``bullet`` transition method is enabled in `BulletList`; as long
    as only bullet list items are encountered, they are parsed and inserted
    into the container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything *other
    than* bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        self.state_machine.previous_line() # back up so parent SM can reassess
        raise EOFError

    indent = invalid_input
    bullet = invalid_input
    enumerator = invalid_input
    field_marker = invalid_input
    option_marker = invalid_input
    doctest = invalid_input
    line_block = invalid_input
    grid_table_top = invalid_input
    simple_table_top = invalid_input
    explicit_markup = invalid_input
    anonymous = invalid_input
    line = invalid_input
    text = invalid_input
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item."""
        if match.string[0] != self.parent['bullet']:
            # different bullet: new list
            self.invalid_input()
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        return [], next_state, []
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists."""
        return [match.string], 'Definition', []
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        format, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        if ( format != self.format
             or (sequence != '#' and (sequence != self.parent['enumtype']
                                      or self.auto
                                      or ordinal != (self.lastordinal + 1)))
             or not self.is_enumerated_list_item(ordinal, sequence, format)):
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        self.lastordinal = ordinal
        return [], next_state, []
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field."""
        field, blank_finish = self.field(match)
        self.parent += field
        self.blank_finish = blank_finish
        return [], next_state, []
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item."""
        try:
            option_list_item, blank_finish = self.option_list_item(match)
        except MarkupError:
            self.invalid_input()
        self.parent += option_list_item
        self.blank_finish = blank_finish
        return [], next_state, []
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        field, blank_finish = self.rfc2822_field(match)
        self.parent += field
        self.blank_finish = blank_finish
        return [], 'RFC2822List', []

    blank = SpecializedBody.invalid_input
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """Override `Body.parse_field_body` for simpler parsing."""
        lines = []
        # group non-blank runs into paragraphs; the sentinel '' flushes
        # the final run
        for line in list(indented) + ['']:
            if line.strip():
                lines.append(line)
            elif lines:
                text = '\n'.join(lines)
                node += nodes.paragraph(text, text)
                lines = []
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """New line of line block."""
        lineno = self.state_machine.abs_line_number()
        line, messages, blank_finish = self.line_block_line(match, lineno)
        self.parent += line
        self.parent.parent += messages
        self.blank_finish = blank_finish
        return [], next_state, []
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        nodelist, blank_finish = self.explicit_construct(match)
        self.parent += nodelist
        self.blank_finish = blank_finish
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        nodelist, blank_finish = self.anonymous_target(match)
        self.parent += nodelist
        self.blank_finish = blank_finish
        return [], next_state, []

    blank = SpecializedBody.invalid_input
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
          'embedded_directive': re.compile(r'(%s)::( +|$)'
                                           % Inliner.simplename, re.UNICODE),
          'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """Directive embedded in a substitution definition."""
        nodelist, blank_finish = self.directive(match,
                                                alt=self.parent['names'][0])
        self.parent += nodelist
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        raise EOFError

    def text(self, match, context, next_state):
        """Ordinary text: end of the substitution definition."""
        if not self.state_machine.at_eof():
            self.blank_finish = self.state_machine.is_next_line_blank()
        raise EOFError
2689 class Text(RSTState
):
2692 Classifier of second line of a text block.
2694 Could be a paragraph, a definition list item, or a title.
2697 patterns
= {'underline': Body
.patterns
['line'],
2699 initial_transitions
= [('underline', 'Body'), ('text', 'Body')]
2701 def blank(self
, match
, context
, next_state
):
2702 """End of paragraph."""
2703 # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext
2704 paragraph
, literalnext
= self
.paragraph(
2705 context
, self
.state_machine
.abs_line_number() - 1)
2706 self
.parent
+= paragraph
2708 self
.parent
+= self
.literal_block()
2709 return [], 'Body', []
2711 def eof(self
, context
):
2713 self
.blank(None, context
, None)
2716 def indent(self
, match
, context
, next_state
):
2717 """Definition list item."""
2718 definitionlist
= nodes
.definition_list()
2719 definitionlistitem
, blank_finish
= self
.definition_list_item(context
)
2720 definitionlist
+= definitionlistitem
2721 self
.parent
+= definitionlist
2722 offset
= self
.state_machine
.line_offset
+ 1 # next line
2723 newline_offset
, blank_finish
= self
.nested_list_parse(
2724 self
.state_machine
.input_lines
[offset
:],
2725 input_offset
=self
.state_machine
.abs_line_offset() + 1,
2726 node
=definitionlist
, initial_state
='DefinitionList',
2727 blank_finish
=blank_finish
, blank_finish_state
='Definition')
2728 self
.goto_line(newline_offset
)
2729 if not blank_finish
:
2730 self
.parent
+= self
.unindent_warning('Definition list')
2731 return [], 'Body', []
2733 def underline(self
, match
, context
, next_state
):
2734 """Section title."""
2735 lineno
= self
.state_machine
.abs_line_number()
2736 title
= context
[0].rstrip()
2737 underline
= match
.string
.rstrip()
2738 source
= title
+ '\n' + underline
2740 if column_width(title
) > len(underline
):
2741 if len(underline
) < 4:
2742 if self
.state_machine
.match_titles
:
2743 msg
= self
.reporter
.info(
2744 'Possible title underline, too short for the title.\n'
2745 "Treating it as ordinary text because it's so short.",
2748 raise statemachine
.TransitionCorrection('text')
2750 blocktext
= context
[0] + '\n' + self
.state_machine
.line
2751 msg
= self
.reporter
.warning('Title underline too short.',
2752 nodes
.literal_block(blocktext
, blocktext
), line
=lineno
)
2753 messages
.append(msg
)
2754 if not self
.state_machine
.match_titles
:
2755 blocktext
= context
[0] + '\n' + self
.state_machine
.line
2756 # We need get_source_and_line() here to report correctly
2757 src
, srcline
= self
.state_machine
.get_source_and_line()
2758 # TODO: why is abs_line_number() == srcline+1
2759 # if the error is in a table (try with test_tables.py)?
2760 # print "get_source_and_line", srcline
2761 # print "abs_line_number", self.state_machine.abs_line_number()
2762 msg
= self
.reporter
.severe('Unexpected section title.',
2763 nodes
.literal_block(blocktext
, blocktext
),
2764 source
=src
, line
=srcline
)
2765 self
.parent
+= messages
2767 return [], next_state
, []
2768 style
= underline
[0]
2770 self
.section(title
, source
, style
, lineno
- 1, messages
)
2771 return [], next_state
, []
2773 def text(self
, match
, context
, next_state
):
2775 startline
= self
.state_machine
.abs_line_number() - 1
2778 block
= self
.state_machine
.get_text_block(flush_left
=True)
2779 except statemachine
.UnexpectedIndentationError
, err
:
2780 block
, src
, srcline
= err
.args
2781 msg
= self
.reporter
.error('Unexpected indentation.',
2782 source
=src
, line
=srcline
)
2783 lines
= context
+ list(block
)
2784 paragraph
, literalnext
= self
.paragraph(lines
, startline
)
2785 self
.parent
+= paragraph
2789 self
.state_machine
.next_line()
2792 self
.parent
+= self
.literal_block()
2793 return [], next_state
, []
2795 def literal_block(self
):
2796 """Return a list of nodes."""
2797 indented
, indent
, offset
, blank_finish
= \
2798 self
.state_machine
.get_indented()
2799 while indented
and not indented
[-1].strip():
2802 return self
.quoted_literal_block()
2803 data
= '\n'.join(indented
)
2804 literal_block
= nodes
.literal_block(data
, data
)
2805 literal_block
.line
= offset
+ 1
2806 nodelist
= [literal_block
]
2807 if not blank_finish
:
2808 nodelist
.append(self
.unindent_warning('Literal block'))
2811 def quoted_literal_block(self
):
2812 abs_line_offset
= self
.state_machine
.abs_line_offset()
2813 offset
= self
.state_machine
.line_offset
2814 parent_node
= nodes
.Element()
2815 new_abs_offset
= self
.nested_parse(
2816 self
.state_machine
.input_lines
[offset
:],
2817 input_offset
=abs_line_offset
, node
=parent_node
, match_titles
=False,
2818 state_machine_kwargs
={'state_classes': (QuotedLiteralBlock
,),
2819 'initial_state': 'QuotedLiteralBlock'})
2820 self
.goto_line(new_abs_offset
)
2821 return parent_node
.children
2823 def definition_list_item(self
, termline
):
2824 indented
, indent
, line_offset
, blank_finish
= \
2825 self
.state_machine
.get_indented()
2826 itemnode
= nodes
.definition_list_item(
2827 '\n'.join(termline
+ list(indented
)))
2828 lineno
= self
.state_machine
.abs_line_number() - 1
2830 itemnode
.line
) = self
.state_machine
.get_source_and_line(lineno
)
2831 termlist
, messages
= self
.term(termline
, lineno
)
2832 itemnode
+= termlist
2833 definition
= nodes
.definition('', *messages
)
2834 itemnode
+= definition
2835 if termline
[0][-2:] == '::':
2836 definition
+= self
.reporter
.info(
2837 'Blank line missing before literal block (after the "::")? '
2838 'Interpreted as a definition list item.',
2840 self
.nested_parse(indented
, input_offset
=line_offset
, node
=definition
)
2841 return itemnode
, blank_finish
2843 classifier_delimiter
= re
.compile(' +: +')
2845 def term(self
, lines
, lineno
):
2846 """Return a definition_list's term and optional classifiers."""
2847 assert len(lines
) == 1
2848 text_nodes
, messages
= self
.inline_text(lines
[0], lineno
)
2849 term_node
= nodes
.term(lines
[0])
2851 term_node
.line
) = self
.state_machine
.get_source_and_line(lineno
)
2852 node_list
= [term_node
]
2853 for i
in range(len(text_nodes
)):
2854 node
= text_nodes
[i
]
2855 if isinstance(node
, nodes
.Text
):
2856 parts
= self
.classifier_delimiter
.split(node
.rawsource
)
2858 node_list
[-1] += node
2860 rawtext
= parts
[0].rstrip()
2861 textnode
= nodes
.Text(utils
.unescape_rawsource(rawtext
))
2862 textnode
.rawsource
= rawtext
2863 node_list
[-1] += textnode
2864 for part
in parts
[1:]:
2865 classifier_node
= nodes
.classifier(part
,
2866 utils
.unescape_rawsource(part
))
2867 classifier_node
[0].rawsource
= part
2868 node_list
.append(classifier_node
)
2870 node_list
[-1] += node
2871 return node_list
, messages
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    # Disable all transitions; subclasses selectively re-enable them.
    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        self.state_machine.previous_line(2)  # so parent SM can reassess
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        itemnode, blank_finish = self.definition_list_item(context)
        self.parent += itemnode
        self.blank_finish = blank_finish
        return [], 'DefinitionList', []
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition; reassess as ordinary text.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        """Invalid: an underline directly after an overline-like line."""
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """Report a too-short overline and back up to reparse as text."""
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """Back up *lines* lines and restart in the 'Body' state."""
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        RSTState.__init__(self, state_machine, debug)
        self.messages = []
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        # A blank line ends the quoted block (if any lines were collected).
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            # src not available, because statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote), re.UNICODE)
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        # Any other text inside a quoted block means inconsistent quoting.
        if context:
            self.messages.append(
                self.reporter.error(
                    'Inconsistent literal block quoting.',
                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""