2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
- A line of uniform punctuation characters: The element is a section
  header; again, parsing proceeds as in step 2.B, and `Body` is still
  the current state.
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
# Format of this module's docstrings (Docutils convention).
__docformat__ = 'reStructuredText'
import re
from types import FunctionType, MethodType

from docutils import nodes, statemachine, utils
from docutils import ApplicationError, DataError
from docutils.statemachine import StateMachineWS, StateWS
from docutils.nodes import fully_normalize_name as normalize_name
from docutils.nodes import unescape, whitespace_normalize_name
import docutils.parsers.rst
from docutils.parsers.rst import directives, languages, tableparser, roles
from docutils.utils import escape2null, column_width
from docutils.utils import punctuation_chars, roman, urischemes
from docutils.utils import split_escaped_whitespace
class MarkupError(DataError):
    """`DataError` subclass for problems in reStructuredText markup."""
class UnknownInterpretedRoleError(DataError):
    """`DataError` subclass raised for an unrecognized interpreted-text role."""
class InterpretedRoleNotImplementedError(DataError):
    """`DataError` subclass raised for an unimplemented interpreted-text role."""
class ParserError(ApplicationError):
    """`ApplicationError` subclass for internal reStructuredText parser errors."""
125 class MarkupMismatch(Exception): pass
130 """Stores data attributes for dotted-attribute access."""
def __init__(self, **keywordargs):
    """Store every keyword argument as an attribute of this instance."""
    for name, value in keywordargs.items():
        setattr(self, name, value)
class RSTStateMachine(StateMachineWS):

    """
    reStructuredText's master StateMachine.

    The entry point to reStructuredText parsing is the `run()` method.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=True,
            inliner=None):
        """
        Parse `input_lines` and modify the `document` node in place.

        Extend `StateMachineWS.run()`: set up parse-global data and
        run the StateMachine.
        """
        self.language = languages.get_language(
            document.settings.language_code, document.reporter)
        self.match_titles = match_titles
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(document.settings)
        # `memo` carries parse-global data shared by all nested runs.
        self.memo = Struct(document=document,
                           reporter=document.reporter,
                           language=self.language,
                           title_styles=[],
                           section_level=0,
                           section_bubble_up_kludge=False,
                           inliner=inliner)
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = self.memo.reporter
        self.node = document
        results = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert results == [], 'RSTStateMachine.run() results should be empty!'
        self.node = self.memo = None  # remove unneeded references
class NestedStateMachine(StateMachineWS):

    """
    StateMachine run from within other StateMachine runs, to parse nested
    document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=True):
        """
        Parse `input_lines` and populate a `docutils.nodes.document` instance.

        Extend `StateMachineWS.run()`: set up document-wide data.
        """
        self.match_titles = match_titles
        # Share the parse-global data of the enclosing run via `memo`.
        self.memo = memo
        self.document = memo.document
        self.attach_observer(self.document.note_source)
        self.reporter = memo.reporter
        self.language = memo.language
        self.node = node
        results = StateMachineWS.run(self, input_lines, input_offset)
        assert results == [], ('NestedStateMachine.run() results should be '
                               'empty!')
        return results
201 class RSTState(StateWS
):
204 reStructuredText State superclass.
206 Contains methods used by all State subclasses.
209 nested_sm
= NestedStateMachine
def __init__(self, state_machine, debug=False):
    """Set up keyword arguments for nested state machines, then delegate
    to `StateWS.__init__` (kwargs must exist before the base init runs)."""
    self.nested_sm_kwargs = {
        'state_classes': state_classes,
        'initial_state': 'Body',
    }
    StateWS.__init__(self, state_machine, debug)
def runtime_init(self):
    """Pull per-run data (reporter, inliner, document, parent node) out of
    the state machine's memo into instance attributes."""
    StateWS.runtime_init(self)
    memo = self.state_machine.memo
    self.memo = memo
    self.reporter = memo.reporter
    self.inliner = memo.inliner
    self.document = memo.document
    self.parent = self.state_machine.node
    # enable the reporter to determine source and source-line
    if not hasattr(self.reporter, 'get_source_and_line'):
        self.reporter.get_source_and_line = self.state_machine.get_source_and_line  # noqa:E501
def goto_line(self, abs_line_offset):
    """
    Jump to input line `abs_line_offset`, ignoring jumps past the end.
    """
    try:
        self.state_machine.goto_line(abs_line_offset)
    except EOFError:
        # jump past end of input: deliberately ignored
        pass
def no_match(self, context, transitions):
    """
    Override `StateWS.no_match` to generate a system message.

    This code should never be run.
    """
    self.reporter.severe(
        'Internal error: no transition pattern match. State: "%s"; '
        'transitions: %s; context: %s; current line: %r.'
        % (self.__class__.__name__, transitions, context,
           self.state_machine.line))
    return context, None, []
def bof(self, context):
    """Called at beginning of file; no context, no results."""
    return [], []
def nested_parse(self, block, input_offset, node, match_titles=False,
                 state_machine_class=None, state_machine_kwargs=None):
    """
    Create a new StateMachine rooted at `node` and run it over the input
    `block`; return the new absolute line offset.
    """
    use_default = 0
    if state_machine_class is None:
        state_machine_class = self.nested_sm
        use_default += 1
    if state_machine_kwargs is None:
        state_machine_kwargs = self.nested_sm_kwargs
        use_default += 1
    block_length = len(block)

    # Reuse a cached machine only for fully-default invocations.
    state_machine = None
    if use_default == 2:
        try:
            state_machine = self.nested_sm_cache.pop()
        except IndexError:
            pass
    if not state_machine:
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
    state_machine.run(block, input_offset, memo=self.memo,
                      node=node, match_titles=match_titles)
    if use_default == 2:
        self.nested_sm_cache.append(state_machine)
    else:
        state_machine.unlink()
    new_offset = state_machine.abs_line_offset()
    # No `block.parent` implies disconnected -- lines aren't in sync:
    if block.parent and (len(block) - block_length) != 0:
        # Adjustment for block if modified in nested parse:
        self.state_machine.next_line(len(block) - block_length)
    return new_offset
def nested_list_parse(self, block, input_offset, node, initial_state,
                      blank_finish,
                      blank_finish_state=None,
                      extra_settings={},
                      match_titles=False,
                      state_machine_class=None,
                      state_machine_kwargs=None):
    """
    Create a new StateMachine rooted at `node` and run it over the input
    `block`. Also keep track of optional intermediate blank lines and the
    required final one.
    """
    if state_machine_class is None:
        state_machine_class = self.nested_sm
    if state_machine_kwargs is None:
        # copy, because 'initial_state' is overwritten below
        state_machine_kwargs = self.nested_sm_kwargs.copy()
    state_machine_kwargs['initial_state'] = initial_state
    state_machine = state_machine_class(debug=self.debug,
                                        **state_machine_kwargs)
    if blank_finish_state is None:
        blank_finish_state = initial_state
    state_machine.states[blank_finish_state].blank_finish = blank_finish
    for key, value in extra_settings.items():
        setattr(state_machine.states[initial_state], key, value)
    state_machine.run(block, input_offset, memo=self.memo,
                      node=node, match_titles=match_titles)
    blank_finish = state_machine.states[blank_finish_state].blank_finish
    state_machine.unlink()
    return state_machine.abs_line_offset(), blank_finish
def section(self, title, source, style, lineno, messages):
    """Check for a valid subsection and create one if it checks out."""
    if not self.check_subsection(source, style, lineno):
        return
    self.new_subsection(title, lineno, messages)
def check_subsection(self, source, style, lineno):
    """
    Check for a valid subsection header. Return True or False.

    When a new section is reached that isn't a subsection of the current
    section, back up the line count (use ``previous_line(-x)``), then
    ``raise EOFError``. The current StateMachine will finish, then the
    calling StateMachine can re-examine the title. This will work its way
    back up the calling chain until the correct section level is reached.

    @@@ Alternative: Evaluate the title, store the title info & level, and
    back up the chain until that level is reached. Store in memo? Or
    return in results?

    :Exception: `EOFError` when a sibling or supersection encountered.
    """
    memo = self.memo
    title_styles = memo.title_styles
    mylevel = memo.section_level
    try:  # check for existing title style
        level = title_styles.index(style) + 1
    except ValueError:  # new title style
        if len(title_styles) == memo.section_level:  # new subsection
            title_styles.append(style)
            return True
        else:  # not at lowest level
            self.parent += self.title_inconsistent(source, lineno)
            return False
    if level <= mylevel:  # sibling or supersection
        memo.section_level = level  # bubble up to parent section
        if len(style) == 2:
            memo.section_bubble_up_kludge = True
        # back up 2 lines for underline title, 3 for overline title
        self.state_machine.previous_line(len(style) + 1)
        raise EOFError  # let parent section re-evaluate
    if level == mylevel + 1:  # immediate subsection
        return True
    else:  # invalid subsection
        self.parent += self.title_inconsistent(source, lineno)
        return False
def title_inconsistent(self, sourcetext, lineno):
    """Return a severe system_message reporting an inconsistent title level."""
    error = self.reporter.severe(
        'Title level inconsistent:', nodes.literal_block('', sourcetext),
        line=lineno)
    return error
def new_subsection(self, title, lineno, messages):
    """Append new subsection to document tree. On return, check level."""
    memo = self.memo
    mylevel = memo.section_level
    memo.section_level += 1
    section_node = nodes.section()
    self.parent += section_node
    textnodes, title_messages = self.inline_text(title, lineno)
    titlenode = nodes.title(title, '', *textnodes)
    name = normalize_name(titlenode.astext())
    section_node['names'].append(name)
    section_node += titlenode
    section_node += messages
    section_node += title_messages
    self.document.note_implicit_target(section_node, section_node)
    # Parse the remaining input inside the new section node.
    offset = self.state_machine.line_offset + 1
    absoffset = self.state_machine.abs_line_offset() + 1
    newabsoffset = self.nested_parse(
        self.state_machine.input_lines[offset:], input_offset=absoffset,
        node=section_node, match_titles=True)
    self.goto_line(newabsoffset)
    if memo.section_level <= mylevel:  # can't handle next section?
        raise EOFError  # bubble up to supersection
    # reset section_level; next pass will detect it properly
    memo.section_level = mylevel
def paragraph(self, lines, lineno):
    """
    Return a list (paragraph & messages) & a boolean: literal_block next?
    """
    data = '\n'.join(lines).rstrip()
    # Unescaped '::' at the end announces a literal block.
    if re.search(r'(?<!\\)(\\\\)*::$', data):
        if len(data) == 2:
            return [], 1
        elif data[-3] in ' \n':
            text = data[:-3].rstrip()
        else:
            text = data[:-1]
        literalnext = 1
    else:
        text = data
        literalnext = 0
    textnodes, messages = self.inline_text(text, lineno)
    p = nodes.paragraph(data, '', *textnodes)
    p.source, p.line = self.state_machine.get_source_and_line(lineno)
    return [p] + messages, literalnext
def inline_text(self, text, lineno):
    """
    Return 2 lists: nodes (text and inline elements), and system_messages.
    """
    inline_nodes, messages = self.inliner.parse(text, lineno,
                                                self.memo, self.parent)
    return inline_nodes, messages
def unindent_warning(self, node_name):
    """Return a system_message warning about an unexpected unindent."""
    # the actual problem is one line below the current line
    lineno = self.state_machine.abs_line_number() + 1
    return self.reporter.warning('%s ends without a blank line; '
                                 'unexpected unindent.' % node_name,
                                 line=lineno)
def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    With a falsy `compile`, return the pattern string instead of a
    compiled pattern object (used for recursive sub-definitions).
    """
    name, prefix, suffix, parts = definition
    part_strings = [build_regexp(part, None) if isinstance(part, tuple)
                    else part
                    for part in parts]
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp)
    else:
        return regexp
463 Parse inline markup; call the `parse()` method.
467 self
.implicit_dispatch
= []
468 """List of (pattern, bound method) tuples, used by
469 `self.implicit_inline`."""
471 def init_customizations(self
, settings
):
472 # lookahead and look-behind expressions for inline markup rules
473 if getattr(settings
, 'character_level_inline_markup', False):
474 start_string_prefix
= '(^|(?<!\x00))'
475 end_string_suffix
= ''
477 start_string_prefix
= ('(^|(?<=\\s|[%s%s]))' %
478 (punctuation_chars
.openers
,
479 punctuation_chars
.delimiters
))
480 end_string_suffix
= ('($|(?=\\s|[\x00%s%s%s]))' %
481 (punctuation_chars
.closing_delimiters
,
482 punctuation_chars
.delimiters
,
483 punctuation_chars
.closers
))
484 args
= locals().copy()
485 args
.update(vars(self
.__class
__))
487 parts
= ('initial_inline', start_string_prefix
, '',
489 ('start', '', self
.non_whitespace_after
, # simple start-strings
491 r
'\*(?!\*)', # emphasis but not strong
493 r
'_`', # inline internal target
494 r
'\|(?!\|)'] # substitution reference
496 ('whole', '', end_string_suffix
, # whole constructs
497 [ # reference name & end-string
498 r
'(?P<refname>%s)(?P<refend>__?)' % self
.simplename
,
499 ('footnotelabel', r
'\[', r
'(?P<fnend>\]_)',
500 [r
'[0-9]+', # manually numbered
501 r
'\#(%s)?' % self
.simplename
, # auto-numbered (w/ label?)
503 r
'(?P<citationlabel>%s)' % self
.simplename
, # citation ref
508 ('backquote', # interpreted text or phrase reference
509 '(?P<role>(:%s:)?)' % self
.simplename
, # optional role
510 self
.non_whitespace_after
,
511 ['`(?!`)'] # but not literal
515 self
.start_string_prefix
= start_string_prefix
516 self
.end_string_suffix
= end_string_suffix
519 self
.patterns
= Struct(
520 initial
=build_regexp(parts
),
521 emphasis
=re
.compile(self
.non_whitespace_escape_before
522 + r
'(\*)' + end_string_suffix
),
523 strong
=re
.compile(self
.non_whitespace_escape_before
524 + r
'(\*\*)' + end_string_suffix
),
525 interpreted_or_phrase_ref
=re
.compile(
527 %(non_unescaped_whitespace_escape_before)s
531 (?P<role>:%(simplename)s:)?
535 %(end_string_suffix)s
536 """ % args
, re
.VERBOSE
),
537 embedded_link
=re
.compile(
540 (?:[ \n]+|^) # spaces or beginning of line/string
542 %(non_whitespace_after)s
543 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
544 %(non_whitespace_escape_before)s
548 """ % args
, re
.VERBOSE
),
549 literal
=re
.compile(self
.non_whitespace_before
+ '(``)'
550 + end_string_suffix
),
551 target
=re
.compile(self
.non_whitespace_escape_before
552 + r
'(`)' + end_string_suffix
),
553 substitution_ref
=re
.compile(self
.non_whitespace_escape_before
555 + end_string_suffix
),
556 email
=re
.compile(self
.email_pattern
% args
+ '$',
560 %(start_string_prefix)s
562 (?P<absolute> # absolute URI
563 (?P<scheme> # scheme (http, ftp, mailto)
564 [a-zA-Z][a-zA-Z0-9.+-]*
569 (//?)? # hierarchical URI
570 %(uric)s* # URI characters
571 %(uri_end)s # final URI char
577 ( # optional fragment
584 (?P<email> # email address
585 """ + self
.email_pattern
+ r
"""
588 %(end_string_suffix)s
589 """) % args
, re
.VERBOSE
),
592 %(start_string_prefix)s
594 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
596 (PEP\s+(?P<pepnum2>\d+)) # reference by name
598 %(end_string_suffix)s""" % args
, re
.VERBOSE
),
601 %(start_string_prefix)s
602 (RFC(-|\s+)?(?P<rfcnum>\d+))
603 %(end_string_suffix)s""" % args
, re
.VERBOSE
))
605 self
.implicit_dispatch
.append((self
.patterns
.uri
,
606 self
.standalone_uri
))
607 if settings
.pep_references
:
608 self
.implicit_dispatch
.append((self
.patterns
.pep
,
610 if settings
.rfc_references
:
611 self
.implicit_dispatch
.append((self
.patterns
.rfc
,
def parse(self, text, lineno, memo, parent):
    # Needs to be refactored for nested inline markup.
    # Add nested_parse() method?
    """
    Return 2 lists: nodes (text and inline elements), and system_messages.

    Using `self.patterns.initial`, a pattern which matches start-strings
    (emphasis, strong, interpreted, phrase reference, literal,
    substitution reference, and inline target) and complete constructs
    (simple reference, footnote reference), search for a candidate. When
    one is found, check for validity (e.g., not a quoted '*' character).
    If valid, search for the corresponding end string if applicable, and
    check it for validity. If not found or invalid, generate a warning
    and ignore the start-string. Implicit inline markup (e.g. standalone
    URIs) is found last.

    :lineno: absolute line number (cf. statemachine.get_source_and_line())
    """
    self.reporter = memo.reporter
    self.document = memo.document
    self.language = memo.language
    self.parent = parent
    pattern_search = self.patterns.initial.search
    dispatch = self.dispatch
    remaining = escape2null(text)
    processed = []
    unprocessed = []
    messages = []
    while remaining:
        match = pattern_search(remaining)
        if not match:
            break
        groups = match.groupdict()
        method = dispatch[groups['start'] or groups['backquote']
                          or groups['refend'] or groups['fnend']]
        before, inlines, remaining, sysmessages = method(self, match,
                                                         lineno)
        unprocessed.append(before)
        messages += sysmessages
        if inlines:
            processed += self.implicit_inline(''.join(unprocessed),
                                              lineno)
            processed += inlines
            unprocessed = []
    remaining = ''.join(unprocessed) + remaining
    if remaining:
        processed += self.implicit_inline(remaining, lineno)
    return processed, messages
665 # Inline object recognition
666 # -------------------------
667 # See also init_customizations().
668 non_whitespace_before
= r
'(?<!\s)'
669 non_whitespace_escape_before
= r
'(?<![\s\x00])'
670 non_unescaped_whitespace_escape_before
= r
'(?<!(?<!\x00)[\s\x00])'
671 non_whitespace_after
= r
'(?!\s)'
672 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
673 simplename
= r
'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
674 # Valid URI characters (see RFC 2396 & RFC 2732);
675 # final \x00 allows backslash escapes in URIs:
676 uric
= r
"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
677 # Delimiter indicating the end of a URI (not part of the URI):
678 uri_end_delim
= r
"""[>]"""
679 # Last URI character; same as uric but no punctuation:
680 urilast
= r
"""[_~*/=+a-zA-Z0-9]"""
681 # End of a URI (either 'urilast' or 'uric followed by a
683 uri_end
= r
"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
684 emailc
= r
"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
686 %(emailc)s+(?:\.%(emailc)s+)* # name
688 %(emailc)s+(?:\.%(emailc)s*)* # host
689 %(uri_end)s # final URI char
def quoted_start(self, match):
    """Test if inline markup start-string is 'quoted'.

    'Quoted' in this context means the start-string is enclosed in a pair
    of matching opening/closing delimiters (not necessarily quotes)
    or at the end of the match.
    """
    string = match.string
    start = match.start()
    if start == 0:  # start-string at beginning of text
        return False
    prestart = string[start - 1]
    try:
        poststart = string[match.end()]
    except IndexError:  # start-string at end of text
        return True  # not "quoted" but no markup start-string either
    return punctuation_chars.match_chars(prestart, poststart)
def inline_obj(self, match, lineno, end_pattern, nodeclass,
               restore_backslashes=False):
    """Parse a generic start-string/end-string inline construct.

    Return a 5-tuple: (text before the construct, list of nodes,
    remaining text, list of system messages, matched end-string).
    """
    string = match.string
    matchstart = match.start('start')
    matchend = match.end('start')
    if self.quoted_start(match):
        # quoted start-string: not markup; skip past it
        return string[:matchend], [], string[matchend:], [], ''
    endmatch = end_pattern.search(string[matchend:])
    if endmatch and endmatch.start(1):  # 1 or more chars
        inner = endmatch.string[:endmatch.start(1)]
        if restore_backslashes:
            inner = unescape(inner, True)
        textend = matchend + endmatch.end(1)
        rawsource = unescape(string[matchstart:textend], True)
        node = nodeclass(rawsource, inner)
        return (string[:matchstart], [node],
                string[textend:], [], endmatch.group(1))
    # no end-string found: emit a warning and mark the start problematic
    msg = self.reporter.warning(
        'Inline %s start-string without end-string.'
        % nodeclass.__name__, line=lineno)
    text = unescape(string[matchstart:matchend], True)
    prb = self.problematic(text, text, msg)
    return string[:matchstart], [prb], string[matchend:], [msg], ''
def problematic(self, text, rawsource, message):
    """Wrap `text` in a `problematic` node cross-linked with `message`."""
    msgid = self.document.set_id(message, self.parent)
    prb = nodes.problematic(rawsource, text, refid=msgid)
    prbid = self.document.set_id(prb)
    message.add_backref(prbid)
    return prb
def emphasis(self, match, lineno):
    """Parse an emphasis construct; delegate to `inline_obj`."""
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, self.patterns.emphasis, nodes.emphasis)
    return before, inlines, remaining, sysmessages
def strong(self, match, lineno):
    """Parse a strong-emphasis construct; delegate to `inline_obj`."""
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, self.patterns.strong, nodes.strong)
    return before, inlines, remaining, sysmessages
def interpreted_or_phrase_ref(self, match, lineno):
    """Disambiguate a backquote start-string: interpreted text (with an
    optional prefix or suffix role) or a phrase reference."""
    end_pattern = self.patterns.interpreted_or_phrase_ref
    string = match.string
    matchstart = match.start('backquote')
    matchend = match.end('backquote')
    rolestart = match.start('role')
    role = match.group('role')
    position = ''
    if role:
        role = role[1:-1]  # strip the surrounding colons
        position = 'prefix'
    elif self.quoted_start(match):
        return string[:matchend], [], string[matchend:], []
    endmatch = end_pattern.search(string[matchend:])
    if endmatch and endmatch.start(1):  # 1 or more chars
        textend = matchend + endmatch.end()
        if endmatch.group('role'):
            if role:
                msg = self.reporter.warning(
                    'Multiple roles in interpreted text (both '
                    'prefix and suffix present; only one allowed).',
                    line=lineno)
                text = unescape(string[rolestart:textend], True)
                prb = self.problematic(text, text, msg)
                return string[:rolestart], [prb], string[textend:], [msg]
            role = endmatch.group('suffix')[1:-1]
            position = 'suffix'
        escaped = endmatch.string[:endmatch.start(1)]
        rawsource = unescape(string[matchstart:textend], True)
        if rawsource[-1:] == '_':
            if role:
                msg = self.reporter.warning(
                    'Mismatch: both interpreted text role %s and '
                    'reference suffix.' % position, line=lineno)
                text = unescape(string[rolestart:textend], True)
                prb = self.problematic(text, text, msg)
                return string[:rolestart], [prb], string[textend:], [msg]
            return self.phrase_ref(string[:matchstart], string[textend:],
                                   rawsource, escaped)
        else:
            rawsource = unescape(string[rolestart:textend], True)
            nodelist, messages = self.interpreted(rawsource, escaped, role,
                                                  lineno)
            return (string[:rolestart], nodelist,
                    string[textend:], messages)
    msg = self.reporter.warning(
        'Inline interpreted text or phrase reference start-string '
        'without end-string.', line=lineno)
    text = unescape(string[matchstart:matchend], True)
    prb = self.problematic(text, text, msg)
    return string[:matchstart], [prb], string[matchend:], [msg]
803 def phrase_ref(self
, before
, after
, rawsource
, escaped
, text
=None):
804 # `text` is ignored (since 0.16)
805 match
= self
.patterns
.embedded_link
.search(escaped
)
806 if match
: # embedded <URI> or <alias_>
807 text
= escaped
[:match
.start(0)]
808 unescaped
= unescape(text
)
809 rawtext
= unescape(text
, True)
810 aliastext
= match
.group(2)
811 rawaliastext
= unescape(aliastext
, True)
812 underscore_escaped
= rawaliastext
.endswith(r
'\_')
813 if (aliastext
.endswith('_')
814 and not (underscore_escaped
815 or self
.patterns
.uri
.match(aliastext
))):
817 alias
= normalize_name(unescape(aliastext
[:-1]))
818 target
= nodes
.target(match
.group(1), refname
=alias
)
819 target
.indirect_reference_name
= whitespace_normalize_name(
820 unescape(aliastext
[:-1]))
823 # remove unescaped whitespace
824 alias_parts
= split_escaped_whitespace(match
.group(2))
825 alias
= ' '.join(''.join(part
.split())
826 for part
in alias_parts
)
827 alias
= self
.adjust_uri(unescape(alias
))
828 if alias
.endswith(r
'\_'):
829 alias
= alias
[:-2] + '_'
830 target
= nodes
.target(match
.group(1), refuri
=alias
)
831 target
.referenced
= 1
833 raise ApplicationError('problem with embedded link: %r'
837 unescaped
= unescape(text
)
838 rawtext
= rawaliastext
841 unescaped
= unescape(text
)
843 rawtext
= unescape(escaped
, True)
845 refname
= normalize_name(unescaped
)
846 reference
= nodes
.reference(rawsource
, text
,
847 name
=whitespace_normalize_name(unescaped
))
848 reference
[0].rawsource
= rawtext
850 node_list
= [reference
]
852 if rawsource
[-2:] == '__':
853 if target
and (aliastype
== 'name'):
854 reference
['refname'] = alias
855 self
.document
.note_refname(reference
)
856 # self.document.note_indirect_target(target) # required?
857 elif target
and (aliastype
== 'uri'):
858 reference
['refuri'] = alias
860 reference
['anonymous'] = 1
863 target
['names'].append(refname
)
864 if aliastype
== 'name':
865 reference
['refname'] = alias
866 self
.document
.note_indirect_target(target
)
867 self
.document
.note_refname(reference
)
869 reference
['refuri'] = alias
870 self
.document
.note_explicit_target(target
, self
.parent
)
871 # target.note_referenced_by(name=refname)
872 node_list
.append(target
)
874 reference
['refname'] = refname
875 self
.document
.note_refname(reference
)
876 return before
, node_list
, after
, []
def adjust_uri(self, uri):
    """Prepend 'mailto:' to bare e-mail addresses; return other URIs as-is."""
    if self.patterns.email.match(uri):
        return 'mailto:' + uri
    return uri
def interpreted(self, rawsource, text, role, lineno):
    """Apply interpreted-text `role` to `text`; return (nodes, messages)."""
    role_fn, messages = roles.role(role, self.language, lineno,
                                   self.reporter)
    if role_fn:
        nodelist, messages2 = role_fn(role, rawsource, text, lineno, self)
        return nodelist, messages + messages2
    else:
        msg = self.reporter.error(
            'Unknown interpreted text role "%s".' % role,
            line=lineno)
        return ([self.problematic(rawsource, rawsource, msg)],
                messages + [msg])
def literal(self, match, lineno):
    """Parse an inline literal; backslashes inside are restored verbatim."""
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, self.patterns.literal, nodes.literal,
        restore_backslashes=True)
    return before, inlines, remaining, sysmessages
def inline_internal_target(self, match, lineno):
    """Parse an inline internal target (`_\\`name\\``) and register it."""
    before, inlines, remaining, sysmessages, _endstring = self.inline_obj(
        match, lineno, self.patterns.target, nodes.target)
    if inlines and isinstance(inlines[0], nodes.target):
        assert len(inlines) == 1
        target = inlines[0]
        name = normalize_name(target.astext())
        target['names'].append(name)
        self.document.note_explicit_target(target, self.parent)
    return before, inlines, remaining, sysmessages
def substitution_reference(self, match, lineno):
    """Parse a substitution reference (`|name|`), possibly with a trailing
    reference suffix (`|name|_` or `|name|__`)."""
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.substitution_ref,
        nodes.substitution_reference)
    if len(inlines) == 1:
        subref_node = inlines[0]
        if isinstance(subref_node, nodes.substitution_reference):
            subref_text = subref_node.astext()
            self.document.note_substitution_ref(subref_node, subref_text)
            if endstring[-1:] == '_':
                # also a (possibly anonymous) hyperlink reference
                reference_node = nodes.reference(
                    '|%s%s' % (subref_text, endstring), '')
                if endstring[-2:] == '__':
                    reference_node['anonymous'] = 1
                else:
                    reference_node['refname'] = normalize_name(subref_text)
                    self.document.note_refname(reference_node)
                reference_node += subref_node
                inlines = [reference_node]
    return before, inlines, remaining, sysmessages
def footnote_reference(self, match, lineno):
    """
    Handles `nodes.footnote_reference` and `nodes.citation_reference`
    elements.
    """
    label = match.group('footnotelabel')
    refname = normalize_name(label)
    string = match.string
    before = string[:match.start('whole')]
    remaining = string[match.end('whole'):]
    if match.group('citationlabel'):
        refnode = nodes.citation_reference('[%s]_' % label,
                                           refname=refname)
        refnode += nodes.Text(label)
        self.document.note_citation_ref(refnode)
    else:
        refnode = nodes.footnote_reference('[%s]_' % label)
        if refname[0] == '#':  # auto-numbered (with optional label)
            refname = refname[1:]
            refnode['auto'] = 1
            self.document.note_autofootnote_ref(refnode)
        elif refname == '*':  # auto-symbol
            refname = ''
            refnode['auto'] = '*'
            self.document.note_symbol_footnote_ref(
                refnode)
        else:  # manually numbered
            refnode += nodes.Text(label)
        if refname:
            refnode['refname'] = refname
            self.document.note_footnote_ref(refnode)
        if utils.get_trim_footnote_ref_space(self.document.settings):
            before = before.rstrip()
    return before, [refnode], remaining, []
def reference(self, match, lineno, anonymous=False):
    """Parse a simple hyperlink reference ("name_" or "name__")."""
    referencename = match.group('refname')
    refname = normalize_name(referencename)
    referencenode = nodes.reference(
        referencename + match.group('refend'), referencename,
        name=whitespace_normalize_name(referencename))
    referencenode[0].rawsource = referencename
    if anonymous:
        referencenode['anonymous'] = 1
    else:
        referencenode['refname'] = refname
        self.document.note_refname(referencenode)
    string = match.string
    matchstart = match.start('whole')
    matchend = match.end('whole')
    return string[:matchstart], [referencenode], string[matchend:], []
def anonymous_reference(self, match, lineno):
    """Parse an anonymous hyperlink reference (trailing "__")."""
    return self.reference(match, lineno, anonymous=True)
def standalone_uri(self, match, lineno):
    """Turn a recognized standalone URI or e-mail address into a
    reference node; raise `MarkupMismatch` for unknown schemes."""
    if (not match.group('scheme')
            or match.group('scheme').lower() in urischemes.schemes):
        if match.group('email'):
            addscheme = 'mailto:'
        else:
            addscheme = ''
        text = match.group('whole')
        refuri = addscheme + unescape(text)
        reference = nodes.reference(unescape(text, True), text,
                                    refuri=refuri)
        return [reference]
    else:  # not a valid scheme
        raise MarkupMismatch
def pep_reference(self, match, lineno):
    """Turn a "PEP nnn" / "pep-nnn" match into a reference to the PEP's URL."""
    text = match.group(0)
    if text.startswith('pep-'):
        pepnum = int(unescape(match.group('pepnum1')))
    elif text.startswith('PEP'):
        pepnum = int(unescape(match.group('pepnum2')))
    else:
        raise MarkupMismatch
    ref = (self.document.settings.pep_base_url
           + self.document.settings.pep_file_url_template % pepnum)
    return [nodes.reference(unescape(text, True), text, refuri=ref)]
# Filename pattern for RFC documents under `rfc_base_url`.
rfc_url = 'rfc%d.html'

def rfc_reference(self, match, lineno):
    """Turn an "RFC nnn" match into a reference to the RFC's URL."""
    text = match.group(0)
    if text.startswith('RFC'):
        rfcnum = int(unescape(match.group('rfcnum')))
        ref = self.document.settings.rfc_base_url + self.rfc_url % rfcnum
    else:
        raise MarkupMismatch
    return [nodes.reference(unescape(text, True), text, refuri=ref)]
def implicit_inline(self, text, lineno):
    """
    Check each of the patterns in `self.implicit_dispatch` for a match,
    and dispatch to the stored method for the pattern.  Recursively check
    the text before and after the match.  Return a list of `nodes.Text`
    and inline element nodes.
    """
    if not text:
        return []
    for pattern, method in self.implicit_dispatch:
        match = pattern.search(text)
        if match:
            try:
                # Must recurse on strings before *and* after the match;
                # there may be multiple patterns.
                return (self.implicit_inline(text[:match.start()], lineno)
                        + method(match, lineno)
                        + self.implicit_inline(text[match.end():], lineno))
            except MarkupMismatch:
                pass
    return [nodes.Text(text)]
1051 dispatch
= {'*': emphasis
,
1053 '`': interpreted_or_phrase_ref
,
1055 '_`': inline_internal_target
,
1056 ']_': footnote_reference
,
1057 '|': substitution_reference
,
1059 '__': anonymous_reference
}
1062 def _loweralpha_to_int(s
, _zero
=(ord('a')-1)):
1063 return ord(s
) - _zero
1066 def _upperalpha_to_int(s
, _zero
=(ord('A')-1)):
1067 return ord(s
) - _zero
def _lowerroman_to_int(s):
    """Convert a lowercase Roman numeral string to an integer."""
    return roman.fromRoman(s.upper())
1074 class Body(RSTState
):
1077 Generic classifier of the first line of a block.
1080 double_width_pad_char
= tableparser
.TableParser
.double_width_pad_char
1081 """Padding character for East Asian double-width text."""
1084 """Enumerated list parsing information."""
1087 'parens': Struct(prefix
='(', suffix
=')', start
=1, end
=-1),
1088 'rparen': Struct(prefix
='', suffix
=')', start
=0, end
=-1),
1089 'period': Struct(prefix
='', suffix
='.', start
=0, end
=-1)}
1090 enum
.formats
= enum
.formatinfo
.keys()
1091 enum
.sequences
= ['arabic', 'loweralpha', 'upperalpha',
1092 'lowerroman', 'upperroman'] # ORDERED!
1093 enum
.sequencepats
= {'arabic': '[0-9]+',
1094 'loweralpha': '[a-z]',
1095 'upperalpha': '[A-Z]',
1096 'lowerroman': '[ivxlcdm]+',
1097 'upperroman': '[IVXLCDM]+'}
1098 enum
.converters
= {'arabic': int,
1099 'loweralpha': _loweralpha_to_int
,
1100 'upperalpha': _upperalpha_to_int
,
1101 'lowerroman': _lowerroman_to_int
,
1102 'upperroman': roman
.fromRoman
}
1104 enum
.sequenceregexps
= {}
1105 for sequence
in enum
.sequences
:
1106 enum
.sequenceregexps
[sequence
] = re
.compile(
1107 enum
.sequencepats
[sequence
] + '$')
1109 grid_table_top_pat
= re
.compile(r
'\+-[-+]+-\+ *$')
1110 """Matches the top (& bottom) of a full table)."""
1112 simple_table_top_pat
= re
.compile('=+( +=+)+ *$')
1113 """Matches the top of a simple table."""
1115 simple_table_border_pat
= re
.compile('=+[ =]*$')
1116 """Matches the bottom & header bottom of a simple table."""
1119 """Fragments of patterns used by transitions."""
1121 pats
['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1122 pats
['alpha'] = '[a-zA-Z]'
1123 pats
['alphanum'] = '[a-zA-Z0-9]'
1124 pats
['alphanumplus'] = '[a-zA-Z0-9_-]'
1125 pats
['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1126 '|%(upperroman)s|#)' % enum
.sequencepats
)
1127 pats
['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1128 # @@@ Loosen up the pattern? Allow Unicode?
1129 pats
['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1130 pats
['shortopt'] = r
'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1131 pats
['longopt'] = r
'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1132 pats
['option'] = r
'(%(shortopt)s|%(longopt)s)' % pats
1134 for format
in enum
.formats
:
1135 pats
[format
] = '(?P<%s>%s%s%s)' % (
1136 format
, re
.escape(enum
.formatinfo
[format
].prefix
),
1137 pats
['enum'], re
.escape(enum
.formatinfo
[format
].suffix
))
1140 'bullet': '[-+*\u2022\u2023\u2043]( +|$)',
1141 'enumerator': r
'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats
,
1142 'field_marker': r
':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
1143 'option_marker': r
'%(option)s(, %(option)s)*( +| ?$)' % pats
,
1144 'doctest': r
'>>>( +|$)',
1145 'line_block': r
'\|( +|$)',
1146 'grid_table_top': grid_table_top_pat
,
1147 'simple_table_top': simple_table_top_pat
,
1148 'explicit_markup': r
'\.\.( +|$)',
1149 'anonymous': r
'__( +|$)',
1150 'line': r
'(%(nonalphanum7bit)s)\1* *$' % pats
,
1152 initial_transitions
= (
def indent(self, match, context, next_state):
    """Block quote."""
    (indented, indent, line_offset, blank_finish
     ) = self.state_machine.get_indented()
    elements = self.block_quote(indented, line_offset)
    self.parent += elements
    if not blank_finish:
        self.parent += self.unindent_warning('Block quote')
    return context, next_state, []
1176 def block_quote(self
, indented
, line_offset
):
1179 blockquote
= nodes
.block_quote(rawsource
='\n'.join(indented
))
1180 (blockquote
.source
, blockquote
.line
1181 ) = self
.state_machine
.get_source_and_line(line_offset
+1)
1186 new_line_offset
) = self
.split_attribution(indented
, line_offset
)
1187 self
.nested_parse(blockquote_lines
, line_offset
, blockquote
)
1188 elements
.append(blockquote
)
1189 if attribution_lines
:
1190 attribution
, messages
= self
.parse_attribution(
1191 attribution_lines
, line_offset
+attribution_offset
)
1192 blockquote
+= attribution
1193 elements
+= messages
1194 line_offset
= new_line_offset
1195 while indented
and not indented
[0]:
1196 indented
= indented
[1:]
# Attribution marker: two or three hyphens (but not four or more) or an
# em-dash (U+2014), optional spaces, then a non-space character.
attribution_pattern = re.compile('(---?(?!-)|\u2014) *(?=[^ \\n])')
1203 def split_attribution(self
, indented
, line_offset
):
1205 Check for a block quote attribution and split it off:
1207 * First line after a blank line must begin with a dash ("--", "---",
1208 em-dash; matches `self.attribution_pattern`).
1209 * Every line after that must have consistent indentation.
1210 * Attributions must be preceded by block quote content.
1212 Return a tuple of: (block quote content lines, attribution lines,
1213 attribution offset, remaining indented lines, remaining lines offset).
1216 nonblank_seen
= False
1217 for i
in range(len(indented
)):
1218 line
= indented
[i
].rstrip()
1220 if nonblank_seen
and blank
== i
- 1: # last line blank
1221 match
= self
.attribution_pattern
.match(line
)
1223 attribution_end
, indent
= self
.check_attribution(
1226 a_lines
= indented
[i
:attribution_end
]
1227 a_lines
.trim_left(match
.end(), end
=1)
1228 a_lines
.trim_left(indent
, start
=1)
1229 return (indented
[:i
], a_lines
,
1230 i
, indented
[attribution_end
:],
1231 line_offset
+ attribution_end
)
1232 nonblank_seen
= True
1236 return indented
, None, None, None, None
1238 def check_attribution(self
, indented
, attribution_start
):
1240 Check attribution shape.
1241 Return the index past the end of the attribution, and the indent.
1244 i
= attribution_start
+ 1
1245 for i
in range(attribution_start
+ 1, len(indented
)):
1246 line
= indented
[i
].rstrip()
1250 indent
= len(line
) - len(line
.lstrip())
1251 elif len(line
) - len(line
.lstrip()) != indent
:
1252 return None, None # bad shape; not an attribution
1254 # return index of line after last attribution line:
1256 return i
, (indent
or 0)
def parse_attribution(self, indented, line_offset):
    """Parse attribution lines into an `attribution` node plus messages."""
    text = '\n'.join(indented).rstrip()
    lineno = 1 + line_offset  # line_offset is zero-based
    textnodes, messages = self.inline_text(text, lineno)
    node = nodes.attribution(text, '', *textnodes)
    node.source, node.line = self.state_machine.get_source_and_line(lineno)
    return node, messages
def bullet(self, match, context, next_state):
    """Bullet list item."""
    bulletlist = nodes.bullet_list()
    (bulletlist.source,
     bulletlist.line) = self.state_machine.get_source_and_line()
    self.parent += bulletlist
    bulletlist['bullet'] = match.string[0]
    i, blank_finish = self.list_item(match.end())
    bulletlist += i
    offset = self.state_machine.line_offset + 1  # next line
    new_line_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=bulletlist, initial_state='BulletList',
        blank_finish=blank_finish)
    self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Bullet list')
    return [], next_state, []
def list_item(self, indent):
    """Return a list_item node built from the indented item body."""
    src, srcline = self.state_machine.get_source_and_line()
    if self.state_machine.line[indent:]:
        # text on the same line as the marker: indent is known
        indented, line_offset, blank_finish = (
            self.state_machine.get_known_indented(indent))
    else:
        # marker alone on its line: measure indent from the next line
        indented, indent, line_offset, blank_finish = (
            self.state_machine.get_first_known_indented(indent))
    listitem = nodes.list_item('\n'.join(indented))
    listitem.source, listitem.line = src, srcline
    if indented:
        self.nested_parse(indented, input_offset=line_offset,
                          node=listitem)
    return listitem, blank_finish
1300 def enumerator(self
, match
, context
, next_state
):
1301 """Enumerated List Item"""
1302 format
, sequence
, text
, ordinal
= self
.parse_enumerator(match
)
1303 if not self
.is_enumerated_list_item(ordinal
, sequence
, format
):
1304 raise statemachine
.TransitionCorrection('text')
1305 enumlist
= nodes
.enumerated_list()
1306 self
.parent
+= enumlist
1308 enumlist
['enumtype'] = 'arabic'
1310 enumlist
['enumtype'] = sequence
1311 enumlist
['prefix'] = self
.enum
.formatinfo
[format
].prefix
1312 enumlist
['suffix'] = self
.enum
.formatinfo
[format
].suffix
1314 enumlist
['start'] = ordinal
1315 msg
= self
.reporter
.info(
1316 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1319 listitem
, blank_finish
= self
.list_item(match
.end())
1320 enumlist
+= listitem
1321 offset
= self
.state_machine
.line_offset
+ 1 # next line
1322 newline_offset
, blank_finish
= self
.nested_list_parse(
1323 self
.state_machine
.input_lines
[offset
:],
1324 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1325 node
=enumlist
, initial_state
='EnumeratedList',
1326 blank_finish
=blank_finish
,
1327 extra_settings
={'lastordinal': ordinal
,
1329 'auto': sequence
== '#'})
1330 self
.goto_line(newline_offset
)
1331 if not blank_finish
:
1332 self
.parent
+= self
.unindent_warning('Enumerated list')
1333 return [], next_state
, []
1335 def parse_enumerator(self
, match
, expected_sequence
=None):
1337 Analyze an enumerator and return the results.
1340 - the enumerator format ('period', 'parens', or 'rparen'),
1341 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1342 - the text of the enumerator, stripped of formatting, and
1343 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1344 ``None`` is returned for invalid enumerator text).
1346 The enumerator format has already been determined by the regular
1347 expression match. If `expected_sequence` is given, that sequence is
1348 tried first. If not, we check for Roman numeral 1. This way,
1349 single-character Roman numerals (which are also alphabetical) can be
1350 matched. If no sequence has been matched, all sequences are checked in
1353 groupdict
= match
.groupdict()
1355 for format
in self
.enum
.formats
:
1356 if groupdict
[format
]: # was this the format matched?
1357 break # yes; keep `format`
1358 else: # shouldn't happen
1359 raise ParserError('enumerator format not matched')
1360 text
= groupdict
[format
][self
.enum
.formatinfo
[format
].start
# noqa: E203,E501
1361 : self
.enum
.formatinfo
[format
].end
]
1364 elif expected_sequence
:
1366 if self
.enum
.sequenceregexps
[expected_sequence
].match(text
):
1367 sequence
= expected_sequence
1368 except KeyError: # shouldn't happen
1369 raise ParserError('unknown enumerator sequence: %s'
1372 sequence
= 'lowerroman'
1374 sequence
= 'upperroman'
1376 for sequence
in self
.enum
.sequences
:
1377 if self
.enum
.sequenceregexps
[sequence
].match(text
):
1379 else: # shouldn't happen
1380 raise ParserError('enumerator sequence not matched')
1385 ordinal
= self
.enum
.converters
[sequence
](text
)
1386 except roman
.InvalidRomanNumeralError
:
1388 return format
, sequence
, text
, ordinal
1390 def is_enumerated_list_item(self
, ordinal
, sequence
, format
):
1392 Check validity based on the ordinal value and the second line.
1394 Return true if the ordinal is valid and the second line is blank,
1395 indented, or starts with the next enumerator or an auto-enumerator.
1400 next_line
= self
.state_machine
.next_line()
1401 except EOFError: # end of input lines
1402 self
.state_machine
.previous_line()
1405 self
.state_machine
.previous_line()
1406 if not next_line
[:1].strip(): # blank or indented
1408 result
= self
.make_enumerator(ordinal
+ 1, sequence
, format
)
1410 next_enumerator
, auto_enumerator
= result
1412 if (next_line
.startswith(next_enumerator
)
1413 or next_line
.startswith(auto_enumerator
)):
1419 def make_enumerator(self
, ordinal
, sequence
, format
):
1421 Construct and return the next enumerated list item marker, and an
1422 auto-enumerator ("#" instead of the regular enumerator).
1424 Return ``None`` for invalid (out of range) ordinals.
1428 elif sequence
== 'arabic':
1429 enumerator
= str(ordinal
)
1431 if sequence
.endswith('alpha'):
1434 enumerator
= chr(ordinal
+ ord('a') - 1)
1435 elif sequence
.endswith('roman'):
1437 enumerator
= roman
.toRoman(ordinal
)
1438 except roman
.RomanError
:
1440 else: # shouldn't happen
1441 raise ParserError('unknown enumerator sequence: "%s"'
1443 if sequence
.startswith('lower'):
1444 enumerator
= enumerator
.lower()
1445 elif sequence
.startswith('upper'):
1446 enumerator
= enumerator
.upper()
1447 else: # shouldn't happen
1448 raise ParserError('unknown enumerator sequence: "%s"'
1450 formatinfo
= self
.enum
.formatinfo
[format
]
1451 next_enumerator
= (formatinfo
.prefix
+ enumerator
+ formatinfo
.suffix
1453 auto_enumerator
= formatinfo
.prefix
+ '#' + formatinfo
.suffix
+ ' '
1454 return next_enumerator
, auto_enumerator
1456 def field_marker(self
, match
, context
, next_state
):
1457 """Field list item."""
1458 field_list
= nodes
.field_list()
1459 self
.parent
+= field_list
1460 field
, blank_finish
= self
.field(match
)
1462 offset
= self
.state_machine
.line_offset
+ 1 # next line
1463 newline_offset
, blank_finish
= self
.nested_list_parse(
1464 self
.state_machine
.input_lines
[offset
:],
1465 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1466 node
=field_list
, initial_state
='FieldList',
1467 blank_finish
=blank_finish
)
1468 self
.goto_line(newline_offset
)
1469 if not blank_finish
:
1470 self
.parent
+= self
.unindent_warning('Field list')
1471 return [], next_state
, []
1473 def field(self
, match
):
1474 name
= self
.parse_field_marker(match
)
1475 src
, srcline
= self
.state_machine
.get_source_and_line()
1476 lineno
= self
.state_machine
.abs_line_number()
1477 (indented
, indent
, line_offset
, blank_finish
1478 ) = self
.state_machine
.get_first_known_indented(match
.end())
1479 field_node
= nodes
.field()
1480 field_node
.source
= src
1481 field_node
.line
= srcline
1482 name_nodes
, name_messages
= self
.inline_text(name
, lineno
)
1483 field_node
+= nodes
.field_name(name
, '', *name_nodes
)
1484 field_body
= nodes
.field_body('\n'.join(indented
), *name_messages
)
1485 field_node
+= field_body
1487 self
.parse_field_body(indented
, line_offset
, field_body
)
1488 return field_node
, blank_finish
def parse_field_marker(self, match):
    """Extract & return field name from a field marker match."""
    field = match.group()[1:]  # strip off leading ':'
    field = field[:field.rfind(':')]  # strip off trailing ':' etc.
    return field
def parse_field_body(self, indented, offset, node):
    """Parse the indented field body into `node`."""
    self.nested_parse(indented, input_offset=offset, node=node)
1499 def option_marker(self
, match
, context
, next_state
):
1500 """Option list item."""
1501 optionlist
= nodes
.option_list()
1502 (optionlist
.source
, optionlist
.line
1503 ) = self
.state_machine
.get_source_and_line()
1505 listitem
, blank_finish
= self
.option_list_item(match
)
1506 except MarkupError
as error
:
1507 # This shouldn't happen; pattern won't match.
1508 msg
= self
.reporter
.error('Invalid option list marker: %s'
1511 (indented
, indent
, line_offset
, blank_finish
1512 ) = self
.state_machine
.get_first_known_indented(match
.end())
1513 elements
= self
.block_quote(indented
, line_offset
)
1514 self
.parent
+= elements
1515 if not blank_finish
:
1516 self
.parent
+= self
.unindent_warning('Option list')
1517 return [], next_state
, []
1518 self
.parent
+= optionlist
1519 optionlist
+= listitem
1520 offset
= self
.state_machine
.line_offset
+ 1 # next line
1521 newline_offset
, blank_finish
= self
.nested_list_parse(
1522 self
.state_machine
.input_lines
[offset
:],
1523 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1524 node
=optionlist
, initial_state
='OptionList',
1525 blank_finish
=blank_finish
)
1526 self
.goto_line(newline_offset
)
1527 if not blank_finish
:
1528 self
.parent
+= self
.unindent_warning('Option list')
1529 return [], next_state
, []
1531 def option_list_item(self
, match
):
1532 offset
= self
.state_machine
.abs_line_offset()
1533 options
= self
.parse_option_marker(match
)
1534 (indented
, indent
, line_offset
, blank_finish
1535 ) = self
.state_machine
.get_first_known_indented(match
.end())
1536 if not indented
: # not an option list item
1537 self
.goto_line(offset
)
1538 raise statemachine
.TransitionCorrection('text')
1539 option_group
= nodes
.option_group('', *options
)
1540 description
= nodes
.description('\n'.join(indented
))
1541 option_list_item
= nodes
.option_list_item('', option_group
,
1544 self
.nested_parse(indented
, input_offset
=line_offset
,
1546 return option_list_item
, blank_finish
1548 def parse_option_marker(self
, match
):
1550 Return a list of `node.option` and `node.option_argument` objects,
1551 parsed from an option marker match.
1553 :Exception: `MarkupError` for invalid option markers.
1556 # split at ", ", except inside < > (complex arguments)
1557 optionstrings
= re
.split(r
', (?![^<]*>)', match
.group().rstrip())
1558 for optionstring
in optionstrings
:
1559 tokens
= optionstring
.split()
1561 firstopt
= tokens
[0].split('=', 1)
1562 if len(firstopt
) > 1:
1563 # "--opt=value" form
1564 tokens
[:1] = firstopt
1566 elif (len(tokens
[0]) > 2
1567 and ((tokens
[0].startswith('-')
1568 and not tokens
[0].startswith('--'))
1569 or tokens
[0].startswith('+'))):
1571 tokens
[:1] = [tokens
[0][:2], tokens
[0][2:]]
1573 if len(tokens
) > 1 and (tokens
[1].startswith('<')
1574 and tokens
[-1].endswith('>')):
1575 # "-o <value1 value2>" form; join all values into one token
1576 tokens
[1:] = [' '.join(tokens
[1:])]
1577 if 0 < len(tokens
) <= 2:
1578 option
= nodes
.option(optionstring
)
1579 option
+= nodes
.option_string(tokens
[0], tokens
[0])
1581 option
+= nodes
.option_argument(tokens
[1], tokens
[1],
1582 delimiter
=delimiter
)
1583 optlist
.append(option
)
1586 'wrong number of option tokens (=%s), should be 1 or 2: '
1587 '"%s"' % (len(tokens
), optionstring
))
def doctest(self, match, context, next_state):
    """Collect a doctest block (lines starting with ">>>")."""
    data = '\n'.join(self.state_machine.get_text_block())
    # TODO: prepend class value ['pycon'] (Python Console)
    # parse with `directives.body.CodeBlock` (returns literal-block
    # with class "code" and syntax highlight markup).
    self.parent += nodes.doctest_block(data, data)
    return [], next_state, []
1598 def line_block(self
, match
, context
, next_state
):
1599 """First line of a line block."""
1600 block
= nodes
.line_block()
1601 self
.parent
+= block
1602 lineno
= self
.state_machine
.abs_line_number()
1603 line
, messages
, blank_finish
= self
.line_block_line(match
, lineno
)
1605 self
.parent
+= messages
1606 if not blank_finish
:
1607 offset
= self
.state_machine
.line_offset
+ 1 # next line
1608 new_line_offset
, blank_finish
= self
.nested_list_parse(
1609 self
.state_machine
.input_lines
[offset
:],
1610 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1611 node
=block
, initial_state
='LineBlock',
1613 self
.goto_line(new_line_offset
)
1614 if not blank_finish
:
1615 self
.parent
+= self
.reporter
.warning(
1616 'Line block ends without a blank line.',
1619 if block
[0].indent
is None:
1621 self
.nest_line_block_lines(block
)
1622 return [], next_state
, []
1624 def line_block_line(self
, match
, lineno
):
1625 """Return one line element of a line_block."""
1626 (indented
, indent
, line_offset
, blank_finish
1627 ) = self
.state_machine
.get_first_known_indented(match
.end(),
1629 text
= '\n'.join(indented
)
1630 text_nodes
, messages
= self
.inline_text(text
, lineno
)
1631 line
= nodes
.line(text
, '', *text_nodes
)
1632 if match
.string
.rstrip() != '|': # not empty
1633 line
.indent
= len(match
.group(1)) - 1
1634 return line
, messages
, blank_finish
def nest_line_block_lines(self, block):
    """Fill in missing indents (inherit from the previous line), then
    nest the whole block by indentation."""
    for index in range(1, len(block)):
        if getattr(block[index], 'indent', None) is None:
            block[index].indent = block[index - 1].indent
    self.nest_line_block_segment(block)
1642 def nest_line_block_segment(self
, block
):
1643 indents
= [item
.indent
for item
in block
]
1644 least
= min(indents
)
1646 new_block
= nodes
.line_block()
1648 if item
.indent
> least
:
1649 new_block
.append(item
)
1652 self
.nest_line_block_segment(new_block
)
1653 new_items
.append(new_block
)
1654 new_block
= nodes
.line_block()
1655 new_items
.append(item
)
1657 self
.nest_line_block_segment(new_block
)
1658 new_items
.append(new_block
)
1659 block
[:] = new_items
def grid_table_top(self, match, context, next_state):
    """Top border of a full table."""
    return self.table_top(match, context, next_state,
                          self.isolate_grid_table,
                          tableparser.GridTableParser)
def simple_table_top(self, match, context, next_state):
    """Top border of a simple table."""
    return self.table_top(match, context, next_state,
                          self.isolate_simple_table,
                          tableparser.SimpleTableParser)
def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """Top border of a generic table."""
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if not blank_finish:
        msg = self.reporter.warning(
            'Blank line required after table.',
            line=self.state_machine.abs_line_number()+1)
        self.parent += msg
    return [], next_state, []
1685 def table(self
, isolate_function
, parser_class
):
1686 """Parse a table."""
1687 block
, messages
, blank_finish
= isolate_function()
1690 parser
= parser_class()
1691 tabledata
= parser
.parse(block
)
1692 tableline
= (self
.state_machine
.abs_line_number() - len(block
)
1694 table
= self
.build_table(tabledata
, tableline
)
1695 nodelist
= [table
] + messages
1696 except tableparser
.TableMarkupError
as err
:
1697 nodelist
= self
.malformed_table(block
, ' '.join(err
.args
),
1698 offset
=err
.offset
) + messages
1701 return nodelist
, blank_finish
1703 def isolate_grid_table(self
):
1707 block
= self
.state_machine
.get_text_block(flush_left
=True)
1708 except statemachine
.UnexpectedIndentationError
as err
:
1709 block
, src
, srcline
= err
.args
1710 messages
.append(self
.reporter
.error('Unexpected indentation.',
1711 source
=src
, line
=srcline
))
1714 # for East Asian chars:
1715 block
.pad_double_width(self
.double_width_pad_char
)
1716 width
= len(block
[0].strip())
1717 for i
in range(len(block
)):
1718 block
[i
] = block
[i
].strip()
1719 if block
[i
][0] not in '+|': # check left edge
1721 self
.state_machine
.previous_line(len(block
) - i
)
1724 if not self
.grid_table_top_pat
.match(block
[-1]): # find bottom
1726 # from second-last to third line of table:
1727 for i
in range(len(block
) - 2, 1, -1):
1728 if self
.grid_table_top_pat
.match(block
[i
]):
1729 self
.state_machine
.previous_line(len(block
) - i
+ 1)
1733 messages
.extend(self
.malformed_table(block
))
1734 return [], messages
, blank_finish
1735 for i
in range(len(block
)): # check right edge
1736 if len(block
[i
]) != width
or block
[i
][-1] not in '+|':
1737 messages
.extend(self
.malformed_table(block
))
1738 return [], messages
, blank_finish
1739 return block
, messages
, blank_finish
1741 def isolate_simple_table(self
):
1742 start
= self
.state_machine
.line_offset
1743 lines
= self
.state_machine
.input_lines
1744 limit
= len(lines
) - 1
1745 toplen
= len(lines
[start
].strip())
1746 pattern_match
= self
.simple_table_border_pat
.match
1752 match
= pattern_match(line
)
1754 if len(line
.strip()) != toplen
:
1755 self
.state_machine
.next_line(i
- start
)
1756 messages
= self
.malformed_table(
1757 lines
[start
:i
+1], 'Bottom/header table border does '
1758 'not match top border.')
1759 return [], messages
, i
== limit
or not lines
[i
+1].strip()
1762 if found
== 2 or i
== limit
or not lines
[i
+1].strip():
1766 else: # reached end of input_lines
1768 extra
= ' or no blank line after table bottom'
1769 self
.state_machine
.next_line(found_at
- start
)
1770 block
= lines
[start
:found_at
+1]
1773 self
.state_machine
.next_line(i
- start
- 1)
1774 block
= lines
[start
:]
1775 messages
= self
.malformed_table(
1776 block
, 'No bottom table border found%s.' % extra
)
1777 return [], messages
, not extra
1778 self
.state_machine
.next_line(end
- start
)
1779 block
= lines
[start
:end
+1]
1780 # for East Asian chars:
1781 block
.pad_double_width(self
.double_width_pad_char
)
1782 return block
, [], end
== limit
or not lines
[end
+1].strip()
def malformed_table(self, block, detail='', offset=0):
    """Return a system message for an unparseable table block."""
    block.replace(self.double_width_pad_char, '')
    data = '\n'.join(block)
    message = 'Malformed table.'
    startline = self.state_machine.abs_line_number() - len(block) + 1
    if detail:
        message += '\n' + detail
    error = self.reporter.error(message, nodes.literal_block(data, data),
                                line=startline+offset)
    return [error]
1795 def build_table(self
, tabledata
, tableline
, stub_columns
=0, widths
=None):
1796 colwidths
, headrows
, bodyrows
= tabledata
1797 table
= nodes
.table()
1798 if widths
== 'auto':
1799 table
['classes'] += ['colwidths-auto']
1800 elif widths
: # "grid" or list of integers
1801 table
['classes'] += ['colwidths-given']
1802 tgroup
= nodes
.tgroup(cols
=len(colwidths
))
1804 for colwidth
in colwidths
:
1805 colspec
= nodes
.colspec(colwidth
=colwidth
)
1807 colspec
.attributes
['stub'] = 1
1811 thead
= nodes
.thead()
1813 for row
in headrows
:
1814 thead
+= self
.build_table_row(row
, tableline
)
1815 tbody
= nodes
.tbody()
1817 for row
in bodyrows
:
1818 tbody
+= self
.build_table_row(row
, tableline
)
1821 def build_table_row(self
, rowdata
, tableline
):
1823 for cell
in rowdata
:
1826 morerows
, morecols
, offset
, cellblock
= cell
1829 attributes
['morerows'] = morerows
1831 attributes
['morecols'] = morecols
1832 entry
= nodes
.entry(**attributes
)
1834 if ''.join(cellblock
):
1835 self
.nested_parse(cellblock
, input_offset
=tableline
+offset
,
1840 """Patterns and constants used for explicit markup recognition."""
1842 explicit
.patterns
= Struct(
1843 target
=re
.compile(r
"""
1845 _ # anonymous target
1847 (?!_) # no underscore at the beginning
1848 (?P<quote>`?) # optional open quote
1849 (?![ `]) # first char. not space or
1851 (?P<name> # reference name
1854 %(non_whitespace_escape_before)s
1855 (?P=quote) # close quote if open quote used
1857 (?<!(?<!\x00):) # no unescaped colon at end
1858 %(non_whitespace_escape_before)s
1859 [ ]? # optional space
1860 : # end of reference name
1861 ([ ]+|$) # followed by whitespace
1862 """ % vars(Inliner
), re
.VERBOSE
),
1863 reference
=re
.compile(r
"""
1865 (?P<simple>%(simplename)s)_
1869 (?P<phrase>.+?) # hyperlink phrase
1870 %(non_whitespace_escape_before)s
1871 `_ # close backquote,
1875 """ % vars(Inliner
), re
.VERBOSE
),
1876 substitution
=re
.compile(r
"""
1878 (?![ ]) # first char. not space
1879 (?P<name>.+?) # substitution text
1880 %(non_whitespace_escape_before)s
1881 \| # close delimiter
1883 ([ ]+|$) # followed by whitespace
1884 """ % vars(Inliner
),
1887 def footnote(self
, match
):
1888 src
, srcline
= self
.state_machine
.get_source_and_line()
1889 (indented
, indent
, offset
, blank_finish
1890 ) = self
.state_machine
.get_first_known_indented(match
.end())
1891 label
= match
.group(1)
1892 name
= normalize_name(label
)
1893 footnote
= nodes
.footnote('\n'.join(indented
))
1894 footnote
.source
= src
1895 footnote
.line
= srcline
1896 if name
[0] == '#': # auto-numbered
1897 name
= name
[1:] # autonumber label
1898 footnote
['auto'] = 1
1900 footnote
['names'].append(name
)
1901 self
.document
.note_autofootnote(footnote
)
1902 elif name
== '*': # auto-symbol
1904 footnote
['auto'] = '*'
1905 self
.document
.note_symbol_footnote(footnote
)
1906 else: # manually numbered
1907 footnote
+= nodes
.label('', label
)
1908 footnote
['names'].append(name
)
1909 self
.document
.note_footnote(footnote
)
1911 self
.document
.note_explicit_target(footnote
, footnote
)
1913 self
.document
.set_id(footnote
, footnote
)
1915 self
.nested_parse(indented
, input_offset
=offset
, node
=footnote
)
1916 return [footnote
], blank_finish
def citation(self, match):
    """Parse an explicit citation (".. [label] body")."""
    src, srcline = self.state_machine.get_source_and_line()
    (indented, indent, offset, blank_finish
     ) = self.state_machine.get_first_known_indented(match.end())
    label = match.group(1)
    name = normalize_name(label)
    citation = nodes.citation('\n'.join(indented))
    citation.source = src
    citation.line = srcline
    citation += nodes.label('', label)
    citation['names'].append(name)
    self.document.note_citation(citation)
    self.document.note_explicit_target(citation, citation)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=citation)
    return [citation], blank_finish
    def hyperlink_target(self, match):
        """Parse an explicit hyperlink target (``.. _name: URI``).

        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        pattern = self.explicit.patterns.target
        lineno = self.state_machine.abs_line_number()
        (block, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(
             match.end(), until_blank=True, strip_indent=False)
        blocktext = match.string[:match.end()] + '\n'.join(block)
        block = [escape2null(line) for line in block]
        # A (phrase) target name may span several lines: join lines one at
        # a time until the target pattern matches.
        # NOTE(review): the loop scaffolding around `targetmatch` was
        # dropped in this copy; restored from context -- verify upstream.
        escaped = block[0]
        blockindex = 0
        while True:
            targetmatch = pattern.match(escaped)
            if targetmatch:
                break
            blockindex += 1
            try:
                escaped += block[blockindex]
            except IndexError:
                raise MarkupError('malformed hyperlink target.')
        del block[:blockindex]
        # Strip the matched marker text off the first remaining line.
        block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
        target = self.make_target(block, blocktext, lineno,
                                  targetmatch.group('name'))
        return [target], blank_finish
    def make_target(self, block, block_text, lineno, target_name):
        """Build and register a `nodes.target` from a parsed target block.

        Returns the target node (indirect or URI) or, for a malformed
        target, the system_message produced by `parse_target`.
        """
        target_type, data = self.parse_target(block, block_text, lineno)
        if target_type == 'refname':
            target = nodes.target(block_text, '', refname=normalize_name(data))
            target.indirect_reference_name = data
            self.add_target(target_name, '', target, lineno)
            self.document.note_indirect_target(target)
            return target
        elif target_type == 'refuri':
            target = nodes.target(block_text, '')
            self.add_target(target_name, data, target, lineno)
            return target
        else:
            # 'malformed': `data` is a system_message node.
            return data
    def parse_target(self, block, block_text, lineno):
        """
        Determine the type of reference of a target.

        :Return: A 2-tuple, one of:

            - 'refname' and the indirect reference name
            - 'refuri' and the URI
            - 'malformed' and a system_message node
        """
        if block and block[-1].strip()[-1:] == '_':  # possible indirect target
            reference = ' '.join(line.strip() for line in block)
            refname = self.is_reference(reference)
            if refname:
                return 'refname', refname
        # Not an indirect target: normalize whitespace and return as a URI.
        ref_parts = split_escaped_whitespace(' '.join(block))
        reference = ' '.join(''.join(unescape(part).split())
                             for part in ref_parts)
        return 'refuri', reference
    def is_reference(self, reference):
        """Return the reference name if `reference` parses as one, else None."""
        match = self.explicit.patterns.reference.match(
            whitespace_normalize_name(reference))
        if not match:
            return None
        return unescape(match.group('simple') or match.group('phrase'))
    def add_target(self, targetname, refuri, target, lineno):
        """Register `target` (named or anonymous) with the document.

        :Raises ApplicationError: if `refuri` cannot be adjusted to a URI.
        """
        target.line = lineno
        if targetname:
            name = normalize_name(unescape(targetname))
            target['names'].append(name)
            if refuri:
                uri = self.inliner.adjust_uri(refuri)
                if uri:
                    target['refuri'] = uri
                else:
                    raise ApplicationError('problem with URI: %r' % refuri)
            self.document.note_explicit_target(target, self.parent)
        else:                       # anonymous target
            if refuri:
                target['refuri'] = refuri
            target['anonymous'] = 1
            self.document.note_anonymous_target(target)
    def substitution_def(self, match):
        """Parse a substitution definition (``.. |name| directive:: ...``).

        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        pattern = self.explicit.patterns.substitution
        src, srcline = self.state_machine.get_source_and_line()
        (block, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         strip_indent=False)
        blocktext = (match.string[:match.end()] + '\n'.join(block))
        # NOTE(review): several scaffolding lines of this method were
        # dropped in this copy; restored from context -- verify upstream.
        block.disconnect()
        escaped = escape2null(block[0].rstrip())
        blockindex = 0
        # The substitution text may span lines: accumulate until it matches.
        while True:
            subdefmatch = pattern.match(escaped)
            if subdefmatch:
                break
            blockindex += 1
            try:
                escaped = escaped + ' ' + escape2null(
                    block[blockindex].strip())
            except IndexError:
                raise MarkupError('malformed substitution definition.')
        del block[:blockindex]  # strip out the substitution marker
        start = subdefmatch.end()-len(escaped)-1
        block[0] = (block[0].strip() + ' ')[start:-1]
        if not block[0]:
            del block[0]
            offset += 1
        while block and not block[-1].strip():
            block.pop()
        subname = subdefmatch.group('name')
        substitution_node = nodes.substitution_definition(blocktext)
        substitution_node.source = src
        substitution_node.line = srcline
        if not block:
            msg = self.reporter.warning(
                'Substitution definition "%s" missing contents.' % subname,
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
        block[0] = block[0].strip()
        substitution_node['names'].append(
            nodes.whitespace_normalize_name(subname))
        new_abs_offset, blank_finish = self.nested_list_parse(
            block, input_offset=offset, node=substitution_node,
            initial_state='SubstitutionDef', blank_finish=blank_finish)
        i = 0
        # Move any non-inline children (e.g. system messages) out of the
        # substitution definition and into the parent.
        for node in substitution_node[:]:
            if not (isinstance(node, nodes.Inline)
                    or isinstance(node, nodes.Text)):
                self.parent += substitution_node[i]
                del substitution_node[i]
            else:
                i += 1
        for node in substitution_node.findall(nodes.Element):
            if self.disallowed_inside_substitution_definitions(node):
                pformat = nodes.literal_block('', node.pformat().rstrip())
                msg = self.reporter.error(
                    'Substitution definition contains illegal element <%s>:'
                    % node.tagname,
                    pformat, nodes.literal_block(blocktext, blocktext),
                    source=src, line=srcline)
                return [msg], blank_finish
        if len(substitution_node) == 0:
            msg = self.reporter.warning(
                'Substitution definition "%s" empty or invalid.' % subname,
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
        self.document.note_substitution_def(
            substitution_node, subname, self.parent)
        return [substitution_node], blank_finish
    def disallowed_inside_substitution_definitions(self, node):
        """Return True if `node` may not appear in a substitution definition."""
        if (node['ids']
            or isinstance(node, nodes.reference) and node.get('anonymous')
            or isinstance(node, nodes.footnote_reference) and node.get('auto')):  # noqa: E501
            return True
        else:
            return False
    def directive(self, match, **option_presets):
        """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
        type_name = match.group(1)
        directive_class, messages = directives.directive(
            type_name, self.memo.language, self.document)
        self.parent += messages
        if directive_class:
            return self.run_directive(
                directive_class, match, type_name, option_presets)
        else:
            return self.unknown_directive(type_name)
    def run_directive(self, directive, match, type_name, option_presets):
        """
        Parse a directive then run its directive function.

        Parameters:

        - `directive`: The class implementing the directive.  Must be
          a subclass of `rst.Directive`.

        - `match`: A regular expression match object which matched the first
          line of the directive.

        - `type_name`: The directive name, as used in the source text.

        - `option_presets`: A dictionary of preset options, defaults for the
          directive options.  Currently, only an "alt" option is passed by
          substitution definitions (value: the substitution name), which may
          be used by an embedded image directive.

        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        if isinstance(directive, (FunctionType, MethodType)):
            from docutils.parsers.rst import convert_directive_function
            directive = convert_directive_function(directive)
        lineno = self.state_machine.abs_line_number()
        initial_line_offset = self.state_machine.line_offset
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         strip_top=0)
        block_text = '\n'.join(self.state_machine.input_lines[
            initial_line_offset : self.state_machine.line_offset + 1])  # noqa: E203,E501
        try:
            arguments, options, content, content_offset = (
                self.parse_directive_block(indented, line_offset,
                                           directive, option_presets))
        except MarkupError as detail:
            error = self.reporter.error(
                'Error in "%s" directive:\n%s.' % (type_name,
                                                   ' '.join(detail.args)),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error], blank_finish
        directive_instance = directive(
            type_name, arguments, options, content, lineno,
            content_offset, block_text, self, self.state_machine)
        try:
            result = directive_instance.run()
        except docutils.parsers.rst.DirectiveError as error:
            # Directives signal failure via DirectiveError; convert it into
            # a system message plus a literal block of the offending source.
            msg_node = self.reporter.system_message(error.level, error.msg,
                                                    line=lineno)
            msg_node += nodes.literal_block(block_text, block_text)
            result = [msg_node]
        assert isinstance(result, list), \
            'Directive "%s" must return a list of nodes.' % type_name
        for i in range(len(result)):
            assert isinstance(result[i], nodes.Node), \
                ('Directive "%s" returned non-Node object (index %s): %r'
                 % (type_name, i, result[i]))
        return (result,
                blank_finish or self.state_machine.is_next_line_blank())
    def parse_directive_block(self, indented, line_offset, directive,
                              option_presets):
        """Split a directive's indented block into arguments, options, content.

        Returns ``(arguments, options, content, content_offset)``.
        :Raises MarkupError: on bad arguments/options, or content where none
            is permitted.
        """
        option_spec = directive.option_spec
        has_content = directive.has_content
        if indented and not indented[0].strip():
            indented.trim_start()
            line_offset += 1
        while indented and not indented[-1].strip():
            indented.trim_end()
        # NOTE(review): the else-branches of the conditionals below were
        # dropped in this copy; restored from context -- verify upstream.
        if indented and (directive.required_arguments
                         or directive.optional_arguments
                         or option_spec):
            # The first blank line separates arguments/options from content.
            for i, line in enumerate(indented):
                if not line.strip():
                    break
            else:
                i += 1
            arg_block = indented[:i]
            content = indented[i+1:]
            content_offset = line_offset + i + 1
        else:
            content = indented
            content_offset = line_offset
            arg_block = []
        if option_spec:
            options, arg_block = self.parse_directive_options(
                option_presets, option_spec, arg_block)
        else:
            options = {}
        if arg_block and not (directive.required_arguments
                              or directive.optional_arguments):
            # No arguments expected: everything is content.
            content = arg_block + indented[i:]
            content_offset = line_offset
            arg_block = []
        while content and not content[0].strip():
            content.trim_start()
            content_offset += 1
        if directive.required_arguments or directive.optional_arguments:
            arguments = self.parse_directive_arguments(
                directive, arg_block)
        else:
            arguments = []
        if content and not has_content:
            raise MarkupError('no content permitted')
        return arguments, options, content, content_offset
    def parse_directive_options(self, option_presets, option_spec, arg_block):
        """Split the option (field-list) part off `arg_block` and parse it.

        Returns ``(options, arg_block)`` with options merged over the presets.
        :Raises MarkupError: if the option block is invalid.
        """
        options = option_presets.copy()
        for i, line in enumerate(arg_block):
            if re.match(Body.patterns['field_marker'], line):
                opt_block = arg_block[i:]
                arg_block = arg_block[:i]
                break
        else:
            opt_block = []
        if opt_block:
            success, data = self.parse_extension_options(option_spec,
                                                         opt_block)
            if success:                 # data is a dict of options
                options.update(data)
            else:                       # data is an error string
                raise MarkupError(data)
        return options, arg_block
2235 def parse_directive_arguments(self
, directive
, arg_block
):
2236 required
= directive
.required_arguments
2237 optional
= directive
.optional_arguments
2238 arg_text
= '\n'.join(arg_block
)
2239 arguments
= arg_text
.split()
2240 if len(arguments
) < required
:
2241 raise MarkupError('%s argument(s) required, %s supplied'
2242 % (required
, len(arguments
)))
2243 elif len(arguments
) > required
+ optional
:
2244 if directive
.final_argument_whitespace
:
2245 arguments
= arg_text
.split(None, required
+ optional
- 1)
2248 'maximum %s argument(s) allowed, %s supplied'
2249 % (required
+ optional
, len(arguments
)))
    def parse_extension_options(self, option_spec, datalines):
        """
        Parse `datalines` for a field list containing extension options
        matching `option_spec`.

        :Parameters:
            - `option_spec`: a mapping of option name to conversion
              function, which should raise an exception on bad input.
            - `datalines`: a list of input strings.

        :Return:
            - Success value, 1 or 0.
            - An option dictionary on success, an error string on failure.
        """
        node = nodes.field_list()
        newline_offset, blank_finish = self.nested_list_parse(
            datalines, 0, node, initial_state='ExtensionOptions',
            blank_finish=True)
        if newline_offset != len(datalines):  # incomplete parse of block
            return 0, 'invalid option block'
        try:
            options = utils.extract_extension_options(node, option_spec)
        except KeyError as detail:
            return 0, 'unknown option: "%s"' % detail.args[0]
        except (ValueError, TypeError) as detail:
            return 0, 'invalid option value: %s' % ' '.join(detail.args)
        except utils.ExtensionOptionError as detail:
            return 0, 'invalid option data: %s' % ' '.join(detail.args)
        if blank_finish:
            return 1, options
        else:
            return 0, 'option data incompletely parsed'
    def unknown_directive(self, type_name):
        """Consume the unknown directive's block and report an error.

        Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
        """
        lineno = self.state_machine.abs_line_number()
        (indented, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(0, strip_indent=False)
        text = '\n'.join(indented)
        error = self.reporter.error('Unknown directive type "%s".' % type_name,
                                    nodes.literal_block(text, text),
                                    line=lineno)
        return [error], blank_finish
    def comment(self, match):
        """Parse a comment block; return ([comment], blank_finish)."""
        if self.state_machine.is_next_line_blank():
            first_comment_line = match.string[match.end():]
            if not first_comment_line.strip():  # empty comment
                return [nodes.comment()], True  # "A tiny but practical wart."
            if first_comment_line.startswith('end of inclusion from "'):
                # cf. parsers.rst.directives.misc.Include
                self.document.include_log.pop()
                return [], True
        (indented, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end())
        while indented and not indented[-1].strip():
            indented.trim_end()
        text = '\n'.join(indented)
        return [nodes.comment(text, text)], blank_finish
    # Ordered dispatch table: (construct method, recognizing pattern).
    # `explicit_construct` tries each pattern in turn on the markup line.
    explicit.constructs = [
          (footnote,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \[
                      (                 # footnote label:
                          [0-9]+          # manually numbered footnote
                        |               # *OR*
                          \#              # anonymous auto-numbered footnote
                        |               # *OR*
                          \#%s            # auto-number ed?) footnote label
                        |               # *OR*
                          \*              # auto-symbol footnote
                      )
                      \]
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE)),
          (citation,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \[(%s)\]          # citation label
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE)),
          (hyperlink_target,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      _                 # target indicator
                      (?![ ]|$)         # first char. not space or EOL
                      """, re.VERBOSE)),
          (substitution_def,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      \|                # substitution indicator
                      (?![ ]|$)         # first char. not space or EOL
                      """, re.VERBOSE)),
          (directive,
           re.compile(r"""
                      \.\.[ ]+          # explicit markup start
                      (%s)              # directive name
                      [ ]?              # optional space
                      ::                # directive delimiter
                      ([ ]+|$)          # whitespace or end of line
                      """ % Inliner.simplename, re.VERBOSE))]
    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        nodelist, blank_finish = self.explicit_construct(match)
        self.parent += nodelist
        self.explicit_list(blank_finish)
        return [], next_state, []
    def explicit_construct(self, match):
        """Determine which explicit construct this is, parse & return it."""
        errors = []
        for method, pattern in self.explicit.constructs:
            expmatch = pattern.match(match.string)
            if expmatch:
                try:
                    return method(self, expmatch)
                except MarkupError as error:
                    lineno = self.state_machine.abs_line_number()
                    message = ' '.join(error.args)
                    errors.append(self.reporter.warning(message, line=lineno))
                    break
        # No construct matched (or it was malformed): treat as a comment.
        nodelist, blank_finish = self.comment(match)
        return nodelist + errors, blank_finish
    def explicit_list(self, blank_finish):
        """
        Create a nested state machine for a series of explicit markup
        constructs (including anonymous hyperlink targets).
        """
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=self.parent, initial_state='Explicit',
            blank_finish=blank_finish,
            match_titles=self.state_machine.match_titles)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Explicit markup')
    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        nodelist, blank_finish = self.anonymous_target(match)
        self.parent += nodelist
        self.explicit_list(blank_finish)
        return [], next_state, []
    def anonymous_target(self, match):
        """Parse an anonymous target (``__ URI``); return ([target], bool)."""
        lineno = self.state_machine.abs_line_number()
        (block, indent, offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         until_blank=True)
        blocktext = match.string[:match.end()] + '\n'.join(block)
        block = [escape2null(line) for line in block]
        target = self.make_target(block, blocktext, lineno, '')
        return [target], blank_finish
    def line(self, match, context, next_state):
        """Section title overline or transition marker."""
        if self.state_machine.match_titles:
            return [match.string], 'Line', []
        elif match.string.strip() == '::':
            raise statemachine.TransitionCorrection('text')
        elif len(match.string.strip()) < 4:
            msg = self.reporter.info(
                'Unexpected possible title overline or transition.\n'
                "Treating it as ordinary text because it's so short.",
                line=self.state_machine.abs_line_number())
            self.parent += msg
            raise statemachine.TransitionCorrection('text')
        else:
            blocktext = self.state_machine.line
            msg = self.reporter.severe(
                'Unexpected section title or transition.',
                nodes.literal_block(blocktext, blocktext),
                line=self.state_machine.abs_line_number())
            self.parent += msg
            return [], next_state, []
    def text(self, match, context, next_state):
        """Titles, definition lists, paragraphs."""
        return [match.string], 'Text', []
class RFC2822Body(Body):

    """
    RFC2822 headers are only valid as the first constructs in documents.  As
    soon as anything else appears, the `Body` state should take over.
    """

    patterns = Body.patterns.copy()  # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body'))  # just before 'text'

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        field, blank_finish = self.rfc2822_field(match)
        # NOTE(review): line dropped in this copy; restored from context:
        fieldlist += field
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """Parse one ``Name: value`` field; return (field node, blank_finish)."""
        name = match.string[:match.string.find(':')]
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_first_known_indented(match.end(),
                                                         until_blank=True)
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.  Compound
    elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts up a nested parsing session with `BulletList` as the initial state.
    Only the ``bullet`` transition method is enabled in `BulletList`; as long
    as only bullet list items are encountered, they are parsed and inserted
    into the container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything *other
    than* bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        self.state_machine.previous_line()  # back up so parent SM can reassess
        raise EOFError

    indent = invalid_input
    bullet = invalid_input
    enumerator = invalid_input
    field_marker = invalid_input
    option_marker = invalid_input
    doctest = invalid_input
    line_block = invalid_input
    grid_table_top = invalid_input
    simple_table_top = invalid_input
    explicit_markup = invalid_input
    anonymous = invalid_input
    line = invalid_input
    text = invalid_input
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Append another item to the current bullet list."""
        # A different bullet character means a *new* list: abort this
        # nested state machine so the parent can reassess the line.
        if match.string[0] != self.parent['bullet']:
            self.invalid_input()
        item, blank_finish = self.list_item(match.end())
        self.parent += item
        self.blank_finish = blank_finish
        return [], next_state, []
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """A text line: the term of another potential definition item."""
        term_context = [match.string]
        return term_context, 'Definition', []
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        format, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        # NOTE(review): the `self.auto` clauses below were dropped in this
        # copy; restored from context -- verify against upstream.
        if (format != self.format
            or (sequence != '#' and (sequence != self.parent['enumtype']
                                     or self.auto
                                     or ordinal != (self.lastordinal + 1)))
            or not self.is_enumerated_list_item(ordinal, sequence, format)):
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        self.lastordinal = ordinal
        return [], next_state, []
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Append another field to the current field list."""
        new_field, blank_finish = self.field(match)
        self.parent += new_field
        self.blank_finish = blank_finish
        return [], next_state, []
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item."""
        # NOTE(review): the try/except around option_list_item was dropped
        # in this copy; restored from context -- verify against upstream.
        try:
            option_list_item, blank_finish = self.option_list_item(match)
        except MarkupError:
            self.invalid_input()
        self.parent += option_list_item
        self.blank_finish = blank_finish
        return [], next_state, []
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        field, blank_finish = self.rfc2822_field(match)
        self.parent += field
        self.blank_finish = blank_finish
        return [], 'RFC2822List', []

    blank = SpecializedBody.invalid_input
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """Override `Body.parse_field_body` for simpler parsing."""
        # NOTE(review): the accumulator scaffolding here was dropped in this
        # copy; restored from context -- verify against upstream.
        lines = []
        for line in list(indented) + ['']:
            if line.strip():
                lines.append(line)
            elif lines:
                # A blank line ends the current paragraph.
                text = '\n'.join(lines)
                node += nodes.paragraph(text, text)
                lines = []
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """New line of line block."""
        lineno = self.state_machine.abs_line_number()
        line, messages, blank_finish = self.line_block_line(match, lineno)
        # NOTE(review): line dropped in this copy; restored from context:
        self.parent += line
        self.parent.parent += messages
        self.blank_finish = blank_finish
        return [], next_state, []
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        nodelist, blank_finish = self.explicit_construct(match)
        self.parent += nodelist
        self.blank_finish = blank_finish
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        nodelist, blank_finish = self.anonymous_target(match)
        self.parent += nodelist
        self.blank_finish = blank_finish
        return [], next_state, []

    blank = SpecializedBody.invalid_input
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
          'embedded_directive': re.compile(r'(%s)::( +|$)'
                                           % Inliner.simplename),
          'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """Directive embedded in a substitution definition."""
        nodelist, blank_finish = self.directive(match,
                                                alt=self.parent['names'][0])
        self.parent += nodelist
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        # NOTE(review): terminating raise dropped in this copy; restored
        # from context -- verify against upstream.
        raise EOFError

    def text(self, match, context, next_state):
        if not self.state_machine.at_eof():
            self.blank_finish = self.state_machine.is_next_line_blank()
        # NOTE(review): terminating raise dropped in this copy; restored
        # from context -- verify against upstream.
        raise EOFError
class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """End of paragraph."""
        # NOTE: self.paragraph returns [node, system_message(s)], literalnext
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """Flush any pending paragraph at end of input."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        dl = nodes.definition_list()
        # the definition list starts on the line before the indent:
        lineno = self.state_machine.abs_line_number() - 1
        dl.source, dl.line = self.state_machine.get_source_and_line(lineno)
        dl_item, blank_finish = self.definition_list_item(context)
        dl += dl_item
        self.parent += dl
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=dl, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """Section title."""
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                raise statemachine.TransitionCorrection('text')
            else:
                blocktext = context[0] + '\n' + self.state_machine.line
                msg = self.reporter.warning(
                    'Title underline too short.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                messages.append(msg)
        if not self.state_machine.match_titles:
            blocktext = context[0] + '\n' + self.state_machine.line
            # We need get_source_and_line() here to report correctly
            src, srcline = self.state_machine.get_source_and_line()
            # TODO: why is abs_line_number() == srcline+1
            # if the error is in a table (try with test_tables.py)?
            # print("get_source_and_line", srcline)
            # print("abs_line_number", self.state_machine.abs_line_number())
            msg = self.reporter.severe(
                'Unexpected section title.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        style = underline[0]
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            block, src, srcline = err.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=src, line=srcline)
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """Return a list of nodes."""
        (indented, indent, offset, blank_finish
         ) = self.state_machine.get_indented()
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        (literal_block.source,
         literal_block.line) = self.state_machine.get_source_and_line(offset+1)
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        """Parse a quoted (unindented, per-line-prefixed) literal block."""
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        parent_node = nodes.Element()
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=False,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        """Parse one term + definition; return (dl_item, blank_finish)."""
        # the parser is already on the second (indented) line:
        dd_lineno = self.state_machine.abs_line_number()
        dt_lineno = dd_lineno - 1
        (indented, indent, line_offset, blank_finish
         ) = self.state_machine.get_indented()
        dl_item = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        (dl_item.source,
         dl_item.line) = self.state_machine.get_source_and_line(dt_lineno)
        dt_nodes, messages = self.term(termline, dt_lineno)
        dl_item += dt_nodes
        dd = nodes.definition('', *messages)
        dd.source, dd.line = self.state_machine.get_source_and_line(dd_lineno)
        dl_item += dd
        if termline[0][-2:] == '::':
            dd += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=dd_lineno)
        # TODO: drop a definition if it is an empty comment to allow
        # definition list items with several terms?
        # https://sourceforge.net/p/docutils/feature-requests/60/
        self.nested_parse(indented, input_offset=line_offset, node=dd)
        return dl_item, blank_finish

    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        dt = nodes.term(lines[0])
        dt.source, dt.line = self.state_machine.get_source_and_line(lineno)
        node_list = [dt]
        for i in range(len(text_nodes)):
            node = text_nodes[i]
            if isinstance(node, nodes.Text):
                parts = self.classifier_delimiter.split(node)
                if len(parts) == 1:
                    node_list[-1] += node
                else:
                    # " : " splits the term from trailing classifiers.
                    text = parts[0].rstrip()
                    textnode = nodes.Text(text)
                    node_list[-1] += textnode
                    for part in parts[1:]:
                        node_list.append(
                            nodes.classifier(unescape(part, True), part))
            else:
                node_list[-1] += node
        return node_list, messages
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled.  Override individual methods in
    subclasses to re-enable.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    def eof(self, context):
        """Incomplete construct."""
        return []

    # Every transition is disabled by default; subclasses re-enable
    # only the ones they actually handle.
    blank = indent = underline = text = invalid_input
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        # Back up two lines so the parent state machine can reassess.
        self.state_machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        item, finished = self.definition_list_item(context)
        self.parent += item
        self.blank_finish = finished
        return [], 'DefinitionList', []
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1  # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            # Too short for a transition; re-parse as ordinary text
            # (state_correction raises StateCorrection).
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            src, srcline = self.state_machine.get_source_and_line()
            # lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.source = src
            transition.line = srcline - 1
            # transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Too short for a transition marker; back up and re-parse.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # Overline + title at end of input: no underline possible.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            # Third line is not a valid underline at all.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            # Overline and underline must use identical characters.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            # Warn (but still accept) when the title is wider than its
            # overline.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        # An underline-style line directly after the overline: never valid.
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        # Report the short overline, then rewind so the lines are
        # re-parsed as ordinary text (state_correction raises).
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        # Rewind, clear the context, and restart parsing in the Body state.
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        RSTState.__init__(self, state_machine, debug)
        # System messages accumulated while parsing; flushed to the
        # parent node in `eof()`.
        self.messages = []
        # Absolute line number of the first quoted line; set by
        # `initial_quoted()`.
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        # A blank line ends the block once quoted lines were collected.
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        # Build a literal_block from the collected lines, if any;
        # otherwise report that no literal block was found.
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number()
                )  # src not available, statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        # Indentation inside a quoted literal block is an error; rewind
        # so the indented line is re-parsed by the parent state machine.
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote))
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        # A line with a different quote character breaks the block.
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError
# Note: QuotedLiteralBlock is deliberately excluded (see its docstring).
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""