2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
103 __docformat__
= 'reStructuredText'
108 from types
import FunctionType
, MethodType
110 from docutils
import nodes
, statemachine
, utils
111 from docutils
import ApplicationError
, DataError
112 from docutils
.statemachine
import StateMachineWS
, StateWS
113 from docutils
.nodes
import fully_normalize_name
as normalize_name
114 from docutils
.nodes
import whitespace_normalize_name
115 import docutils
.parsers
.rst
116 from docutils
.parsers
.rst
import directives
, languages
, tableparser
, roles
117 from docutils
.parsers
.rst
.languages
import en
as _fallback_language_module
118 from docutils
.utils
import escape2null
, unescape
, column_width
119 from docutils
.utils
import punctuation_chars
, roman
, urischemes
class MarkupError(DataError):
    """Data error in reStructuredText markup."""


class UnknownInterpretedRoleError(DataError):
    """Data error: an interpreted-text role name is not known."""


class InterpretedRoleNotImplementedError(DataError):
    """Data error: an interpreted-text role is known but not implemented."""


class ParserError(ApplicationError):
    """Application-level error raised by the parser."""
125 class MarkupMismatch(Exception): pass
130 """Stores data attributes for dotted-attribute access."""
132 def __init__(self
, **keywordargs
):
133 self
.__dict
__.update(keywordargs
)
136 class RSTStateMachine(StateMachineWS
):
139 reStructuredText's master StateMachine.
141 The entry point to reStructuredText parsing is the `run()` method.
144 def run(self
, input_lines
, document
, input_offset
=0, match_titles
=True,
147 Parse `input_lines` and modify the `document` node in place.
149 Extend `StateMachineWS.run()`: set up parse-global data and
150 run the StateMachine.
152 self
.language
= languages
.get_language(
153 document
.settings
.language_code
)
154 self
.match_titles
= match_titles
157 inliner
.init_customizations(document
.settings
)
158 self
.memo
= Struct(document
=document
,
159 reporter
=document
.reporter
,
160 language
=self
.language
,
163 section_bubble_up_kludge
=False,
165 self
.document
= document
166 self
.attach_observer(document
.note_source
)
167 self
.reporter
= self
.memo
.reporter
169 results
= StateMachineWS
.run(self
, input_lines
, input_offset
,
170 input_source
=document
['source'])
171 assert results
== [], 'RSTStateMachine.run() results should be empty!'
172 self
.node
= self
.memo
= None # remove unneeded references
175 class NestedStateMachine(StateMachineWS
):
178 StateMachine run from within other StateMachine runs, to parse nested
182 def run(self
, input_lines
, input_offset
, memo
, node
, match_titles
=True):
184 Parse `input_lines` and populate a `docutils.nodes.document` instance.
186 Extend `StateMachineWS.run()`: set up document-wide data.
188 self
.match_titles
= match_titles
190 self
.document
= memo
.document
191 self
.attach_observer(self
.document
.note_source
)
192 self
.reporter
= memo
.reporter
193 self
.language
= memo
.language
195 results
= StateMachineWS
.run(self
, input_lines
, input_offset
)
196 assert results
== [], ('NestedStateMachine.run() results should be '
201 class RSTState(StateWS
):
204 reStructuredText State superclass.
206 Contains methods used by all State subclasses.
209 nested_sm
= NestedStateMachine
212 def __init__(self
, state_machine
, debug
=False):
213 self
.nested_sm_kwargs
= {'state_classes': state_classes
,
214 'initial_state': 'Body'}
215 StateWS
.__init
__(self
, state_machine
, debug
)
217 def runtime_init(self
):
218 StateWS
.runtime_init(self
)
219 memo
= self
.state_machine
.memo
221 self
.reporter
= memo
.reporter
222 self
.inliner
= memo
.inliner
223 self
.document
= memo
.document
224 self
.parent
= self
.state_machine
.node
225 # enable the reporter to determine source and source-line
226 if not hasattr(self
.reporter
, 'get_source_and_line'):
227 self
.reporter
.get_source_and_line
= self
.state_machine
.get_source_and_line
228 # print "adding get_source_and_line to reporter", self.state_machine.input_offset
231 def goto_line(self
, abs_line_offset
):
233 Jump to input line `abs_line_offset`, ignoring jumps past the end.
236 self
.state_machine
.goto_line(abs_line_offset
)
240 def no_match(self
, context
, transitions
):
242 Override `StateWS.no_match` to generate a system message.
244 This code should never be run.
246 self
.reporter
.severe(
247 'Internal error: no transition pattern match. State: "%s"; '
248 'transitions: %s; context: %s; current line: %r.'
249 % (self
.__class
__.__name
__, transitions
, context
,
250 self
.state_machine
.line
))
251 return context
, None, []
253 def bof(self
, context
):
254 """Called at beginning of file."""
257 def nested_parse(self
, block
, input_offset
, node
, match_titles
=False,
258 state_machine_class
=None, state_machine_kwargs
=None):
260 Create a new StateMachine rooted at `node` and run it over the input
264 if state_machine_class
is None:
265 state_machine_class
= self
.nested_sm
267 if state_machine_kwargs
is None:
268 state_machine_kwargs
= self
.nested_sm_kwargs
270 block_length
= len(block
)
275 state_machine
= self
.nested_sm_cache
.pop()
278 if not state_machine
:
279 state_machine
= state_machine_class(debug
=self
.debug
,
280 **state_machine_kwargs
)
281 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
282 node
=node
, match_titles
=match_titles
)
284 self
.nested_sm_cache
.append(state_machine
)
286 state_machine
.unlink()
287 new_offset
= state_machine
.abs_line_offset()
288 # No `block.parent` implies disconnected -- lines aren't in sync:
289 if block
.parent
and (len(block
) - block_length
) != 0:
290 # Adjustment for block if modified in nested parse:
291 self
.state_machine
.next_line(len(block
) - block_length
)
294 def nested_list_parse(self
, block
, input_offset
, node
, initial_state
,
296 blank_finish_state
=None,
299 state_machine_class
=None,
300 state_machine_kwargs
=None):
302 Create a new StateMachine rooted at `node` and run it over the input
303 `block`. Also keep track of optional intermediate blank lines and the
306 if state_machine_class
is None:
307 state_machine_class
= self
.nested_sm
308 if state_machine_kwargs
is None:
309 state_machine_kwargs
= self
.nested_sm_kwargs
.copy()
310 state_machine_kwargs
['initial_state'] = initial_state
311 state_machine
= state_machine_class(debug
=self
.debug
,
312 **state_machine_kwargs
)
313 if blank_finish_state
is None:
314 blank_finish_state
= initial_state
315 state_machine
.states
[blank_finish_state
].blank_finish
= blank_finish
316 for key
, value
in extra_settings
.items():
317 setattr(state_machine
.states
[initial_state
], key
, value
)
318 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
319 node
=node
, match_titles
=match_titles
)
320 blank_finish
= state_machine
.states
[blank_finish_state
].blank_finish
321 state_machine
.unlink()
322 return state_machine
.abs_line_offset(), blank_finish
324 def section(self
, title
, source
, style
, lineno
, messages
):
325 """Check for a valid subsection and create one if it checks out."""
326 if self
.check_subsection(source
, style
, lineno
):
327 self
.new_subsection(title
, lineno
, messages
)
329 def check_subsection(self
, source
, style
, lineno
):
331 Check for a valid subsection header. Return 1 (true) or None (false).
333 When a new section is reached that isn't a subsection of the current
334 section, back up the line count (use ``previous_line(-x)``), then
335 ``raise EOFError``. The current StateMachine will finish, then the
336 calling StateMachine can re-examine the title. This will work its way
337 back up the calling chain until the correct section level is reached.
339 @@@ Alternative: Evaluate the title, store the title info & level, and
340 back up the chain until that level is reached. Store in memo? Or
343 :Exception: `EOFError` when a sibling or supersection encountered.
346 title_styles
= memo
.title_styles
347 mylevel
= memo
.section_level
348 try: # check for existing title style
349 level
= title_styles
.index(style
) + 1
350 except ValueError: # new title style
351 if len(title_styles
) == memo
.section_level
: # new subsection
352 title_styles
.append(style
)
354 else: # not at lowest level
355 self
.parent
+= self
.title_inconsistent(source
, lineno
)
357 if level
<= mylevel
: # sibling or supersection
358 memo
.section_level
= level
# bubble up to parent section
360 memo
.section_bubble_up_kludge
= True
361 # back up 2 lines for underline title, 3 for overline title
362 self
.state_machine
.previous_line(len(style
) + 1)
363 raise EOFError # let parent section re-evaluate
364 if level
== mylevel
+ 1: # immediate subsection
366 else: # invalid subsection
367 self
.parent
+= self
.title_inconsistent(source
, lineno
)
370 def title_inconsistent(self
, sourcetext
, lineno
):
371 error
= self
.reporter
.severe(
372 'Title level inconsistent:', nodes
.literal_block('', sourcetext
),
376 def new_subsection(self
, title
, lineno
, messages
):
377 """Append new subsection to document tree. On return, check level."""
379 mylevel
= memo
.section_level
380 memo
.section_level
+= 1
381 section_node
= nodes
.section()
382 self
.parent
+= section_node
383 textnodes
, title_messages
= self
.inline_text(title
, lineno
)
384 titlenode
= nodes
.title(title
, '', *textnodes
)
385 name
= normalize_name(titlenode
.astext())
386 section_node
['names'].append(name
)
387 section_node
+= titlenode
388 section_node
+= messages
389 section_node
+= title_messages
390 self
.document
.note_implicit_target(section_node
, section_node
)
391 offset
= self
.state_machine
.line_offset
+ 1
392 absoffset
= self
.state_machine
.abs_line_offset() + 1
393 newabsoffset
= self
.nested_parse(
394 self
.state_machine
.input_lines
[offset
:], input_offset
=absoffset
,
395 node
=section_node
, match_titles
=True)
396 self
.goto_line(newabsoffset
)
397 if memo
.section_level
<= mylevel
: # can't handle next section?
398 raise EOFError # bubble up to supersection
399 # reset section_level; next pass will detect it properly
400 memo
.section_level
= mylevel
402 def paragraph(self
, lines
, lineno
):
404 Return a list (paragraph & messages) & a boolean: literal_block next?
406 data
= '\n'.join(lines
).rstrip()
407 if re
.search(r
'(?<!\\)(\\\\)*::$', data
):
410 elif data
[-3] in ' \n':
411 text
= data
[:-3].rstrip()
418 textnodes
, messages
= self
.inline_text(text
, lineno
)
419 p
= nodes
.paragraph(data
, '', *textnodes
)
420 p
.source
, p
.line
= self
.state_machine
.get_source_and_line(lineno
)
421 return [p
] + messages
, literalnext
423 def inline_text(self
, text
, lineno
):
425 Return 2 lists: nodes (text and inline elements), and system_messages.
427 return self
.inliner
.parse(text
, lineno
, self
.memo
, self
.parent
)
429 def unindent_warning(self
, node_name
):
430 # the actual problem is one line below the current line
431 lineno
= self
.state_machine
.abs_line_number()+1
432 return self
.reporter
.warning('%s ends without a blank line; '
433 'unexpected unindent.' % node_name
,
def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    If `compile` is false, return the uncompiled pattern string instead.
    (The parameter shadows the `compile` builtin but its name is kept for
    backward compatibility with existing callers.)
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        if isinstance(part, tuple):
            # Nested definition: recurse without compiling so the result
            # can be embedded as a string in this or-group.
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    # `locals()` supplies prefix/name/or_group/suffix to the template.
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
463 Parse inline markup; call the `parse()` method.
467 self
.implicit_dispatch
= [(self
.patterns
.uri
, self
.standalone_uri
),]
468 """List of (pattern, bound method) tuples, used by
469 `self.implicit_inline`."""
471 def init_customizations(self
, settings
):
472 """Setting-based customizations; run when parsing begins."""
473 if settings
.pep_references
:
474 self
.implicit_dispatch
.append((self
.patterns
.pep
,
476 if settings
.rfc_references
:
477 self
.implicit_dispatch
.append((self
.patterns
.rfc
,
480 def parse(self
, text
, lineno
, memo
, parent
):
481 # Needs to be refactored for nested inline markup.
482 # Add nested_parse() method?
484 Return 2 lists: nodes (text and inline elements), and system_messages.
486 Using `self.patterns.initial`, a pattern which matches start-strings
487 (emphasis, strong, interpreted, phrase reference, literal,
488 substitution reference, and inline target) and complete constructs
489 (simple reference, footnote reference), search for a candidate. When
490 one is found, check for validity (e.g., not a quoted '*' character).
491 If valid, search for the corresponding end string if applicable, and
492 check it for validity. If not found or invalid, generate a warning
493 and ignore the start-string. Implicit inline markup (e.g. standalone
496 self
.reporter
= memo
.reporter
497 self
.document
= memo
.document
498 self
.language
= memo
.language
500 pattern_search
= self
.patterns
.initial
.search
501 dispatch
= self
.dispatch
502 remaining
= escape2null(text
)
507 match
= pattern_search(remaining
)
509 groups
= match
.groupdict()
510 method
= dispatch
[groups
['start'] or groups
['backquote']
511 or groups
['refend'] or groups
['fnend']]
512 before
, inlines
, remaining
, sysmessages
= method(self
, match
,
514 unprocessed
.append(before
)
515 messages
+= sysmessages
517 processed
+= self
.implicit_inline(''.join(unprocessed
),
523 remaining
= ''.join(unprocessed
) + remaining
525 processed
+= self
.implicit_inline(remaining
, lineno
)
526 return processed
, messages
528 # Inline object recognition
529 # -------------------------
530 # lookahead and look-behind expressions for inline markup rules
531 start_string_prefix
= (u
'(^|(?<=\\s|[%s%s]))' %
532 (punctuation_chars
.openers
,
533 punctuation_chars
.delimiters
))
534 end_string_suffix
= (u
'($|(?=\\s|[\x00%s%s%s]))' %
535 (punctuation_chars
.closing_delimiters
,
536 punctuation_chars
.delimiters
,
537 punctuation_chars
.closers
))
538 # print start_string_prefix.encode('utf8')
539 # TODO: support non-ASCII whitespace in the following 4 patterns?
540 non_whitespace_before
= r
'(?<![ \n])'
541 non_whitespace_escape_before
= r
'(?<![ \n\x00])'
542 non_unescaped_whitespace_escape_before
= r
'(?<!(?<!\x00)[ \n\x00])'
543 non_whitespace_after
= r
'(?![ \n])'
544 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
545 simplename
= r
'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
546 # Valid URI characters (see RFC 2396 & RFC 2732);
547 # final \x00 allows backslash escapes in URIs:
548 uric
= r
"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
549 # Delimiter indicating the end of a URI (not part of the URI):
550 uri_end_delim
= r
"""[>]"""
551 # Last URI character; same as uric but no punctuation:
552 urilast
= r
"""[_~*/=+a-zA-Z0-9]"""
553 # End of a URI (either 'urilast' or 'uric followed by a
555 uri_end
= r
"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
556 emailc
= r
"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
558 %(emailc)s+(?:\.%(emailc)s+)* # name
560 %(emailc)s+(?:\.%(emailc)s*)* # host
561 %(uri_end)s # final URI char
563 parts
= ('initial_inline', start_string_prefix
, '',
564 [('start', '', non_whitespace_after
, # simple start-strings
566 r
'\*(?!\*)', # emphasis but not strong
568 r
'_`', # inline internal target
569 r
'\|(?!\|)'] # substitution reference
571 ('whole', '', end_string_suffix
, # whole constructs
572 [# reference name & end-string
573 r
'(?P<refname>%s)(?P<refend>__?)' % simplename
,
574 ('footnotelabel', r
'\[', r
'(?P<fnend>\]_)',
575 [r
'[0-9]+', # manually numbered
576 r
'\#(%s)?' % simplename
, # auto-numbered (w/ label?)
578 r
'(?P<citationlabel>%s)' % simplename
] # citation reference
582 ('backquote', # interpreted text or phrase reference
583 '(?P<role>(:%s:)?)' % simplename
, # optional role
584 non_whitespace_after
,
585 ['`(?!`)'] # but not literal
590 initial
=build_regexp(parts
),
591 emphasis
=re
.compile(non_whitespace_escape_before
592 + r
'(\*)' + end_string_suffix
, re
.UNICODE
),
593 strong
=re
.compile(non_whitespace_escape_before
594 + r
'(\*\*)' + end_string_suffix
, re
.UNICODE
),
595 interpreted_or_phrase_ref
=re
.compile(
597 %(non_unescaped_whitespace_escape_before)s
601 (?P<role>:%(simplename)s:)?
605 %(end_string_suffix)s
606 """ % locals(), re
.VERBOSE | re
.UNICODE
),
607 embedded_link
=re
.compile(
610 (?:[ \n]+|^) # spaces or beginning of line/string
612 %(non_whitespace_after)s
613 ([^<>\x00]+(\x00_)?) # anything but angle brackets & nulls
614 # except escaped trailing low line
615 %(non_whitespace_before)s
616 > # close bracket w/o whitespace before
619 """ % locals(), re
.VERBOSE | re
.UNICODE
),
620 literal
=re
.compile(non_whitespace_before
+ '(``)'
621 + end_string_suffix
),
622 target
=re
.compile(non_whitespace_escape_before
623 + r
'(`)' + end_string_suffix
),
624 substitution_ref
=re
.compile(non_whitespace_escape_before
626 + end_string_suffix
),
627 email
=re
.compile(email_pattern
% locals() + '$',
628 re
.VERBOSE | re
.UNICODE
),
631 %(start_string_prefix)s
633 (?P<absolute> # absolute URI
634 (?P<scheme> # scheme (http, ftp, mailto)
635 [a-zA-Z][a-zA-Z0-9.+-]*
640 (//?)? # hierarchical URI
641 %(uric)s* # URI characters
642 %(uri_end)s # final URI char
648 ( # optional fragment
655 (?P<email> # email address
656 """ + email_pattern
+ r
"""
659 %(end_string_suffix)s
660 """) % locals(), re
.VERBOSE | re
.UNICODE
),
663 %(start_string_prefix)s
665 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
667 (PEP\s+(?P<pepnum2>\d+)) # reference by name
669 %(end_string_suffix)s""" % locals(), re
.VERBOSE | re
.UNICODE
),
672 %(start_string_prefix)s
673 (RFC(-|\s+)?(?P<rfcnum>\d+))
674 %(end_string_suffix)s""" % locals(), re
.VERBOSE | re
.UNICODE
))
676 def quoted_start(self
, match
):
677 """Test if inline markup start-string is 'quoted'.
679 'Quoted' in this context means the start-string is enclosed in a pair
680 of matching opening/closing delimiters (not necessarily quotes)
681 or at the end of the match.
683 string
= match
.string
684 start
= match
.start()
685 if start
== 0: # start-string at beginning of text
687 prestart
= string
[start
- 1]
689 poststart
= string
[match
.end()]
690 except IndexError: # start-string at end of text
691 return True # not "quoted" but no markup start-string either
692 return punctuation_chars
.match_chars(prestart
, poststart
)
694 def inline_obj(self
, match
, lineno
, end_pattern
, nodeclass
,
695 restore_backslashes
=False):
696 string
= match
.string
697 matchstart
= match
.start('start')
698 matchend
= match
.end('start')
699 if self
.quoted_start(match
):
700 return (string
[:matchend
], [], string
[matchend
:], [], '')
701 endmatch
= end_pattern
.search(string
[matchend
:])
702 if endmatch
and endmatch
.start(1): # 1 or more chars
703 text
= unescape(endmatch
.string
[:endmatch
.start(1)],
705 textend
= matchend
+ endmatch
.end(1)
706 rawsource
= unescape(string
[matchstart
:textend
], 1)
707 return (string
[:matchstart
], [nodeclass(rawsource
, text
)],
708 string
[textend
:], [], endmatch
.group(1))
709 msg
= self
.reporter
.warning(
710 'Inline %s start-string without end-string.'
711 % nodeclass
.__name
__, line
=lineno
)
712 text
= unescape(string
[matchstart
:matchend
], 1)
713 rawsource
= unescape(string
[matchstart
:matchend
], 1)
714 prb
= self
.problematic(text
, rawsource
, msg
)
715 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
], ''
717 def problematic(self
, text
, rawsource
, message
):
718 msgid
= self
.document
.set_id(message
, self
.parent
)
719 problematic
= nodes
.problematic(rawsource
, text
, refid
=msgid
)
720 prbid
= self
.document
.set_id(problematic
)
721 message
.add_backref(prbid
)
724 def emphasis(self
, match
, lineno
):
725 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
726 match
, lineno
, self
.patterns
.emphasis
, nodes
.emphasis
)
727 return before
, inlines
, remaining
, sysmessages
729 def strong(self
, match
, lineno
):
730 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
731 match
, lineno
, self
.patterns
.strong
, nodes
.strong
)
732 return before
, inlines
, remaining
, sysmessages
734 def interpreted_or_phrase_ref(self
, match
, lineno
):
735 end_pattern
= self
.patterns
.interpreted_or_phrase_ref
736 string
= match
.string
737 matchstart
= match
.start('backquote')
738 matchend
= match
.end('backquote')
739 rolestart
= match
.start('role')
740 role
= match
.group('role')
745 elif self
.quoted_start(match
):
746 return (string
[:matchend
], [], string
[matchend
:], [])
747 endmatch
= end_pattern
.search(string
[matchend
:])
748 if endmatch
and endmatch
.start(1): # 1 or more chars
749 textend
= matchend
+ endmatch
.end()
750 if endmatch
.group('role'):
752 msg
= self
.reporter
.warning(
753 'Multiple roles in interpreted text (both '
754 'prefix and suffix present; only one allowed).',
756 text
= unescape(string
[rolestart
:textend
], 1)
757 prb
= self
.problematic(text
, text
, msg
)
758 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
759 role
= endmatch
.group('suffix')[1:-1]
761 escaped
= endmatch
.string
[:endmatch
.start(1)]
762 rawsource
= unescape(string
[matchstart
:textend
], 1)
763 if rawsource
[-1:] == '_':
765 msg
= self
.reporter
.warning(
766 'Mismatch: both interpreted text role %s and '
767 'reference suffix.' % position
, line
=lineno
)
768 text
= unescape(string
[rolestart
:textend
], 1)
769 prb
= self
.problematic(text
, text
, msg
)
770 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
771 return self
.phrase_ref(string
[:matchstart
], string
[textend
:],
772 rawsource
, escaped
, unescape(escaped
))
774 rawsource
= unescape(string
[rolestart
:textend
], 1)
775 nodelist
, messages
= self
.interpreted(rawsource
, escaped
, role
,
777 return (string
[:rolestart
], nodelist
,
778 string
[textend
:], messages
)
779 msg
= self
.reporter
.warning(
780 'Inline interpreted text or phrase reference start-string '
781 'without end-string.', line
=lineno
)
782 text
= unescape(string
[matchstart
:matchend
], 1)
783 prb
= self
.problematic(text
, text
, msg
)
784 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
]
786 def phrase_ref(self
, before
, after
, rawsource
, escaped
, text
):
787 match
= self
.patterns
.embedded_link
.search(escaped
)
788 if match
: # embedded <URI> or <alias_>
789 text
= unescape(escaped
[:match
.start(0)])
790 aliastext
= unescape(match
.group(2), restore_backslashes
=True)
791 if aliastext
.endswith('_') and not (aliastext
.endswith(r
'\_')
792 or self
.patterns
.uri
.match(aliastext
)):
794 alias
= normalize_name(aliastext
[:-1])
795 target
= nodes
.target(match
.group(1), refname
=alias
)
796 target
.indirect_reference_name
= aliastext
[:-1]
799 alias
= ''.join(aliastext
.split())
800 alias
= self
.adjust_uri(alias
)
801 if alias
.endswith(r
'\_'):
802 alias
= alias
[:-2] + '_'
803 target
= nodes
.target(match
.group(1), refuri
=alias
)
804 target
.referenced
= 1
806 raise ApplicationError('problem with embedded link: %r'
813 refname
= normalize_name(text
)
814 reference
= nodes
.reference(rawsource
, text
,
815 name
=whitespace_normalize_name(text
))
816 node_list
= [reference
]
818 if rawsource
[-2:] == '__':
819 if target
and (aliastype
== 'name'):
820 reference
['refname'] = alias
821 self
.document
.note_refname(reference
)
822 # self.document.note_indirect_target(target) # required?
823 elif target
and (aliastype
== 'uri'):
824 reference
['refuri'] = alias
826 reference
['anonymous'] = 1
829 target
['names'].append(refname
)
830 if aliastype
== 'name':
831 reference
['refname'] = alias
832 self
.document
.note_indirect_target(target
)
833 self
.document
.note_refname(reference
)
835 reference
['refuri'] = alias
836 self
.document
.note_explicit_target(target
, self
.parent
)
837 # target.note_referenced_by(name=refname)
838 node_list
.append(target
)
840 reference
['refname'] = refname
841 self
.document
.note_refname(reference
)
842 return before
, node_list
, after
, []
845 def adjust_uri(self
, uri
):
846 match
= self
.patterns
.email
.match(uri
)
848 return 'mailto:' + uri
852 def interpreted(self
, rawsource
, text
, role
, lineno
):
853 role_fn
, messages
= roles
.role(role
, self
.language
, lineno
,
856 nodes
, messages2
= role_fn(role
, rawsource
, text
, lineno
, self
)
857 return nodes
, messages
+ messages2
859 msg
= self
.reporter
.error(
860 'Unknown interpreted text role "%s".' % role
,
862 return ([self
.problematic(rawsource
, rawsource
, msg
)],
865 def literal(self
, match
, lineno
):
866 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
867 match
, lineno
, self
.patterns
.literal
, nodes
.literal
,
868 restore_backslashes
=True)
869 return before
, inlines
, remaining
, sysmessages
871 def inline_internal_target(self
, match
, lineno
):
872 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
873 match
, lineno
, self
.patterns
.target
, nodes
.target
)
874 if inlines
and isinstance(inlines
[0], nodes
.target
):
875 assert len(inlines
) == 1
877 name
= normalize_name(target
.astext())
878 target
['names'].append(name
)
879 self
.document
.note_explicit_target(target
, self
.parent
)
880 return before
, inlines
, remaining
, sysmessages
882 def substitution_reference(self
, match
, lineno
):
883 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
884 match
, lineno
, self
.patterns
.substitution_ref
,
885 nodes
.substitution_reference
)
886 if len(inlines
) == 1:
887 subref_node
= inlines
[0]
888 if isinstance(subref_node
, nodes
.substitution_reference
):
889 subref_text
= subref_node
.astext()
890 self
.document
.note_substitution_ref(subref_node
, subref_text
)
891 if endstring
[-1:] == '_':
892 reference_node
= nodes
.reference(
893 '|%s%s' % (subref_text
, endstring
), '')
894 if endstring
[-2:] == '__':
895 reference_node
['anonymous'] = 1
897 reference_node
['refname'] = normalize_name(subref_text
)
898 self
.document
.note_refname(reference_node
)
899 reference_node
+= subref_node
900 inlines
= [reference_node
]
901 return before
, inlines
, remaining
, sysmessages
903 def footnote_reference(self
, match
, lineno
):
905 Handles `nodes.footnote_reference` and `nodes.citation_reference`
908 label
= match
.group('footnotelabel')
909 refname
= normalize_name(label
)
910 string
= match
.string
911 before
= string
[:match
.start('whole')]
912 remaining
= string
[match
.end('whole'):]
913 if match
.group('citationlabel'):
914 refnode
= nodes
.citation_reference('[%s]_' % label
,
916 refnode
+= nodes
.Text(label
)
917 self
.document
.note_citation_ref(refnode
)
919 refnode
= nodes
.footnote_reference('[%s]_' % label
)
920 if refname
[0] == '#':
921 refname
= refname
[1:]
923 self
.document
.note_autofootnote_ref(refnode
)
926 refnode
['auto'] = '*'
927 self
.document
.note_symbol_footnote_ref(
930 refnode
+= nodes
.Text(label
)
932 refnode
['refname'] = refname
933 self
.document
.note_footnote_ref(refnode
)
934 if utils
.get_trim_footnote_ref_space(self
.document
.settings
):
935 before
= before
.rstrip()
936 return (before
, [refnode
], remaining
, [])
    def reference(self, match, lineno, anonymous=False):
        """
        Handle a simple ("name_") or anonymous ("name__") hyperlink
        reference.  Returns (text before, [reference node], text after, []).
        """
        referencename = match.group('refname')
        refname = normalize_name(referencename)
        referencenode = nodes.reference(
            referencename + match.group('refend'), referencename,
            name=whitespace_normalize_name(referencename))
        if anonymous:
            referencenode['anonymous'] = 1
        else:
            # Named reference: record the refname for later resolution.
            referencenode['refname'] = refname
            self.document.note_refname(referencenode)
        string = match.string
        matchstart = match.start('whole')
        matchend = match.end('whole')
        return (string[:matchstart], [referencenode], string[matchend:], [])
954 def anonymous_reference(self
, match
, lineno
):
955 return self
.reference(match
, lineno
, anonymous
=1)
    def standalone_uri(self, match, lineno):
        """
        Turn a bare URI or email address into a `nodes.reference`.
        Raises MarkupMismatch if the scheme is not recognized.
        """
        if (not match.group('scheme')
            or match.group('scheme').lower() in urischemes.schemes):
            if match.group('email'):
                # Bare email addresses get an implicit mailto: scheme.
                addscheme = 'mailto:'
            else:
                addscheme = ''
            text = match.group('whole')
            unescaped = unescape(text, 0)
            return [nodes.reference(unescape(text, 1), unescaped,
                                    refuri=addscheme + unescaped)]
        else:                   # not a valid scheme
            raise MarkupMismatch
    def pep_reference(self, match, lineno):
        """
        Turn a "PEP nnn" / "pep-nnnn" reference into a `nodes.reference`
        pointing at the PEP's URL (built from document settings).
        """
        text = match.group(0)
        if text.startswith('pep-'):
            pepnum = int(match.group('pepnum1'))
        elif text.startswith('PEP'):
            pepnum = int(match.group('pepnum2'))
        else:
            raise MarkupMismatch
        ref = (self.document.settings.pep_base_url
               + self.document.settings.pep_file_url_template % pepnum)
        unescaped = unescape(text, 0)
        return [nodes.reference(unescape(text, 1), unescaped, refuri=ref)]
984 rfc_url
= 'rfc%d.html'
986 def rfc_reference(self
, match
, lineno
):
987 text
= match
.group(0)
988 if text
.startswith('RFC'):
989 rfcnum
= int(match
.group('rfcnum'))
990 ref
= self
.document
.settings
.rfc_base_url
+ self
.rfc_url
% rfcnum
993 unescaped
= unescape(text
, 0)
994 return [nodes
.reference(unescape(text
, 1), unescaped
, refuri
=ref
)]
    def implicit_inline(self, text, lineno):
        """
        Check each of the patterns in `self.implicit_dispatch` for a match,
        and dispatch to the stored method for the pattern.  Recursively check
        the text before and after the match.  Return a list of `nodes.Text`
        and inline element nodes.
        """
        if not text:
            return []
        for pattern, method in self.implicit_dispatch:
            match = pattern.search(text)
            if match:
                try:
                    # Must recurse on strings before *and* after the match;
                    # there may be multiple patterns.
                    return (self.implicit_inline(text[:match.start()], lineno)
                            + method(match, lineno) +
                            self.implicit_inline(text[match.end():], lineno))
                except MarkupMismatch:
                    # Pattern matched superficially but the handler rejected
                    # it; try the remaining patterns.
                    pass
        return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
1018 dispatch
= {'*': emphasis
,
1020 '`': interpreted_or_phrase_ref
,
1022 '_`': inline_internal_target
,
1023 ']_': footnote_reference
,
1024 '|': substitution_reference
,
1026 '__': anonymous_reference
}
1029 def _loweralpha_to_int(s
, _zero
=(ord('a')-1)):
1030 return ord(s
) - _zero
1032 def _upperalpha_to_int(s
, _zero
=(ord('A')-1)):
1033 return ord(s
) - _zero
def _lowerroman_to_int(s):
    """Convert a lowercase Roman numeral string to an int via the `roman` module."""
    uppercased = s.upper()
    return roman.fromRoman(uppercased)
1039 class Body(RSTState
):
1042 Generic classifier of the first line of a block.
1045 double_width_pad_char
= tableparser
.TableParser
.double_width_pad_char
1046 """Padding character for East Asian double-width text."""
1049 """Enumerated list parsing information."""
1052 'parens': Struct(prefix
='(', suffix
=')', start
=1, end
=-1),
1053 'rparen': Struct(prefix
='', suffix
=')', start
=0, end
=-1),
1054 'period': Struct(prefix
='', suffix
='.', start
=0, end
=-1)}
1055 enum
.formats
= enum
.formatinfo
.keys()
1056 enum
.sequences
= ['arabic', 'loweralpha', 'upperalpha',
1057 'lowerroman', 'upperroman'] # ORDERED!
1058 enum
.sequencepats
= {'arabic': '[0-9]+',
1059 'loweralpha': '[a-z]',
1060 'upperalpha': '[A-Z]',
1061 'lowerroman': '[ivxlcdm]+',
1062 'upperroman': '[IVXLCDM]+',}
1063 enum
.converters
= {'arabic': int,
1064 'loweralpha': _loweralpha_to_int
,
1065 'upperalpha': _upperalpha_to_int
,
1066 'lowerroman': _lowerroman_to_int
,
1067 'upperroman': roman
.fromRoman
}
1069 enum
.sequenceregexps
= {}
1070 for sequence
in enum
.sequences
:
1071 enum
.sequenceregexps
[sequence
] = re
.compile(
1072 enum
.sequencepats
[sequence
] + '$', re
.UNICODE
)
1074 grid_table_top_pat
= re
.compile(r
'\+-[-+]+-\+ *$')
1075 """Matches the top (& bottom) of a full table)."""
1077 simple_table_top_pat
= re
.compile('=+( +=+)+ *$')
1078 """Matches the top of a simple table."""
1080 simple_table_border_pat
= re
.compile('=+[ =]*$')
1081 """Matches the bottom & header bottom of a simple table."""
1084 """Fragments of patterns used by transitions."""
1086 pats
['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1087 pats
['alpha'] = '[a-zA-Z]'
1088 pats
['alphanum'] = '[a-zA-Z0-9]'
1089 pats
['alphanumplus'] = '[a-zA-Z0-9_-]'
1090 pats
['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1091 '|%(upperroman)s|#)' % enum
.sequencepats
)
1092 pats
['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1093 # @@@ Loosen up the pattern? Allow Unicode?
1094 pats
['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1095 pats
['shortopt'] = r
'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1096 pats
['longopt'] = r
'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1097 pats
['option'] = r
'(%(shortopt)s|%(longopt)s)' % pats
1099 for format
in enum
.formats
:
1100 pats
[format
] = '(?P<%s>%s%s%s)' % (
1101 format
, re
.escape(enum
.formatinfo
[format
].prefix
),
1102 pats
['enum'], re
.escape(enum
.formatinfo
[format
].suffix
))
1105 'bullet': u
'[-+*\u2022\u2023\u2043]( +|$)',
1106 'enumerator': r
'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats
,
1107 'field_marker': r
':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1108 'option_marker': r
'%(option)s(, %(option)s)*( +| ?$)' % pats
,
1109 'doctest': r
'>>>( +|$)',
1110 'line_block': r
'\|( +|$)',
1111 'grid_table_top': grid_table_top_pat
,
1112 'simple_table_top': simple_table_top_pat
,
1113 'explicit_markup': r
'\.\.( +|$)',
1114 'anonymous': r
'__( +|$)',
1115 'line': r
'(%(nonalphanum7bit)s)\1* *$' % pats
,
1117 initial_transitions
= (
    def indent(self, match, context, next_state):
        """Block quote."""
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_indented()
        elements = self.block_quote(indented, line_offset)
        self.parent += elements
        if not blank_finish:
            # Quote ended without a blank line: warn about possible
            # unintended dedent.
            self.parent += self.unindent_warning('Block quote')
        return context, next_state, []
    def block_quote(self, indented, line_offset):
        """
        Parse indented lines as one or more block quotes, splitting off
        attributions ("-- author").  Return a list of nodes (block_quote
        elements plus any attribution system messages).
        """
        elements = []
        while indented:
            (blockquote_lines,
             attribution_lines,
             attribution_offset,
             indented,
             new_line_offset) = self.split_attribution(indented, line_offset)
            blockquote = nodes.block_quote()
            self.nested_parse(blockquote_lines, line_offset, blockquote)
            elements.append(blockquote)
            if attribution_lines:
                attribution, messages = self.parse_attribution(
                    attribution_lines, attribution_offset)
                blockquote += attribution
                elements += messages
            line_offset = new_line_offset
            # Skip blank lines before any follow-on quote.
            while indented and not indented[0]:
                indented = indented[1:]
        return elements
    # U+2014 is an em-dash:
    # Matches the "--" / "---" / em-dash prefix of a block quote attribution.
    attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \\n])',
                                     re.UNICODE)
    def split_attribution(self, indented, line_offset):
        """
        Check for a block quote attribution and split it off:

        * First line after a blank line must begin with a dash ("--", "---",
          em-dash; matches `self.attribution_pattern`).
        * Every line after that must have consistent indentation.
        * Attributions must be preceded by block quote content.

        Return a tuple of: (block quote content lines, content offset,
        attribution lines, attribution offset, remaining indented lines).
        """
        blank = None
        nonblank_seen = False
        for i in range(len(indented)):
            line = indented[i].rstrip()
            if line:
                if nonblank_seen and blank == i - 1: # last line blank
                    match = self.attribution_pattern.match(line)
                    if match:
                        attribution_end, indent = self.check_attribution(
                            indented, i)
                        if attribution_end:
                            a_lines = indented[i:attribution_end]
                            # Strip the dash prefix from the first line and
                            # the hanging indent from the rest.
                            a_lines.trim_left(match.end(), end=1)
                            a_lines.trim_left(indent, start=1)
                            return (indented[:i], a_lines,
                                    i, indented[attribution_end:],
                                    line_offset + attribution_end)
                nonblank_seen = True
            else:
                blank = i
        else:
            # No attribution found; everything is quote content.
            return (indented, None, None, None, None)
    def check_attribution(self, indented, attribution_start):
        """
        Check attribution shape.
        Return the index past the end of the attribution, and the indent.
        """
        indent = None
        i = attribution_start + 1
        for i in range(attribution_start + 1, len(indented)):
            line = indented[i].rstrip()
            if not line:
                break
            if indent is None:
                # First continuation line fixes the required indent.
                indent = len(line) - len(line.lstrip())
            elif len(line) - len(line.lstrip()) != indent:
                return None, None       # bad shape; not an attribution
        else:
            # return index of line after last attribution line:
            i += 1
        return i, (indent or 0)
    def parse_attribution(self, indented, line_offset):
        """
        Parse attribution lines into a `nodes.attribution` with inline
        markup resolved.  Return (attribution node, system messages).
        """
        text = '\n'.join(indented).rstrip()
        lineno = self.state_machine.abs_line_number() + line_offset
        textnodes, messages = self.inline_text(text, lineno)
        node = nodes.attribution(text, '', *textnodes)
        node.source, node.line = self.state_machine.get_source_and_line(lineno)
        return node, messages
    def bullet(self, match, context, next_state):
        """Bullet list item."""
        bulletlist = nodes.bullet_list()
        self.parent += bulletlist
        # Remember which bullet character introduced the list.
        bulletlist['bullet'] = match.string[0]
        i, blank_finish = self.list_item(match.end())
        bulletlist += i
        offset = self.state_machine.line_offset + 1   # next line
        # Parse the remaining lines as further items of this list.
        new_line_offset, blank_finish = self.nested_list_parse(
              self.state_machine.input_lines[offset:],
              input_offset=self.state_machine.abs_line_offset() + 1,
              node=bulletlist, initial_state='BulletList',
              blank_finish=blank_finish)
        self.goto_line(new_line_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Bullet list')
        return [], next_state, []
    def list_item(self, indent):
        """
        Parse one list item starting at column `indent`.
        Return (list_item node, blank_finish flag).
        """
        if self.state_machine.line[indent:]:
            # Content follows the marker on the same line.
            indented, line_offset, blank_finish = (
                self.state_machine.get_known_indented(indent))
        else:
            # Marker alone on its line; content starts on the next line.
            indented, indent, line_offset, blank_finish = (
                self.state_machine.get_first_known_indented(indent))
        listitem = nodes.list_item('\n'.join(indented))
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=listitem)
        return listitem, blank_finish
1261 def enumerator(self
, match
, context
, next_state
):
1262 """Enumerated List Item"""
1263 format
, sequence
, text
, ordinal
= self
.parse_enumerator(match
)
1264 if not self
.is_enumerated_list_item(ordinal
, sequence
, format
):
1265 raise statemachine
.TransitionCorrection('text')
1266 enumlist
= nodes
.enumerated_list()
1267 self
.parent
+= enumlist
1269 enumlist
['enumtype'] = 'arabic'
1271 enumlist
['enumtype'] = sequence
1272 enumlist
['prefix'] = self
.enum
.formatinfo
[format
].prefix
1273 enumlist
['suffix'] = self
.enum
.formatinfo
[format
].suffix
1275 enumlist
['start'] = ordinal
1276 msg
= self
.reporter
.info(
1277 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1280 listitem
, blank_finish
= self
.list_item(match
.end())
1281 enumlist
+= listitem
1282 offset
= self
.state_machine
.line_offset
+ 1 # next line
1283 newline_offset
, blank_finish
= self
.nested_list_parse(
1284 self
.state_machine
.input_lines
[offset
:],
1285 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1286 node
=enumlist
, initial_state
='EnumeratedList',
1287 blank_finish
=blank_finish
,
1288 extra_settings
={'lastordinal': ordinal
,
1290 'auto': sequence
== '#'})
1291 self
.goto_line(newline_offset
)
1292 if not blank_finish
:
1293 self
.parent
+= self
.unindent_warning('Enumerated list')
1294 return [], next_state
, []
1296 def parse_enumerator(self
, match
, expected_sequence
=None):
1298 Analyze an enumerator and return the results.
1301 - the enumerator format ('period', 'parens', or 'rparen'),
1302 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1303 - the text of the enumerator, stripped of formatting, and
1304 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1305 ``None`` is returned for invalid enumerator text).
1307 The enumerator format has already been determined by the regular
1308 expression match. If `expected_sequence` is given, that sequence is
1309 tried first. If not, we check for Roman numeral 1. This way,
1310 single-character Roman numerals (which are also alphabetical) can be
1311 matched. If no sequence has been matched, all sequences are checked in
1314 groupdict
= match
.groupdict()
1316 for format
in self
.enum
.formats
:
1317 if groupdict
[format
]: # was this the format matched?
1318 break # yes; keep `format`
1319 else: # shouldn't happen
1320 raise ParserError('enumerator format not matched')
1321 text
= groupdict
[format
][self
.enum
.formatinfo
[format
].start
1322 :self
.enum
.formatinfo
[format
].end
]
1325 elif expected_sequence
:
1327 if self
.enum
.sequenceregexps
[expected_sequence
].match(text
):
1328 sequence
= expected_sequence
1329 except KeyError: # shouldn't happen
1330 raise ParserError('unknown enumerator sequence: %s'
1333 sequence
= 'lowerroman'
1335 sequence
= 'upperroman'
1337 for sequence
in self
.enum
.sequences
:
1338 if self
.enum
.sequenceregexps
[sequence
].match(text
):
1340 else: # shouldn't happen
1341 raise ParserError('enumerator sequence not matched')
1346 ordinal
= self
.enum
.converters
[sequence
](text
)
1347 except roman
.InvalidRomanNumeralError
:
1349 return format
, sequence
, text
, ordinal
    def is_enumerated_list_item(self, ordinal, sequence, format):
        """
        Check validity based on the ordinal value and the second line.

        Return true if the ordinal is valid and the second line is blank,
        indented, or starts with the next enumerator or an auto-enumerator.
        """
        if ordinal is None:
            return None
        try:
            next_line = self.state_machine.next_line()
        except EOFError:              # end of input lines
            self.state_machine.previous_line()
            return 1
        else:
            # Peek only: restore the state machine's position.
            self.state_machine.previous_line()
        if not next_line[:1].strip():   # blank or indented
            return 1
        result = self.make_enumerator(ordinal + 1, sequence, format)
        if result:
            next_enumerator, auto_enumerator = result
            try:
                if ( next_line.startswith(next_enumerator) or
                     next_line.startswith(auto_enumerator) ):
                    return 1
            except TypeError:
                pass
        return None
    def make_enumerator(self, ordinal, sequence, format):
        """
        Construct and return the next enumerated list item marker, and an
        auto-enumerator ("#" instead of the regular enumerator).

        Return ``None`` for invalid (out of range) ordinals.
        """
        if sequence == '#':
            enumerator = '#'
        elif sequence == 'arabic':
            enumerator = str(ordinal)
        else:
            if sequence.endswith('alpha'):
                if ordinal > 26:
                    # Only 'a'..'z' / 'A'..'Z' are representable.
                    return None
                enumerator = chr(ordinal + ord('a') - 1)
            elif sequence.endswith('roman'):
                try:
                    enumerator = roman.toRoman(ordinal)
                except roman.RomanError:
                    return None
            else:                       # shouldn't happen
                raise ParserError('unknown enumerator sequence: "%s"'
                                  % sequence)
            if sequence.startswith('lower'):
                enumerator = enumerator.lower()
            elif sequence.startswith('upper'):
                enumerator = enumerator.upper()
            else:                       # shouldn't happen
                raise ParserError('unknown enumerator sequence: "%s"'
                                  % sequence)
        formatinfo = self.enum.formatinfo[format]
        next_enumerator = (formatinfo.prefix + enumerator + formatinfo.suffix
                           + ' ')
        auto_enumerator = formatinfo.prefix + '#' + formatinfo.suffix + ' '
        return next_enumerator, auto_enumerator
1417 def field_marker(self
, match
, context
, next_state
):
1418 """Field list item."""
1419 field_list
= nodes
.field_list()
1420 self
.parent
+= field_list
1421 field
, blank_finish
= self
.field(match
)
1423 offset
= self
.state_machine
.line_offset
+ 1 # next line
1424 newline_offset
, blank_finish
= self
.nested_list_parse(
1425 self
.state_machine
.input_lines
[offset
:],
1426 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1427 node
=field_list
, initial_state
='FieldList',
1428 blank_finish
=blank_finish
)
1429 self
.goto_line(newline_offset
)
1430 if not blank_finish
:
1431 self
.parent
+= self
.unindent_warning('Field list')
1432 return [], next_state
, []
1434 def field(self
, match
):
1435 name
= self
.parse_field_marker(match
)
1436 src
, srcline
= self
.state_machine
.get_source_and_line()
1437 lineno
= self
.state_machine
.abs_line_number()
1438 indented
, indent
, line_offset
, blank_finish
= \
1439 self
.state_machine
.get_first_known_indented(match
.end())
1440 field_node
= nodes
.field()
1441 field_node
.source
= src
1442 field_node
.line
= srcline
1443 name_nodes
, name_messages
= self
.inline_text(name
, lineno
)
1444 field_node
+= nodes
.field_name(name
, '', *name_nodes
)
1445 field_body
= nodes
.field_body('\n'.join(indented
), *name_messages
)
1446 field_node
+= field_body
1448 self
.parse_field_body(indented
, line_offset
, field_body
)
1449 return field_node
, blank_finish
    def parse_field_marker(self, match):
        """Extract & return field name from a field marker match."""
        field = match.group()[1:]        # strip off leading ':'
        field = field[:field.rfind(':')] # strip off trailing ':' etc.
        return field
    def parse_field_body(self, indented, offset, node):
        """Parse the indented field body into `node` via a nested parse."""
        self.nested_parse(indented, input_offset=offset, node=node)
1460 def option_marker(self
, match
, context
, next_state
):
1461 """Option list item."""
1462 optionlist
= nodes
.option_list()
1464 listitem
, blank_finish
= self
.option_list_item(match
)
1465 except MarkupError
, error
:
1466 # This shouldn't happen; pattern won't match.
1467 msg
= self
.reporter
.error(u
'Invalid option list marker: %s' %
1470 indented
, indent
, line_offset
, blank_finish
= \
1471 self
.state_machine
.get_first_known_indented(match
.end())
1472 elements
= self
.block_quote(indented
, line_offset
)
1473 self
.parent
+= elements
1474 if not blank_finish
:
1475 self
.parent
+= self
.unindent_warning('Option list')
1476 return [], next_state
, []
1477 self
.parent
+= optionlist
1478 optionlist
+= listitem
1479 offset
= self
.state_machine
.line_offset
+ 1 # next line
1480 newline_offset
, blank_finish
= self
.nested_list_parse(
1481 self
.state_machine
.input_lines
[offset
:],
1482 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1483 node
=optionlist
, initial_state
='OptionList',
1484 blank_finish
=blank_finish
)
1485 self
.goto_line(newline_offset
)
1486 if not blank_finish
:
1487 self
.parent
+= self
.unindent_warning('Option list')
1488 return [], next_state
, []
1490 def option_list_item(self
, match
):
1491 offset
= self
.state_machine
.abs_line_offset()
1492 options
= self
.parse_option_marker(match
)
1493 indented
, indent
, line_offset
, blank_finish
= \
1494 self
.state_machine
.get_first_known_indented(match
.end())
1495 if not indented
: # not an option list item
1496 self
.goto_line(offset
)
1497 raise statemachine
.TransitionCorrection('text')
1498 option_group
= nodes
.option_group('', *options
)
1499 description
= nodes
.description('\n'.join(indented
))
1500 option_list_item
= nodes
.option_list_item('', option_group
,
1503 self
.nested_parse(indented
, input_offset
=line_offset
,
1505 return option_list_item
, blank_finish
1507 def parse_option_marker(self
, match
):
1509 Return a list of `node.option` and `node.option_argument` objects,
1510 parsed from an option marker match.
1512 :Exception: `MarkupError` for invalid option markers.
1515 optionstrings
= match
.group().rstrip().split(', ')
1516 for optionstring
in optionstrings
:
1517 tokens
= optionstring
.split()
1519 firstopt
= tokens
[0].split('=', 1)
1520 if len(firstopt
) > 1:
1521 # "--opt=value" form
1522 tokens
[:1] = firstopt
1524 elif (len(tokens
[0]) > 2
1525 and ((tokens
[0].startswith('-')
1526 and not tokens
[0].startswith('--'))
1527 or tokens
[0].startswith('+'))):
1529 tokens
[:1] = [tokens
[0][:2], tokens
[0][2:]]
1531 if len(tokens
) > 1 and (tokens
[1].startswith('<')
1532 and tokens
[-1].endswith('>')):
1533 # "-o <value1 value2>" form; join all values into one token
1534 tokens
[1:] = [' '.join(tokens
[1:])]
1535 if 0 < len(tokens
) <= 2:
1536 option
= nodes
.option(optionstring
)
1537 option
+= nodes
.option_string(tokens
[0], tokens
[0])
1539 option
+= nodes
.option_argument(tokens
[1], tokens
[1],
1540 delimiter
=delimiter
)
1541 optlist
.append(option
)
1544 'wrong number of option tokens (=%s), should be 1 or 2: '
1545 '"%s"' % (len(tokens
), optionstring
))
    def doctest(self, match, context, next_state):
        """Collect a ">>>" doctest block into a `nodes.doctest_block`."""
        data = '\n'.join(self.state_machine.get_text_block())
        # TODO: prepend class value ['pycon'] (Python Console)
        # parse with `directives.body.CodeBlock` (returns literal-block
        # with class "code" and syntax highlight markup).
        self.parent += nodes.doctest_block(data, data)
        return [], next_state, []
1556 def line_block(self
, match
, context
, next_state
):
1557 """First line of a line block."""
1558 block
= nodes
.line_block()
1559 self
.parent
+= block
1560 lineno
= self
.state_machine
.abs_line_number()
1561 line
, messages
, blank_finish
= self
.line_block_line(match
, lineno
)
1563 self
.parent
+= messages
1564 if not blank_finish
:
1565 offset
= self
.state_machine
.line_offset
+ 1 # next line
1566 new_line_offset
, blank_finish
= self
.nested_list_parse(
1567 self
.state_machine
.input_lines
[offset
:],
1568 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1569 node
=block
, initial_state
='LineBlock',
1571 self
.goto_line(new_line_offset
)
1572 if not blank_finish
:
1573 self
.parent
+= self
.reporter
.warning(
1574 'Line block ends without a blank line.',
1577 if block
[0].indent
is None:
1579 self
.nest_line_block_lines(block
)
1580 return [], next_state
, []
1582 def line_block_line(self
, match
, lineno
):
1583 """Return one line element of a line_block."""
1584 indented
, indent
, line_offset
, blank_finish
= \
1585 self
.state_machine
.get_first_known_indented(match
.end(),
1587 text
= u
'\n'.join(indented
)
1588 text_nodes
, messages
= self
.inline_text(text
, lineno
)
1589 line
= nodes
.line(text
, '', *text_nodes
)
1590 if match
.string
.rstrip() != '|': # not empty
1591 line
.indent
= len(match
.group(1)) - 1
1592 return line
, messages
, blank_finish
    def nest_line_block_lines(self, block):
        """
        Fill in missing per-line indents (inherit from the previous line),
        then nest the line block by indentation.
        """
        for index in range(1, len(block)):
            if getattr(block[index], 'indent', None) is None:
                block[index].indent = block[index - 1].indent
        self.nest_line_block_segment(block)
1600 def nest_line_block_segment(self
, block
):
1601 indents
= [item
.indent
for item
in block
]
1602 least
= min(indents
)
1604 new_block
= nodes
.line_block()
1606 if item
.indent
> least
:
1607 new_block
.append(item
)
1610 self
.nest_line_block_segment(new_block
)
1611 new_items
.append(new_block
)
1612 new_block
= nodes
.line_block()
1613 new_items
.append(item
)
1615 self
.nest_line_block_segment(new_block
)
1616 new_items
.append(new_block
)
1617 block
[:] = new_items
    def grid_table_top(self, match, context, next_state):
        """Top border of a full table."""
        return self.table_top(match, context, next_state,
                              self.isolate_grid_table,
                              tableparser.GridTableParser)
    def simple_table_top(self, match, context, next_state):
        """Top border of a simple table."""
        return self.table_top(match, context, next_state,
                              self.isolate_simple_table,
                              tableparser.SimpleTableParser)
    def table_top(self, match, context, next_state,
                  isolate_function, parser_class):
        """Top border of a generic table."""
        nodelist, blank_finish = self.table(isolate_function, parser_class)
        self.parent += nodelist
        if not blank_finish:
            msg = self.reporter.warning(
                'Blank line required after table.',
                line=self.state_machine.abs_line_number()+1)
            self.parent += msg
        return [], next_state, []
1643 def table(self
, isolate_function
, parser_class
):
1644 """Parse a table."""
1645 block
, messages
, blank_finish
= isolate_function()
1648 parser
= parser_class()
1649 tabledata
= parser
.parse(block
)
1650 tableline
= (self
.state_machine
.abs_line_number() - len(block
)
1652 table
= self
.build_table(tabledata
, tableline
)
1653 nodelist
= [table
] + messages
1654 except tableparser
.TableMarkupError
, err
:
1655 nodelist
= self
.malformed_table(block
, ' '.join(err
.args
),
1656 offset
=err
.offset
) + messages
1659 return nodelist
, blank_finish
1661 def isolate_grid_table(self
):
1665 block
= self
.state_machine
.get_text_block(flush_left
=True)
1666 except statemachine
.UnexpectedIndentationError
, err
:
1667 block
, src
, srcline
= err
.args
1668 messages
.append(self
.reporter
.error('Unexpected indentation.',
1669 source
=src
, line
=srcline
))
1672 # for East Asian chars:
1673 block
.pad_double_width(self
.double_width_pad_char
)
1674 width
= len(block
[0].strip())
1675 for i
in range(len(block
)):
1676 block
[i
] = block
[i
].strip()
1677 if block
[i
][0] not in '+|': # check left edge
1679 self
.state_machine
.previous_line(len(block
) - i
)
1682 if not self
.grid_table_top_pat
.match(block
[-1]): # find bottom
1684 # from second-last to third line of table:
1685 for i
in range(len(block
) - 2, 1, -1):
1686 if self
.grid_table_top_pat
.match(block
[i
]):
1687 self
.state_machine
.previous_line(len(block
) - i
+ 1)
1691 messages
.extend(self
.malformed_table(block
))
1692 return [], messages
, blank_finish
1693 for i
in range(len(block
)): # check right edge
1694 if len(block
[i
]) != width
or block
[i
][-1] not in '+|':
1695 messages
.extend(self
.malformed_table(block
))
1696 return [], messages
, blank_finish
1697 return block
, messages
, blank_finish
1699 def isolate_simple_table(self
):
1700 start
= self
.state_machine
.line_offset
1701 lines
= self
.state_machine
.input_lines
1702 limit
= len(lines
) - 1
1703 toplen
= len(lines
[start
].strip())
1704 pattern_match
= self
.simple_table_border_pat
.match
1710 match
= pattern_match(line
)
1712 if len(line
.strip()) != toplen
:
1713 self
.state_machine
.next_line(i
- start
)
1714 messages
= self
.malformed_table(
1715 lines
[start
:i
+1], 'Bottom/header table border does '
1716 'not match top border.')
1717 return [], messages
, i
== limit
or not lines
[i
+1].strip()
1720 if found
== 2 or i
== limit
or not lines
[i
+1].strip():
1724 else: # reached end of input_lines
1726 extra
= ' or no blank line after table bottom'
1727 self
.state_machine
.next_line(found_at
- start
)
1728 block
= lines
[start
:found_at
+1]
1731 self
.state_machine
.next_line(i
- start
- 1)
1732 block
= lines
[start
:]
1733 messages
= self
.malformed_table(
1734 block
, 'No bottom table border found%s.' % extra
)
1735 return [], messages
, not extra
1736 self
.state_machine
.next_line(end
- start
)
1737 block
= lines
[start
:end
+1]
1738 # for East Asian chars:
1739 block
.pad_double_width(self
.double_width_pad_char
)
1740 return block
, [], end
== limit
or not lines
[end
+1].strip()
    def malformed_table(self, block, detail='', offset=0):
        """
        Report a malformed table as an error system message containing
        the offending text; return the message in a list.
        """
        # Undo East Asian double-width padding before reporting.
        block.replace(self.double_width_pad_char, '')
        data = '\n'.join(block)
        message = 'Malformed table.'
        startline = self.state_machine.abs_line_number() - len(block) + 1
        if detail:
            message += '\n' + detail
        error = self.reporter.error(message, nodes.literal_block(data, data),
                                    line=startline+offset)
        return [error]
1753 def build_table(self
, tabledata
, tableline
, stub_columns
=0, widths
='auto'):
1754 colwidths
, headrows
, bodyrows
= tabledata
1755 table
= nodes
.table()
1756 tgroup
= nodes
.tgroup(cols
=len(colwidths
), colwidths
=widths
)
1758 for colwidth
in colwidths
:
1759 colspec
= nodes
.colspec(colwidth
=colwidth
)
1761 colspec
.attributes
['stub'] = 1
1765 thead
= nodes
.thead()
1767 for row
in headrows
:
1768 thead
+= self
.build_table_row(row
, tableline
)
1769 tbody
= nodes
.tbody()
1771 for row
in bodyrows
:
1772 tbody
+= self
.build_table_row(row
, tableline
)
1775 def build_table_row(self
, rowdata
, tableline
):
1777 for cell
in rowdata
:
1780 morerows
, morecols
, offset
, cellblock
= cell
1783 attributes
['morerows'] = morerows
1785 attributes
['morecols'] = morecols
1786 entry
= nodes
.entry(**attributes
)
1788 if ''.join(cellblock
):
1789 self
.nested_parse(cellblock
, input_offset
=tableline
+offset
,
1795 """Patterns and constants used for explicit markup recognition."""
1797 explicit
.patterns
= Struct(
1798 target
=re
.compile(r
"""
1800 _ # anonymous target
1802 (?!_) # no underscore at the beginning
1803 (?P<quote>`?) # optional open quote
1804 (?![ `]) # first char. not space or
1806 (?P<name> # reference name
1809 %(non_whitespace_escape_before)s
1810 (?P=quote) # close quote if open quote used
1812 (?<!(?<!\x00):) # no unescaped colon at end
1813 %(non_whitespace_escape_before)s
1814 [ ]? # optional space
1815 : # end of reference name
1816 ([ ]+|$) # followed by whitespace
1817 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1818 reference
=re
.compile(r
"""
1820 (?P<simple>%(simplename)s)_
1824 (?P<phrase>.+?) # hyperlink phrase
1825 %(non_whitespace_escape_before)s
1826 `_ # close backquote,
1830 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1831 substitution
=re
.compile(r
"""
1833 (?![ ]) # first char. not space
1834 (?P<name>.+?) # substitution text
1835 %(non_whitespace_escape_before)s
1836 \| # close delimiter
1838 ([ ]+|$) # followed by whitespace
1839 """ % vars(Inliner
),
1840 re
.VERBOSE | re
.UNICODE
),)
    def footnote(self, match):
        """
        Parse an explicit footnote ("..  [label] body") into a
        `nodes.footnote`; return ([footnote node], blank_finish).
        """
        src, srcline = self.state_machine.get_source_and_line()
        indented, indent, offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        label = match.group(1)
        name = normalize_name(label)
        footnote = nodes.footnote('\n'.join(indented))
        footnote.source = src
        footnote.line = srcline
        if name[0] == '#':              # auto-numbered
            name = name[1:]             # autonumber label
            footnote['auto'] = 1
            if name:
                footnote['names'].append(name)
            self.document.note_autofootnote(footnote)
        elif name == '*':               # auto-symbol
            name = ''
            footnote['auto'] = '*'
            self.document.note_symbol_footnote(footnote)
        else:                           # manually numbered
            footnote += nodes.label('', label)
            footnote['names'].append(name)
            self.document.note_footnote(footnote)
        if name:
            self.document.note_explicit_target(footnote, footnote)
        else:
            # Unnamed (symbol / bare auto) footnotes still need an id.
            self.document.set_id(footnote, footnote)
        if indented:
            self.nested_parse(indented, input_offset=offset, node=footnote)
        return [footnote], blank_finish
    def citation(self, match):
        """
        Parse an explicit citation ("..  [label] body") into a
        `nodes.citation`; return ([citation node], blank_finish).
        """
        src, srcline = self.state_machine.get_source_and_line()
        indented, indent, offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end())
        label = match.group(1)
        name = normalize_name(label)
        citation = nodes.citation('\n'.join(indented))
        citation.source = src
        citation.line = srcline
        citation += nodes.label('', label)
        citation['names'].append(name)
        self.document.note_citation(citation)
        self.document.note_explicit_target(citation, citation)
        if indented:
            self.nested_parse(indented, input_offset=offset, node=citation)
        return [citation], blank_finish
1890 def hyperlink_target(self
, match
):
1891 pattern
= self
.explicit
.patterns
.target
1892 lineno
= self
.state_machine
.abs_line_number()
1893 block
, indent
, offset
, blank_finish
= \
1894 self
.state_machine
.get_first_known_indented(
1895 match
.end(), until_blank
=True, strip_indent
=False)
1896 blocktext
= match
.string
[:match
.end()] + '\n'.join(block
)
1897 block
= [escape2null(line
) for line
in block
]
1901 targetmatch
= pattern
.match(escaped
)
1906 escaped
+= block
[blockindex
]
1908 raise MarkupError('malformed hyperlink target.')
1909 del block
[:blockindex
]
1910 block
[0] = (block
[0] + ' ')[targetmatch
.end()-len(escaped
)-1:].strip()
1911 target
= self
.make_target(block
, blocktext
, lineno
,
1912 targetmatch
.group('name'))
1913 return [target
], blank_finish
1915 def make_target(self
, block
, block_text
, lineno
, target_name
):
1916 target_type
, data
= self
.parse_target(block
, block_text
, lineno
)
1917 if target_type
== 'refname':
1918 target
= nodes
.target(block_text
, '', refname
=normalize_name(data
))
1919 target
.indirect_reference_name
= data
1920 self
.add_target(target_name
, '', target
, lineno
)
1921 self
.document
.note_indirect_target(target
)
1923 elif target_type
== 'refuri':
1924 target
= nodes
.target(block_text
, '')
1925 self
.add_target(target_name
, data
, target
, lineno
)
    def parse_target(self, block, block_text, lineno):
        """
        Determine the type of reference of a target.

        :Return: A 2-tuple, one of:

            - 'refname' and the indirect reference name
            - 'refuri' and the URI
            - 'malformed' and a system_message node
        """
        if block and block[-1].strip()[-1:] == '_': # possible indirect target
            reference = ' '.join([line.strip() for line in block])
            refname = self.is_reference(reference)
            if refname:
                return 'refname', refname
        # Not an indirect target: join lines (dropping all whitespace)
        # into a URI.
        reference = ''.join([''.join(line.split()) for line in block])
        return 'refuri', unescape(reference)
1948 def is_reference(self
, reference
):
1949 match
= self
.explicit
.patterns
.reference
.match(
1950 whitespace_normalize_name(reference
))
1953 return unescape(match
.group('simple') or match
.group('phrase'))
1955 def add_target(self
, targetname
, refuri
, target
, lineno
):
1956 target
.line
= lineno
1958 name
= normalize_name(unescape(targetname
))
1959 target
['names'].append(name
)
1961 uri
= self
.inliner
.adjust_uri(refuri
)
1963 target
['refuri'] = uri
1965 raise ApplicationError('problem with URI: %r' % refuri
)
1966 self
.document
.note_explicit_target(target
, self
.parent
)
1967 else: # anonymous target
1969 target
['refuri'] = refuri
1970 target
['anonymous'] = 1
1971 self
.document
.note_anonymous_target(target
)
1973 def substitution_def(self
, match
):
1974 pattern
= self
.explicit
.patterns
.substitution
1975 src
, srcline
= self
.state_machine
.get_source_and_line()
1976 block
, indent
, offset
, blank_finish
= \
1977 self
.state_machine
.get_first_known_indented(match
.end(),
1979 blocktext
= (match
.string
[:match
.end()] + '\n'.join(block
))
1981 escaped
= escape2null(block
[0].rstrip())
1984 subdefmatch
= pattern
.match(escaped
)
1989 escaped
= escaped
+ ' ' + escape2null(block
[blockindex
].strip())
1991 raise MarkupError('malformed substitution definition.')
1992 del block
[:blockindex
] # strip out the substitution marker
1993 block
[0] = (block
[0].strip() + ' ')[subdefmatch
.end()-len(escaped
)-1:-1]
1997 while block
and not block
[-1].strip():
1999 subname
= subdefmatch
.group('name')
2000 substitution_node
= nodes
.substitution_definition(blocktext
)
2001 substitution_node
.source
= src
2002 substitution_node
.line
= srcline
2004 msg
= self
.reporter
.warning(
2005 'Substitution definition "%s" missing contents.' % subname
,
2006 nodes
.literal_block(blocktext
, blocktext
),
2007 source
=src
, line
=srcline
)
2008 return [msg
], blank_finish
2009 block
[0] = block
[0].strip()
2010 substitution_node
['names'].append(
2011 nodes
.whitespace_normalize_name(subname
))
2012 new_abs_offset
, blank_finish
= self
.nested_list_parse(
2013 block
, input_offset
=offset
, node
=substitution_node
,
2014 initial_state
='SubstitutionDef', blank_finish
=blank_finish
)
2016 for node
in substitution_node
[:]:
2017 if not (isinstance(node
, nodes
.Inline
) or
2018 isinstance(node
, nodes
.Text
)):
2019 self
.parent
+= substitution_node
[i
]
2020 del substitution_node
[i
]
2023 for node
in substitution_node
.traverse(nodes
.Element
):
2024 if self
.disallowed_inside_substitution_definitions(node
):
2025 pformat
= nodes
.literal_block('', node
.pformat().rstrip())
2026 msg
= self
.reporter
.error(
2027 'Substitution definition contains illegal element:',
2028 pformat
, nodes
.literal_block(blocktext
, blocktext
),
2029 source
=src
, line
=srcline
)
2030 return [msg
], blank_finish
2031 if len(substitution_node
) == 0:
2032 msg
= self
.reporter
.warning(
2033 'Substitution definition "%s" empty or invalid.' % subname
,
2034 nodes
.literal_block(blocktext
, blocktext
),
2035 source
=src
, line
=srcline
)
2036 return [msg
], blank_finish
2037 self
.document
.note_substitution_def(
2038 substitution_node
, subname
, self
.parent
)
2039 return [substitution_node
], blank_finish
2041 def disallowed_inside_substitution_definitions(self
, node
):
2043 isinstance(node
, nodes
.reference
) and node
.get('anonymous') or
2044 isinstance(node
, nodes
.footnote_reference
) and node
.get('auto')):
2049 def directive(self
, match
, **option_presets
):
2050 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2051 type_name
= match
.group(1)
2052 directive_class
, messages
= directives
.directive(
2053 type_name
, self
.memo
.language
, self
.document
)
2054 self
.parent
+= messages
2056 return self
.run_directive(
2057 directive_class
, match
, type_name
, option_presets
)
2059 return self
.unknown_directive(type_name
)
2061 def run_directive(self
, directive
, match
, type_name
, option_presets
):
2063 Parse a directive then run its directive function.
2067 - `directive`: The class implementing the directive. Must be
2068 a subclass of `rst.Directive`.
2070 - `match`: A regular expression match object which matched the first
2071 line of the directive.
2073 - `type_name`: The directive name, as used in the source text.
2075 - `option_presets`: A dictionary of preset options, defaults for the
2076 directive options. Currently, only an "alt" option is passed by
2077 substitution definitions (value: the substitution name), which may
2078 be used by an embedded image directive.
2080 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2082 if isinstance(directive
, (FunctionType
, MethodType
)):
2083 from docutils
.parsers
.rst
import convert_directive_function
2084 directive
= convert_directive_function(directive
)
2085 lineno
= self
.state_machine
.abs_line_number()
2086 initial_line_offset
= self
.state_machine
.line_offset
2087 indented
, indent
, line_offset
, blank_finish \
2088 = self
.state_machine
.get_first_known_indented(match
.end(),
2090 block_text
= '\n'.join(self
.state_machine
.input_lines
[
2091 initial_line_offset
: self
.state_machine
.line_offset
+ 1])
2093 arguments
, options
, content
, content_offset
= (
2094 self
.parse_directive_block(indented
, line_offset
,
2095 directive
, option_presets
))
2096 except MarkupError
, detail
:
2097 error
= self
.reporter
.error(
2098 'Error in "%s" directive:\n%s.' % (type_name
,
2099 ' '.join(detail
.args
)),
2100 nodes
.literal_block(block_text
, block_text
), line
=lineno
)
2101 return [error
], blank_finish
2102 directive_instance
= directive(
2103 type_name
, arguments
, options
, content
, lineno
,
2104 content_offset
, block_text
, self
, self
.state_machine
)
2106 result
= directive_instance
.run()
2107 except docutils
.parsers
.rst
.DirectiveError
, error
:
2108 msg_node
= self
.reporter
.system_message(error
.level
, error
.msg
,
2110 msg_node
+= nodes
.literal_block(block_text
, block_text
)
2112 assert isinstance(result
, list), \
2113 'Directive "%s" must return a list of nodes.' % type_name
2114 for i
in range(len(result
)):
2115 assert isinstance(result
[i
], nodes
.Node
), \
2116 ('Directive "%s" returned non-Node object (index %s): %r'
2117 % (type_name
, i
, result
[i
]))
2119 blank_finish
or self
.state_machine
.is_next_line_blank())
2121 def parse_directive_block(self
, indented
, line_offset
, directive
,
2123 option_spec
= directive
.option_spec
2124 has_content
= directive
.has_content
2125 if indented
and not indented
[0].strip():
2126 indented
.trim_start()
2128 while indented
and not indented
[-1].strip():
2130 if indented
and (directive
.required_arguments
2131 or directive
.optional_arguments
2133 for i
, line
in enumerate(indented
):
2134 if not line
.strip():
2138 arg_block
= indented
[:i
]
2139 content
= indented
[i
+1:]
2140 content_offset
= line_offset
+ i
+ 1
2143 content_offset
= line_offset
2146 options
, arg_block
= self
.parse_directive_options(
2147 option_presets
, option_spec
, arg_block
)
2150 if arg_block
and not (directive
.required_arguments
2151 or directive
.optional_arguments
):
2152 content
= arg_block
+ indented
[i
:]
2153 content_offset
= line_offset
2155 while content
and not content
[0].strip():
2156 content
.trim_start()
2158 if directive
.required_arguments
or directive
.optional_arguments
:
2159 arguments
= self
.parse_directive_arguments(
2160 directive
, arg_block
)
2163 if content
and not has_content
:
2164 raise MarkupError('no content permitted')
2165 return (arguments
, options
, content
, content_offset
)
2167 def parse_directive_options(self
, option_presets
, option_spec
, arg_block
):
2168 options
= option_presets
.copy()
2169 for i
, line
in enumerate(arg_block
):
2170 if re
.match(Body
.patterns
['field_marker'], line
):
2171 opt_block
= arg_block
[i
:]
2172 arg_block
= arg_block
[:i
]
2177 success
, data
= self
.parse_extension_options(option_spec
,
2179 if success
: # data is a dict of options
2180 options
.update(data
)
2181 else: # data is an error string
2182 raise MarkupError(data
)
2183 return options
, arg_block
2185 def parse_directive_arguments(self
, directive
, arg_block
):
2186 required
= directive
.required_arguments
2187 optional
= directive
.optional_arguments
2188 arg_text
= '\n'.join(arg_block
)
2189 arguments
= arg_text
.split()
2190 if len(arguments
) < required
:
2191 raise MarkupError('%s argument(s) required, %s supplied'
2192 % (required
, len(arguments
)))
2193 elif len(arguments
) > required
+ optional
:
2194 if directive
.final_argument_whitespace
:
2195 arguments
= arg_text
.split(None, required
+ optional
- 1)
2198 'maximum %s argument(s) allowed, %s supplied'
2199 % (required
+ optional
, len(arguments
)))
2202 def parse_extension_options(self
, option_spec
, datalines
):
2204 Parse `datalines` for a field list containing extension options
2205 matching `option_spec`.
2208 - `option_spec`: a mapping of option name to conversion
2209 function, which should raise an exception on bad input.
2210 - `datalines`: a list of input strings.
2213 - Success value, 1 or 0.
2214 - An option dictionary on success, an error string on failure.
2216 node
= nodes
.field_list()
2217 newline_offset
, blank_finish
= self
.nested_list_parse(
2218 datalines
, 0, node
, initial_state
='ExtensionOptions',
2220 if newline_offset
!= len(datalines
): # incomplete parse of block
2221 return 0, 'invalid option block'
2223 options
= utils
.extract_extension_options(node
, option_spec
)
2224 except KeyError, detail
:
2225 return 0, ('unknown option: "%s"' % detail
.args
[0])
2226 except (ValueError, TypeError), detail
:
2227 return 0, ('invalid option value: %s' % ' '.join(detail
.args
))
2228 except utils
.ExtensionOptionError
, detail
:
2229 return 0, ('invalid option data: %s' % ' '.join(detail
.args
))
2233 return 0, 'option data incompletely parsed'
2235 def unknown_directive(self
, type_name
):
2236 lineno
= self
.state_machine
.abs_line_number()
2237 indented
, indent
, offset
, blank_finish
= \
2238 self
.state_machine
.get_first_known_indented(0, strip_indent
=False)
2239 text
= '\n'.join(indented
)
2240 error
= self
.reporter
.error(
2241 'Unknown directive type "%s".' % type_name
,
2242 nodes
.literal_block(text
, text
), line
=lineno
)
2243 return [error
], blank_finish
2245 def comment(self
, match
):
2246 if not match
.string
[match
.end():].strip() \
2247 and self
.state_machine
.is_next_line_blank(): # an empty comment?
2248 return [nodes
.comment()], 1 # "A tiny but practical wart."
2249 indented
, indent
, offset
, blank_finish
= \
2250 self
.state_machine
.get_first_known_indented(match
.end())
2251 while indented
and not indented
[-1].strip():
2253 text
= '\n'.join(indented
)
2254 return [nodes
.comment(text
, text
)], blank_finish
2256 explicit
.constructs
= [
2259 \.\.[ ]+ # explicit markup start
2262 [0-9]+ # manually numbered footnote
2264 \# # anonymous auto-numbered footnote
2266 \#%s # auto-number ed?) footnote label
2268 \* # auto-symbol footnote
2271 ([ ]+|$) # whitespace or end of line
2272 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
)),
2275 \.\.[ ]+ # explicit markup start
2276 \[(%s)\] # citation label
2277 ([ ]+|$) # whitespace or end of line
2278 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
)),
2281 \.\.[ ]+ # explicit markup start
2282 _ # target indicator
2283 (?![ ]|$) # first char. not space or EOL
2284 """, re
.VERBOSE | re
.UNICODE
)),
2287 \.\.[ ]+ # explicit markup start
2288 \| # substitution indicator
2289 (?![ ]|$) # first char. not space or EOL
2290 """, re
.VERBOSE | re
.UNICODE
)),
2293 \.\.[ ]+ # explicit markup start
2294 (%s) # directive name
2295 [ ]? # optional space
2296 :: # directive delimiter
2297 ([ ]+|$) # whitespace or end of line
2298 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
))]
2300 def explicit_markup(self
, match
, context
, next_state
):
2301 """Footnotes, hyperlink targets, directives, comments."""
2302 nodelist
, blank_finish
= self
.explicit_construct(match
)
2303 self
.parent
+= nodelist
2304 self
.explicit_list(blank_finish
)
2305 return [], next_state
, []
2307 def explicit_construct(self
, match
):
2308 """Determine which explicit construct this is, parse & return it."""
2310 for method
, pattern
in self
.explicit
.constructs
:
2311 expmatch
= pattern
.match(match
.string
)
2314 return method(self
, expmatch
)
2315 except MarkupError
, error
:
2316 lineno
= self
.state_machine
.abs_line_number()
2317 message
= ' '.join(error
.args
)
2318 errors
.append(self
.reporter
.warning(message
, line
=lineno
))
2320 nodelist
, blank_finish
= self
.comment(match
)
2321 return nodelist
+ errors
, blank_finish
2323 def explicit_list(self
, blank_finish
):
2325 Create a nested state machine for a series of explicit markup
2326 constructs (including anonymous hyperlink targets).
2328 offset
= self
.state_machine
.line_offset
+ 1 # next line
2329 newline_offset
, blank_finish
= self
.nested_list_parse(
2330 self
.state_machine
.input_lines
[offset
:],
2331 input_offset
=self
.state_machine
.abs_line_offset() + 1,
2332 node
=self
.parent
, initial_state
='Explicit',
2333 blank_finish
=blank_finish
,
2334 match_titles
=self
.state_machine
.match_titles
)
2335 self
.goto_line(newline_offset
)
2336 if not blank_finish
:
2337 self
.parent
+= self
.unindent_warning('Explicit markup')
2339 def anonymous(self
, match
, context
, next_state
):
2340 """Anonymous hyperlink targets."""
2341 nodelist
, blank_finish
= self
.anonymous_target(match
)
2342 self
.parent
+= nodelist
2343 self
.explicit_list(blank_finish
)
2344 return [], next_state
, []
2346 def anonymous_target(self
, match
):
2347 lineno
= self
.state_machine
.abs_line_number()
2348 block
, indent
, offset
, blank_finish \
2349 = self
.state_machine
.get_first_known_indented(match
.end(),
2351 blocktext
= match
.string
[:match
.end()] + '\n'.join(block
)
2352 block
= [escape2null(line
) for line
in block
]
2353 target
= self
.make_target(block
, blocktext
, lineno
, '')
2354 return [target
], blank_finish
2356 def line(self
, match
, context
, next_state
):
2357 """Section title overline or transition marker."""
2358 if self
.state_machine
.match_titles
:
2359 return [match
.string
], 'Line', []
2360 elif match
.string
.strip() == '::':
2361 raise statemachine
.TransitionCorrection('text')
2362 elif len(match
.string
.strip()) < 4:
2363 msg
= self
.reporter
.info(
2364 'Unexpected possible title overline or transition.\n'
2365 "Treating it as ordinary text because it's so short.",
2366 line
=self
.state_machine
.abs_line_number())
2368 raise statemachine
.TransitionCorrection('text')
2370 blocktext
= self
.state_machine
.line
2371 msg
= self
.reporter
.severe(
2372 'Unexpected section title or transition.',
2373 nodes
.literal_block(blocktext
, blocktext
),
2374 line
=self
.state_machine
.abs_line_number())
2376 return [], next_state
, []
2378 def text(self
, match
, context
, next_state
):
2379 """Titles, definition lists, paragraphs."""
2380 return [match
.string
], 'Text', []
2383 class RFC2822Body(Body
):
2386 RFC2822 headers are only valid as the first constructs in documents. As
2387 soon as anything else appears, the `Body` state should take over.
2390 patterns
= Body
.patterns
.copy() # can't modify the original
2391 patterns
['rfc2822'] = r
'[!-9;-~]+:( +|$)'
2392 initial_transitions
= [(name
, 'Body')
2393 for name
in Body
.initial_transitions
]
2394 initial_transitions
.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2396 def rfc2822(self
, match
, context
, next_state
):
2397 """RFC2822-style field list item."""
2398 fieldlist
= nodes
.field_list(classes
=['rfc2822'])
2399 self
.parent
+= fieldlist
2400 field
, blank_finish
= self
.rfc2822_field(match
)
2402 offset
= self
.state_machine
.line_offset
+ 1 # next line
2403 newline_offset
, blank_finish
= self
.nested_list_parse(
2404 self
.state_machine
.input_lines
[offset
:],
2405 input_offset
=self
.state_machine
.abs_line_offset() + 1,
2406 node
=fieldlist
, initial_state
='RFC2822List',
2407 blank_finish
=blank_finish
)
2408 self
.goto_line(newline_offset
)
2409 if not blank_finish
:
2410 self
.parent
+= self
.unindent_warning(
2411 'RFC2822-style field list')
2412 return [], next_state
, []
2414 def rfc2822_field(self
, match
):
2415 name
= match
.string
[:match
.string
.find(':')]
2416 indented
, indent
, line_offset
, blank_finish
= \
2417 self
.state_machine
.get_first_known_indented(match
.end(),
2419 fieldnode
= nodes
.field()
2420 fieldnode
+= nodes
.field_name(name
, name
)
2421 fieldbody
= nodes
.field_body('\n'.join(indented
))
2422 fieldnode
+= fieldbody
2424 self
.nested_parse(indented
, input_offset
=line_offset
,
2426 return fieldnode
, blank_finish
2429 class SpecializedBody(Body
):
2432 Superclass for second and subsequent compound element members. Compound
2433 elements are lists and list-like constructs.
2435 All transition methods are disabled (redefined as `invalid_input`).
2436 Override individual methods in subclasses to re-enable.
2438 For example, once an initial bullet list item, say, is recognized, the
2439 `BulletList` subclass takes over, with a "bullet_list" node as its
2440 container. Upon encountering the initial bullet list item, `Body.bullet`
2441 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2442 starts up a nested parsing session with `BulletList` as the initial state.
2443 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2444 as only bullet list items are encountered, they are parsed and inserted
2445 into the container. The first construct which is *not* a bullet list item
2446 triggers the `invalid_input` method, which ends the nested parse and
2447 closes the container. `BulletList` needs to recognize input that is
2448 invalid in the context of a bullet list, which means everything *other
2449 than* bullet list items, so it inherits the transition list created in
2453 def invalid_input(self
, match
=None, context
=None, next_state
=None):
2454 """Not a compound element member. Abort this state machine."""
2455 self
.state_machine
.previous_line() # back up so parent SM can reassess
2458 indent
= invalid_input
2459 bullet
= invalid_input
2460 enumerator
= invalid_input
2461 field_marker
= invalid_input
2462 option_marker
= invalid_input
2463 doctest
= invalid_input
2464 line_block
= invalid_input
2465 grid_table_top
= invalid_input
2466 simple_table_top
= invalid_input
2467 explicit_markup
= invalid_input
2468 anonymous
= invalid_input
2469 line
= invalid_input
2470 text
= invalid_input
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item."""
        if match.string[0] != self.parent['bullet']:
            # A different bullet character starts a *new* list: abort
            # this state machine so the parent can reassess the line.
            self.invalid_input()
        item, blank_finish = self.list_item(match.end())
        self.parent += item
        self.blank_finish = blank_finish
        return [], next_state, []
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists."""
        # Possible new term line; the `Definition` state checks the
        # following line to confirm it is indented (a definition).
        context = [match.string]
        return context, 'Definition', []
2497 class EnumeratedList(SpecializedBody
):
2499 """Second and subsequent enumerated_list list_items."""
2501 def enumerator(self
, match
, context
, next_state
):
2502 """Enumerated list item."""
2503 format
, sequence
, text
, ordinal
= self
.parse_enumerator(
2504 match
, self
.parent
['enumtype'])
2505 if ( format
!= self
.format
2506 or (sequence
!= '#' and (sequence
!= self
.parent
['enumtype']
2508 or ordinal
!= (self
.lastordinal
+ 1)))
2509 or not self
.is_enumerated_list_item(ordinal
, sequence
, format
)):
2510 # different enumeration: new list
2511 self
.invalid_input()
2514 listitem
, blank_finish
= self
.list_item(match
.end())
2515 self
.parent
+= listitem
2516 self
.blank_finish
= blank_finish
2517 self
.lastordinal
= ordinal
2518 return [], next_state
, []
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field."""
        field_node, blank_finish = self.field(match)
        self.parent += field_node
        self.blank_finish = blank_finish
        return [], next_state, []
2533 class OptionList(SpecializedBody
):
2535 """Second and subsequent option_list option_list_items."""
2537 def option_marker(self
, match
, context
, next_state
):
2538 """Option list item."""
2540 option_list_item
, blank_finish
= self
.option_list_item(match
)
2542 self
.invalid_input()
2543 self
.parent
+= option_list_item
2544 self
.blank_finish
= blank_finish
2545 return [], next_state
, []
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Reuse the RFC2822 recognition machinery wholesale.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        field_node, blank_finish = self.rfc2822_field(match)
        self.parent += field_node
        self.blank_finish = blank_finish
        # Remain in this state for subsequent header fields.
        return [], 'RFC2822List', []

    # A blank line ends the RFC2822 header block.
    blank = SpecializedBody.invalid_input
2565 class ExtensionOptions(FieldList
):
2568 Parse field_list fields for extension options.
2570 No nested parsing is done (including inline markup parsing).
2573 def parse_field_body(self
, indented
, offset
, node
):
2574 """Override `Body.parse_field_body` for simpler parsing."""
2576 for line
in list(indented
) + ['']:
2580 text
= '\n'.join(lines
)
2581 node
+= nodes
.paragraph(text
, text
)
2585 class LineBlock(SpecializedBody
):
2587 """Second and subsequent lines of a line_block."""
2589 blank
= SpecializedBody
.invalid_input
2591 def line_block(self
, match
, context
, next_state
):
2592 """New line of line block."""
2593 lineno
= self
.state_machine
.abs_line_number()
2594 line
, messages
, blank_finish
= self
.line_block_line(match
, lineno
)
2596 self
.parent
.parent
+= messages
2597 self
.blank_finish
= blank_finish
2598 return [], next_state
, []
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def _absorb(self, nodelist, blank_finish):
        # Shared tail of both transition methods below.
        self.parent += nodelist
        self.blank_finish = blank_finish

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        self._absorb(*self.explicit_construct(match))
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        self._absorb(*self.anonymous_target(match))
        return [], next_state, []

    # A blank line terminates the series of explicit constructs.
    blank = SpecializedBody.invalid_input
2622 class SubstitutionDef(Body
):
2625 Parser for the contents of a substitution_definition element.
2629 'embedded_directive': re
.compile(r
'(%s)::( +|$)'
2630 % Inliner
.simplename
, re
.UNICODE
),
2632 initial_transitions
= ['embedded_directive', 'text']
2634 def embedded_directive(self
, match
, context
, next_state
):
2635 nodelist
, blank_finish
= self
.directive(match
,
2636 alt
=self
.parent
['names'][0])
2637 self
.parent
+= nodelist
2638 if not self
.state_machine
.at_eof():
2639 self
.blank_finish
= blank_finish
2642 def text(self
, match
, context
, next_state
):
2643 if not self
.state_machine
.at_eof():
2644 self
.blank_finish
= self
.state_machine
.is_next_line_blank()
2648 class Text(RSTState
):
2651 Classifier of second line of a text block.
2653 Could be a paragraph, a definition list item, or a title.
2656 patterns
= {'underline': Body
.patterns
['line'],
2658 initial_transitions
= [('underline', 'Body'), ('text', 'Body')]
2660 def blank(self
, match
, context
, next_state
):
2661 """End of paragraph."""
2662 # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext
2663 paragraph
, literalnext
= self
.paragraph(
2664 context
, self
.state_machine
.abs_line_number() - 1)
2665 self
.parent
+= paragraph
2667 self
.parent
+= self
.literal_block()
2668 return [], 'Body', []
2670 def eof(self
, context
):
2672 self
.blank(None, context
, None)
2675 def indent(self
, match
, context
, next_state
):
2676 """Definition list item."""
2677 definitionlist
= nodes
.definition_list()
2678 definitionlistitem
, blank_finish
= self
.definition_list_item(context
)
2679 definitionlist
+= definitionlistitem
2680 self
.parent
+= definitionlist
2681 offset
= self
.state_machine
.line_offset
+ 1 # next line
2682 newline_offset
, blank_finish
= self
.nested_list_parse(
2683 self
.state_machine
.input_lines
[offset
:],
2684 input_offset
=self
.state_machine
.abs_line_offset() + 1,
2685 node
=definitionlist
, initial_state
='DefinitionList',
2686 blank_finish
=blank_finish
, blank_finish_state
='Definition')
2687 self
.goto_line(newline_offset
)
2688 if not blank_finish
:
2689 self
.parent
+= self
.unindent_warning('Definition list')
2690 return [], 'Body', []
2692 def underline(self
, match
, context
, next_state
):
2693 """Section title."""
2694 lineno
= self
.state_machine
.abs_line_number()
2695 title
= context
[0].rstrip()
2696 underline
= match
.string
.rstrip()
2697 source
= title
+ '\n' + underline
2699 if column_width(title
) > len(underline
):
2700 if len(underline
) < 4:
2701 if self
.state_machine
.match_titles
:
2702 msg
= self
.reporter
.info(
2703 'Possible title underline, too short for the title.\n'
2704 "Treating it as ordinary text because it's so short.",
2707 raise statemachine
.TransitionCorrection('text')
2709 blocktext
= context
[0] + '\n' + self
.state_machine
.line
2710 msg
= self
.reporter
.warning('Title underline too short.',
2711 nodes
.literal_block(blocktext
, blocktext
), line
=lineno
)
2712 messages
.append(msg
)
2713 if not self
.state_machine
.match_titles
:
2714 blocktext
= context
[0] + '\n' + self
.state_machine
.line
2715 # We need get_source_and_line() here to report correctly
2716 src
, srcline
= self
.state_machine
.get_source_and_line()
2717 # TODO: why is abs_line_number() == srcline+1
2718 # if the error is in a table (try with test_tables.py)?
2719 # print "get_source_and_line", srcline
2720 # print "abs_line_number", self.state_machine.abs_line_number()
2721 msg
= self
.reporter
.severe('Unexpected section title.',
2722 nodes
.literal_block(blocktext
, blocktext
),
2723 source
=src
, line
=srcline
)
2724 self
.parent
+= messages
2726 return [], next_state
, []
2727 style
= underline
[0]
2729 self
.section(title
, source
, style
, lineno
- 1, messages
)
2730 return [], next_state
, []
2732 def text(self
, match
, context
, next_state
):
2734 startline
= self
.state_machine
.abs_line_number() - 1
2737 block
= self
.state_machine
.get_text_block(flush_left
=True)
2738 except statemachine
.UnexpectedIndentationError
, err
:
2739 block
, src
, srcline
= err
.args
2740 msg
= self
.reporter
.error('Unexpected indentation.',
2741 source
=src
, line
=srcline
)
2742 lines
= context
+ list(block
)
2743 paragraph
, literalnext
= self
.paragraph(lines
, startline
)
2744 self
.parent
+= paragraph
2748 self
.state_machine
.next_line()
2751 self
.parent
+= self
.literal_block()
2752 return [], next_state
, []
2754 def literal_block(self
):
2755 """Return a list of nodes."""
2756 indented
, indent
, offset
, blank_finish
= \
2757 self
.state_machine
.get_indented()
2758 while indented
and not indented
[-1].strip():
2761 return self
.quoted_literal_block()
2762 data
= '\n'.join(indented
)
2763 literal_block
= nodes
.literal_block(data
, data
)
2764 literal_block
.line
= offset
+ 1
2765 nodelist
= [literal_block
]
2766 if not blank_finish
:
2767 nodelist
.append(self
.unindent_warning('Literal block'))
2770 def quoted_literal_block(self
):
2771 abs_line_offset
= self
.state_machine
.abs_line_offset()
2772 offset
= self
.state_machine
.line_offset
2773 parent_node
= nodes
.Element()
2774 new_abs_offset
= self
.nested_parse(
2775 self
.state_machine
.input_lines
[offset
:],
2776 input_offset
=abs_line_offset
, node
=parent_node
, match_titles
=False,
2777 state_machine_kwargs
={'state_classes': (QuotedLiteralBlock
,),
2778 'initial_state': 'QuotedLiteralBlock'})
2779 self
.goto_line(new_abs_offset
)
2780 return parent_node
.children
2782 def definition_list_item(self
, termline
):
2783 indented
, indent
, line_offset
, blank_finish
= \
2784 self
.state_machine
.get_indented()
2785 itemnode
= nodes
.definition_list_item(
2786 '\n'.join(termline
+ list(indented
)))
2787 lineno
= self
.state_machine
.abs_line_number() - 1
2789 itemnode
.line
) = self
.state_machine
.get_source_and_line(lineno
)
2790 termlist
, messages
= self
.term(termline
, lineno
)
2791 itemnode
+= termlist
2792 definition
= nodes
.definition('', *messages
)
2793 itemnode
+= definition
2794 if termline
[0][-2:] == '::':
2795 definition
+= self
.reporter
.info(
2796 'Blank line missing before literal block (after the "::")? '
2797 'Interpreted as a definition list item.',
2799 self
.nested_parse(indented
, input_offset
=line_offset
, node
=definition
)
2800 return itemnode
, blank_finish
    # Separates a definition-list term from its classifier(s):
    # a colon surrounded by at least one space on each side.
    classifier_delimiter = re.compile(' +: +')
2804 def term(self
, lines
, lineno
):
2805 """Return a definition_list's term and optional classifiers."""
2806 assert len(lines
) == 1
2807 text_nodes
, messages
= self
.inline_text(lines
[0], lineno
)
2808 term_node
= nodes
.term()
2810 term_node
.line
) = self
.state_machine
.get_source_and_line(lineno
)
2811 term_node
.rawsource
= unescape(lines
[0])
2812 node_list
= [term_node
]
2813 for i
in range(len(text_nodes
)):
2814 node
= text_nodes
[i
]
2815 if isinstance(node
, nodes
.Text
):
2816 parts
= self
.classifier_delimiter
.split(node
.rawsource
)
2818 node_list
[-1] += node
2821 node_list
[-1] += nodes
.Text(parts
[0].rstrip())
2822 for part
in parts
[1:]:
2823 classifier_node
= nodes
.classifier('', part
)
2824 node_list
.append(classifier_node
)
2826 node_list
[-1] += node
2827 return node_list
, messages
2830 class SpecializedText(Text
):
2833 Superclass for second and subsequent lines of Text-variants.
2835 All transition methods are disabled. Override individual methods in
2836 subclasses to re-enable.
2839 def eof(self
, context
):
2840 """Incomplete construct."""
2843 def invalid_input(self
, match
=None, context
=None, next_state
=None):
2844 """Not a compound element member. Abort this state machine."""
2847 blank
= invalid_input
2848 indent
= invalid_input
2849 underline
= invalid_input
2850 text
= invalid_input
2853 class Definition(SpecializedText
):
2855 """Second line of potential definition_list_item."""
2857 def eof(self
, context
):
2858 """Not a definition."""
2859 self
.state_machine
.previous_line(2) # so parent SM can reassess
2862 def indent(self
, match
, context
, next_state
):
2863 """Definition list item."""
2864 itemnode
, blank_finish
= self
.definition_list_item(context
)
2865 self
.parent
+= itemnode
2866 self
.blank_finish
= blank_finish
2867 return [], 'DefinitionList', []
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            # Too short to be a transition: treat as text (raises
            # StateCorrection, so nothing below runs in that case).
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        # The marker is on the previous line (context[0]).
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # Overline + title but no underline: incomplete.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        # column_width accounts for wide (east-asian) characters.
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        """Second underline-ish line: not a valid title or transition."""
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """Report a too-short overline and reparse it as ordinary text."""
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """Rewind `lines` lines and restart parsing in the Body state."""
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        RSTState.__init__(self, state_machine, debug)
        # Deferred diagnostics, appended to the parent at EOF.
        self.messages = []
        # Absolute line number of the first quoted line, for source info.
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        # A blank line ends the quoted block (if one has started).
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        if context:
            # Emit the accumulated quoted lines as one literal block.
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            # src not available, because statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        # Rewind so the indented line is reparsed by the parent.
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote), re.UNICODE)
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        # A non-matching line inside the block means inconsistent quoting.
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError
# All parser states registered with the RSTStateMachine; order is not
# significant, but 'Body' (the first entry) is the usual initial state.
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""