2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
103 __docformat__
= 'reStructuredText'
111 import docutils
.utils
.roman
as roman
112 from types
import FunctionType
, MethodType
114 from docutils
import nodes
, statemachine
, utils
115 from docutils
import ApplicationError
, DataError
116 from docutils
.statemachine
import StateMachineWS
, StateWS
117 from docutils
.nodes
import fully_normalize_name
as normalize_name
118 from docutils
.nodes
import whitespace_normalize_name
119 import docutils
.parsers
.rst
120 from docutils
.parsers
.rst
import directives
, languages
, tableparser
, roles
121 from docutils
.parsers
.rst
.languages
import en
as _fallback_language_module
122 from docutils
.utils
import escape2null
, unescape
, column_width
123 from docutils
.utils
import punctuation_chars
, urischemes
# Parser exception types.  None of them adds behavior; each one only
# specializes the docutils base class it derives from.
class MarkupError(DataError):
    pass


class UnknownInterpretedRoleError(DataError):
    pass


class InterpretedRoleNotImplementedError(DataError):
    pass


class ParserError(ApplicationError):
    pass
# Plain `Exception` subclass with no added behavior; `implicit_inline`
# has an ``except MarkupMismatch`` handler for it.
class MarkupMismatch(Exception):
    pass
134 """Stores data attributes for dotted-attribute access."""
136 def __init__(self
, **keywordargs
):
137 self
.__dict
__.update(keywordargs
)
140 class RSTStateMachine(StateMachineWS
):
143 reStructuredText's master StateMachine.
145 The entry point to reStructuredText parsing is the `run()` method.
148 def run(self
, input_lines
, document
, input_offset
=0, match_titles
=True,
151 Parse `input_lines` and modify the `document` node in place.
153 Extend `StateMachineWS.run()`: set up parse-global data and
154 run the StateMachine.
156 self
.language
= languages
.get_language(
157 document
.settings
.language_code
)
158 self
.match_titles
= match_titles
161 inliner
.init_customizations(document
.settings
)
162 self
.memo
= Struct(document
=document
,
163 reporter
=document
.reporter
,
164 language
=self
.language
,
167 section_bubble_up_kludge
=False,
169 self
.document
= document
170 self
.attach_observer(document
.note_source
)
171 self
.reporter
= self
.memo
.reporter
173 results
= StateMachineWS
.run(self
, input_lines
, input_offset
,
174 input_source
=document
['source'])
175 assert results
== [], 'RSTStateMachine.run() results should be empty!'
176 self
.node
= self
.memo
= None # remove unneeded references
179 class NestedStateMachine(StateMachineWS
):
182 StateMachine run from within other StateMachine runs, to parse nested
186 def run(self
, input_lines
, input_offset
, memo
, node
, match_titles
=True):
188 Parse `input_lines` and populate a `docutils.nodes.document` instance.
190 Extend `StateMachineWS.run()`: set up document-wide data.
192 self
.match_titles
= match_titles
194 self
.document
= memo
.document
195 self
.attach_observer(self
.document
.note_source
)
196 self
.reporter
= memo
.reporter
197 self
.language
= memo
.language
199 results
= StateMachineWS
.run(self
, input_lines
, input_offset
)
200 assert results
== [], ('NestedStateMachine.run() results should be '
205 class RSTState(StateWS
):
208 reStructuredText State superclass.
210 Contains methods used by all State subclasses.
213 nested_sm
= NestedStateMachine
def __init__(self, state_machine, debug=False):
    """Store the default nested-state-machine keyword arguments, then
    run the `StateWS` initializer."""
    # `nested_parse()` and `nested_list_parse()` fall back to these
    # keyword arguments when the caller does not supply its own.
    nested_defaults = dict(state_classes=state_classes,
                           initial_state='Body')
    self.nested_sm_kwargs = nested_defaults
    StateWS.__init__(self, state_machine, debug)
221 def runtime_init(self
):
222 StateWS
.runtime_init(self
)
223 memo
= self
.state_machine
.memo
225 self
.reporter
= memo
.reporter
226 self
.inliner
= memo
.inliner
227 self
.document
= memo
.document
228 self
.parent
= self
.state_machine
.node
229 # enable the reporter to determine source and source-line
230 if not hasattr(self
.reporter
, 'get_source_and_line'):
231 self
.reporter
.get_source_and_line
= self
.state_machine
.get_source_and_line
232 # print "adding get_source_and_line to reporter", self.state_machine.input_offset
235 def goto_line(self
, abs_line_offset
):
237 Jump to input line `abs_line_offset`, ignoring jumps past the end.
240 self
.state_machine
.goto_line(abs_line_offset
)
244 def no_match(self
, context
, transitions
):
246 Override `StateWS.no_match` to generate a system message.
248 This code should never be run.
250 self
.reporter
.severe(
251 'Internal error: no transition pattern match. State: "%s"; '
252 'transitions: %s; context: %s; current line: %r.'
253 % (self
.__class
__.__name
__, transitions
, context
,
254 self
.state_machine
.line
))
255 return context
, None, []
257 def bof(self
, context
):
258 """Called at beginning of file."""
261 def nested_parse(self
, block
, input_offset
, node
, match_titles
=False,
262 state_machine_class
=None, state_machine_kwargs
=None):
264 Create a new StateMachine rooted at `node` and run it over the input
268 if state_machine_class
is None:
269 state_machine_class
= self
.nested_sm
271 if state_machine_kwargs
is None:
272 state_machine_kwargs
= self
.nested_sm_kwargs
274 block_length
= len(block
)
279 state_machine
= self
.nested_sm_cache
.pop()
282 if not state_machine
:
283 state_machine
= state_machine_class(debug
=self
.debug
,
284 **state_machine_kwargs
)
285 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
286 node
=node
, match_titles
=match_titles
)
288 self
.nested_sm_cache
.append(state_machine
)
290 state_machine
.unlink()
291 new_offset
= state_machine
.abs_line_offset()
292 # No `block.parent` implies disconnected -- lines aren't in sync:
293 if block
.parent
and (len(block
) - block_length
) != 0:
294 # Adjustment for block if modified in nested parse:
295 self
.state_machine
.next_line(len(block
) - block_length
)
298 def nested_list_parse(self
, block
, input_offset
, node
, initial_state
,
300 blank_finish_state
=None,
303 state_machine_class
=None,
304 state_machine_kwargs
=None):
306 Create a new StateMachine rooted at `node` and run it over the input
307 `block`. Also keep track of optional intermediate blank lines and the
310 if state_machine_class
is None:
311 state_machine_class
= self
.nested_sm
312 if state_machine_kwargs
is None:
313 state_machine_kwargs
= self
.nested_sm_kwargs
.copy()
314 state_machine_kwargs
['initial_state'] = initial_state
315 state_machine
= state_machine_class(debug
=self
.debug
,
316 **state_machine_kwargs
)
317 if blank_finish_state
is None:
318 blank_finish_state
= initial_state
319 state_machine
.states
[blank_finish_state
].blank_finish
= blank_finish
320 for key
, value
in extra_settings
.items():
321 setattr(state_machine
.states
[initial_state
], key
, value
)
322 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
323 node
=node
, match_titles
=match_titles
)
324 blank_finish
= state_machine
.states
[blank_finish_state
].blank_finish
325 state_machine
.unlink()
326 return state_machine
.abs_line_offset(), blank_finish
def section(self, title, source, style, lineno, messages):
    """Check for a valid subsection and create one if it checks out."""
    # Guard clause: nothing to create when the header check fails.
    if not self.check_subsection(source, style, lineno):
        return
    self.new_subsection(title, lineno, messages)
333 def check_subsection(self
, source
, style
, lineno
):
335 Check for a valid subsection header. Return 1 (true) or None (false).
337 When a new section is reached that isn't a subsection of the current
338 section, back up the line count (use ``previous_line(-x)``), then
339 ``raise EOFError``. The current StateMachine will finish, then the
340 calling StateMachine can re-examine the title. This will work its way
341 back up the calling chain until the correct section level is reached.
343 @@@ Alternative: Evaluate the title, store the title info & level, and
344 back up the chain until that level is reached. Store in memo? Or
347 :Exception: `EOFError` when a sibling or supersection encountered.
350 title_styles
= memo
.title_styles
351 mylevel
= memo
.section_level
352 try: # check for existing title style
353 level
= title_styles
.index(style
) + 1
354 except ValueError: # new title style
355 if len(title_styles
) == memo
.section_level
: # new subsection
356 title_styles
.append(style
)
358 else: # not at lowest level
359 self
.parent
+= self
.title_inconsistent(source
, lineno
)
361 if level
<= mylevel
: # sibling or supersection
362 memo
.section_level
= level
# bubble up to parent section
364 memo
.section_bubble_up_kludge
= True
365 # back up 2 lines for underline title, 3 for overline title
366 self
.state_machine
.previous_line(len(style
) + 1)
367 raise EOFError # let parent section re-evaluate
368 if level
== mylevel
+ 1: # immediate subsection
370 else: # invalid subsection
371 self
.parent
+= self
.title_inconsistent(source
, lineno
)
374 def title_inconsistent(self
, sourcetext
, lineno
):
375 error
= self
.reporter
.severe(
376 'Title level inconsistent:', nodes
.literal_block('', sourcetext
),
380 def new_subsection(self
, title
, lineno
, messages
):
381 """Append new subsection to document tree. On return, check level."""
383 mylevel
= memo
.section_level
384 memo
.section_level
+= 1
385 section_node
= nodes
.section()
386 self
.parent
+= section_node
387 textnodes
, title_messages
= self
.inline_text(title
, lineno
)
388 titlenode
= nodes
.title(title
, '', *textnodes
)
389 name
= normalize_name(titlenode
.astext())
390 section_node
['names'].append(name
)
391 section_node
+= titlenode
392 section_node
+= messages
393 section_node
+= title_messages
394 self
.document
.note_implicit_target(section_node
, section_node
)
395 offset
= self
.state_machine
.line_offset
+ 1
396 absoffset
= self
.state_machine
.abs_line_offset() + 1
397 newabsoffset
= self
.nested_parse(
398 self
.state_machine
.input_lines
[offset
:], input_offset
=absoffset
,
399 node
=section_node
, match_titles
=True)
400 self
.goto_line(newabsoffset
)
401 if memo
.section_level
<= mylevel
: # can't handle next section?
402 raise EOFError # bubble up to supersection
403 # reset section_level; next pass will detect it properly
404 memo
.section_level
= mylevel
406 def paragraph(self
, lines
, lineno
):
408 Return a list (paragraph & messages) & a boolean: literal_block next?
410 data
= '\n'.join(lines
).rstrip()
411 if re
.search(r
'(?<!\\)(\\\\)*::$', data
):
414 elif data
[-3] in ' \n':
415 text
= data
[:-3].rstrip()
422 textnodes
, messages
= self
.inline_text(text
, lineno
)
423 p
= nodes
.paragraph(data
, '', *textnodes
)
424 p
.source
, p
.line
= self
.state_machine
.get_source_and_line(lineno
)
425 return [p
] + messages
, literalnext
def inline_text(self, text, lineno):
    """
    Return 2 lists: nodes (text and inline elements), and system_messages.
    """
    # Delegate to the inliner, passing along this state's memo and parent.
    parse = self.inliner.parse
    return parse(text, lineno, self.memo, self.parent)
433 def unindent_warning(self
, node_name
):
434 # the actual problem is one line below the current line
435 lineno
= self
.state_machine
.abs_line_number()+1
436 return self
.reporter
.warning('%s ends without a blank line; '
437 'unexpected unindent.' % node_name
,
441 def build_regexp(definition
, compile=True):
443 Build, compile and return a regular expression based on `definition`.
445 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
446 where "parts" is a list of regular expressions and/or regular
447 expression definitions to be joined into an or-group.
449 name
, prefix
, suffix
, parts
= definition
452 if type(part
) is tuple:
453 part_strings
.append(build_regexp(part
, None))
455 part_strings
.append(part
)
456 or_group
= '|'.join(part_strings
)
457 regexp
= '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
459 return re
.compile(regexp
, re
.UNICODE
)
467 Parse inline markup; call the `parse()` method.
471 self
.implicit_dispatch
= [(self
.patterns
.uri
, self
.standalone_uri
),]
472 """List of (pattern, bound method) tuples, used by
473 `self.implicit_inline`."""
475 def init_customizations(self
, settings
):
476 """Setting-based customizations; run when parsing begins."""
477 if settings
.pep_references
:
478 self
.implicit_dispatch
.append((self
.patterns
.pep
,
480 if settings
.rfc_references
:
481 self
.implicit_dispatch
.append((self
.patterns
.rfc
,
484 def parse(self
, text
, lineno
, memo
, parent
):
485 # Needs to be refactored for nested inline markup.
486 # Add nested_parse() method?
488 Return 2 lists: nodes (text and inline elements), and system_messages.
490 Using `self.patterns.initial`, a pattern which matches start-strings
491 (emphasis, strong, interpreted, phrase reference, literal,
492 substitution reference, and inline target) and complete constructs
493 (simple reference, footnote reference), search for a candidate. When
494 one is found, check for validity (e.g., not a quoted '*' character).
495 If valid, search for the corresponding end string if applicable, and
496 check it for validity. If not found or invalid, generate a warning
497 and ignore the start-string. Implicit inline markup (e.g. standalone
500 self
.reporter
= memo
.reporter
501 self
.document
= memo
.document
502 self
.language
= memo
.language
504 pattern_search
= self
.patterns
.initial
.search
505 dispatch
= self
.dispatch
506 remaining
= escape2null(text
)
511 match
= pattern_search(remaining
)
513 groups
= match
.groupdict()
514 method
= dispatch
[groups
['start'] or groups
['backquote']
515 or groups
['refend'] or groups
['fnend']]
516 before
, inlines
, remaining
, sysmessages
= method(self
, match
,
518 unprocessed
.append(before
)
519 messages
+= sysmessages
521 processed
+= self
.implicit_inline(''.join(unprocessed
),
527 remaining
= ''.join(unprocessed
) + remaining
529 processed
+= self
.implicit_inline(remaining
, lineno
)
530 return processed
, messages
532 # Inline object recognition
533 # -------------------------
534 # lookahead and look-behind expressions for inline markup rules
535 start_string_prefix
= (u
'(^|(?<=\\s|[%s%s]))' %
536 (punctuation_chars
.openers
,
537 punctuation_chars
.delimiters
))
538 end_string_suffix
= (u
'($|(?=\\s|[\x00%s%s%s]))' %
539 (punctuation_chars
.closing_delimiters
,
540 punctuation_chars
.delimiters
,
541 punctuation_chars
.closers
))
542 # print start_string_prefix.encode('utf8')
543 # TODO: support non-ASCII whitespace in the following 4 patterns?
544 non_whitespace_before
= r
'(?<![ \n])'
545 non_whitespace_escape_before
= r
'(?<![ \n\x00])'
546 non_unescaped_whitespace_escape_before
= r
'(?<!(?<!\x00)[ \n\x00])'
547 non_whitespace_after
= r
'(?![ \n])'
548 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
549 simplename
= r
'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
550 # Valid URI characters (see RFC 2396 & RFC 2732);
551 # final \x00 allows backslash escapes in URIs:
552 uric
= r
"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
553 # Delimiter indicating the end of a URI (not part of the URI):
554 uri_end_delim
= r
"""[>]"""
555 # Last URI character; same as uric but no punctuation:
556 urilast
= r
"""[_~*/=+a-zA-Z0-9]"""
557 # End of a URI (either 'urilast' or 'uric followed by a
559 uri_end
= r
"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
560 emailc
= r
"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
562 %(emailc)s+(?:\.%(emailc)s+)* # name
564 %(emailc)s+(?:\.%(emailc)s*)* # host
565 %(uri_end)s # final URI char
567 parts
= ('initial_inline', start_string_prefix
, '',
568 [('start', '', non_whitespace_after
, # simple start-strings
570 r
'\*(?!\*)', # emphasis but not strong
572 r
'_`', # inline internal target
573 r
'\|(?!\|)'] # substitution reference
575 ('whole', '', end_string_suffix
, # whole constructs
576 [# reference name & end-string
577 r
'(?P<refname>%s)(?P<refend>__?)' % simplename
,
578 ('footnotelabel', r
'\[', r
'(?P<fnend>\]_)',
579 [r
'[0-9]+', # manually numbered
580 r
'\#(%s)?' % simplename
, # auto-numbered (w/ label?)
582 r
'(?P<citationlabel>%s)' % simplename
] # citation reference
586 ('backquote', # interpreted text or phrase reference
587 '(?P<role>(:%s:)?)' % simplename
, # optional role
588 non_whitespace_after
,
589 ['`(?!`)'] # but not literal
594 initial
=build_regexp(parts
),
595 emphasis
=re
.compile(non_whitespace_escape_before
596 + r
'(\*)' + end_string_suffix
, re
.UNICODE
),
597 strong
=re
.compile(non_whitespace_escape_before
598 + r
'(\*\*)' + end_string_suffix
, re
.UNICODE
),
599 interpreted_or_phrase_ref
=re
.compile(
601 %(non_unescaped_whitespace_escape_before)s
605 (?P<role>:%(simplename)s:)?
609 %(end_string_suffix)s
610 """ % locals(), re
.VERBOSE | re
.UNICODE
),
611 embedded_uri
=re
.compile(
614 (?:[ \n]+|^) # spaces or beginning of line/string
616 %(non_whitespace_after)s
617 ([^<>\x00]+) # anything but angle brackets & nulls
618 %(non_whitespace_before)s
619 > # close bracket w/o whitespace before
622 """ % locals(), re
.VERBOSE | re
.UNICODE
),
623 literal
=re
.compile(non_whitespace_before
+ '(``)'
624 + end_string_suffix
),
625 target
=re
.compile(non_whitespace_escape_before
626 + r
'(`)' + end_string_suffix
),
627 substitution_ref
=re
.compile(non_whitespace_escape_before
629 + end_string_suffix
),
630 email
=re
.compile(email_pattern
% locals() + '$',
631 re
.VERBOSE | re
.UNICODE
),
634 %(start_string_prefix)s
636 (?P<absolute> # absolute URI
637 (?P<scheme> # scheme (http, ftp, mailto)
638 [a-zA-Z][a-zA-Z0-9.+-]*
643 (//?)? # hierarchical URI
644 %(uric)s* # URI characters
645 %(uri_end)s # final URI char
651 ( # optional fragment
658 (?P<email> # email address
659 """ + email_pattern
+ r
"""
662 %(end_string_suffix)s
663 """) % locals(), re
.VERBOSE | re
.UNICODE
),
666 %(start_string_prefix)s
668 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
670 (PEP\s+(?P<pepnum2>\d+)) # reference by name
672 %(end_string_suffix)s""" % locals(), re
.VERBOSE | re
.UNICODE
),
675 %(start_string_prefix)s
676 (RFC(-|\s+)?(?P<rfcnum>\d+))
677 %(end_string_suffix)s""" % locals(), re
.VERBOSE | re
.UNICODE
))
679 def quoted_start(self
, match
):
680 """Test if inline markup start-string is 'quoted'.
682 'Quoted' in this context means the start-string is enclosed in a pair
683 of matching opening/closing delimiters (not necessarily quotes)
684 or at the end of the match.
686 string
= match
.string
687 start
= match
.start()
688 if start
== 0: # start-string at beginning of text
690 prestart
= string
[start
- 1]
692 poststart
= string
[match
.end()]
693 except IndexError: # start-string at end of text
694 return True # not "quoted" but no markup start-string either
695 return punctuation_chars
.match_chars(prestart
, poststart
)
697 def inline_obj(self
, match
, lineno
, end_pattern
, nodeclass
,
698 restore_backslashes
=False):
699 string
= match
.string
700 matchstart
= match
.start('start')
701 matchend
= match
.end('start')
702 if self
.quoted_start(match
):
703 return (string
[:matchend
], [], string
[matchend
:], [], '')
704 endmatch
= end_pattern
.search(string
[matchend
:])
705 if endmatch
and endmatch
.start(1): # 1 or more chars
706 text
= unescape(endmatch
.string
[:endmatch
.start(1)],
708 textend
= matchend
+ endmatch
.end(1)
709 rawsource
= unescape(string
[matchstart
:textend
], 1)
710 return (string
[:matchstart
], [nodeclass(rawsource
, text
)],
711 string
[textend
:], [], endmatch
.group(1))
712 msg
= self
.reporter
.warning(
713 'Inline %s start-string without end-string.'
714 % nodeclass
.__name
__, line
=lineno
)
715 text
= unescape(string
[matchstart
:matchend
], 1)
716 rawsource
= unescape(string
[matchstart
:matchend
], 1)
717 prb
= self
.problematic(text
, rawsource
, msg
)
718 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
], ''
720 def problematic(self
, text
, rawsource
, message
):
721 msgid
= self
.document
.set_id(message
, self
.parent
)
722 problematic
= nodes
.problematic(rawsource
, text
, refid
=msgid
)
723 prbid
= self
.document
.set_id(problematic
)
724 message
.add_backref(prbid
)
def emphasis(self, match, lineno):
    """Handle an emphasis start-string through `inline_obj()`.

    Return the first four items of the `inline_obj()` result; the fifth
    (the end-string) is dropped.
    """
    result = self.inline_obj(
        match, lineno, self.patterns.emphasis, nodes.emphasis)
    text_before, nodelist, text_after, messages, _endstring = result
    return text_before, nodelist, text_after, messages
def strong(self, match, lineno):
    """Handle a strong-emphasis start-string through `inline_obj()`.

    Return the first four items of the `inline_obj()` result; the fifth
    (the end-string) is dropped.
    """
    result = self.inline_obj(
        match, lineno, self.patterns.strong, nodes.strong)
    text_before, nodelist, text_after, messages, _endstring = result
    return text_before, nodelist, text_after, messages
737 def interpreted_or_phrase_ref(self
, match
, lineno
):
738 end_pattern
= self
.patterns
.interpreted_or_phrase_ref
739 string
= match
.string
740 matchstart
= match
.start('backquote')
741 matchend
= match
.end('backquote')
742 rolestart
= match
.start('role')
743 role
= match
.group('role')
748 elif self
.quoted_start(match
):
749 return (string
[:matchend
], [], string
[matchend
:], [])
750 endmatch
= end_pattern
.search(string
[matchend
:])
751 if endmatch
and endmatch
.start(1): # 1 or more chars
752 textend
= matchend
+ endmatch
.end()
753 if endmatch
.group('role'):
755 msg
= self
.reporter
.warning(
756 'Multiple roles in interpreted text (both '
757 'prefix and suffix present; only one allowed).',
759 text
= unescape(string
[rolestart
:textend
], 1)
760 prb
= self
.problematic(text
, text
, msg
)
761 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
762 role
= endmatch
.group('suffix')[1:-1]
764 escaped
= endmatch
.string
[:endmatch
.start(1)]
765 rawsource
= unescape(string
[matchstart
:textend
], 1)
766 if rawsource
[-1:] == '_':
768 msg
= self
.reporter
.warning(
769 'Mismatch: both interpreted text role %s and '
770 'reference suffix.' % position
, line
=lineno
)
771 text
= unescape(string
[rolestart
:textend
], 1)
772 prb
= self
.problematic(text
, text
, msg
)
773 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
774 return self
.phrase_ref(string
[:matchstart
], string
[textend
:],
775 rawsource
, escaped
, unescape(escaped
))
777 rawsource
= unescape(string
[rolestart
:textend
], 1)
778 nodelist
, messages
= self
.interpreted(rawsource
, escaped
, role
,
780 return (string
[:rolestart
], nodelist
,
781 string
[textend
:], messages
)
782 msg
= self
.reporter
.warning(
783 'Inline interpreted text or phrase reference start-string '
784 'without end-string.', line
=lineno
)
785 text
= unescape(string
[matchstart
:matchend
], 1)
786 prb
= self
.problematic(text
, text
, msg
)
787 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
]
789 def phrase_ref(self
, before
, after
, rawsource
, escaped
, text
):
790 match
= self
.patterns
.embedded_uri
.search(escaped
)
792 text
= unescape(escaped
[:match
.start(0)])
793 uri_text
= match
.group(2)
794 uri
= ''.join(uri_text
.split())
795 uri
= self
.adjust_uri(uri
)
797 target
= nodes
.target(match
.group(1), refuri
=uri
)
798 target
.referenced
= 1
800 raise ApplicationError('problem with URI: %r' % uri_text
)
805 refname
= normalize_name(text
)
806 reference
= nodes
.reference(rawsource
, text
,
807 name
=whitespace_normalize_name(text
))
808 node_list
= [reference
]
809 if rawsource
[-2:] == '__':
811 reference
['refuri'] = uri
813 reference
['anonymous'] = 1
816 reference
['refuri'] = uri
817 target
['names'].append(refname
)
818 self
.document
.note_explicit_target(target
, self
.parent
)
819 node_list
.append(target
)
821 reference
['refname'] = refname
822 self
.document
.note_refname(reference
)
823 return before
, node_list
, after
, []
825 def adjust_uri(self
, uri
):
826 match
= self
.patterns
.email
.match(uri
)
828 return 'mailto:' + uri
832 def interpreted(self
, rawsource
, text
, role
, lineno
):
833 role_fn
, messages
= roles
.role(role
, self
.language
, lineno
,
836 nodes
, messages2
= role_fn(role
, rawsource
, text
, lineno
, self
)
837 return nodes
, messages
+ messages2
839 msg
= self
.reporter
.error(
840 'Unknown interpreted text role "%s".' % role
,
842 return ([self
.problematic(rawsource
, rawsource
, msg
)],
def literal(self, match, lineno):
    """Handle an inline-literal start-string through `inline_obj()`.

    `restore_backslashes=True` is passed to `inline_obj()`.  Return the
    first four items of its result; the fifth (the end-string) is dropped.
    """
    result = self.inline_obj(
        match, lineno, self.patterns.literal, nodes.literal,
        restore_backslashes=True)
    text_before, nodelist, text_after, messages, _endstring = result
    return text_before, nodelist, text_after, messages
851 def inline_internal_target(self
, match
, lineno
):
852 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
853 match
, lineno
, self
.patterns
.target
, nodes
.target
)
854 if inlines
and isinstance(inlines
[0], nodes
.target
):
855 assert len(inlines
) == 1
857 name
= normalize_name(target
.astext())
858 target
['names'].append(name
)
859 self
.document
.note_explicit_target(target
, self
.parent
)
860 return before
, inlines
, remaining
, sysmessages
862 def substitution_reference(self
, match
, lineno
):
863 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
864 match
, lineno
, self
.patterns
.substitution_ref
,
865 nodes
.substitution_reference
)
866 if len(inlines
) == 1:
867 subref_node
= inlines
[0]
868 if isinstance(subref_node
, nodes
.substitution_reference
):
869 subref_text
= subref_node
.astext()
870 self
.document
.note_substitution_ref(subref_node
, subref_text
)
871 if endstring
[-1:] == '_':
872 reference_node
= nodes
.reference(
873 '|%s%s' % (subref_text
, endstring
), '')
874 if endstring
[-2:] == '__':
875 reference_node
['anonymous'] = 1
877 reference_node
['refname'] = normalize_name(subref_text
)
878 self
.document
.note_refname(reference_node
)
879 reference_node
+= subref_node
880 inlines
= [reference_node
]
881 return before
, inlines
, remaining
, sysmessages
883 def footnote_reference(self
, match
, lineno
):
885 Handles `nodes.footnote_reference` and `nodes.citation_reference`
888 label
= match
.group('footnotelabel')
889 refname
= normalize_name(label
)
890 string
= match
.string
891 before
= string
[:match
.start('whole')]
892 remaining
= string
[match
.end('whole'):]
893 if match
.group('citationlabel'):
894 refnode
= nodes
.citation_reference('[%s]_' % label
,
896 refnode
+= nodes
.Text(label
)
897 self
.document
.note_citation_ref(refnode
)
899 refnode
= nodes
.footnote_reference('[%s]_' % label
)
900 if refname
[0] == '#':
901 refname
= refname
[1:]
903 self
.document
.note_autofootnote_ref(refnode
)
906 refnode
['auto'] = '*'
907 self
.document
.note_symbol_footnote_ref(
910 refnode
+= nodes
.Text(label
)
912 refnode
['refname'] = refname
913 self
.document
.note_footnote_ref(refnode
)
914 if utils
.get_trim_footnote_ref_space(self
.document
.settings
):
915 before
= before
.rstrip()
916 return (before
, [refnode
], remaining
, [])
918 def reference(self
, match
, lineno
, anonymous
=False):
919 referencename
= match
.group('refname')
920 refname
= normalize_name(referencename
)
921 referencenode
= nodes
.reference(
922 referencename
+ match
.group('refend'), referencename
,
923 name
=whitespace_normalize_name(referencename
))
925 referencenode
['anonymous'] = 1
927 referencenode
['refname'] = refname
928 self
.document
.note_refname(referencenode
)
929 string
= match
.string
930 matchstart
= match
.start('whole')
931 matchend
= match
.end('whole')
932 return (string
[:matchstart
], [referencenode
], string
[matchend
:], [])
def anonymous_reference(self, match, lineno):
    """Delegate to `reference()` with the `anonymous` flag set."""
    make_reference = self.reference
    return make_reference(match, lineno, anonymous=1)
937 def standalone_uri(self
, match
, lineno
):
938 if (not match
.group('scheme')
939 or match
.group('scheme').lower() in urischemes
.schemes
):
940 if match
.group('email'):
941 addscheme
= 'mailto:'
944 text
= match
.group('whole')
945 unescaped
= unescape(text
, 0)
946 return [nodes
.reference(unescape(text
, 1), unescaped
,
947 refuri
=addscheme
+ unescaped
)]
948 else: # not a valid scheme
951 def pep_reference(self
, match
, lineno
):
952 text
= match
.group(0)
953 if text
.startswith('pep-'):
954 pepnum
= int(match
.group('pepnum1'))
955 elif text
.startswith('PEP'):
956 pepnum
= int(match
.group('pepnum2'))
959 ref
= (self
.document
.settings
.pep_base_url
960 + self
.document
.settings
.pep_file_url_template
% pepnum
)
961 unescaped
= unescape(text
, 0)
962 return [nodes
.reference(unescape(text
, 1), unescaped
, refuri
=ref
)]
964 rfc_url
= 'rfc%d.html'
966 def rfc_reference(self
, match
, lineno
):
967 text
= match
.group(0)
968 if text
.startswith('RFC'):
969 rfcnum
= int(match
.group('rfcnum'))
970 ref
= self
.document
.settings
.rfc_base_url
+ self
.rfc_url
% rfcnum
973 unescaped
= unescape(text
, 0)
974 return [nodes
.reference(unescape(text
, 1), unescaped
, refuri
=ref
)]
976 def implicit_inline(self
, text
, lineno
):
978 Check each of the patterns in `self.implicit_dispatch` for a match,
979 and dispatch to the stored method for the pattern. Recursively check
980 the text before and after the match. Return a list of `nodes.Text`
981 and inline element nodes.
985 for pattern
, method
in self
.implicit_dispatch
:
986 match
= pattern
.search(text
)
989 # Must recurse on strings before *and* after the match;
990 # there may be multiple patterns.
991 return (self
.implicit_inline(text
[:match
.start()], lineno
)
992 + method(match
, lineno
) +
993 self
.implicit_inline(text
[match
.end():], lineno
))
994 except MarkupMismatch
:
996 return [nodes
.Text(unescape(text
), rawsource
=unescape(text
, 1))]
998 dispatch
= {'*': emphasis
,
1000 '`': interpreted_or_phrase_ref
,
1002 '_`': inline_internal_target
,
1003 ']_': footnote_reference
,
1004 '|': substitution_reference
,
1006 '__': anonymous_reference
}
1009 def _loweralpha_to_int(s
, _zero
=(ord('a')-1)):
1010 return ord(s
) - _zero
1012 def _upperalpha_to_int(s
, _zero
=(ord('A')-1)):
1013 return ord(s
) - _zero
def _lowerroman_to_int(s):
    """
    Convert the lowercase Roman numeral `s` to an integer.

    The text is upper-cased first and then passed to ``roman.fromRoman()``,
    whose result is returned unchanged.
    """
    uppercased = s.upper()
    return roman.fromRoman(uppercased)
1019 class Body(RSTState
):
1022 Generic classifier of the first line of a block.
1025 double_width_pad_char
= tableparser
.TableParser
.double_width_pad_char
1026 """Padding character for East Asian double-width text."""
1029 """Enumerated list parsing information."""
1032 'parens': Struct(prefix
='(', suffix
=')', start
=1, end
=-1),
1033 'rparen': Struct(prefix
='', suffix
=')', start
=0, end
=-1),
1034 'period': Struct(prefix
='', suffix
='.', start
=0, end
=-1)}
1035 enum
.formats
= enum
.formatinfo
.keys()
1036 enum
.sequences
= ['arabic', 'loweralpha', 'upperalpha',
1037 'lowerroman', 'upperroman'] # ORDERED!
1038 enum
.sequencepats
= {'arabic': '[0-9]+',
1039 'loweralpha': '[a-z]',
1040 'upperalpha': '[A-Z]',
1041 'lowerroman': '[ivxlcdm]+',
1042 'upperroman': '[IVXLCDM]+',}
1043 enum
.converters
= {'arabic': int,
1044 'loweralpha': _loweralpha_to_int
,
1045 'upperalpha': _upperalpha_to_int
,
1046 'lowerroman': _lowerroman_to_int
,
1047 'upperroman': roman
.fromRoman
}
1049 enum
.sequenceregexps
= {}
1050 for sequence
in enum
.sequences
:
1051 enum
.sequenceregexps
[sequence
] = re
.compile(
1052 enum
.sequencepats
[sequence
] + '$', re
.UNICODE
)
1054 grid_table_top_pat
= re
.compile(r
'\+-[-+]+-\+ *$')
1055 """Matches the top (& bottom) of a full table."""
1057 simple_table_top_pat
= re
.compile('=+( +=+)+ *$')
1058 """Matches the top of a simple table."""
1060 simple_table_border_pat
= re
.compile('=+[ =]*$')
1061 """Matches the bottom & header bottom of a simple table."""
1064 """Fragments of patterns used by transitions."""
1066 pats
['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1067 pats
['alpha'] = '[a-zA-Z]'
1068 pats
['alphanum'] = '[a-zA-Z0-9]'
1069 pats
['alphanumplus'] = '[a-zA-Z0-9_-]'
1070 pats
['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1071 '|%(upperroman)s|#)' % enum
.sequencepats
)
1072 pats
['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1073 # @@@ Loosen up the pattern? Allow Unicode?
1074 pats
['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1075 pats
['shortopt'] = r
'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1076 pats
['longopt'] = r
'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1077 pats
['option'] = r
'(%(shortopt)s|%(longopt)s)' % pats
1079 for format
in enum
.formats
:
1080 pats
[format
] = '(?P<%s>%s%s%s)' % (
1081 format
, re
.escape(enum
.formatinfo
[format
].prefix
),
1082 pats
['enum'], re
.escape(enum
.formatinfo
[format
].suffix
))
1085 'bullet': u
'[-+*\u2022\u2023\u2043]( +|$)',
1086 'enumerator': r
'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats
,
1087 'field_marker': r
':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1088 'option_marker': r
'%(option)s(, %(option)s)*( +| ?$)' % pats
,
1089 'doctest': r
'>>>( +|$)',
1090 'line_block': r
'\|( +|$)',
1091 'grid_table_top': grid_table_top_pat
,
1092 'simple_table_top': simple_table_top_pat
,
1093 'explicit_markup': r
'\.\.( +|$)',
1094 'anonymous': r
'__( +|$)',
1095 'line': r
'(%(nonalphanum7bit)s)\1* *$' % pats
,
1097 initial_transitions
= (
1111 def indent(self
, match
, context
, next_state
):
1113 indented
, indent
, line_offset
, blank_finish
= \
1114 self
.state_machine
.get_indented()
1115 elements
= self
.block_quote(indented
, line_offset
)
1116 self
.parent
+= elements
1117 if not blank_finish
:
1118 self
.parent
+= self
.unindent_warning('Block quote')
1119 return context
, next_state
, []
1121 def block_quote(self
, indented
, line_offset
):
1128 new_line_offset
) = self
.split_attribution(indented
, line_offset
)
1129 blockquote
= nodes
.block_quote()
1130 self
.nested_parse(blockquote_lines
, line_offset
, blockquote
)
1131 elements
.append(blockquote
)
1132 if attribution_lines
:
1133 attribution
, messages
= self
.parse_attribution(
1134 attribution_lines
, attribution_offset
)
1135 blockquote
+= attribution
1136 elements
+= messages
1137 line_offset
= new_line_offset
1138 while indented
and not indented
[0]:
1139 indented
= indented
[1:]
1143 # U+2014 is an em-dash:
1144 attribution_pattern
= re
.compile(u
'(---?(?!-)|\u2014) *(?=[^ \\n])',
1147 def split_attribution(self
, indented
, line_offset
):
1149 Check for a block quote attribution and split it off:
1151 * First line after a blank line must begin with a dash ("--", "---",
1152 em-dash; matches `self.attribution_pattern`).
1153 * Every line after that must have consistent indentation.
1154 * Attributions must be preceded by block quote content.
1156 Return a tuple of: (block quote content lines, content offset,
1157 attribution lines, attribution offset, remaining indented lines).
1160 nonblank_seen
= False
1161 for i
in range(len(indented
)):
1162 line
= indented
[i
].rstrip()
1164 if nonblank_seen
and blank
== i
- 1: # last line blank
1165 match
= self
.attribution_pattern
.match(line
)
1167 attribution_end
, indent
= self
.check_attribution(
1170 a_lines
= indented
[i
:attribution_end
]
1171 a_lines
.trim_left(match
.end(), end
=1)
1172 a_lines
.trim_left(indent
, start
=1)
1173 return (indented
[:i
], a_lines
,
1174 i
, indented
[attribution_end
:],
1175 line_offset
+ attribution_end
)
1176 nonblank_seen
= True
1180 return (indented
, None, None, None, None)
1182 def check_attribution(self
, indented
, attribution_start
):
1184 Check attribution shape.
1185 Return the index past the end of the attribution, and the indent.
1188 i
= attribution_start
+ 1
1189 for i
in range(attribution_start
+ 1, len(indented
)):
1190 line
= indented
[i
].rstrip()
1194 indent
= len(line
) - len(line
.lstrip())
1195 elif len(line
) - len(line
.lstrip()) != indent
:
1196 return None, None # bad shape; not an attribution
1198 # return index of line after last attribution line:
1200 return i
, (indent
or 0)
def parse_attribution(self, indented, line_offset):
    """
    Build the attribution node for a block quote.

    The lines of `indented` are joined with newlines and trailing
    whitespace is stripped. The text is parsed for inline markup at the
    state machine's absolute line number plus `line_offset`, and the
    resulting nodes become the children of a `nodes.attribution`. The
    node's ``source`` and ``line`` are taken from the state machine for
    that same line number.

    Return a 2-tuple: (attribution node, messages from inline parsing).
    """
    machine = self.state_machine
    text = '\n'.join(indented).rstrip()
    lineno = machine.abs_line_number() + line_offset
    children, messages = self.inline_text(text, lineno)
    attribution = nodes.attribution(text, '', *children)
    source, line = machine.get_source_and_line(lineno)
    attribution.source = source
    attribution.line = line
    return attribution, messages
1210 def bullet(self
, match
, context
, next_state
):
1211 """Bullet list item."""
1212 bulletlist
= nodes
.bullet_list()
1213 self
.parent
+= bulletlist
1214 bulletlist
['bullet'] = match
.string
[0]
1215 i
, blank_finish
= self
.list_item(match
.end())
1217 offset
= self
.state_machine
.line_offset
+ 1 # next line
1218 new_line_offset
, blank_finish
= self
.nested_list_parse(
1219 self
.state_machine
.input_lines
[offset
:],
1220 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1221 node
=bulletlist
, initial_state
='BulletList',
1222 blank_finish
=blank_finish
)
1223 self
.goto_line(new_line_offset
)
1224 if not blank_finish
:
1225 self
.parent
+= self
.unindent_warning('Bullet list')
1226 return [], next_state
, []
1228 def list_item(self
, indent
):
1229 if self
.state_machine
.line
[indent
:]:
1230 indented
, line_offset
, blank_finish
= (
1231 self
.state_machine
.get_known_indented(indent
))
1233 indented
, indent
, line_offset
, blank_finish
= (
1234 self
.state_machine
.get_first_known_indented(indent
))
1235 listitem
= nodes
.list_item('\n'.join(indented
))
1237 self
.nested_parse(indented
, input_offset
=line_offset
,
1239 return listitem
, blank_finish
1241 def enumerator(self
, match
, context
, next_state
):
1242 """Enumerated List Item"""
1243 format
, sequence
, text
, ordinal
= self
.parse_enumerator(match
)
1244 if not self
.is_enumerated_list_item(ordinal
, sequence
, format
):
1245 raise statemachine
.TransitionCorrection('text')
1246 enumlist
= nodes
.enumerated_list()
1247 self
.parent
+= enumlist
1249 enumlist
['enumtype'] = 'arabic'
1251 enumlist
['enumtype'] = sequence
1252 enumlist
['prefix'] = self
.enum
.formatinfo
[format
].prefix
1253 enumlist
['suffix'] = self
.enum
.formatinfo
[format
].suffix
1255 enumlist
['start'] = ordinal
1256 msg
= self
.reporter
.info(
1257 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1260 listitem
, blank_finish
= self
.list_item(match
.end())
1261 enumlist
+= listitem
1262 offset
= self
.state_machine
.line_offset
+ 1 # next line
1263 newline_offset
, blank_finish
= self
.nested_list_parse(
1264 self
.state_machine
.input_lines
[offset
:],
1265 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1266 node
=enumlist
, initial_state
='EnumeratedList',
1267 blank_finish
=blank_finish
,
1268 extra_settings
={'lastordinal': ordinal
,
1270 'auto': sequence
== '#'})
1271 self
.goto_line(newline_offset
)
1272 if not blank_finish
:
1273 self
.parent
+= self
.unindent_warning('Enumerated list')
1274 return [], next_state
, []
1276 def parse_enumerator(self
, match
, expected_sequence
=None):
1278 Analyze an enumerator and return the results.
1281 - the enumerator format ('period', 'parens', or 'rparen'),
1282 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1283 - the text of the enumerator, stripped of formatting, and
1284 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1285 ``None`` is returned for invalid enumerator text).
1287 The enumerator format has already been determined by the regular
1288 expression match. If `expected_sequence` is given, that sequence is
1289 tried first. If not, we check for Roman numeral 1. This way,
1290 single-character Roman numerals (which are also alphabetical) can be
1291 matched. If no sequence has been matched, all sequences are checked in
1294 groupdict
= match
.groupdict()
1296 for format
in self
.enum
.formats
:
1297 if groupdict
[format
]: # was this the format matched?
1298 break # yes; keep `format`
1299 else: # shouldn't happen
1300 raise ParserError('enumerator format not matched')
1301 text
= groupdict
[format
][self
.enum
.formatinfo
[format
].start
1302 :self
.enum
.formatinfo
[format
].end
]
1305 elif expected_sequence
:
1307 if self
.enum
.sequenceregexps
[expected_sequence
].match(text
):
1308 sequence
= expected_sequence
1309 except KeyError: # shouldn't happen
1310 raise ParserError('unknown enumerator sequence: %s'
1313 sequence
= 'lowerroman'
1315 sequence
= 'upperroman'
1317 for sequence
in self
.enum
.sequences
:
1318 if self
.enum
.sequenceregexps
[sequence
].match(text
):
1320 else: # shouldn't happen
1321 raise ParserError('enumerator sequence not matched')
1326 ordinal
= self
.enum
.converters
[sequence
](text
)
1327 except roman
.InvalidRomanNumeralError
:
1329 return format
, sequence
, text
, ordinal
1331 def is_enumerated_list_item(self
, ordinal
, sequence
, format
):
1333 Check validity based on the ordinal value and the second line.
1335 Return true if the ordinal is valid and the second line is blank,
1336 indented, or starts with the next enumerator or an auto-enumerator.
1341 next_line
= self
.state_machine
.next_line()
1342 except EOFError: # end of input lines
1343 self
.state_machine
.previous_line()
1346 self
.state_machine
.previous_line()
1347 if not next_line
[:1].strip(): # blank or indented
1349 result
= self
.make_enumerator(ordinal
+ 1, sequence
, format
)
1351 next_enumerator
, auto_enumerator
= result
1353 if ( next_line
.startswith(next_enumerator
) or
1354 next_line
.startswith(auto_enumerator
) ):
1360 def make_enumerator(self
, ordinal
, sequence
, format
):
1362 Construct and return the next enumerated list item marker, and an
1363 auto-enumerator ("#" instead of the regular enumerator).
1365 Return ``None`` for invalid (out of range) ordinals.
1369 elif sequence
== 'arabic':
1370 enumerator
= str(ordinal
)
1372 if sequence
.endswith('alpha'):
1375 enumerator
= chr(ordinal
+ ord('a') - 1)
1376 elif sequence
.endswith('roman'):
1378 enumerator
= roman
.toRoman(ordinal
)
1379 except roman
.RomanError
:
1381 else: # shouldn't happen
1382 raise ParserError('unknown enumerator sequence: "%s"'
1384 if sequence
.startswith('lower'):
1385 enumerator
= enumerator
.lower()
1386 elif sequence
.startswith('upper'):
1387 enumerator
= enumerator
.upper()
1388 else: # shouldn't happen
1389 raise ParserError('unknown enumerator sequence: "%s"'
1391 formatinfo
= self
.enum
.formatinfo
[format
]
1392 next_enumerator
= (formatinfo
.prefix
+ enumerator
+ formatinfo
.suffix
1394 auto_enumerator
= formatinfo
.prefix
+ '#' + formatinfo
.suffix
+ ' '
1395 return next_enumerator
, auto_enumerator
1397 def field_marker(self
, match
, context
, next_state
):
1398 """Field list item."""
1399 field_list
= nodes
.field_list()
1400 self
.parent
+= field_list
1401 field
, blank_finish
= self
.field(match
)
1403 offset
= self
.state_machine
.line_offset
+ 1 # next line
1404 newline_offset
, blank_finish
= self
.nested_list_parse(
1405 self
.state_machine
.input_lines
[offset
:],
1406 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1407 node
=field_list
, initial_state
='FieldList',
1408 blank_finish
=blank_finish
)
1409 self
.goto_line(newline_offset
)
1410 if not blank_finish
:
1411 self
.parent
+= self
.unindent_warning('Field list')
1412 return [], next_state
, []
1414 def field(self
, match
):
1415 name
= self
.parse_field_marker(match
)
1416 src
, srcline
= self
.state_machine
.get_source_and_line()
1417 lineno
= self
.state_machine
.abs_line_number()
1418 indented
, indent
, line_offset
, blank_finish
= \
1419 self
.state_machine
.get_first_known_indented(match
.end())
1420 field_node
= nodes
.field()
1421 field_node
.source
= src
1422 field_node
.line
= srcline
1423 name_nodes
, name_messages
= self
.inline_text(name
, lineno
)
1424 field_node
+= nodes
.field_name(name
, '', *name_nodes
)
1425 field_body
= nodes
.field_body('\n'.join(indented
), *name_messages
)
1426 field_node
+= field_body
1428 self
.parse_field_body(indented
, line_offset
, field_body
)
1429 return field_node
, blank_finish
1431 def parse_field_marker(self
, match
):
1432 """Extract & return field name from a field marker match."""
1433 field
= match
.group()[1:] # strip off leading ':'
1434 field
= field
[:field
.rfind(':')] # strip off trailing ':' etc.
def parse_field_body(self, indented, offset, node):
    """
    Parse the field body lines `indented` into `node`.

    Delegates to `self.nested_parse()`, passing `offset` as the input
    offset. Nothing is returned.
    """
    parse = self.nested_parse
    parse(indented, input_offset=offset, node=node)
1440 def option_marker(self
, match
, context
, next_state
):
1441 """Option list item."""
1442 optionlist
= nodes
.option_list()
1444 listitem
, blank_finish
= self
.option_list_item(match
)
1445 except MarkupError
, error
:
1446 # This shouldn't happen; pattern won't match.
1447 msg
= self
.reporter
.error(u
'Invalid option list marker: %s' %
1450 indented
, indent
, line_offset
, blank_finish
= \
1451 self
.state_machine
.get_first_known_indented(match
.end())
1452 elements
= self
.block_quote(indented
, line_offset
)
1453 self
.parent
+= elements
1454 if not blank_finish
:
1455 self
.parent
+= self
.unindent_warning('Option list')
1456 return [], next_state
, []
1457 self
.parent
+= optionlist
1458 optionlist
+= listitem
1459 offset
= self
.state_machine
.line_offset
+ 1 # next line
1460 newline_offset
, blank_finish
= self
.nested_list_parse(
1461 self
.state_machine
.input_lines
[offset
:],
1462 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1463 node
=optionlist
, initial_state
='OptionList',
1464 blank_finish
=blank_finish
)
1465 self
.goto_line(newline_offset
)
1466 if not blank_finish
:
1467 self
.parent
+= self
.unindent_warning('Option list')
1468 return [], next_state
, []
1470 def option_list_item(self
, match
):
1471 offset
= self
.state_machine
.abs_line_offset()
1472 options
= self
.parse_option_marker(match
)
1473 indented
, indent
, line_offset
, blank_finish
= \
1474 self
.state_machine
.get_first_known_indented(match
.end())
1475 if not indented
: # not an option list item
1476 self
.goto_line(offset
)
1477 raise statemachine
.TransitionCorrection('text')
1478 option_group
= nodes
.option_group('', *options
)
1479 description
= nodes
.description('\n'.join(indented
))
1480 option_list_item
= nodes
.option_list_item('', option_group
,
1483 self
.nested_parse(indented
, input_offset
=line_offset
,
1485 return option_list_item
, blank_finish
1487 def parse_option_marker(self
, match
):
1489 Return a list of `node.option` and `node.option_argument` objects,
1490 parsed from an option marker match.
1492 :Exception: `MarkupError` for invalid option markers.
1495 optionstrings
= match
.group().rstrip().split(', ')
1496 for optionstring
in optionstrings
:
1497 tokens
= optionstring
.split()
1499 firstopt
= tokens
[0].split('=', 1)
1500 if len(firstopt
) > 1:
1501 # "--opt=value" form
1502 tokens
[:1] = firstopt
1504 elif (len(tokens
[0]) > 2
1505 and ((tokens
[0].startswith('-')
1506 and not tokens
[0].startswith('--'))
1507 or tokens
[0].startswith('+'))):
1509 tokens
[:1] = [tokens
[0][:2], tokens
[0][2:]]
1511 if len(tokens
) > 1 and (tokens
[1].startswith('<')
1512 and tokens
[-1].endswith('>')):
1513 # "-o <value1 value2>" form; join all values into one token
1514 tokens
[1:] = [' '.join(tokens
[1:])]
1515 if 0 < len(tokens
) <= 2:
1516 option
= nodes
.option(optionstring
)
1517 option
+= nodes
.option_string(tokens
[0], tokens
[0])
1519 option
+= nodes
.option_argument(tokens
[1], tokens
[1],
1520 delimiter
=delimiter
)
1521 optlist
.append(option
)
1524 'wrong number of option tokens (=%s), should be 1 or 2: '
1525 '"%s"' % (len(tokens
), optionstring
))
def doctest(self, match, context, next_state):
    """
    Doctest block (first line matched the ``>>>`` pattern).

    The current text block is fetched from the state machine, joined with
    newlines, and appended to the parent as a `nodes.doctest_block` that
    uses the same text as raw source and as content.

    Return an empty context, `next_state` unchanged, and an empty result
    list.
    """
    lines = self.state_machine.get_text_block()
    text = '\n'.join(lines)
    block = nodes.doctest_block(text, text)
    self.parent += block
    return [], next_state, []
1533 def line_block(self
, match
, context
, next_state
):
1534 """First line of a line block."""
1535 block
= nodes
.line_block()
1536 self
.parent
+= block
1537 lineno
= self
.state_machine
.abs_line_number()
1538 line
, messages
, blank_finish
= self
.line_block_line(match
, lineno
)
1540 self
.parent
+= messages
1541 if not blank_finish
:
1542 offset
= self
.state_machine
.line_offset
+ 1 # next line
1543 new_line_offset
, blank_finish
= self
.nested_list_parse(
1544 self
.state_machine
.input_lines
[offset
:],
1545 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1546 node
=block
, initial_state
='LineBlock',
1548 self
.goto_line(new_line_offset
)
1549 if not blank_finish
:
1550 self
.parent
+= self
.reporter
.warning(
1551 'Line block ends without a blank line.',
1554 if block
[0].indent
is None:
1556 self
.nest_line_block_lines(block
)
1557 return [], next_state
, []
1559 def line_block_line(self
, match
, lineno
):
1560 """Return one line element of a line_block."""
1561 indented
, indent
, line_offset
, blank_finish
= \
1562 self
.state_machine
.get_first_known_indented(match
.end(),
1564 text
= u
'\n'.join(indented
)
1565 text_nodes
, messages
= self
.inline_text(text
, lineno
)
1566 line
= nodes
.line(text
, '', *text_nodes
)
1567 if match
.string
.rstrip() != '|': # not empty
1568 line
.indent
= len(match
.group(1)) - 1
1569 return line
, messages
, blank_finish
def nest_line_block_lines(self, block):
    """
    Fill in missing line indents, then nest the lines of `block`.

    Every item after the first whose ``indent`` is None inherits the
    (possibly just inherited) indent of the item before it. The first item
    is never modified here. Afterwards `block` is handed to
    `self.nest_line_block_segment()`.
    """
    position = 1
    total = len(block)
    while position < total:
        current = block[position]
        if current.indent is None:
            current.indent = block[position - 1].indent
        position += 1
    self.nest_line_block_segment(block)
1577 def nest_line_block_segment(self
, block
):
1578 indents
= [item
.indent
for item
in block
]
1579 least
= min(indents
)
1581 new_block
= nodes
.line_block()
1583 if item
.indent
> least
:
1584 new_block
.append(item
)
1587 self
.nest_line_block_segment(new_block
)
1588 new_items
.append(new_block
)
1589 new_block
= nodes
.line_block()
1590 new_items
.append(item
)
1592 self
.nest_line_block_segment(new_block
)
1593 new_items
.append(new_block
)
1594 block
[:] = new_items
def grid_table_top(self, match, context, next_state):
    """
    Top border of a grid ("full") table.

    Delegates to `self.table_top()`, supplying `self.isolate_grid_table`
    as the isolating function and `tableparser.GridTableParser` as the
    parser class. The result of `table_top()` is returned unchanged.
    """
    isolate = self.isolate_grid_table
    parser_class = tableparser.GridTableParser
    return self.table_top(match, context, next_state, isolate, parser_class)
def simple_table_top(self, match, context, next_state):
    """
    Top border of a simple table.

    Delegates to `self.table_top()`, supplying `self.isolate_simple_table`
    as the isolating function and `tableparser.SimpleTableParser` as the
    parser class. The result of `table_top()` is returned unchanged.
    """
    isolate = self.isolate_simple_table
    parser_class = tableparser.SimpleTableParser
    return self.table_top(match, context, next_state, isolate, parser_class)
1608 def table_top(self
, match
, context
, next_state
,
1609 isolate_function
, parser_class
):
1610 """Top border of a generic table."""
1611 nodelist
, blank_finish
= self
.table(isolate_function
, parser_class
)
1612 self
.parent
+= nodelist
1613 if not blank_finish
:
1614 msg
= self
.reporter
.warning(
1615 'Blank line required after table.',
1616 line
=self
.state_machine
.abs_line_number()+1)
1618 return [], next_state
, []
1620 def table(self
, isolate_function
, parser_class
):
1621 """Parse a table."""
1622 block
, messages
, blank_finish
= isolate_function()
1625 parser
= parser_class()
1626 tabledata
= parser
.parse(block
)
1627 tableline
= (self
.state_machine
.abs_line_number() - len(block
)
1629 table
= self
.build_table(tabledata
, tableline
)
1630 nodelist
= [table
] + messages
1631 except tableparser
.TableMarkupError
, err
:
1632 nodelist
= self
.malformed_table(block
, ' '.join(err
.args
),
1633 offset
=err
.offset
) + messages
1636 return nodelist
, blank_finish
1638 def isolate_grid_table(self
):
1642 block
= self
.state_machine
.get_text_block(flush_left
=True)
1643 except statemachine
.UnexpectedIndentationError
, err
:
1644 block
, src
, srcline
= err
.args
1645 messages
.append(self
.reporter
.error('Unexpected indentation.',
1646 source
=src
, line
=srcline
))
1649 # for East Asian chars:
1650 block
.pad_double_width(self
.double_width_pad_char
)
1651 width
= len(block
[0].strip())
1652 for i
in range(len(block
)):
1653 block
[i
] = block
[i
].strip()
1654 if block
[i
][0] not in '+|': # check left edge
1656 self
.state_machine
.previous_line(len(block
) - i
)
1659 if not self
.grid_table_top_pat
.match(block
[-1]): # find bottom
1661 # from second-last to third line of table:
1662 for i
in range(len(block
) - 2, 1, -1):
1663 if self
.grid_table_top_pat
.match(block
[i
]):
1664 self
.state_machine
.previous_line(len(block
) - i
+ 1)
1668 messages
.extend(self
.malformed_table(block
))
1669 return [], messages
, blank_finish
1670 for i
in range(len(block
)): # check right edge
1671 if len(block
[i
]) != width
or block
[i
][-1] not in '+|':
1672 messages
.extend(self
.malformed_table(block
))
1673 return [], messages
, blank_finish
1674 return block
, messages
, blank_finish
1676 def isolate_simple_table(self
):
1677 start
= self
.state_machine
.line_offset
1678 lines
= self
.state_machine
.input_lines
1679 limit
= len(lines
) - 1
1680 toplen
= len(lines
[start
].strip())
1681 pattern_match
= self
.simple_table_border_pat
.match
1687 match
= pattern_match(line
)
1689 if len(line
.strip()) != toplen
:
1690 self
.state_machine
.next_line(i
- start
)
1691 messages
= self
.malformed_table(
1692 lines
[start
:i
+1], 'Bottom/header table border does '
1693 'not match top border.')
1694 return [], messages
, i
== limit
or not lines
[i
+1].strip()
1697 if found
== 2 or i
== limit
or not lines
[i
+1].strip():
1701 else: # reached end of input_lines
1703 extra
= ' or no blank line after table bottom'
1704 self
.state_machine
.next_line(found_at
- start
)
1705 block
= lines
[start
:found_at
+1]
1708 self
.state_machine
.next_line(i
- start
- 1)
1709 block
= lines
[start
:]
1710 messages
= self
.malformed_table(
1711 block
, 'No bottom table border found%s.' % extra
)
1712 return [], messages
, not extra
1713 self
.state_machine
.next_line(end
- start
)
1714 block
= lines
[start
:end
+1]
1715 # for East Asian chars:
1716 block
.pad_double_width(self
.double_width_pad_char
)
1717 return block
, [], end
== limit
or not lines
[end
+1].strip()
1719 def malformed_table(self
, block
, detail
='', offset
=0):
1720 block
.replace(self
.double_width_pad_char
, '')
1721 data
= '\n'.join(block
)
1722 message
= 'Malformed table.'
1723 startline
= self
.state_machine
.abs_line_number() - len(block
) + 1
1725 message
+= '\n' + detail
1726 error
= self
.reporter
.error(message
, nodes
.literal_block(data
, data
),
1727 line
=startline
+offset
)
1730 def build_table(self
, tabledata
, tableline
, stub_columns
=0):
1731 colwidths
, headrows
, bodyrows
= tabledata
1732 table
= nodes
.table()
1733 tgroup
= nodes
.tgroup(cols
=len(colwidths
))
1735 for colwidth
in colwidths
:
1736 colspec
= nodes
.colspec(colwidth
=colwidth
)
1738 colspec
.attributes
['stub'] = 1
1742 thead
= nodes
.thead()
1744 for row
in headrows
:
1745 thead
+= self
.build_table_row(row
, tableline
)
1746 tbody
= nodes
.tbody()
1748 for row
in bodyrows
:
1749 tbody
+= self
.build_table_row(row
, tableline
)
1752 def build_table_row(self
, rowdata
, tableline
):
1754 for cell
in rowdata
:
1757 morerows
, morecols
, offset
, cellblock
= cell
1760 attributes
['morerows'] = morerows
1762 attributes
['morecols'] = morecols
1763 entry
= nodes
.entry(**attributes
)
1765 if ''.join(cellblock
):
1766 self
.nested_parse(cellblock
, input_offset
=tableline
+offset
,
1772 """Patterns and constants used for explicit markup recognition."""
1774 explicit
.patterns
= Struct(
1775 target
=re
.compile(r
"""
1777 _ # anonymous target
1779 (?!_) # no underscore at the beginning
1780 (?P<quote>`?) # optional open quote
1781 (?![ `]) # first char. not space or
1783 (?P<name> # reference name
1786 %(non_whitespace_escape_before)s
1787 (?P=quote) # close quote if open quote used
1789 (?<!(?<!\x00):) # no unescaped colon at end
1790 %(non_whitespace_escape_before)s
1791 [ ]? # optional space
1792 : # end of reference name
1793 ([ ]+|$) # followed by whitespace
1794 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1795 reference
=re
.compile(r
"""
1797 (?P<simple>%(simplename)s)_
1801 (?P<phrase>.+?) # hyperlink phrase
1802 %(non_whitespace_escape_before)s
1803 `_ # close backquote,
1807 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1808 substitution
=re
.compile(r
"""
1810 (?![ ]) # first char. not space
1811 (?P<name>.+?) # substitution text
1812 %(non_whitespace_escape_before)s
1813 \| # close delimiter
1815 ([ ]+|$) # followed by whitespace
1816 """ % vars(Inliner
),
1817 re
.VERBOSE | re
.UNICODE
),)
1819 def footnote(self
, match
):
1820 src
, srcline
= self
.state_machine
.get_source_and_line()
1821 indented
, indent
, offset
, blank_finish
= \
1822 self
.state_machine
.get_first_known_indented(match
.end())
1823 label
= match
.group(1)
1824 name
= normalize_name(label
)
1825 footnote
= nodes
.footnote('\n'.join(indented
))
1826 footnote
.source
= src
1827 footnote
.line
= srcline
1828 if name
[0] == '#': # auto-numbered
1829 name
= name
[1:] # autonumber label
1830 footnote
['auto'] = 1
1832 footnote
['names'].append(name
)
1833 self
.document
.note_autofootnote(footnote
)
1834 elif name
== '*': # auto-symbol
1836 footnote
['auto'] = '*'
1837 self
.document
.note_symbol_footnote(footnote
)
1838 else: # manually numbered
1839 footnote
+= nodes
.label('', label
)
1840 footnote
['names'].append(name
)
1841 self
.document
.note_footnote(footnote
)
1843 self
.document
.note_explicit_target(footnote
, footnote
)
1845 self
.document
.set_id(footnote
, footnote
)
1847 self
.nested_parse(indented
, input_offset
=offset
, node
=footnote
)
1848 return [footnote
], blank_finish
1850 def citation(self
, match
):
1851 src
, srcline
= self
.state_machine
.get_source_and_line()
1852 indented
, indent
, offset
, blank_finish
= \
1853 self
.state_machine
.get_first_known_indented(match
.end())
1854 label
= match
.group(1)
1855 name
= normalize_name(label
)
1856 citation
= nodes
.citation('\n'.join(indented
))
1857 citation
.source
= src
1858 citation
.line
= srcline
1859 citation
+= nodes
.label('', label
)
1860 citation
['names'].append(name
)
1861 self
.document
.note_citation(citation
)
1862 self
.document
.note_explicit_target(citation
, citation
)
1864 self
.nested_parse(indented
, input_offset
=offset
, node
=citation
)
1865 return [citation
], blank_finish
1867 def hyperlink_target(self
, match
):
1868 pattern
= self
.explicit
.patterns
.target
1869 lineno
= self
.state_machine
.abs_line_number()
1870 block
, indent
, offset
, blank_finish
= \
1871 self
.state_machine
.get_first_known_indented(
1872 match
.end(), until_blank
=True, strip_indent
=False)
1873 blocktext
= match
.string
[:match
.end()] + '\n'.join(block
)
1874 block
= [escape2null(line
) for line
in block
]
1878 targetmatch
= pattern
.match(escaped
)
1883 escaped
+= block
[blockindex
]
1885 raise MarkupError('malformed hyperlink target.')
1886 del block
[:blockindex
]
1887 block
[0] = (block
[0] + ' ')[targetmatch
.end()-len(escaped
)-1:].strip()
1888 target
= self
.make_target(block
, blocktext
, lineno
,
1889 targetmatch
.group('name'))
1890 return [target
], blank_finish
1892 def make_target(self
, block
, block_text
, lineno
, target_name
):
1893 target_type
, data
= self
.parse_target(block
, block_text
, lineno
)
1894 if target_type
== 'refname':
1895 target
= nodes
.target(block_text
, '', refname
=normalize_name(data
))
1896 target
.indirect_reference_name
= data
1897 self
.add_target(target_name
, '', target
, lineno
)
1898 self
.document
.note_indirect_target(target
)
1900 elif target_type
== 'refuri':
1901 target
= nodes
.target(block_text
, '')
1902 self
.add_target(target_name
, data
, target
, lineno
)
1907 def parse_target(self
, block
, block_text
, lineno
):
1909 Determine the type of reference of a target.
1911 :Return: A 2-tuple, one of:
1913 - 'refname' and the indirect reference name
1914 - 'refuri' and the URI
1915 - 'malformed' and a system_message node
1917 if block
and block
[-1].strip()[-1:] == '_': # possible indirect target
1918 reference
= ' '.join([line
.strip() for line
in block
])
1919 refname
= self
.is_reference(reference
)
1921 return 'refname', refname
1922 reference
= ''.join([''.join(line
.split()) for line
in block
])
1923 return 'refuri', unescape(reference
)
1925 def is_reference(self
, reference
):
1926 match
= self
.explicit
.patterns
.reference
.match(
1927 whitespace_normalize_name(reference
))
1930 return unescape(match
.group('simple') or match
.group('phrase'))
def add_target(self, targetname, refuri, target, lineno):
    """
    Fill in `target` and register it with the document.

    - Named target: the normalized name is appended to ``target['names']``;
      a non-empty `refuri` is passed through ``self.inliner.adjust_uri``
      and stored, and the node is noted as an explicit target.
    - Anonymous target (empty `targetname`): `refuri`, if any, is stored
      as given and the node is flagged and noted as anonymous.

    Raises `ApplicationError` if ``adjust_uri`` returns a false value.
    """
    target.line = lineno
    if not targetname:
        # anonymous target
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = 1
        self.document.note_anonymous_target(target)
        return
    name = normalize_name(unescape(targetname))
    target['names'].append(name)
    if refuri:
        uri = self.inliner.adjust_uri(refuri)
        if not uri:
            raise ApplicationError('problem with URI: %r' % refuri)
        target['refuri'] = uri
    self.document.note_explicit_target(target, self.parent)
def substitution_def(self, match):
    """
    Parse a substitution definition.

    Returns a 2-tuple: a one-item list and the "blank finish" flag.  The
    list holds the new `nodes.substitution_definition`, or a system
    message when the definition has no contents, contains a disallowed
    element, or ends up empty.

    Raises `MarkupError` if the substitution marker is never completed
    within the indented block.
    """
    pattern = self.explicit.patterns.substitution
    src, srcline = self.state_machine.get_source_and_line()
    (block, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end(), strip_indent=False)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block.disconnect()
    # The marker may span several lines: keep appending lines until the
    # accumulated, escaped text matches the substitution pattern.
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    subdefmatch = pattern.match(escaped)
    while not subdefmatch:
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.')
        subdefmatch = pattern.match(escaped)
    del block[:blockindex]          # strip out the substitution marker
    block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
    if not block[0]:
        del block[0]
        offset += 1
    # Drop trailing blank lines.
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.source = src
    substitution_node.line = srcline
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
        block, input_offset=offset, node=substitution_node,
        initial_state='SubstitutionDef', blank_finish=blank_finish)
    # Move every child that is neither Inline nor Text out to the parent.
    # `kept` is the index of the child under inspection in the live node;
    # it advances only past children that stay.
    kept = 0
    for child in substitution_node[:]:
        if isinstance(child, nodes.Inline) or isinstance(child, nodes.Text):
            kept += 1
            continue
        self.parent += substitution_node[kept]
        del substitution_node[kept]
    for element in substitution_node.traverse(nodes.Element):
        if self.disallowed_inside_substitution_definitions(element):
            pformat = nodes.literal_block('', element.pformat().rstrip())
            msg = self.reporter.error(
                'Substitution definition contains illegal element:',
                pformat, nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
            'Substitution definition "%s" empty or invalid.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish
def disallowed_inside_substitution_definitions(self, node):
    """
    Return 1 if `node` may not appear in a substitution definition, else 0.

    Disallowed: any node with a non-empty 'ids' entry, a reference flagged
    'anonymous', and a footnote reference flagged 'auto'.
    """
    if node['ids']:
        return 1
    if isinstance(node, nodes.reference) and node.get('anonymous'):
        return 1
    if isinstance(node, nodes.footnote_reference) and node.get('auto'):
        return 1
    return 0
def directive(self, match, **option_presets):
    """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
    type_name = match.group(1)
    # The lookup may yield messages even when no class is found; they are
    # attached to the parent either way.
    directive_class, messages = directives.directive(
        type_name, self.memo.language, self.document)
    self.parent += messages
    if not directive_class:
        return self.unknown_directive(type_name)
    return self.run_directive(
        directive_class, match, type_name, option_presets)
def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive then run its directive function.

    Parameters:

    - `directive`: The class implementing the directive.  Must be
      a subclass of `rst.Directive`.

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, defaults for the
      directive options.  Currently, only an "alt" option is passed by
      substitution definitions (value: the substitution name), which may
      be used by an embedded image directive.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.

    A `MarkupError` raised while parsing the directive block, or a
    `DirectiveError` raised by the directive's ``run()``, is turned into
    a system message carrying the directive's source text.
    """
    if isinstance(directive, (FunctionType, MethodType)):
        # Plain function or method: wrap it into a directive class first.
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    lineno = self.state_machine.abs_line_number()
    initial_line_offset = self.state_machine.line_offset
    indented, indent, line_offset, blank_finish \
        = self.state_machine.get_first_known_indented(match.end(),
                                                      strip_top=0)
    # Source text of the whole directive, first line to current line.
    block_text = '\n'.join(self.state_machine.input_lines[
        initial_line_offset : self.state_machine.line_offset + 1])
    try:
        arguments, options, content, content_offset = (
            self.parse_directive_block(indented, line_offset,
                                       directive, option_presets))
    except MarkupError as detail:
        error = self.reporter.error(
            'Error in "%s" directive:\n%s.' % (type_name,
                                               ' '.join(detail.args)),
            nodes.literal_block(block_text, block_text), line=lineno)
        return [error], blank_finish
    directive_instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = directive_instance.run()
    except docutils.parsers.rst.DirectiveError as error:
        msg_node = self.reporter.system_message(error.level, error.msg,
                                                line=lineno)
        msg_node += nodes.literal_block(block_text, block_text)
        result = [msg_node]
    assert isinstance(result, list), \
        'Directive "%s" must return a list of nodes.' % type_name
    for i, item in enumerate(result):
        assert isinstance(item, nodes.Node), \
            ('Directive "%s" returned non-Node object (index %s): %r'
             % (type_name, i, item))
    return (result,
            blank_finish or self.state_machine.is_next_line_blank())
def parse_directive_block(self, indented, line_offset, directive,
                          option_presets):
    """
    Split a directive's indented block into arguments, options, content.

    Returns the 4-tuple ``(arguments, options, content, content_offset)``.
    Raises `MarkupError` if content is present but the directive's
    ``has_content`` is false; errors raised while parsing options or
    arguments propagate unchanged.
    """
    option_spec = directive.option_spec
    has_content = directive.has_content
    # Drop a leading blank line and any trailing blank lines.
    if indented and not indented[0].strip():
        indented.trim_start()
        line_offset += 1
    while indented and not indented[-1].strip():
        indented.trim_end()
    if indented and (directive.required_arguments
                     or directive.optional_arguments
                     or option_spec):
        # The argument/option block runs up to the first blank line, or to
        # the end of the block if there is none.
        for i, text_line in enumerate(indented):
            if not text_line.strip():
                break
        else:
            i += 1
        arg_block = indented[:i]
        content = indented[i+1:]
        content_offset = line_offset + i + 1
    else:
        content = indented
        content_offset = line_offset
        arg_block = []
    if option_spec:
        options, arg_block = self.parse_directive_options(
            option_presets, option_spec, arg_block)
    else:
        options = {}
    if arg_block and not (directive.required_arguments
                          or directive.optional_arguments):
        # No arguments are accepted: what is left of the first block is
        # content after all.
        content = arg_block + indented[i:]
        content_offset = line_offset
        arg_block = []
    while content and not content[0].strip():
        content.trim_start()
        content_offset += 1
    if directive.required_arguments or directive.optional_arguments:
        arguments = self.parse_directive_arguments(
            directive, arg_block)
    else:
        arguments = []
    if content and not has_content:
        raise MarkupError('no content permitted')
    return (arguments, options, content, content_offset)
def parse_directive_options(self, option_presets, option_spec, arg_block):
    """
    Separate and parse the option lines of a directive's argument block.

    The option block starts at the first line matching the 'field_marker'
    pattern.  Returns ``(options, arg_block)``: a copy of `option_presets`
    updated with the parsed options, and the argument lines before the
    option block.  Raises `MarkupError` with the error string reported by
    `parse_extension_options` on failure.
    """
    options = option_presets.copy()
    opt_block = []
    for index, line in enumerate(arg_block):
        if re.match(Body.patterns['field_marker'], line):
            opt_block = arg_block[index:]
            arg_block = arg_block[:index]
            break
    if opt_block:
        success, data = self.parse_extension_options(option_spec,
                                                     opt_block)
        if not success:
            # data is an error string
            raise MarkupError(data)
        # data is a dict of options
        options.update(data)
    return options, arg_block
def parse_directive_arguments(self, directive, arg_block):
    """
    Split the argument block of a directive into a list of arguments.

    Raises `MarkupError` if fewer than ``directive.required_arguments``
    are supplied, or if more than required + optional are supplied and
    ``directive.final_argument_whitespace`` is false.  When it is true,
    the surplus text is kept as part of the final argument.
    """
    required = directive.required_arguments
    optional = directive.optional_arguments
    limit = required + optional
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    supplied = len(arguments)
    if supplied < required:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (required, supplied))
    if supplied > limit:
        if not directive.final_argument_whitespace:
            raise MarkupError(
                'maximum %s argument(s) allowed, %s supplied'
                % (limit, supplied))
        arguments = arg_text.split(None, limit - 1)
    return arguments
def parse_extension_options(self, option_spec, datalines):
    """
    Parse `datalines` for a field list containing extension options
    matching `option_spec`.

    :Parameters:
        - `option_spec`: a mapping of option name to conversion
          function, which should raise an exception on bad input.
        - `datalines`: a list of input strings.

    :Return:
        - Success value, 1 or 0.
        - An option dictionary on success, an error string on failure.
    """
    node = nodes.field_list()
    newline_offset, blank_finish = self.nested_list_parse(
        datalines, 0, node, initial_state='ExtensionOptions',
        blank_finish=1)
    if newline_offset != len(datalines): # incomplete parse of block
        return 0, 'invalid option block'
    try:
        options = utils.extract_extension_options(node, option_spec)
    except KeyError as detail:
        return 0, ('unknown option: "%s"' % detail.args[0])
    except (ValueError, TypeError) as detail:
        return 0, ('invalid option value: %s' % ' '.join(detail.args))
    except utils.ExtensionOptionError as detail:
        return 0, ('invalid option data: %s' % ' '.join(detail.args))
    if blank_finish:
        return 1, options
    else:
        return 0, 'option data incompletely parsed'
def unknown_directive(self, type_name):
    """
    Report a directive whose type could not be resolved.

    Returns a one-item list holding an error message, which quotes the
    directive's block as a literal block, and the "blank finish" flag.
    """
    lineno = self.state_machine.abs_line_number()
    (indented, _indent, _offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         0, strip_indent=False)
    text = '\n'.join(indented)
    quoted = nodes.literal_block(text, text)
    error = self.reporter.error(
        'Unknown directive type "%s".' % type_name, quoted, line=lineno)
    return [error], blank_finish
def comment(self, match):
    """
    Parse a comment; return a one-item node list and "blank finish".

    A comment marker with nothing after it, followed by a blank line,
    yields an empty comment node with a true "blank finish".
    """
    remainder = match.string[match.end():]
    if not remainder.strip() and self.state_machine.is_next_line_blank():
        # an empty comment?
        return [nodes.comment()], 1 # "A tiny but practical wart."
    (indented, _indent, _offset,
     blank_finish) = self.state_machine.get_first_known_indented(match.end())
    # Trailing blank lines are not part of the comment text.
    while indented and not indented[-1].strip():
        indented.trim_end()
    text = '\n'.join(indented)
    return [nodes.comment(text, text)], blank_finish
# Table of (method, compiled pattern) pairs, one per explicit markup
# construct.  `explicit_construct` walks it in order and calls the method
# of the first pattern matching the line, so the order of the entries
# matters.
explicit.constructs = [
      (footnote,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[
                  (                 # footnote label:
                      [0-9]+          # manually numbered footnote
                    |               # *OR*
                      \#              # anonymous auto-numbered footnote
                    |               # *OR*
                      \#%s            # auto-number ed?) footnote label
                    |               # *OR*
                      \*              # auto-symbol footnote
                  )
                  \]
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (citation,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[(%s)\]          # citation label
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (hyperlink_target,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  _                 # target indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE | re.UNICODE)),
      (substitution_def,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \|                # substitution indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE | re.UNICODE)),
      (directive,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  (%s)              # directive name
                  [ ]?              # optional space
                  ::                # directive delimiter
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
def explicit_markup(self, match, context, next_state):
    """Footnotes, hyperlink targets, directives, comments."""
    construct_nodes, ended_blank = self.explicit_construct(match)
    self.parent += construct_nodes
    # Let a nested parse pick up any explicit constructs that follow.
    self.explicit_list(ended_blank)
    return [], next_state, []
def explicit_construct(self, match):
    """
    Determine which explicit construct this is, parse & return it.

    The patterns in ``self.explicit.constructs`` are tried in order; the
    method paired with the first matching pattern is called and its result
    returned.  If that method raises `MarkupError`, or no pattern matches,
    the line is parsed as a comment instead and any warning produced from
    the `MarkupError` is appended to the comment's node list.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    errors = []
    for method, pattern in self.explicit.constructs:
        expmatch = pattern.match(match.string)
        if expmatch:
            try:
                return method(self, expmatch)
            except MarkupError as error:
                lineno = self.state_machine.abs_line_number()
                message = ' '.join(error.args)
                errors.append(self.reporter.warning(message, line=lineno))
                # Only the first matching construct is tried.
                break
    nodelist, blank_finish = self.comment(match)
    return nodelist + errors, blank_finish
def explicit_list(self, blank_finish):
    """
    Create a nested state machine for a series of explicit markup
    constructs (including anonymous hyperlink targets).
    """
    state_machine = self.state_machine
    next_line = state_machine.line_offset + 1
    remaining = state_machine.input_lines[next_line:]
    newline_offset, blank_finish = self.nested_list_parse(
        remaining,
        input_offset=state_machine.abs_line_offset() + 1,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=state_machine.match_titles)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Explicit markup')
def anonymous(self, match, context, next_state):
    """Anonymous hyperlink targets."""
    target_nodes, ended_blank = self.anonymous_target(match)
    self.parent += target_nodes
    # Let a nested parse pick up any explicit constructs that follow.
    self.explicit_list(ended_blank)
    return [], next_state, []
def anonymous_target(self, match):
    """
    Parse an anonymous hyperlink target.

    Returns a one-item list with the result of `make_target` (called with
    an empty target name) and the "blank finish" flag.
    """
    lineno = self.state_machine.abs_line_number()
    (block, _indent, _offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         match.end(), until_blank=True)
    # The raw source text is captured before the lines are escaped.
    blocktext = match.string[:match.end()] + '\n'.join(block)
    escaped_block = [escape2null(line) for line in block]
    target = self.make_target(escaped_block, blocktext, lineno, '')
    return [target], blank_finish
def line(self, match, context, next_state):
    """Section title overline or transition marker."""
    state_machine = self.state_machine
    if state_machine.match_titles:
        return [match.string], 'Line', []
    stripped = match.string.strip()
    if stripped == '::':
        raise statemachine.TransitionCorrection('text')
    if len(stripped) < 4:
        msg = self.reporter.info(
            'Unexpected possible title overline or transition.\n'
            "Treating it as ordinary text because it's so short.",
            line=state_machine.abs_line_number())
        self.parent += msg
        raise statemachine.TransitionCorrection('text')
    # Titles are not being matched here, yet the line is long enough to
    # be a title or transition marker: report it.
    blocktext = state_machine.line
    msg = self.reporter.severe(
        'Unexpected section title or transition.',
        nodes.literal_block(blocktext, blocktext),
        line=state_machine.abs_line_number())
    self.parent += msg
    return [], next_state, []
def text(self, match, context, next_state):
    """Titles, definition lists, paragraphs."""
    # The matched line becomes the context handed to the 'Text' state.
    new_context = [match.string]
    return new_context, 'Text', []
class RFC2822Body(Body):

    """
    RFC2822 headers are only valid as the first constructs in documents.  As
    soon as anything else appears, the `Body` state should take over.
    """

    # A copy, so that `Body.patterns` itself is not modified.
    patterns = Body.patterns.copy()
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    # Every transition leads to the 'Body' state; 'rfc2822' is inserted
    # just before the last entry ('text').
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body'))

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        first_field, blank_finish = self.rfc2822_field(match)
        fieldlist += first_field
        # Remaining fields are collected by a nested parse starting on
        # the next line.
        state_machine = self.state_machine
        next_line = state_machine.line_offset + 1
        newline_offset, blank_finish = self.nested_list_parse(
            state_machine.input_lines[next_line:],
            input_offset=state_machine.abs_line_offset() + 1,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """
        Build a field node from an RFC2822-style header line.

        The field name is the text before the first colon of the line;
        the field body is parsed from the indented block, if there is
        one.  Returns the field node and the "blank finish" flag.
        """
        header = match.string
        name = header[:header.find(':')]
        (indented, _indent, line_offset,
         blank_finish) = self.state_machine.get_first_known_indented(
             match.end(), until_blank=True)
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.  Compound
    elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts up a nested parsing session with `BulletList` as the initial state.
    Only the ``bullet`` transition method is enabled in `BulletList`; as long
    as only bullet list items are encountered, they are parsed and inserted
    into the container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything *other
    than* bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        # Step back one line so the parent state machine can reassess it.
        self.state_machine.previous_line()
        raise EOFError

    # Every transition method is disabled by default.
    indent = bullet = enumerator = field_marker = invalid_input
    option_marker = doctest = line_block = invalid_input
    grid_table_top = simple_table_top = invalid_input
    explicit_markup = anonymous = line = text = invalid_input
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item."""
        bullet_char = match.string[0]
        if bullet_char != self.parent['bullet']:
            # different bullet: new list
            self.invalid_input()
        item, ended_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = ended_blank
        return [], next_state, []
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists."""
        # The matched line becomes the context for the 'Definition' state.
        new_context = [match.string]
        return new_context, 'Definition', []
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        format, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        # The checks run in this order and stop at the first that finds
        # a difference from the list in progress.
        different = format != self.format
        if not different and sequence != '#':
            different = (sequence != self.parent['enumtype']
                         or self.auto
                         or ordinal != (self.lastordinal + 1))
        if not different:
            different = not self.is_enumerated_list_item(
                ordinal, sequence, format)
        if different:
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        self.lastordinal = ordinal
        return [], next_state, []
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field."""
        new_field, ended_blank = self.field(match)
        self.parent += new_field
        self.blank_finish = ended_blank
        return [], next_state, []
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item."""
        try:
            item, ended_blank = self.option_list_item(match)
        except MarkupError:
            # Not a valid option list item: hand the line back.
            self.invalid_input()
        self.parent += item
        self.blank_finish = ended_blank
        return [], next_state, []
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Share the pattern and transition tables of `RFC2822Body`.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        new_field, ended_blank = self.rfc2822_field(match)
        self.parent += new_field
        self.blank_finish = ended_blank
        return [], 'RFC2822List', []

    # A blank line is treated as invalid input here.
    blank = SpecializedBody.invalid_input
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """Override `Body.parse_field_body` for simpler parsing."""
        pending = []
        # The extra empty string flushes the final paragraph.
        for line in list(indented) + ['']:
            if line.strip():
                pending.append(line)
                continue
            if pending:
                # A blank line ends the paragraph collected so far.
                text = '\n'.join(pending)
                node += nodes.paragraph(text, text)
                pending = []
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    # A blank line is treated as invalid input here.
    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """New line of line block."""
        lineno = self.state_machine.abs_line_number()
        new_line, messages, ended_blank = self.line_block_line(match, lineno)
        self.parent += new_line
        # Messages are attached one level up, not to the line block.
        self.parent.parent += messages
        self.blank_finish = ended_blank
        return [], next_state, []
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        construct_nodes, ended_blank = self.explicit_construct(match)
        self.parent += construct_nodes
        self.blank_finish = ended_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        target_nodes, ended_blank = self.anonymous_target(match)
        self.parent += target_nodes
        self.blank_finish = ended_blank
        return [], next_state, []

    # A blank line is treated as invalid input here.
    blank = SpecializedBody.invalid_input
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
        'embedded_directive': re.compile(r'(%s)::( +|$)'
                                         % Inliner.simplename, re.UNICODE),
        'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """
        Run a directive embedded in a substitution definition.

        The name of the enclosing node is passed to the directive as the
        "alt" option preset.  Always ends this state machine by raising
        `EOFError`.
        """
        substitution_name = self.parent['names'][0]
        nodelist, blank_finish = self.directive(match, alt=substitution_name)
        self.parent += nodelist
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        raise EOFError

    def text(self, match, context, next_state):
        """
        Record "blank finish" (unless at EOF), then end this state machine
        by raising `EOFError`.
        """
        state_machine = self.state_machine
        if not state_machine.at_eof():
            self.blank_finish = state_machine.is_next_line_blank()
        raise EOFError
class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """End of paragraph."""
        # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        """Flush pending context lines as a paragraph at end of input."""
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        definitionlist = nodes.definition_list()
        definitionlistitem, blank_finish = self.definition_list_item(context)
        definitionlist += definitionlistitem
        self.parent += definitionlist
        offset = self.state_machine.line_offset + 1   # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=definitionlist, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """Section title."""
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                # Too short to be an underline at all: reparse as text.
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                raise statemachine.TransitionCorrection('text')
            else:
                blocktext = context[0] + '\n' + self.state_machine.line
                msg = self.reporter.warning('Title underline too short.',
                    nodes.literal_block(blocktext, blocktext), line=lineno)
                messages.append(msg)
        if not self.state_machine.match_titles:
            blocktext = context[0] + '\n' + self.state_machine.line
            # We need get_source_and_line() here to report correctly
            src, srcline = self.state_machine.get_source_and_line()
            # TODO: why is abs_line_number() == srcline+1
            # if the error is in a table (try with test_tables.py)?
            # print "get_source_and_line", srcline
            # print "abs_line_number", self.state_machine.abs_line_number()
            msg = self.reporter.severe('Unexpected section title.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        style = underline[0]
        # The title line has been consumed: empty the shared context list
        # in place.
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            # The exception carries the text block read so far together
            # with the source and line to report.
            block, src, srcline = err.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=src, line=srcline)
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """Return a list of nodes."""
        indented, indent, offset, blank_finish = \
            self.state_machine.get_indented()
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            # No indented text: try a quoted literal block instead.
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        literal_block.line = offset + 1
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        """
        Parse a quoted literal block with a nested `QuotedLiteralBlock`
        state machine and return the resulting list of nodes.
        """
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        # Throw-away container: only its children are returned.
        parent_node = nodes.Element()
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=False,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        """
        Build a definition_list_item from `termline` (a one-item list of
        lines) and the indented block that follows it.

        Returns the item node and the "blank finish" flag.
        """
        indented, indent, line_offset, blank_finish = \
            self.state_machine.get_indented()
        itemnode = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        lineno = self.state_machine.abs_line_number() - 1
        (itemnode.source,
         itemnode.line) = self.state_machine.get_source_and_line(lineno)
        termlist, messages = self.term(termline, lineno)
        itemnode += termlist
        definition = nodes.definition('', *messages)
        itemnode += definition
        if termline[0][-2:] == '::':
            definition += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=lineno+1)
        self.nested_parse(indented, input_offset=line_offset, node=definition)
        return itemnode, blank_finish

    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        term_node = nodes.term()
        node_list = [term_node]
        for node in text_nodes:
            if not isinstance(node, nodes.Text):
                # Inline markup goes to the current term or classifier.
                node_list[-1] += node
                continue
            parts = self.classifier_delimiter.split(node.rawsource)
            if len(parts) == 1:
                node_list[-1] += node
            else:
                # Text before the first delimiter stays with the current
                # node; each later part starts a new classifier.
                node_list[-1] += nodes.Text(parts[0].rstrip())
                for part in parts[1:]:
                    classifier_node = nodes.classifier('', part)
                    node_list.append(classifier_node)
        return node_list, messages
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    # Every transition method is disabled by default.
    blank = indent = underline = text = invalid_input
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        # Step back two lines so the parent state machine can reassess.
        self.state_machine.previous_line(2)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        item, ended_blank = self.definition_list_item(context)
        self.parent += item
        self.blank_finish = ended_blank
        return [], 'DefinitionList', []
2844 class Line(SpecializedText
):
2847 Second line of over- & underlined section title or transition marker.
2850 eofcheck
= 1 # @@@ ???
2851 """Set to 0 while parsing sections, so that we don't catch the EOF."""
def eof(self, context):
    """Transition marker at end of section or document."""
    marker = context[0].strip()
    memo = self.memo
    if memo.section_bubble_up_kludge:
        memo.section_bubble_up_kludge = False
    elif len(marker) < 4:
        self.state_correction(context)
    if self.eofcheck:
        # ignore EOFError with sections
        lineno = self.state_machine.abs_line_number() - 1
        transition = nodes.transition(rawsource=context[0])
        transition.line = lineno
        self.parent += transition
    # Re-arm the check for the next use.
    self.eofcheck = 1
    return []
def blank(self, match, context, next_state):
    """Transition marker."""
    src, srcline = self.state_machine.get_source_and_line()
    marker = context[0].strip()
    if len(marker) < 4:
        # NOTE(review): state_correction presumably raises to force a
        # reparse of the short line -- confirm against its definition.
        self.state_correction(context)
    transition = nodes.transition(rawsource=marker)
    transition.source = src
    # The marker is on the line before the blank one.
    transition.line = srcline - 1
    self.parent += transition
    return [], 'Body', []
def text(self, match, context, next_state):
    """Potential over- & underlined title."""
    lineno = self.state_machine.abs_line_number() - 1
    overline = context[0]
    title = match.string
    underline = ''
    try:
        underline = self.state_machine.next_line()
    except EOFError:
        # Input ended right after the title line.
        blocktext = overline + '\n' + title
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 2)
        else:
            msg = self.reporter.severe(
                'Incomplete section title.',
                nodes.literal_block(blocktext, blocktext),
                line=lineno)
            self.parent += msg
            return [], 'Body', []
    source = '%s\n%s\n%s' % (overline, title, underline)
    overline = overline.rstrip()
    underline = underline.rstrip()
    # Pick the complaint, if any, about the underline; both cases are
    # handled the same way apart from the message.
    if not self.transitions['underline'][0].match(underline):
        problem = 'Missing matching underline for section title overline.'
    elif overline != underline:
        problem = 'Title overline & underline mismatch.'
    else:
        problem = None
    if problem is not None:
        blocktext = overline + '\n' + title + '\n' + underline
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 2)
        else:
            msg = self.reporter.severe(
                problem,
                nodes.literal_block(source, source),
                line=lineno)
            self.parent += msg
            return [], 'Body', []
    title = title.rstrip()
    messages = []
    if column_width(title) > len(overline):
        blocktext = overline + '\n' + title + '\n' + underline
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 2)
        else:
            msg = self.reporter.warning(
                'Title overline too short.',
                nodes.literal_block(source, source),
                line=lineno)
            messages.append(msg)
    style = (overline[0], underline[0])
    self.eofcheck = 0               # @@@ not sure this is correct
    self.section(title.lstrip(), source, style, lineno + 1, messages)
    self.eofcheck = 1
    return [], 'Body', []

indent = text                       # indented title
2944 def underline(self
, match
, context
, next_state
):
2945 overline
= context
[0]
2946 blocktext
= overline
+ '\n' + self
.state_machine
.line
2947 lineno
= self
.state_machine
.abs_line_number() - 1
2948 if len(overline
.rstrip()) < 4:
2949 self
.short_overline(context
, blocktext
, lineno
, 1)
2950 msg
= self
.reporter
.error(
2951 'Invalid section title or transition marker.',
2952 nodes
.literal_block(blocktext
, blocktext
),
2955 return [], 'Body', []
2957 def short_overline(self
, context
, blocktext
, lineno
, lines
=1):
2958 msg
= self
.reporter
.info(
2959 'Possible incomplete section title.\nTreating the overline as '
2960 "ordinary text because it's so short.",
2963 self
.state_correction(context
, lines
)
2965 def state_correction(self
, context
, lines
=1):
2966 self
.state_machine
.previous_line(lines
)
2968 raise statemachine
.StateCorrection('Body', 'text')
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.

    The quoted lines are collected in the transition context.  The block
    is emitted by `eof()`; the other handlers end the block by raising
    `EOFError`.  System messages are held in `self.messages` and added to
    the parent by `eof()`, after the literal block or warning.
    """

    patterns = {
        'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
        'text': r'',
    }
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        """Set up the state with no pending messages and no start line."""
        RSTState.__init__(self, state_machine, debug)
        self.messages = []
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """
        Blank line: end the block if any lines were collected.

        With an empty context the blank line is skipped and the state is
        left unchanged; otherwise `EOFError` is raised.
        """
        if not context:
            return context, next_state, []
        raise EOFError

    def eof(self, context):
        """
        Emit the collected literal block, or warn that none was found.

        Pending system messages are added to the parent in either case.
        Return an empty result list.
        """
        machine = self.state_machine
        if not context:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=machine.abs_line_number())
            # src not available, because statemachine.input_lines is empty
            machine.previous_line()
        else:
            src, srcline = machine.get_source_and_line(self.initial_lineno)
            quoted_text = '\n'.join(context)
            block = nodes.literal_block(quoted_text, quoted_text)
            block.source = src
            block.line = srcline
            self.parent += block
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """
        Indented line inside the block: record an error and end the block.

        The state machine is backed up one line before `EOFError` is
        raised.
        """
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        error = self.reporter.error(
            'Unexpected indentation.',
            line=self.state_machine.abs_line_number())
        self.messages.append(error)
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote_char = match.string[0]
        same_quote = re.compile(re.escape(quote_char), re.UNICODE)
        # New transition matches consistent quotes only:
        transition = (same_quote, self.quoted, self.__class__.__name__)
        self.add_transition('quoted', transition)
        # Remembered so `eof()` can look up where the block started.
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """
        Any other line: end the block.

        If lines were already collected, record an "inconsistent quoting"
        error and back up one line.  `EOFError` is raised in every case.
        """
        if context:
            error = self.reporter.error(
                'Inconsistent literal block quoting.',
                line=self.state_machine.abs_line_number())
            self.messages.append(error)
            self.state_machine.previous_line()
        raise EOFError
# One State class per row; the order of the tuple is unchanged.
state_classes = (
    Body,
    BulletList,
    DefinitionList,
    EnumeratedList,
    FieldList,
    OptionList,
    LineBlock,
    ExtensionOptions,
    Explicit,
    Text,
    Definition,
    Line,
    SubstitutionDef,
    RFC2822Body,
    RFC2822List,
)
"""Standard set of State classes used to start `RSTStateMachine`."""