2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 This is the ``docutils.parsers.restructuredtext.states`` module, the core of
7 the reStructuredText parser. It defines the following:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
103 __docformat__
= 'reStructuredText'
109 from types
import FunctionType
, MethodType
110 from docutils
import nodes
, statemachine
, utils
, urischemes
111 from docutils
import ApplicationError
, DataError
112 from docutils
.statemachine
import StateMachineWS
, StateWS
113 from docutils
.nodes
import fully_normalize_name
as normalize_name
114 from docutils
.nodes
import whitespace_normalize_name
115 from docutils
.utils
import escape2null
, unescape
, column_width
116 import docutils
.parsers
.rst
117 from docutils
.parsers
.rst
import directives
, languages
, tableparser
, roles
118 from docutils
.parsers
.rst
.languages
import en
as _fallback_language_module
class MarkupError(DataError):
    """A `DataError` subclass; judging by its name, it signals invalid markup."""
class UnknownInterpretedRoleError(DataError):
    """A `DataError` subclass; by its name, for an interpreted text role that is not known."""
class InterpretedRoleNotImplementedError(DataError):
    """A `DataError` subclass; by its name, for an interpreted text role lacking an implementation."""
class ParserError(ApplicationError):
    """An `ApplicationError` subclass; by its name, for failures of the parser itself."""
class MarkupMismatch(Exception):
    """Plain exception type; `implicit_inline()` in this module catches it
    around its call to an implicit-markup handler."""
130 """Stores data attributes for dotted-attribute access."""
def __init__(self, **keywordargs):
    """Store every keyword argument as an instance attribute of the same name."""
    # vars(self) is the instance's own __dict__, so this is the same update.
    attributes = vars(self)
    attributes.update(keywordargs)
136 class RSTStateMachine(StateMachineWS
):
139 reStructuredText's master StateMachine.
141 The entry point to reStructuredText parsing is the `run()` method.
144 def run(self
, input_lines
, document
, input_offset
=0, match_titles
=1,
147 Parse `input_lines` and modify the `document` node in place.
149 Extend `StateMachineWS.run()`: set up parse-global data and
150 run the StateMachine.
152 self
.language
= languages
.get_language(
153 document
.settings
.language_code
)
154 self
.match_titles
= match_titles
157 inliner
.init_customizations(document
.settings
)
158 self
.memo
= Struct(document
=document
,
159 reporter
=document
.reporter
,
160 language
=self
.language
,
163 section_bubble_up_kludge
=0,
165 self
.document
= document
166 self
.attach_observer(document
.note_source
)
167 self
.reporter
= self
.memo
.reporter
169 results
= StateMachineWS
.run(self
, input_lines
, input_offset
,
170 input_source
=document
['source'])
171 assert results
== [], 'RSTStateMachine.run() results should be empty!'
172 self
.node
= self
.memo
= None # remove unneeded references
175 class NestedStateMachine(StateMachineWS
):
178 StateMachine run from within other StateMachine runs, to parse nested
182 def run(self
, input_lines
, input_offset
, memo
, node
, match_titles
=1):
184 Parse `input_lines` and populate a `docutils.nodes.document` instance.
186 Extend `StateMachineWS.run()`: set up document-wide data.
188 self
.match_titles
= match_titles
190 self
.document
= memo
.document
191 self
.attach_observer(self
.document
.note_source
)
192 self
.reporter
= memo
.reporter
193 self
.language
= memo
.language
195 results
= StateMachineWS
.run(self
, input_lines
, input_offset
)
196 assert results
== [], ('NestedStateMachine.run() results should be '
201 class RSTState(StateWS
):
204 reStructuredText State superclass.
206 Contains methods used by all State subclasses.
209 nested_sm
= NestedStateMachine
def __init__(self, state_machine, debug=0):
    """
    Record the default keyword arguments for nested state machines, then
    run the `StateWS` initializer.

    `nested_sm_kwargs` is what `nested_parse()` passes to the nested state
    machine class when the caller supplies no keyword arguments of its own.
    """
    nested_defaults = {}
    nested_defaults['state_classes'] = state_classes
    nested_defaults['initial_state'] = 'Body'
    self.nested_sm_kwargs = nested_defaults
    StateWS.__init__(self, state_machine, debug)
216 def runtime_init(self
):
217 StateWS
.runtime_init(self
)
218 memo
= self
.state_machine
.memo
220 self
.reporter
= memo
.reporter
221 self
.inliner
= memo
.inliner
222 self
.document
= memo
.document
223 self
.parent
= self
.state_machine
.node
225 def goto_line(self
, abs_line_offset
):
227 Jump to input line `abs_line_offset`, ignoring jumps past the end.
230 self
.state_machine
.goto_line(abs_line_offset
)
def no_match(self, context, transitions):
    """
    Override `StateWS.no_match` to generate a system message.

    This code should never be run.

    Reports a "severe" message naming this state's class, the transitions
    tried, the context and the current line, then returns
    ``(context, None, [])``.
    """
    machine = self.state_machine
    details = (self.__class__.__name__, transitions, context, machine.line)
    self.reporter.severe(
        'Internal error: no transition pattern match. State: "%s"; '
        'transitions: %s; context: %s; current line: %r.' % details,
        line=machine.abs_line_number())
    return context, None, []
248 def bof(self
, context
):
249 """Called at beginning of file."""
252 def nested_parse(self
, block
, input_offset
, node
, match_titles
=0,
253 state_machine_class
=None, state_machine_kwargs
=None):
255 Create a new StateMachine rooted at `node` and run it over the input
258 if state_machine_class
is None:
259 state_machine_class
= self
.nested_sm
260 if state_machine_kwargs
is None:
261 state_machine_kwargs
= self
.nested_sm_kwargs
262 block_length
= len(block
)
263 state_machine
= state_machine_class(debug
=self
.debug
,
264 **state_machine_kwargs
)
265 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
266 node
=node
, match_titles
=match_titles
)
267 state_machine
.unlink()
268 new_offset
= state_machine
.abs_line_offset()
269 # No `block.parent` implies disconnected -- lines aren't in sync:
270 if block
.parent
and (len(block
) - block_length
) != 0:
271 # Adjustment for block if modified in nested parse:
272 self
.state_machine
.next_line(len(block
) - block_length
)
275 def nested_list_parse(self
, block
, input_offset
, node
, initial_state
,
277 blank_finish_state
=None,
280 state_machine_class
=None,
281 state_machine_kwargs
=None):
283 Create a new StateMachine rooted at `node` and run it over the input
284 `block`. Also keep track of optional intermediate blank lines and the
287 if state_machine_class
is None:
288 state_machine_class
= self
.nested_sm
289 if state_machine_kwargs
is None:
290 state_machine_kwargs
= self
.nested_sm_kwargs
.copy()
291 state_machine_kwargs
['initial_state'] = initial_state
292 state_machine
= state_machine_class(debug
=self
.debug
,
293 **state_machine_kwargs
)
294 if blank_finish_state
is None:
295 blank_finish_state
= initial_state
296 state_machine
.states
[blank_finish_state
].blank_finish
= blank_finish
297 for key
, value
in extra_settings
.items():
298 setattr(state_machine
.states
[initial_state
], key
, value
)
299 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
300 node
=node
, match_titles
=match_titles
)
301 blank_finish
= state_machine
.states
[blank_finish_state
].blank_finish
302 state_machine
.unlink()
303 return state_machine
.abs_line_offset(), blank_finish
def section(self, title, source, style, lineno, messages):
    """Create a new subsection, but only if the title passes the subsection check."""
    if not self.check_subsection(source, style, lineno):
        # Not a valid subsection here; nothing is created.
        return
    self.new_subsection(title, lineno, messages)
310 def check_subsection(self
, source
, style
, lineno
):
312 Check for a valid subsection header. Return 1 (true) or None (false).
314 When a new section is reached that isn't a subsection of the current
315 section, back up the line count (use ``previous_line(-x)``), then
316 ``raise EOFError``. The current StateMachine will finish, then the
317 calling StateMachine can re-examine the title. This will work its way
318 back up the calling chain until the correct section level is reached.
320 @@@ Alternative: Evaluate the title, store the title info & level, and
321 back up the chain until that level is reached. Store in memo? Or
324 :Exception: `EOFError` when a sibling or supersection encountered.
327 title_styles
= memo
.title_styles
328 mylevel
= memo
.section_level
329 try: # check for existing title style
330 level
= title_styles
.index(style
) + 1
331 except ValueError: # new title style
332 if len(title_styles
) == memo
.section_level
: # new subsection
333 title_styles
.append(style
)
335 else: # not at lowest level
336 self
.parent
+= self
.title_inconsistent(source
, lineno
)
338 if level
<= mylevel
: # sibling or supersection
339 memo
.section_level
= level
# bubble up to parent section
341 memo
.section_bubble_up_kludge
= 1
342 # back up 2 lines for underline title, 3 for overline title
343 self
.state_machine
.previous_line(len(style
) + 1)
344 raise EOFError # let parent section re-evaluate
345 if level
== mylevel
+ 1: # immediate subsection
347 else: # invalid subsection
348 self
.parent
+= self
.title_inconsistent(source
, lineno
)
351 def title_inconsistent(self
, sourcetext
, lineno
):
352 error
= self
.reporter
.severe(
353 'Title level inconsistent:', nodes
.literal_block('', sourcetext
),
357 def new_subsection(self
, title
, lineno
, messages
):
358 """Append new subsection to document tree. On return, check level."""
360 mylevel
= memo
.section_level
361 memo
.section_level
+= 1
362 section_node
= nodes
.section()
363 self
.parent
+= section_node
364 textnodes
, title_messages
= self
.inline_text(title
, lineno
)
365 titlenode
= nodes
.title(title
, '', *textnodes
)
366 name
= normalize_name(titlenode
.astext())
367 section_node
['names'].append(name
)
368 section_node
+= titlenode
369 section_node
+= messages
370 section_node
+= title_messages
371 self
.document
.note_implicit_target(section_node
, section_node
)
372 offset
= self
.state_machine
.line_offset
+ 1
373 absoffset
= self
.state_machine
.abs_line_offset() + 1
374 newabsoffset
= self
.nested_parse(
375 self
.state_machine
.input_lines
[offset
:], input_offset
=absoffset
,
376 node
=section_node
, match_titles
=1)
377 self
.goto_line(newabsoffset
)
378 if memo
.section_level
<= mylevel
: # can't handle next section?
379 raise EOFError # bubble up to supersection
380 # reset section_level; next pass will detect it properly
381 memo
.section_level
= mylevel
383 def paragraph(self
, lines
, lineno
):
385 Return a list (paragraph & messages) & a boolean: literal_block next?
387 data
= '\n'.join(lines
).rstrip()
388 if re
.search(r
'(?<!\\)(\\\\)*::$', data
):
391 elif data
[-3] in ' \n':
392 text
= data
[:-3].rstrip()
399 textnodes
, messages
= self
.inline_text(text
, lineno
)
400 p
= nodes
.paragraph(data
, '', *textnodes
)
402 return [p
] + messages
, literalnext
def inline_text(self, text, lineno):
    """
    Parse `text` for inline markup via this state's inliner.

    Return 2 lists: nodes (text and inline elements), and system_messages.
    """
    parse = self.inliner.parse
    parsed = parse(text, lineno, self.memo, self.parent)
    return parsed
def unindent_warning(self, node_name):
    """
    Return the reporter's warning that `node_name` ended without a blank line.

    The warning is attributed to the line after the state machine's current
    absolute line number.
    """
    message = '%s ends without a blank line; unexpected unindent.' % node_name
    following_line = self.state_machine.abs_line_number() + 1
    return self.reporter.warning(message, line=following_line)
416 def build_regexp(definition
, compile=1):
418 Build, compile and return a regular expression based on `definition`.
420 :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
421 where "parts" is a list of regular expressions and/or regular
422 expression definitions to be joined into an or-group.
424 name
, prefix
, suffix
, parts
= definition
427 if type(part
) is tuple:
428 part_strings
.append(build_regexp(part
, None))
430 part_strings
.append(part
)
431 or_group
= '|'.join(part_strings
)
432 regexp
= '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
434 return re
.compile(regexp
, re
.UNICODE
)
442 Parse inline markup; call the `parse()` method.
446 self
.implicit_dispatch
= [(self
.patterns
.uri
, self
.standalone_uri
),]
447 """List of (pattern, bound method) tuples, used by
448 `self.implicit_inline`."""
450 def init_customizations(self
, settings
):
451 """Setting-based customizations; run when parsing begins."""
452 if settings
.pep_references
:
453 self
.implicit_dispatch
.append((self
.patterns
.pep
,
455 if settings
.rfc_references
:
456 self
.implicit_dispatch
.append((self
.patterns
.rfc
,
459 def parse(self
, text
, lineno
, memo
, parent
):
460 # Needs to be refactored for nested inline markup.
461 # Add nested_parse() method?
463 Return 2 lists: nodes (text and inline elements), and system_messages.
465 Using `self.patterns.initial`, a pattern which matches start-strings
466 (emphasis, strong, interpreted, phrase reference, literal,
467 substitution reference, and inline target) and complete constructs
468 (simple reference, footnote reference), search for a candidate. When
469 one is found, check for validity (e.g., not a quoted '*' character).
470 If valid, search for the corresponding end string if applicable, and
471 check it for validity. If not found or invalid, generate a warning
472 and ignore the start-string. Implicit inline markup (e.g. standalone
475 self
.reporter
= memo
.reporter
476 self
.document
= memo
.document
477 self
.language
= memo
.language
479 pattern_search
= self
.patterns
.initial
.search
480 dispatch
= self
.dispatch
481 remaining
= escape2null(text
)
486 match
= pattern_search(remaining
)
488 groups
= match
.groupdict()
489 method
= dispatch
[groups
['start'] or groups
['backquote']
490 or groups
['refend'] or groups
['fnend']]
491 before
, inlines
, remaining
, sysmessages
= method(self
, match
,
493 unprocessed
.append(before
)
494 messages
+= sysmessages
496 processed
+= self
.implicit_inline(''.join(unprocessed
),
502 remaining
= ''.join(unprocessed
) + remaining
504 processed
+= self
.implicit_inline(remaining
, lineno
)
505 return processed
, messages
507 openers
= u
'\'"([{<\u2018\u201c\xab\u00a1\u00bf' # see quoted_start below
508 closers
= u
'\'")]}>\u2019\u201d\xbb!?'
509 unicode_delimiters
= u
'\u2010\u2011\u2012\u2013\u2014\u00a0'
510 start_string_prefix
= (ur
'((?<=^)|(?<=[-/: \n\u2019%s%s]))'
511 % (re
.escape(unicode_delimiters
),
513 end_string_suffix
= (r
'((?=$)|(?=[-/:.,; \n\x00%s%s]))'
514 % (re
.escape(unicode_delimiters
),
516 non_whitespace_before
= r
'(?<![ \n])'
517 non_whitespace_escape_before
= r
'(?<![ \n\x00])'
518 non_whitespace_after
= r
'(?![ \n])'
519 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
520 simplename
= r
'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
521 # Valid URI characters (see RFC 2396 & RFC 2732);
522 # final \x00 allows backslash escapes in URIs:
523 uric
= r
"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
524 # Delimiter indicating the end of a URI (not part of the URI):
525 uri_end_delim
= r
"""[>]"""
526 # Last URI character; same as uric but no punctuation:
527 urilast
= r
"""[_~*/=+a-zA-Z0-9]"""
528 # End of a URI (either 'urilast' or 'uric followed by a
530 uri_end
= r
"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
531 emailc
= r
"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
533 %(emailc)s+(?:\.%(emailc)s+)* # name
535 %(emailc)s+(?:\.%(emailc)s*)* # host
536 %(uri_end)s # final URI char
538 parts
= ('initial_inline', start_string_prefix
, '',
539 [('start', '', non_whitespace_after
, # simple start-strings
541 r
'\*(?!\*)', # emphasis but not strong
543 r
'_`', # inline internal target
544 r
'\|(?!\|)'] # substitution reference
546 ('whole', '', end_string_suffix
, # whole constructs
547 [# reference name & end-string
548 r
'(?P<refname>%s)(?P<refend>__?)' % simplename
,
549 ('footnotelabel', r
'\[', r
'(?P<fnend>\]_)',
550 [r
'[0-9]+', # manually numbered
551 r
'\#(%s)?' % simplename
, # auto-numbered (w/ label?)
553 r
'(?P<citationlabel>%s)' % simplename
] # citation reference
557 ('backquote', # interpreted text or phrase reference
558 '(?P<role>(:%s:)?)' % simplename
, # optional role
559 non_whitespace_after
,
560 ['`(?!`)'] # but not literal
565 initial
=build_regexp(parts
),
566 emphasis
=re
.compile(non_whitespace_escape_before
567 + r
'(\*)' + end_string_suffix
),
568 strong
=re
.compile(non_whitespace_escape_before
569 + r
'(\*\*)' + end_string_suffix
),
570 interpreted_or_phrase_ref
=re
.compile(
572 %(non_whitespace_escape_before)s
576 (?P<role>:%(simplename)s:)?
580 %(end_string_suffix)s
581 """ % locals(), re
.VERBOSE | re
.UNICODE
),
582 embedded_uri
=re
.compile(
585 (?:[ \n]+|^) # spaces or beginning of line/string
587 %(non_whitespace_after)s
588 ([^<>\x00]+) # anything but angle brackets & nulls
589 %(non_whitespace_before)s
590 > # close bracket w/o whitespace before
593 """ % locals(), re
.VERBOSE
),
594 literal
=re
.compile(non_whitespace_before
+ '(``)'
595 + end_string_suffix
),
596 target
=re
.compile(non_whitespace_escape_before
597 + r
'(`)' + end_string_suffix
),
598 substitution_ref
=re
.compile(non_whitespace_escape_before
600 + end_string_suffix
),
601 email
=re
.compile(email_pattern
% locals() + '$', re
.VERBOSE
),
604 %(start_string_prefix)s
606 (?P<absolute> # absolute URI
607 (?P<scheme> # scheme (http, ftp, mailto)
608 [a-zA-Z][a-zA-Z0-9.+-]*
613 (//?)? # hierarchical URI
614 %(uric)s* # URI characters
615 %(uri_end)s # final URI char
621 ( # optional fragment
628 (?P<email> # email address
629 """ + email_pattern
+ r
"""
632 %(end_string_suffix)s
633 """) % locals(), re
.VERBOSE
),
636 %(start_string_prefix)s
638 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
640 (PEP\s+(?P<pepnum2>\d+)) # reference by name
642 %(end_string_suffix)s""" % locals(), re
.VERBOSE
),
645 %(start_string_prefix)s
646 (RFC(-|\s+)?(?P<rfcnum>\d+))
647 %(end_string_suffix)s""" % locals(), re
.VERBOSE
))
649 def quoted_start(self
, match
):
650 """Return 1 if inline markup start-string is 'quoted', 0 if not."""
651 string
= match
.string
652 start
= match
.start()
654 if start
== 0: # start-string at beginning of text
656 prestart
= string
[start
- 1]
658 poststart
= string
[end
]
659 if self
.openers
.index(prestart
) \
660 == self
.closers
.index(poststart
): # quoted
662 except IndexError: # start-string at end of text
664 except ValueError: # not quoted
668 def inline_obj(self
, match
, lineno
, end_pattern
, nodeclass
,
669 restore_backslashes
=0):
670 string
= match
.string
671 matchstart
= match
.start('start')
672 matchend
= match
.end('start')
673 if self
.quoted_start(match
):
674 return (string
[:matchend
], [], string
[matchend
:], [], '')
675 endmatch
= end_pattern
.search(string
[matchend
:])
676 if endmatch
and endmatch
.start(1): # 1 or more chars
677 text
= unescape(endmatch
.string
[:endmatch
.start(1)],
679 textend
= matchend
+ endmatch
.end(1)
680 rawsource
= unescape(string
[matchstart
:textend
], 1)
681 return (string
[:matchstart
], [nodeclass(rawsource
, text
)],
682 string
[textend
:], [], endmatch
.group(1))
683 msg
= self
.reporter
.warning(
684 'Inline %s start-string without end-string.'
685 % nodeclass
.__name
__, line
=lineno
)
686 text
= unescape(string
[matchstart
:matchend
], 1)
687 rawsource
= unescape(string
[matchstart
:matchend
], 1)
688 prb
= self
.problematic(text
, rawsource
, msg
)
689 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
], ''
691 def problematic(self
, text
, rawsource
, message
):
692 msgid
= self
.document
.set_id(message
, self
.parent
)
693 problematic
= nodes
.problematic(rawsource
, text
, refid
=msgid
)
694 prbid
= self
.document
.set_id(problematic
)
695 message
.add_backref(prbid
)
def emphasis(self, match, lineno):
    """
    Handle an emphasis start-string match.

    Delegates to `inline_obj()` with the emphasis end pattern and node
    class, and returns its result without the trailing end-string:
    (text before, inline nodes, remaining text, system messages).
    """
    end_pattern = self.patterns.emphasis
    leading, nodelist, trailing, messages, _endstring = self.inline_obj(
        match, lineno, end_pattern, nodes.emphasis)
    return leading, nodelist, trailing, messages
def strong(self, match, lineno):
    """
    Handle a strong-emphasis start-string match.

    Delegates to `inline_obj()` with the strong end pattern and node class,
    and returns its result without the trailing end-string:
    (text before, inline nodes, remaining text, system messages).
    """
    end_pattern = self.patterns.strong
    leading, nodelist, trailing, messages, _endstring = self.inline_obj(
        match, lineno, end_pattern, nodes.strong)
    return leading, nodelist, trailing, messages
708 def interpreted_or_phrase_ref(self
, match
, lineno
):
709 end_pattern
= self
.patterns
.interpreted_or_phrase_ref
710 string
= match
.string
711 matchstart
= match
.start('backquote')
712 matchend
= match
.end('backquote')
713 rolestart
= match
.start('role')
714 role
= match
.group('role')
719 elif self
.quoted_start(match
):
720 return (string
[:matchend
], [], string
[matchend
:], [])
721 endmatch
= end_pattern
.search(string
[matchend
:])
722 if endmatch
and endmatch
.start(1): # 1 or more chars
723 textend
= matchend
+ endmatch
.end()
724 if endmatch
.group('role'):
726 msg
= self
.reporter
.warning(
727 'Multiple roles in interpreted text (both '
728 'prefix and suffix present; only one allowed).',
730 text
= unescape(string
[rolestart
:textend
], 1)
731 prb
= self
.problematic(text
, text
, msg
)
732 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
733 role
= endmatch
.group('suffix')[1:-1]
735 escaped
= endmatch
.string
[:endmatch
.start(1)]
736 rawsource
= unescape(string
[matchstart
:textend
], 1)
737 if rawsource
[-1:] == '_':
739 msg
= self
.reporter
.warning(
740 'Mismatch: both interpreted text role %s and '
741 'reference suffix.' % position
, line
=lineno
)
742 text
= unescape(string
[rolestart
:textend
], 1)
743 prb
= self
.problematic(text
, text
, msg
)
744 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
745 return self
.phrase_ref(string
[:matchstart
], string
[textend
:],
746 rawsource
, escaped
, unescape(escaped
))
748 rawsource
= unescape(string
[rolestart
:textend
], 1)
749 nodelist
, messages
= self
.interpreted(rawsource
, escaped
, role
,
751 return (string
[:rolestart
], nodelist
,
752 string
[textend
:], messages
)
753 msg
= self
.reporter
.warning(
754 'Inline interpreted text or phrase reference start-string '
755 'without end-string.', line
=lineno
)
756 text
= unescape(string
[matchstart
:matchend
], 1)
757 prb
= self
.problematic(text
, text
, msg
)
758 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
]
760 def phrase_ref(self
, before
, after
, rawsource
, escaped
, text
):
761 match
= self
.patterns
.embedded_uri
.search(escaped
)
763 text
= unescape(escaped
[:match
.start(0)])
764 uri_text
= match
.group(2)
765 uri
= ''.join(uri_text
.split())
766 uri
= self
.adjust_uri(uri
)
768 target
= nodes
.target(match
.group(1), refuri
=uri
)
770 raise ApplicationError('problem with URI: %r' % uri_text
)
775 refname
= normalize_name(text
)
776 reference
= nodes
.reference(rawsource
, text
,
777 name
=whitespace_normalize_name(text
))
778 node_list
= [reference
]
779 if rawsource
[-2:] == '__':
781 reference
['refuri'] = uri
783 reference
['anonymous'] = 1
786 reference
['refuri'] = uri
787 target
['names'].append(refname
)
788 self
.document
.note_explicit_target(target
, self
.parent
)
789 node_list
.append(target
)
791 reference
['refname'] = refname
792 self
.document
.note_refname(reference
)
793 return before
, node_list
, after
, []
795 def adjust_uri(self
, uri
):
796 match
= self
.patterns
.email
.match(uri
)
798 return 'mailto:' + uri
802 def interpreted(self
, rawsource
, text
, role
, lineno
):
803 role_fn
, messages
= roles
.role(role
, self
.language
, lineno
,
806 nodes
, messages2
= role_fn(role
, rawsource
, text
, lineno
, self
)
807 return nodes
, messages
+ messages2
809 msg
= self
.reporter
.error(
810 'Unknown interpreted text role "%s".' % role
,
812 return ([self
.problematic(rawsource
, rawsource
, msg
)],
def literal(self, match, lineno):
    """
    Handle an inline literal start-string match.

    Delegates to `inline_obj()` with the literal end pattern and node
    class, asking it to restore backslashes, and returns its result without
    the trailing end-string:
    (text before, inline nodes, remaining text, system messages).
    """
    end_pattern = self.patterns.literal
    leading, nodelist, trailing, messages, _endstring = self.inline_obj(
        match, lineno, end_pattern, nodes.literal, restore_backslashes=1)
    return leading, nodelist, trailing, messages
821 def inline_internal_target(self
, match
, lineno
):
822 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
823 match
, lineno
, self
.patterns
.target
, nodes
.target
)
824 if inlines
and isinstance(inlines
[0], nodes
.target
):
825 assert len(inlines
) == 1
827 name
= normalize_name(target
.astext())
828 target
['names'].append(name
)
829 self
.document
.note_explicit_target(target
, self
.parent
)
830 return before
, inlines
, remaining
, sysmessages
832 def substitution_reference(self
, match
, lineno
):
833 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
834 match
, lineno
, self
.patterns
.substitution_ref
,
835 nodes
.substitution_reference
)
836 if len(inlines
) == 1:
837 subref_node
= inlines
[0]
838 if isinstance(subref_node
, nodes
.substitution_reference
):
839 subref_text
= subref_node
.astext()
840 self
.document
.note_substitution_ref(subref_node
, subref_text
)
841 if endstring
[-1:] == '_':
842 reference_node
= nodes
.reference(
843 '|%s%s' % (subref_text
, endstring
), '')
844 if endstring
[-2:] == '__':
845 reference_node
['anonymous'] = 1
847 reference_node
['refname'] = normalize_name(subref_text
)
848 self
.document
.note_refname(reference_node
)
849 reference_node
+= subref_node
850 inlines
= [reference_node
]
851 return before
, inlines
, remaining
, sysmessages
853 def footnote_reference(self
, match
, lineno
):
855 Handles `nodes.footnote_reference` and `nodes.citation_reference`
858 label
= match
.group('footnotelabel')
859 refname
= normalize_name(label
)
860 string
= match
.string
861 before
= string
[:match
.start('whole')]
862 remaining
= string
[match
.end('whole'):]
863 if match
.group('citationlabel'):
864 refnode
= nodes
.citation_reference('[%s]_' % label
,
866 refnode
+= nodes
.Text(label
)
867 self
.document
.note_citation_ref(refnode
)
869 refnode
= nodes
.footnote_reference('[%s]_' % label
)
870 if refname
[0] == '#':
871 refname
= refname
[1:]
873 self
.document
.note_autofootnote_ref(refnode
)
876 refnode
['auto'] = '*'
877 self
.document
.note_symbol_footnote_ref(
880 refnode
+= nodes
.Text(label
)
882 refnode
['refname'] = refname
883 self
.document
.note_footnote_ref(refnode
)
884 if utils
.get_trim_footnote_ref_space(self
.document
.settings
):
885 before
= before
.rstrip()
886 return (before
, [refnode
], remaining
, [])
888 def reference(self
, match
, lineno
, anonymous
=None):
889 referencename
= match
.group('refname')
890 refname
= normalize_name(referencename
)
891 referencenode
= nodes
.reference(
892 referencename
+ match
.group('refend'), referencename
,
893 name
=whitespace_normalize_name(referencename
))
895 referencenode
['anonymous'] = 1
897 referencenode
['refname'] = refname
898 self
.document
.note_refname(referencenode
)
899 string
= match
.string
900 matchstart
= match
.start('whole')
901 matchend
= match
.end('whole')
902 return (string
[:matchstart
], [referencenode
], string
[matchend
:], [])
def anonymous_reference(self, match, lineno):
    """Handle an anonymous reference by delegating to `reference()` with `anonymous=1`."""
    make_reference = self.reference
    return make_reference(match, lineno, anonymous=1)
907 def standalone_uri(self
, match
, lineno
):
908 if (not match
.group('scheme')
909 or match
.group('scheme').lower() in urischemes
.schemes
):
910 if match
.group('email'):
911 addscheme
= 'mailto:'
914 text
= match
.group('whole')
915 unescaped
= unescape(text
, 0)
916 return [nodes
.reference(unescape(text
, 1), unescaped
,
917 refuri
=addscheme
+ unescaped
)]
918 else: # not a valid scheme
921 def pep_reference(self
, match
, lineno
):
922 text
= match
.group(0)
923 if text
.startswith('pep-'):
924 pepnum
= int(match
.group('pepnum1'))
925 elif text
.startswith('PEP'):
926 pepnum
= int(match
.group('pepnum2'))
929 ref
= (self
.document
.settings
.pep_base_url
930 + self
.document
.settings
.pep_file_url_template
% pepnum
)
931 unescaped
= unescape(text
, 0)
932 return [nodes
.reference(unescape(text
, 1), unescaped
, refuri
=ref
)]
934 rfc_url
= 'rfc%d.html'
936 def rfc_reference(self
, match
, lineno
):
937 text
= match
.group(0)
938 if text
.startswith('RFC'):
939 rfcnum
= int(match
.group('rfcnum'))
940 ref
= self
.document
.settings
.rfc_base_url
+ self
.rfc_url
% rfcnum
943 unescaped
= unescape(text
, 0)
944 return [nodes
.reference(unescape(text
, 1), unescaped
, refuri
=ref
)]
def implicit_inline(self, text, lineno):
    """
    Check each of the patterns in `self.implicit_dispatch` for a match,
    and dispatch to the stored method for the pattern.  Recursively check
    the text before and after the match.  Return a list of `nodes.Text`
    and inline element nodes.
    """
    if not text:
        return []
    for pattern, method in self.implicit_dispatch:
        match = pattern.search(text)
        if not match:
            continue
        try:
            # Must recurse on strings before *and* after the match;
            # there may be multiple patterns.
            return (self.implicit_inline(text[:match.start()], lineno)
                    + method(match, lineno) +
                    self.implicit_inline(text[match.end():], lineno))
        except MarkupMismatch:
            # The handler rejected this match; try the next pattern.
            pass
    # No pattern produced markup: the whole string is plain text.
    return [nodes.Text(unescape(text), rawsource=unescape(text, 1))]
968 dispatch
= {'*': emphasis
,
970 '`': interpreted_or_phrase_ref
,
972 '_`': inline_internal_target
,
973 ']_': footnote_reference
,
974 '|': substitution_reference
,
976 '__': anonymous_reference
}
979 def _loweralpha_to_int(s
, _zero
=(ord('a')-1)):
980 return ord(s
) - _zero
982 def _upperalpha_to_int(s
, _zero
=(ord('A')-1)):
983 return ord(s
) - _zero
def _lowerroman_to_int(s):
    """Convert a lowercase Roman numeral by upper-casing it for `roman`."""
    uppercased = s.upper()
    return roman.fromRoman(uppercased)
989 class Body(RSTState
):
992 Generic classifier of the first line of a block.
995 double_width_pad_char
= tableparser
.TableParser
.double_width_pad_char
996 """Padding character for East Asian double-width text."""
999 """Enumerated list parsing information."""
1002 'parens': Struct(prefix
='(', suffix
=')', start
=1, end
=-1),
1003 'rparen': Struct(prefix
='', suffix
=')', start
=0, end
=-1),
1004 'period': Struct(prefix
='', suffix
='.', start
=0, end
=-1)}
1005 enum
.formats
= enum
.formatinfo
.keys()
1006 enum
.sequences
= ['arabic', 'loweralpha', 'upperalpha',
1007 'lowerroman', 'upperroman'] # ORDERED!
1008 enum
.sequencepats
= {'arabic': '[0-9]+',
1009 'loweralpha': '[a-z]',
1010 'upperalpha': '[A-Z]',
1011 'lowerroman': '[ivxlcdm]+',
1012 'upperroman': '[IVXLCDM]+',}
1013 enum
.converters
= {'arabic': int,
1014 'loweralpha': _loweralpha_to_int
,
1015 'upperalpha': _upperalpha_to_int
,
1016 'lowerroman': _lowerroman_to_int
,
1017 'upperroman': roman
.fromRoman
}
1019 enum
.sequenceregexps
= {}
1020 for sequence
in enum
.sequences
:
1021 enum
.sequenceregexps
[sequence
] = re
.compile(
1022 enum
.sequencepats
[sequence
] + '$')
1024 grid_table_top_pat
= re
.compile(r
'\+-[-+]+-\+ *$')
1025 """Matches the top (& bottom) of a full table)."""
1027 simple_table_top_pat
= re
.compile('=+( +=+)+ *$')
1028 """Matches the top of a simple table."""
1030 simple_table_border_pat
= re
.compile('=+[ =]*$')
1031 """Matches the bottom & header bottom of a simple table."""
1034 """Fragments of patterns used by transitions."""
1036 pats
['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1037 pats
['alpha'] = '[a-zA-Z]'
1038 pats
['alphanum'] = '[a-zA-Z0-9]'
1039 pats
['alphanumplus'] = '[a-zA-Z0-9_-]'
1040 pats
['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1041 '|%(upperroman)s|#)' % enum
.sequencepats
)
1042 pats
['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1043 # @@@ Loosen up the pattern? Allow Unicode?
1044 pats
['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1045 pats
['shortopt'] = r
'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1046 pats
['longopt'] = r
'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1047 pats
['option'] = r
'(%(shortopt)s|%(longopt)s)' % pats
1049 for format
in enum
.formats
:
1050 pats
[format
] = '(?P<%s>%s%s%s)' % (
1051 format
, re
.escape(enum
.formatinfo
[format
].prefix
),
1052 pats
['enum'], re
.escape(enum
.formatinfo
[format
].suffix
))
1055 'bullet': ur
'[-+*\u2022\u2023\u2043]( +|$)',
1056 'enumerator': r
'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats
,
1057 'field_marker': r
':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1058 'option_marker': r
'%(option)s(, %(option)s)*( +| ?$)' % pats
,
1059 'doctest': r
'>>>( +|$)',
1060 'line_block': r
'\|( +|$)',
1061 'grid_table_top': grid_table_top_pat
,
1062 'simple_table_top': simple_table_top_pat
,
1063 'explicit_markup': r
'\.\.( +|$)',
1064 'anonymous': r
'__( +|$)',
1065 'line': r
'(%(nonalphanum7bit)s)\1* *$' % pats
,
1067 initial_transitions
= (
def indent(self, match, context, next_state):
    """
    Block quote.

    Fetch the indented block from the state machine, turn it into block
    quote elements via `self.block_quote()` and add them to the parent.
    A warning is added when the block did not end with a blank line.
    """
    (lines, _width, offset,
     finished_blank) = self.state_machine.get_indented()
    self.parent += self.block_quote(lines, offset)
    if not finished_blank:
        self.parent += self.unindent_warning('Block quote')
    return context, next_state, []
def block_quote(self, indented, line_offset):
    """
    Parse `indented` into a list of block quote elements.

    Each pass splits one optional attribution off the remaining lines
    (see `split_attribution()`), parses the preceding lines into a
    `block_quote` node, and collects any system messages produced while
    parsing the attribution.  Return the list of elements.
    """
    elements = []
    while indented:
        pieces = self.split_attribution(indented, line_offset)
        (quote_lines, attribution_lines, attribution_offset,
         indented, next_offset) = pieces
        blockquote = nodes.block_quote()
        self.nested_parse(quote_lines, line_offset, blockquote)
        elements.append(blockquote)
        if attribution_lines:
            attribution, messages = self.parse_attribution(
                attribution_lines, attribution_offset)
            blockquote += attribution
            elements += messages
        line_offset = next_offset
        # Skip blank lines separating this quote from the next one.
        while indented and not indented[0]:
            indented = indented[1:]
            line_offset += 1
    return elements
# Matches the marker that opens a block quote attribution: "--" or "---"
# (not followed by a further dash) or an em-dash (U+2014), then optional
# spaces, and requires a non-space, non-newline character to follow.
# Written as a plain ``u''`` literal (the ``ur''`` prefix is a syntax
# error on Python 3); "\u2014" and "\n" are resolved by the string
# literal here, which yields an equivalent pattern.
attribution_pattern = re.compile(u'(---?(?!-)|\u2014) *(?=[^ \n])')
def split_attribution(self, indented, line_offset):
    """
    Check for a block quote attribution and split it off:

    * First line after a blank line must begin with a dash ("--", "---",
      em-dash; matches `self.attribution_pattern`).
    * Every line after that must have consistent indentation.
    * Attributions must be preceded by block quote content.

    Return a tuple of: (block quote content lines, content offset,
    attribution lines, attribution offset, remaining indented lines).
    """
    last_blank = None
    content_seen = False
    for index in range(len(indented)):
        text = indented[index].rstrip()
        if not text:
            last_blank = index
            continue
        if content_seen and last_blank == index - 1:  # last line blank
            marker = self.attribution_pattern.match(text)
            if marker:
                stop, indent = self.check_attribution(indented, index)
                if stop:
                    a_lines = indented[index:stop]
                    # Remove the dash marker from the first line and the
                    # common indent from the continuation lines.
                    a_lines.trim_left(marker.end(), end=1)
                    a_lines.trim_left(indent, start=1)
                    return (indented[:index], a_lines,
                            index, indented[stop:],
                            line_offset + stop)
        content_seen = True
    return (indented, None, None, None, None)
def check_attribution(self, indented, attribution_start):
    """
    Check attribution shape.
    Return the index past the end of the attribution, and the indent.

    ``(None, None)`` is returned when the continuation lines are not
    indented consistently.
    """
    first = attribution_start + 1
    indent = None
    end = first
    for end in range(first, len(indented)):
        text = indented[end].rstrip()
        if not text:
            # A blank line terminates the attribution.
            break
        width = len(text) - len(text.lstrip())
        if indent is None:
            indent = width
        elif width != indent:
            return None, None       # bad shape; not an attribution
    else:
        # return index of line after last attribution line:
        end += 1
    return end, (indent or 0)
def parse_attribution(self, indented, line_offset):
    """
    Build an `attribution` node from the attribution lines.

    The lines are joined, parsed as inline text, and the node's `line`
    is set to the current absolute line number plus `line_offset`.
    Return the node and the list of system messages from inline parsing.
    """
    lineno = self.state_machine.abs_line_number() + line_offset
    text = '\n'.join(indented).rstrip()
    children, messages = self.inline_text(text, lineno)
    attribution = nodes.attribution(text, '', *children)
    attribution.line = lineno
    return attribution, messages
def bullet(self, match, context, next_state):
    """Bullet list item."""
    machine = self.state_machine
    bulletlist = nodes.bullet_list()
    self.parent += bulletlist
    # The bullet character is the first character of the matched line.
    bulletlist['bullet'] = match.string[0]
    first_item, blank_finish = self.list_item(match.end())
    bulletlist += first_item
    # Remaining items are handled by a nested parse in the BulletList
    # state, starting on the next line.
    following = machine.line_offset + 1
    new_line_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[following:],
        input_offset=machine.abs_line_offset() + 1,
        node=bulletlist, initial_state='BulletList',
        blank_finish=blank_finish)
    self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Bullet list')
    return [], next_state, []
def list_item(self, indent):
    """
    Parse one list item whose text starts at column `indent`.

    Return the `list_item` node and the "blank finish" flag reported by
    the state machine.
    """
    machine = self.state_machine
    if machine.line[indent:]:
        # Text follows the marker on the same line.
        indented, line_offset, blank_finish = (
            machine.get_known_indented(indent))
    else:
        # Marker only: the indent is determined by the state machine.
        indented, indent, line_offset, blank_finish = (
            machine.get_first_known_indented(indent))
    listitem = nodes.list_item('\n'.join(indented))
    if indented:
        self.nested_parse(indented, input_offset=line_offset,
                          node=listitem)
    return listitem, blank_finish
def enumerator(self, match, context, next_state):
    """Enumerated List Item"""
    format, sequence, text, ordinal = self.parse_enumerator(match)
    if not self.is_enumerated_list_item(ordinal, sequence, format):
        # Not a list item after all; reprocess the line as text.
        raise statemachine.TransitionCorrection('text')
    machine = self.state_machine
    formatinfo = self.enum.formatinfo[format]
    enumlist = nodes.enumerated_list()
    self.parent += enumlist
    if sequence != '#':
        enumlist['enumtype'] = sequence
    else:
        # Auto-enumerated lists are recorded as arabic.
        enumlist['enumtype'] = 'arabic'
    enumlist['prefix'] = formatinfo.prefix
    enumlist['suffix'] = formatinfo.suffix
    if ordinal != 1:
        enumlist['start'] = ordinal
        msg = self.reporter.info(
            'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
            % (text, ordinal), line=machine.abs_line_number())
        self.parent += msg
    listitem, blank_finish = self.list_item(match.end())
    enumlist += listitem
    following = machine.line_offset + 1           # next line
    newline_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[following:],
        input_offset=machine.abs_line_offset() + 1,
        node=enumlist, initial_state='EnumeratedList',
        blank_finish=blank_finish,
        extra_settings={'lastordinal': ordinal,
                        'format': format,
                        'auto': sequence == '#'})
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Enumerated list')
    return [], next_state, []
def parse_enumerator(self, match, expected_sequence=None):
    """
    Analyze an enumerator and return the results.

    :Return:
        - the enumerator format ('period', 'parens', or 'rparen'),
        - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
        - the text of the enumerator, stripped of formatting, and
        - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
          ``None`` is returned for invalid enumerator text).

    The enumerator format has already been determined by the regular
    expression match. If `expected_sequence` is given, that sequence is
    tried first. If not, we check for Roman numeral 1. This way,
    single-character Roman numerals (which are also alphabetical) can be
    matched. If no sequence has been matched, all sequences are checked in
    order.

    :Exception: `ParserError` if no format or sequence matches, or if
        `expected_sequence` is not a known sequence name.
    """
    groupdict = match.groupdict()
    sequence = ''
    for format in self.enum.formats:
        if groupdict[format]:       # was this the format matched?
            break                   # yes; keep `format`
    else:                           # shouldn't happen
        raise ParserError('enumerator format not matched')
    formatinfo = self.enum.formatinfo[format]
    # Strip the format's prefix/suffix characters from the matched text.
    text = groupdict[format][formatinfo.start:formatinfo.end]
    if text == '#':
        sequence = '#'
    elif expected_sequence:
        try:
            if self.enum.sequenceregexps[expected_sequence].match(text):
                sequence = expected_sequence
        except KeyError:            # shouldn't happen
            # Report the name that was looked up; `sequence` is still
            # empty at this point.
            raise ParserError('unknown enumerator sequence: %s'
                              % expected_sequence)
    elif text == 'i':
        sequence = 'lowerroman'
    elif text == 'I':
        sequence = 'upperroman'
    if not sequence:
        for sequence in self.enum.sequences:
            if self.enum.sequenceregexps[sequence].match(text):
                break
        else:                       # shouldn't happen
            raise ParserError('enumerator sequence not matched')
    if sequence == '#':
        ordinal = 1
    else:
        try:
            ordinal = self.enum.converters[sequence](text)
        except roman.InvalidRomanNumeralError:
            ordinal = None
    return format, sequence, text, ordinal
def is_enumerated_list_item(self, ordinal, sequence, format):
    """
    Check validity based on the ordinal value and the second line.

    Return true iff the ordinal is valid and the second line is blank,
    indented, or starts with the next enumerator or an auto-enumerator.
    """
    if ordinal is None:
        return None
    machine = self.state_machine
    # Peek at the following line, then step back again.
    try:
        next_line = machine.next_line()
    except EOFError:              # end of input lines
        machine.previous_line()
        return 1
    machine.previous_line()
    if not next_line[:1].strip():  # blank or indented
        return 1
    result = self.make_enumerator(ordinal + 1, sequence, format)
    if not result:
        return None
    next_enumerator, auto_enumerator = result
    try:
        if (next_line.startswith(next_enumerator)
                or next_line.startswith(auto_enumerator)):
            return 1
    except TypeError:
        pass
    return None
def make_enumerator(self, ordinal, sequence, format):
    """
    Construct and return the next enumerated list item marker, and an
    auto-enumerator ("#" instead of the regular enumerator).

    Return ``None`` for invalid (out of range) ordinals.
    """
    if sequence == '#':
        enumerator = '#'
    elif sequence == 'arabic':
        enumerator = str(ordinal)
    else:
        if sequence.endswith('alpha'):
            if ordinal > 26:
                return None
            enumerator = chr(ordinal + ord('a') - 1)
        elif sequence.endswith('roman'):
            try:
                enumerator = roman.toRoman(ordinal)
            except roman.RomanError:
                return None
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
        # Apply the case named by the sequence prefix.
        if sequence.startswith('lower'):
            enumerator = enumerator.lower()
        elif sequence.startswith('upper'):
            enumerator = enumerator.upper()
        else:                       # shouldn't happen
            raise ParserError('unknown enumerator sequence: "%s"'
                              % sequence)
    info = self.enum.formatinfo[format]
    prefix = info.prefix
    suffix = info.suffix
    next_enumerator = prefix + enumerator + suffix + ' '
    auto_enumerator = prefix + '#' + suffix + ' '
    return next_enumerator, auto_enumerator
def field_marker(self, match, context, next_state):
    """Field list item."""
    machine = self.state_machine
    field_list = nodes.field_list()
    self.parent += field_list
    first_field, blank_finish = self.field(match)
    field_list += first_field
    # Further fields are parsed in the FieldList state from the next line.
    following = machine.line_offset + 1
    newline_offset, blank_finish = self.nested_list_parse(
        machine.input_lines[following:],
        input_offset=machine.abs_line_offset() + 1,
        node=field_list, initial_state='FieldList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Field list')
    return [], next_state, []
def field(self, match):
    """
    Parse a single field (name and body) starting at the matched marker.

    Return the `field` node and the "blank finish" flag.  System messages
    produced while parsing the field name are stored in the field body.
    """
    machine = self.state_machine
    name = self.parse_field_marker(match)
    lineno = machine.abs_line_number()
    (indented, indent, line_offset,
     blank_finish) = machine.get_first_known_indented(match.end())
    name_nodes, name_messages = self.inline_text(name, lineno)
    field_node = nodes.field()
    field_node.line = lineno
    field_node += nodes.field_name(name, '', *name_nodes)
    field_body = nodes.field_body('\n'.join(indented), *name_messages)
    field_node += field_body
    if indented:
        self.parse_field_body(indented, line_offset, field_body)
    return field_node, blank_finish
def parse_field_marker(self, match):
    """Extract & return field name from a field marker match."""
    marker = match.group()
    without_lead = marker[1:]               # strip off leading ':'
    closing = without_lead.rfind(':')
    return without_lead[:closing]           # strip off trailing ':' etc.
def parse_field_body(self, indented, offset, node):
    """Parse the field body lines `indented` into `node`."""
    # Kept as a separate method; presumably a hook for subclasses that
    # parse field bodies differently -- confirm against the subclasses.
    self.nested_parse(indented, input_offset=offset, node=node)
def option_marker(self, match, context, next_state):
    """
    Option list item.

    Parse the first item and hand the following lines to a nested parse
    in the OptionList state.  If the marker turns out to be invalid, an
    error is reported and the indented text is treated as a block quote.
    """
    optionlist = nodes.option_list()
    try:
        listitem, blank_finish = self.option_list_item(match)
    except MarkupError as error:
        # This shouldn't happen; pattern won't match.
        # `MarkupError` is raised with (message, line number) arguments.
        message, lineno = error.args
        msg = self.reporter.error(
            'Invalid option list marker: %s' % message, line=lineno)
        self.parent += msg
        indented, indent, line_offset, blank_finish = \
            self.state_machine.get_first_known_indented(match.end())
        elements = self.block_quote(indented, line_offset)
        self.parent += elements
        if not blank_finish:
            self.parent += self.unindent_warning('Option list')
        return [], next_state, []
    self.parent += optionlist
    optionlist += listitem
    offset = self.state_machine.line_offset + 1   # next line
    newline_offset, blank_finish = self.nested_list_parse(
        self.state_machine.input_lines[offset:],
        input_offset=self.state_machine.abs_line_offset() + 1,
        node=optionlist, initial_state='OptionList',
        blank_finish=blank_finish)
    self.goto_line(newline_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Option list')
    return [], next_state, []
def option_list_item(self, match):
    """
    Parse one option list item (option group plus description).

    Raise `statemachine.TransitionCorrection` (after rewinding to the
    starting line) when no description follows the option marker.
    Return the `option_list_item` node and the "blank finish" flag.
    """
    machine = self.state_machine
    start_offset = machine.abs_line_offset()
    options = self.parse_option_marker(match)
    (indented, indent, line_offset,
     blank_finish) = machine.get_first_known_indented(match.end())
    if not indented:                # not an option list item
        self.goto_line(start_offset)
        raise statemachine.TransitionCorrection('text')
    option_group = nodes.option_group('', *options)
    description = nodes.description('\n'.join(indented))
    item = nodes.option_list_item('', option_group, description)
    if indented:
        self.nested_parse(indented, input_offset=line_offset,
                          node=description)
    return item, blank_finish
def parse_option_marker(self, match):
    """
    Return a list of `node.option` and `node.option_argument` objects,
    parsed from an option marker match.

    :Exception: `MarkupError` for invalid option markers.
    """
    optlist = []
    optionstrings = match.group().rstrip().split(', ')
    for optionstring in optionstrings:
        tokens = optionstring.split()
        delimiter = ' '
        # Split on the first "=" only, so that an argument which itself
        # contains "=" (e.g. "--opt=<a=b>") stays in one piece.
        firstopt = tokens[0].split('=', 1)
        if len(firstopt) > 1:
            # "--opt=value" form
            tokens[:1] = firstopt
            delimiter = '='
        elif (len(tokens[0]) > 2
              and ((tokens[0].startswith('-')
                    and not tokens[0].startswith('--'))
                   or tokens[0].startswith('+'))):
            # "-ovalue" form
            tokens[:1] = [tokens[0][:2], tokens[0][2:]]
            delimiter = ''
        if len(tokens) > 1 and (tokens[1].startswith('<')
                                and tokens[-1].endswith('>')):
            # "-o <value1 value2>" form; join all values into one token
            tokens[1:] = [' '.join(tokens[1:])]
        if 0 < len(tokens) <= 2:
            option = nodes.option(optionstring)
            option += nodes.option_string(tokens[0], tokens[0])
            if len(tokens) > 1:
                option += nodes.option_argument(tokens[1], tokens[1],
                                                delimiter=delimiter)
            optlist.append(option)
        else:
            raise MarkupError(
                'wrong number of option tokens (=%s), should be 1 or 2: '
                '"%s"' % (len(tokens), optionstring),
                self.state_machine.abs_line_number() + 1)
    return optlist
def doctest(self, match, context, next_state):
    """Doctest block: store the whole text block verbatim."""
    block_lines = self.state_machine.get_text_block()
    data = '\n'.join(block_lines)
    self.parent += nodes.doctest_block(data, data)
    return [], next_state, []
def line_block(self, match, context, next_state):
    """First line of a line block."""
    machine = self.state_machine
    block = nodes.line_block()
    self.parent += block
    lineno = machine.abs_line_number()
    line, messages, blank_finish = self.line_block_line(match, lineno)
    block += line
    self.parent += messages
    if not blank_finish:
        # More lines follow; parse them in the LineBlock state.
        following = machine.line_offset + 1       # next line
        new_line_offset, blank_finish = self.nested_list_parse(
            machine.input_lines[following:],
            input_offset=machine.abs_line_offset() + 1,
            node=block, initial_state='LineBlock',
            blank_finish=0)
        self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.reporter.warning(
            'Line block ends without a blank line.',
            line=(machine.abs_line_number() + 1))
    if len(block):
        if block[0].indent is None:
            block[0].indent = 0
        self.nest_line_block_lines(block)
    return [], next_state, []
def line_block_line(self, match, lineno):
    """Return one line element of a line_block."""
    fetched = self.state_machine.get_first_known_indented(match.end(),
                                                          until_blank=1)
    indented, indent, line_offset, blank_finish = fetched
    text = u'\n'.join(indented)
    children, messages = self.inline_text(text, lineno)
    line = nodes.line(text, '', *children)
    if match.string.rstrip() != '|':  # not empty
        # Indent is the width of the whitespace after the "|" marker
        # (match group 1), less the one separating space.
        line.indent = len(match.group(1)) - 1
    return line, messages, blank_finish
def nest_line_block_lines(self, block):
    """
    Fill in missing indents, then nest the lines of `block` by indent.

    A line whose `indent` is ``None`` inherits the indent of the line
    before it; the first line is left untouched.
    """
    for position in range(1, len(block)):
        current = block[position]
        if current.indent is None:
            current.indent = block[position - 1].indent
    self.nest_line_block_segment(block)
def nest_line_block_segment(self, block):
    """
    Recursively group the more-indented lines of `block`, in place.

    Consecutive items indented deeper than the block's minimum indent are
    moved into a new nested `line_block`, which is processed the same way.
    """
    least = min([item.indent for item in block])
    regrouped = []
    pending = nodes.line_block()
    for item in block:
        if item.indent > least:
            pending.append(item)
            continue
        if len(pending):
            # Close the run of deeper lines collected so far.
            self.nest_line_block_segment(pending)
            regrouped.append(pending)
            pending = nodes.line_block()
        regrouped.append(item)
    if len(pending):
        self.nest_line_block_segment(pending)
        regrouped.append(pending)
    block[:] = regrouped
def grid_table_top(self, match, context, next_state):
    """Top border of a full table."""
    isolate = self.isolate_grid_table
    parser_class = tableparser.GridTableParser
    return self.table_top(match, context, next_state,
                          isolate, parser_class)
def simple_table_top(self, match, context, next_state):
    """Top border of a simple table."""
    isolate = self.isolate_simple_table
    parser_class = tableparser.SimpleTableParser
    return self.table_top(match, context, next_state,
                          isolate, parser_class)
def table_top(self, match, context, next_state,
              isolate_function, parser_class):
    """Top border of a generic table."""
    nodelist, blank_finish = self.table(isolate_function, parser_class)
    self.parent += nodelist
    if blank_finish:
        return [], next_state, []
    warning = self.reporter.warning(
        'Blank line required after table.',
        line=self.state_machine.abs_line_number() + 1)
    self.parent += warning
    return [], next_state, []
def table(self, isolate_function, parser_class):
    """
    Parse a table.

    `isolate_function` is called without arguments and must return the
    table's text block, a list of system messages, and the "blank finish"
    flag.  `parser_class` is instantiated to parse a non-empty block; a
    `tableparser.TableMarkupError` is converted into a "malformed table"
    message.  Return the node list and the "blank finish" flag.
    """
    block, messages, blank_finish = isolate_function()
    if block:
        try:
            parser = parser_class()
            tabledata = parser.parse(block)
            # Line number of the first table line.
            tableline = (self.state_machine.abs_line_number() - len(block)
                         + 1)
            table = self.build_table(tabledata, tableline)
            nodelist = [table] + messages
        except tableparser.TableMarkupError as detail:
            nodelist = self.malformed_table(
                block, ' '.join(detail.args)) + messages
    else:
        nodelist = messages
    return nodelist, blank_finish
def isolate_grid_table(self):
    """
    Isolate the text block of a grid table from the input.

    Return a 3-tuple: the table lines (an empty list if the table is
    malformed), a list of system messages, and the "blank finish" flag.
    Lines after the detected end of the table are handed back to the
    state machine with `previous_line()`.
    """
    messages = []
    blank_finish = 1
    try:
        block = self.state_machine.get_text_block(flush_left=1)
    except statemachine.UnexpectedIndentationError as instance:
        block, source, lineno = instance.args
        messages.append(self.reporter.error('Unexpected indentation.',
                                            source=source, line=lineno))
        blank_finish = 0
    block.disconnect()
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    width = len(block[0].strip())
    for i in range(len(block)):
        block[i] = block[i].strip()
        if block[i][0] not in '+|':  # check left edge
            blank_finish = 0
            self.state_machine.previous_line(len(block) - i)
            del block[i:]
            break
    if not self.grid_table_top_pat.match(block[-1]):  # find bottom
        blank_finish = 0
        # from second-last to third line of table:
        for i in range(len(block) - 2, 1, -1):
            if self.grid_table_top_pat.match(block[i]):
                self.state_machine.previous_line(len(block) - i + 1)
                del block[i+1:]
                break
        else:
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    for i in range(len(block)):     # check right edge
        if len(block[i]) != width or block[i][-1] not in '+|':
            messages.extend(self.malformed_table(block))
            return [], messages, blank_finish
    return block, messages, blank_finish
def isolate_simple_table(self):
    """
    Isolate the text block of a simple table from the input.

    Scan forward from the current line for border lines.  The table ends
    at the second border found, or at a border that is the last input
    line or is followed by a blank line.  Return a 3-tuple: the table
    lines (an empty list if malformed), a list of system messages, and
    the "blank finish" flag.
    """
    machine = self.state_machine
    start = machine.line_offset
    lines = machine.input_lines
    limit = len(lines) - 1
    toplen = len(lines[start].strip())
    is_border = self.simple_table_border_pat.match
    borders_found = 0
    last_border = None
    i = start + 1
    while i <= limit:
        line = lines[i]
        if is_border(line):
            if len(line.strip()) != toplen:
                machine.next_line(i - start)
                messages = self.malformed_table(
                    lines[start:i+1], 'Bottom/header table border does '
                    'not match top border.')
                return [], messages, i == limit or not lines[i+1].strip()
            borders_found += 1
            last_border = i
            if (borders_found == 2 or i == limit
                    or not lines[i+1].strip()):
                end = i
                break
        i += 1
    else:                           # reached end of input_lines
        if borders_found:
            extra = ' or no blank line after table bottom'
            machine.next_line(last_border - start)
            block = lines[start:last_border+1]
        else:
            extra = ''
            machine.next_line(i - start - 1)
            block = lines[start:]
        messages = self.malformed_table(
            block, 'No bottom table border found%s.' % extra)
        return [], messages, not extra
    machine.next_line(end - start)
    block = lines[start:end+1]
    # for East Asian chars:
    block.pad_double_width(self.double_width_pad_char)
    return block, [], end == limit or not lines[end+1].strip()
def malformed_table(self, block, detail=''):
    """
    Return a one-element list with a "Malformed table." error message.

    The double-width padding characters are removed from `block` first;
    `detail`, if given, is appended to the message on a new line.  The
    table text is attached to the message as a literal block.
    """
    block.replace(self.double_width_pad_char, '')
    data = '\n'.join(block)
    lineno = self.state_machine.abs_line_number() - len(block) + 1
    message = 'Malformed table.'
    if detail:
        message += '\n' + detail
    literal = nodes.literal_block(data, data)
    error = self.reporter.error(message, literal, line=lineno)
    return [error]
def build_table(self, tabledata, tableline, stub_columns=0):
    """
    Build and return a `table` node from parsed table data.

    `tabledata` is a 3-tuple: column widths, header rows, body rows.
    The first `stub_columns` column specs are flagged with ``stub=1``.
    """
    colwidths, headrows, bodyrows = tabledata
    table = nodes.table()
    tgroup = nodes.tgroup(cols=len(colwidths))
    table += tgroup
    stubs_left = stub_columns
    for width in colwidths:
        colspec = nodes.colspec(colwidth=width)
        if stubs_left:
            colspec.attributes['stub'] = 1
            stubs_left -= 1
        tgroup += colspec
    if headrows:
        thead = nodes.thead()
        tgroup += thead
        for rowdata in headrows:
            thead += self.build_table_row(rowdata, tableline)
    tbody = nodes.tbody()
    tgroup += tbody
    for rowdata in bodyrows:
        tbody += self.build_table_row(rowdata, tableline)
    return table
def build_table_row(self, rowdata, tableline):
    """
    Build and return a `row` node from one row of parsed table data.

    Each cell is ``None`` (skipped) or a 4-tuple of: more rows, more
    columns, line offset, and the cell's text block, which is parsed
    into the entry when it is not empty.
    """
    row = nodes.row()
    for cell in rowdata:
        if cell is None:
            continue
        morerows, morecols, offset, cellblock = cell
        # Only non-zero spans are recorded as attributes.
        attributes = {}
        if morerows:
            attributes['morerows'] = morerows
        if morecols:
            attributes['morecols'] = morecols
        entry = nodes.entry(**attributes)
        row += entry
        if ''.join(cellblock):
            self.nested_parse(cellblock, input_offset=tableline+offset,
                              node=entry)
    return row
1740 """Patterns and constants used for explicit markup recognition."""
1742 explicit
.patterns
= Struct(
1743 target
=re
.compile(r
"""
1745 _ # anonymous target
1747 (?!_) # no underscore at the beginning
1748 (?P<quote>`?) # optional open quote
1749 (?![ `]) # first char. not space or
1751 (?P<name> # reference name
1754 %(non_whitespace_escape_before)s
1755 (?P=quote) # close quote if open quote used
1757 (?<!(?<!\x00):) # no unescaped colon at end
1758 %(non_whitespace_escape_before)s
1759 [ ]? # optional space
1760 : # end of reference name
1761 ([ ]+|$) # followed by whitespace
1762 """ % vars(Inliner
), re
.VERBOSE
),
1763 reference
=re
.compile(r
"""
1765 (?P<simple>%(simplename)s)_
1769 (?P<phrase>.+?) # hyperlink phrase
1770 %(non_whitespace_escape_before)s
1771 `_ # close backquote,
1775 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1776 substitution
=re
.compile(r
"""
1778 (?![ ]) # first char. not space
1779 (?P<name>.+?) # substitution text
1780 %(non_whitespace_escape_before)s
1781 \| # close delimiter
1783 ([ ]+|$) # followed by whitespace
1784 """ % vars(Inliner
), re
.VERBOSE
),)
def footnote(self, match):
    """
    Parse a footnote (explicit markup).

    The label (match group 1) selects the kind: "#..." is auto-numbered,
    "*" is auto-symbol, anything else is manually numbered.  Return a
    one-element list with the `footnote` node, and the "blank finish"
    flag.
    """
    machine = self.state_machine
    lineno = machine.abs_line_number()
    (indented, indent, offset,
     blank_finish) = machine.get_first_known_indented(match.end())
    label = match.group(1)
    name = normalize_name(label)
    footnote = nodes.footnote('\n'.join(indented))
    footnote.line = lineno
    if name[0] == '#':              # auto-numbered
        name = name[1:]             # autonumber label
        footnote['auto'] = 1
        if name:
            footnote['names'].append(name)
        self.document.note_autofootnote(footnote)
    elif name == '*':               # auto-symbol
        name = ''
        footnote['auto'] = '*'
        self.document.note_symbol_footnote(footnote)
    else:                           # manually numbered
        footnote += nodes.label('', label)
        footnote['names'].append(name)
        self.document.note_footnote(footnote)
    if name:
        self.document.note_explicit_target(footnote, footnote)
    else:
        # Unnamed footnotes still get an id.
        self.document.set_id(footnote, footnote)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=footnote)
    return [footnote], blank_finish
def citation(self, match):
    """
    Parse a citation (explicit markup).

    The label is match group 1.  Return a one-element list with the
    `citation` node, and the "blank finish" flag.
    """
    machine = self.state_machine
    lineno = machine.abs_line_number()
    (indented, indent, offset,
     blank_finish) = machine.get_first_known_indented(match.end())
    label = match.group(1)
    citation = nodes.citation('\n'.join(indented))
    citation.line = lineno
    citation += nodes.label('', label)
    citation['names'].append(normalize_name(label))
    self.document.note_citation(citation)
    self.document.note_explicit_target(citation, citation)
    if indented:
        self.nested_parse(indented, input_offset=offset, node=citation)
    return [citation], blank_finish
def hyperlink_target(self, match):
    """
    Parse a hyperlink target (explicit markup).

    The target name may continue over several lines, so lines are
    appended to the candidate text until the target pattern matches.
    Raise `MarkupError` if it never does.  Return a one-element list
    with the target, and the "blank finish" flag.
    """
    pattern = self.explicit.patterns.target
    machine = self.state_machine
    lineno = machine.abs_line_number()
    (block, indent, offset,
     blank_finish) = machine.get_first_known_indented(
        match.end(), until_blank=1, strip_indent=0)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block = [escape2null(line) for line in block]
    escaped = block[0]
    blockindex = 0
    while True:
        targetmatch = pattern.match(escaped)
        if targetmatch:
            break
        blockindex += 1
        try:
            escaped += block[blockindex]
        except IndexError:
            raise MarkupError('malformed hyperlink target.', lineno)
    del block[:blockindex]
    # Keep only what follows the matched target name on its last line.
    tail_start = targetmatch.end() - len(escaped) - 1
    block[0] = (block[0] + ' ')[tail_start:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish
def make_target(self, block, block_text, lineno, target_name):
    """
    Create a target node from the target's body lines.

    Return an indirect target ('refname'), a target registered with its
    URI ('refuri'), or, for any other type, the data returned by
    `parse_target()`.
    """
    target_type, data = self.parse_target(block, block_text, lineno)
    if target_type == 'refname':
        target = nodes.target(block_text, '', refname=normalize_name(data))
        target.indirect_reference_name = data
        self.add_target(target_name, '', target, lineno)
        self.document.note_indirect_target(target)
        return target
    if target_type == 'refuri':
        target = nodes.target(block_text, '')
        self.add_target(target_name, data, target, lineno)
        return target
    return data
def parse_target(self, block, block_text, lineno):
    """
    Determine the type of reference of a target.

    :Return: A 2-tuple, one of:

        - 'refname' and the indirect reference name
        - 'refuri' and the URI
        - 'malformed' and a system_message node
    """
    if block and block[-1].strip()[-1:] == '_':  # possible indirect target
        candidate = ' '.join([line.strip() for line in block])
        refname = self.is_reference(candidate)
        if refname:
            return 'refname', refname
    # Otherwise a URI: all whitespace is removed before unescaping.
    pieces = [''.join(line.split()) for line in block]
    return 'refuri', unescape(''.join(pieces))
def is_reference(self, reference):
    """
    Return the unescaped reference name if `reference` is a hyperlink
    reference (simple or phrase form), else ``None``.
    """
    normalized = whitespace_normalize_name(reference)
    match = self.explicit.patterns.reference.match(normalized)
    if match:
        return unescape(match.group('simple') or match.group('phrase'))
    return None
def add_target(self, targetname, refuri, target, lineno):
    """
    Fill in `target` and register it with the document.

    Named targets get their normalized name and, if `refuri` is given,
    the adjusted URI (`ApplicationError` is raised if adjusting yields
    nothing).  Targets without a name are marked anonymous.
    """
    target.line = lineno
    if not targetname:              # anonymous target
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = 1
        self.document.note_anonymous_target(target)
        return
    name = normalize_name(unescape(targetname))
    target['names'].append(name)
    if refuri:
        uri = self.inliner.adjust_uri(refuri)
        if not uri:
            raise ApplicationError('problem with URI: %r' % refuri)
        target['refuri'] = uri
    self.document.note_explicit_target(target, self.parent)
def substitution_def(self, match):
    """
    Parse a substitution definition (explicit markup).

    Return a one-element list and the "blank finish" flag.  The element
    is the `substitution_definition` node, or a system message when the
    definition has no contents, contains a disallowed element, or ends
    up empty.  Raise `MarkupError` if the substitution marker is
    malformed.
    """
    pattern = self.explicit.patterns.substitution
    machine = self.state_machine
    lineno = machine.abs_line_number()
    (block, indent, offset,
     blank_finish) = machine.get_first_known_indented(match.end(),
                                                      strip_indent=0)
    blocktext = (match.string[:match.end()] + '\n'.join(block))
    block.disconnect()
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    # The marker may span lines: extend the text until the pattern matches.
    while True:
        subdefmatch = pattern.match(escaped)
        if subdefmatch:
            break
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.',
                              lineno)
    del block[:blockindex]          # strip out the substitution marker
    block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
    if not block[0]:
        del block[0]
        offset += 1
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.line = lineno
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext), line=lineno)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
        block, input_offset=offset, node=substitution_node,
        initial_state='SubstitutionDef', blank_finish=blank_finish)
    # Move children that are neither inline elements nor text (such as
    # system messages) out of the definition and into the parent.
    i = 0
    for node in substitution_node[:]:
        if isinstance(node, (nodes.Inline, nodes.Text)):
            i += 1
        else:
            self.parent += substitution_node[i]
            del substitution_node[i]
    for node in substitution_node.traverse(nodes.Element):
        if self.disallowed_inside_substitution_definitions(node):
            pformat = nodes.literal_block('', node.pformat().rstrip())
            msg = self.reporter.error(
                'Substitution definition contains illegal element:',
                pformat, nodes.literal_block(blocktext, blocktext),
                line=lineno)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
            'Substitution definition "%s" empty or invalid.'
            % subname,
            nodes.literal_block(blocktext, blocktext), line=lineno)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish
def disallowed_inside_substitution_definitions(self, node):
    """
    Return 1 if `node` may not appear inside a substitution definition
    (it has ids, is an anonymous reference, or is an auto-numbered
    footnote reference), else 0.
    """
    if node['ids']:
        return 1
    if isinstance(node, nodes.reference) and node.get('anonymous'):
        return 1
    if isinstance(node, nodes.footnote_reference) and node.get('auto'):
        return 1
    return 0
def directive(self, match, **option_presets):
    """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
    type_name = match.group(1)
    directive_class, messages = directives.directive(
        type_name, self.memo.language, self.document)
    self.parent += messages
    if not directive_class:
        return self.unknown_directive(type_name)
    return self.run_directive(
        directive_class, match, type_name, option_presets)
def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive then run its directive function.

    Parameters:

    - `directive`: The class implementing the directive.  Must be
      a subclass of `rst.Directive`.

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, defaults for the
      directive options.  Currently, only an "alt" option is passed by
      substitution definitions (value: the substitution name), which may
      be used by an embedded image directive.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    if isinstance(directive, (FunctionType, MethodType)):
        # Function-style directives are wrapped in a directive class.
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    lineno = self.state_machine.abs_line_number()
    initial_line_offset = self.state_machine.line_offset
    indented, indent, line_offset, blank_finish \
        = self.state_machine.get_first_known_indented(match.end(),
                                                      strip_top=0)
    block_text = '\n'.join(self.state_machine.input_lines[
        initial_line_offset : self.state_machine.line_offset + 1])
    try:
        arguments, options, content, content_offset = (
            self.parse_directive_block(indented, line_offset,
                                       directive, option_presets))
    except MarkupError as detail:
        error = self.reporter.error(
            'Error in "%s" directive:\n%s.' % (type_name,
                                               ' '.join(detail.args)),
            nodes.literal_block(block_text, block_text), line=lineno)
        return [error], blank_finish
    directive_instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = directive_instance.run()
    except docutils.parsers.rst.DirectiveError as directive_error:
        # The directive reported a problem: emit a system message with
        # the directive's source text attached.
        msg_node = self.reporter.system_message(directive_error.level,
                                                directive_error.message)
        msg_node += nodes.literal_block(block_text, block_text)
        msg_node['line'] = lineno
        result = [msg_node]
    assert isinstance(result, list), \
        'Directive "%s" must return a list of nodes.' % type_name
    for i in range(len(result)):
        assert isinstance(result[i], nodes.Node), \
            ('Directive "%s" returned non-Node object (index %s): %r'
             % (type_name, i, result[i]))
    return (result,
            blank_finish or self.state_machine.is_next_line_blank())
2063 def parse_directive_block(self
, indented
, line_offset
, directive
,
2065 option_spec
= directive
.option_spec
2066 has_content
= directive
.has_content
2067 if indented
and not indented
[0].strip():
2068 indented
.trim_start()
2070 while indented
and not indented
[-1].strip():
2072 if indented
and (directive
.required_arguments
2073 or directive
.optional_arguments
2075 for i
in range(len(indented
)):
2076 if not indented
[i
].strip():
2080 arg_block
= indented
[:i
]
2081 content
= indented
[i
+1:]
2082 content_offset
= line_offset
+ i
+ 1
2085 content_offset
= line_offset
2087 while content
and not content
[0].strip():
2088 content
.trim_start()
2091 options
, arg_block
= self
.parse_directive_options(
2092 option_presets
, option_spec
, arg_block
)
2093 if arg_block
and not (directive
.required_arguments
2094 or directive
.optional_arguments
):
2095 raise MarkupError('no arguments permitted; blank line '
2096 'required before content block')
2099 if directive
.required_arguments
or directive
.optional_arguments
:
2100 arguments
= self
.parse_directive_arguments(
2101 directive
, arg_block
)
2104 if content
and not has_content
:
2105 raise MarkupError('no content permitted')
2106 return (arguments
, options
, content
, content_offset
)
2108 def parse_directive_options(self
, option_presets
, option_spec
, arg_block
):
2109 options
= option_presets
.copy()
2110 for i
in range(len(arg_block
)):
2111 if arg_block
[i
][:1] == ':':
2112 opt_block
= arg_block
[i
:]
2113 arg_block
= arg_block
[:i
]
2118 success
, data
= self
.parse_extension_options(option_spec
,
2120 if success
: # data is a dict of options
2121 options
.update(data
)
2122 else: # data is an error string
2123 raise MarkupError(data
)
2124 return options
, arg_block
def parse_directive_arguments(self, directive, arg_block):
    """Split a directive's argument block into a list of argument strings.

    The lines of `arg_block` are joined with newlines and split on
    whitespace.  The count is then checked against the directive's
    ``required_arguments`` and ``optional_arguments``:

    - too few arguments raises `MarkupError`;
    - too many arguments raises `MarkupError`, unless the directive sets
      ``final_argument_whitespace``; in that case the text is re-split so
      that the last permitted argument keeps all remaining text,
      including its internal whitespace.

    Returns the list of arguments.
    """
    minimum = directive.required_arguments
    maximum = minimum + directive.optional_arguments
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    supplied = len(arguments)
    if supplied < minimum:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (minimum, supplied))
    if supplied <= maximum:
        return arguments
    if not directive.final_argument_whitespace:
        raise MarkupError(
            'maximum %s argument(s) allowed, %s supplied'
            % (maximum, supplied))
    # Limit the number of splits so the overflow stays in the final argument.
    return arg_text.split(None, maximum - 1)
2143 def parse_extension_options(self
, option_spec
, datalines
):
2145 Parse `datalines` for a field list containing extension options
2146 matching `option_spec`.
2149 - `option_spec`: a mapping of option name to conversion
2150 function, which should raise an exception on bad input.
2151 - `datalines`: a list of input strings.
2154 - Success value, 1 or 0.
2155 - An option dictionary on success, an error string on failure.
2157 node
= nodes
.field_list()
2158 newline_offset
, blank_finish
= self
.nested_list_parse(
2159 datalines
, 0, node
, initial_state
='ExtensionOptions',
2161 if newline_offset
!= len(datalines
): # incomplete parse of block
2162 return 0, 'invalid option block'
2164 options
= utils
.extract_extension_options(node
, option_spec
)
2165 except KeyError, detail
:
2166 return 0, ('unknown option: "%s"' % detail
.args
[0])
2167 except (ValueError, TypeError), detail
:
2168 return 0, ('invalid option value: %s' % ' '.join(detail
.args
))
2169 except utils
.ExtensionOptionError
, detail
:
2170 return 0, ('invalid option data: %s' % ' '.join(detail
.args
))
2174 return 0, 'option data incompletely parsed'
def unknown_directive(self, type_name):
    """Build an error message for a directive type that was not found.

    The block is fetched from the state machine starting at column 0 with
    ``strip_indent=0``, and its text is attached to the error as a literal
    block.

    Returns a 2-tuple: a one-element list holding the error system
    message, and the "blank finish" value reported for the fetched block.
    """
    error_line = self.state_machine.abs_line_number()
    block_lines, _indent, _offset, blank_finish = (
        self.state_machine.get_first_known_indented(0, strip_indent=0))
    source_text = '\n'.join(block_lines)
    message = 'Unknown directive type "%s".' % type_name
    literal = nodes.literal_block(source_text, source_text)
    report = self.reporter.error(message, literal, line=error_line)
    return [report], blank_finish
2186 def comment(self
, match
):
2187 if not match
.string
[match
.end():].strip() \
2188 and self
.state_machine
.is_next_line_blank(): # an empty comment?
2189 return [nodes
.comment()], 1 # "A tiny but practical wart."
2190 indented
, indent
, offset
, blank_finish
= \
2191 self
.state_machine
.get_first_known_indented(match
.end())
2192 while indented
and not indented
[-1].strip():
2194 text
= '\n'.join(indented
)
2195 return [nodes
.comment(text
, text
)], blank_finish
2197 explicit
.constructs
= [
2200 \.\.[ ]+ # explicit markup start
2203 [0-9]+ # manually numbered footnote
2205 \# # anonymous auto-numbered footnote
2207 \#%s # auto-number ed?) footnote label
2209 \* # auto-symbol footnote
2212 ([ ]+|$) # whitespace or end of line
2213 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
)),
2216 \.\.[ ]+ # explicit markup start
2217 \[(%s)\] # citation label
2218 ([ ]+|$) # whitespace or end of line
2219 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
)),
2222 \.\.[ ]+ # explicit markup start
2223 _ # target indicator
2224 (?![ ]|$) # first char. not space or EOL
2228 \.\.[ ]+ # explicit markup start
2229 \| # substitution indicator
2230 (?![ ]|$) # first char. not space or EOL
2234 \.\.[ ]+ # explicit markup start
2235 (%s) # directive name
2236 [ ]? # optional space
2237 :: # directive delimiter
2238 ([ ]+|$) # whitespace or end of line
2239 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
))]
def explicit_markup(self, match, context, next_state):
    """Footnotes, hyperlink targets, directives, comments.

    Parses the construct on the matched line with `explicit_construct`,
    appends the resulting nodes to ``self.parent``, then passes the
    "blank finish" value to `explicit_list`.

    Returns an empty context, `next_state` unchanged, and an empty
    result list.
    """
    parsed_nodes, finished_blank = self.explicit_construct(match)
    self.parent += parsed_nodes
    self.explicit_list(finished_blank)
    return [], next_state, []
2248 def explicit_construct(self
, match
):
2249 """Determine which explicit construct this is, parse & return it."""
2251 for method
, pattern
in self
.explicit
.constructs
:
2252 expmatch
= pattern
.match(match
.string
)
2255 return method(self
, expmatch
)
2256 except MarkupError
, error
: # never reached?
2257 message
, lineno
= error
.args
2258 errors
.append(self
.reporter
.warning(message
, line
=lineno
))
2260 nodelist
, blank_finish
= self
.comment(match
)
2261 return nodelist
+ errors
, blank_finish
def explicit_list(self, blank_finish):
    """
    Create a nested state machine for a series of explicit markup
    constructs (including anonymous hyperlink targets).

    The nested parse starts on the line after the current one, in the
    'Explicit' state, with ``self.parent`` as the target node.  Afterwards
    the state machine is moved to the offset the nested parse returned,
    and an unindent warning is appended to ``self.parent`` if the nested
    parse did not report a blank finish.
    """
    machine = self.state_machine
    following_lines = machine.input_lines[machine.line_offset + 1:]
    newline_offset, blank_finish = self.nested_list_parse(
        following_lines,
        input_offset=machine.abs_line_offset() + 1,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=machine.match_titles)
    self.goto_line(newline_offset)
    if blank_finish:
        return
    self.parent += self.unindent_warning('Explicit markup')
def anonymous(self, match, context, next_state):
    """Anonymous hyperlink targets.

    Parses the matched line with `anonymous_target`, appends the
    resulting nodes to ``self.parent``, then passes the "blank finish"
    value to `explicit_list`.

    Returns an empty context, `next_state` unchanged, and an empty
    result list.
    """
    target_nodes, finished_blank = self.anonymous_target(match)
    self.parent += target_nodes
    self.explicit_list(finished_blank)
    return [], next_state, []
2286 def anonymous_target(self
, match
):
2287 lineno
= self
.state_machine
.abs_line_number()
2288 block
, indent
, offset
, blank_finish \
2289 = self
.state_machine
.get_first_known_indented(match
.end(),
2291 blocktext
= match
.string
[:match
.end()] + '\n'.join(block
)
2292 block
= [escape2null(line
) for line
in block
]
2293 target
= self
.make_target(block
, blocktext
, lineno
, '')
2294 return [target
], blank_finish
2296 def line(self
, match
, context
, next_state
):
2297 """Section title overline or transition marker."""
2298 if self
.state_machine
.match_titles
:
2299 return [match
.string
], 'Line', []
2300 elif match
.string
.strip() == '::':
2301 raise statemachine
.TransitionCorrection('text')
2302 elif len(match
.string
.strip()) < 4:
2303 msg
= self
.reporter
.info(
2304 'Unexpected possible title overline or transition.\n'
2305 "Treating it as ordinary text because it's so short.",
2306 line
=self
.state_machine
.abs_line_number())
2308 raise statemachine
.TransitionCorrection('text')
2310 blocktext
= self
.state_machine
.line
2311 msg
= self
.reporter
.severe(
2312 'Unexpected section title or transition.',
2313 nodes
.literal_block(blocktext
, blocktext
),
2314 line
=self
.state_machine
.abs_line_number())
2316 return [], next_state
, []
def text(self, match, context, next_state):
    """Titles, definition lists, paragraphs.

    Nothing is parsed here: the matched line becomes the new context and
    control passes to the 'Text' state.  The incoming `context` and
    `next_state` are ignored.
    """
    first_line = match.string
    return [first_line], 'Text', []
2323 class RFC2822Body(Body
):
2326 RFC2822 headers are only valid as the first constructs in documents. As
2327 soon as anything else appears, the `Body` state should take over.
2330 patterns
= Body
.patterns
.copy() # can't modify the original
2331 patterns
['rfc2822'] = r
'[!-9;-~]+:( +|$)'
2332 initial_transitions
= [(name
, 'Body')
2333 for name
in Body
.initial_transitions
]
2334 initial_transitions
.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2336 def rfc2822(self
, match
, context
, next_state
):
2337 """RFC2822-style field list item."""
2338 fieldlist
= nodes
.field_list(classes
=['rfc2822'])
2339 self
.parent
+= fieldlist
2340 field
, blank_finish
= self
.rfc2822_field(match
)
2342 offset
= self
.state_machine
.line_offset
+ 1 # next line
2343 newline_offset
, blank_finish
= self
.nested_list_parse(
2344 self
.state_machine
.input_lines
[offset
:],
2345 input_offset
=self
.state_machine
.abs_line_offset() + 1,
2346 node
=fieldlist
, initial_state
='RFC2822List',
2347 blank_finish
=blank_finish
)
2348 self
.goto_line(newline_offset
)
2349 if not blank_finish
:
2350 self
.parent
+= self
.unindent_warning(
2351 'RFC2822-style field list')
2352 return [], next_state
, []
2354 def rfc2822_field(self
, match
):
2355 name
= match
.string
[:match
.string
.find(':')]
2356 indented
, indent
, line_offset
, blank_finish
= \
2357 self
.state_machine
.get_first_known_indented(match
.end(),
2359 fieldnode
= nodes
.field()
2360 fieldnode
+= nodes
.field_name(name
, name
)
2361 fieldbody
= nodes
.field_body('\n'.join(indented
))
2362 fieldnode
+= fieldbody
2364 self
.nested_parse(indented
, input_offset
=line_offset
,
2366 return fieldnode
, blank_finish
2369 class SpecializedBody(Body
):
2372 Superclass for second and subsequent compound element members. Compound
2373 elements are lists and list-like constructs.
2375 All transition methods are disabled (redefined as `invalid_input`).
2376 Override individual methods in subclasses to re-enable.
2378 For example, once an initial bullet list item, say, is recognized, the
2379 `BulletList` subclass takes over, with a "bullet_list" node as its
2380 container. Upon encountering the initial bullet list item, `Body.bullet`
2381 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2382 starts up a nested parsing session with `BulletList` as the initial state.
2383 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2384 as only bullet list items are encountered, they are parsed and inserted
2385 into the container. The first construct which is *not* a bullet list item
2386 triggers the `invalid_input` method, which ends the nested parse and
2387 closes the container. `BulletList` needs to recognize input that is
2388 invalid in the context of a bullet list, which means everything *other
2389 than* bullet list items, so it inherits the transition list created in
2393 def invalid_input(self
, match
=None, context
=None, next_state
=None):
2394 """Not a compound element member. Abort this state machine."""
2395 self
.state_machine
.previous_line() # back up so parent SM can reassess
2398 indent
= invalid_input
2399 bullet
= invalid_input
2400 enumerator
= invalid_input
2401 field_marker
= invalid_input
2402 option_marker
= invalid_input
2403 doctest
= invalid_input
2404 line_block
= invalid_input
2405 grid_table_top
= invalid_input
2406 simple_table_top
= invalid_input
2407 explicit_markup
= invalid_input
2408 anonymous
= invalid_input
2409 line
= invalid_input
2410 text
= invalid_input
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item.

        The first character of the matched line is compared with the
        parent node's 'bullet' attribute; on a mismatch the line is handed
        to `invalid_input` (documented as aborting this state machine).
        Otherwise the item is parsed with `list_item`, appended to
        ``self.parent``, and its "blank finish" value is stored on the
        state.
        """
        marker = match.string[0]
        if marker != self.parent['bullet']:
            # different bullet: new list
            self.invalid_input()
        item, finished_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = finished_blank
        return [], next_state, []
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists.

        The matched line becomes the new context and control passes to
        the 'Definition' state; `context` and `next_state` are ignored.
        """
        term_line = match.string
        return [term_line], 'Definition', []
2437 class EnumeratedList(SpecializedBody
):
2439 """Second and subsequent enumerated_list list_items."""
2441 def enumerator(self
, match
, context
, next_state
):
2442 """Enumerated list item."""
2443 format
, sequence
, text
, ordinal
= self
.parse_enumerator(
2444 match
, self
.parent
['enumtype'])
2445 if ( format
!= self
.format
2446 or (sequence
!= '#' and (sequence
!= self
.parent
['enumtype']
2448 or ordinal
!= (self
.lastordinal
+ 1)))
2449 or not self
.is_enumerated_list_item(ordinal
, sequence
, format
)):
2450 # different enumeration: new list
2451 self
.invalid_input()
2454 listitem
, blank_finish
= self
.list_item(match
.end())
2455 self
.parent
+= listitem
2456 self
.blank_finish
= blank_finish
2457 self
.lastordinal
= ordinal
2458 return [], next_state
, []
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field.

        Parses the matched line with ``self.field``, appends the result
        to ``self.parent``, and stores the "blank finish" value on the
        state.
        """
        parsed_field, finished_blank = self.field(match)
        self.parent += parsed_field
        self.blank_finish = finished_blank
        return [], next_state, []
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item.

        Parses the matched line with ``self.option_list_item``.  If that
        raises `MarkupError`, the line is handed to `invalid_input`
        (documented as aborting this state machine).  Otherwise the item
        is appended to ``self.parent`` and its "blank finish" value is
        stored on the state.
        """
        try:
            option_list_item, blank_finish = self.option_list_item(match)
        except MarkupError:
            # The exception's arguments are not needed here, so they are no
            # longer unpacked.  The old ``except MarkupError, (message,
            # lineno):`` form is Python-2-only syntax, and the unpacking
            # itself fails when the args are not exactly a pair.
            self.invalid_input()
        self.parent += option_list_item
        self.blank_finish = blank_finish
        return [], next_state, []
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    # Reuse RFC2822Body's pattern and transition tables as they are.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    # A blank line is handled by SpecializedBody.invalid_input.
    blank = SpecializedBody.invalid_input

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item.

        Parses the matched line with `rfc2822_field`, appends the field
        to ``self.parent``, and stores the "blank finish" value on the
        state.  The next state is always 'RFC2822List', whatever
        `next_state` was passed in.
        """
        parsed_field, finished_blank = self.rfc2822_field(match)
        self.parent += parsed_field
        self.blank_finish = finished_blank
        return [], 'RFC2822List', []
2505 class ExtensionOptions(FieldList
):
2508 Parse field_list fields for extension options.
2510 No nested parsing is done (including inline markup parsing).
2513 def parse_field_body(self
, indented
, offset
, node
):
2514 """Override `Body.parse_field_body` for simpler parsing."""
2516 for line
in list(indented
) + ['']:
2520 text
= '\n'.join(lines
)
2521 node
+= nodes
.paragraph(text
, text
)
2525 class LineBlock(SpecializedBody
):
2527 """Second and subsequent lines of a line_block."""
2529 blank
= SpecializedBody
.invalid_input
2531 def line_block(self
, match
, context
, next_state
):
2532 """New line of line block."""
2533 lineno
= self
.state_machine
.abs_line_number()
2534 line
, messages
, blank_finish
= self
.line_block_line(match
, lineno
)
2536 self
.parent
.parent
+= messages
2537 self
.blank_finish
= blank_finish
2538 return [], next_state
, []
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    # A blank line is handled by SpecializedBody.invalid_input.
    blank = SpecializedBody.invalid_input

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments.

        Parses the matched line with `explicit_construct`, appends the
        nodes to ``self.parent``, and stores the "blank finish" value on
        the state.
        """
        parsed_nodes, finished_blank = self.explicit_construct(match)
        self.parent += parsed_nodes
        self.blank_finish = finished_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets.

        Parses the matched line with `anonymous_target`, appends the
        nodes to ``self.parent``, and stores the "blank finish" value on
        the state.
        """
        target_nodes, finished_blank = self.anonymous_target(match)
        self.parent += target_nodes
        self.blank_finish = finished_blank
        return [], next_state, []
2562 class SubstitutionDef(Body
):
2565 Parser for the contents of a substitution_definition element.
2569 'embedded_directive': re
.compile(r
'(%s)::( +|$)'
2570 % Inliner
.simplename
, re
.UNICODE
),
2572 initial_transitions
= ['embedded_directive', 'text']
2574 def embedded_directive(self
, match
, context
, next_state
):
2575 nodelist
, blank_finish
= self
.directive(match
,
2576 alt
=self
.parent
['names'][0])
2577 self
.parent
+= nodelist
2578 if not self
.state_machine
.at_eof():
2579 self
.blank_finish
= blank_finish
2582 def text(self
, match
, context
, next_state
):
2583 if not self
.state_machine
.at_eof():
2584 self
.blank_finish
= self
.state_machine
.is_next_line_blank()
2588 class Text(RSTState
):
2591 Classifier of second line of a text block.
2593 Could be a paragraph, a definition list item, or a title.
2596 patterns
= {'underline': Body
.patterns
['line'],
2598 initial_transitions
= [('underline', 'Body'), ('text', 'Body')]
2600 def blank(self
, match
, context
, next_state
):
2601 """End of paragraph."""
2602 paragraph
, literalnext
= self
.paragraph(
2603 context
, self
.state_machine
.abs_line_number() - 1)
2604 self
.parent
+= paragraph
2606 self
.parent
+= self
.literal_block()
2607 return [], 'Body', []
2609 def eof(self
, context
):
2611 self
.blank(None, context
, None)
2614 def indent(self
, match
, context
, next_state
):
2615 """Definition list item."""
2616 definitionlist
= nodes
.definition_list()
2617 definitionlistitem
, blank_finish
= self
.definition_list_item(context
)
2618 definitionlist
+= definitionlistitem
2619 self
.parent
+= definitionlist
2620 offset
= self
.state_machine
.line_offset
+ 1 # next line
2621 newline_offset
, blank_finish
= self
.nested_list_parse(
2622 self
.state_machine
.input_lines
[offset
:],
2623 input_offset
=self
.state_machine
.abs_line_offset() + 1,
2624 node
=definitionlist
, initial_state
='DefinitionList',
2625 blank_finish
=blank_finish
, blank_finish_state
='Definition')
2626 self
.goto_line(newline_offset
)
2627 if not blank_finish
:
2628 self
.parent
+= self
.unindent_warning('Definition list')
2629 return [], 'Body', []
2631 def underline(self
, match
, context
, next_state
):
2632 """Section title."""
2633 lineno
= self
.state_machine
.abs_line_number()
2634 title
= context
[0].rstrip()
2635 underline
= match
.string
.rstrip()
2636 source
= title
+ '\n' + underline
2638 if column_width(title
) > len(underline
):
2639 if len(underline
) < 4:
2640 if self
.state_machine
.match_titles
:
2641 msg
= self
.reporter
.info(
2642 'Possible title underline, too short for the title.\n'
2643 "Treating it as ordinary text because it's so short.",
2646 raise statemachine
.TransitionCorrection('text')
2648 blocktext
= context
[0] + '\n' + self
.state_machine
.line
2649 msg
= self
.reporter
.warning(
2650 'Title underline too short.',
2651 nodes
.literal_block(blocktext
, blocktext
), line
=lineno
)
2652 messages
.append(msg
)
2653 if not self
.state_machine
.match_titles
:
2654 blocktext
= context
[0] + '\n' + self
.state_machine
.line
2655 msg
= self
.reporter
.severe(
2656 'Unexpected section title.',
2657 nodes
.literal_block(blocktext
, blocktext
), line
=lineno
)
2658 self
.parent
+= messages
2660 return [], next_state
, []
2661 style
= underline
[0]
2663 self
.section(title
, source
, style
, lineno
- 1, messages
)
2664 return [], next_state
, []
2666 def text(self
, match
, context
, next_state
):
2668 startline
= self
.state_machine
.abs_line_number() - 1
2671 block
= self
.state_machine
.get_text_block(flush_left
=1)
2672 except statemachine
.UnexpectedIndentationError
, instance
:
2673 block
, source
, lineno
= instance
.args
2674 msg
= self
.reporter
.error('Unexpected indentation.',
2675 source
=source
, line
=lineno
)
2676 lines
= context
+ list(block
)
2677 paragraph
, literalnext
= self
.paragraph(lines
, startline
)
2678 self
.parent
+= paragraph
2682 self
.state_machine
.next_line()
2685 self
.parent
+= self
.literal_block()
2686 return [], next_state
, []
2688 def literal_block(self
):
2689 """Return a list of nodes."""
2690 indented
, indent
, offset
, blank_finish
= \
2691 self
.state_machine
.get_indented()
2692 while indented
and not indented
[-1].strip():
2695 return self
.quoted_literal_block()
2696 data
= '\n'.join(indented
)
2697 literal_block
= nodes
.literal_block(data
, data
)
2698 literal_block
.line
= offset
+ 1
2699 nodelist
= [literal_block
]
2700 if not blank_finish
:
2701 nodelist
.append(self
.unindent_warning('Literal block'))
2704 def quoted_literal_block(self
):
2705 abs_line_offset
= self
.state_machine
.abs_line_offset()
2706 offset
= self
.state_machine
.line_offset
2707 parent_node
= nodes
.Element()
2708 new_abs_offset
= self
.nested_parse(
2709 self
.state_machine
.input_lines
[offset
:],
2710 input_offset
=abs_line_offset
, node
=parent_node
, match_titles
=0,
2711 state_machine_kwargs
={'state_classes': (QuotedLiteralBlock
,),
2712 'initial_state': 'QuotedLiteralBlock'})
2713 self
.goto_line(new_abs_offset
)
2714 return parent_node
.children
2716 def definition_list_item(self
, termline
):
2717 indented
, indent
, line_offset
, blank_finish
= \
2718 self
.state_machine
.get_indented()
2719 definitionlistitem
= nodes
.definition_list_item(
2720 '\n'.join(termline
+ list(indented
)))
2721 lineno
= self
.state_machine
.abs_line_number() - 1
2722 definitionlistitem
.line
= lineno
2723 termlist
, messages
= self
.term(termline
, lineno
)
2724 definitionlistitem
+= termlist
2725 definition
= nodes
.definition('', *messages
)
2726 definitionlistitem
+= definition
2727 if termline
[0][-2:] == '::':
2728 definition
+= self
.reporter
.info(
2729 'Blank line missing before literal block (after the "::")? '
2730 'Interpreted as a definition list item.', line
=line_offset
+1)
2731 self
.nested_parse(indented
, input_offset
=line_offset
, node
=definition
)
2732 return definitionlistitem
, blank_finish
2734 classifier_delimiter
= re
.compile(' +: +')
2736 def term(self
, lines
, lineno
):
2737 """Return a definition_list's term and optional classifiers."""
2738 assert len(lines
) == 1
2739 text_nodes
, messages
= self
.inline_text(lines
[0], lineno
)
2740 term_node
= nodes
.term()
2741 node_list
= [term_node
]
2742 for i
in range(len(text_nodes
)):
2743 node
= text_nodes
[i
]
2744 if isinstance(node
, nodes
.Text
):
2745 parts
= self
.classifier_delimiter
.split(node
.rawsource
)
2747 node_list
[-1] += node
2750 node_list
[-1] += nodes
.Text(parts
[0].rstrip())
2751 for part
in parts
[1:]:
2752 classifier_node
= nodes
.classifier('', part
)
2753 node_list
.append(classifier_node
)
2755 node_list
[-1] += node
2756 return node_list
, messages
2759 class SpecializedText(Text
):
2762 Superclass for second and subsequent lines of Text-variants.
2764 All transition methods are disabled. Override individual methods in
2765 subclasses to re-enable.
2768 def eof(self
, context
):
2769 """Incomplete construct."""
2772 def invalid_input(self
, match
=None, context
=None, next_state
=None):
2773 """Not a compound element member. Abort this state machine."""
2776 blank
= invalid_input
2777 indent
= invalid_input
2778 underline
= invalid_input
2779 text
= invalid_input
2782 class Definition(SpecializedText
):
2784 """Second line of potential definition_list_item."""
2786 def eof(self
, context
):
2787 """Not a definition."""
2788 self
.state_machine
.previous_line(2) # so parent SM can reassess
2791 def indent(self
, match
, context
, next_state
):
2792 """Definition list item."""
2793 definitionlistitem
, blank_finish
= self
.definition_list_item(context
)
2794 self
.parent
+= definitionlistitem
2795 self
.blank_finish
= blank_finish
2796 return [], 'DefinitionList', []
2799 class Line(SpecializedText
):
2802 Second line of over- & underlined section title or transition marker.
2805 eofcheck
= 1 # @@@ ???
2806 """Set to 0 while parsing sections, so that we don't catch the EOF."""
2808 def eof(self
, context
):
2809 """Transition marker at end of section or document."""
2810 marker
= context
[0].strip()
2811 if self
.memo
.section_bubble_up_kludge
:
2812 self
.memo
.section_bubble_up_kludge
= 0
2813 elif len(marker
) < 4:
2814 self
.state_correction(context
)
2815 if self
.eofcheck
: # ignore EOFError with sections
2816 lineno
= self
.state_machine
.abs_line_number() - 1
2817 transition
= nodes
.transition(rawsource
=context
[0])
2818 transition
.line
= lineno
2819 self
.parent
+= transition
2823 def blank(self
, match
, context
, next_state
):
2824 """Transition marker."""
2825 lineno
= self
.state_machine
.abs_line_number() - 1
2826 marker
= context
[0].strip()
2828 self
.state_correction(context
)
2829 transition
= nodes
.transition(rawsource
=marker
)
2830 transition
.line
= lineno
2831 self
.parent
+= transition
2832 return [], 'Body', []
2834 def text(self
, match
, context
, next_state
):
2835 """Potential over- & underlined title."""
2836 lineno
= self
.state_machine
.abs_line_number() - 1
2837 overline
= context
[0]
2838 title
= match
.string
2841 underline
= self
.state_machine
.next_line()
2843 blocktext
= overline
+ '\n' + title
2844 if len(overline
.rstrip()) < 4:
2845 self
.short_overline(context
, blocktext
, lineno
, 2)
2847 msg
= self
.reporter
.severe(
2848 'Incomplete section title.',
2849 nodes
.literal_block(blocktext
, blocktext
), line
=lineno
)
2851 return [], 'Body', []
2852 source
= '%s\n%s\n%s' % (overline
, title
, underline
)
2853 overline
= overline
.rstrip()
2854 underline
= underline
.rstrip()
2855 if not self
.transitions
['underline'][0].match(underline
):
2856 blocktext
= overline
+ '\n' + title
+ '\n' + underline
2857 if len(overline
.rstrip()) < 4:
2858 self
.short_overline(context
, blocktext
, lineno
, 2)
2860 msg
= self
.reporter
.severe(
2861 'Missing matching underline for section title overline.',
2862 nodes
.literal_block(source
, source
), line
=lineno
)
2864 return [], 'Body', []
2865 elif overline
!= underline
:
2866 blocktext
= overline
+ '\n' + title
+ '\n' + underline
2867 if len(overline
.rstrip()) < 4:
2868 self
.short_overline(context
, blocktext
, lineno
, 2)
2870 msg
= self
.reporter
.severe(
2871 'Title overline & underline mismatch.',
2872 nodes
.literal_block(source
, source
), line
=lineno
)
2874 return [], 'Body', []
2875 title
= title
.rstrip()
2877 if column_width(title
) > len(overline
):
2878 blocktext
= overline
+ '\n' + title
+ '\n' + underline
2879 if len(overline
.rstrip()) < 4:
2880 self
.short_overline(context
, blocktext
, lineno
, 2)
2882 msg
= self
.reporter
.warning(
2883 'Title overline too short.',
2884 nodes
.literal_block(source
, source
), line
=lineno
)
2885 messages
.append(msg
)
2886 style
= (overline
[0], underline
[0])
2887 self
.eofcheck
= 0 # @@@ not sure this is correct
2888 self
.section(title
.lstrip(), source
, style
, lineno
+ 1, messages
)
2890 return [], 'Body', []
2892 indent
= text
# indented title
2894 def underline(self
, match
, context
, next_state
):
2895 overline
= context
[0]
2896 blocktext
= overline
+ '\n' + self
.state_machine
.line
2897 lineno
= self
.state_machine
.abs_line_number() - 1
2898 if len(overline
.rstrip()) < 4:
2899 self
.short_overline(context
, blocktext
, lineno
, 1)
2900 msg
= self
.reporter
.error(
2901 'Invalid section title or transition marker.',
2902 nodes
.literal_block(blocktext
, blocktext
), line
=lineno
)
2904 return [], 'Body', []
2906 def short_overline(self
, context
, blocktext
, lineno
, lines
=1):
2907 msg
= self
.reporter
.info(
2908 'Possible incomplete section title.\nTreating the overline as '
2909 "ordinary text because it's so short.", line
=lineno
)
2911 self
.state_correction(context
, lines
)
2913 def state_correction(self
, context
, lines
=1):
2914 self
.state_machine
.previous_line(lines
)
2916 raise statemachine
.StateCorrection('Body', 'text')
2919 class QuotedLiteralBlock(RSTState
):
2922 Nested parse handler for quoted (unindented) literal blocks.
2924 Special-purpose. Not for inclusion in `state_classes`.
2927 patterns
= {'initial_quoted': r
'(%(nonalphanum7bit)s)' % Body
.pats
,
2929 initial_transitions
= ('initial_quoted', 'text')
2931 def __init__(self
, state_machine
, debug
=0):
2932 RSTState
.__init
__(self
, state_machine
, debug
)
2934 self
.initial_lineno
= None
2936 def blank(self
, match
, context
, next_state
):
2940 return context
, next_state
, []
2942 def eof(self
, context
):
2944 text
= '\n'.join(context
)
2945 literal_block
= nodes
.literal_block(text
, text
)
2946 literal_block
.line
= self
.initial_lineno
2947 self
.parent
+= literal_block
2949 self
.parent
+= self
.reporter
.warning(
2950 'Literal block expected; none found.',
2951 line
=self
.state_machine
.abs_line_number())
2952 self
.state_machine
.previous_line()
2953 self
.parent
+= self
.messages
2956 def indent(self
, match
, context
, next_state
):
2957 assert context
, ('QuotedLiteralBlock.indent: context should not '
2959 self
.messages
.append(
2960 self
.reporter
.error('Unexpected indentation.',
2961 line
=self
.state_machine
.abs_line_number()))
2962 self
.state_machine
.previous_line()
2965 def initial_quoted(self
, match
, context
, next_state
):
2966 """Match arbitrary quote character on the first line only."""
2967 self
.remove_transition('initial_quoted')
2968 quote
= match
.string
[0]
2969 pattern
= re
.compile(re
.escape(quote
))
2970 # New transition matches consistent quotes only:
2971 self
.add_transition('quoted',
2972 (pattern
, self
.quoted
, self
.__class
__.__name
__))
2973 self
.initial_lineno
= self
.state_machine
.abs_line_number()
2974 return [match
.string
], next_state
, []
2976 def quoted(self
, match
, context
, next_state
):
2977 """Match consistent quotes on subsequent lines."""
2978 context
.append(match
.string
)
2979 return context
, next_state
, []
2981 def text(self
, match
, context
, next_state
):
2983 self
.messages
.append(
2984 self
.reporter
.error('Inconsistent literal block quoting.',
2985 line
=self
.state_machine
.abs_line_number()))
2986 self
.state_machine
.previous_line()
# One class per line; the order is the same as before.
state_classes = (
    Body,
    BulletList,
    DefinitionList,
    EnumeratedList,
    FieldList,
    OptionList,
    LineBlock,
    ExtensionOptions,
    Explicit,
    Text,
    Definition,
    Line,
    SubstitutionDef,
    RFC2822Body,
    RFC2822List,
)
"""Standard set of State classes used to start `RSTStateMachine`."""