2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
103 __docformat__
= 'reStructuredText'
109 from types
import FunctionType
, MethodType
111 from docutils
import nodes
, statemachine
, utils
, urischemes
112 from docutils
import ApplicationError
, DataError
113 from docutils
.statemachine
import StateMachineWS
, StateWS
114 from docutils
.nodes
import fully_normalize_name
as normalize_name
115 from docutils
.nodes
import whitespace_normalize_name
116 import docutils
.parsers
.rst
117 from docutils
.parsers
.rst
import directives
, languages
, tableparser
, roles
118 from docutils
.parsers
.rst
.languages
import en
as _fallback_language_module
119 from docutils
.utils
import escape2null
, unescape
, column_width
120 from docutils
.utils
import punctuation_chars
# Parser-specific exception types.  Each class only gives a distinct name to
# its base class; none of them adds attributes or behaviour of its own.
class MarkupError(DataError): pass
class UnknownInterpretedRoleError(DataError): pass
class InterpretedRoleNotImplementedError(DataError): pass
class ParserError(ApplicationError): pass
# Caught in `implicit_inline`, where it guards the call to an implicit
# markup handler.
class MarkupMismatch(Exception): pass
131 """Stores data attributes for dotted-attribute access."""
def __init__(self, **keywordargs):
    """Store every keyword argument as an attribute of the instance."""
    # vars(self) is the instance ``__dict__``; updating it sets all of the
    # attributes in one step.
    attributes = vars(self)
    attributes.update(keywordargs)
137 class RSTStateMachine(StateMachineWS
):
140 reStructuredText's master StateMachine.
142 The entry point to reStructuredText parsing is the `run()` method.
145 def run(self
, input_lines
, document
, input_offset
=0, match_titles
=1,
148 Parse `input_lines` and modify the `document` node in place.
150 Extend `StateMachineWS.run()`: set up parse-global data and
151 run the StateMachine.
153 self
.language
= languages
.get_language(
154 document
.settings
.language_code
)
155 self
.match_titles
= match_titles
158 inliner
.init_customizations(document
.settings
)
159 self
.memo
= Struct(document
=document
,
160 reporter
=document
.reporter
,
161 language
=self
.language
,
164 section_bubble_up_kludge
=0,
166 self
.document
= document
167 self
.attach_observer(document
.note_source
)
168 self
.reporter
= self
.memo
.reporter
170 results
= StateMachineWS
.run(self
, input_lines
, input_offset
,
171 input_source
=document
['source'])
172 assert results
== [], 'RSTStateMachine.run() results should be empty!'
173 self
.node
= self
.memo
= None # remove unneeded references
176 class NestedStateMachine(StateMachineWS
):
179 StateMachine run from within other StateMachine runs, to parse nested
183 def run(self
, input_lines
, input_offset
, memo
, node
, match_titles
=1):
185 Parse `input_lines` and populate a `docutils.nodes.document` instance.
187 Extend `StateMachineWS.run()`: set up document-wide data.
189 self
.match_titles
= match_titles
191 self
.document
= memo
.document
192 self
.attach_observer(self
.document
.note_source
)
193 self
.reporter
= memo
.reporter
194 self
.language
= memo
.language
196 results
= StateMachineWS
.run(self
, input_lines
, input_offset
)
197 assert results
== [], ('NestedStateMachine.run() results should be '
202 class RSTState(StateWS
):
205 reStructuredText State superclass.
207 Contains methods used by all State subclasses.
210 nested_sm
= NestedStateMachine
def __init__(self, state_machine, debug=0):
    """
    Set the default nested state machine keyword arguments, then
    initialise the `StateWS` base class.
    """
    # Defaults handed to nested state machines created by this state:
    # the module's state classes, starting in the 'Body' state.
    self.nested_sm_kwargs = dict(state_classes=state_classes,
                                 initial_state='Body')
    StateWS.__init__(self, state_machine, debug)
218 def runtime_init(self
):
219 StateWS
.runtime_init(self
)
220 memo
= self
.state_machine
.memo
222 self
.reporter
= memo
.reporter
223 self
.inliner
= memo
.inliner
224 self
.document
= memo
.document
225 self
.parent
= self
.state_machine
.node
226 # enable the reporter to determine source and source-line
227 if not hasattr(self
.reporter
, 'locator'):
228 self
.reporter
.locator
= self
.state_machine
.get_source_and_line
229 # print "adding locator to reporter", self.state_machine.input_offset
232 def goto_line(self
, abs_line_offset
):
234 Jump to input line `abs_line_offset`, ignoring jumps past the end.
237 self
.state_machine
.goto_line(abs_line_offset
)
def no_match(self, context, transitions):
    """
    Override `StateWS.no_match` to generate a system message.

    This code should never be run.

    Report a "severe" system message naming this state's class, the
    `transitions`, the `context` and the state machine's current line,
    then return ``(context, None, [])``.
    """
    machine = self.state_machine
    source, line_number = machine.get_source_and_line()
    details = (self.__class__.__name__, transitions, context, machine.line)
    message = ('Internal error: no transition pattern match. State: "%s"; '
               'transitions: %s; context: %s; current line: %r.' % details)
    self.reporter.severe(message, source=source, line=line_number)
    return context, None, []
256 def bof(self
, context
):
257 """Called at beginning of file."""
260 def nested_parse(self
, block
, input_offset
, node
, match_titles
=0,
261 state_machine_class
=None, state_machine_kwargs
=None):
263 Create a new StateMachine rooted at `node` and run it over the input
267 if state_machine_class
is None:
268 state_machine_class
= self
.nested_sm
270 if state_machine_kwargs
is None:
271 state_machine_kwargs
= self
.nested_sm_kwargs
273 block_length
= len(block
)
278 state_machine
= self
.nested_sm_cache
.pop()
281 if not state_machine
:
282 state_machine
= state_machine_class(debug
=self
.debug
,
283 **state_machine_kwargs
)
284 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
285 node
=node
, match_titles
=match_titles
)
287 self
.nested_sm_cache
.append(state_machine
)
289 state_machine
.unlink()
290 new_offset
= state_machine
.abs_line_offset()
291 # No `block.parent` implies disconnected -- lines aren't in sync:
292 if block
.parent
and (len(block
) - block_length
) != 0:
293 # Adjustment for block if modified in nested parse:
294 self
.state_machine
.next_line(len(block
) - block_length
)
297 def nested_list_parse(self
, block
, input_offset
, node
, initial_state
,
299 blank_finish_state
=None,
302 state_machine_class
=None,
303 state_machine_kwargs
=None):
305 Create a new StateMachine rooted at `node` and run it over the input
306 `block`. Also keep track of optional intermediate blank lines and the
309 if state_machine_class
is None:
310 state_machine_class
= self
.nested_sm
311 if state_machine_kwargs
is None:
312 state_machine_kwargs
= self
.nested_sm_kwargs
.copy()
313 state_machine_kwargs
['initial_state'] = initial_state
314 state_machine
= state_machine_class(debug
=self
.debug
,
315 **state_machine_kwargs
)
316 if blank_finish_state
is None:
317 blank_finish_state
= initial_state
318 state_machine
.states
[blank_finish_state
].blank_finish
= blank_finish
319 for key
, value
in extra_settings
.items():
320 setattr(state_machine
.states
[initial_state
], key
, value
)
321 state_machine
.run(block
, input_offset
, memo
=self
.memo
,
322 node
=node
, match_titles
=match_titles
)
323 blank_finish
= state_machine
.states
[blank_finish_state
].blank_finish
324 state_machine
.unlink()
325 return state_machine
.abs_line_offset(), blank_finish
def section(self, title, source, style, lineno, messages):
    """Create a new subsection for `title` if `check_subsection` accepts it."""
    if not self.check_subsection(source, style, lineno):
        # Header rejected: nothing is created.
        return
    self.new_subsection(title, lineno, messages)
332 def check_subsection(self
, source
, style
, lineno
):
334 Check for a valid subsection header. Return 1 (true) or None (false).
336 When a new section is reached that isn't a subsection of the current
337 section, back up the line count (use ``previous_line(-x)``), then
338 ``raise EOFError``. The current StateMachine will finish, then the
339 calling StateMachine can re-examine the title. This will work its way
340 back up the calling chain until the correct section level is reached.
342 @@@ Alternative: Evaluate the title, store the title info & level, and
343 back up the chain until that level is reached. Store in memo? Or
346 :Exception: `EOFError` when a sibling or supersection encountered.
349 title_styles
= memo
.title_styles
350 mylevel
= memo
.section_level
351 try: # check for existing title style
352 level
= title_styles
.index(style
) + 1
353 except ValueError: # new title style
354 if len(title_styles
) == memo
.section_level
: # new subsection
355 title_styles
.append(style
)
357 else: # not at lowest level
358 self
.parent
+= self
.title_inconsistent(source
, lineno
)
360 if level
<= mylevel
: # sibling or supersection
361 memo
.section_level
= level
# bubble up to parent section
363 memo
.section_bubble_up_kludge
= 1
364 # back up 2 lines for underline title, 3 for overline title
365 self
.state_machine
.previous_line(len(style
) + 1)
366 raise EOFError # let parent section re-evaluate
367 if level
== mylevel
+ 1: # immediate subsection
369 else: # invalid subsection
370 self
.parent
+= self
.title_inconsistent(source
, lineno
)
373 def title_inconsistent(self
, sourcetext
, lineno
):
374 src
, srcline
= self
.state_machine
.get_source_and_line(lineno
)
375 error
= self
.reporter
.severe(
376 'Title level inconsistent:', nodes
.literal_block('', sourcetext
),
377 source
=src
, line
=srcline
)
380 def new_subsection(self
, title
, lineno
, messages
):
381 """Append new subsection to document tree. On return, check level."""
383 mylevel
= memo
.section_level
384 memo
.section_level
+= 1
385 section_node
= nodes
.section()
386 self
.parent
+= section_node
387 textnodes
, title_messages
= self
.inline_text(title
, lineno
)
388 titlenode
= nodes
.title(title
, '', *textnodes
)
389 name
= normalize_name(titlenode
.astext())
390 section_node
['names'].append(name
)
391 section_node
+= titlenode
392 section_node
+= messages
393 section_node
+= title_messages
394 self
.document
.note_implicit_target(section_node
, section_node
)
395 offset
= self
.state_machine
.line_offset
+ 1
396 absoffset
= self
.state_machine
.abs_line_offset() + 1
397 newabsoffset
= self
.nested_parse(
398 self
.state_machine
.input_lines
[offset
:], input_offset
=absoffset
,
399 node
=section_node
, match_titles
=1)
400 self
.goto_line(newabsoffset
)
401 if memo
.section_level
<= mylevel
: # can't handle next section?
402 raise EOFError # bubble up to supersection
403 # reset section_level; next pass will detect it properly
404 memo
.section_level
= mylevel
406 def paragraph(self
, lines
, lineno
):
408 Return a list (paragraph & messages) & a boolean: literal_block next?
410 data
= '\n'.join(lines
).rstrip()
411 if re
.search(r
'(?<!\\)(\\\\)*::$', data
):
414 elif data
[-3] in ' \n':
415 text
= data
[:-3].rstrip()
422 textnodes
, messages
= self
.inline_text(text
, lineno
)
423 p
= nodes
.paragraph(data
, '', *textnodes
)
424 p
.source
, p
.line
= self
.state_machine
.get_source_and_line(lineno
)
425 return [p
] + messages
, literalnext
def inline_text(self, text, lineno):
    """
    Return 2 lists: nodes (text and inline elements), and system_messages.

    The work is delegated to ``self.inliner.parse``, which also receives
    this state's ``memo`` and ``parent``.
    """
    parse = self.inliner.parse
    return parse(text, lineno, self.memo, self.parent)
def unindent_warning(self, node_name):
    """
    Return the result of ``self.reporter.warning`` for a `node_name`
    element that ended without a blank line.
    """
    source, current_line = self.state_machine.get_source_and_line()
    warn = self.reporter.warning
    text = ('%s ends without a blank line; '
            'unexpected unindent.' % node_name)
    # the actual problem is one line below the current line
    return warn(text, source=source, line=current_line + 1)
def build_regexp(definition, compile=1):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.
    :Parameter: `compile`: if true (the default), return a pattern object
        compiled with ``re.UNICODE``; otherwise return the regular
        expression source string.

    The result has the form ``prefix(?P<name>part1|part2|...)suffix``.
    """
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        # A nested definition (any tuple, including tuple subclasses such as
        # namedtuples) is expanded recursively into its uncompiled source
        # string so that it can be joined with the plain string parts.
        if isinstance(part, tuple):
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    # Explicit arguments instead of ``% locals()``: the format no longer
    # depends on the names of the local variables.
    regexp = '%s(?P<%s>%s)%s' % (prefix, name, or_group, suffix)
    if compile:
        return re.compile(regexp, re.UNICODE)
    return regexp
467 Parse inline markup; call the `parse()` method.
471 self
.implicit_dispatch
= [(self
.patterns
.uri
, self
.standalone_uri
),]
472 """List of (pattern, bound method) tuples, used by
473 `self.implicit_inline`."""
475 def init_customizations(self
, settings
):
476 """Setting-based customizations; run when parsing begins."""
477 if settings
.pep_references
:
478 self
.implicit_dispatch
.append((self
.patterns
.pep
,
480 if settings
.rfc_references
:
481 self
.implicit_dispatch
.append((self
.patterns
.rfc
,
484 def parse(self
, text
, lineno
, memo
, parent
):
485 # Needs to be refactored for nested inline markup.
486 # Add nested_parse() method?
488 Return 2 lists: nodes (text and inline elements), and system_messages.
490 Using `self.patterns.initial`, a pattern which matches start-strings
491 (emphasis, strong, interpreted, phrase reference, literal,
492 substitution reference, and inline target) and complete constructs
493 (simple reference, footnote reference), search for a candidate. When
494 one is found, check for validity (e.g., not a quoted '*' character).
495 If valid, search for the corresponding end string if applicable, and
496 check it for validity. If not found or invalid, generate a warning
497 and ignore the start-string. Implicit inline markup (e.g. standalone
500 self
.reporter
= memo
.reporter
501 self
.document
= memo
.document
502 self
.language
= memo
.language
504 pattern_search
= self
.patterns
.initial
.search
505 dispatch
= self
.dispatch
506 remaining
= escape2null(text
)
511 match
= pattern_search(remaining
)
513 groups
= match
.groupdict()
514 method
= dispatch
[groups
['start'] or groups
['backquote']
515 or groups
['refend'] or groups
['fnend']]
516 before
, inlines
, remaining
, sysmessages
= method(self
, match
,
518 unprocessed
.append(before
)
519 messages
+= sysmessages
521 processed
+= self
.implicit_inline(''.join(unprocessed
),
527 remaining
= ''.join(unprocessed
) + remaining
529 processed
+= self
.implicit_inline(remaining
, lineno
)
530 return processed
, messages
532 # Inline object recognition
533 # -------------------------
534 # lookahead and look-behind expressions for inline markup rules
535 start_string_prefix
= (u
'(^|(?<=\\s|[%s%s]))' %
536 (punctuation_chars
.openers
,
537 punctuation_chars
.delimiters
))
538 end_string_suffix
= (u
'($|(?=\\s|[\x00%s%s%s]))' %
539 (punctuation_chars
.closing_delimiters
,
540 punctuation_chars
.delimiters
,
541 punctuation_chars
.closers
))
542 # print start_string_prefix.encode('utf8')
543 # TODO: support non-ASCII whitespace in the following 4 patterns?
544 non_whitespace_before
= r
'(?<![ \n])'
545 non_whitespace_escape_before
= r
'(?<![ \n\x00])'
546 non_unescaped_whitespace_escape_before
= r
'(?<!(?<!\x00)[ \n\x00])'
547 non_whitespace_after
= r
'(?![ \n])'
548 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
549 simplename
= r
'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
550 # Valid URI characters (see RFC 2396 & RFC 2732);
551 # final \x00 allows backslash escapes in URIs:
552 uric
= r
"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
553 # Delimiter indicating the end of a URI (not part of the URI):
554 uri_end_delim
= r
"""[>]"""
555 # Last URI character; same as uric but no punctuation:
556 urilast
= r
"""[_~*/=+a-zA-Z0-9]"""
557 # End of a URI (either 'urilast' or 'uric followed by a
559 uri_end
= r
"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
560 emailc
= r
"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
562 %(emailc)s+(?:\.%(emailc)s+)* # name
564 %(emailc)s+(?:\.%(emailc)s*)* # host
565 %(uri_end)s # final URI char
567 parts
= ('initial_inline', start_string_prefix
, '',
568 [('start', '', non_whitespace_after
, # simple start-strings
570 r
'\*(?!\*)', # emphasis but not strong
572 r
'_`', # inline internal target
573 r
'\|(?!\|)'] # substitution reference
575 ('whole', '', end_string_suffix
, # whole constructs
576 [# reference name & end-string
577 r
'(?P<refname>%s)(?P<refend>__?)' % simplename
,
578 ('footnotelabel', r
'\[', r
'(?P<fnend>\]_)',
579 [r
'[0-9]+', # manually numbered
580 r
'\#(%s)?' % simplename
, # auto-numbered (w/ label?)
582 r
'(?P<citationlabel>%s)' % simplename
] # citation reference
586 ('backquote', # interpreted text or phrase reference
587 '(?P<role>(:%s:)?)' % simplename
, # optional role
588 non_whitespace_after
,
589 ['`(?!`)'] # but not literal
594 initial
=build_regexp(parts
),
595 emphasis
=re
.compile(non_whitespace_escape_before
596 + r
'(\*)' + end_string_suffix
, re
.UNICODE
),
597 strong
=re
.compile(non_whitespace_escape_before
598 + r
'(\*\*)' + end_string_suffix
, re
.UNICODE
),
599 interpreted_or_phrase_ref
=re
.compile(
601 %(non_unescaped_whitespace_escape_before)s
605 (?P<role>:%(simplename)s:)?
609 %(end_string_suffix)s
610 """ % locals(), re
.VERBOSE | re
.UNICODE
),
611 embedded_uri
=re
.compile(
614 (?:[ \n]+|^) # spaces or beginning of line/string
616 %(non_whitespace_after)s
617 ([^<>\x00]+) # anything but angle brackets & nulls
618 %(non_whitespace_before)s
619 > # close bracket w/o whitespace before
622 """ % locals(), re
.VERBOSE | re
.UNICODE
),
623 literal
=re
.compile(non_whitespace_before
+ '(``)'
624 + end_string_suffix
),
625 target
=re
.compile(non_whitespace_escape_before
626 + r
'(`)' + end_string_suffix
),
627 substitution_ref
=re
.compile(non_whitespace_escape_before
629 + end_string_suffix
),
630 email
=re
.compile(email_pattern
% locals() + '$',
631 re
.VERBOSE | re
.UNICODE
),
634 %(start_string_prefix)s
636 (?P<absolute> # absolute URI
637 (?P<scheme> # scheme (http, ftp, mailto)
638 [a-zA-Z][a-zA-Z0-9.+-]*
643 (//?)? # hierarchical URI
644 %(uric)s* # URI characters
645 %(uri_end)s # final URI char
651 ( # optional fragment
658 (?P<email> # email address
659 """ + email_pattern
+ r
"""
662 %(end_string_suffix)s
663 """) % locals(), re
.VERBOSE | re
.UNICODE
),
666 %(start_string_prefix)s
668 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
670 (PEP\s+(?P<pepnum2>\d+)) # reference by name
672 %(end_string_suffix)s""" % locals(), re
.VERBOSE | re
.UNICODE
),
675 %(start_string_prefix)s
676 (RFC(-|\s+)?(?P<rfcnum>\d+))
677 %(end_string_suffix)s""" % locals(), re
.VERBOSE | re
.UNICODE
))
679 def quoted_start(self
, match
):
680 """Test if inline markup start-string is 'quoted'.
682 'Quoted' in this context means the start-string is enclosed in a pair
683 of matching opening/closing delimiters (not necessarily quotes)
684 or at the end of the match.
686 string
= match
.string
687 start
= match
.start()
688 if start
== 0: # start-string at beginning of text
690 prestart
= string
[start
- 1]
692 poststart
= string
[match
.end()]
693 except IndexError: # start-string at end of text
694 return True # not "quoted" but no markup start-string either
695 return punctuation_chars
.match_chars(prestart
, poststart
)
697 def inline_obj(self
, match
, lineno
, end_pattern
, nodeclass
,
698 restore_backslashes
=0):
699 string
= match
.string
700 matchstart
= match
.start('start')
701 matchend
= match
.end('start')
702 if self
.quoted_start(match
):
703 return (string
[:matchend
], [], string
[matchend
:], [], '')
704 endmatch
= end_pattern
.search(string
[matchend
:])
705 if endmatch
and endmatch
.start(1): # 1 or more chars
706 text
= unescape(endmatch
.string
[:endmatch
.start(1)],
708 textend
= matchend
+ endmatch
.end(1)
709 rawsource
= unescape(string
[matchstart
:textend
], 1)
710 return (string
[:matchstart
], [nodeclass(rawsource
, text
)],
711 string
[textend
:], [], endmatch
.group(1))
712 msg
= self
.reporter
.warning(
713 'Inline %s start-string without end-string.'
714 % nodeclass
.__name
__, line
=lineno
)
715 text
= unescape(string
[matchstart
:matchend
], 1)
716 rawsource
= unescape(string
[matchstart
:matchend
], 1)
717 prb
= self
.problematic(text
, rawsource
, msg
)
718 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
], ''
720 def problematic(self
, text
, rawsource
, message
):
721 msgid
= self
.document
.set_id(message
, self
.parent
)
722 problematic
= nodes
.problematic(rawsource
, text
, refid
=msgid
)
723 prbid
= self
.document
.set_id(problematic
)
724 message
.add_backref(prbid
)
def emphasis(self, match, lineno):
    """
    Handle an emphasis start-string via `inline_obj`.

    Return ``(before, inlines, remaining, sysmessages)``; the end-string
    that `inline_obj` also returns is dropped.
    """
    outcome = self.inline_obj(match, lineno, self.patterns.emphasis,
                              nodes.emphasis)
    before, inlines, remaining, sysmessages, _endstring = outcome
    return before, inlines, remaining, sysmessages
def strong(self, match, lineno):
    """
    Handle a strong-emphasis start-string via `inline_obj`.

    Return ``(before, inlines, remaining, sysmessages)``; the end-string
    that `inline_obj` also returns is dropped.
    """
    outcome = self.inline_obj(match, lineno, self.patterns.strong,
                              nodes.strong)
    before, inlines, remaining, sysmessages, _endstring = outcome
    return before, inlines, remaining, sysmessages
737 def interpreted_or_phrase_ref(self
, match
, lineno
):
738 end_pattern
= self
.patterns
.interpreted_or_phrase_ref
739 string
= match
.string
740 matchstart
= match
.start('backquote')
741 matchend
= match
.end('backquote')
742 rolestart
= match
.start('role')
743 role
= match
.group('role')
748 elif self
.quoted_start(match
):
749 return (string
[:matchend
], [], string
[matchend
:], [])
750 endmatch
= end_pattern
.search(string
[matchend
:])
751 if endmatch
and endmatch
.start(1): # 1 or more chars
752 textend
= matchend
+ endmatch
.end()
753 if endmatch
.group('role'):
755 msg
= self
.reporter
.warning(
756 'Multiple roles in interpreted text (both '
757 'prefix and suffix present; only one allowed).',
759 text
= unescape(string
[rolestart
:textend
], 1)
760 prb
= self
.problematic(text
, text
, msg
)
761 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
762 role
= endmatch
.group('suffix')[1:-1]
764 escaped
= endmatch
.string
[:endmatch
.start(1)]
765 rawsource
= unescape(string
[matchstart
:textend
], 1)
766 if rawsource
[-1:] == '_':
768 msg
= self
.reporter
.warning(
769 'Mismatch: both interpreted text role %s and '
770 'reference suffix.' % position
, line
=lineno
)
771 text
= unescape(string
[rolestart
:textend
], 1)
772 prb
= self
.problematic(text
, text
, msg
)
773 return string
[:rolestart
], [prb
], string
[textend
:], [msg
]
774 return self
.phrase_ref(string
[:matchstart
], string
[textend
:],
775 rawsource
, escaped
, unescape(escaped
))
777 rawsource
= unescape(string
[rolestart
:textend
], 1)
778 nodelist
, messages
= self
.interpreted(rawsource
, escaped
, role
,
780 return (string
[:rolestart
], nodelist
,
781 string
[textend
:], messages
)
782 msg
= self
.reporter
.warning(
783 'Inline interpreted text or phrase reference start-string '
784 'without end-string.', line
=lineno
)
785 text
= unescape(string
[matchstart
:matchend
], 1)
786 prb
= self
.problematic(text
, text
, msg
)
787 return string
[:matchstart
], [prb
], string
[matchend
:], [msg
]
789 def phrase_ref(self
, before
, after
, rawsource
, escaped
, text
):
790 match
= self
.patterns
.embedded_uri
.search(escaped
)
792 text
= unescape(escaped
[:match
.start(0)])
793 uri_text
= match
.group(2)
794 uri
= ''.join(uri_text
.split())
795 uri
= self
.adjust_uri(uri
)
797 target
= nodes
.target(match
.group(1), refuri
=uri
)
799 raise ApplicationError('problem with URI: %r' % uri_text
)
804 refname
= normalize_name(text
)
805 reference
= nodes
.reference(rawsource
, text
,
806 name
=whitespace_normalize_name(text
))
807 node_list
= [reference
]
808 if rawsource
[-2:] == '__':
810 reference
['refuri'] = uri
812 reference
['anonymous'] = 1
815 reference
['refuri'] = uri
816 target
['names'].append(refname
)
817 self
.document
.note_explicit_target(target
, self
.parent
)
818 node_list
.append(target
)
820 reference
['refname'] = refname
821 self
.document
.note_refname(reference
)
822 return before
, node_list
, after
, []
824 def adjust_uri(self
, uri
):
825 match
= self
.patterns
.email
.match(uri
)
827 return 'mailto:' + uri
831 def interpreted(self
, rawsource
, text
, role
, lineno
):
832 role_fn
, messages
= roles
.role(role
, self
.language
, lineno
,
835 nodes
, messages2
= role_fn(role
, rawsource
, text
, lineno
, self
)
836 return nodes
, messages
+ messages2
838 msg
= self
.reporter
.error(
839 'Unknown interpreted text role "%s".' % role
,
841 return ([self
.problematic(rawsource
, rawsource
, msg
)],
def literal(self, match, lineno):
    """
    Handle an inline literal start-string via `inline_obj`, asking it to
    restore backslashes.

    Return ``(before, inlines, remaining, sysmessages)``; the end-string
    that `inline_obj` also returns is dropped.
    """
    outcome = self.inline_obj(match, lineno, self.patterns.literal,
                              nodes.literal, restore_backslashes=1)
    before, inlines, remaining, sysmessages, _endstring = outcome
    return before, inlines, remaining, sysmessages
850 def inline_internal_target(self
, match
, lineno
):
851 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
852 match
, lineno
, self
.patterns
.target
, nodes
.target
)
853 if inlines
and isinstance(inlines
[0], nodes
.target
):
854 assert len(inlines
) == 1
856 name
= normalize_name(target
.astext())
857 target
['names'].append(name
)
858 self
.document
.note_explicit_target(target
, self
.parent
)
859 return before
, inlines
, remaining
, sysmessages
861 def substitution_reference(self
, match
, lineno
):
862 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
863 match
, lineno
, self
.patterns
.substitution_ref
,
864 nodes
.substitution_reference
)
865 if len(inlines
) == 1:
866 subref_node
= inlines
[0]
867 if isinstance(subref_node
, nodes
.substitution_reference
):
868 subref_text
= subref_node
.astext()
869 self
.document
.note_substitution_ref(subref_node
, subref_text
)
870 if endstring
[-1:] == '_':
871 reference_node
= nodes
.reference(
872 '|%s%s' % (subref_text
, endstring
), '')
873 if endstring
[-2:] == '__':
874 reference_node
['anonymous'] = 1
876 reference_node
['refname'] = normalize_name(subref_text
)
877 self
.document
.note_refname(reference_node
)
878 reference_node
+= subref_node
879 inlines
= [reference_node
]
880 return before
, inlines
, remaining
, sysmessages
882 def footnote_reference(self
, match
, lineno
):
884 Handles `nodes.footnote_reference` and `nodes.citation_reference`
887 label
= match
.group('footnotelabel')
888 refname
= normalize_name(label
)
889 string
= match
.string
890 before
= string
[:match
.start('whole')]
891 remaining
= string
[match
.end('whole'):]
892 if match
.group('citationlabel'):
893 refnode
= nodes
.citation_reference('[%s]_' % label
,
895 refnode
+= nodes
.Text(label
)
896 self
.document
.note_citation_ref(refnode
)
898 refnode
= nodes
.footnote_reference('[%s]_' % label
)
899 if refname
[0] == '#':
900 refname
= refname
[1:]
902 self
.document
.note_autofootnote_ref(refnode
)
905 refnode
['auto'] = '*'
906 self
.document
.note_symbol_footnote_ref(
909 refnode
+= nodes
.Text(label
)
911 refnode
['refname'] = refname
912 self
.document
.note_footnote_ref(refnode
)
913 if utils
.get_trim_footnote_ref_space(self
.document
.settings
):
914 before
= before
.rstrip()
915 return (before
, [refnode
], remaining
, [])
917 def reference(self
, match
, lineno
, anonymous
=None):
918 referencename
= match
.group('refname')
919 refname
= normalize_name(referencename
)
920 referencenode
= nodes
.reference(
921 referencename
+ match
.group('refend'), referencename
,
922 name
=whitespace_normalize_name(referencename
))
924 referencenode
['anonymous'] = 1
926 referencenode
['refname'] = refname
927 self
.document
.note_refname(referencenode
)
928 string
= match
.string
929 matchstart
= match
.start('whole')
930 matchend
= match
.end('whole')
931 return (string
[:matchstart
], [referencenode
], string
[matchend
:], [])
def anonymous_reference(self, match, lineno):
    """Delegate to `reference()` with ``anonymous=1`` and return its result."""
    make_reference = self.reference
    return make_reference(match, lineno, anonymous=1)
936 def standalone_uri(self
, match
, lineno
):
937 if (not match
.group('scheme')
938 or match
.group('scheme').lower() in urischemes
.schemes
):
939 if match
.group('email'):
940 addscheme
= 'mailto:'
943 text
= match
.group('whole')
944 unescaped
= unescape(text
, 0)
945 return [nodes
.reference(unescape(text
, 1), unescaped
,
946 refuri
=addscheme
+ unescaped
)]
947 else: # not a valid scheme
950 def pep_reference(self
, match
, lineno
):
951 text
= match
.group(0)
952 if text
.startswith('pep-'):
953 pepnum
= int(match
.group('pepnum1'))
954 elif text
.startswith('PEP'):
955 pepnum
= int(match
.group('pepnum2'))
958 ref
= (self
.document
.settings
.pep_base_url
959 + self
.document
.settings
.pep_file_url_template
% pepnum
)
960 unescaped
= unescape(text
, 0)
961 return [nodes
.reference(unescape(text
, 1), unescaped
, refuri
=ref
)]
963 rfc_url
= 'rfc%d.html'
965 def rfc_reference(self
, match
, lineno
):
966 text
= match
.group(0)
967 if text
.startswith('RFC'):
968 rfcnum
= int(match
.group('rfcnum'))
969 ref
= self
.document
.settings
.rfc_base_url
+ self
.rfc_url
% rfcnum
972 unescaped
= unescape(text
, 0)
973 return [nodes
.reference(unescape(text
, 1), unescaped
, refuri
=ref
)]
975 def implicit_inline(self
, text
, lineno
):
977 Check each of the patterns in `self.implicit_dispatch` for a match,
978 and dispatch to the stored method for the pattern. Recursively check
979 the text before and after the match. Return a list of `nodes.Text`
980 and inline element nodes.
984 for pattern
, method
in self
.implicit_dispatch
:
985 match
= pattern
.search(text
)
988 # Must recurse on strings before *and* after the match;
989 # there may be multiple patterns.
990 return (self
.implicit_inline(text
[:match
.start()], lineno
)
991 + method(match
, lineno
) +
992 self
.implicit_inline(text
[match
.end():], lineno
))
993 except MarkupMismatch
:
995 return [nodes
.Text(unescape(text
), rawsource
=unescape(text
, 1))]
997 dispatch
= {'*': emphasis
,
999 '`': interpreted_or_phrase_ref
,
1001 '_`': inline_internal_target
,
1002 ']_': footnote_reference
,
1003 '|': substitution_reference
,
1005 '__': anonymous_reference
}
1008 def _loweralpha_to_int(s
, _zero
=(ord('a')-1)):
1009 return ord(s
) - _zero
1011 def _upperalpha_to_int(s
, _zero
=(ord('A')-1)):
1012 return ord(s
) - _zero
def _lowerroman_to_int(s):
    """Convert the Roman numeral text `s` to an integer.

    The text is uppercased first and then handed to `roman.fromRoman`,
    whose result is returned unchanged.
    """
    uppercased = s.upper()
    return roman.fromRoman(uppercased)
1018 class Body(RSTState
):
1021 Generic classifier of the first line of a block.
1024 double_width_pad_char
= tableparser
.TableParser
.double_width_pad_char
1025 """Padding character for East Asian double-width text."""
1028 """Enumerated list parsing information."""
1031 'parens': Struct(prefix
='(', suffix
=')', start
=1, end
=-1),
1032 'rparen': Struct(prefix
='', suffix
=')', start
=0, end
=-1),
1033 'period': Struct(prefix
='', suffix
='.', start
=0, end
=-1)}
1034 enum
.formats
= enum
.formatinfo
.keys()
1035 enum
.sequences
= ['arabic', 'loweralpha', 'upperalpha',
1036 'lowerroman', 'upperroman'] # ORDERED!
1037 enum
.sequencepats
= {'arabic': '[0-9]+',
1038 'loweralpha': '[a-z]',
1039 'upperalpha': '[A-Z]',
1040 'lowerroman': '[ivxlcdm]+',
1041 'upperroman': '[IVXLCDM]+',}
1042 enum
.converters
= {'arabic': int,
1043 'loweralpha': _loweralpha_to_int
,
1044 'upperalpha': _upperalpha_to_int
,
1045 'lowerroman': _lowerroman_to_int
,
1046 'upperroman': roman
.fromRoman
}
1048 enum
.sequenceregexps
= {}
1049 for sequence
in enum
.sequences
:
1050 enum
.sequenceregexps
[sequence
] = re
.compile(
1051 enum
.sequencepats
[sequence
] + '$', re
.UNICODE
)
1053 grid_table_top_pat
= re
.compile(r
'\+-[-+]+-\+ *$')
"""Matches the top (& bottom) of a full table."""
1056 simple_table_top_pat
= re
.compile('=+( +=+)+ *$')
1057 """Matches the top of a simple table."""
1059 simple_table_border_pat
= re
.compile('=+[ =]*$')
1060 """Matches the bottom & header bottom of a simple table."""
1063 """Fragments of patterns used by transitions."""
1065 pats
['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
1066 pats
['alpha'] = '[a-zA-Z]'
1067 pats
['alphanum'] = '[a-zA-Z0-9]'
1068 pats
['alphanumplus'] = '[a-zA-Z0-9_-]'
1069 pats
['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
1070 '|%(upperroman)s|#)' % enum
.sequencepats
)
1071 pats
['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
1072 # @@@ Loosen up the pattern? Allow Unicode?
1073 pats
['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
1074 pats
['shortopt'] = r
'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
1075 pats
['longopt'] = r
'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
1076 pats
['option'] = r
'(%(shortopt)s|%(longopt)s)' % pats
1078 for format
in enum
.formats
:
1079 pats
[format
] = '(?P<%s>%s%s%s)' % (
1080 format
, re
.escape(enum
.formatinfo
[format
].prefix
),
1081 pats
['enum'], re
.escape(enum
.formatinfo
[format
].suffix
))
1084 'bullet': u
'[-+*\u2022\u2023\u2043]( +|$)',
1085 'enumerator': r
'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats
,
1086 'field_marker': r
':(?![: ])([^:\\]|\\.)*(?<! ):( +|$)',
1087 'option_marker': r
'%(option)s(, %(option)s)*( +| ?$)' % pats
,
1088 'doctest': r
'>>>( +|$)',
1089 'line_block': r
'\|( +|$)',
1090 'grid_table_top': grid_table_top_pat
,
1091 'simple_table_top': simple_table_top_pat
,
1092 'explicit_markup': r
'\.\.( +|$)',
1093 'anonymous': r
'__( +|$)',
1094 'line': r
'(%(nonalphanum7bit)s)\1* *$' % pats
,
1096 initial_transitions
= (
1110 def indent(self
, match
, context
, next_state
):
1112 indented
, indent
, line_offset
, blank_finish
= \
1113 self
.state_machine
.get_indented()
1114 elements
= self
.block_quote(indented
, line_offset
)
1115 self
.parent
+= elements
1116 if not blank_finish
:
1117 self
.parent
+= self
.unindent_warning('Block quote')
1118 return context
, next_state
, []
1120 def block_quote(self
, indented
, line_offset
):
1127 new_line_offset
) = self
.split_attribution(indented
, line_offset
)
1128 blockquote
= nodes
.block_quote()
1129 self
.nested_parse(blockquote_lines
, line_offset
, blockquote
)
1130 elements
.append(blockquote
)
1131 if attribution_lines
:
1132 attribution
, messages
= self
.parse_attribution(
1133 attribution_lines
, attribution_offset
)
1134 blockquote
+= attribution
1135 elements
+= messages
1136 line_offset
= new_line_offset
1137 while indented
and not indented
[0]:
1138 indented
= indented
[1:]
1142 # U+2014 is an em-dash:
1143 attribution_pattern
= re
.compile(u
'(---?(?!-)|\u2014) *(?=[^ \\n])',
1146 def split_attribution(self
, indented
, line_offset
):
1148 Check for a block quote attribution and split it off:
1150 * First line after a blank line must begin with a dash ("--", "---",
1151 em-dash; matches `self.attribution_pattern`).
1152 * Every line after that must have consistent indentation.
1153 * Attributions must be preceded by block quote content.
Return a tuple of: (block quote content lines, attribution lines,
attribution offset, remaining indented lines, remaining lines offset).
1159 nonblank_seen
= False
1160 for i
in range(len(indented
)):
1161 line
= indented
[i
].rstrip()
1163 if nonblank_seen
and blank
== i
- 1: # last line blank
1164 match
= self
.attribution_pattern
.match(line
)
1166 attribution_end
, indent
= self
.check_attribution(
1169 a_lines
= indented
[i
:attribution_end
]
1170 a_lines
.trim_left(match
.end(), end
=1)
1171 a_lines
.trim_left(indent
, start
=1)
1172 return (indented
[:i
], a_lines
,
1173 i
, indented
[attribution_end
:],
1174 line_offset
+ attribution_end
)
1175 nonblank_seen
= True
1179 return (indented
, None, None, None, None)
1181 def check_attribution(self
, indented
, attribution_start
):
1183 Check attribution shape.
1184 Return the index past the end of the attribution, and the indent.
1187 i
= attribution_start
+ 1
1188 for i
in range(attribution_start
+ 1, len(indented
)):
1189 line
= indented
[i
].rstrip()
1193 indent
= len(line
) - len(line
.lstrip())
1194 elif len(line
) - len(line
.lstrip()) != indent
:
1195 return None, None # bad shape; not an attribution
1197 # return index of line after last attribution line:
1199 return i
, (indent
or 0)
1201 def parse_attribution(self
, indented
, line_offset
):
1202 text
= '\n'.join(indented
).rstrip()
1203 lineno
= self
.state_machine
.abs_line_number() + line_offset
1204 textnodes
, messages
= self
.inline_text(text
, lineno
)
1205 node
= nodes
.attribution(text
, '', *textnodes
)
1207 # report with source and source-line results in
1208 # ``IndexError: list index out of range``
1209 # node.source, node.line = self.state_machine.get_source_and_line(lineno)
1210 return node
, messages
1212 def bullet(self
, match
, context
, next_state
):
1213 """Bullet list item."""
1214 bulletlist
= nodes
.bullet_list()
1215 self
.parent
+= bulletlist
1216 bulletlist
['bullet'] = match
.string
[0]
1217 i
, blank_finish
= self
.list_item(match
.end())
1219 offset
= self
.state_machine
.line_offset
+ 1 # next line
1220 new_line_offset
, blank_finish
= self
.nested_list_parse(
1221 self
.state_machine
.input_lines
[offset
:],
1222 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1223 node
=bulletlist
, initial_state
='BulletList',
1224 blank_finish
=blank_finish
)
1225 self
.goto_line(new_line_offset
)
1226 if not blank_finish
:
1227 self
.parent
+= self
.unindent_warning('Bullet list')
1228 return [], next_state
, []
1230 def list_item(self
, indent
):
1231 if self
.state_machine
.line
[indent
:]:
1232 indented
, line_offset
, blank_finish
= (
1233 self
.state_machine
.get_known_indented(indent
))
1235 indented
, indent
, line_offset
, blank_finish
= (
1236 self
.state_machine
.get_first_known_indented(indent
))
1237 listitem
= nodes
.list_item('\n'.join(indented
))
1239 self
.nested_parse(indented
, input_offset
=line_offset
,
1241 return listitem
, blank_finish
1243 def enumerator(self
, match
, context
, next_state
):
1244 """Enumerated List Item"""
1245 format
, sequence
, text
, ordinal
= self
.parse_enumerator(match
)
1246 if not self
.is_enumerated_list_item(ordinal
, sequence
, format
):
1247 raise statemachine
.TransitionCorrection('text')
1248 enumlist
= nodes
.enumerated_list()
1249 self
.parent
+= enumlist
1251 enumlist
['enumtype'] = 'arabic'
1253 enumlist
['enumtype'] = sequence
1254 enumlist
['prefix'] = self
.enum
.formatinfo
[format
].prefix
1255 enumlist
['suffix'] = self
.enum
.formatinfo
[format
].suffix
1257 enumlist
['start'] = ordinal
1258 src
, srcline
= self
.state_machine
.get_source_and_line()
1259 msg
= self
.reporter
.info(
1260 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1261 % (text
, ordinal
), source
=src
, line
=srcline
)
1263 listitem
, blank_finish
= self
.list_item(match
.end())
1264 enumlist
+= listitem
1265 offset
= self
.state_machine
.line_offset
+ 1 # next line
1266 newline_offset
, blank_finish
= self
.nested_list_parse(
1267 self
.state_machine
.input_lines
[offset
:],
1268 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1269 node
=enumlist
, initial_state
='EnumeratedList',
1270 blank_finish
=blank_finish
,
1271 extra_settings
={'lastordinal': ordinal
,
1273 'auto': sequence
== '#'})
1274 self
.goto_line(newline_offset
)
1275 if not blank_finish
:
1276 self
.parent
+= self
.unindent_warning('Enumerated list')
1277 return [], next_state
, []
1279 def parse_enumerator(self
, match
, expected_sequence
=None):
1281 Analyze an enumerator and return the results.
1284 - the enumerator format ('period', 'parens', or 'rparen'),
1285 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1286 - the text of the enumerator, stripped of formatting, and
1287 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1288 ``None`` is returned for invalid enumerator text).
1290 The enumerator format has already been determined by the regular
1291 expression match. If `expected_sequence` is given, that sequence is
1292 tried first. If not, we check for Roman numeral 1. This way,
1293 single-character Roman numerals (which are also alphabetical) can be
1294 matched. If no sequence has been matched, all sequences are checked in
1297 groupdict
= match
.groupdict()
1299 for format
in self
.enum
.formats
:
1300 if groupdict
[format
]: # was this the format matched?
1301 break # yes; keep `format`
1302 else: # shouldn't happen
1303 raise ParserError('enumerator format not matched')
1304 text
= groupdict
[format
][self
.enum
.formatinfo
[format
].start
1305 :self
.enum
.formatinfo
[format
].end
]
1308 elif expected_sequence
:
1310 if self
.enum
.sequenceregexps
[expected_sequence
].match(text
):
1311 sequence
= expected_sequence
1312 except KeyError: # shouldn't happen
1313 raise ParserError('unknown enumerator sequence: %s'
1316 sequence
= 'lowerroman'
1318 sequence
= 'upperroman'
1320 for sequence
in self
.enum
.sequences
:
1321 if self
.enum
.sequenceregexps
[sequence
].match(text
):
1323 else: # shouldn't happen
1324 raise ParserError('enumerator sequence not matched')
1329 ordinal
= self
.enum
.converters
[sequence
](text
)
1330 except roman
.InvalidRomanNumeralError
:
1332 return format
, sequence
, text
, ordinal
1334 def is_enumerated_list_item(self
, ordinal
, sequence
, format
):
1336 Check validity based on the ordinal value and the second line.
1338 Return true if the ordinal is valid and the second line is blank,
1339 indented, or starts with the next enumerator or an auto-enumerator.
1344 next_line
= self
.state_machine
.next_line()
1345 except EOFError: # end of input lines
1346 self
.state_machine
.previous_line()
1349 self
.state_machine
.previous_line()
1350 if not next_line
[:1].strip(): # blank or indented
1352 result
= self
.make_enumerator(ordinal
+ 1, sequence
, format
)
1354 next_enumerator
, auto_enumerator
= result
1356 if ( next_line
.startswith(next_enumerator
) or
1357 next_line
.startswith(auto_enumerator
) ):
1363 def make_enumerator(self
, ordinal
, sequence
, format
):
1365 Construct and return the next enumerated list item marker, and an
1366 auto-enumerator ("#" instead of the regular enumerator).
1368 Return ``None`` for invalid (out of range) ordinals.
1372 elif sequence
== 'arabic':
1373 enumerator
= str(ordinal
)
1375 if sequence
.endswith('alpha'):
1378 enumerator
= chr(ordinal
+ ord('a') - 1)
1379 elif sequence
.endswith('roman'):
1381 enumerator
= roman
.toRoman(ordinal
)
1382 except roman
.RomanError
:
1384 else: # shouldn't happen
1385 raise ParserError('unknown enumerator sequence: "%s"'
1387 if sequence
.startswith('lower'):
1388 enumerator
= enumerator
.lower()
1389 elif sequence
.startswith('upper'):
1390 enumerator
= enumerator
.upper()
1391 else: # shouldn't happen
1392 raise ParserError('unknown enumerator sequence: "%s"'
1394 formatinfo
= self
.enum
.formatinfo
[format
]
1395 next_enumerator
= (formatinfo
.prefix
+ enumerator
+ formatinfo
.suffix
1397 auto_enumerator
= formatinfo
.prefix
+ '#' + formatinfo
.suffix
+ ' '
1398 return next_enumerator
, auto_enumerator
1400 def field_marker(self
, match
, context
, next_state
):
1401 """Field list item."""
1402 field_list
= nodes
.field_list()
1403 self
.parent
+= field_list
1404 field
, blank_finish
= self
.field(match
)
1406 offset
= self
.state_machine
.line_offset
+ 1 # next line
1407 newline_offset
, blank_finish
= self
.nested_list_parse(
1408 self
.state_machine
.input_lines
[offset
:],
1409 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1410 node
=field_list
, initial_state
='FieldList',
1411 blank_finish
=blank_finish
)
1412 self
.goto_line(newline_offset
)
1413 if not blank_finish
:
1414 self
.parent
+= self
.unindent_warning('Field list')
1415 return [], next_state
, []
1417 def field(self
, match
):
1418 name
= self
.parse_field_marker(match
)
1419 src
, srcline
= self
.state_machine
.get_source_and_line()
1420 lineno
= self
.state_machine
.abs_line_number()
1421 indented
, indent
, line_offset
, blank_finish
= \
1422 self
.state_machine
.get_first_known_indented(match
.end())
1423 field_node
= nodes
.field()
1424 field_node
.source
= src
1425 field_node
.line
= srcline
1426 name_nodes
, name_messages
= self
.inline_text(name
, lineno
)
1427 field_node
+= nodes
.field_name(name
, '', *name_nodes
)
1428 field_body
= nodes
.field_body('\n'.join(indented
), *name_messages
)
1429 field_node
+= field_body
1431 self
.parse_field_body(indented
, line_offset
, field_body
)
1432 return field_node
, blank_finish
1434 def parse_field_marker(self
, match
):
1435 """Extract & return field name from a field marker match."""
1436 field
= match
.group()[1:] # strip off leading ':'
1437 field
= field
[:field
.rfind(':')] # strip off trailing ':' etc.
def parse_field_body(self, indented, offset, node):
    """Parse the field body lines `indented` into `node`.

    Delegates to `self.nested_parse`, passing `offset` as ``input_offset``
    and `node` as ``node``. Returns ``None``.
    """
    parse_options = {'input_offset': offset, 'node': node}
    self.nested_parse(indented, **parse_options)
1443 def option_marker(self
, match
, context
, next_state
):
1444 """Option list item."""
1445 optionlist
= nodes
.option_list()
1447 listitem
, blank_finish
= self
.option_list_item(match
)
1448 except MarkupError
, error
:
1449 # This shouldn't happen; pattern won't match.
1450 src
, srcline
= self
.state_machine
.get_source_and_line()
1451 msg
= self
.reporter
.error(u
'Invalid option list marker: %s' %
1452 error
, source
=src
, line
=srcline
)
1454 indented
, indent
, line_offset
, blank_finish
= \
1455 self
.state_machine
.get_first_known_indented(match
.end())
1456 elements
= self
.block_quote(indented
, line_offset
)
1457 self
.parent
+= elements
1458 if not blank_finish
:
1459 self
.parent
+= self
.unindent_warning('Option list')
1460 return [], next_state
, []
1461 self
.parent
+= optionlist
1462 optionlist
+= listitem
1463 offset
= self
.state_machine
.line_offset
+ 1 # next line
1464 newline_offset
, blank_finish
= self
.nested_list_parse(
1465 self
.state_machine
.input_lines
[offset
:],
1466 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1467 node
=optionlist
, initial_state
='OptionList',
1468 blank_finish
=blank_finish
)
1469 self
.goto_line(newline_offset
)
1470 if not blank_finish
:
1471 self
.parent
+= self
.unindent_warning('Option list')
1472 return [], next_state
, []
1474 def option_list_item(self
, match
):
1475 offset
= self
.state_machine
.abs_line_offset()
1476 options
= self
.parse_option_marker(match
)
1477 indented
, indent
, line_offset
, blank_finish
= \
1478 self
.state_machine
.get_first_known_indented(match
.end())
1479 if not indented
: # not an option list item
1480 self
.goto_line(offset
)
1481 raise statemachine
.TransitionCorrection('text')
1482 option_group
= nodes
.option_group('', *options
)
1483 description
= nodes
.description('\n'.join(indented
))
1484 option_list_item
= nodes
.option_list_item('', option_group
,
1487 self
.nested_parse(indented
, input_offset
=line_offset
,
1489 return option_list_item
, blank_finish
1491 def parse_option_marker(self
, match
):
1493 Return a list of `node.option` and `node.option_argument` objects,
1494 parsed from an option marker match.
1496 :Exception: `MarkupError` for invalid option markers.
1499 optionstrings
= match
.group().rstrip().split(', ')
1500 for optionstring
in optionstrings
:
1501 tokens
= optionstring
.split()
1503 firstopt
= tokens
[0].split('=', 1)
1504 if len(firstopt
) > 1:
1505 # "--opt=value" form
1506 tokens
[:1] = firstopt
1508 elif (len(tokens
[0]) > 2
1509 and ((tokens
[0].startswith('-')
1510 and not tokens
[0].startswith('--'))
1511 or tokens
[0].startswith('+'))):
1513 tokens
[:1] = [tokens
[0][:2], tokens
[0][2:]]
1515 if len(tokens
) > 1 and (tokens
[1].startswith('<')
1516 and tokens
[-1].endswith('>')):
1517 # "-o <value1 value2>" form; join all values into one token
1518 tokens
[1:] = [' '.join(tokens
[1:])]
1519 if 0 < len(tokens
) <= 2:
1520 option
= nodes
.option(optionstring
)
1521 option
+= nodes
.option_string(tokens
[0], tokens
[0])
1523 option
+= nodes
.option_argument(tokens
[1], tokens
[1],
1524 delimiter
=delimiter
)
1525 optlist
.append(option
)
1528 'wrong number of option tokens (=%s), should be 1 or 2: '
1529 '"%s"' % (len(tokens
), optionstring
))
def doctest(self, match, context, next_state):
    """Add a doctest block node to the parent.

    The lines returned by `self.state_machine.get_text_block()` are joined
    with newlines; the resulting string is passed twice to
    `nodes.doctest_block`, and the new node is appended to `self.parent`.

    Returns ``([], next_state, [])``.
    """
    text_lines = self.state_machine.get_text_block()
    data = '\n'.join(text_lines)
    block_node = nodes.doctest_block(data, data)
    self.parent += block_node
    return [], next_state, []
1537 def line_block(self
, match
, context
, next_state
):
1538 """First line of a line block."""
1539 block
= nodes
.line_block()
1540 self
.parent
+= block
1541 lineno
= self
.state_machine
.abs_line_number()
1542 line
, messages
, blank_finish
= self
.line_block_line(match
, lineno
)
1544 self
.parent
+= messages
1545 if not blank_finish
:
1546 offset
= self
.state_machine
.line_offset
+ 1 # next line
1547 new_line_offset
, blank_finish
= self
.nested_list_parse(
1548 self
.state_machine
.input_lines
[offset
:],
1549 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1550 node
=block
, initial_state
='LineBlock',
1552 self
.goto_line(new_line_offset
)
1553 if not blank_finish
:
1554 src
, srcline
= self
.state_machine
.get_source_and_line()
1555 self
.parent
+= self
.reporter
.warning(
1556 'Line block ends without a blank line.',
1557 source
=src
, line
=srcline
+1)
1559 if block
[0].indent
is None:
1561 self
.nest_line_block_lines(block
)
1562 return [], next_state
, []
1564 def line_block_line(self
, match
, lineno
):
1565 """Return one line element of a line_block."""
1566 indented
, indent
, line_offset
, blank_finish
= \
1567 self
.state_machine
.get_first_known_indented(match
.end(),
1569 text
= u
'\n'.join(indented
)
1570 text_nodes
, messages
= self
.inline_text(text
, lineno
)
1571 line
= nodes
.line(text
, '', *text_nodes
)
1572 if match
.string
.rstrip() != '|': # not empty
1573 line
.indent
= len(match
.group(1)) - 1
1574 return line
, messages
, blank_finish
def nest_line_block_lines(self, block):
    """Fill in missing line indents, then nest the block.

    Every item after the first whose ``indent`` is ``None`` inherits the
    ``indent`` of the item directly before it (which may itself have just
    been filled in). Afterwards `block` is handed to
    `self.nest_line_block_segment`.
    """
    for position in range(len(block) - 1):
        following = block[position + 1]
        if following.indent is None:
            # Propagate the predecessor's indent forward.
            following.indent = block[position].indent
    self.nest_line_block_segment(block)
1582 def nest_line_block_segment(self
, block
):
1583 indents
= [item
.indent
for item
in block
]
1584 least
= min(indents
)
1586 new_block
= nodes
.line_block()
1588 if item
.indent
> least
:
1589 new_block
.append(item
)
1592 self
.nest_line_block_segment(new_block
)
1593 new_items
.append(new_block
)
1594 new_block
= nodes
.line_block()
1595 new_items
.append(item
)
1597 self
.nest_line_block_segment(new_block
)
1598 new_items
.append(new_block
)
1599 block
[:] = new_items
def grid_table_top(self, match, context, next_state):
    """Handle the top border of a full (grid) table.

    Delegates to `self.table_top`, supplying `self.isolate_grid_table` as
    the isolate function and `tableparser.GridTableParser` as the parser
    class, and returns its result.
    """
    isolate_function = self.isolate_grid_table
    parser_class = tableparser.GridTableParser
    return self.table_top(match, context, next_state,
                          isolate_function, parser_class)
def simple_table_top(self, match, context, next_state):
    """Handle the top border of a simple table.

    Delegates to `self.table_top`, supplying `self.isolate_simple_table`
    as the isolate function and `tableparser.SimpleTableParser` as the
    parser class, and returns its result.
    """
    isolate_function = self.isolate_simple_table
    parser_class = tableparser.SimpleTableParser
    return self.table_top(match, context, next_state,
                          isolate_function, parser_class)
1613 def table_top(self
, match
, context
, next_state
,
1614 isolate_function
, parser_class
):
1615 """Top border of a generic table."""
1616 nodelist
, blank_finish
= self
.table(isolate_function
, parser_class
)
1617 self
.parent
+= nodelist
1618 if not blank_finish
:
1619 src
, srcline
= self
.state_machine
.get_source_and_line()
1620 msg
= self
.reporter
.warning(
1621 'Blank line required after table.',
1622 source
=src
, line
=srcline
+1)
1624 return [], next_state
, []
1626 def table(self
, isolate_function
, parser_class
):
1627 """Parse a table."""
1628 block
, messages
, blank_finish
= isolate_function()
1631 parser
= parser_class()
1632 tabledata
= parser
.parse(block
)
1633 tableline
= (self
.state_machine
.abs_line_number() - len(block
)
1635 table
= self
.build_table(tabledata
, tableline
)
1636 nodelist
= [table
] + messages
1637 except tableparser
.TableMarkupError
, detail
:
1638 nodelist
= self
.malformed_table(
1639 block
, ' '.join(detail
.args
)) + messages
1642 return nodelist
, blank_finish
1644 def isolate_grid_table(self
):
1648 block
= self
.state_machine
.get_text_block(flush_left
=1)
1649 except statemachine
.UnexpectedIndentationError
, instance
:
1650 block
, src
, srcline
= instance
.args
1651 messages
.append(self
.reporter
.error('Unexpected indentation.',
1652 source
=src
, line
=srcline
))
1655 # for East Asian chars:
1656 block
.pad_double_width(self
.double_width_pad_char
)
1657 width
= len(block
[0].strip())
1658 for i
in range(len(block
)):
1659 block
[i
] = block
[i
].strip()
1660 if block
[i
][0] not in '+|': # check left edge
1662 self
.state_machine
.previous_line(len(block
) - i
)
1665 if not self
.grid_table_top_pat
.match(block
[-1]): # find bottom
1667 # from second-last to third line of table:
1668 for i
in range(len(block
) - 2, 1, -1):
1669 if self
.grid_table_top_pat
.match(block
[i
]):
1670 self
.state_machine
.previous_line(len(block
) - i
+ 1)
1674 messages
.extend(self
.malformed_table(block
))
1675 return [], messages
, blank_finish
1676 for i
in range(len(block
)): # check right edge
1677 if len(block
[i
]) != width
or block
[i
][-1] not in '+|':
1678 messages
.extend(self
.malformed_table(block
))
1679 return [], messages
, blank_finish
1680 return block
, messages
, blank_finish
1682 def isolate_simple_table(self
):
1683 start
= self
.state_machine
.line_offset
1684 lines
= self
.state_machine
.input_lines
1685 limit
= len(lines
) - 1
1686 toplen
= len(lines
[start
].strip())
1687 pattern_match
= self
.simple_table_border_pat
.match
1693 match
= pattern_match(line
)
1695 if len(line
.strip()) != toplen
:
1696 self
.state_machine
.next_line(i
- start
)
1697 messages
= self
.malformed_table(
1698 lines
[start
:i
+1], 'Bottom/header table border does '
1699 'not match top border.')
1700 return [], messages
, i
== limit
or not lines
[i
+1].strip()
1703 if found
== 2 or i
== limit
or not lines
[i
+1].strip():
1707 else: # reached end of input_lines
1709 extra
= ' or no blank line after table bottom'
1710 self
.state_machine
.next_line(found_at
- start
)
1711 block
= lines
[start
:found_at
+1]
1714 self
.state_machine
.next_line(i
- start
- 1)
1715 block
= lines
[start
:]
1716 messages
= self
.malformed_table(
1717 block
, 'No bottom table border found%s.' % extra
)
1718 return [], messages
, not extra
1719 self
.state_machine
.next_line(end
- start
)
1720 block
= lines
[start
:end
+1]
1721 # for East Asian chars:
1722 block
.pad_double_width(self
.double_width_pad_char
)
1723 return block
, [], end
== limit
or not lines
[end
+1].strip()
1725 def malformed_table(self
, block
, detail
=''):
1726 block
.replace(self
.double_width_pad_char
, '')
1727 data
= '\n'.join(block
)
1728 message
= 'Malformed table.'
1729 startline
= self
.state_machine
.abs_line_number() - len(block
) + 1
1730 src
, srcline
= self
.state_machine
.get_source_and_line(startline
)
1732 message
+= '\n' + detail
1733 error
= self
.reporter
.error(message
, nodes
.literal_block(data
, data
),
1734 source
=src
, line
=srcline
)
1737 def build_table(self
, tabledata
, tableline
, stub_columns
=0):
1738 colwidths
, headrows
, bodyrows
= tabledata
1739 table
= nodes
.table()
1740 tgroup
= nodes
.tgroup(cols
=len(colwidths
))
1742 for colwidth
in colwidths
:
1743 colspec
= nodes
.colspec(colwidth
=colwidth
)
1745 colspec
.attributes
['stub'] = 1
1749 thead
= nodes
.thead()
1751 for row
in headrows
:
1752 thead
+= self
.build_table_row(row
, tableline
)
1753 tbody
= nodes
.tbody()
1755 for row
in bodyrows
:
1756 tbody
+= self
.build_table_row(row
, tableline
)
1759 def build_table_row(self
, rowdata
, tableline
):
1761 for cell
in rowdata
:
1764 morerows
, morecols
, offset
, cellblock
= cell
1767 attributes
['morerows'] = morerows
1769 attributes
['morecols'] = morecols
1770 entry
= nodes
.entry(**attributes
)
1772 if ''.join(cellblock
):
1773 self
.nested_parse(cellblock
, input_offset
=tableline
+offset
,
1779 """Patterns and constants used for explicit markup recognition."""
1781 explicit
.patterns
= Struct(
1782 target
=re
.compile(r
"""
1784 _ # anonymous target
1786 (?!_) # no underscore at the beginning
1787 (?P<quote>`?) # optional open quote
1788 (?![ `]) # first char. not space or
1790 (?P<name> # reference name
1793 %(non_whitespace_escape_before)s
1794 (?P=quote) # close quote if open quote used
1796 (?<!(?<!\x00):) # no unescaped colon at end
1797 %(non_whitespace_escape_before)s
1798 [ ]? # optional space
1799 : # end of reference name
1800 ([ ]+|$) # followed by whitespace
1801 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1802 reference
=re
.compile(r
"""
1804 (?P<simple>%(simplename)s)_
1808 (?P<phrase>.+?) # hyperlink phrase
1809 %(non_whitespace_escape_before)s
1810 `_ # close backquote,
1814 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1815 substitution
=re
.compile(r
"""
1817 (?![ ]) # first char. not space
1818 (?P<name>.+?) # substitution text
1819 %(non_whitespace_escape_before)s
1820 \| # close delimiter
1822 ([ ]+|$) # followed by whitespace
1823 """ % vars(Inliner
),
1824 re
.VERBOSE | re
.UNICODE
),)
1826 def footnote(self
, match
):
1827 src
, srcline
= self
.state_machine
.get_source_and_line()
1828 indented
, indent
, offset
, blank_finish
= \
1829 self
.state_machine
.get_first_known_indented(match
.end())
1830 label
= match
.group(1)
1831 name
= normalize_name(label
)
1832 footnote
= nodes
.footnote('\n'.join(indented
))
1833 footnote
.source
= src
1834 footnote
.line
= srcline
1835 if name
[0] == '#': # auto-numbered
1836 name
= name
[1:] # autonumber label
1837 footnote
['auto'] = 1
1839 footnote
['names'].append(name
)
1840 self
.document
.note_autofootnote(footnote
)
1841 elif name
== '*': # auto-symbol
1843 footnote
['auto'] = '*'
1844 self
.document
.note_symbol_footnote(footnote
)
1845 else: # manually numbered
1846 footnote
+= nodes
.label('', label
)
1847 footnote
['names'].append(name
)
1848 self
.document
.note_footnote(footnote
)
1850 self
.document
.note_explicit_target(footnote
, footnote
)
1852 self
.document
.set_id(footnote
, footnote
)
1854 self
.nested_parse(indented
, input_offset
=offset
, node
=footnote
)
1855 return [footnote
], blank_finish
1857 def citation(self
, match
):
1858 src
, srcline
= self
.state_machine
.get_source_and_line()
1859 indented
, indent
, offset
, blank_finish
= \
1860 self
.state_machine
.get_first_known_indented(match
.end())
1861 label
= match
.group(1)
1862 name
= normalize_name(label
)
1863 citation
= nodes
.citation('\n'.join(indented
))
1864 citation
.source
= src
1865 citation
.line
= srcline
1866 citation
+= nodes
.label('', label
)
1867 citation
['names'].append(name
)
1868 self
.document
.note_citation(citation
)
1869 self
.document
.note_explicit_target(citation
, citation
)
1871 self
.nested_parse(indented
, input_offset
=offset
, node
=citation
)
1872 return [citation
], blank_finish
1874 def hyperlink_target(self
, match
):
1875 pattern
= self
.explicit
.patterns
.target
1876 lineno
= self
.state_machine
.abs_line_number()
1877 src
, srcline
= self
.state_machine
.get_source_and_line()
1878 block
, indent
, offset
, blank_finish
= \
1879 self
.state_machine
.get_first_known_indented(
1880 match
.end(), until_blank
=1, strip_indent
=0)
1881 blocktext
= match
.string
[:match
.end()] + '\n'.join(block
)
1882 block
= [escape2null(line
) for line
in block
]
1886 targetmatch
= pattern
.match(escaped
)
1891 escaped
+= block
[blockindex
]
1893 raise MarkupError('malformed hyperlink target.')
1894 del block
[:blockindex
]
1895 block
[0] = (block
[0] + ' ')[targetmatch
.end()-len(escaped
)-1:].strip()
1896 target
= self
.make_target(block
, blocktext
, lineno
,
1897 targetmatch
.group('name'))
1898 return [target
], blank_finish
1900 def make_target(self
, block
, block_text
, lineno
, target_name
):
1901 target_type
, data
= self
.parse_target(block
, block_text
, lineno
)
1902 if target_type
== 'refname':
1903 target
= nodes
.target(block_text
, '', refname
=normalize_name(data
))
1904 target
.indirect_reference_name
= data
1905 self
.add_target(target_name
, '', target
, lineno
)
1906 self
.document
.note_indirect_target(target
)
1908 elif target_type
== 'refuri':
1909 target
= nodes
.target(block_text
, '')
1910 self
.add_target(target_name
, data
, target
, lineno
)
1915 def parse_target(self
, block
, block_text
, lineno
):
1917 Determine the type of reference of a target.
1919 :Return: A 2-tuple, one of:
1921 - 'refname' and the indirect reference name
1922 - 'refuri' and the URI
1923 - 'malformed' and a system_message node
1925 if block
and block
[-1].strip()[-1:] == '_': # possible indirect target
1926 reference
= ' '.join([line
.strip() for line
in block
])
1927 refname
= self
.is_reference(reference
)
1929 return 'refname', refname
1930 reference
= ''.join([''.join(line
.split()) for line
in block
])
1931 return 'refuri', unescape(reference
)
1933 def is_reference(self
, reference
):
1934 match
= self
.explicit
.patterns
.reference
.match(
1935 whitespace_normalize_name(reference
))
1938 return unescape(match
.group('simple') or match
.group('phrase'))
1940 def add_target(self
, targetname
, refuri
, target
, lineno
):
1941 target
.line
= lineno
1943 name
= normalize_name(unescape(targetname
))
1944 target
['names'].append(name
)
1946 uri
= self
.inliner
.adjust_uri(refuri
)
1948 target
['refuri'] = uri
1950 raise ApplicationError('problem with URI: %r' % refuri
)
1951 self
.document
.note_explicit_target(target
, self
.parent
)
1952 else: # anonymous target
1954 target
['refuri'] = refuri
1955 target
['anonymous'] = 1
1956 self
.document
.note_anonymous_target(target
)
1958 def substitution_def(self
, match
):
1959 pattern
= self
.explicit
.patterns
.substitution
1960 src
, srcline
= self
.state_machine
.get_source_and_line()
1961 block
, indent
, offset
, blank_finish
= \
1962 self
.state_machine
.get_first_known_indented(match
.end(),
1964 blocktext
= (match
.string
[:match
.end()] + '\n'.join(block
))
1966 escaped
= escape2null(block
[0].rstrip())
1969 subdefmatch
= pattern
.match(escaped
)
1974 escaped
= escaped
+ ' ' + escape2null(block
[blockindex
].strip())
1976 raise MarkupError('malformed substitution definition.')
1977 del block
[:blockindex
] # strip out the substitution marker
1978 block
[0] = (block
[0].strip() + ' ')[subdefmatch
.end()-len(escaped
)-1:-1]
1982 while block
and not block
[-1].strip():
1984 subname
= subdefmatch
.group('name')
1985 substitution_node
= nodes
.substitution_definition(blocktext
)
1986 substitution_node
.source
= src
1987 substitution_node
.line
= srcline
1989 msg
= self
.reporter
.warning(
1990 'Substitution definition "%s" missing contents.' % subname
,
1991 nodes
.literal_block(blocktext
, blocktext
),
1992 source
=src
, line
=srcline
)
1993 return [msg
], blank_finish
1994 block
[0] = block
[0].strip()
1995 substitution_node
['names'].append(
1996 nodes
.whitespace_normalize_name(subname
))
1997 new_abs_offset
, blank_finish
= self
.nested_list_parse(
1998 block
, input_offset
=offset
, node
=substitution_node
,
1999 initial_state
='SubstitutionDef', blank_finish
=blank_finish
)
2001 for node
in substitution_node
[:]:
2002 if not (isinstance(node
, nodes
.Inline
) or
2003 isinstance(node
, nodes
.Text
)):
2004 self
.parent
+= substitution_node
[i
]
2005 del substitution_node
[i
]
2008 for node
in substitution_node
.traverse(nodes
.Element
):
2009 if self
.disallowed_inside_substitution_definitions(node
):
2010 pformat
= nodes
.literal_block('', node
.pformat().rstrip())
2011 msg
= self
.reporter
.error(
2012 'Substitution definition contains illegal element:',
2013 pformat
, nodes
.literal_block(blocktext
, blocktext
),
2014 source
=src
, line
=srcline
)
2015 return [msg
], blank_finish
2016 if len(substitution_node
) == 0:
2017 msg
= self
.reporter
.warning(
2018 'Substitution definition "%s" empty or invalid.' % subname
,
2019 nodes
.literal_block(blocktext
, blocktext
),
2020 source
=src
, line
=srcline
)
2021 return [msg
], blank_finish
2022 self
.document
.note_substitution_def(
2023 substitution_node
, subname
, self
.parent
)
2024 return [substitution_node
], blank_finish
2026 def disallowed_inside_substitution_definitions(self
, node
):
2028 isinstance(node
, nodes
.reference
) and node
.get('anonymous') or
2029 isinstance(node
, nodes
.footnote_reference
) and node
.get('auto')):
2034 def directive(self
, match
, **option_presets
):
2035 """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
2036 type_name
= match
.group(1)
2037 directive_class
, messages
= directives
.directive(
2038 type_name
, self
.memo
.language
, self
.document
)
2039 self
.parent
+= messages
2041 return self
.run_directive(
2042 directive_class
, match
, type_name
, option_presets
)
2044 return self
.unknown_directive(type_name
)
2046 def run_directive(self
, directive
, match
, type_name
, option_presets
):
2048 Parse a directive then run its directive function.
2052 - `directive`: The class implementing the directive. Must be
2053 a subclass of `rst.Directive`.
2055 - `match`: A regular expression match object which matched the first
2056 line of the directive.
2058 - `type_name`: The directive name, as used in the source text.
2060 - `option_presets`: A dictionary of preset options, defaults for the
2061 directive options. Currently, only an "alt" option is passed by
2062 substitution definitions (value: the substitution name), which may
2063 be used by an embedded image directive.
2065 Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
2067 if isinstance(directive
, (FunctionType
, MethodType
)):
2068 from docutils
.parsers
.rst
import convert_directive_function
2069 directive
= convert_directive_function(directive
)
2070 lineno
= self
.state_machine
.abs_line_number()
2071 src
, srcline
= self
.state_machine
.get_source_and_line()
2072 initial_line_offset
= self
.state_machine
.line_offset
2073 indented
, indent
, line_offset
, blank_finish \
2074 = self
.state_machine
.get_first_known_indented(match
.end(),
2076 block_text
= '\n'.join(self
.state_machine
.input_lines
[
2077 initial_line_offset
: self
.state_machine
.line_offset
+ 1])
2079 arguments
, options
, content
, content_offset
= (
2080 self
.parse_directive_block(indented
, line_offset
,
2081 directive
, option_presets
))
2082 except MarkupError
, detail
:
2083 error
= self
.reporter
.error(
2084 'Error in "%s" directive:\n%s.' % (type_name
,
2085 ' '.join(detail
.args
)),
2086 nodes
.literal_block(block_text
, block_text
),
2087 source
=src
, line
=srcline
)
2088 return [error
], blank_finish
2089 directive_instance
= directive(
2090 type_name
, arguments
, options
, content
, lineno
,
2091 content_offset
, block_text
, self
, self
.state_machine
)
2093 result
= directive_instance
.run()
2094 except docutils
.parsers
.rst
.DirectiveError
, error
:
2095 msg_node
= self
.reporter
.system_message(error
.level
, error
.msg
,
2096 source
=src
, line
=srcline
)
2097 msg_node
+= nodes
.literal_block(block_text
, block_text
)
2099 assert isinstance(result
, list), \
2100 'Directive "%s" must return a list of nodes.' % type_name
2101 for i
in range(len(result
)):
2102 assert isinstance(result
[i
], nodes
.Node
), \
2103 ('Directive "%s" returned non-Node object (index %s): %r'
2104 % (type_name
, i
, result
[i
]))
2106 blank_finish
or self
.state_machine
.is_next_line_blank())
2108 def parse_directive_block(self
, indented
, line_offset
, directive
,
2110 option_spec
= directive
.option_spec
2111 has_content
= directive
.has_content
2112 if indented
and not indented
[0].strip():
2113 indented
.trim_start()
2115 while indented
and not indented
[-1].strip():
2117 if indented
and (directive
.required_arguments
2118 or directive
.optional_arguments
2120 for i
, line
in enumerate(indented
):
2121 if not line
.strip():
2125 arg_block
= indented
[:i
]
2126 content
= indented
[i
+1:]
2127 content_offset
= line_offset
+ i
+ 1
2130 content_offset
= line_offset
2133 options
, arg_block
= self
.parse_directive_options(
2134 option_presets
, option_spec
, arg_block
)
2137 if arg_block
and not (directive
.required_arguments
2138 or directive
.optional_arguments
):
2139 content
= arg_block
+ indented
[i
:]
2140 content_offset
= line_offset
2142 while content
and not content
[0].strip():
2143 content
.trim_start()
2145 if directive
.required_arguments
or directive
.optional_arguments
:
2146 arguments
= self
.parse_directive_arguments(
2147 directive
, arg_block
)
2150 if content
and not has_content
:
2151 raise MarkupError('no content permitted')
2152 return (arguments
, options
, content
, content_offset
)
2154 def parse_directive_options(self
, option_presets
, option_spec
, arg_block
):
2155 options
= option_presets
.copy()
2156 for i
in range(len(arg_block
)):
2157 if arg_block
[i
][:1] == ':':
2158 opt_block
= arg_block
[i
:]
2159 arg_block
= arg_block
[:i
]
2164 success
, data
= self
.parse_extension_options(option_spec
,
2166 if success
: # data is a dict of options
2167 options
.update(data
)
2168 else: # data is an error string
2169 raise MarkupError(data
)
2170 return options
, arg_block
2172 def parse_directive_arguments(self
, directive
, arg_block
):
2173 required
= directive
.required_arguments
2174 optional
= directive
.optional_arguments
2175 arg_text
= '\n'.join(arg_block
)
2176 arguments
= arg_text
.split()
2177 if len(arguments
) < required
:
2178 raise MarkupError('%s argument(s) required, %s supplied'
2179 % (required
, len(arguments
)))
2180 elif len(arguments
) > required
+ optional
:
2181 if directive
.final_argument_whitespace
:
2182 arguments
= arg_text
.split(None, required
+ optional
- 1)
2185 'maximum %s argument(s) allowed, %s supplied'
2186 % (required
+ optional
, len(arguments
)))
2189 def parse_extension_options(self
, option_spec
, datalines
):
2191 Parse `datalines` for a field list containing extension options
2192 matching `option_spec`.
2195 - `option_spec`: a mapping of option name to conversion
2196 function, which should raise an exception on bad input.
2197 - `datalines`: a list of input strings.
2200 - Success value, 1 or 0.
2201 - An option dictionary on success, an error string on failure.
2203 node
= nodes
.field_list()
2204 newline_offset
, blank_finish
= self
.nested_list_parse(
2205 datalines
, 0, node
, initial_state
='ExtensionOptions',
2207 if newline_offset
!= len(datalines
): # incomplete parse of block
2208 return 0, 'invalid option block'
2210 options
= utils
.extract_extension_options(node
, option_spec
)
2211 except KeyError, detail
:
2212 return 0, ('unknown option: "%s"' % detail
.args
[0])
2213 except (ValueError, TypeError), detail
:
2214 return 0, ('invalid option value: %s' % ' '.join(detail
.args
))
2215 except utils
.ExtensionOptionError
, detail
:
2216 return 0, ('invalid option data: %s' % ' '.join(detail
.args
))
2220 return 0, 'option data incompletely parsed'
def unknown_directive(self, type_name):
    """
    Report a directive whose type name is not known.

    The directive's indented block is fetched from the state machine
    (with ``strip_indent=0``) and quoted in a literal block that is
    attached to an error system message.

    Returns a 2-tuple: a one-element list holding the system message,
    and the "blank finish" value reported for the fetched block.
    """
    machine = self.state_machine
    source_path, source_line = machine.get_source_and_line()
    # Only the lines and the blank-finish flag are used below.
    block_lines, _indent, _offset, blank_finish = (
        machine.get_first_known_indented(0, strip_indent=0))
    raw_text = '\n'.join(block_lines)
    message = 'Unknown directive type "%s".' % type_name
    quoted = nodes.literal_block(raw_text, raw_text)
    system_message = self.reporter.error(
        message, quoted, source=source_path, line=source_line)
    return [system_message], blank_finish
2232 def comment(self
, match
):
2233 if not match
.string
[match
.end():].strip() \
2234 and self
.state_machine
.is_next_line_blank(): # an empty comment?
2235 return [nodes
.comment()], 1 # "A tiny but practical wart."
2236 indented
, indent
, offset
, blank_finish
= \
2237 self
.state_machine
.get_first_known_indented(match
.end())
2238 while indented
and not indented
[-1].strip():
2240 text
= '\n'.join(indented
)
2241 return [nodes
.comment(text
, text
)], blank_finish
2243 explicit
.constructs
= [
2246 \.\.[ ]+ # explicit markup start
2249 [0-9]+ # manually numbered footnote
2251 \# # anonymous auto-numbered footnote
2253 \#%s # auto-number ed?) footnote label
2255 \* # auto-symbol footnote
2258 ([ ]+|$) # whitespace or end of line
2259 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
)),
2262 \.\.[ ]+ # explicit markup start
2263 \[(%s)\] # citation label
2264 ([ ]+|$) # whitespace or end of line
2265 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
)),
2268 \.\.[ ]+ # explicit markup start
2269 _ # target indicator
2270 (?![ ]|$) # first char. not space or EOL
2271 """, re
.VERBOSE | re
.UNICODE
)),
2274 \.\.[ ]+ # explicit markup start
2275 \| # substitution indicator
2276 (?![ ]|$) # first char. not space or EOL
2277 """, re
.VERBOSE | re
.UNICODE
)),
2280 \.\.[ ]+ # explicit markup start
2281 (%s) # directive name
2282 [ ]? # optional space
2283 :: # directive delimiter
2284 ([ ]+|$) # whitespace or end of line
2285 """ % Inliner
.simplename
, re
.VERBOSE | re
.UNICODE
))]
def explicit_markup(self, match, context, next_state):
    """
    Transition method for footnotes, hyperlink targets, directives
    and comments.

    The construct is parsed by `explicit_construct()`, the resulting
    nodes are added to ``self.parent``, and `explicit_list()` is then
    called with the construct's "blank finish" value.  `context` is not
    used.

    Returns a 3-tuple: an empty list, `next_state` unchanged, and an
    empty list.
    """
    parsed = self.explicit_construct(match)
    new_nodes, finished_blank = parsed
    self.parent += new_nodes
    self.explicit_list(finished_blank)
    return [], next_state, []
2294 def explicit_construct(self
, match
):
2295 """Determine which explicit construct this is, parse & return it."""
2297 for method
, pattern
in self
.explicit
.constructs
:
2298 expmatch
= pattern
.match(match
.string
)
2301 return method(self
, expmatch
)
2302 except MarkupError
, error
: # never reached?
2303 message
= ' '.join(error
.args
)
2304 src
, srcline
= self
.state_machine
.get_source_and_line()
2305 errors
.append(self
.reporter
.warning(
2306 message
, source
=src
, line
=srcline
))
2308 nodelist
, blank_finish
= self
.comment(match
)
2309 return nodelist
+ errors
, blank_finish
def explicit_list(self, blank_finish):
    """
    Parse the series of explicit markup constructs (including anonymous
    hyperlink targets) that follows the current line.

    The input lines after the current one are handed to
    `nested_list_parse()` with 'Explicit' as the initial state and
    ``self.parent`` as the node; `goto_line()` is then called with the
    offset that parse returns.  When the nested parse does not report a
    blank finish, an 'Explicit markup' unindent warning is added to
    ``self.parent``.
    """
    machine = self.state_machine
    first_remaining = machine.line_offset + 1      # next line
    remaining_lines = machine.input_lines[first_remaining:]
    absolute_start = machine.abs_line_offset() + 1
    end_offset, ended_blank = self.nested_list_parse(
        remaining_lines,
        input_offset=absolute_start,
        node=self.parent,
        initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=machine.match_titles)
    self.goto_line(end_offset)
    if ended_blank:
        return
    self.parent += self.unindent_warning('Explicit markup')
def anonymous(self, match, context, next_state):
    """
    Transition method for anonymous hyperlink targets.

    The target is built by `anonymous_target()`, the resulting nodes are
    added to ``self.parent``, and `explicit_list()` is then called with
    the target's "blank finish" value.  `context` is not used.

    Returns a 3-tuple: an empty list, `next_state` unchanged, and an
    empty list.
    """
    parsed = self.anonymous_target(match)
    target_nodes, finished_blank = parsed
    self.parent += target_nodes
    self.explicit_list(finished_blank)
    return [], next_state, []
2334 def anonymous_target(self
, match
):
2335 lineno
= self
.state_machine
.abs_line_number()
2336 block
, indent
, offset
, blank_finish \
2337 = self
.state_machine
.get_first_known_indented(match
.end(),
2339 blocktext
= match
.string
[:match
.end()] + '\n'.join(block
)
2340 block
= [escape2null(line
) for line
in block
]
2341 target
= self
.make_target(block
, blocktext
, lineno
, '')
2342 return [target
], blank_finish
2344 def line(self
, match
, context
, next_state
):
2345 """Section title overline or transition marker."""
2346 if self
.state_machine
.match_titles
:
2347 return [match
.string
], 'Line', []
2348 elif match
.string
.strip() == '::':
2349 raise statemachine
.TransitionCorrection('text')
2350 elif len(match
.string
.strip()) < 4:
2351 msg
= self
.reporter
.info(
2352 'Unexpected possible title overline or transition.\n'
2353 "Treating it as ordinary text because it's so short.",
2354 line
=self
.state_machine
.abs_line_number())
2356 raise statemachine
.TransitionCorrection('text')
2358 blocktext
= self
.state_machine
.line
2359 msg
= self
.reporter
.severe(
2360 'Unexpected section title or transition.',
2361 nodes
.literal_block(blocktext
, blocktext
),
2362 line
=self
.state_machine
.abs_line_number())
2364 return [], next_state
, []
def text(self, match, context, next_state):
    """
    Start of a text block: title, definition list item or paragraph.

    Returns a 3-tuple: a one-element list holding the matched line
    (``match.string``), the state name 'Text', and an empty list.
    `context` and `next_state` are not used.
    """
    first_line = match.string
    return [first_line], 'Text', []
2371 class RFC2822Body(Body
):
2374 RFC2822 headers are only valid as the first constructs in documents. As
2375 soon as anything else appears, the `Body` state should take over.
2378 patterns
= Body
.patterns
.copy() # can't modify the original
2379 patterns
['rfc2822'] = r
'[!-9;-~]+:( +|$)'
2380 initial_transitions
= [(name
, 'Body')
2381 for name
in Body
.initial_transitions
]
2382 initial_transitions
.insert(-1, ('rfc2822', 'Body')) # just before 'text'
2384 def rfc2822(self
, match
, context
, next_state
):
2385 """RFC2822-style field list item."""
2386 fieldlist
= nodes
.field_list(classes
=['rfc2822'])
2387 self
.parent
+= fieldlist
2388 field
, blank_finish
= self
.rfc2822_field(match
)
2390 offset
= self
.state_machine
.line_offset
+ 1 # next line
2391 newline_offset
, blank_finish
= self
.nested_list_parse(
2392 self
.state_machine
.input_lines
[offset
:],
2393 input_offset
=self
.state_machine
.abs_line_offset() + 1,
2394 node
=fieldlist
, initial_state
='RFC2822List',
2395 blank_finish
=blank_finish
)
2396 self
.goto_line(newline_offset
)
2397 if not blank_finish
:
2398 self
.parent
+= self
.unindent_warning(
2399 'RFC2822-style field list')
2400 return [], next_state
, []
2402 def rfc2822_field(self
, match
):
2403 name
= match
.string
[:match
.string
.find(':')]
2404 indented
, indent
, line_offset
, blank_finish
= \
2405 self
.state_machine
.get_first_known_indented(match
.end(),
2407 fieldnode
= nodes
.field()
2408 fieldnode
+= nodes
.field_name(name
, name
)
2409 fieldbody
= nodes
.field_body('\n'.join(indented
))
2410 fieldnode
+= fieldbody
2412 self
.nested_parse(indented
, input_offset
=line_offset
,
2414 return fieldnode
, blank_finish
2417 class SpecializedBody(Body
):
2420 Superclass for second and subsequent compound element members. Compound
2421 elements are lists and list-like constructs.
2423 All transition methods are disabled (redefined as `invalid_input`).
2424 Override individual methods in subclasses to re-enable.
2426 For example, once an initial bullet list item, say, is recognized, the
2427 `BulletList` subclass takes over, with a "bullet_list" node as its
2428 container. Upon encountering the initial bullet list item, `Body.bullet`
2429 calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
2430 starts up a nested parsing session with `BulletList` as the initial state.
2431 Only the ``bullet`` transition method is enabled in `BulletList`; as long
2432 as only bullet list items are encountered, they are parsed and inserted
2433 into the container. The first construct which is *not* a bullet list item
2434 triggers the `invalid_input` method, which ends the nested parse and
2435 closes the container. `BulletList` needs to recognize input that is
2436 invalid in the context of a bullet list, which means everything *other
2437 than* bullet list items, so it inherits the transition list created in
2441 def invalid_input(self
, match
=None, context
=None, next_state
=None):
2442 """Not a compound element member. Abort this state machine."""
2443 self
.state_machine
.previous_line() # back up so parent SM can reassess
2446 indent
= invalid_input
2447 bullet
= invalid_input
2448 enumerator
= invalid_input
2449 field_marker
= invalid_input
2450 option_marker
= invalid_input
2451 doctest
= invalid_input
2452 line_block
= invalid_input
2453 grid_table_top
= invalid_input
2454 simple_table_top
= invalid_input
2455 explicit_markup
= invalid_input
2456 anonymous
= invalid_input
2457 line
= invalid_input
2458 text
= invalid_input
class BulletList(SpecializedBody):

    """State for the second and subsequent list_items of a bullet_list."""

    def bullet(self, match, context, next_state):
        """
        Handle a bullet list item.

        When the first character of the matched line differs from the
        'bullet' attribute of ``self.parent``, `invalid_input()` is
        called.  The item is then parsed with `list_item()`, added to
        ``self.parent``, and its "blank finish" value is stored on
        ``self.blank_finish``.

        Returns a 3-tuple: an empty list, `next_state` unchanged, and an
        empty list.
        """
        marker = match.string[0]
        if marker != self.parent['bullet']:
            # different bullet: new list
            self.invalid_input()
        parsed = self.list_item(match.end())
        item, ended_blank = parsed
        self.parent += item
        self.blank_finish = ended_blank
        return [], next_state, []
class DefinitionList(SpecializedBody):

    """State for the second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """
        Handle a text line inside a definition list.

        Returns a 3-tuple: a one-element list holding the matched line
        (``match.string``), the state name 'Definition', and an empty
        list.  `context` and `next_state` are not used.
        """
        term_line = match.string
        return [term_line], 'Definition', []
2485 class EnumeratedList(SpecializedBody
):
2487 """Second and subsequent enumerated_list list_items."""
2489 def enumerator(self
, match
, context
, next_state
):
2490 """Enumerated list item."""
2491 format
, sequence
, text
, ordinal
= self
.parse_enumerator(
2492 match
, self
.parent
['enumtype'])
2493 if ( format
!= self
.format
2494 or (sequence
!= '#' and (sequence
!= self
.parent
['enumtype']
2496 or ordinal
!= (self
.lastordinal
+ 1)))
2497 or not self
.is_enumerated_list_item(ordinal
, sequence
, format
)):
2498 # different enumeration: new list
2499 self
.invalid_input()
2502 listitem
, blank_finish
= self
.list_item(match
.end())
2503 self
.parent
+= listitem
2504 self
.blank_finish
= blank_finish
2505 self
.lastordinal
= ordinal
2506 return [], next_state
, []
class FieldList(SpecializedBody):

    """State for the second and subsequent fields of a field_list."""

    def field_marker(self, match, context, next_state):
        """
        Handle a field list field.

        The field is parsed with `field()`, added to ``self.parent``,
        and its "blank finish" value is stored on ``self.blank_finish``.

        Returns a 3-tuple: an empty list, `next_state` unchanged, and an
        empty list.
        """
        parsed = self.field(match)
        new_field, ended_blank = parsed
        self.parent += new_field
        self.blank_finish = ended_blank
        return [], next_state, []
2521 class OptionList(SpecializedBody
):
2523 """Second and subsequent option_list option_list_items."""
2525 def option_marker(self
, match
, context
, next_state
):
2526 """Option list item."""
2528 option_list_item
, blank_finish
= self
.option_list_item(match
)
2530 self
.invalid_input()
2531 self
.parent
+= option_list_item
2532 self
.blank_finish
= blank_finish
2533 return [], next_state
, []
class RFC2822List(SpecializedBody, RFC2822Body):

    """State for the second and subsequent RFC2822-style field_list fields."""

    # Reuse the pattern table and transition list of `RFC2822Body`.
    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """
        Handle an RFC2822-style field list item.

        The field is parsed with `rfc2822_field()`, added to
        ``self.parent``, and its "blank finish" value is stored on
        ``self.blank_finish``.

        Returns a 3-tuple: an empty list, the state name 'RFC2822List'
        (`next_state` is not used), and an empty list.
        """
        parsed = self.rfc2822_field(match)
        new_field, ended_blank = parsed
        self.parent += new_field
        self.blank_finish = ended_blank
        return [], 'RFC2822List', []

    # A blank line is handled by `SpecializedBody.invalid_input`.
    blank = SpecializedBody.invalid_input
2553 class ExtensionOptions(FieldList
):
2556 Parse field_list fields for extension options.
2558 No nested parsing is done (including inline markup parsing).
2561 def parse_field_body(self
, indented
, offset
, node
):
2562 """Override `Body.parse_field_body` for simpler parsing."""
2564 for line
in list(indented
) + ['']:
2568 text
= '\n'.join(lines
)
2569 node
+= nodes
.paragraph(text
, text
)
2573 class LineBlock(SpecializedBody
):
2575 """Second and subsequent lines of a line_block."""
2577 blank
= SpecializedBody
.invalid_input
2579 def line_block(self
, match
, context
, next_state
):
2580 """New line of line block."""
2581 lineno
= self
.state_machine
.abs_line_number()
2582 line
, messages
, blank_finish
= self
.line_block_line(match
, lineno
)
2584 self
.parent
.parent
+= messages
2585 self
.blank_finish
= blank_finish
2586 return [], next_state
, []
class Explicit(SpecializedBody):

    """State for the second and subsequent explicit markup constructs."""

    def _collect(self, parsed, next_state):
        """
        Add a parsed construct to ``self.parent`` and record its
        "blank finish" value on ``self.blank_finish``.

        `parsed` is the (node list, blank finish) pair produced by the
        construct parser.  Returns a 3-tuple: an empty list,
        `next_state` unchanged, and an empty list.
        """
        new_nodes, ended_blank = parsed
        self.parent += new_nodes
        self.blank_finish = ended_blank
        return [], next_state, []

    def explicit_markup(self, match, context, next_state):
        """
        Footnotes, hyperlink targets, directives, comments.

        Parsed by `explicit_construct()`; `context` is not used.
        """
        return self._collect(self.explicit_construct(match), next_state)

    def anonymous(self, match, context, next_state):
        """
        Anonymous hyperlink targets.

        Parsed by `anonymous_target()`; `context` is not used.
        """
        return self._collect(self.anonymous_target(match), next_state)

    # A blank line is handled by `SpecializedBody.invalid_input`.
    blank = SpecializedBody.invalid_input
2610 class SubstitutionDef(Body
):
2613 Parser for the contents of a substitution_definition element.
2617 'embedded_directive': re
.compile(r
'(%s)::( +|$)'
2618 % Inliner
.simplename
, re
.UNICODE
),
2620 initial_transitions
= ['embedded_directive', 'text']
2622 def embedded_directive(self
, match
, context
, next_state
):
2623 nodelist
, blank_finish
= self
.directive(match
,
2624 alt
=self
.parent
['names'][0])
2625 self
.parent
+= nodelist
2626 if not self
.state_machine
.at_eof():
2627 self
.blank_finish
= blank_finish
2630 def text(self
, match
, context
, next_state
):
2631 if not self
.state_machine
.at_eof():
2632 self
.blank_finish
= self
.state_machine
.is_next_line_blank()
2636 class Text(RSTState
):
2639 Classifier of second line of a text block.
2641 Could be a paragraph, a definition list item, or a title.
2644 patterns
= {'underline': Body
.patterns
['line'],
2646 initial_transitions
= [('underline', 'Body'), ('text', 'Body')]
2648 def blank(self
, match
, context
, next_state
):
2649 """End of paragraph."""
2650 # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext
2651 paragraph
, literalnext
= self
.paragraph(
2652 context
, self
.state_machine
.abs_line_number() - 1)
2653 self
.parent
+= paragraph
2655 self
.parent
+= self
.literal_block()
2656 return [], 'Body', []
2658 def eof(self
, context
):
2660 self
.blank(None, context
, None)
def indent(self, match, context, next_state):
    """
    Handle an indented line after a text line: a definition list item.

    A new definition_list node receives the first item (built by
    `definition_list_item()` from `context`) and is added to
    ``self.parent``.  The input lines after the current one are then
    handed to `nested_list_parse()` with 'DefinitionList' as the initial
    state, and `goto_line()` is called with the offset that parse
    returns.  When the nested parse does not report a blank finish, a
    'Definition list' unindent warning is added to ``self.parent``.

    Returns a 3-tuple: an empty list, the state name 'Body', and an
    empty list.  `match` and `next_state` are not used.
    """
    container = nodes.definition_list()
    first_item, item_blank_finish = self.definition_list_item(context)
    container += first_item
    self.parent += container
    machine = self.state_machine
    first_remaining = machine.line_offset + 1      # next line
    remaining_lines = machine.input_lines[first_remaining:]
    absolute_start = machine.abs_line_offset() + 1
    end_offset, ended_blank = self.nested_list_parse(
        remaining_lines,
        input_offset=absolute_start,
        node=container,
        initial_state='DefinitionList',
        blank_finish=item_blank_finish,
        blank_finish_state='Definition')
    self.goto_line(end_offset)
    if not ended_blank:
        self.parent += self.unindent_warning('Definition list')
    return [], 'Body', []
2680 def underline(self
, match
, context
, next_state
):
2681 """Section title."""
2682 lineno
= self
.state_machine
.abs_line_number()
2683 src
, srcline
= self
.state_machine
.get_source_and_line()
2684 title
= context
[0].rstrip()
2685 underline
= match
.string
.rstrip()
2686 source
= title
+ '\n' + underline
2688 if column_width(title
) > len(underline
):
2689 if len(underline
) < 4:
2690 if self
.state_machine
.match_titles
:
2691 msg
= self
.reporter
.info(
2692 'Possible title underline, too short for the title.\n'
2693 "Treating it as ordinary text because it's so short.",
2694 source
=src
, line
=srcline
)
2696 raise statemachine
.TransitionCorrection('text')
2698 blocktext
= context
[0] + '\n' + self
.state_machine
.line
2699 msg
= self
.reporter
.warning(
2700 'Title underline too short.',
2701 nodes
.literal_block(blocktext
, blocktext
),
2702 source
=src
, line
=srcline
)
2703 messages
.append(msg
)
2704 if not self
.state_machine
.match_titles
:
2705 blocktext
= context
[0] + '\n' + self
.state_machine
.line
2706 msg
= self
.reporter
.severe(
2707 'Unexpected section title.',
2708 nodes
.literal_block(blocktext
, blocktext
),
2709 source
=src
, line
=srcline
)
2710 self
.parent
+= messages
2712 return [], next_state
, []
2713 style
= underline
[0]
2715 self
.section(title
, source
, style
, lineno
- 1, messages
)
2716 return [], next_state
, []
2718 def text(self
, match
, context
, next_state
):
2720 startline
= self
.state_machine
.abs_line_number() - 1
2723 block
= self
.state_machine
.get_text_block(flush_left
=1)
2724 except statemachine
.UnexpectedIndentationError
, instance
:
2725 block
, src
, srcline
= instance
.args
2726 msg
= self
.reporter
.error('Unexpected indentation.',
2727 source
=src
, line
=srcline
)
2728 lines
= context
+ list(block
)
2729 paragraph
, literalnext
= self
.paragraph(lines
, startline
)
2730 self
.parent
+= paragraph
2734 self
.state_machine
.next_line()
2737 self
.parent
+= self
.literal_block()
2738 return [], next_state
, []
2740 def literal_block(self
):
2741 """Return a list of nodes."""
2742 indented
, indent
, offset
, blank_finish
= \
2743 self
.state_machine
.get_indented()
2744 while indented
and not indented
[-1].strip():
2747 return self
.quoted_literal_block()
2748 data
= '\n'.join(indented
)
2749 literal_block
= nodes
.literal_block(data
, data
)
2750 literal_block
.line
= offset
+ 1
2751 nodelist
= [literal_block
]
2752 if not blank_finish
:
2753 nodelist
.append(self
.unindent_warning('Literal block'))
def quoted_literal_block(self):
    """
    Parse a quoted literal block starting at the current line.

    The input lines from the current line offset onward are handed to
    `nested_parse()` with `QuotedLiteralBlock` as the only state class
    and the initial state, title matching switched off, and a temporary
    `nodes.Element` as the receiving node.  `goto_line()` is then called
    with the offset that parse returns.

    Returns the children of the temporary element.
    """
    machine = self.state_machine
    absolute_start = machine.abs_line_offset()
    remaining_lines_start = machine.line_offset
    holder = nodes.Element()
    nested_machine_options = {
        'state_classes': (QuotedLiteralBlock,),
        'initial_state': 'QuotedLiteralBlock',
    }
    end_offset = self.nested_parse(
        machine.input_lines[remaining_lines_start:],
        input_offset=absolute_start,
        node=holder,
        match_titles=0,
        state_machine_kwargs=nested_machine_options)
    self.goto_line(end_offset)
    return holder.children
def definition_list_item(self, termline):
    """
    Build a definition_list_item from a term line and the indented
    block that follows it.

    `termline` is a list of lines; it is passed to `term()`, and its
    first entry is checked for a trailing "::".  The item's source and
    line are taken from the state machine, with the line set one less
    than the reported one.  The term nodes and a definition node
    (holding any messages from `term()`) are added to the item, and the
    indented block is parsed into the definition with `nested_parse()`.
    When the term ends with "::", an info message is added to the
    definition first.

    Returns a 2-tuple: the definition_list_item node and the "blank
    finish" value reported for the indented block.
    """
    machine = self.state_machine
    body_lines, _indent, body_offset, blank_finish = machine.get_indented()
    raw_source = '\n'.join(termline + list(body_lines))
    item = nodes.definition_list_item(raw_source)
    term_lineno = machine.abs_line_number() - 1
    source_path, source_line = machine.get_source_and_line()
    item.source = source_path
    item.line = source_line - 1
    term_nodes, term_messages = self.term(termline, term_lineno)
    item += term_nodes
    body = nodes.definition('', *term_messages)
    item += body
    if termline[0].endswith('::'):
        hint = self.reporter.info(
            'Blank line missing before literal block (after the "::")? '
            'Interpreted as a definition list item.',
            source=source_path, line=source_line)
        body += hint
    self.nested_parse(body_lines, input_offset=body_offset, node=body)
    return item, blank_finish
2789 classifier_delimiter
= re
.compile(' +: +')
2791 def term(self
, lines
, lineno
):
2792 """Return a definition_list's term and optional classifiers."""
2793 assert len(lines
) == 1
2794 text_nodes
, messages
= self
.inline_text(lines
[0], lineno
)
2795 term_node
= nodes
.term()
2796 node_list
= [term_node
]
2797 for i
in range(len(text_nodes
)):
2798 node
= text_nodes
[i
]
2799 if isinstance(node
, nodes
.Text
):
2800 parts
= self
.classifier_delimiter
.split(node
.rawsource
)
2802 node_list
[-1] += node
2805 node_list
[-1] += nodes
.Text(parts
[0].rstrip())
2806 for part
in parts
[1:]:
2807 classifier_node
= nodes
.classifier('', part
)
2808 node_list
.append(classifier_node
)
2810 node_list
[-1] += node
2811 return node_list
, messages
2814 class SpecializedText(Text
):
2817 Superclass for second and subsequent lines of Text-variants.
2819 All transition methods are disabled. Override individual methods in
2820 subclasses to re-enable.
2823 def eof(self
, context
):
2824 """Incomplete construct."""
2827 def invalid_input(self
, match
=None, context
=None, next_state
=None):
2828 """Not a compound element member. Abort this state machine."""
2831 blank
= invalid_input
2832 indent
= invalid_input
2833 underline
= invalid_input
2834 text
= invalid_input
2837 class Definition(SpecializedText
):
2839 """Second line of potential definition_list_item."""
2841 def eof(self
, context
):
2842 """Not a definition."""
2843 self
.state_machine
.previous_line(2) # so parent SM can reassess
def indent(self, match, context, next_state):
    """
    Handle an indented line after a term line: a definition list item.

    The item is built by `definition_list_item()` from `context`, added
    to ``self.parent``, and its "blank finish" value is stored on
    ``self.blank_finish``.

    Returns a 3-tuple: an empty list, the state name 'DefinitionList',
    and an empty list.  `match` and `next_state` are not used.
    """
    parsed = self.definition_list_item(context)
    item, ended_blank = parsed
    self.parent += item
    self.blank_finish = ended_blank
    return [], 'DefinitionList', []
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1                        # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document.

        `context[0]` holds the marker line.  Returns an empty result list;
        may raise `statemachine.StateCorrection` (via `state_correction`)
        when the marker is shorter than 4 characters.
        """
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = 0
        elif len(marker) < 4:
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = lineno
            self.parent += transition
        # Re-arm the check for the next use of this state.
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker.

        A marker line followed by a blank line.  Markers shorter than
        4 characters are handed back to `Body` as ordinary text via
        `state_correction`; otherwise a transition node is added to the
        parent.
        """
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title.

        `context[0]` is the overline and `match.string` the title line;
        the underline is read from the state machine.  Malformed titles
        are reported as system messages attached to the parent, unless
        the overline is shorter than 4 characters, in which case
        `short_overline` re-parses the lines as ordinary text (it raises
        `statemachine.StateCorrection`).  A well-formed title is passed
        on to `self.section()`.
        """
        lineno = self.state_machine.abs_line_number() - 1
        src, srcline = self.state_machine.get_source_and_line()
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # Input ended right after the title line: no underline at all.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    source=src, line=srcline-1)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            # The third line is not an underline at all.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    source=src, line=srcline-1)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            # Both are adornment lines, but they differ.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    source=src, line=srcline-1)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                # Only a warning: the section is still created below.
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    source=src, line=srcline-1)
                messages.append(msg)
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text                       # indented title

    def underline(self, match, context, next_state):
        """Overline followed by a line matching the 'underline' transition.

        Reports 'Invalid section title or transition marker.' on the
        parent, unless the overline is shorter than 4 characters, in which
        case `short_overline` re-parses the lines as ordinary text.
        """
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        src, srcline = self.state_machine.get_source_and_line()
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline-1)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        """Report a too-short overline, then re-parse it as ordinary text.

        Attaches an info-level message for line `lineno` to the parent and
        calls `state_correction(context, lines)`, which always raises.
        `blocktext` is accepted for the callers' convenience but not used.
        """
        src, srcline = self.state_machine.get_source_and_line(lineno)
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            source=src, line=srcline)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        """Rewind `lines` lines and restart in state 'Body', transition 'text'.

        Empties `context` in place and always raises
        `statemachine.StateCorrection`.
        """
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=0):
        RSTState.__init__(self, state_machine, debug)
        # System messages collected while parsing; attached to the parent
        # in `eof()`.
        self.messages = []
        # Absolute line number of the first quoted line (set by
        # `initial_quoted`).
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        """A blank line ends the block once quoted lines were collected."""
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        """Emit the literal block, or a warning if no lines were collected.

        Collected system messages are appended to the parent afterwards.
        Returns an empty result list.
        """
        if context:
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            # src not available, because statemachine.input_lines is empty
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        """Indented line inside the quoted block: record an error and stop."""
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote), re.UNICODE)
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        """Line that does not start with the established quote character.

        If quoted lines were already collected, record an error and back
        up one line.  The block ends here in either case.
        """
        if context:
            src, srcline = self.state_machine.get_source_and_line()
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    source=src, line=srcline))
            self.state_machine.previous_line()
        raise EOFError
state_classes = (
    Body,
    BulletList,
    DefinitionList,
    EnumeratedList,
    FieldList,
    OptionList,
    LineBlock,
    ExtensionOptions,
    Explicit,
    Text,
    Definition,
    Line,
    SubstitutionDef,
    RFC2822Body,
    RFC2822List,
)
"""Standard set of State classes used to start `RSTStateMachine`."""