2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 This is the ``docutils.parsers.rst.states`` module, the core of
7 the reStructuredText parser. It defines the following:
10 - `RSTStateMachine`: reStructuredText parser's entry point.
11 - `NestedStateMachine`: recursive StateMachine.
12 - `RSTState`: reStructuredText State superclass.
13 - `Inliner`: For parsing inline markup.
14 - `Body`: Generic classifier of the first line of a block.
15 - `SpecializedBody`: Superclass for compound element members.
16 - `BulletList`: Second and subsequent bullet_list list_items
17 - `DefinitionList`: Second+ definition_list_items.
18 - `EnumeratedList`: Second+ enumerated_list list_items.
19 - `FieldList`: Second+ fields.
20 - `OptionList`: Second+ option_list_items.
21 - `RFC2822List`: Second+ RFC2822-style fields.
22 - `ExtensionOptions`: Parses directive option fields.
23 - `Explicit`: Second+ explicit markup constructs.
24 - `SubstitutionDef`: For embedded directives in substitution definitions.
25 - `Text`: Classifier of second line of a text block.
26 - `SpecializedText`: Superclass for continuation lines of Text-variants.
27 - `Definition`: Second line of potential definition_list_item.
28 - `Line`: Second line of overlined section title or transition marker.
29 - `Struct`: An auxiliary collection class.
37 - `escape2null()`: Return a string, escape-backslashes converted to nulls.
38 - `unescape()`: Return a string, nulls removed or restored to backslashes.
41 - `state_classes`: set of State classes used with `RSTStateMachine`.
46 The reStructuredText parser is implemented as a recursive state machine,
47 examining its input one line at a time. To understand how the parser works,
48 please first become familiar with the `docutils.statemachine` module. In the
49 description below, references are made to classes defined in this module;
50 please see the individual classes for details.
52 Parsing proceeds as follows:
54 1. The state machine examines each line of input, checking each of the
55 transition patterns of the state `Body`, in order, looking for a match.
56 The implicit transitions (blank lines and indentation) are checked before
57 any others. The 'text' transition is a catch-all (matches anything).
59 2. The method associated with the matched transition pattern is called.
61 A. Some transition methods are self-contained, appending elements to the
62 document tree (`Body.doctest` parses a doctest block). The parser's
63 current line index is advanced to the end of the element, and parsing
64 continues with step 1.
66 B. Other transition methods trigger the creation of a nested state machine,
67 whose job is to parse a compound construct ('indent' does a block quote,
68 'bullet' does a bullet list, 'overline' does a section [first checking
69 for a valid section header], etc.).
71 - In the case of lists and explicit markup, a one-off state machine is
72 created and run to parse contents of the first item.
74 - A new state machine is created and its initial state is set to the
75 appropriate specialized state (`BulletList` in the case of the
76 'bullet' transition; see `SpecializedBody` for more detail). This
77 state machine is run to parse the compound element (or series of
78 explicit markup elements), and returns as soon as a non-member element
79 is encountered. For example, the `BulletList` state machine ends as
80 soon as it encounters an element which is not a list item of that
81 bullet list. The optional omission of inter-element blank lines is
82 enabled by this nested state machine.
84 - The current line index is advanced to the end of the elements parsed,
85 and parsing continues with step 1.
87 C. The result of the 'text' transition depends on the next line of text.
88 The current state is changed to `Text`, under which the second line is
89 examined. If the second line is:
91 - Indented: The element is a definition list item, and parsing proceeds
92 similarly to step 2.B, using the `DefinitionList` state.
94 - A line of uniform punctuation characters: The element is a section
95 header; again, parsing proceeds as in step 2.B, and `Body` is still
98 - Anything else: The element is a paragraph, which is examined for
99 inline markup and appended to the parent element. Processing
100 continues with step 1.
103 __docformat__
= 'reStructuredText'
108 from types
import FunctionType
, MethodType
110 from docutils
import nodes
, statemachine
, utils
111 from docutils
import ApplicationError
, DataError
112 from docutils
.statemachine
import StateMachineWS
, StateWS
113 from docutils
.nodes
import fully_normalize_name
as normalize_name
114 from docutils
.nodes
import whitespace_normalize_name
115 import docutils
.parsers
.rst
116 from docutils
.parsers
.rst
import directives
, languages
, tableparser
, roles
117 from docutils
.parsers
.rst
.languages
import en
as _fallback_language_module
118 from docutils
.utils
import escape2null
, unescape
, column_width
119 from docutils
.utils
import punctuation_chars
, roman
, urischemes
120 from docutils
.utils
import split_escaped_whitespace
# Exception classes used by the reStructuredText parser.
# (`DataError` and `ApplicationError` come from the `docutils` package
# import above; `MarkupMismatch` is an internal control-flow signal.)

class MarkupError(DataError): pass
class UnknownInterpretedRoleError(DataError): pass
class InterpretedRoleNotImplementedError(DataError): pass
class ParserError(ApplicationError): pass
class MarkupMismatch(Exception): pass
class Struct:

    """Stores data attributes for dotted-attribute access."""

    def __init__(self, **keywordargs):
        # Every keyword argument becomes an instance attribute.
        self.__dict__.update(keywordargs)
class RSTStateMachine(StateMachineWS):

    """
    reStructuredText's master StateMachine.

    The entry point to reStructuredText parsing is the `run()` method.
    """

    def run(self, input_lines, document, input_offset=0, match_titles=True,
            inliner=None):
        """
        Parse `input_lines` and modify the `document` node in place.

        Extend `StateMachineWS.run()`: set up parse-global data and
        run the StateMachine.
        """
        self.language = languages.get_language(
            document.settings.language_code)
        self.match_titles = match_titles
        # NOTE(review): reconstructed from a damaged source — a fresh
        # `Inliner` is created when none is supplied; confirm vs. upstream.
        if inliner is None:
            inliner = Inliner()
        inliner.init_customizations(document.settings)
        # `memo` carries parse-global state; `title_styles`/`section_level`
        # are read and updated by `RSTState.check_subsection`, and
        # `inliner` is picked up by `RSTState.runtime_init`.
        self.memo = Struct(document=document,
                           reporter=document.reporter,
                           language=self.language,
                           title_styles=[],
                           section_level=0,
                           section_bubble_up_kludge=False,
                           inliner=inliner)
        self.document = document
        self.attach_observer(document.note_source)
        self.reporter = self.memo.reporter
        self.node = document
        results = StateMachineWS.run(self, input_lines, input_offset,
                                     input_source=document['source'])
        assert results == [], 'RSTStateMachine.run() results should be empty!'
        self.node = self.memo = None    # remove unneeded references
class NestedStateMachine(StateMachineWS):

    """
    StateMachine run from within other StateMachine runs, to parse nested
    document structures.
    """

    def run(self, input_lines, input_offset, memo, node, match_titles=True):
        """
        Parse `input_lines` and populate a `docutils.nodes.document` instance.

        Extend `StateMachineWS.run()`: set up document-wide data.
        """
        self.match_titles = match_titles
        # Share the caller's parse-global memo rather than building a new one.
        self.memo = memo
        self.document = memo.document
        self.attach_observer(self.document.note_source)
        self.reporter = memo.reporter
        self.language = memo.language
        self.node = node
        results = StateMachineWS.run(self, input_lines, input_offset)
        assert results == [], ('NestedStateMachine.run() results should be '
                               'empty!')
        return results
class RSTState(StateWS):

    """
    reStructuredText State superclass.

    Contains methods used by all State subclasses.
    """

    nested_sm = NestedStateMachine
    nested_sm_cache = []

    def __init__(self, state_machine, debug=False):
        self.nested_sm_kwargs = {'state_classes': state_classes,
                                 'initial_state': 'Body'}
        StateWS.__init__(self, state_machine, debug)

    def runtime_init(self):
        StateWS.runtime_init(self)
        memo = self.state_machine.memo
        self.memo = memo
        self.reporter = memo.reporter
        self.inliner = memo.inliner
        self.document = memo.document
        self.parent = self.state_machine.node
        # enable the reporter to determine source and source-line
        if not hasattr(self.reporter, 'get_source_and_line'):
            self.reporter.get_source_and_line = \
                self.state_machine.get_source_and_line

    def goto_line(self, abs_line_offset):
        """
        Jump to input line `abs_line_offset`, ignoring jumps past the end.
        """
        try:
            self.state_machine.goto_line(abs_line_offset)
        except EOFError:
            pass

    def no_match(self, context, transitions):
        """
        Override `StateWS.no_match` to generate a system message.

        This code should never be run.
        """
        self.reporter.severe(
            'Internal error: no transition pattern match. State: "%s"; '
            'transitions: %s; context: %s; current line: %r.'
            % (self.__class__.__name__, transitions, context,
               self.state_machine.line))
        return context, None, []

    def bof(self, context):
        """Called at beginning of file."""
        return [], []

    def nested_parse(self, block, input_offset, node, match_titles=False,
                     state_machine_class=None, state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`.  Return the new absolute line offset.
        """
        use_default = 0
        if state_machine_class is None:
            state_machine_class = self.nested_sm
            use_default += 1
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs
            use_default += 1
        block_length = len(block)

        state_machine = None
        if use_default == 2:
            # Fully-default runs may reuse a cached state machine.
            try:
                state_machine = self.nested_sm_cache.pop()
            except IndexError:
                pass
        if not state_machine:
            state_machine = state_machine_class(debug=self.debug,
                                                **state_machine_kwargs)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        if use_default == 2:
            self.nested_sm_cache.append(state_machine)
        else:
            state_machine.unlink()
        new_offset = state_machine.abs_line_offset()
        # No `block.parent` implies disconnected -- lines aren't in sync:
        if block.parent and (len(block) - block_length) != 0:
            # Adjustment for block if modified in nested parse:
            self.state_machine.next_line(len(block) - block_length)
        return new_offset

    def nested_list_parse(self, block, input_offset, node, initial_state,
                          blank_finish,
                          blank_finish_state=None,
                          extra_settings={},
                          match_titles=False,
                          state_machine_class=None,
                          state_machine_kwargs=None):
        """
        Create a new StateMachine rooted at `node` and run it over the input
        `block`. Also keep track of optional intermediate blank lines and the
        required final one.
        """
        if state_machine_class is None:
            state_machine_class = self.nested_sm
        if state_machine_kwargs is None:
            state_machine_kwargs = self.nested_sm_kwargs.copy()
        state_machine_kwargs['initial_state'] = initial_state
        state_machine = state_machine_class(debug=self.debug,
                                            **state_machine_kwargs)
        if blank_finish_state is None:
            blank_finish_state = initial_state
        state_machine.states[blank_finish_state].blank_finish = blank_finish
        # `extra_settings` is only read (never mutated), so the shared
        # mutable default dict is safe; kept for interface compatibility.
        for key, value in extra_settings.items():
            setattr(state_machine.states[initial_state], key, value)
        state_machine.run(block, input_offset, memo=self.memo,
                          node=node, match_titles=match_titles)
        blank_finish = state_machine.states[blank_finish_state].blank_finish
        state_machine.unlink()
        return state_machine.abs_line_offset(), blank_finish

    def section(self, title, source, style, lineno, messages):
        """Check for a valid subsection and create one if it checks out."""
        if self.check_subsection(source, style, lineno):
            self.new_subsection(title, lineno, messages)

    def check_subsection(self, source, style, lineno):
        """
        Check for a valid subsection header.  Return 1 (true) or None (false).

        When a new section is reached that isn't a subsection of the current
        section, back up the line count (use ``previous_line(-x)``), then
        ``raise EOFError``.  The current StateMachine will finish, then the
        calling StateMachine can re-examine the title.  This will work its way
        back up the calling chain until the correct section level is reached.

        @@@ Alternative: Evaluate the title, store the title info & level, and
        back up the chain until that level is reached.  Store in memo? Or
        return in results?

        :Exception: `EOFError` when a sibling or supersection encountered.
        """
        memo = self.memo
        title_styles = memo.title_styles
        mylevel = memo.section_level
        try:                            # check for existing title style
            level = title_styles.index(style) + 1
        except ValueError:              # new title style
            if len(title_styles) == memo.section_level: # new subsection
                title_styles.append(style)
                return 1
            else:                       # not at lowest level
                self.parent += self.title_inconsistent(source, lineno)
                return None
        if level <= mylevel:            # sibling or supersection
            memo.section_level = level  # bubble up to parent section
            if len(style) == 2:
                memo.section_bubble_up_kludge = True
            # back up 2 lines for underline title, 3 for overline title
            self.state_machine.previous_line(len(style) + 1)
            raise EOFError              # let parent section re-evaluate
        if level == mylevel + 1:        # immediate subsection
            return 1
        else:                           # invalid subsection
            self.parent += self.title_inconsistent(source, lineno)
            return None

    def title_inconsistent(self, sourcetext, lineno):
        # Report a title underline/overline that doesn't fit the established
        # section hierarchy; return the system_message node for appending.
        error = self.reporter.severe(
            'Title level inconsistent:', nodes.literal_block('', sourcetext),
            line=lineno)
        return error

    def new_subsection(self, title, lineno, messages):
        """Append new subsection to document tree. On return, check level."""
        memo = self.memo
        mylevel = memo.section_level
        memo.section_level += 1
        section_node = nodes.section()
        self.parent += section_node
        textnodes, title_messages = self.inline_text(title, lineno)
        titlenode = nodes.title(title, '', *textnodes)
        name = normalize_name(titlenode.astext())
        section_node['names'].append(name)
        section_node += titlenode
        section_node += messages
        section_node += title_messages
        self.document.note_implicit_target(section_node, section_node)
        offset = self.state_machine.line_offset + 1
        absoffset = self.state_machine.abs_line_offset() + 1
        newabsoffset = self.nested_parse(
            self.state_machine.input_lines[offset:], input_offset=absoffset,
            node=section_node, match_titles=True)
        self.goto_line(newabsoffset)
        if memo.section_level <= mylevel: # can't handle next section?
            raise EOFError              # bubble up to supersection
        # reset section_level; next pass will detect it properly
        memo.section_level = mylevel

    def paragraph(self, lines, lineno):
        """
        Return a list (paragraph & messages) & a boolean: literal_block next?
        """
        data = '\n'.join(lines).rstrip()
        # An unescaped '::' at the end announces a literal block.
        if re.search(r'(?<!\\)(\\\\)*::$', data):
            if len(data) == 3:
                return [], 1
            elif data[-3] in ' \n':
                text = data[:-3].rstrip()
            else:
                text = data[:-1]
            literalnext = 1
        else:
            text = data
            literalnext = 0
        textnodes, messages = self.inline_text(text, lineno)
        p = nodes.paragraph(data, '', *textnodes)
        p.source, p.line = self.state_machine.get_source_and_line(lineno)
        return [p] + messages, literalnext

    def inline_text(self, text, lineno):
        """
        Return 2 lists: nodes (text and inline elements), and system_messages.
        """
        nodes, messages = self.inliner.parse(text, lineno,
                                             self.memo, self.parent)
        return nodes, messages

    def unindent_warning(self, node_name):
        # the actual problem is one line below the current line
        lineno = self.state_machine.abs_line_number() + 1
        return self.reporter.warning('%s ends without a blank line; '
                                     'unexpected unindent.' % node_name,
                                     line=lineno)
def build_regexp(definition, compile=True):
    """
    Build, compile and return a regular expression based on `definition`.

    :Parameter: `definition`: a 4-tuple (group name, prefix, suffix, parts),
        where "parts" is a list of regular expressions and/or regular
        expression definitions to be joined into an or-group.

    When `compile` is false, return the pattern string instead of a
    compiled pattern object (used for nested definitions).
    """
    # `compile` shadows the builtin, but is kept for interface compatibility.
    name, prefix, suffix, parts = definition
    part_strings = []
    for part in parts:
        if type(part) is tuple:
            # nested definition: recurse, keeping the string form
            part_strings.append(build_regexp(part, None))
        else:
            part_strings.append(part)
    or_group = '|'.join(part_strings)
    regexp = '%(prefix)s(?P<%(name)s>%(or_group)s)%(suffix)s' % locals()
    if compile:
        return re.compile(regexp, re.UNICODE)
    else:
        return regexp
465 Parse inline markup; call the `parse()` method.
469 self
.implicit_dispatch
= []
470 """List of (pattern, bound method) tuples, used by
471 `self.implicit_inline`."""
473 def init_customizations(self
, settings
):
474 # lookahead and look-behind expressions for inline markup rules
475 if getattr(settings
, 'character_level_inline_markup', False):
476 start_string_prefix
= u
'(^|(?<!\x00))'
477 end_string_suffix
= u
''
479 start_string_prefix
= (u
'(^|(?<=\\s|[%s%s]))' %
480 (punctuation_chars
.openers
,
481 punctuation_chars
.delimiters
))
482 end_string_suffix
= (u
'($|(?=\\s|[\x00%s%s%s]))' %
483 (punctuation_chars
.closing_delimiters
,
484 punctuation_chars
.delimiters
,
485 punctuation_chars
.closers
))
486 args
= locals().copy()
487 args
.update(vars(self
.__class
__))
489 parts
= ('initial_inline', start_string_prefix
, '',
490 [('start', '', self
.non_whitespace_after
, # simple start-strings
492 r
'\*(?!\*)', # emphasis but not strong
494 r
'_`', # inline internal target
495 r
'\|(?!\|)'] # substitution reference
497 ('whole', '', end_string_suffix
, # whole constructs
498 [# reference name & end-string
499 r
'(?P<refname>%s)(?P<refend>__?)' % self
.simplename
,
500 ('footnotelabel', r
'\[', r
'(?P<fnend>\]_)',
501 [r
'[0-9]+', # manually numbered
502 r
'\#(%s)?' % self
.simplename
, # auto-numbered (w/ label?)
504 r
'(?P<citationlabel>%s)' % self
.simplename
] # citation reference
508 ('backquote', # interpreted text or phrase reference
509 '(?P<role>(:%s:)?)' % self
.simplename
, # optional role
510 self
.non_whitespace_after
,
511 ['`(?!`)'] # but not literal
515 self
.start_string_prefix
= start_string_prefix
516 self
.end_string_suffix
= end_string_suffix
519 self
.patterns
= Struct(
520 initial
=build_regexp(parts
),
521 emphasis
=re
.compile(self
.non_whitespace_escape_before
522 + r
'(\*)' + end_string_suffix
, re
.UNICODE
),
523 strong
=re
.compile(self
.non_whitespace_escape_before
524 + r
'(\*\*)' + end_string_suffix
, re
.UNICODE
),
525 interpreted_or_phrase_ref
=re
.compile(
527 %(non_unescaped_whitespace_escape_before)s
531 (?P<role>:%(simplename)s:)?
535 %(end_string_suffix)s
536 """ % args
, re
.VERBOSE | re
.UNICODE
),
537 embedded_link
=re
.compile(
540 (?:[ \n]+|^) # spaces or beginning of line/string
542 %(non_whitespace_after)s
543 (([^<>]|\x00[<>])+) # anything but unescaped angle brackets
544 %(non_whitespace_escape_before)s
548 """ % args
, re
.VERBOSE | re
.UNICODE
),
549 literal
=re
.compile(self
.non_whitespace_before
+ '(``)'
550 + end_string_suffix
, re
.UNICODE
),
551 target
=re
.compile(self
.non_whitespace_escape_before
552 + r
'(`)' + end_string_suffix
, re
.UNICODE
),
553 substitution_ref
=re
.compile(self
.non_whitespace_escape_before
555 + end_string_suffix
, re
.UNICODE
),
556 email
=re
.compile(self
.email_pattern
% args
+ '$',
557 re
.VERBOSE | re
.UNICODE
),
560 %(start_string_prefix)s
562 (?P<absolute> # absolute URI
563 (?P<scheme> # scheme (http, ftp, mailto)
564 [a-zA-Z][a-zA-Z0-9.+-]*
569 (//?)? # hierarchical URI
570 %(uric)s* # URI characters
571 %(uri_end)s # final URI char
577 ( # optional fragment
584 (?P<email> # email address
585 """ + self
.email_pattern
+ r
"""
588 %(end_string_suffix)s
589 """) % args
, re
.VERBOSE | re
.UNICODE
),
592 %(start_string_prefix)s
594 (pep-(?P<pepnum1>\d+)(.txt)?) # reference to source file
596 (PEP\s+(?P<pepnum2>\d+)) # reference by name
598 %(end_string_suffix)s""" % args
, re
.VERBOSE | re
.UNICODE
),
601 %(start_string_prefix)s
602 (RFC(-|\s+)?(?P<rfcnum>\d+))
603 %(end_string_suffix)s""" % args
, re
.VERBOSE | re
.UNICODE
))
605 self
.implicit_dispatch
.append((self
.patterns
.uri
,
606 self
.standalone_uri
))
607 if settings
.pep_references
:
608 self
.implicit_dispatch
.append((self
.patterns
.pep
,
610 if settings
.rfc_references
:
611 self
.implicit_dispatch
.append((self
.patterns
.rfc
,
614 def parse(self
, text
, lineno
, memo
, parent
):
615 # Needs to be refactored for nested inline markup.
616 # Add nested_parse() method?
618 Return 2 lists: nodes (text and inline elements), and system_messages.
620 Using `self.patterns.initial`, a pattern which matches start-strings
621 (emphasis, strong, interpreted, phrase reference, literal,
622 substitution reference, and inline target) and complete constructs
623 (simple reference, footnote reference), search for a candidate. When
624 one is found, check for validity (e.g., not a quoted '*' character).
625 If valid, search for the corresponding end string if applicable, and
626 check it for validity. If not found or invalid, generate a warning
627 and ignore the start-string. Implicit inline markup (e.g. standalone
630 self
.reporter
= memo
.reporter
631 self
.document
= memo
.document
632 self
.language
= memo
.language
634 pattern_search
= self
.patterns
.initial
.search
635 dispatch
= self
.dispatch
636 remaining
= escape2null(text
)
641 match
= pattern_search(remaining
)
643 groups
= match
.groupdict()
644 method
= dispatch
[groups
['start'] or groups
['backquote']
645 or groups
['refend'] or groups
['fnend']]
646 before
, inlines
, remaining
, sysmessages
= method(self
, match
,
648 unprocessed
.append(before
)
649 messages
+= sysmessages
651 processed
+= self
.implicit_inline(''.join(unprocessed
),
657 remaining
= ''.join(unprocessed
) + remaining
659 processed
+= self
.implicit_inline(remaining
, lineno
)
660 return processed
, messages
662 # Inline object recognition
663 # -------------------------
664 # See also init_customizations().
665 non_whitespace_before
= r
'(?<!\s)'
666 non_whitespace_escape_before
= r
'(?<![\s\x00])'
667 non_unescaped_whitespace_escape_before
= r
'(?<!(?<!\x00)[\s\x00])'
668 non_whitespace_after
= r
'(?!\s)'
669 # Alphanumerics with isolated internal [-._+:] chars (i.e. not 2 together):
670 simplename
= r
'(?:(?!_)\w)+(?:[-._+:](?:(?!_)\w)+)*'
671 # Valid URI characters (see RFC 2396 & RFC 2732);
672 # final \x00 allows backslash escapes in URIs:
673 uric
= r
"""[-_.!~*'()[\];/:@&=+$,%a-zA-Z0-9\x00]"""
674 # Delimiter indicating the end of a URI (not part of the URI):
675 uri_end_delim
= r
"""[>]"""
676 # Last URI character; same as uric but no punctuation:
677 urilast
= r
"""[_~*/=+a-zA-Z0-9]"""
678 # End of a URI (either 'urilast' or 'uric followed by a
680 uri_end
= r
"""(?:%(urilast)s|%(uric)s(?=%(uri_end_delim)s))""" % locals()
681 emailc
= r
"""[-_!~*'{|}/#?^`&=+$%a-zA-Z0-9\x00]"""
683 %(emailc)s+(?:\.%(emailc)s+)* # name
685 %(emailc)s+(?:\.%(emailc)s*)* # host
686 %(uri_end)s # final URI char
def quoted_start(self, match):
    """Test if inline markup start-string is 'quoted'.

    'Quoted' in this context means the start-string is enclosed in a pair
    of matching opening/closing delimiters (not necessarily quotes)
    or at the end of the match.
    """
    string = match.string
    start = match.start()
    if start == 0:                  # start-string at beginning of text
        return False
    prestart = string[start - 1]
    try:
        poststart = string[match.end()]
    except IndexError:              # start-string at end of text
        return True  # not "quoted" but no markup start-string either
    return punctuation_chars.match_chars(prestart, poststart)
def inline_obj(self, match, lineno, end_pattern, nodeclass,
               restore_backslashes=False):
    # Generic recognizer for simple paired inline markup (emphasis,
    # strong, literal, target, substitution reference).  Returns a
    # 5-tuple: (text before the match, list of nodes, remaining text,
    # system messages, end-string matched).
    string = match.string
    matchstart = match.start('start')
    matchend = match.end('start')
    if self.quoted_start(match):
        # start-string is "quoted" by surrounding delimiters: no markup
        return (string[:matchend], [], string[matchend:], [], '')
    endmatch = end_pattern.search(string[matchend:])
    if endmatch and endmatch.start(1):  # 1 or more chars
        _text = endmatch.string[:endmatch.start(1)]
        text = unescape(_text, restore_backslashes)
        textend = matchend + endmatch.end(1)
        rawsource = unescape(string[matchstart:textend], True)
        node = nodeclass(rawsource, text)
        node[0].rawsource = unescape(_text, True)
        return (string[:matchstart], [node],
                string[textend:], [], endmatch.group(1))
    # No end-string found: emit a warning and a problematic node.
    msg = self.reporter.warning(
        'Inline %s start-string without end-string.'
        % nodeclass.__name__, line=lineno)
    text = unescape(string[matchstart:matchend], True)
    rawsource = unescape(string[matchstart:matchend], True)
    prb = self.problematic(text, rawsource, msg)
    return string[:matchstart], [prb], string[matchend:], [msg], ''
def problematic(self, text, rawsource, message):
    # Wrap `text` in a `problematic` node cross-linked with the given
    # system `message`, and return the node.
    msgid = self.document.set_id(message, self.parent)
    problematic = nodes.problematic(rawsource, text, refid=msgid)
    prbid = self.document.set_id(problematic)
    message.add_backref(prbid)
    return problematic
def emphasis(self, match, lineno):
    # Dispatch target for the '*' start-string: build an emphasis node.
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.emphasis, nodes.emphasis)
    return before, inlines, remaining, sysmessages
, match
, lineno
):
745 before
, inlines
, remaining
, sysmessages
, endstring
= self
.inline_obj(
746 match
, lineno
, self
.patterns
.strong
, nodes
.strong
)
747 return before
, inlines
, remaining
, sysmessages
def interpreted_or_phrase_ref(self, match, lineno):
    # Dispatch target for the '`' start-string: either interpreted text
    # (with an optional prefix or suffix role) or a phrase reference.
    # NOTE(review): several branch lines were missing from the damaged
    # source; the role/position handling was reconstructed.
    end_pattern = self.patterns.interpreted_or_phrase_ref
    string = match.string
    matchstart = match.start('backquote')
    matchend = match.end('backquote')
    rolestart = match.start('role')
    role = match.group('role')
    position = ''
    if role:
        role = role[1:-1]               # strip the surrounding colons
        position = 'prefix'
    elif self.quoted_start(match):
        return (string[:matchend], [], string[matchend:], [])
    endmatch = end_pattern.search(string[matchend:])
    if endmatch and endmatch.start(1):  # 1 or more chars
        textend = matchend + endmatch.end()
        if endmatch.group('role'):
            if role:
                msg = self.reporter.warning(
                    'Multiple roles in interpreted text (both '
                    'prefix and suffix present; only one allowed).',
                    line=lineno)
                text = unescape(string[rolestart:textend], True)
                prb = self.problematic(text, text, msg)
                return string[:rolestart], [prb], string[textend:], [msg]
            role = endmatch.group('suffix')[1:-1]
            position = 'suffix'
        escaped = endmatch.string[:endmatch.start(1)]
        rawsource = unescape(string[matchstart:textend], True)
        if rawsource[-1:] == '_':       # trailing underscore: reference
            if role:
                msg = self.reporter.warning(
                    'Mismatch: both interpreted text role %s and '
                    'reference suffix.' % position, line=lineno)
                text = unescape(string[rolestart:textend], True)
                prb = self.problematic(text, text, msg)
                return string[:rolestart], [prb], string[textend:], [msg]
            return self.phrase_ref(string[:matchstart], string[textend:],
                                   rawsource, escaped, unescape(escaped))
        else:
            rawsource = unescape(string[rolestart:textend], True)
            nodelist, messages = self.interpreted(rawsource, escaped, role,
                                                  lineno)
            return (string[:rolestart], nodelist,
                    string[textend:], messages)
    # No end-string found.
    msg = self.reporter.warning(
        'Inline interpreted text or phrase reference start-string '
        'without end-string.', line=lineno)
    text = unescape(string[matchstart:matchend], True)
    prb = self.problematic(text, text, msg)
    return string[:matchstart], [prb], string[matchend:], [msg]
def phrase_ref(self, before, after, rawsource, escaped, text):
    # Build a phrase reference (`text`_), handling an optional embedded
    # <URI> or <alias_> target.
    # NOTE(review): branch/assignment lines were missing from the damaged
    # source; `aliastype` bookkeeping and the anonymous/named dispatch
    # were reconstructed.
    match = self.patterns.embedded_link.search(escaped)
    if match:  # embedded <URI> or <alias_>
        text = unescape(escaped[:match.start(0)])
        rawtext = unescape(escaped[:match.start(0)], True)
        aliastext = unescape(match.group(2))
        rawaliastext = unescape(match.group(2), True)
        underscore_escaped = rawaliastext.endswith(r'\_')
        if (aliastext.endswith('_')
                and not (underscore_escaped
                         or self.patterns.uri.match(aliastext))):
            # <alias_>: indirect reference to a named target
            aliastype = 'name'
            alias = normalize_name(aliastext[:-1])
            target = nodes.target(match.group(1), refname=alias)
            target.indirect_reference_name = aliastext[:-1]
        else:
            # <URI>: direct external reference
            aliastype = 'uri'
            alias_parts = split_escaped_whitespace(match.group(2))
            alias = ' '.join(''.join(unescape(part).split())
                             for part in alias_parts)
            alias = self.adjust_uri(alias)
            if alias.endswith(r'\_'):
                alias = alias[:-2] + '_'
            target = nodes.target(match.group(1), refuri=alias)
            target.referenced = 1
        if not aliastext:
            raise ApplicationError('problem with embedded link: %r'
                                   % aliastext)
        if not text:
            # no visible text: use the embedded target as reference text
            text = alias
            rawtext = rawaliastext
    else:
        target = None
        rawtext = unescape(escaped, True)

    refname = normalize_name(text)
    reference = nodes.reference(rawsource, text,
                                name=whitespace_normalize_name(text))
    reference[0].rawsource = rawtext

    node_list = [reference]

    if rawsource[-2:] == '__':          # anonymous reference
        if target and (aliastype == 'name'):
            reference['refname'] = alias
            self.document.note_refname(reference)
            # self.document.note_indirect_target(target) # required?
        elif target and (aliastype == 'uri'):
            reference['refuri'] = alias
        else:
            reference['anonymous'] = 1
    else:
        if target:
            target['names'].append(refname)
            if aliastype == 'name':
                reference['refname'] = alias
                self.document.note_indirect_target(target)
                self.document.note_refname(reference)
            else:
                reference['refuri'] = alias
                self.document.note_explicit_target(target, self.parent)
                # target.note_referenced_by(name=refname)
            node_list.append(target)
        else:
            reference['refname'] = refname
            self.document.note_refname(reference)
    return before, node_list, after, []
def adjust_uri(self, uri):
    # Prepend 'mailto:' to bare email addresses; other URIs pass through.
    match = self.patterns.email.match(uri)
    if match:
        return 'mailto:' + uri
    else:
        return uri
def interpreted(self, rawsource, text, role, lineno):
    # Render interpreted text through the role function looked up via
    # `roles.role()`.  Returns (nodes, messages); unknown roles yield a
    # problematic node plus an error message.
    role_fn, messages = roles.role(role, self.language, lineno,
                                   self.reporter)
    if role_fn:
        nodes, messages2 = role_fn(role, rawsource, text, lineno, self)
        try:
            nodes[0][0].rawsource = unescape(text, True)
        except IndexError:
            # role produced no text child to annotate; nothing to do
            pass
        return nodes, messages + messages2
    else:
        msg = self.reporter.error(
            'Unknown interpreted text role "%s".' % role,
            line=lineno)
        return ([self.problematic(rawsource, rawsource, msg)],
                messages + [msg])
def literal(self, match, lineno):
    # Dispatch target for the '``' start-string: build a literal node;
    # backslashes inside literals are restored, not interpreted.
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.literal, nodes.literal,
        restore_backslashes=True)
    return before, inlines, remaining, sysmessages
def inline_internal_target(self, match, lineno):
    # Dispatch target for the '_`' start-string: inline internal target.
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.target, nodes.target)
    if inlines and isinstance(inlines[0], nodes.target):
        assert len(inlines) == 1
        target = inlines[0]
        name = normalize_name(target.astext())
        target['names'].append(name)
        self.document.note_explicit_target(target, self.parent)
    return before, inlines, remaining, sysmessages
def substitution_reference(self, match, lineno):
    # Dispatch target for the '|' start-string: substitution reference,
    # optionally wrapped in a reference when the end-string carries a
    # trailing '_' (named) or '__' (anonymous).
    before, inlines, remaining, sysmessages, endstring = self.inline_obj(
        match, lineno, self.patterns.substitution_ref,
        nodes.substitution_reference)
    if len(inlines) == 1:
        subref_node = inlines[0]
        if isinstance(subref_node, nodes.substitution_reference):
            subref_text = subref_node.astext()
            self.document.note_substitution_ref(subref_node, subref_text)
            if endstring[-1:] == '_':
                reference_node = nodes.reference(
                    '|%s%s' % (subref_text, endstring), '')
                if endstring[-2:] == '__':
                    reference_node['anonymous'] = 1
                else:
                    reference_node['refname'] = normalize_name(subref_text)
                    self.document.note_refname(reference_node)
                reference_node += subref_node
                inlines = [reference_node]
    return before, inlines, remaining, sysmessages
def footnote_reference(self, match, lineno):
    """
    Handles `nodes.footnote_reference` and `nodes.citation_reference`
    elements.
    """
    # NOTE(review): the auto-numbered/symbol branch lines were missing
    # from the damaged source and were reconstructed.
    label = match.group('footnotelabel')
    refname = normalize_name(label)
    string = match.string
    before = string[:match.start('whole')]
    remaining = string[match.end('whole'):]
    if match.group('citationlabel'):
        refnode = nodes.citation_reference('[%s]_' % label,
                                           refname=refname)
        refnode += nodes.Text(label)
        self.document.note_citation_ref(refnode)
    else:
        refnode = nodes.footnote_reference('[%s]_' % label)
        if refname[0] == '#':           # auto-numbered (w/ optional label)
            refname = refname[1:]
            refnode['auto'] = 1
            self.document.note_autofootnote_ref(refnode)
        elif refname == '*':            # auto-symbol
            refname = ''
            refnode['auto'] = '*'
            self.document.note_symbol_footnote_ref(
                refnode)
        else:                           # manually numbered
            refnode += nodes.Text(label)
        if refname:
            refnode['refname'] = refname
            self.document.note_footnote_ref(refnode)
        if utils.get_trim_footnote_ref_space(self.document.settings):
            before = before.rstrip()
    return (before, [refnode], remaining, [])
966 def reference(self
, match
, lineno
, anonymous
=False):
967 referencename
= match
.group('refname')
968 refname
= normalize_name(referencename
)
969 referencenode
= nodes
.reference(
970 referencename
+ match
.group('refend'), referencename
,
971 name
=whitespace_normalize_name(referencename
))
972 referencenode
[0].rawsource
= referencename
974 referencenode
['anonymous'] = 1
976 referencenode
['refname'] = refname
977 self
.document
.note_refname(referencenode
)
978 string
= match
.string
979 matchstart
= match
.start('whole')
980 matchend
= match
.end('whole')
981 return (string
[:matchstart
], [referencenode
], string
[matchend
:], [])
983 def anonymous_reference(self
, match
, lineno
):
984 return self
.reference(match
, lineno
, anonymous
=1)
986 def standalone_uri(self
, match
, lineno
):
987 if (not match
.group('scheme')
988 or match
.group('scheme').lower() in urischemes
.schemes
):
989 if match
.group('email'):
990 addscheme
= 'mailto:'
993 text
= match
.group('whole')
994 unescaped
= unescape(text
)
995 rawsource
= unescape(text
, True)
996 reference
= nodes
.reference(rawsource
, unescaped
,
997 refuri
=addscheme
+ unescaped
)
998 reference
[0].rawsource
= rawsource
1000 else: # not a valid scheme
1001 raise MarkupMismatch
1003 def pep_reference(self
, match
, lineno
):
1004 text
= match
.group(0)
1005 if text
.startswith('pep-'):
1006 pepnum
= int(match
.group('pepnum1'))
1007 elif text
.startswith('PEP'):
1008 pepnum
= int(match
.group('pepnum2'))
1010 raise MarkupMismatch
1011 ref
= (self
.document
.settings
.pep_base_url
1012 + self
.document
.settings
.pep_file_url_template
% pepnum
)
1013 unescaped
= unescape(text
)
1014 return [nodes
.reference(unescape(text
, True), unescaped
, refuri
=ref
)]
1016 rfc_url
= 'rfc%d.html'
1018 def rfc_reference(self
, match
, lineno
):
1019 text
= match
.group(0)
1020 if text
.startswith('RFC'):
1021 rfcnum
= int(match
.group('rfcnum'))
1022 ref
= self
.document
.settings
.rfc_base_url
+ self
.rfc_url
% rfcnum
1024 raise MarkupMismatch
1025 unescaped
= unescape(text
)
1026 return [nodes
.reference(unescape(text
, True), unescaped
, refuri
=ref
)]
1028 def implicit_inline(self
, text
, lineno
):
1030 Check each of the patterns in `self.implicit_dispatch` for a match,
1031 and dispatch to the stored method for the pattern. Recursively check
1032 the text before and after the match. Return a list of `nodes.Text`
1033 and inline element nodes.
1037 for pattern
, method
in self
.implicit_dispatch
:
1038 match
= pattern
.search(text
)
1041 # Must recurse on strings before *and* after the match;
1042 # there may be multiple patterns.
1043 return (self
.implicit_inline(text
[:match
.start()], lineno
)
1044 + method(match
, lineno
) +
1045 self
.implicit_inline(text
[match
.end():], lineno
))
1046 except MarkupMismatch
:
1048 return [nodes
.Text(unescape(text
), rawsource
=unescape(text
, True))]
1050 dispatch
= {'*': emphasis
,
1052 '`': interpreted_or_phrase_ref
,
1054 '_`': inline_internal_target
,
1055 ']_': footnote_reference
,
1056 '|': substitution_reference
,
1058 '__': anonymous_reference
}
1061 def _loweralpha_to_int(s
, _zero
=(ord('a')-1)):
1062 return ord(s
) - _zero
1064 def _upperalpha_to_int(s
, _zero
=(ord('A')-1)):
1065 return ord(s
) - _zero
def _lowerroman_to_int(s):
    """Convert a lowercase Roman numeral (e.g. 'iv') to an integer."""
    return roman.fromRoman(s.upper())
class Body(RSTState):

    """
    Generic classifier of the first line of a block.
    """

    double_width_pad_char = tableparser.TableParser.double_width_pad_char
    """Padding character for East Asian double-width text."""

    enum = Struct()
    """Enumerated list parsing information."""

    enum.formatinfo = {
          'parens': Struct(prefix='(', suffix=')', start=1, end=-1),
          'rparen': Struct(prefix='', suffix=')', start=0, end=-1),
          'period': Struct(prefix='', suffix='.', start=0, end=-1)}
    enum.formats = enum.formatinfo.keys()
    enum.sequences = ['arabic', 'loweralpha', 'upperalpha',
                      'lowerroman', 'upperroman'] # ORDERED!
    enum.sequencepats = {'arabic': '[0-9]+',
                         'loweralpha': '[a-z]',
                         'upperalpha': '[A-Z]',
                         'lowerroman': '[ivxlcdm]+',
                         'upperroman': '[IVXLCDM]+',}
    enum.converters = {'arabic': int,
                       'loweralpha': _loweralpha_to_int,
                       'upperalpha': _upperalpha_to_int,
                       'lowerroman': _lowerroman_to_int,
                       'upperroman': roman.fromRoman}

    enum.sequenceregexps = {}
    for sequence in enum.sequences:
        enum.sequenceregexps[sequence] = re.compile(
            enum.sequencepats[sequence] + '$', re.UNICODE)

    grid_table_top_pat = re.compile(r'\+-[-+]+-\+ *$')
    """Matches the top (& bottom) of a full table)."""

    simple_table_top_pat = re.compile('=+( +=+)+ *$')
    """Matches the top of a simple table."""

    simple_table_border_pat = re.compile('=+[ =]*$')
    """Matches the bottom & header bottom of a simple table."""

    pats = {}
    """Fragments of patterns used by transitions."""

    pats['nonalphanum7bit'] = '[!-/:-@[-`{-~]'
    pats['alpha'] = '[a-zA-Z]'
    pats['alphanum'] = '[a-zA-Z0-9]'
    pats['alphanumplus'] = '[a-zA-Z0-9_-]'
    pats['enum'] = ('(%(arabic)s|%(loweralpha)s|%(upperalpha)s|%(lowerroman)s'
                    '|%(upperroman)s|#)' % enum.sequencepats)
    pats['optname'] = '%(alphanum)s%(alphanumplus)s*' % pats
    # @@@ Loosen up the pattern?  Allow Unicode?
    pats['optarg'] = '(%(alpha)s%(alphanumplus)s*|<[^<>]+>)' % pats
    pats['shortopt'] = r'(-|\+)%(alphanum)s( ?%(optarg)s)?' % pats
    pats['longopt'] = r'(--|/)%(optname)s([ =]%(optarg)s)?' % pats
    pats['option'] = r'(%(shortopt)s|%(longopt)s)' % pats

    for format in enum.formats:
        pats[format] = '(?P<%s>%s%s%s)' % (
            format, re.escape(enum.formatinfo[format].prefix),
            pats['enum'], re.escape(enum.formatinfo[format].suffix))

    patterns = {
          'bullet': u'[-+*\u2022\u2023\u2043]( +|$)',
          'enumerator': r'(%(parens)s|%(rparen)s|%(period)s)( +|$)' % pats,
          'field_marker': r':(?![: ])([^:\\]|\\.|:(?!([ `]|$)))*(?<! ):( +|$)',
          'option_marker': r'%(option)s(, %(option)s)*(  +| ?$)' % pats,
          'doctest': r'>>>( +|$)',
          'line_block': r'\|( +|$)',
          'grid_table_top': grid_table_top_pat,
          'simple_table_top': simple_table_top_pat,
          'explicit_markup': r'\.\.( +|$)',
          'anonymous': r'__( +|$)',
          'line': r'(%(nonalphanum7bit)s)\1* *$' % pats,
          'text': r''}
    initial_transitions = (
           'bullet',
           'enumerator',
           'field_marker',
           'option_marker',
           'doctest',
           'line_block',
           'grid_table_top',
           'simple_table_top',
           'explicit_markup',
           'anonymous',
           'line',
           'text')
1163 def indent(self
, match
, context
, next_state
):
1165 indented
, indent
, line_offset
, blank_finish
= \
1166 self
.state_machine
.get_indented()
1167 elements
= self
.block_quote(indented
, line_offset
)
1168 self
.parent
+= elements
1169 if not blank_finish
:
1170 self
.parent
+= self
.unindent_warning('Block quote')
1171 return context
, next_state
, []
1173 def block_quote(self
, indented
, line_offset
):
1180 new_line_offset
) = self
.split_attribution(indented
, line_offset
)
1181 blockquote
= nodes
.block_quote()
1182 self
.nested_parse(blockquote_lines
, line_offset
, blockquote
)
1183 elements
.append(blockquote
)
1184 if attribution_lines
:
1185 attribution
, messages
= self
.parse_attribution(
1186 attribution_lines
, attribution_offset
)
1187 blockquote
+= attribution
1188 elements
+= messages
1189 line_offset
= new_line_offset
1190 while indented
and not indented
[0]:
1191 indented
= indented
[1:]
1195 # U+2014 is an em-dash:
1196 attribution_pattern
= re
.compile(u
'(---?(?!-)|\u2014) *(?=[^ \\n])',
1199 def split_attribution(self
, indented
, line_offset
):
1201 Check for a block quote attribution and split it off:
1203 * First line after a blank line must begin with a dash ("--", "---",
1204 em-dash; matches `self.attribution_pattern`).
1205 * Every line after that must have consistent indentation.
1206 * Attributions must be preceded by block quote content.
1208 Return a tuple of: (block quote content lines, content offset,
1209 attribution lines, attribution offset, remaining indented lines).
1212 nonblank_seen
= False
1213 for i
in range(len(indented
)):
1214 line
= indented
[i
].rstrip()
1216 if nonblank_seen
and blank
== i
- 1: # last line blank
1217 match
= self
.attribution_pattern
.match(line
)
1219 attribution_end
, indent
= self
.check_attribution(
1222 a_lines
= indented
[i
:attribution_end
]
1223 a_lines
.trim_left(match
.end(), end
=1)
1224 a_lines
.trim_left(indent
, start
=1)
1225 return (indented
[:i
], a_lines
,
1226 i
, indented
[attribution_end
:],
1227 line_offset
+ attribution_end
)
1228 nonblank_seen
= True
1232 return (indented
, None, None, None, None)
1234 def check_attribution(self
, indented
, attribution_start
):
1236 Check attribution shape.
1237 Return the index past the end of the attribution, and the indent.
1240 i
= attribution_start
+ 1
1241 for i
in range(attribution_start
+ 1, len(indented
)):
1242 line
= indented
[i
].rstrip()
1246 indent
= len(line
) - len(line
.lstrip())
1247 elif len(line
) - len(line
.lstrip()) != indent
:
1248 return None, None # bad shape; not an attribution
1250 # return index of line after last attribution line:
1252 return i
, (indent
or 0)
1254 def parse_attribution(self
, indented
, line_offset
):
1255 text
= '\n'.join(indented
).rstrip()
1256 lineno
= self
.state_machine
.abs_line_number() + line_offset
1257 textnodes
, messages
= self
.inline_text(text
, lineno
)
1258 node
= nodes
.attribution(text
, '', *textnodes
)
1259 node
.source
, node
.line
= self
.state_machine
.get_source_and_line(lineno
)
1260 return node
, messages
1262 def bullet(self
, match
, context
, next_state
):
1263 """Bullet list item."""
1264 bulletlist
= nodes
.bullet_list()
1266 bulletlist
.line
) = self
.state_machine
.get_source_and_line()
1267 self
.parent
+= bulletlist
1268 bulletlist
['bullet'] = match
.string
[0]
1269 i
, blank_finish
= self
.list_item(match
.end())
1271 offset
= self
.state_machine
.line_offset
+ 1 # next line
1272 new_line_offset
, blank_finish
= self
.nested_list_parse(
1273 self
.state_machine
.input_lines
[offset
:],
1274 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1275 node
=bulletlist
, initial_state
='BulletList',
1276 blank_finish
=blank_finish
)
1277 self
.goto_line(new_line_offset
)
1278 if not blank_finish
:
1279 self
.parent
+= self
.unindent_warning('Bullet list')
1280 return [], next_state
, []
1282 def list_item(self
, indent
):
1283 if self
.state_machine
.line
[indent
:]:
1284 indented
, line_offset
, blank_finish
= (
1285 self
.state_machine
.get_known_indented(indent
))
1287 indented
, indent
, line_offset
, blank_finish
= (
1288 self
.state_machine
.get_first_known_indented(indent
))
1289 listitem
= nodes
.list_item('\n'.join(indented
))
1291 self
.nested_parse(indented
, input_offset
=line_offset
,
1293 return listitem
, blank_finish
1295 def enumerator(self
, match
, context
, next_state
):
1296 """Enumerated List Item"""
1297 format
, sequence
, text
, ordinal
= self
.parse_enumerator(match
)
1298 if not self
.is_enumerated_list_item(ordinal
, sequence
, format
):
1299 raise statemachine
.TransitionCorrection('text')
1300 enumlist
= nodes
.enumerated_list()
1301 self
.parent
+= enumlist
1303 enumlist
['enumtype'] = 'arabic'
1305 enumlist
['enumtype'] = sequence
1306 enumlist
['prefix'] = self
.enum
.formatinfo
[format
].prefix
1307 enumlist
['suffix'] = self
.enum
.formatinfo
[format
].suffix
1309 enumlist
['start'] = ordinal
1310 msg
= self
.reporter
.info(
1311 'Enumerated list start value not ordinal-1: "%s" (ordinal %s)'
1314 listitem
, blank_finish
= self
.list_item(match
.end())
1315 enumlist
+= listitem
1316 offset
= self
.state_machine
.line_offset
+ 1 # next line
1317 newline_offset
, blank_finish
= self
.nested_list_parse(
1318 self
.state_machine
.input_lines
[offset
:],
1319 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1320 node
=enumlist
, initial_state
='EnumeratedList',
1321 blank_finish
=blank_finish
,
1322 extra_settings
={'lastordinal': ordinal
,
1324 'auto': sequence
== '#'})
1325 self
.goto_line(newline_offset
)
1326 if not blank_finish
:
1327 self
.parent
+= self
.unindent_warning('Enumerated list')
1328 return [], next_state
, []
1330 def parse_enumerator(self
, match
, expected_sequence
=None):
1332 Analyze an enumerator and return the results.
1335 - the enumerator format ('period', 'parens', or 'rparen'),
1336 - the sequence used ('arabic', 'loweralpha', 'upperroman', etc.),
1337 - the text of the enumerator, stripped of formatting, and
1338 - the ordinal value of the enumerator ('a' -> 1, 'ii' -> 2, etc.;
1339 ``None`` is returned for invalid enumerator text).
1341 The enumerator format has already been determined by the regular
1342 expression match. If `expected_sequence` is given, that sequence is
1343 tried first. If not, we check for Roman numeral 1. This way,
1344 single-character Roman numerals (which are also alphabetical) can be
1345 matched. If no sequence has been matched, all sequences are checked in
1348 groupdict
= match
.groupdict()
1350 for format
in self
.enum
.formats
:
1351 if groupdict
[format
]: # was this the format matched?
1352 break # yes; keep `format`
1353 else: # shouldn't happen
1354 raise ParserError('enumerator format not matched')
1355 text
= groupdict
[format
][self
.enum
.formatinfo
[format
].start
1356 :self
.enum
.formatinfo
[format
].end
]
1359 elif expected_sequence
:
1361 if self
.enum
.sequenceregexps
[expected_sequence
].match(text
):
1362 sequence
= expected_sequence
1363 except KeyError: # shouldn't happen
1364 raise ParserError('unknown enumerator sequence: %s'
1367 sequence
= 'lowerroman'
1369 sequence
= 'upperroman'
1371 for sequence
in self
.enum
.sequences
:
1372 if self
.enum
.sequenceregexps
[sequence
].match(text
):
1374 else: # shouldn't happen
1375 raise ParserError('enumerator sequence not matched')
1380 ordinal
= self
.enum
.converters
[sequence
](text
)
1381 except roman
.InvalidRomanNumeralError
:
1383 return format
, sequence
, text
, ordinal
1385 def is_enumerated_list_item(self
, ordinal
, sequence
, format
):
1387 Check validity based on the ordinal value and the second line.
1389 Return true if the ordinal is valid and the second line is blank,
1390 indented, or starts with the next enumerator or an auto-enumerator.
1395 next_line
= self
.state_machine
.next_line()
1396 except EOFError: # end of input lines
1397 self
.state_machine
.previous_line()
1400 self
.state_machine
.previous_line()
1401 if not next_line
[:1].strip(): # blank or indented
1403 result
= self
.make_enumerator(ordinal
+ 1, sequence
, format
)
1405 next_enumerator
, auto_enumerator
= result
1407 if ( next_line
.startswith(next_enumerator
) or
1408 next_line
.startswith(auto_enumerator
) ):
1414 def make_enumerator(self
, ordinal
, sequence
, format
):
1416 Construct and return the next enumerated list item marker, and an
1417 auto-enumerator ("#" instead of the regular enumerator).
1419 Return ``None`` for invalid (out of range) ordinals.
1423 elif sequence
== 'arabic':
1424 enumerator
= str(ordinal
)
1426 if sequence
.endswith('alpha'):
1429 enumerator
= chr(ordinal
+ ord('a') - 1)
1430 elif sequence
.endswith('roman'):
1432 enumerator
= roman
.toRoman(ordinal
)
1433 except roman
.RomanError
:
1435 else: # shouldn't happen
1436 raise ParserError('unknown enumerator sequence: "%s"'
1438 if sequence
.startswith('lower'):
1439 enumerator
= enumerator
.lower()
1440 elif sequence
.startswith('upper'):
1441 enumerator
= enumerator
.upper()
1442 else: # shouldn't happen
1443 raise ParserError('unknown enumerator sequence: "%s"'
1445 formatinfo
= self
.enum
.formatinfo
[format
]
1446 next_enumerator
= (formatinfo
.prefix
+ enumerator
+ formatinfo
.suffix
1448 auto_enumerator
= formatinfo
.prefix
+ '#' + formatinfo
.suffix
+ ' '
1449 return next_enumerator
, auto_enumerator
1451 def field_marker(self
, match
, context
, next_state
):
1452 """Field list item."""
1453 field_list
= nodes
.field_list()
1454 self
.parent
+= field_list
1455 field
, blank_finish
= self
.field(match
)
1457 offset
= self
.state_machine
.line_offset
+ 1 # next line
1458 newline_offset
, blank_finish
= self
.nested_list_parse(
1459 self
.state_machine
.input_lines
[offset
:],
1460 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1461 node
=field_list
, initial_state
='FieldList',
1462 blank_finish
=blank_finish
)
1463 self
.goto_line(newline_offset
)
1464 if not blank_finish
:
1465 self
.parent
+= self
.unindent_warning('Field list')
1466 return [], next_state
, []
1468 def field(self
, match
):
1469 name
= self
.parse_field_marker(match
)
1470 src
, srcline
= self
.state_machine
.get_source_and_line()
1471 lineno
= self
.state_machine
.abs_line_number()
1472 indented
, indent
, line_offset
, blank_finish
= \
1473 self
.state_machine
.get_first_known_indented(match
.end())
1474 field_node
= nodes
.field()
1475 field_node
.source
= src
1476 field_node
.line
= srcline
1477 name_nodes
, name_messages
= self
.inline_text(name
, lineno
)
1478 field_node
+= nodes
.field_name(name
, '', *name_nodes
)
1479 field_body
= nodes
.field_body('\n'.join(indented
), *name_messages
)
1480 field_node
+= field_body
1482 self
.parse_field_body(indented
, line_offset
, field_body
)
1483 return field_node
, blank_finish
1485 def parse_field_marker(self
, match
):
1486 """Extract & return field name from a field marker match."""
1487 field
= match
.group()[1:] # strip off leading ':'
1488 field
= field
[:field
.rfind(':')] # strip off trailing ':' etc.
    def parse_field_body(self, indented, offset, node):
        """Parse the field body lines into `node` via a nested parse."""
        self.nested_parse(indented, input_offset=offset, node=node)
1494 def option_marker(self
, match
, context
, next_state
):
1495 """Option list item."""
1496 optionlist
= nodes
.option_list()
1497 (optionlist
.source
, optionlist
.line
) = self
.state_machine
.get_source_and_line()
1499 listitem
, blank_finish
= self
.option_list_item(match
)
1500 except MarkupError
, error
:
1501 # This shouldn't happen; pattern won't match.
1502 msg
= self
.reporter
.error(u
'Invalid option list marker: %s' %
1505 indented
, indent
, line_offset
, blank_finish
= \
1506 self
.state_machine
.get_first_known_indented(match
.end())
1507 elements
= self
.block_quote(indented
, line_offset
)
1508 self
.parent
+= elements
1509 if not blank_finish
:
1510 self
.parent
+= self
.unindent_warning('Option list')
1511 return [], next_state
, []
1512 self
.parent
+= optionlist
1513 optionlist
+= listitem
1514 offset
= self
.state_machine
.line_offset
+ 1 # next line
1515 newline_offset
, blank_finish
= self
.nested_list_parse(
1516 self
.state_machine
.input_lines
[offset
:],
1517 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1518 node
=optionlist
, initial_state
='OptionList',
1519 blank_finish
=blank_finish
)
1520 self
.goto_line(newline_offset
)
1521 if not blank_finish
:
1522 self
.parent
+= self
.unindent_warning('Option list')
1523 return [], next_state
, []
1525 def option_list_item(self
, match
):
1526 offset
= self
.state_machine
.abs_line_offset()
1527 options
= self
.parse_option_marker(match
)
1528 indented
, indent
, line_offset
, blank_finish
= \
1529 self
.state_machine
.get_first_known_indented(match
.end())
1530 if not indented
: # not an option list item
1531 self
.goto_line(offset
)
1532 raise statemachine
.TransitionCorrection('text')
1533 option_group
= nodes
.option_group('', *options
)
1534 description
= nodes
.description('\n'.join(indented
))
1535 option_list_item
= nodes
.option_list_item('', option_group
,
1538 self
.nested_parse(indented
, input_offset
=line_offset
,
1540 return option_list_item
, blank_finish
1542 def parse_option_marker(self
, match
):
1544 Return a list of `node.option` and `node.option_argument` objects,
1545 parsed from an option marker match.
1547 :Exception: `MarkupError` for invalid option markers.
1550 optionstrings
= match
.group().rstrip().split(', ')
1551 for optionstring
in optionstrings
:
1552 tokens
= optionstring
.split()
1554 firstopt
= tokens
[0].split('=', 1)
1555 if len(firstopt
) > 1:
1556 # "--opt=value" form
1557 tokens
[:1] = firstopt
1559 elif (len(tokens
[0]) > 2
1560 and ((tokens
[0].startswith('-')
1561 and not tokens
[0].startswith('--'))
1562 or tokens
[0].startswith('+'))):
1564 tokens
[:1] = [tokens
[0][:2], tokens
[0][2:]]
1566 if len(tokens
) > 1 and (tokens
[1].startswith('<')
1567 and tokens
[-1].endswith('>')):
1568 # "-o <value1 value2>" form; join all values into one token
1569 tokens
[1:] = [' '.join(tokens
[1:])]
1570 if 0 < len(tokens
) <= 2:
1571 option
= nodes
.option(optionstring
)
1572 option
+= nodes
.option_string(tokens
[0], tokens
[0])
1574 option
+= nodes
.option_argument(tokens
[1], tokens
[1],
1575 delimiter
=delimiter
)
1576 optlist
.append(option
)
1579 'wrong number of option tokens (=%s), should be 1 or 2: '
1580 '"%s"' % (len(tokens
), optionstring
))
1583 def doctest(self
, match
, context
, next_state
):
1584 data
= '\n'.join(self
.state_machine
.get_text_block())
1585 # TODO: prepend class value ['pycon'] (Python Console)
1586 # parse with `directives.body.CodeBlock` (returns literal-block
1587 # with class "code" and syntax highlight markup).
1588 self
.parent
+= nodes
.doctest_block(data
, data
)
1589 return [], next_state
, []
1591 def line_block(self
, match
, context
, next_state
):
1592 """First line of a line block."""
1593 block
= nodes
.line_block()
1594 self
.parent
+= block
1595 lineno
= self
.state_machine
.abs_line_number()
1596 line
, messages
, blank_finish
= self
.line_block_line(match
, lineno
)
1598 self
.parent
+= messages
1599 if not blank_finish
:
1600 offset
= self
.state_machine
.line_offset
+ 1 # next line
1601 new_line_offset
, blank_finish
= self
.nested_list_parse(
1602 self
.state_machine
.input_lines
[offset
:],
1603 input_offset
=self
.state_machine
.abs_line_offset() + 1,
1604 node
=block
, initial_state
='LineBlock',
1606 self
.goto_line(new_line_offset
)
1607 if not blank_finish
:
1608 self
.parent
+= self
.reporter
.warning(
1609 'Line block ends without a blank line.',
1612 if block
[0].indent
is None:
1614 self
.nest_line_block_lines(block
)
1615 return [], next_state
, []
1617 def line_block_line(self
, match
, lineno
):
1618 """Return one line element of a line_block."""
1619 indented
, indent
, line_offset
, blank_finish
= \
1620 self
.state_machine
.get_first_known_indented(match
.end(),
1622 text
= u
'\n'.join(indented
)
1623 text_nodes
, messages
= self
.inline_text(text
, lineno
)
1624 line
= nodes
.line(text
, '', *text_nodes
)
1625 if match
.string
.rstrip() != '|': # not empty
1626 line
.indent
= len(match
.group(1)) - 1
1627 return line
, messages
, blank_finish
1629 def nest_line_block_lines(self
, block
):
1630 for index
in range(1, len(block
)):
1631 if getattr(block
[index
], 'indent', None) is None:
1632 block
[index
].indent
= block
[index
- 1].indent
1633 self
.nest_line_block_segment(block
)
1635 def nest_line_block_segment(self
, block
):
1636 indents
= [item
.indent
for item
in block
]
1637 least
= min(indents
)
1639 new_block
= nodes
.line_block()
1641 if item
.indent
> least
:
1642 new_block
.append(item
)
1645 self
.nest_line_block_segment(new_block
)
1646 new_items
.append(new_block
)
1647 new_block
= nodes
.line_block()
1648 new_items
.append(item
)
1650 self
.nest_line_block_segment(new_block
)
1651 new_items
.append(new_block
)
1652 block
[:] = new_items
1654 def grid_table_top(self
, match
, context
, next_state
):
1655 """Top border of a full table."""
1656 return self
.table_top(match
, context
, next_state
,
1657 self
.isolate_grid_table
,
1658 tableparser
.GridTableParser
)
1660 def simple_table_top(self
, match
, context
, next_state
):
1661 """Top border of a simple table."""
1662 return self
.table_top(match
, context
, next_state
,
1663 self
.isolate_simple_table
,
1664 tableparser
.SimpleTableParser
)
1666 def table_top(self
, match
, context
, next_state
,
1667 isolate_function
, parser_class
):
1668 """Top border of a generic table."""
1669 nodelist
, blank_finish
= self
.table(isolate_function
, parser_class
)
1670 self
.parent
+= nodelist
1671 if not blank_finish
:
1672 msg
= self
.reporter
.warning(
1673 'Blank line required after table.',
1674 line
=self
.state_machine
.abs_line_number()+1)
1676 return [], next_state
, []
1678 def table(self
, isolate_function
, parser_class
):
1679 """Parse a table."""
1680 block
, messages
, blank_finish
= isolate_function()
1683 parser
= parser_class()
1684 tabledata
= parser
.parse(block
)
1685 tableline
= (self
.state_machine
.abs_line_number() - len(block
)
1687 table
= self
.build_table(tabledata
, tableline
)
1688 nodelist
= [table
] + messages
1689 except tableparser
.TableMarkupError
, err
:
1690 nodelist
= self
.malformed_table(block
, ' '.join(err
.args
),
1691 offset
=err
.offset
) + messages
1694 return nodelist
, blank_finish
1696 def isolate_grid_table(self
):
1700 block
= self
.state_machine
.get_text_block(flush_left
=True)
1701 except statemachine
.UnexpectedIndentationError
, err
:
1702 block
, src
, srcline
= err
.args
1703 messages
.append(self
.reporter
.error('Unexpected indentation.',
1704 source
=src
, line
=srcline
))
1707 # for East Asian chars:
1708 block
.pad_double_width(self
.double_width_pad_char
)
1709 width
= len(block
[0].strip())
1710 for i
in range(len(block
)):
1711 block
[i
] = block
[i
].strip()
1712 if block
[i
][0] not in '+|': # check left edge
1714 self
.state_machine
.previous_line(len(block
) - i
)
1717 if not self
.grid_table_top_pat
.match(block
[-1]): # find bottom
1719 # from second-last to third line of table:
1720 for i
in range(len(block
) - 2, 1, -1):
1721 if self
.grid_table_top_pat
.match(block
[i
]):
1722 self
.state_machine
.previous_line(len(block
) - i
+ 1)
1726 messages
.extend(self
.malformed_table(block
))
1727 return [], messages
, blank_finish
1728 for i
in range(len(block
)): # check right edge
1729 if len(block
[i
]) != width
or block
[i
][-1] not in '+|':
1730 messages
.extend(self
.malformed_table(block
))
1731 return [], messages
, blank_finish
1732 return block
, messages
, blank_finish
1734 def isolate_simple_table(self
):
1735 start
= self
.state_machine
.line_offset
1736 lines
= self
.state_machine
.input_lines
1737 limit
= len(lines
) - 1
1738 toplen
= len(lines
[start
].strip())
1739 pattern_match
= self
.simple_table_border_pat
.match
1745 match
= pattern_match(line
)
1747 if len(line
.strip()) != toplen
:
1748 self
.state_machine
.next_line(i
- start
)
1749 messages
= self
.malformed_table(
1750 lines
[start
:i
+1], 'Bottom/header table border does '
1751 'not match top border.')
1752 return [], messages
, i
== limit
or not lines
[i
+1].strip()
1755 if found
== 2 or i
== limit
or not lines
[i
+1].strip():
1759 else: # reached end of input_lines
1761 extra
= ' or no blank line after table bottom'
1762 self
.state_machine
.next_line(found_at
- start
)
1763 block
= lines
[start
:found_at
+1]
1766 self
.state_machine
.next_line(i
- start
- 1)
1767 block
= lines
[start
:]
1768 messages
= self
.malformed_table(
1769 block
, 'No bottom table border found%s.' % extra
)
1770 return [], messages
, not extra
1771 self
.state_machine
.next_line(end
- start
)
1772 block
= lines
[start
:end
+1]
1773 # for East Asian chars:
1774 block
.pad_double_width(self
.double_width_pad_char
)
1775 return block
, [], end
== limit
or not lines
[end
+1].strip()
1777 def malformed_table(self
, block
, detail
='', offset
=0):
1778 block
.replace(self
.double_width_pad_char
, '')
1779 data
= '\n'.join(block
)
1780 message
= 'Malformed table.'
1781 startline
= self
.state_machine
.abs_line_number() - len(block
) + 1
1783 message
+= '\n' + detail
1784 error
= self
.reporter
.error(message
, nodes
.literal_block(data
, data
),
1785 line
=startline
+offset
)
1788 def build_table(self
, tabledata
, tableline
, stub_columns
=0, widths
=None):
1789 colwidths
, headrows
, bodyrows
= tabledata
1790 table
= nodes
.table()
1791 if widths
== 'auto':
1792 table
['classes'] += ['colwidths-auto']
1793 elif widths
: # "grid" or list of integers
1794 table
['classes'] += ['colwidths-given']
1795 tgroup
= nodes
.tgroup(cols
=len(colwidths
))
1797 for colwidth
in colwidths
:
1798 colspec
= nodes
.colspec(colwidth
=colwidth
)
1800 colspec
.attributes
['stub'] = 1
1804 thead
= nodes
.thead()
1806 for row
in headrows
:
1807 thead
+= self
.build_table_row(row
, tableline
)
1808 tbody
= nodes
.tbody()
1810 for row
in bodyrows
:
1811 tbody
+= self
.build_table_row(row
, tableline
)
1814 def build_table_row(self
, rowdata
, tableline
):
1816 for cell
in rowdata
:
1819 morerows
, morecols
, offset
, cellblock
= cell
1822 attributes
['morerows'] = morerows
1824 attributes
['morecols'] = morecols
1825 entry
= nodes
.entry(**attributes
)
1827 if ''.join(cellblock
):
1828 self
.nested_parse(cellblock
, input_offset
=tableline
+offset
,
1834 """Patterns and constants used for explicit markup recognition."""
1836 explicit
.patterns
= Struct(
1837 target
=re
.compile(r
"""
1839 _ # anonymous target
1841 (?!_) # no underscore at the beginning
1842 (?P<quote>`?) # optional open quote
1843 (?![ `]) # first char. not space or
1845 (?P<name> # reference name
1848 %(non_whitespace_escape_before)s
1849 (?P=quote) # close quote if open quote used
1851 (?<!(?<!\x00):) # no unescaped colon at end
1852 %(non_whitespace_escape_before)s
1853 [ ]? # optional space
1854 : # end of reference name
1855 ([ ]+|$) # followed by whitespace
1856 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1857 reference
=re
.compile(r
"""
1859 (?P<simple>%(simplename)s)_
1863 (?P<phrase>.+?) # hyperlink phrase
1864 %(non_whitespace_escape_before)s
1865 `_ # close backquote,
1869 """ % vars(Inliner
), re
.VERBOSE | re
.UNICODE
),
1870 substitution
=re
.compile(r
"""
1872 (?![ ]) # first char. not space
1873 (?P<name>.+?) # substitution text
1874 %(non_whitespace_escape_before)s
1875 \| # close delimiter
1877 ([ ]+|$) # followed by whitespace
1878 """ % vars(Inliner
),
1879 re
.VERBOSE | re
.UNICODE
),)
1881 def footnote(self
, match
):
1882 src
, srcline
= self
.state_machine
.get_source_and_line()
1883 indented
, indent
, offset
, blank_finish
= \
1884 self
.state_machine
.get_first_known_indented(match
.end())
1885 label
= match
.group(1)
1886 name
= normalize_name(label
)
1887 footnote
= nodes
.footnote('\n'.join(indented
))
1888 footnote
.source
= src
1889 footnote
.line
= srcline
1890 if name
[0] == '#': # auto-numbered
1891 name
= name
[1:] # autonumber label
1892 footnote
['auto'] = 1
1894 footnote
['names'].append(name
)
1895 self
.document
.note_autofootnote(footnote
)
1896 elif name
== '*': # auto-symbol
1898 footnote
['auto'] = '*'
1899 self
.document
.note_symbol_footnote(footnote
)
1900 else: # manually numbered
1901 footnote
+= nodes
.label('', label
)
1902 footnote
['names'].append(name
)
1903 self
.document
.note_footnote(footnote
)
1905 self
.document
.note_explicit_target(footnote
, footnote
)
1907 self
.document
.set_id(footnote
, footnote
)
1909 self
.nested_parse(indented
, input_offset
=offset
, node
=footnote
)
1910 return [footnote
], blank_finish
1912 def citation(self
, match
):
1913 src
, srcline
= self
.state_machine
.get_source_and_line()
1914 indented
, indent
, offset
, blank_finish
= \
1915 self
.state_machine
.get_first_known_indented(match
.end())
1916 label
= match
.group(1)
1917 name
= normalize_name(label
)
1918 citation
= nodes
.citation('\n'.join(indented
))
1919 citation
.source
= src
1920 citation
.line
= srcline
1921 citation
+= nodes
.label('', label
)
1922 citation
['names'].append(name
)
1923 self
.document
.note_citation(citation
)
1924 self
.document
.note_explicit_target(citation
, citation
)
1926 self
.nested_parse(indented
, input_offset
=offset
, node
=citation
)
1927 return [citation
], blank_finish
def hyperlink_target(self, match):
    """
    Parse a hyperlink target (".. _name: URI-or-reference").

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    Raises `MarkupError` if no target pattern can be matched.
    """
    pattern = self.explicit.patterns.target
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(
          match.end(), until_blank=True, strip_indent=False)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block = [escape2null(line) for line in block]
    # Accumulate lines until the target pattern matches the joined text.
    escaped = block[0]
    blockindex = 0
    while True:
        targetmatch = pattern.match(escaped)
        if targetmatch:
            break
        blockindex += 1
        try:
            escaped += block[blockindex]
        except IndexError:
            raise MarkupError('malformed hyperlink target.')
    del block[:blockindex]
    # Strip the matched marker from the first remaining line.
    block[0] = (block[0] + ' ')[targetmatch.end()-len(escaped)-1:].strip()
    target = self.make_target(block, blocktext, lineno,
                              targetmatch.group('name'))
    return [target], blank_finish
def make_target(self, block, block_text, lineno, target_name):
    """
    Build a target node of the type determined by `parse_target`.

    Returns a `nodes.target` for 'refname'/'refuri' targets, or the
    system_message produced by `parse_target` for malformed ones.
    """
    target_type, data = self.parse_target(block, block_text, lineno)
    if target_type == 'refname':
        target = nodes.target(block_text, '', refname=normalize_name(data))
        target.indirect_reference_name = data
        self.add_target(target_name, '', target, lineno)
        self.document.note_indirect_target(target)
        return target
    elif target_type == 'refuri':
        target = nodes.target(block_text, '')
        self.add_target(target_name, data, target, lineno)
        return target
    else:
        # 'malformed': data is a system_message node
        return data
def parse_target(self, block, block_text, lineno):
    """
    Determine the type of reference of a target.

    :Return: A 2-tuple, one of:

        - 'refname' and the indirect reference name
        - 'refuri' and the URI
        - 'malformed' and a system_message node
    """
    if block and block[-1].strip()[-1:] == '_': # possible indirect target
        reference = ' '.join([line.strip() for line in block])
        refname = self.is_reference(reference)
        if refname:
            return 'refname', refname
    ref_parts = split_escaped_whitespace(' '.join(block))
    reference = ' '.join(''.join(unescape(part).split())
                         for part in ref_parts)
    return 'refuri', reference
def is_reference(self, reference):
    """Return the reference name if `reference` is a reference, else None."""
    match = self.explicit.patterns.reference.match(
        whitespace_normalize_name(reference))
    if not match:
        return None
    return unescape(match.group('simple') or match.group('phrase'))
def add_target(self, targetname, refuri, target, lineno):
    """
    Register `target` (named or anonymous) with the document.

    Raises `ApplicationError` if the URI cannot be adjusted.
    """
    target.line = lineno
    if targetname:
        name = normalize_name(unescape(targetname))
        target['names'].append(name)
        if refuri:
            uri = self.inliner.adjust_uri(refuri)
            if uri:
                target['refuri'] = uri
            else:
                raise ApplicationError('problem with URI: %r' % refuri)
        self.document.note_explicit_target(target, self.parent)
    else:                       # anonymous target
        if refuri:
            target['refuri'] = refuri
        target['anonymous'] = 1
        self.document.note_anonymous_target(target)
def substitution_def(self, match):
    """
    Parse a substitution definition (".. |name| directive:: ...").

    Returns a 2-tuple: list of nodes (the substitution_definition or an
    error/warning system_message), and a "blank finish" boolean.
    """
    pattern = self.explicit.patterns.substitution
    src, srcline = self.state_machine.get_source_and_line()
    block, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end(),
                                                      strip_indent=False)
    blocktext = (match.string[:match.end()] + '\n'.join(block))
    block.disconnect()
    # Accumulate lines until the substitution marker pattern matches.
    escaped = escape2null(block[0].rstrip())
    blockindex = 0
    while True:
        subdefmatch = pattern.match(escaped)
        if subdefmatch:
            break
        blockindex += 1
        try:
            escaped = escaped + ' ' + escape2null(block[blockindex].strip())
        except IndexError:
            raise MarkupError('malformed substitution definition.')
    del block[:blockindex]      # strip out the substitution marker
    block[0] = (block[0].strip() + ' ')[subdefmatch.end()-len(escaped)-1:-1]
    if not block[0]:
        del block[0]
        offset += 1
    while block and not block[-1].strip():
        block.pop()
    subname = subdefmatch.group('name')
    substitution_node = nodes.substitution_definition(blocktext)
    substitution_node.source = src
    substitution_node.line = srcline
    if not block:
        msg = self.reporter.warning(
            'Substitution definition "%s" missing contents.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    block[0] = block[0].strip()
    substitution_node['names'].append(
        nodes.whitespace_normalize_name(subname))
    new_abs_offset, blank_finish = self.nested_list_parse(
        block, input_offset=offset, node=substitution_node,
        initial_state='SubstitutionDef', blank_finish=blank_finish)
    # Move any non-inline children (e.g. system messages) out to the parent.
    i = 0
    for node in substitution_node[:]:
        if not (isinstance(node, nodes.Inline) or
                isinstance(node, nodes.Text)):
            self.parent += substitution_node[i]
            del substitution_node[i]
        else:
            i += 1
    for node in substitution_node.traverse(nodes.Element):
        if self.disallowed_inside_substitution_definitions(node):
            pformat = nodes.literal_block('', node.pformat().rstrip())
            msg = self.reporter.error(
                'Substitution definition contains illegal element <%s>:'
                % node.tagname,
                pformat, nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            return [msg], blank_finish
    if len(substitution_node) == 0:
        msg = self.reporter.warning(
            'Substitution definition "%s" empty or invalid.' % subname,
            nodes.literal_block(blocktext, blocktext),
            source=src, line=srcline)
        return [msg], blank_finish
    self.document.note_substitution_def(
        substitution_node, subname, self.parent)
    return [substitution_node], blank_finish
def disallowed_inside_substitution_definitions(self, node):
    """Return True if `node` may not appear inside a substitution definition."""
    if (node['ids'] or
        isinstance(node, nodes.reference) and node.get('anonymous') or
        isinstance(node, nodes.footnote_reference) and node.get('auto')):
        return True
    else:
        return False
def directive(self, match, **option_presets):
    """Returns a 2-tuple: list of nodes, and a "blank finish" boolean."""
    type_name = match.group(1)
    directive_class, messages = directives.directive(
        type_name, self.memo.language, self.document)
    self.parent += messages
    if directive_class:
        return self.run_directive(
            directive_class, match, type_name, option_presets)
    else:
        return self.unknown_directive(type_name)
def run_directive(self, directive, match, type_name, option_presets):
    """
    Parse a directive then run its directive function.

    Parameters:

    - `directive`: The class implementing the directive.  Must be
      a subclass of `rst.Directive`.

    - `match`: A regular expression match object which matched the first
      line of the directive.

    - `type_name`: The directive name, as used in the source text.

    - `option_presets`: A dictionary of preset options, defaults for the
      directive options.  Currently, only an "alt" option is passed by
      substitution definitions (value: the substitution name), which may
      be used by an embedded image directive.

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    if isinstance(directive, (FunctionType, MethodType)):
        from docutils.parsers.rst import convert_directive_function
        directive = convert_directive_function(directive)
    lineno = self.state_machine.abs_line_number()
    initial_line_offset = self.state_machine.line_offset
    indented, indent, line_offset, blank_finish \
              = self.state_machine.get_first_known_indented(match.end(),
                                                            strip_top=0)
    block_text = '\n'.join(self.state_machine.input_lines[
        initial_line_offset : self.state_machine.line_offset + 1])
    try:
        arguments, options, content, content_offset = (
            self.parse_directive_block(indented, line_offset,
                                       directive, option_presets))
    except MarkupError as detail:
        error = self.reporter.error(
            'Error in "%s" directive:\n%s.' % (type_name,
                                               ' '.join(detail.args)),
            nodes.literal_block(block_text, block_text), line=lineno)
        return [error], blank_finish
    directive_instance = directive(
        type_name, arguments, options, content, lineno,
        content_offset, block_text, self, self.state_machine)
    try:
        result = directive_instance.run()
    except docutils.parsers.rst.DirectiveError as error:
        msg_node = self.reporter.system_message(error.level, error.msg,
                                                line=lineno)
        msg_node += nodes.literal_block(block_text, block_text)
        result = [msg_node]
    assert isinstance(result, list), \
           'Directive "%s" must return a list of nodes.' % type_name
    for i in range(len(result)):
        assert isinstance(result[i], nodes.Node), \
               ('Directive "%s" returned non-Node object (index %s): %r'
                % (type_name, i, result[i]))
    return (result,
            blank_finish or self.state_machine.is_next_line_blank())
def parse_directive_block(self, indented, line_offset, directive,
                          option_presets):
    """
    Split a directive's indented block into arguments, options, content.

    Returns (arguments, options, content, content_offset).
    Raises `MarkupError` on invalid option blocks or disallowed content.
    """
    option_spec = directive.option_spec
    has_content = directive.has_content
    if indented and not indented[0].strip():
        indented.trim_start()
        line_offset += 1
    while indented and not indented[-1].strip():
        indented.trim_end()
    if indented and (directive.required_arguments
                     or directive.optional_arguments
                     or option_spec):
        # Arguments/options end at the first blank line.
        for i, line in enumerate(indented):
            if not line.strip():
                break
        else:
            i += 1
        arg_block = indented[:i]
        content = indented[i+1:]
        content_offset = line_offset + i + 1
    else:
        content = indented
        content_offset = line_offset
        arg_block = []
    if option_spec:
        options, arg_block = self.parse_directive_options(
            option_presets, option_spec, arg_block)
    else:
        options = {}
    if arg_block and not (directive.required_arguments
                          or directive.optional_arguments):
        # No arguments expected: the "argument block" is really content.
        content = arg_block + indented[i:]
        content_offset = line_offset
        arg_block = []
    while content and not content[0].strip():
        content.trim_start()
        content_offset += 1
    if directive.required_arguments or directive.optional_arguments:
        arguments = self.parse_directive_arguments(
            directive, arg_block)
    else:
        arguments = []
    if content and not has_content:
        raise MarkupError('no content permitted')
    return (arguments, options, content, content_offset)
def parse_directive_options(self, option_presets, option_spec, arg_block):
    """
    Split option (field-list) lines off `arg_block` and parse them.

    Returns (options dict, remaining argument lines).
    Raises `MarkupError` if the option block is invalid.
    """
    options = option_presets.copy()
    for i, line in enumerate(arg_block):
        if re.match(Body.patterns['field_marker'], line):
            opt_block = arg_block[i:]
            arg_block = arg_block[:i]
            break
    else:
        opt_block = []
    if opt_block:
        success, data = self.parse_extension_options(option_spec,
                                                     opt_block)
        if success:                 # data is a dict of options
            options.update(data)
        else:                       # data is an error string
            raise MarkupError(data)
    return options, arg_block
def parse_directive_arguments(self, directive, arg_block):
    """
    Parse `arg_block` into the directive's argument list.

    Raises `MarkupError` if too few or too many arguments are supplied.
    """
    required = directive.required_arguments
    optional = directive.optional_arguments
    arg_text = '\n'.join(arg_block)
    arguments = arg_text.split()
    if len(arguments) < required:
        raise MarkupError('%s argument(s) required, %s supplied'
                          % (required, len(arguments)))
    elif len(arguments) > required + optional:
        if directive.final_argument_whitespace:
            # Fold the surplus into the final argument.
            arguments = arg_text.split(None, required + optional - 1)
        else:
            raise MarkupError(
                'maximum %s argument(s) allowed, %s supplied'
                % (required + optional, len(arguments)))
    return arguments
def parse_extension_options(self, option_spec, datalines):
    """
    Parse `datalines` for a field list containing extension options
    matching `option_spec`.

    :Parameters:
        - `option_spec`: a mapping of option name to conversion
          function, which should raise an exception on bad input.
        - `datalines`: a list of input strings.

    :Return:
        - Success value, 1 or 0.
        - An option dictionary on success, an error string on failure.
    """
    node = nodes.field_list()
    newline_offset, blank_finish = self.nested_list_parse(
        datalines, 0, node, initial_state='ExtensionOptions',
        blank_finish=True)
    if newline_offset != len(datalines): # incomplete parse of block
        return 0, 'invalid option block'
    try:
        options = utils.extract_extension_options(node, option_spec)
    except KeyError as detail:
        return 0, ('unknown option: "%s"' % detail.args[0])
    except (ValueError, TypeError) as detail:
        return 0, ('invalid option value: %s' % ' '.join(detail.args))
    except utils.ExtensionOptionError as detail:
        return 0, ('invalid option data: %s' % ' '.join(detail.args))
    if blank_finish:
        return 1, options
    else:
        return 0, 'option data incompletely parsed'
def unknown_directive(self, type_name):
    """Consume an unrecognized directive's block and report it as an error."""
    lineno = self.state_machine.abs_line_number()
    (indented, indent, offset,
     blank_finish) = self.state_machine.get_first_known_indented(
         0, strip_indent=False)
    source_text = '\n'.join(indented)
    error = self.reporter.error(
        'Unknown directive type "%s".' % type_name,
        nodes.literal_block(source_text, source_text), line=lineno)
    return [error], blank_finish
def comment(self, match):
    """
    Parse a comment (".." explicit construct).

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    if not match.string[match.end():].strip() \
          and self.state_machine.is_next_line_blank(): # an empty comment?
        return [nodes.comment()], 1 # "A tiny but practical wart."
    indented, indent, offset, blank_finish = \
          self.state_machine.get_first_known_indented(match.end())
    while indented and not indented[-1].strip():
        indented.trim_end()
    text = '\n'.join(indented)
    return [nodes.comment(text, text)], blank_finish
# Ordered (parse-method, recognizer-pattern) pairs tried by
# `explicit_construct` for each ".."-style explicit markup block.
explicit.constructs = [
      (footnote,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[
                  (                 # footnote label:
                      [0-9]+          # manually numbered footnote
                    |               # *OR*
                      \#              # anonymous auto-numbered footnote
                    |               # *OR*
                      \#%s            # auto-numbered, labeled footnote
                    |               # *OR*
                      \*              # auto-symbol footnote
                  )
                  \]
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (citation,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \[(%s)\]          # citation label
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE)),
      (hyperlink_target,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  _                 # target indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE | re.UNICODE)),
      (substitution_def,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  \|                # substitution indicator
                  (?![ ]|$)         # first char. not space or EOL
                  """, re.VERBOSE | re.UNICODE)),
      (directive,
       re.compile(r"""
                  \.\.[ ]+          # explicit markup start
                  (%s)              # directive name
                  [ ]?              # optional space
                  ::                # directive delimiter
                  ([ ]+|$)          # whitespace or end of line
                  """ % Inliner.simplename, re.VERBOSE | re.UNICODE))]
def explicit_markup(self, match, context, next_state):
    """Footnotes, hyperlink targets, directives, comments."""
    produced, finished_blank = self.explicit_construct(match)
    self.parent += produced
    self.explicit_list(finished_blank)
    return [], next_state, []
def explicit_construct(self, match):
    """Determine which explicit construct this is, parse & return it."""
    errors = []
    for method, pattern in self.explicit.constructs:
        expmatch = pattern.match(match.string)
        if expmatch:
            try:
                return method(self, expmatch)
            except MarkupError as error:
                lineno = self.state_machine.abs_line_number()
                message = ' '.join(error.args)
                errors.append(self.reporter.warning(message, line=lineno))
                break
    # Nothing matched (or the match was malformed): fall back to a comment.
    nodelist, blank_finish = self.comment(match)
    return nodelist + errors, blank_finish
def explicit_list(self, blank_finish):
    """
    Create a nested state machine for a series of explicit markup
    constructs (including anonymous hyperlink targets).
    """
    sm = self.state_machine
    start = sm.line_offset + 1          # next line
    new_line_offset, blank_finish = self.nested_list_parse(
        sm.input_lines[start:],
        input_offset=sm.abs_line_offset() + 1,
        node=self.parent, initial_state='Explicit',
        blank_finish=blank_finish,
        match_titles=sm.match_titles)
    self.goto_line(new_line_offset)
    if not blank_finish:
        self.parent += self.unindent_warning('Explicit markup')
def anonymous(self, match, context, next_state):
    """Anonymous hyperlink targets."""
    produced, finished_blank = self.anonymous_target(match)
    self.parent += produced
    self.explicit_list(finished_blank)
    return [], next_state, []
def anonymous_target(self, match):
    """
    Parse an anonymous hyperlink target ("__ URI-or-reference").

    Returns a 2-tuple: list of nodes, and a "blank finish" boolean.
    """
    lineno = self.state_machine.abs_line_number()
    block, indent, offset, blank_finish \
          = self.state_machine.get_first_known_indented(match.end(),
                                                        until_blank=True)
    blocktext = match.string[:match.end()] + '\n'.join(block)
    block = [escape2null(line) for line in block]
    target = self.make_target(block, blocktext, lineno, '')
    return [target], blank_finish
def line(self, match, context, next_state):
    """Section title overline or transition marker."""
    if self.state_machine.match_titles:
        return [match.string], 'Line', []
    elif match.string.strip() == '::':
        raise statemachine.TransitionCorrection('text')
    elif len(match.string.strip()) < 4:
        # Too short to be a credible marker; re-dispatch as plain text.
        msg = self.reporter.info(
            'Unexpected possible title overline or transition.\n'
            "Treating it as ordinary text because it's so short.",
            line=self.state_machine.abs_line_number())
        self.parent += msg
        raise statemachine.TransitionCorrection('text')
    else:
        blocktext = self.state_machine.line
        msg = self.reporter.severe(
            'Unexpected section title or transition.',
            nodes.literal_block(blocktext, blocktext),
            line=self.state_machine.abs_line_number())
        self.parent += msg
        return [], next_state, []
def text(self, match, context, next_state):
    """Titles, definition lists, paragraphs."""
    # Defer classification: the Text state inspects the *next* line to
    # decide what this block actually is.
    return [match.string], 'Text', []
class RFC2822Body(Body):

    """
    RFC2822 headers are only valid as the first constructs in documents.  As
    soon as anything else appears, the `Body` state should take over.
    """

    patterns = Body.patterns.copy() # can't modify the original
    patterns['rfc2822'] = r'[!-9;-~]+:( +|$)'
    initial_transitions = [(name, 'Body')
                           for name in Body.initial_transitions]
    initial_transitions.insert(-1, ('rfc2822', 'Body')) # just before 'text'

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        fieldlist = nodes.field_list(classes=['rfc2822'])
        self.parent += fieldlist
        field, blank_finish = self.rfc2822_field(match)
        fieldlist += field
        offset = self.state_machine.line_offset + 1 # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=fieldlist, initial_state='RFC2822List',
            blank_finish=blank_finish)
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning(
                'RFC2822-style field list')
        return [], next_state, []

    def rfc2822_field(self, match):
        """Parse one "Name: value" field; return (field node, blank_finish)."""
        name = match.string[:match.string.find(':')]
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_first_known_indented(match.end(),
                                                          until_blank=True)
        fieldnode = nodes.field()
        fieldnode += nodes.field_name(name, name)
        fieldbody = nodes.field_body('\n'.join(indented))
        fieldnode += fieldbody
        if indented:
            self.nested_parse(indented, input_offset=line_offset,
                              node=fieldbody)
        return fieldnode, blank_finish
class SpecializedBody(Body):

    """
    Superclass for second and subsequent compound element members.  Compound
    elements are lists and list-like constructs.

    All transition methods are disabled (redefined as `invalid_input`).
    Override individual methods in subclasses to re-enable.

    For example, once an initial bullet list item, say, is recognized, the
    `BulletList` subclass takes over, with a "bullet_list" node as its
    container.  Upon encountering the initial bullet list item, `Body.bullet`
    calls its ``self.nested_list_parse`` (`RSTState.nested_list_parse`), which
    starts up a nested parsing session with `BulletList` as the initial state.
    Only the ``bullet`` transition method is enabled in `BulletList`; as long
    as only bullet list items are encountered, they are parsed and inserted
    into the container.  The first construct which is *not* a bullet list item
    triggers the `invalid_input` method, which ends the nested parse and
    closes the container.  `BulletList` needs to recognize input that is
    invalid in the context of a bullet list, which means everything *other
    than* bullet list items, so it inherits the transition list created in
    `Body`.
    """

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        self.state_machine.previous_line() # back up so parent SM can reassess
        raise EOFError

    indent = invalid_input
    bullet = invalid_input
    enumerator = invalid_input
    field_marker = invalid_input
    option_marker = invalid_input
    doctest = invalid_input
    line_block = invalid_input
    grid_table_top = invalid_input
    simple_table_top = invalid_input
    explicit_markup = invalid_input
    anonymous = invalid_input
    line = invalid_input
    text = invalid_input
class BulletList(SpecializedBody):

    """Second and subsequent bullet_list list_items."""

    def bullet(self, match, context, next_state):
        """Bullet list item."""
        if match.string[0] != self.parent['bullet']:
            # different bullet: new list
            self.invalid_input()
        item, finished_blank = self.list_item(match.end())
        self.parent += item
        self.blank_finish = finished_blank
        return [], next_state, []
class DefinitionList(SpecializedBody):

    """Second and subsequent definition_list_items."""

    def text(self, match, context, next_state):
        """Definition lists."""
        # Hand the term line to the Definition state, which checks whether
        # the following line is indented (a real definition body).
        return [match.string], 'Definition', []
class EnumeratedList(SpecializedBody):

    """Second and subsequent enumerated_list list_items."""

    def enumerator(self, match, context, next_state):
        """Enumerated list item."""
        format, sequence, text, ordinal = self.parse_enumerator(
            match, self.parent['enumtype'])
        if ( format != self.format
             or (sequence != '#' and (sequence != self.parent['enumtype']
                                      or self.auto
                                      or ordinal != (self.lastordinal + 1)))
             or not self.is_enumerated_list_item(ordinal, sequence, format)):
            # different enumeration: new list
            self.invalid_input()
        if sequence == '#':
            self.auto = 1
        listitem, blank_finish = self.list_item(match.end())
        self.parent += listitem
        self.blank_finish = blank_finish
        self.lastordinal = ordinal
        return [], next_state, []
class FieldList(SpecializedBody):

    """Second and subsequent field_list fields."""

    def field_marker(self, match, context, next_state):
        """Field list field."""
        new_field, finished_blank = self.field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], next_state, []
class OptionList(SpecializedBody):

    """Second and subsequent option_list option_list_items."""

    def option_marker(self, match, context, next_state):
        """Option list item."""
        try:
            option_list_item, blank_finish = self.option_list_item(match)
        except MarkupError:
            # A malformed item ends the current option list.
            self.invalid_input()
        self.parent += option_list_item
        self.blank_finish = blank_finish
        return [], next_state, []
class RFC2822List(SpecializedBody, RFC2822Body):

    """Second and subsequent RFC2822-style field_list fields."""

    patterns = RFC2822Body.patterns
    initial_transitions = RFC2822Body.initial_transitions

    def rfc2822(self, match, context, next_state):
        """RFC2822-style field list item."""
        new_field, finished_blank = self.rfc2822_field(match)
        self.parent += new_field
        self.blank_finish = finished_blank
        return [], 'RFC2822List', []

    blank = SpecializedBody.invalid_input
class ExtensionOptions(FieldList):

    """
    Parse field_list fields for extension options.

    No nested parsing is done (including inline markup parsing).
    """

    def parse_field_body(self, indented, offset, node):
        """Override `Body.parse_field_body` for simpler parsing."""
        lines = []
        # The trailing '' sentinel flushes the final paragraph.
        for line in list(indented) + ['']:
            if line.strip():
                lines.append(line)
            elif lines:
                text = '\n'.join(lines)
                node += nodes.paragraph(text, text)
                lines = []
class LineBlock(SpecializedBody):

    """Second and subsequent lines of a line_block."""

    blank = SpecializedBody.invalid_input

    def line_block(self, match, context, next_state):
        """New line of line block."""
        lineno = self.state_machine.abs_line_number()
        line, messages, blank_finish = self.line_block_line(match, lineno)
        self.parent += line
        self.parent.parent += messages
        self.blank_finish = blank_finish
        return [], next_state, []
class Explicit(SpecializedBody):

    """Second and subsequent explicit markup construct."""

    def explicit_markup(self, match, context, next_state):
        """Footnotes, hyperlink targets, directives, comments."""
        produced, finished_blank = self.explicit_construct(match)
        self.parent += produced
        self.blank_finish = finished_blank
        return [], next_state, []

    def anonymous(self, match, context, next_state):
        """Anonymous hyperlink targets."""
        produced, finished_blank = self.anonymous_target(match)
        self.parent += produced
        self.blank_finish = finished_blank
        return [], next_state, []

    blank = SpecializedBody.invalid_input
class SubstitutionDef(Body):

    """
    Parser for the contents of a substitution_definition element.
    """

    patterns = {
          'embedded_directive': re.compile(r'(%s)::( +|$)'
                                           % Inliner.simplename, re.UNICODE),
          'text': r''}
    initial_transitions = ['embedded_directive', 'text']

    def embedded_directive(self, match, context, next_state):
        """Directive embedded in a substitution definition."""
        nodelist, blank_finish = self.directive(match,
                                                alt=self.parent['names'][0])
        self.parent += nodelist
        if not self.state_machine.at_eof():
            self.blank_finish = blank_finish
        raise nodes.SkipNode

    def text(self, match, context, next_state):
        if not self.state_machine.at_eof():
            self.blank_finish = self.state_machine.is_next_line_blank()
        raise nodes.SkipNode
class Text(RSTState):

    """
    Classifier of second line of a text block.

    Could be a paragraph, a definition list item, or a title.
    """

    patterns = {'underline': Body.patterns['line'],
                'text': r''}
    initial_transitions = [('underline', 'Body'), ('text', 'Body')]

    def blank(self, match, context, next_state):
        """End of paragraph."""
        # NOTE: self.paragraph returns [ node, system_message(s) ], literalnext
        paragraph, literalnext = self.paragraph(
            context, self.state_machine.abs_line_number() - 1)
        self.parent += paragraph
        if literalnext:
            self.parent += self.literal_block()
        return [], 'Body', []

    def eof(self, context):
        if context:
            self.blank(None, context, None)
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        definitionlist = nodes.definition_list()
        definitionlistitem, blank_finish = self.definition_list_item(context)
        definitionlist += definitionlistitem
        self.parent += definitionlist
        offset = self.state_machine.line_offset + 1 # next line
        newline_offset, blank_finish = self.nested_list_parse(
            self.state_machine.input_lines[offset:],
            input_offset=self.state_machine.abs_line_offset() + 1,
            node=definitionlist, initial_state='DefinitionList',
            blank_finish=blank_finish, blank_finish_state='Definition')
        self.goto_line(newline_offset)
        if not blank_finish:
            self.parent += self.unindent_warning('Definition list')
        return [], 'Body', []

    def underline(self, match, context, next_state):
        """Section title."""
        lineno = self.state_machine.abs_line_number()
        title = context[0].rstrip()
        underline = match.string.rstrip()
        source = title + '\n' + underline
        messages = []
        if column_width(title) > len(underline):
            if len(underline) < 4:
                if self.state_machine.match_titles:
                    msg = self.reporter.info(
                        'Possible title underline, too short for the title.\n'
                        "Treating it as ordinary text because it's so short.",
                        line=lineno)
                    self.parent += msg
                raise statemachine.TransitionCorrection('text')
            else:
                blocktext = context[0] + '\n' + self.state_machine.line
                msg = self.reporter.warning('Title underline too short.',
                    nodes.literal_block(blocktext, blocktext), line=lineno)
                messages.append(msg)
        if not self.state_machine.match_titles:
            blocktext = context[0] + '\n' + self.state_machine.line
            # We need get_source_and_line() here to report correctly
            src, srcline = self.state_machine.get_source_and_line()
            # TODO: why is abs_line_number() == srcline+1
            # if the error is in a table (try with test_tables.py)?
            msg = self.reporter.severe('Unexpected section title.',
                nodes.literal_block(blocktext, blocktext),
                source=src, line=srcline)
            self.parent += messages
            self.parent += msg
            return [], next_state, []
        style = underline[0]
        context[:] = []
        self.section(title, source, style, lineno - 1, messages)
        return [], next_state, []

    def text(self, match, context, next_state):
        """Paragraph."""
        startline = self.state_machine.abs_line_number() - 1
        msg = None
        try:
            block = self.state_machine.get_text_block(flush_left=True)
        except statemachine.UnexpectedIndentationError as err:
            block, src, srcline = err.args
            msg = self.reporter.error('Unexpected indentation.',
                                      source=src, line=srcline)
        lines = context + list(block)
        paragraph, literalnext = self.paragraph(lines, startline)
        self.parent += paragraph
        self.parent += msg
        if literalnext:
            try:
                self.state_machine.next_line()
            except EOFError:
                pass
            self.parent += self.literal_block()
        return [], next_state, []

    def literal_block(self):
        """Return a list of nodes."""
        indented, indent, offset, blank_finish = \
              self.state_machine.get_indented()
        while indented and not indented[-1].strip():
            indented.trim_end()
        if not indented:
            return self.quoted_literal_block()
        data = '\n'.join(indented)
        literal_block = nodes.literal_block(data, data)
        (literal_block.source,
         literal_block.line) = self.state_machine.get_source_and_line(offset+1)
        nodelist = [literal_block]
        if not blank_finish:
            nodelist.append(self.unindent_warning('Literal block'))
        return nodelist

    def quoted_literal_block(self):
        """Parse an unindented, quoted literal block via a nested parse."""
        abs_line_offset = self.state_machine.abs_line_offset()
        offset = self.state_machine.line_offset
        parent_node = nodes.Element()
        new_abs_offset = self.nested_parse(
            self.state_machine.input_lines[offset:],
            input_offset=abs_line_offset, node=parent_node, match_titles=False,
            state_machine_kwargs={'state_classes': (QuotedLiteralBlock,),
                                  'initial_state': 'QuotedLiteralBlock'})
        self.goto_line(new_abs_offset)
        return parent_node.children

    def definition_list_item(self, termline):
        """Parse one term + definition; return (item node, blank_finish)."""
        indented, indent, line_offset, blank_finish = \
              self.state_machine.get_indented()
        itemnode = nodes.definition_list_item(
            '\n'.join(termline + list(indented)))
        lineno = self.state_machine.abs_line_number() - 1
        (itemnode.source,
         itemnode.line) = self.state_machine.get_source_and_line(lineno)
        termlist, messages = self.term(termline, lineno)
        itemnode += termlist
        definition = nodes.definition('', *messages)
        itemnode += definition
        if termline[0][-2:] == '::':
            definition += self.reporter.info(
                'Blank line missing before literal block (after the "::")? '
                'Interpreted as a definition list item.',
                line=lineno+1)
        self.nested_parse(indented, input_offset=line_offset, node=definition)
        return itemnode, blank_finish

    classifier_delimiter = re.compile(' +: +')

    def term(self, lines, lineno):
        """Return a definition_list's term and optional classifiers."""
        assert len(lines) == 1
        text_nodes, messages = self.inline_text(lines[0], lineno)
        term_node = nodes.term(lines[0])
        (term_node.source,
         term_node.line) = self.state_machine.get_source_and_line(lineno)
        node_list = [term_node]
        for i in range(len(text_nodes)):
            node = text_nodes[i]
            if isinstance(node, nodes.Text):
                parts = self.classifier_delimiter.split(node.rawsource)
                if len(parts) == 1:
                    node_list[-1] += node
                else:
                    # " : " delimiters split the term from its classifiers.
                    rawtext = parts[0].rstrip()
                    textnode = nodes.Text(utils.unescape_rawsource(rawtext))
                    textnode.rawsource = rawtext
                    node_list[-1] += textnode
                    for part in parts[1:]:
                        classifier_node = nodes.classifier(part,
                            utils.unescape_rawsource(part))
                        # might be a reference or similar in the next node
                        # then classifier_node is empty
                        if len(classifier_node) > 0:
                            classifier_node[0].rawsource = part
                        node_list.append(classifier_node)
            else:
                node_list[-1] += node
        return node_list, messages
class SpecializedText(Text):

    """
    Superclass for second and subsequent lines of Text-variants.

    All transition methods are disabled. Override individual methods in
    subclasses to re-enable.
    """

    def eof(self, context):
        """Incomplete construct."""
        return []

    def invalid_input(self, match=None, context=None, next_state=None):
        """Not a compound element member. Abort this state machine."""
        raise EOFError

    # All standard transitions abort; subclasses selectively re-enable.
    blank = invalid_input
    indent = invalid_input
    underline = invalid_input
    text = invalid_input
class Definition(SpecializedText):

    """Second line of potential definition_list_item."""

    def eof(self, context):
        """Not a definition."""
        self.state_machine.previous_line(2)  # so parent SM can reassess
        return []

    def indent(self, match, context, next_state):
        """Definition list item."""
        # An indented second line confirms the definition list item.
        itemnode, blank_finish = self.definition_list_item(context)
        self.parent += itemnode
        self.blank_finish = blank_finish
        return [], 'DefinitionList', []
class Line(SpecializedText):

    """
    Second line of over- & underlined section title or transition marker.
    """

    eofcheck = 1  # @@@ ???
    """Set to 0 while parsing sections, so that we don't catch the EOF."""

    def eof(self, context):
        """Transition marker at end of section or document."""
        marker = context[0].strip()
        if self.memo.section_bubble_up_kludge:
            self.memo.section_bubble_up_kludge = False
        elif len(marker) < 4:
            # Too short for a transition marker: treat as ordinary text
            # (state_correction raises StateCorrection).
            self.state_correction(context)
        if self.eofcheck:               # ignore EOFError with sections
            lineno = self.state_machine.abs_line_number() - 1
            transition = nodes.transition(rawsource=context[0])
            transition.line = lineno
            self.parent += transition
        self.eofcheck = 1
        return []

    def blank(self, match, context, next_state):
        """Transition marker."""
        src, srcline = self.state_machine.get_source_and_line()
        marker = context[0].strip()
        if len(marker) < 4:
            # Marker too short to be a transition; re-parse as text.
            self.state_correction(context)
        transition = nodes.transition(rawsource=marker)
        transition.source = src
        transition.line = srcline - 1
        self.parent += transition
        return [], 'Body', []

    def text(self, match, context, next_state):
        """Potential over- & underlined title."""
        lineno = self.state_machine.abs_line_number() - 1
        overline = context[0]
        title = match.string
        underline = ''
        try:
            underline = self.state_machine.next_line()
        except EOFError:
            # Overline + title but no underline before end of input.
            blocktext = overline + '\n' + title
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Incomplete section title.',
                    nodes.literal_block(blocktext, blocktext),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        source = '%s\n%s\n%s' % (overline, title, underline)
        overline = overline.rstrip()
        underline = underline.rstrip()
        if not self.transitions['underline'][0].match(underline):
            # Third line is not a valid underline.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Missing matching underline for section title overline.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        elif overline != underline:
            # Overline and underline use different characters/lengths.
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.severe(
                    'Title overline & underline mismatch.',
                    nodes.literal_block(source, source),
                    line=lineno)
                self.parent += msg
                return [], 'Body', []
        title = title.rstrip()
        messages = []
        # column_width accounts for wide (east-asian) characters.
        if column_width(title) > len(overline):
            blocktext = overline + '\n' + title + '\n' + underline
            if len(overline.rstrip()) < 4:
                self.short_overline(context, blocktext, lineno, 2)
            else:
                msg = self.reporter.warning(
                    'Title overline too short.',
                    nodes.literal_block(source, source),
                    line=lineno)
                messages.append(msg)
        # Overlined titles are keyed by the (overline, underline) char pair.
        style = (overline[0], underline[0])
        self.eofcheck = 0               # @@@ not sure this is correct
        self.section(title.lstrip(), source, style, lineno + 1, messages)
        self.eofcheck = 1
        return [], 'Body', []

    indent = text  # indented title

    def underline(self, match, context, next_state):
        # Overline immediately followed by another underline-like line:
        # cannot be a title (no title text) nor a clean transition.
        overline = context[0]
        blocktext = overline + '\n' + self.state_machine.line
        lineno = self.state_machine.abs_line_number() - 1
        if len(overline.rstrip()) < 4:
            self.short_overline(context, blocktext, lineno, 1)
        msg = self.reporter.error(
            'Invalid section title or transition marker.',
            nodes.literal_block(blocktext, blocktext),
            line=lineno)
        self.parent += msg
        return [], 'Body', []

    def short_overline(self, context, blocktext, lineno, lines=1):
        # A short overline is assumed to be ordinary text; report and
        # rewind so the Body state re-parses it.
        msg = self.reporter.info(
            'Possible incomplete section title.\nTreating the overline as '
            "ordinary text because it's so short.",
            line=lineno)
        self.parent += msg
        self.state_correction(context, lines)

    def state_correction(self, context, lines=1):
        # Rewind the input and restart parsing in the Body state's "text"
        # transition; StateCorrection propagates up to the state machine.
        self.state_machine.previous_line(lines)
        context[:] = []
        raise statemachine.StateCorrection('Body', 'text')
class QuotedLiteralBlock(RSTState):

    """
    Nested parse handler for quoted (unindented) literal blocks.

    Special-purpose.  Not for inclusion in `state_classes`.
    """

    # 'initial_quoted' matches any single non-alphanumeric 7-bit character
    # at the start of the first line; 'text' matches anything.
    patterns = {'initial_quoted': r'(%(nonalphanum7bit)s)' % Body.pats,
                'text': r''}
    initial_transitions = ('initial_quoted', 'text')

    def __init__(self, state_machine, debug=False):
        RSTState.__init__(self, state_machine, debug)
        # Accumulated system messages, flushed to the parent in `eof`.
        self.messages = []
        # Absolute line number of the first quoted line (set in
        # `initial_quoted`).
        self.initial_lineno = None

    def blank(self, match, context, next_state):
        # A blank line ends the quoted block (if one was collected).
        if context:
            raise EOFError
        else:
            return context, next_state, []

    def eof(self, context):
        if context:
            # Emit the collected quoted lines as a literal block.
            src, srcline = self.state_machine.get_source_and_line(
                self.initial_lineno)
            text = '\n'.join(context)
            literal_block = nodes.literal_block(text, text)
            literal_block.source = src
            literal_block.line = srcline
            self.parent += literal_block
        else:
            self.parent += self.reporter.warning(
                'Literal block expected; none found.',
                line=self.state_machine.abs_line_number())
            # src not available, because statemachine.input_lines is empty
            self.state_machine.previous_line()
        self.parent += self.messages
        return []

    def indent(self, match, context, next_state):
        assert context, ('QuotedLiteralBlock.indent: context should not '
                         'be empty!')
        self.messages.append(
            self.reporter.error('Unexpected indentation.',
                                line=self.state_machine.abs_line_number()))
        # Rewind so the parent state machine re-reads this line.
        self.state_machine.previous_line()
        raise EOFError

    def initial_quoted(self, match, context, next_state):
        """Match arbitrary quote character on the first line only."""
        self.remove_transition('initial_quoted')
        quote = match.string[0]
        pattern = re.compile(re.escape(quote), re.UNICODE)
        # New transition matches consistent quotes only:
        self.add_transition('quoted',
                            (pattern, self.quoted, self.__class__.__name__))
        self.initial_lineno = self.state_machine.abs_line_number()
        return [match.string], next_state, []

    def quoted(self, match, context, next_state):
        """Match consistent quotes on subsequent lines."""
        context.append(match.string)
        return context, next_state, []

    def text(self, match, context, next_state):
        # A line not starting with the established quote character.
        if context:
            self.messages.append(
                self.reporter.error('Inconsistent literal block quoting.',
                                    line=self.state_machine.abs_line_number()))
            self.state_machine.previous_line()
        raise EOFError
state_classes = (Body, BulletList, DefinitionList, EnumeratedList, FieldList,
                 OptionList, LineBlock, ExtensionOptions, Explicit, Text,
                 Definition, Line, SubstitutionDef, RFC2822Body, RFC2822List)
"""Standard set of State classes used to start `RSTStateMachine`."""