From aa216de5188d965be7580b0bf3137bb7a84135d3 Mon Sep 17 00:00:00 2001 From: goodger Date: Mon, 1 May 2006 02:31:07 +0000 Subject: [PATCH] Added support for multiple attributions in a physical block quote (indented text block), dividing it into multiple logical block quotes git-svn-id: https://docutils.svn.sourceforge.net/svnroot/docutils/trunk@4522 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/HISTORY.txt | 3 + docutils/docs/dev/todo.txt | 11 -- docutils/docs/ref/rst/restructuredtext.txt | 50 +++++++-- docutils/docutils/parsers/rst/states.py | 114 ++++++++++++--------- .../test_parsers/test_rst/test_block_quotes.py | 113 ++++++++++++++++++++ 5 files changed, 222 insertions(+), 69 deletions(-) diff --git a/docutils/HISTORY.txt b/docutils/HISTORY.txt index 403760f50..d463ae5f2 100644 --- a/docutils/HISTORY.txt +++ b/docutils/HISTORY.txt @@ -28,6 +28,9 @@ Changes Since 0.4 - Unquoted targets beginning with an underscore (``.. __target: URI``) are no longer accepted. + - Added support for multiple attributions in a physical block quote + (indented text block), dividing it into multiple logical block + quotes. * docutils/writers/html4css1/__init__.py: diff --git a/docutils/docs/dev/todo.txt b/docutils/docs/dev/todo.txt index 3137584d2..37de05034 100644 --- a/docutils/docs/dev/todo.txt +++ b/docutils/docs/dev/todo.txt @@ -782,17 +782,6 @@ __ rst/alternatives.html#or-not-to-do See . -* Allow multiple block quotes, only separated by attributions - (http://article.gmane.org/gmane.text.docutils.devel/2985), e.g.:: - - quote 1 - - ---Attrib 1 - - quote 2 - - ---Attrib 2 - * Change the specification so that more punctuation is allowed before/after inline markup start/end string (http://article.gmane.org/gmane.text.docutils.cvs/3824). 
diff --git a/docutils/docs/ref/rst/restructuredtext.txt b/docutils/docs/ref/rst/restructuredtext.txt index 1445619b3..48f8e741e 100644 --- a/docutils/docs/ref/rst/restructuredtext.txt +++ b/docutils/docs/ref/rst/restructuredtext.txt @@ -1215,9 +1215,9 @@ Block Quotes Doctree element: block_quote, attribution. A text block that is indented relative to the preceding text, without -markup indicating it to be a literal block, is a block quote. All -markup processing (for body elements and inline markup) continues -within the block quote:: +preceding markup indicating it to be a literal block or other content, +is a block quote. All markup processing (for body elements and inline +markup) continues within the block quote:: This is an ordinary paragraph, introducing a block quote. @@ -1225,10 +1225,38 @@ within the block quote:: -- Sherlock Holmes -If the final block of a block quote begins with "--", "---", or a true -em-dash (flush left within the block quote), it is interpreted as an -attribution. If the attribution consists of multiple lines, the left -edges of the second and subsequent lines must align. +A block quote may end with an attribution: a text block beginning with +"--", "---", or a true em-dash, flush left within the block quote. If +the attribution consists of multiple lines, the left edges of the +second and subsequent lines must align. + +Multiple block quotes may occur consecutively if terminated with +attributions. + + Unindented paragraph. + + Block quote 1. + + -- Attribution 1 + + Block quote 2. + +`Empty comments`_ may be used to explicitly terminate preceding +constructs that would otherwise consume a block quote:: + + * List item. + + .. + + Block quote 3. + +Empty comments may also be used to separate block quotes:: + + Block quote 4. + + .. + + Block quote 5. Blank lines are required before and after a block quote, but these blank lines are not included as part of the block quote. 
@@ -2240,10 +2268,12 @@ constructs is recognized, leave the ".." on a line by itself:: .. |even| this:: ! +.. _empty comments: + An explicit markup start followed by a blank line and nothing else -(apart from whitespace) is an "empty comment". It serves to terminate -a preceding construct, and does **not** consume any indented text -following. To have a block quote follow a list or any indented +(apart from whitespace) is an "_`empty comment`". It serves to +terminate a preceding construct, and does **not** consume any indented +text following. To have a block quote follow a list or any indented construct, insert an unindented empty comment in-between. Syntax diagram:: diff --git a/docutils/docutils/parsers/rst/states.py b/docutils/docutils/parsers/rst/states.py index 2eab7fe5d..b66978afa 100644 --- a/docutils/docutils/parsers/rst/states.py +++ b/docutils/docutils/parsers/rst/states.py @@ -1079,68 +1079,86 @@ class Body(RSTState): """Block quote.""" indented, indent, line_offset, blank_finish = \ self.state_machine.get_indented() - blockquote, messages = self.block_quote(indented, line_offset) - self.parent += blockquote - self.parent += messages + elements = self.block_quote(indented, line_offset) + self.parent += elements if not blank_finish: self.parent += self.unindent_warning('Block quote') return context, next_state, [] def block_quote(self, indented, line_offset): - blockquote_lines, attribution_lines, attribution_offset = \ - self.check_attribution(indented, line_offset) - blockquote = nodes.block_quote() - self.nested_parse(blockquote_lines, line_offset, blockquote) - messages = [] - if attribution_lines: - attribution, messages = self.parse_attribution(attribution_lines, - attribution_offset) - blockquote += attribution - return blockquote, messages - - # u'\u2014' is an em-dash: + elements = [] + while indented: + (blockquote_lines, + attribution_lines, + attribution_offset, + indented, + new_line_offset) = self.split_attribution(indented, line_offset) + 
blockquote = nodes.block_quote() + self.nested_parse(blockquote_lines, line_offset, blockquote) + elements.append(blockquote) + if attribution_lines: + attribution, messages = self.parse_attribution( + attribution_lines, attribution_offset) + blockquote += attribution + elements += messages + line_offset = new_line_offset + while indented and not indented[0]: + indented = indented[1:] + line_offset += 1 + return elements + + # U+2014 is an em-dash: attribution_pattern = re.compile(ur'(---?(?!-)|\u2014) *(?=[^ \n])') - def check_attribution(self, indented, line_offset): + def split_attribution(self, indented, line_offset): """ - Check for an attribution in the last contiguous block of `indented`. + Check for a block quote attribution and split it off: - * First line after last blank line must begin with "--" (etc.). + * First line after a blank line must begin with a dash ("--", "---", + em-dash; matches `self.attribution_pattern`). * Every line after that must have consistent indentation. + * Attributions must be preceded by block quote content. - Return a 3-tuple: (block quote lines, attribution lines, - attribution offset). + Return a tuple of: (block quote content lines, attribution lines, + attribution offset, remaining indented lines, remaining lines offset). 
""" - #import pdb ; pdb.set_trace() blank = None - nonblank_seen = None - indent = 0 - for i in range(len(indented) - 1, 0, -1): # don't check first line - this_line_blank = not indented[i].strip() - if nonblank_seen and this_line_blank: - match = self.attribution_pattern.match(indented[i + 1]) - if match: - blank = i - break - elif not this_line_blank: - nonblank_seen = 1 - if blank and len(indented) - blank > 2: # multi-line attribution - indent = (len(indented[blank + 2]) - - len(indented[blank + 2].lstrip())) - for j in range(blank + 3, len(indented)): - if ( indented[j] # may be blank last line - and indent != (len(indented[j]) - - len(indented[j].lstrip()))): - # bad shape - blank = None - break - if blank: - a_lines = indented[blank + 1:] - a_lines.trim_left(match.end(), end=1) - a_lines.trim_left(indent, start=1) - return (indented[:blank], a_lines, line_offset + blank + 1) + nonblank_seen = False + for i in range(len(indented)): + line = indented[i].rstrip() + if line: + if nonblank_seen and blank == i - 1: # last line blank + match = self.attribution_pattern.match(line) + if match: + attribution_end, indent = self.check_attribution( + indented, i) + if attribution_end: + a_lines = indented[i:attribution_end] + a_lines.trim_left(match.end(), end=1) + a_lines.trim_left(indent, start=1) + return (indented[:i], a_lines, + i, indented[attribution_end:], + line_offset + attribution_end) + nonblank_seen = True + else: + blank = i else: - return (indented, None, None) + return (indented, None, None, None, None) + + def check_attribution(self, indented, attribution_start): + """Check attribution shape + """ + indent = None + i = attribution_start + 1 + for i in range(attribution_start + 1, len(indented)): + line = indented[i].rstrip() + if not line: + break + if indent is None: + indent = len(line) - len(line.lstrip()) + elif len(line) - len(line.lstrip()) != indent: + return None, None # bad shape; not an attribution + return i, (indent or 0) def 
parse_attribution(self, indented, line_offset): text = '\n'.join(indented).rstrip() diff --git a/docutils/test/test_parsers/test_rst/test_block_quotes.py b/docutils/test/test_parsers/test_rst/test_block_quotes.py index dd7561723..a1773a778 100755 --- a/docutils/test/test_parsers/test_rst/test_block_quotes.py +++ b/docutils/test/test_parsers/test_rst/test_block_quotes.py @@ -218,6 +218,84 @@ Paragraph. ["""\ Paragraph. + Block quote 1. + + -- Attribution 1 + + Block quote 2. + + --Attribution 2 +""", +"""\ + + + Paragraph. + + + Block quote 1. + + Attribution 1 + + + Block quote 2. + + Attribution 2 +"""], +["""\ +Paragraph. + + Block quote 1. + + -- Attribution 1 + + Block quote 2. +""", +"""\ + + + Paragraph. + + + Block quote 1. + + Attribution 1 + + + Block quote 2. +"""], +["""\ +Unindented paragraph. + + Block quote 1. + + -- Attribution 1 + + Block quote 2. + +.. + + Block quote 3. +""", +"""\ + + + Unindented paragraph. + + + Block quote 1. + + Attribution 1 + + + Block quote 2. + + + + Block quote 3. +"""], +["""\ +Paragraph. + -- Not an attribution Paragraph. @@ -266,8 +344,43 @@ Paragraph. and line three """], +["""\ +Paragraph. + + -- Not a valid attribution + + Block quote 1. + + --Attribution 1 + + --Invalid attribution + + Block quote 2. + + --Attribution 2 +""", +"""\ + + + Paragraph. + + + -- Not a valid attribution + + Block quote 1. + + Attribution 1 + + + --Invalid attribution + + Block quote 2. + + Attribution 2 +"""], ] + if __name__ == '__main__': import unittest unittest.main(defaultTest='suite') -- 2.11.4.GIT