From 1bf825377afa5bd46540d2462a25edc5c5d50db5 Mon Sep 17 00:00:00 2001 From: milde Date: Mon, 26 Nov 2012 14:06:19 +0000 Subject: [PATCH] SmartQuotes transform: language-depended quote characters. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7540 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- HISTORY.txt | 4 +- RELEASE-NOTES.txt | 3 + docs/user/config.txt | 31 +++- docutils/nodes.py | 19 ++- docutils/transforms/universal.py | 78 +++++++--- docutils/utils/smartquotes.py | 238 ++++++++++++++++++++----------- test/test_transforms/test_smartquotes.py | 83 ++++++++++- 7 files changed, 342 insertions(+), 114 deletions(-) diff --git a/HISTORY.txt b/HISTORY.txt index 545d7a542..4a459938a 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -5,7 +5,7 @@ ================== :Author: David Goodger; open to all Docutils developers -:Contact: goodger@python.org +:Contact: docutils-develop@lists.sourceforge.net :Date: $Date$ :Revision: $Revision$ :Web site: http://docutils.sourceforge.net/ @@ -41,7 +41,7 @@ Changes Since 0.9.1 * docutils/transforms/universal.py - - Add SmartQuotes transform for typographic quotes and dashes. + - SmartQuotes transform for typographic quotes and dashes. * docutils/utils/__init__.py diff --git a/RELEASE-NOTES.txt b/RELEASE-NOTES.txt index 218525c84..7a4d79696 100644 --- a/RELEASE-NOTES.txt +++ b/RELEASE-NOTES.txt @@ -3,6 +3,7 @@ ======================== :Contact: grubert@users.sourceforge.net +:Maintainer: docutils-develop@lists.sourceforge.net :Date: $Date$ :Revision: $Revision$ :Web site: http://docutils.sourceforge.net/ @@ -40,6 +41,8 @@ Changes Since 0.9.1 * General: + - SmartQuotes transform for typographic quotes and dashes. + - ``docutils/math``, ``docutils/error_reporting.py``, and ``docutils/urischemes.py`` moved to the utils package. 
Code importing these modules needs to adapt, e.g.:: diff --git a/docs/user/config.txt b/docs/user/config.txt index 4b6b8f64c..b40abdef9 100644 --- a/docs/user/config.txt +++ b/docs/user/config.txt @@ -86,7 +86,7 @@ settings. Underscores ("_") and hyphens ("-") can be used interchangably in entry names; hyphens are automatically converted to underscores. -For on/off switch settings (booleans), the following values are +For on/off switch settings (_`booleans`), the following values are recognized: :On: "true", "yes", "on", "1" @@ -94,8 +94,8 @@ recognized: List values can be comma- or colon-delimited. -strip_classes_, strip_elements_with_classes_, stylesheet_, and -stylesheet_path_ use the comma as delimiter, +strip_classes_, strip_elements_with_classes_, stylesheet, and +stylesheet_path use the comma as delimiter, whitespace around list values is stripped. :: strip-classes: ham,eggs, @@ -588,7 +588,6 @@ __ `footnote_references [latex2e writer]`_ _`syntax_highlight` - Token type names used by Pygments_ when parsing contents of the code_ directive and role. @@ -605,11 +604,35 @@ _`syntax_highlight` Default: "long". Option: ``--syntax-highlight``. + New in Docutils 0.9. + .. _Pygments: http://pygments.org/ .. _code: ../ref/rst/directives.html#code .. _Pygments-generated stylesheets: http://pygments.org/docs/cmdline/#generating-styles +_`smart_quotes` + Change straight quotation marks to typographic form. `Quote characters`_ + are selected according to the language of the current block element (see + language_code_). Also changes consecutive runs of hyphen-minus and full + stops (``---``, ``--``, ``...``) to em-dash, en-dash and ellipsis + Unicode characters respectively. + + Supported values: + + booleans_ (yes/no) + Use smart quotes? + + alt (or "alternative") + Use alternative quote set (if defined for the language). + + Default: "no". Option: ``--smart-quotes``. + + New in Docutils 0.10. + +.. 
_quote characters: + http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks + [readers] --------- diff --git a/docutils/nodes.py b/docutils/nodes.py index 559253ff8..5f4d10d53 100644 --- a/docutils/nodes.py +++ b/docutils/nodes.py @@ -269,7 +269,7 @@ class Node(object): index = node.parent.index(node) for sibling in node.parent[index+1:]: r.extend(sibling.traverse(include_self=True, - descend=descend, + descend=descend, siblings=False, ascend=False, condition=condition)) if not ascend: @@ -620,9 +620,24 @@ class Element(Node): has_key = hasattr - # support operator in + # support operator ``in`` __contains__ = hasattr + def get_language_code(self, fallback=''): + """Return node's language tag. + + Look iteratively in self and parents for a class argument + starting with ``language-`` and return the remainder of it + (which should be a `BCP 47` language tag) or the `fallback`. + """ + for cls in self.get('classes', []): + if cls.startswith('language-'): + return cls[9:] + try: + return self.parent.get_language_code(fallback) + except AttributeError: + return fallback + def append(self, item): self.setup_child(item) self.children.append(item) diff --git a/docutils/transforms/universal.py b/docutils/transforms/universal.py index 4f5626c1d..75a47fcd6 100644 --- a/docutils/transforms/universal.py +++ b/docutils/transforms/universal.py @@ -1,5 +1,7 @@ # $Id$ -# Authors: David Goodger ; Ueli Schlaepfer +# -*- coding: utf8 -*- +# Authors: David Goodger ; Ueli Schlaepfer; Günter Milde +# Maintainer: docutils-develop@lists.sourceforge.net # Copyright: This module has been placed in the public domain. 
""" @@ -212,13 +214,38 @@ class SmartQuotes(Transform): default_priority = 850 - texttype = {True: 'literal', - False: 'plain'} + def __init__(self, document, startnode): + Transform.__init__(self, document, startnode=startnode) + self.unsupported_languages = set() + + def get_tokens(self, txtnodes): + # A generator that yields ``(texttype, nodetext)`` tuples for a list + # of "Text" nodes (interface to ``smartquotes.educate_tokens()``). + + texttype = {True: 'literal', # "literal" text is not changed: + False: 'plain'} + for txtnode in txtnodes: + nodetype = texttype[isinstance(txtnode.parent, + (nodes.literal, + nodes.math, + nodes.image, + nodes.raw, + nodes.problematic))] + yield (nodetype, txtnode.astext()) + def apply(self): - if self.document.settings.smart_quotes is False: + smart_quotes = self.document.settings.smart_quotes + if not smart_quotes: return - + try: + alternative = smart_quotes.startswith('alt') + except AttributeError: + alternative = False + # print repr(alternative) + + document_language = self.document.settings.language_code + # "Educate" quotes in normal text. Handle each block of text # (TextElement node) as a unit to keep context around inline nodes: for node in self.document.traverse(nodes.TextElement): @@ -233,20 +260,33 @@ class SmartQuotes(Transform): txtnodes = [txtnode for txtnode in node.traverse(nodes.Text) if not isinstance(txtnode.parent, nodes.option_string)] - # smartquotes.educate_tokens() iterates over - # ``(texttype, nodetext)`` tuples. 
`texttype` is "literal" - # or "plain" where "literal" text is not changed: - tokens = [(self.texttype[isinstance(txtnode.parent, - (nodes.literal, - nodes.math, - nodes.image, - nodes.raw, - nodes.problematic))], - txtnode.astext()) for txtnode in txtnodes] - - # Iterator educating quotes in plain text - # 2 : set all, using old school en- and em- dash shortcuts - teacher = smartquotes.educate_tokens(tokens, attr='2') + + # language: use smart-quotes for language "lang" + lang = node.get_language_code(document_language) + # use alternative form if `smart-quotes` setting starts with "alt": + if alternative: + if '-x-altquot' in lang: + lang = lang.replace('-x-altquot', '') + else: + lang += '-x-altquot' + # drop subtags missing in quotes: + for tag in utils.normalize_language_tag(lang): + if tag in smartquotes.smartchars.quotes: + lang = tag + break + else: # language not supported: (keep ASCII quotes) + if lang not in self.unsupported_languages: + self.document.reporter.warning('No smart quotes ' + 'defined for language "%s".'%lang, base_node=node) + self.unsupported_languages.add(lang) + lang = '' + + # Iterator educating quotes in plain text: + # '2': set all, using old school en- and em- dash shortcuts + teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes), + attr='2', language=lang) for txtnode, newtext in zip(txtnodes, teacher): txtnode.parent.replace(txtnode, nodes.Text(newtext)) + + self.unsupported_languages = set() # reset diff --git a/docutils/utils/smartquotes.py b/docutils/utils/smartquotes.py index 0eaa531cf..6ddfaaf66 100644 --- a/docutils/utils/smartquotes.py +++ b/docutils/utils/smartquotes.py @@ -5,6 +5,7 @@ # :Copyright: © 2010 Günter Milde, # original `SmartyPants`_: © 2003 John Gruber # smartypants.py: © 2004, 2007 Chad Miller +# :Maintainer: docutils-develop@lists.sourceforge.net # :License: Released under the terms of the `2-Clause BSD license`_, in short: # # Copying and distribution of this file, with or without modification, @@ 
-176,7 +177,7 @@ Escape Value Character ======== ===== ========= This is useful, for example, when you want to use straight quotes as -foot and inch marks: 6'2" tall; a 17" iMac. +foot and inch marks: 6\\'2\\" tall; a 17\\" iMac. Options ======= @@ -207,7 +208,7 @@ Numeric values are the easiest way to configure SmartyPants' behavior: "-1" Stupefy mode. Reverses the SmartyPants transformation process, turning the characters produced by SmartyPants into their ASCII equivalents. - E.g. "“" is turned into a simple double-quote ("), "—" is + E.g. "“" is turned into a simple double-quote (\"), "—" is turned into two dashes, etc. @@ -313,6 +314,9 @@ proper character for closing single-quotes (``’``) by hand. Version History =============== +1.7 2012-11-19 + - Internationalization: language-dependent quotes. + 1.6.1: 2012-11-06 - Refactor code, code cleanup, - `educate_tokens()` generator as interface for Docutils. @@ -359,30 +363,88 @@ default_smartypants_attr = "1" import re -class smart(object): +class smartchars(object): """Smart quotes and dashes - - TODO: internationalization, see e.g. 
- http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen """ + endash = u'–' # "–" EN DASH emdash = u'—' # "—" EM DASH - lquote = u'‘' # "‘" LEFT SINGLE QUOTATION MARK - rquote = u'’' # "’" RIGHT SINGLE QUOTATION MARK - #lquote = u'‚' # "‚" SINGLE LOW-9 QUOTATION MARK (German) - ldquote = u'“' # "“" LEFT DOUBLE QUOTATION MARK - rdquote = u'”' # "”" RIGHT DOUBLE QUOTATION MARK - #ldquote = u'„' # "𔄤" DOUBLE LOW-9 QUOTATION MARK (German) ellipsis = u'…' # "…" HORIZONTAL ELLIPSIS -def smartyPants(text, attr=default_smartypants_attr): + # quote characters (language-specific, set in __init__()) + # + # English smart quotes (open primary, close primary, open secondary, close + # secondary) are: + # opquote = u'“' # "“" LEFT DOUBLE QUOTATION MARK + # cpquote = u'”' # "”" RIGHT DOUBLE QUOTATION MARK + # osquote = u'‘' # "‘" LEFT SINGLE QUOTATION MARK + # csquote = u'’' # "’" RIGHT SINGLE QUOTATION MARK + # For other languages see: + # http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks + # http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen + quotes = {'af': u'“”‘’', + 'af-x-altquot': u'„”‚’', + 'ca': u'«»“”', + 'ca-x-altquot': u'“”‘’', + 'cs': u'„“‚‘', + 'cs-x-altquot': u'»«›‹', + 'de': u'„“‚‘', + 'de-x-altquot': u'»«›‹', + 'de-ch': u'«»‹›', + 'el': u'«»“”', + 'en': u'“”‘’', + 'en-UK': u'‘’“”', + 'eo': u'“”‘’', + 'es': u'«»“”', + 'es-x-altquot': u'“”‘’', + 'fi': u'””’’', + 'fi-x-altquot': u'»»’’', + 'fr': (u'« ', u' »', u'‹ ', u' ›'), + 'fr-x-altquot': (u'“ ', u' ”', u'‘ ', u' ’'), + 'fr-ch': u'«»‹›', + 'he': u'”“»«', + 'he-x-altquot': u'„”‚’', + 'it': u'«»“”', + 'it-ch': u'«»‹›', + 'it-x-altquot': u'“”‘’', + 'ja': u'「」『』', + 'lt': u'„“‚‘', + 'nl': u'“”‘’', + 'nl-x-altquot': u'„”‚’', + 'pl': u'„”«»', + 'pl-x-altquot': u'«»“”', + 'pt': u'«»“”', + 'pt_br': u'“”‘’', + 'ro': u'„”«»', + 'ro-x-altquot': u'«»„”', + 'ru': u'«»„“', + 'sk': u'„“‚‘', + 'sk-x-altquot': u'»«›‹', + 'sv': u'„“‚‘', + 'sv-x-altquot': u'»«›‹', + 'zh_cn': u'“”‘’', 
+ 'it': u'«»“”', + 'zh_tw': u'「」『』', + } + + def __init__(self, language='en'): + self.language = language + try: + (self.opquote, self.cpquote, + self.osquote, self.csquote) = self.quotes[language] + except KeyError: + self.opquote, self.cpquote, self.osquote, self.csquote = u'""\'\'' + + +def smartyPants(text, attr=default_smartypants_attr, language='en'): """Main function for "traditional" use.""" - return "".join([t for t in educate_tokens(tokenize(text), attr)]) + return "".join([t for t in educate_tokens(tokenize(text), + attr, language)]) -def educate_tokens(text_tokens, attr=default_smartypants_attr): - """Return iterator that "educates" `text_tokens`. + +def educate_tokens(text_tokens, attr=default_smartypants_attr, language='en'): + """Return iterator that "educates" the items of `text_tokens`. """ # Parse attributes: @@ -439,84 +501,89 @@ def educate_tokens(text_tokens, attr=default_smartypants_attr): if "w" in attr: convert_quot = True prev_token_last_char = " " - # Get context around inline mark-up. (Remember the last character of the - # previous text token, to use as context to curl single-character quote - # tokens correctly.) + # Last character of the previous text token. Used as + # context to curl leading quote characters correctly. - for cur_token in text_tokens: - t = cur_token[1] + for (ttype, text) in text_tokens: # skip HTML and/or XML tags (do not update last character) - if cur_token[0] == 'tag': - yield t + if ttype == 'tag': + yield text continue - last_char = t[-1:] # Remember last char of this token before processing. - # skip literal text (math, literal, raw, ...) - if cur_token[0] == 'literal': - yield t + if ttype == 'literal': + prev_token_last_char = text[-1:] + yield text continue - t = processEscapes(t) + last_char = text[-1:] # Remember last char before processing. 
+ + text = processEscapes(text) if convert_quot: - t = re.sub('"', '"', t) + text = re.sub('"', '"', text) if do_dashes == 1: - t = educateDashes(t) + text = educateDashes(text) elif do_dashes == 2: - t = educateDashesOldSchool(t) + text = educateDashesOldSchool(text) elif do_dashes == 3: - t = educateDashesOldSchoolInverted(t) + text = educateDashesOldSchoolInverted(text) if do_ellipses: - t = educateEllipses(t) + text = educateEllipses(text) # Note: backticks need to be processed before quotes. if do_backticks: - t = educateBackticks(t) + text = educateBackticks(text, language) if do_backticks == 2: - t = educateSingleBackticks(t) + text = educateSingleBackticks(text, language) if do_quotes: - t = educateQuotes(prev_token_last_char+t)[1:] + text = educateQuotes(prev_token_last_char+text, language)[1:] if do_stupefy: - t = stupefyEntities(t) + text = stupefyEntities(text, language) - # print prev_token_last_char, t.encode('utf8') + # Remember last char as context for the next token prev_token_last_char = last_char - yield t + text = processEscapes(text, restore=True) + + yield text -def educateQuotes(text): +def educateQuotes(text, language='en'): """ - Parameter: String (unicode or bytes). + Parameter: - text string (unicode or bytes). + - language (`BCP 47` language tag.) Returns: The `text`, with "educated" curly quote characters. Example input: "Isn't this fun?" Example output: “Isn’t this fun?“; """ + smart = smartchars(language) + # oldtext = text punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" # Special case if the very first character is a quote - # followed by punctuation at a non-word-break. Close the quotes by brute force: - text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), smart.rquote, text) - text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), smart.rdquote, text) + # followed by punctuation at a non-word-break. 
+ # Close the quotes by brute force: + text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), smart.csquote, text) + text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), smart.cpquote, text) # Special case for double sets of quotes, e.g.: #

<p>He said, "'Quoted' words in a larger quote."</p>

- text = re.sub(r""""'(?=\w)""", smart.ldquote+smart.lquote, text) - text = re.sub(r"""'"(?=\w)""", smart.lquote+smart.ldquote, text) + text = re.sub(r""""'(?=\w)""", smart.opquote+smart.osquote, text) + text = re.sub(r"""'"(?=\w)""", smart.osquote+smart.opquote, text) # Special case for decade abbreviations (the '80s): - text = re.sub(r"""\b'(?=\d{2}s)""", smart.rquote, text) + text = re.sub(r"""\b'(?=\d{2}s)""", smart.csquote, text) close_class = r"""[^\ \t\r\n\[\{\(\-]""" dec_dashes = r"""–|—""" @@ -534,24 +601,24 @@ def educateQuotes(text): ' # the quote (?=\w) # followed by a word character """ % (dec_dashes,), re.VERBOSE) - text = opening_single_quotes_regex.sub(r'\1'+smart.lquote, text) + text = opening_single_quotes_regex.sub(r'\1'+smart.osquote, text) closing_single_quotes_regex = re.compile(r""" (%s) ' (?!\s | s\b | \d) """ % (close_class,), re.VERBOSE) - text = closing_single_quotes_regex.sub(r'\1'+smart.rquote, text) + text = closing_single_quotes_regex.sub(r'\1'+smart.csquote, text) closing_single_quotes_regex = re.compile(r""" (%s) ' (\s | s\b) """ % (close_class,), re.VERBOSE) - text = closing_single_quotes_regex.sub(r'\1%s\2' % smart.rquote, text) + text = closing_single_quotes_regex.sub(r'\1%s\2' % smart.csquote, text) # Any remaining single quotes should be opening ones: - text = re.sub(r"""'""", smart.lquote, text) + text = re.sub(r"""'""", smart.osquote, text) # Get most opening double quotes: opening_double_quotes_regex = re.compile(r""" @@ -566,7 +633,7 @@ def educateQuotes(text): " # the quote (?=\w) # followed by a word character """ % (dec_dashes,), re.VERBOSE) - text = opening_double_quotes_regex.sub(r'\1'+smart.ldquote, text) + text = opening_double_quotes_regex.sub(r'\1'+smart.opquote, text) # Double closing quotes: closing_double_quotes_regex = re.compile(r""" @@ -574,21 +641,21 @@ def educateQuotes(text): " (?=\s) """ % (close_class,), re.VERBOSE) - text = closing_double_quotes_regex.sub(smart.rdquote, text) + text = 
closing_double_quotes_regex.sub(smart.cpquote, text) closing_double_quotes_regex = re.compile(r""" (%s) # character that indicates the quote should be closing " """ % (close_class,), re.VERBOSE) - text = closing_double_quotes_regex.sub(r'\1'+smart.rdquote, text) + text = closing_double_quotes_regex.sub(r'\1'+smart.cpquote, text) # Any remaining quotes should be opening ones. - text = re.sub(r'"', smart.ldquote, text) + text = re.sub(r'"', smart.opquote, text) return text -def educateBackticks(text): +def educateBackticks(text, language='en'): """ Parameter: String (unicode or bytes). Returns: The `text`, with ``backticks'' -style double quotes @@ -596,13 +663,14 @@ def educateBackticks(text): Example input: ``Isn't this fun?'' Example output: “Isn't this fun?“; """ + smart = smartchars(language) - text = re.sub(r"""``""", smart.ldquote, text) - text = re.sub(r"""''""", smart.rdquote, text) + text = re.sub(r"""``""", smart.opquote, text) + text = re.sub(r"""''""", smart.cpquote, text) return text -def educateSingleBackticks(text): +def educateSingleBackticks(text, language='en'): """ Parameter: String (unicode or bytes). Returns: The `text`, with `backticks' -style single quotes @@ -611,9 +679,10 @@ def educateSingleBackticks(text): Example input: `Isn't this fun?' Example output: ‘Isn’t this fun?’ """ + smart = smartchars(language) - text = re.sub(r"""`""", smart.lquote, text) - text = re.sub(r"""'""", smart.rquote, text) + text = re.sub(r"""`""", smart.osquote, text) + text = re.sub(r"""'""", smart.csquote, text) return text @@ -624,8 +693,8 @@ def educateDashes(text): an em-dash character. """ - text = re.sub(r"""---""", smart.endash, text) # en (yes, backwards) - text = re.sub(r"""--""", smart.emdash, text) # em (yes, backwards) + text = re.sub(r"""---""", smartchars.endash, text) # en (yes, backwards) + text = re.sub(r"""--""", smartchars.emdash, text) # em (yes, backwards) return text @@ -637,8 +706,8 @@ def educateDashesOldSchool(text): an em-dash character. 
""" - text = re.sub(r"""---""", smart.emdash, text) # em (yes, backwards) - text = re.sub(r"""--""", smart.endash, text) # en (yes, backwards) + text = re.sub(r"""---""", smartchars.emdash, text) + text = re.sub(r"""--""", smartchars.endash, text) return text @@ -656,8 +725,8 @@ def educateDashesOldSchoolInverted(text): the shortcut should be shorter to type. (Thanks to Aaron Swartz for the idea.) """ - text = re.sub(r"""---""", smart.endash, text) # em - text = re.sub(r"""--""", smart.emdash, text) # en + text = re.sub(r"""---""", smartchars.endash, text) # em + text = re.sub(r"""--""", smartchars.emdash, text) # en return text @@ -672,12 +741,12 @@ def educateEllipses(text): Example output: Huh…? """ - text = re.sub(r"""\.\.\.""", smart.ellipsis, text) - text = re.sub(r"""\. \. \.""", smart.ellipsis, text) + text = re.sub(r"""\.\.\.""", smartchars.ellipsis, text) + text = re.sub(r"""\. \. \.""", smartchars.ellipsis, text) return text -def stupefyEntities(text): +def stupefyEntities(text, language='en'): """ Parameter: String (unicode or bytes). Returns: The `text`, with each SmartyPants character translated to @@ -686,22 +755,23 @@ def stupefyEntities(text): Example input: “Hello — world.” Example output: "Hello -- world." 
""" + smart = smartchars(language) text = re.sub(smart.endash, "-", text) # en-dash text = re.sub(smart.emdash, "--", text) # em-dash - text = re.sub(smart.lquote, "'", text) # open single quote - text = re.sub(smart.rquote, "'", text) # close single quote + text = re.sub(smart.osquote, "'", text) # open single quote + text = re.sub(smart.csquote, "'", text) # close single quote - text = re.sub(smart.ldquote, '"', text) # open double quote - text = re.sub(smart.rdquote, '"', text) # close double quote + text = re.sub(smart.opquote, '"', text) # open double quote + text = re.sub(smart.cpquote, '"', text) # close double quote text = re.sub(smart.ellipsis, '...', text)# ellipsis return text -def processEscapes(text): +def processEscapes(text, restore=False): r""" Parameter: String (unicode or bytes). Returns: The `text`, with after processing the following backslash @@ -717,12 +787,18 @@ def processEscapes(text): \- - \` ` """ - text = re.sub(r"""\\\\""", r"""\""", text) - text = re.sub(r'''\\"''', r""""""", text) - text = re.sub(r"""\\'""", r"""'""", text) - text = re.sub(r"""\\\.""", r""".""", text) - text = re.sub(r"""\\-""", r"""-""", text) - text = re.sub(r"""\\`""", r"""`""", text) + replacements = ((r'\\', r'\'), + (r'\"', r'"'), + (r"\'", r'''), + (r'\.', r'.'), + (r'\-', r'-'), + (r'\`', r'`')) + if restore: + for (ch, rep) in replacements: + text = text.replace(rep, ch[1]) + else: + for (ch, rep) in replacements: + text = text.replace(ch, rep) return text diff --git a/test/test_transforms/test_smartquotes.py b/test/test_transforms/test_smartquotes.py index 043b79e26..14332d2e1 100644 --- a/test/test_transforms/test_smartquotes.py +++ b/test/test_transforms/test_smartquotes.py @@ -1,9 +1,9 @@ #!/usr/bin/env python # -*- coding: utf8 -*- - # $Id$ - +# # :Copyright: © 2011 Günter Milde. 
+# :Maintainer: docutils-develop@lists.sourceforge.net # :License: Released under the terms of the `2-Clause BSD license`_, in short: # # Copying and distribution of this file, with or without modification, @@ -24,13 +24,20 @@ from docutils.parsers.rst import Parser def suite(): parser = Parser() + settings = {'smart_quotes': True} s = DocutilsTestSupport.TransformTestSuite( - parser, suite_settings={'smart_quotes': True}) + parser, suite_settings=settings) s.generateTests(totest) + settings['language_code'] = 'de' + s.generateTests(totest_de) + settings['smart_quotes'] = 'alternative' + s.generateTests(totest_de_alt) return s totest = {} +totest_de = {} +totest_de_alt = {} totest['transitions'] = ((SmartQuotes,), [ ["""\ @@ -47,7 +54,7 @@ u"""\ """], ["""\ Do not "educate" quotes ``inside "literal" text`` and :: - + "literal" blocks. Keep quotes straight in code and math: @@ -56,11 +63,11 @@ Keep quotes straight in code and math: .. code:: print "hello" - + .. math:: f'(x) = df(x)/dx - + """, u"""\ @@ -122,8 +129,72 @@ u"""\ inline “roles”\ """], +["""\ +.. class:: language-de + +German "smart quotes" and 'single smart quotes'. + +.. class:: language-foo + +"Quoting style" for unknown languages is 'ASCII'. + +.. class:: language-de-x-altquot + +Alternative German "smart quotes" and 'single smart quotes'. +""", +u"""\ + + + German „smart quotes“ and ‚single smart quotes‘. + + "Quoting style" for unknown languages is 'ASCII'. + + Alternative German »smart quotes« and ›single smart quotes‹. + + + No smart quotes defined for language "foo". +"""], +]) + +totest_de['transitions'] = ((SmartQuotes,), [ +["""\ +German "smart quotes" and 'single smart quotes'. + +.. class:: language-en-UK + +English "smart quotes" and 'single smart quotes'. +""", +u"""\ + + + German „smart quotes“ and ‚single smart quotes‘. + + English “smart quotes” and ‘single smart quotes’. 
+"""], ]) +totest_de_alt['transitions'] = ((SmartQuotes,), [ +["""\ +Alternative German "smart quotes" and 'single smart quotes'. + +.. class:: language-en-UK + +English "smart quotes" and 'single smart quotes' have no alternative. + +.. class:: language-ro + +Alternative Romanian "smart quotes" and 'single' smart quotes. +""", +u"""\ + + + Alternative German »smart quotes« and ›single smart quotes‹. + + English “smart quotes” and ‘single smart quotes’ have no alternative. + + Alternative Romanian «smart quotes» and „single” smart quotes. +"""], +]) if __name__ == '__main__': import unittest -- 2.11.4.GIT