From e9ba8d85312bc439b4aa345eda3fdd7f55e791ea Mon Sep 17 00:00:00 2001 From: milde Date: Fri, 31 Mar 2017 12:13:54 +0000 Subject: [PATCH] Update smartquotes: MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit * use the rules of the `Imprimerie nationale` as French default (full NBSP inside guillemets). * do not invert “ and ’ in en-UK: expect British authors to use U+0027 APOSTROPHE for primary quotes and " for secondary quotes in the source. Set ``--smart-quotes=alt`` (or use en-UK-x-altquot) for inversion by `smartquotes`. * do not call ``educate_backticks`` in the SmartQuotes transform: backticks have a special meaning in rST. git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@8053 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- HISTORY.txt | 4 ++-- docutils/transforms/universal.py | 2 +- docutils/utils/smartquotes.py | 40 ++++++++++++++++---------------- test/test_transforms/test_smartquotes.py | 12 ++++++---- 4 files changed, 30 insertions(+), 28 deletions(-) diff --git a/HISTORY.txt b/HISTORY.txt index ff725ba67..02f41fd23 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -56,8 +56,8 @@ Changes Since 0.13.1 * docutils/utils/smartquotes.py: - - Update quote definitions for languages et, fi, ro, sv, tr, uk. - - New quote definitions for hr, hsb, hu, lv, sl. + - Update quote definitions for languages et, fi, fr, ro, sv, tr, uk. + - New quote definitions for hr, hsb, hu, lv, sh, sl, sr. - Fix [ 313 ] Differentiate apostrophe from closing single quote (if possible). 
diff --git a/docutils/transforms/universal.py b/docutils/transforms/universal.py index 10c1e2f31..8a03258ea 100644 --- a/docutils/transforms/universal.py +++ b/docutils/transforms/universal.py @@ -288,7 +288,7 @@ class SmartQuotes(Transform): # Iterator educating quotes in plain text: # '2': set all, using old school en- and em- dash shortcuts teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes), - attr='2', language=lang) + attr='qDe', language=lang) for txtnode, newtext in zip(txtnodes, teacher): txtnode.parent.replace(txtnode, nodes.Text(newtext)) diff --git a/docutils/utils/smartquotes.py b/docutils/utils/smartquotes.py index 0802d9e2a..970ae87d8 100644 --- a/docutils/utils/smartquotes.py +++ b/docutils/utils/smartquotes.py @@ -192,7 +192,7 @@ Numeric values are the easiest way to configure SmartyPants' behavior: "1" Performs default SmartyPants transformations: quotes (including \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash) - is used to signify an em-dash; there is no support for en-dashes. + is used to signify an em-dash; there is no support for en-dashes. "2" Same as smarty_pants="1", except that it uses the old-school typewriter @@ -217,7 +217,8 @@ individual transformations from within the smarty_pants attribute. For example, to educate normal quotes and em-dashes, but not ellipses or \`\`backticks'' -style quotes: -``py['smartypants_attributes'] = "1"`` +E.g. ``py['smartypants_attributes'] = "1"`` is equivalent to +``py['smartypants_attributes'] = "qBde"``. "q" Educates normal quote characters: (") and ('). @@ -329,11 +330,10 @@ from a single quote by the algorithm. Therefore, a text like:: will get a single closing guillemet instead of an apostrophe. This can be prevented by use use of the curly apostrophe character (’) in -the source: +the source:: - .. 
class:: language-de-CH - - "Er sagt: 'Ich fass' es nicht.'" → "Er sagt: 'Ich fass’ es nicht.'" + - "Er sagt: 'Ich fass' es nicht.'" + + "Er sagt: 'Ich fass’ es nicht.'" Version History @@ -399,20 +399,14 @@ class smartchars(object): endash = u'–' # "–" EN DASH emdash = u'—' # "—" EM DASH ellipsis = u'…' # "…" HORIZONTAL ELLIPSIS - apostrophe = u'’' + apostrophe = u'’' # "’" RIGHT SINGLE QUOTATION MARK # quote characters (language-specific, set in __init__()) - # - # English smart quotes (open primary, close primary, open secondary, close - # secondary) are: - # opquote = u'“' # "“" LEFT DOUBLE QUOTATION MARK - # cpquote = u'”' # "”" RIGHT DOUBLE QUOTATION MARK - # osquote = u'‘' # "‘" LEFT SINGLE QUOTATION MARK - # csquote = u'’' # "’" RIGHT SINGLE QUOTATION MARK - # For other languages see: # http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks # http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen # https://fr.wikipedia.org/wiki/Guillemet + # http://typographisme.net/post/Les-espaces-typographiques-et-le-web + # http://www.btb.termiumplus.gc.ca/tpv2guides/guides/redac/index-fra.html # https://en.wikipedia.org/wiki/Hebrew_punctuation#Quotation_marks # http://www.tustep.uni-tuebingen.de/bi/bi00/bi001t1-anfuehrung.pdf quotes = {'af': u'“”‘’', @@ -428,7 +422,7 @@ class smartchars(object): 'de-ch': u'«»‹›', 'el': u'«»“”', 'en': u'“”‘’', - 'en-uk': u'‘’“”', + 'en-uk-x-altquot': u'‘’“”', # Attention: " → ‘ and ' → “ ! 
'eo': u'“”‘’', 'es': u'«»“”', 'es-x-altquot': u'“”‘’', @@ -437,10 +431,10 @@ class smartchars(object): 'eu': u'«»‹›', 'fi': u'””’’', 'fi-x-altquot': u'»»››', - 'fr': (u'« ', u' »', u'‹ ', u' ›'), # with narrow no-break space - 'fr-x-altquot': u'«»‹›', # for use with manually set spaces - # 'fr-x-altquot2': (u'“ ', u' ”', u'‘ ', u' ’'), # rarely used + 'fr': (u'« ', u' »', u'“', u'”'), # full no-break space + 'fr-x-altquot': (u'« ', u' »', u'“', u'”'), # narrow no-break space 'fr-ch': u'«»‹›', + 'fr-ch-x-altquot': (u'« ', u' »', u'‹ ', u' ›'), # narrow no-break space, http://typoguide.ch/ 'gl': u'«»“”', 'he': u'”“»«', 'he-x-altquot': u'„”‚’', @@ -465,8 +459,11 @@ class smartchars(object): 'pt-br': u'“”‘’', 'ro': u'„”«»', 'ru': u'«»„“', + 'sh': u'„”‚’', + 'sh-x-altquot': u'»«›‹', 'sk': u'„“‚‘', 'sk-x-altquot': u'»«›‹', + 'sr': u'„”’’', 'sl': u'„“‚‘', 'sl-x-altquot': u'»«›‹', 'sv': u'””’’', @@ -637,7 +634,8 @@ def educateQuotes(text, language='en'): text = re.sub(r"""'"(?=\w)""", smart.osquote+smart.opquote, text) # Special case for decade abbreviations (the '80s): - text = re.sub(r"""\b'(?=\d{2}s)""", smart.csquote, text) + if language.startswith('en'): # TODO similar cases in other languages? + text = re.sub(r"""'(?=\d{2}s)""", smart.apostrophe, text, re.UNICODE) close_class = r"""[^\ \t\r\n\[\{\(\-]""" dec_dashes = r"""–|—""" @@ -661,6 +659,8 @@ def educateQuotes(text, language='en'): if smart.csquote != smart.apostrophe: apostrophe_regex = re.compile(r"(?<=(\w|\d))'(?=\w)", re.UNICODE) text = apostrophe_regex.sub(smart.apostrophe, text) + # TODO: keep track of quoting level to recognize apostrophe in, e.g., + # "Ich fass' es nicht." 
closing_single_quotes_regex = re.compile(r""" (%s) diff --git a/test/test_transforms/test_smartquotes.py b/test/test_transforms/test_smartquotes.py index 309a6a987..4e5b4e53d 100644 --- a/test/test_transforms/test_smartquotes.py +++ b/test/test_transforms/test_smartquotes.py @@ -157,9 +157,10 @@ u"""\ German "smart quotes" and 'secondary smart quotes'. -.. class:: language-en-UK +.. class:: language-en-UK-x-altquot -British "quotes" use single and 'secondary quotes' double quote signs. +British "primary quotes" use single and +'secondary quotes' double quote signs. .. class:: language-foo @@ -173,13 +174,14 @@ u"""\ German „smart quotes“ and ‚secondary smart quotes‘. - - British ‘quotes’ use single and “secondary quotes” double quote signs. + + British ‘primary quotes’ use single and + “secondary quotes” double quote signs. "Quoting style" for unknown languages is 'ASCII'. Alternative German »smart quotes« and ›secondary smart quotes‹. - + No smart quotes defined for language "foo". """], -- 2.11.4.GIT