From 1111dc8aa18f812eba42d6208760835c7ae8c2e6 Mon Sep 17 00:00:00 2001 From: milde Date: Fri, 23 Nov 2012 01:18:49 +0000 Subject: [PATCH] normalize_language_tag() now returns `BCP 47`_ conformant tags Subtags separated by ``-``, not ``_``. git-svn-id: https://docutils.svn.sourceforge.net/svnroot/docutils/trunk/docutils@7538 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- HISTORY.txt | 5 ++++ docutils/languages/__init__.py | 1 + docutils/parsers/rst/languages/__init__.py | 1 + docutils/utils/__init__.py | 19 +++++++------- docutils/writers/latex2e/__init__.py | 41 +++++++++++++++++------------- docutils/writers/xetex/__init__.py | 27 +++++++++++--------- test/test_utils.py | 11 +++++--- 7 files changed, 63 insertions(+), 42 deletions(-) diff --git a/HISTORY.txt b/HISTORY.txt index 20730689c..545d7a542 100644 --- a/HISTORY.txt +++ b/HISTORY.txt @@ -43,6 +43,11 @@ Changes Since 0.9.1 - Add SmartQuotes transform for typographic quotes and dashes. +* docutils/utils/__init__.py + + - normalize_language_tag() now returns `BCP 47`_ conformant tags + with subtags separated by ``-``. + * docutils/writers/html4css1/__init__.py - Use ``<code>`` tag for inline "code", diff --git a/docutils/languages/__init__.py b/docutils/languages/__init__.py index c3d6c05a6..47d896851 100644 --- a/docutils/languages/__init__.py +++ b/docutils/languages/__init__.py @@ -27,6 +27,7 @@ def get_language(language_code, reporter=None): """ # TODO: use a dummy module returning emtpy strings?, configurable? 
for tag in normalize_language_tag(language_code): + tag = tag.replace('-','_') # '-' not valid in module names if tag in _languages: return _languages[tag] try: diff --git a/docutils/parsers/rst/languages/__init__.py b/docutils/parsers/rst/languages/__init__.py index 9730efcd0..c52989a4d 100644 --- a/docutils/parsers/rst/languages/__init__.py +++ b/docutils/parsers/rst/languages/__init__.py @@ -22,6 +22,7 @@ _languages = {} def get_language(language_code): for tag in normalize_language_tag(language_code): + tag = tag.replace('-','_') # '-' not valid in module names if tag in _languages: return _languages[tag] try: diff --git a/docutils/utils/__init__.py b/docutils/utils/__init__.py index ac2ad6edf..1aead4884 100644 --- a/docutils/utils/__init__.py +++ b/docutils/utils/__init__.py @@ -12,6 +12,7 @@ __docformat__ = 'reStructuredText' import sys import os import os.path +import re import warnings import unicodedata from docutils import ApplicationError, DataError @@ -642,20 +643,20 @@ def normalize_language_tag(tag): Example: - >>> normalize_language_tag('de-AT-1901') - ['de_at_1901', 'de_at', 'de_1901', 'de'] + >>> normalize_language_tag('de_AT-1901') + ['de-at-1901', 'de-at', 'de-1901', 'de'] """ # normalize: - tag = tag.lower().replace('-','_') - # find all combinations of subtags + tag = tag.lower().replace('_','-') + # split (except singletons, which mark the following tag as non-standard): + tag = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', tag) taglist = [] - base_tag= tag.split('_')[:1] - subtags = tag.split('_')[1:] - # print base_tag, subtags + subtags = [subtag.replace('_', '-') for subtag in tag.split('-')] + base_tag = [subtags.pop(0)] + # find all combinations of subtags for n in range(len(subtags), 0, -1): for tags in unique_combinations(subtags, n): - # print tags - taglist.append('_'.join(base_tag + tags)) + taglist.append('-'.join(base_tag+tags)) taglist += base_tag return taglist diff --git a/docutils/writers/latex2e/__init__.py 
b/docutils/writers/latex2e/__init__.py index a01a9c4d0..864cc9e13 100644 --- a/docutils/writers/latex2e/__init__.py +++ b/docutils/writers/latex2e/__init__.py @@ -293,18 +293,18 @@ class Babel(object): 'cy': 'welsh', 'da': 'danish', 'de': 'ngerman', # new spelling (de_1996) - 'de_1901': 'german', # old spelling - 'de_at': 'naustrian', - 'de_at_1901': 'austrian', + 'de-1901': 'german', # old spelling + 'de-AT': 'naustrian', + 'de-AT-1901': 'austrian', 'dsb': 'lowersorbian', 'el': 'greek', # monotonic (el-monoton) - 'el_polyton': 'polutonikogreek', + 'el-polyton': 'polutonikogreek', 'en': 'english', # TeX' default language - 'en_au': 'australian', - 'en_ca': 'canadian', - 'en_gb': 'british', - 'en_nz': 'newzealand', - 'en_us': 'american', + 'en-AU': 'australian', + 'en-CA': 'canadian', + 'en-GB': 'british', + 'en-NZ': 'newzealand', + 'en-US': 'american', 'eo': 'esperanto', # '^' is active 'es': 'spanish', 'et': 'estonian', @@ -312,10 +312,10 @@ class Babel(object): # 'fa': 'farsi', 'fi': 'finnish', 'fr': 'french', - 'fr_ca': 'canadien', + 'fr-CA': 'canadien', 'ga': 'irish', # Irish Gaelic # 'grc': # Ancient Greek - 'grc_ibycus': 'ibycus', # Ibycus encoding + 'grc-ibycus': 'ibycus', # Ibycus encoding 'gl': 'galician', 'he': 'hebrew', 'hr': 'croatian', @@ -338,24 +338,27 @@ class Babel(object): 'no': 'norsk', # Norwegian Bokmal 'pl': 'polish', 'pt': 'portuges', - 'pt_br': 'brazil', + 'pt-BR': 'brazil', 'ro': 'romanian', 'ru': 'russian', # '"' is active 'se': 'samin', # North Sami - # sh-cyrl: Serbo-Croatian, Cyrillic script - 'sh-latn': 'serbian', # Serbo-Croatian, Latin script + # sh-Cyrl: Serbo-Croatian, Cyrillic script + 'sh-Latn': 'serbian', # Serbo-Croatian, Latin script 'sk': 'slovak', 'sl': 'slovene', 'sq': 'albanian', - # 'sr-cyrl': Serbian, Cyrillic script (sr-cyrl) - 'sr-latn': 'serbian', # Serbian, Latin script, " active. + # 'sr-Cyrl': Serbian, Cyrillic script (sr-cyrl) + 'sr-Latn': 'serbian', # Serbian, Latin script, " active. 
'sv': 'swedish', # 'th': 'thai', 'tr': 'turkish', 'uk': 'ukrainian', 'vi': 'vietnam', - # zh-latn: Chinese Pinyin + # zh-Latn: Chinese Pinyin } + # normalize (downcase) keys + language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()]) + warn_msg = 'Language "%s" not supported by LaTeX (babel)' def __init__(self, language_code, reporter=None): @@ -1595,8 +1598,12 @@ class LaTeXTranslator(nodes.NodeVisitor): self.out.append( '%\n\\begin{list}{}{}\n' ) else: self.out.append( '%\n\\begin{itemize}\n' ) + # if node['classes']: + # self.visit_inline(node) def depart_bullet_list(self, node): + # if node['classes']: + # self.depart_inline(node) if self.is_toc_list: self.out.append( '\n\\end{list}\n' ) else: diff --git a/docutils/writers/xetex/__init__.py b/docutils/writers/xetex/__init__.py index 8c5016e2f..150cb49ce 100644 --- a/docutils/writers/xetex/__init__.py +++ b/docutils/writers/xetex/__init__.py @@ -76,30 +76,33 @@ class Babel(latex2e.Babel): # code Polyglossia-name comment 'cop': 'coptic', 'de': 'german', # new spelling (de_1996) - 'de_1901': 'ogerman', # old spelling + 'de-1901': 'ogerman', # old spelling 'dv': 'divehi', # Maldivian 'dsb': 'lsorbian', - 'el_polyton': 'polygreek', + 'el-polyton': 'polygreek', 'fa': 'farsi', 'grc': 'ancientgreek', 'hsb': 'usorbian', - 'sh-cyrl': 'serbian', # Serbo-Croatian, Cyrillic script - 'sh-latn': 'croatian', # Serbo-Croatian, Latin script + 'sh-Cyrl': 'serbian', # Serbo-Croatian, Cyrillic script + 'sh-Latn': 'croatian', # Serbo-Croatian, Latin script 'sq': 'albanian', - 'sr': 'serbian', # Cyrillic script (sr-cyrl) + 'sr': 'serbian', # Cyrillic script (sr-Cyrl) 'th': 'thai', 'vi': 'vietnamese', - # zh-latn: ??? # Chinese Pinyin + # zh-Latn: ??? 
# Chinese Pinyin }) + # normalize (downcase) keys + language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()]) + # Languages without Polyglossia support: for key in ('af', # 'afrikaans', - 'de_at', # 'naustrian', - 'de_at_1901', # 'austrian', - 'fr_ca', # 'canadien', - 'grc_ibycus', # 'ibycus', (Greek Ibycus encoding) - 'sr-latn', # 'serbian script=latin' + 'de-AT', # 'naustrian', + 'de-AT-1901', # 'austrian', + 'fr-CA', # 'canadien', + 'grc-ibycus', # 'ibycus', (Greek Ibycus encoding) + 'sr-Latn', # 'serbian script=latin' ): - del(language_codes[key]) + del(language_codes[key.lower()]) def __init__(self, language_code, reporter): self.language_code = language_code diff --git a/test/test_utils.py b/test/test_utils.py index de5154ca2..dec123ee4 100755 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -240,12 +240,15 @@ class HelperFunctionsTests(unittest.TestCase): def test_normalize_language_tag(self): self.assertEqual(utils.normalize_language_tag('de'), ['de']) self.assertEqual(utils.normalize_language_tag('de-AT'), - ['de_at', 'de']) + ['de-at', 'de']) self.assertEqual(utils.normalize_language_tag('de-AT-1901'), - ['de_at_1901', 'de_at', 'de_1901', 'de']) + ['de-at-1901', 'de-at', 'de-1901', 'de']) self.assertEqual(utils.normalize_language_tag('de-AT-1901-frak'), - ['de_at_1901_frak', 'de_at_1901', 'de_at_frak', - 'de_1901_frak', 'de_at', 'de_1901', 'de_frak', 'de']) + ['de-at-1901-frak', 'de-at-1901', 'de-at-frak', + 'de-1901-frak', 'de-at', 'de-1901', 'de-frak', 'de']) + self.assertEqual(utils.normalize_language_tag('grc-ibycus-x-altquot'), + ['grc-ibycus-x-altquot', 'grc-ibycus', + 'grc-x-altquot', 'grc']) def test_column_width(self): self.assertEqual(utils.column_width(u'de'), 2) -- 2.11.4.GIT