From 2636dddfa65e6d1e7601aee4d1e8d7d47f7f13a7 Mon Sep 17 00:00:00 2001 From: Boris Bobrov Date: Tue, 11 Mar 2014 15:50:01 +0500 Subject: [PATCH] added new slugify with unidecode --- mediagoblin/tests/test_util.py | 6 ++++++ mediagoblin/tools/url.py | 18 ++---------------- setup.py | 1 + 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/mediagoblin/tests/test_util.py b/mediagoblin/tests/test_util.py index bc14f528..9d9b1c16 100644 --- a/mediagoblin/tests/test_util.py +++ b/mediagoblin/tests/test_util.py @@ -77,6 +77,12 @@ def test_slugify(): assert url.slugify(u'a w@lk in the park?') == u'a-w-lk-in-the-park' assert url.slugify(u'a walk in the par\u0107') == u'a-walk-in-the-parc' assert url.slugify(u'\u00E0\u0042\u00E7\u010F\u00EB\u0066') == u'abcdef' + # Russian + assert url.slugify(u'\u043f\u0440\u043e\u0433\u0443\u043b\u043a\u0430 ' + u'\u0432 \u043f\u0430\u0440\u043a\u0435') == u'progulka-v-parke' + # Korean + assert (url.slugify(u'\uacf5\uc6d0\uc5d0\uc11c \uc0b0\ucc45') == + u'gongweoneseo-sancaeg') def test_locale_to_lower_upper(): """ diff --git a/mediagoblin/tools/url.py b/mediagoblin/tools/url.py index d9179f9e..657c0373 100644 --- a/mediagoblin/tools/url.py +++ b/mediagoblin/tools/url.py @@ -15,15 +15,7 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. import re -# This import *is* used; see word.encode('tranlit/long') below. 
-from unicodedata import normalize - -try: - import translitcodec - USING_TRANSLITCODEC = True -except ImportError: - USING_TRANSLITCODEC = False - +from unidecode import unidecode _punct_re = re.compile(r'[\t !"#:$%&\'()*\-/<=>?@\[\\\]^_`{|},.]+') @@ -34,11 +26,5 @@ def slugify(text, delim=u'-'): """ result = [] for word in _punct_re.split(text.lower()): - if USING_TRANSLITCODEC: - word = word.encode('translit/long') - else: - word = normalize('NFKD', word).encode('ascii', 'ignore') - - if word: - result.append(word) + result.extend(unidecode(word).split()) return unicode(delim.join(result)) diff --git a/setup.py b/setup.py index 7abd896c..a3cc055c 100644 --- a/setup.py +++ b/setup.py @@ -65,6 +65,7 @@ try: 'pytz', 'six', 'oauthlib==0.5.0', + 'unidecode', ## Annoying. Please remove once we can! We only indirectly ## use pbr, and currently it breaks things, presumably till -- 2.11.4.GIT