python/lib/django_1_4/django/utils/translation/trans_real.py

   1 """Translation helper functions."""
   2
   3 import locale
   4 import os
   5 import re
   6 import sys
   7 import gettext as gettext_module
   8 from threading import local
   9
  10 try:
  11     from cStringIO import StringIO
  12 except ImportError:
  13     from StringIO import StringIO
  14
  15 from django.utils.importlib import import_module
  16 from django.utils.safestring import mark_safe, SafeData
  17
  18
  19 # Translations are cached in a dictionary for every language+app tuple.
  20 # The active translations are stored by threadid to make them thread local.
  21 _translations = {}
  22 _active = local()
  23
  24 # The default translation is based on the settings file.
  25 _default = None
  26
  27 # This is a cache for normalized accept-header languages to prevent multiple
  28 # file lookups when checking the same locale on repeated requests.
  29 _accepted = {}
  30
  31 # magic gettext number to separate context from message
  32 CONTEXT_SEPARATOR = u"\x04"
  33
  34 # Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9.
  35 accept_language_re = re.compile(r'''
  36         ([A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*|\*)         # "en", "en-au", "x-y-z", "*"
  37         (?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:.0{,3})?))?   # Optional "q=1.00", "q=0.8"
  38         (?:\s*,\s*|$)                                 # Multiple accepts per header.
  39         ''', re.VERBOSE)
  40
  41 language_code_prefix_re = re.compile(r'^/([\w-]+)(/|$)')
  42
  43 def to_locale(language, to_lower=False):
  44     """
  45     Turns a language name (en-us) into a locale name (en_US). If 'to_lower' is
  46     True, the last component is lower-cased (en_us).
  47     """
  48     p = language.find('-')
  49     if p >= 0:
  50         if to_lower:
  51             return language[:p].lower()+'_'+language[p+1:].lower()
  52         else:
  53             # Get correct locale for sr-latn
  54             if len(language[p+1:]) > 2:
  55                 return language[:p].lower()+'_'+language[p+1].upper()+language[p+2:].lower()
  56             return language[:p].lower()+'_'+language[p+1:].upper()
  57     else:
  58         return language.lower()
  59
  60 def to_language(locale):
  61     """Turns a locale name (en_US) into a language name (en-us)."""
  62     p = locale.find('_')
  63     if p >= 0:
  64         return locale[:p].lower()+'-'+locale[p+1:].lower()
  65     else:
  66         return locale.lower()
  67
  68 class DjangoTranslation(gettext_module.GNUTranslations):
  69     """
  70     This class sets up the GNUTranslations context with regard to output
  71     charset.
  72     """
  73     def __init__(self, *args, **kw):
  74         gettext_module.GNUTranslations.__init__(self, *args, **kw)
  75         self.set_output_charset('utf-8')
  76         self.django_output_charset = 'utf-8'
  77         self.__language = '??'
  78
  79     def merge(self, other):
  80         self._catalog.update(other._catalog)
  81
  82     def set_language(self, language):
  83         self.__language = language
  84         self.__to_language = to_language(language)
  85
  86     def language(self):
  87         return self.__language
  88
  89     def to_language(self):
  90         return self.__to_language
  91
  92     def __repr__(self):
  93         return "<DjangoTranslation lang:%s>" % self.__language
  94
  95 def translation(language):
  96     """
  97     Returns a translation object.
  98
  99     This translation object will be constructed out of multiple GNUTranslations
 100     objects by merging their catalogs. It will construct a object for the
 101     requested language and add a fallback to the default language, if it's
 102     different from the requested language.
 103     """
 104     global _translations
 105
 106     t = _translations.get(language, None)
 107     if t is not None:
 108         return t
 109
 110     from django.conf import settings
 111
 112     globalpath = os.path.join(os.path.dirname(sys.modules[settings.__module__].__file__), 'locale')
 113
 114     if settings.SETTINGS_MODULE is not None:
 115         parts = settings.SETTINGS_MODULE.split('.')
 116         project = import_module(parts[0])
 117         projectpath = os.path.join(os.path.dirname(project.__file__), 'locale')
 118     else:
 119         projectpath = None
 120
 121     def _fetch(lang, fallback=None):
 122
 123         global _translations
 124
 125         res = _translations.get(lang, None)
 126         if res is not None:
 127             return res
 128
 129         loc = to_locale(lang)
 130
 131         def _translation(path):
 132             try:
 133                 t = gettext_module.translation('django', path, [loc], DjangoTranslation)
 134                 t.set_language(lang)
 135                 return t
 136             except IOError:
 137                 return None
 138
 139         res = _translation(globalpath)
 140
 141         # We want to ensure that, for example,  "en-gb" and "en-us" don't share
 142         # the same translation object (thus, merging en-us with a local update
 143         # doesn't affect en-gb), even though they will both use the core "en"
 144         # translation. So we have to subvert Python's internal gettext caching.
 145         base_lang = lambda x: x.split('-', 1)[0]
 146         if base_lang(lang) in [base_lang(trans) for trans in _translations]:
 147             res._info = res._info.copy()
 148             res._catalog = res._catalog.copy()
 149
 150         def _merge(path):
 151             t = _translation(path)
 152             if t is not None:
 153                 if res is None:
 154                     return t
 155                 else:
 156                     res.merge(t)
 157             return res
 158
 159         for appname in reversed(settings.INSTALLED_APPS):
 160             app = import_module(appname)
 161             apppath = os.path.join(os.path.dirname(app.__file__), 'locale')
 162
 163             if os.path.isdir(apppath):
 164                 res = _merge(apppath)
 165
 166         localepaths = [os.path.normpath(path) for path in settings.LOCALE_PATHS]
 167         if (projectpath and os.path.isdir(projectpath) and
 168                 os.path.normpath(projectpath) not in localepaths):
 169             res = _merge(projectpath)
 170
 171         for localepath in reversed(settings.LOCALE_PATHS):
 172             if os.path.isdir(localepath):
 173                 res = _merge(localepath)
 174
 175         if res is None:
 176             if fallback is not None:
 177                 res = fallback
 178             else:
 179                 return gettext_module.NullTranslations()
 180         _translations[lang] = res
 181         return res
 182
 183     default_translation = _fetch(settings.LANGUAGE_CODE)
 184     current_translation = _fetch(language, fallback=default_translation)
 185
 186     return current_translation
 187
 188 def activate(language):
 189     """
 190     Fetches the translation object for a given tuple of application name and
 191     language and installs it as the current translation object for the current
 192     thread.
 193     """
 194     _active.value = translation(language)
 195
 196 def deactivate():
 197     """
 198     Deinstalls the currently active translation object so that further _ calls
 199     will resolve against the default translation object, again.
 200     """
 201     if hasattr(_active, "value"):
 202         del _active.value
 203
 204 def deactivate_all():
 205     """
 206     Makes the active translation object a NullTranslations() instance. This is
 207     useful when we want delayed translations to appear as the original string
 208     for some reason.
 209     """
 210     _active.value = gettext_module.NullTranslations()
 211
 212 def get_language():
 213     """Returns the currently selected language."""
 214     t = getattr(_active, "value", None)
 215     if t is not None:
 216         try:
 217             return t.to_language()
 218         except AttributeError:
 219             pass
 220     # If we don't have a real translation object, assume it's the default language.
 221     from django.conf import settings
 222     return settings.LANGUAGE_CODE
 223
 224 def get_language_bidi():
 225     """
 226     Returns selected language's BiDi layout.
 227
 228     * False = left-to-right layout
 229     * True = right-to-left layout
 230     """
 231     from django.conf import settings
 232
 233     base_lang = get_language().split('-')[0]
 234     return base_lang in settings.LANGUAGES_BIDI
 235
 236 def catalog():
 237     """
 238     Returns the current active catalog for further processing.
 239     This can be used if you need to modify the catalog or want to access the
 240     whole message catalog instead of just translating one string.
 241     """
 242     global _default
 243
 244     t = getattr(_active, "value", None)
 245     if t is not None:
 246         return t
 247     if _default is None:
 248         from django.conf import settings
 249         _default = translation(settings.LANGUAGE_CODE)
 250     return _default
 251
 252 def do_translate(message, translation_function):
 253     """
 254     Translates 'message' using the given 'translation_function' name -- which
 255     will be either gettext or ugettext. It uses the current thread to find the
 256     translation object to use. If no current translation is activated, the
 257     message will be run through the default translation object.
 258     """
 259     global _default
 260
 261     eol_message = message.replace('\r\n', '\n').replace('\r', '\n')
 262     t = getattr(_active, "value", None)
 263     if t is not None:
 264         result = getattr(t, translation_function)(eol_message)
 265     else:
 266         if _default is None:
 267             from django.conf import settings
 268             _default = translation(settings.LANGUAGE_CODE)
 269         result = getattr(_default, translation_function)(eol_message)
 270     if isinstance(message, SafeData):
 271         return mark_safe(result)
 272     return result
 273
 274 def gettext(message):
 275     return do_translate(message, 'gettext')
 276
 277 def ugettext(message):
 278     return do_translate(message, 'ugettext')
 279
 280 def pgettext(context, message):
 281     result = do_translate(
 282         u"%s%s%s" % (context, CONTEXT_SEPARATOR, message), 'ugettext')
 283     if CONTEXT_SEPARATOR in result:
 284         # Translation not found
 285         result = message
 286     return result
 287
 288 def gettext_noop(message):
 289     """
 290     Marks strings for translation but doesn't translate them now. This can be
 291     used to store strings in global variables that should stay in the base
 292     language (because they might be used externally) and will be translated
 293     later.
 294     """
 295     return message
 296
 297 def do_ntranslate(singular, plural, number, translation_function):
 298     global _default
 299
 300     t = getattr(_active, "value", None)
 301     if t is not None:
 302         return getattr(t, translation_function)(singular, plural, number)
 303     if _default is None:
 304         from django.conf import settings
 305         _default = translation(settings.LANGUAGE_CODE)
 306     return getattr(_default, translation_function)(singular, plural, number)
 307
 308 def ngettext(singular, plural, number):
 309     """
 310     Returns a UTF-8 bytestring of the translation of either the singular or
 311     plural, based on the number.
 312     """
 313     return do_ntranslate(singular, plural, number, 'ngettext')
 314
 315 def ungettext(singular, plural, number):
 316     """
 317     Returns a unicode strings of the translation of either the singular or
 318     plural, based on the number.
 319     """
 320     return do_ntranslate(singular, plural, number, 'ungettext')
 321
 322 def npgettext(context, singular, plural, number):
 323     result = do_ntranslate(u"%s%s%s" % (context, CONTEXT_SEPARATOR, singular),
 324                            u"%s%s%s" % (context, CONTEXT_SEPARATOR, plural),
 325                            number, 'ungettext')
 326     if CONTEXT_SEPARATOR in result:
 327         # Translation not found
 328         result = do_ntranslate(singular, plural, number, 'ungettext')
 329     return result
 330
 331 def all_locale_paths():
 332     """
 333     Returns a list of paths to user-provides languages files.
 334     """
 335     from django.conf import settings
 336     globalpath = os.path.join(
 337         os.path.dirname(sys.modules[settings.__module__].__file__), 'locale')
 338     return [globalpath] + list(settings.LOCALE_PATHS)
 339
 340 def check_for_language(lang_code):
 341     """
 342     Checks whether there is a global language file for the given language
 343     code. This is used to decide whether a user-provided language is
 344     available. This is only used for language codes from either the cookies
 345     or session and during format localization.
 346     """
 347     for path in all_locale_paths():
 348         if gettext_module.find('django', path, [to_locale(lang_code)]) is not None:
 349             return True
 350     return False
 351
 352 def get_language_from_path(path, supported=None):
 353     """
 354     Returns the language-code if there is a valid language-code
 355     found in the `path`.
 356     """
 357     if supported is None:
 358         from django.conf import settings
 359         supported = dict(settings.LANGUAGES)
 360     regex_match = language_code_prefix_re.match(path)
 361     if regex_match:
 362         lang_code = regex_match.group(1)
 363         if lang_code in supported and check_for_language(lang_code):
 364             return lang_code
 365
 366 def get_language_from_request(request, check_path=False):
 367     """
 368     Analyzes the request to find what language the user wants the system to
 369     show. Only languages listed in settings.LANGUAGES are taken into account.
 370     If the user requests a sublanguage where we have a main language, we send
 371     out the main language.
 372
 373     If check_path is True, the URL path prefix will be checked for a language
 374     code, otherwise this is skipped for backwards compatibility.
 375     """
 376     global _accepted
 377     from django.conf import settings
 378     supported = dict(settings.LANGUAGES)
 379
 380     if check_path:
 381         lang_code = get_language_from_path(request.path_info, supported)
 382         if lang_code is not None:
 383             return lang_code
 384
 385     if hasattr(request, 'session'):
 386         lang_code = request.session.get('django_language', None)
 387         if lang_code in supported and lang_code is not None and check_for_language(lang_code):
 388             return lang_code
 389
 390     lang_code = request.COOKIES.get(settings.LANGUAGE_COOKIE_NAME)
 391
 392     if lang_code and lang_code not in supported:
 393         lang_code = lang_code.split('-')[0] # e.g. if fr-ca is not supported fallback to fr
 394
 395     if lang_code and lang_code in supported and check_for_language(lang_code):
 396         return lang_code
 397
 398     accept = request.META.get('HTTP_ACCEPT_LANGUAGE', '')
 399     for accept_lang, unused in parse_accept_lang_header(accept):
 400         if accept_lang == '*':
 401             break
 402
 403         # We have a very restricted form for our language files (no encoding
 404         # specifier, since they all must be UTF-8 and only one possible
 405         # language each time. So we avoid the overhead of gettext.find() and
 406         # work out the MO file manually.
 407
 408         # 'normalized' is the root name of the locale in POSIX format (which is
 409         # the format used for the directories holding the MO files).
 410         normalized = locale.locale_alias.get(to_locale(accept_lang, True))
 411         if not normalized:
 412             continue
 413         # Remove the default encoding from locale_alias.
 414         normalized = normalized.split('.')[0]
 415
 416         if normalized in _accepted:
 417             # We've seen this locale before and have an MO file for it, so no
 418             # need to check again.
 419             return _accepted[normalized]
 420
 421         for lang, dirname in ((accept_lang, normalized),
 422                 (accept_lang.split('-')[0], normalized.split('_')[0])):
 423             if lang.lower() not in supported:
 424                 continue
 425             for path in all_locale_paths():
 426                 if os.path.exists(os.path.join(path, dirname, 'LC_MESSAGES', 'django.mo')):
 427                     _accepted[normalized] = lang
 428                     return lang
 429
 430     return settings.LANGUAGE_CODE
 431
 432 dot_re = re.compile(r'\S')
 433 def blankout(src, char):
 434     """
 435     Changes every non-whitespace character to the given char.
 436     Used in the templatize function.
 437     """
 438     return dot_re.sub(char, src)
 439
 440 context_re = re.compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""")
 441 inline_re = re.compile(r"""^\s*trans\s+((?:"[^"]*?")|(?:'[^']*?'))(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*""")
 442 block_re = re.compile(r"""^\s*blocktrans(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?(?:\s+|$)""")
 443 endblock_re = re.compile(r"""^\s*endblocktrans$""")
 444 plural_re = re.compile(r"""^\s*plural$""")
 445 constant_re = re.compile(r"""_\(((?:".*?")|(?:'.*?'))\)""")
 446 one_percent_re = re.compile(r"""(?<!%)%(?!%)""")
 447
 448
 449 def templatize(src, origin=None):
 450     """
 451     Turns a Django template into something that is understood by xgettext. It
 452     does so by translating the Django translation tags into standard gettext
 453     function invocations.
 454     """
 455     from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK,
 456             TOKEN_COMMENT, TRANSLATOR_COMMENT_MARK)
 457     out = StringIO()
 458     message_context = None
 459     intrans = False
 460     inplural = False
 461     singular = []
 462     plural = []
 463     incomment = False
 464     comment = []
 465     for t in Lexer(src, origin).tokenize():
 466         if incomment:
 467             if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
 468                 content = ''.join(comment)
 469                 translators_comment_start = None
 470                 for lineno, line in enumerate(content.splitlines(True)):
 471                     if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
 472                         translators_comment_start = lineno
 473                 for lineno, line in enumerate(content.splitlines(True)):
 474                     if translators_comment_start is not None and lineno >= translators_comment_start:
 475                         out.write(' # %s' % line)
 476                     else:
 477                         out.write(' #\n')
 478                 incomment = False
 479                 comment = []
 480             else:
 481                 comment.append(t.contents)
 482         elif intrans:
 483             if t.token_type == TOKEN_BLOCK:
 484                 endbmatch = endblock_re.match(t.contents)
 485                 pluralmatch = plural_re.match(t.contents)
 486                 if endbmatch:
 487                     if inplural:
 488                         if message_context:
 489                             out.write(' npgettext(%r, %r, %r,count) ' % (message_context, ''.join(singular), ''.join(plural)))
 490                         else:
 491                             out.write(' ngettext(%r, %r, count) ' % (''.join(singular), ''.join(plural)))
 492                         for part in singular:
 493                             out.write(blankout(part, 'S'))
 494                         for part in plural:
 495                             out.write(blankout(part, 'P'))
 496                     else:
 497                         if message_context:
 498                             out.write(' pgettext(%r, %r) ' % (message_context, ''.join(singular)))
 499                         else:
 500                             out.write(' gettext(%r) ' % ''.join(singular))
 501                         for part in singular:
 502                             out.write(blankout(part, 'S'))
 503                     message_context = None
 504                     intrans = False
 505                     inplural = False
 506                     singular = []
 507                     plural = []
 508                 elif pluralmatch:
 509                     inplural = True
 510                 else:
 511                     filemsg = ''
 512                     if origin:
 513                         filemsg = 'file %s, ' % origin
 514                     raise SyntaxError("Translation blocks must not include other block tags: %s (%sline %d)" % (t.contents, filemsg, t.lineno))
 515             elif t.token_type == TOKEN_VAR:
 516                 if inplural:
 517                     plural.append('%%(%s)s' % t.contents)
 518                 else:
 519                     singular.append('%%(%s)s' % t.contents)
 520             elif t.token_type == TOKEN_TEXT:
 521                 contents = one_percent_re.sub('%%', t.contents)
 522                 if inplural:
 523                     plural.append(contents)
 524                 else:
 525                     singular.append(contents)
 526         else:
 527             if t.token_type == TOKEN_BLOCK:
 528                 imatch = inline_re.match(t.contents)
 529                 bmatch = block_re.match(t.contents)
 530                 cmatches = constant_re.findall(t.contents)
 531                 if imatch:
 532                     g = imatch.group(1)
 533                     if g[0] == '"':
 534                         g = g.strip('"')
 535                     elif g[0] == "'":
 536                         g = g.strip("'")
 537                     g = one_percent_re.sub('%%', g)
 538                     if imatch.group(2):
 539                         # A context is provided
 540                         context_match = context_re.match(imatch.group(2))
 541                         message_context = context_match.group(1)
 542                         if message_context[0] == '"':
 543                             message_context = message_context.strip('"')
 544                         elif message_context[0] == "'":
 545                             message_context = message_context.strip("'")
 546                         out.write(' pgettext(%r, %r) ' % (message_context, g))
 547                         message_context = None
 548                     else:
 549                         out.write(' gettext(%r) ' % g)
 550                 elif bmatch:
 551                     for fmatch in constant_re.findall(t.contents):
 552                         out.write(' _(%s) ' % fmatch)
 553                     if bmatch.group(1):
 554                         # A context is provided
 555                         context_match = context_re.match(bmatch.group(1))
 556                         message_context = context_match.group(1)
 557                         if message_context[0] == '"':
 558                             message_context = message_context.strip('"')
 559                         elif message_context[0] == "'":
 560                             message_context = message_context.strip("'")
 561                     intrans = True
 562                     inplural = False
 563                     singular = []
 564                     plural = []
 565                 elif cmatches:
 566                     for cmatch in cmatches:
 567                         out.write(' _(%s) ' % cmatch)
 568                 elif t.contents == 'comment':
 569                     incomment = True
 570                 else:
 571                     out.write(blankout(t.contents, 'B'))
 572             elif t.token_type == TOKEN_VAR:
 573                 parts = t.contents.split('|')
 574                 cmatch = constant_re.match(parts[0])
 575                 if cmatch:
 576                     out.write(' _(%s) ' % cmatch.group(1))
 577                 for p in parts[1:]:
 578                     if p.find(':_(') >= 0:
 579                         out.write(' %s ' % p.split(':',1)[1])
 580                     else:
 581                         out.write(blankout(p, 'F'))
 582             elif t.token_type == TOKEN_COMMENT:
 583                 out.write(' # %s' % t.contents)
 584             else:
 585                 out.write(blankout(t.contents, 'X'))
 586     return out.getvalue()
 587
 588 def parse_accept_lang_header(lang_string):
 589     """
 590     Parses the lang_string, which is the body of an HTTP Accept-Language
 591     header, and returns a list of (lang, q-value), ordered by 'q' values.
 592
 593     Any format errors in lang_string results in an empty list being returned.
 594     """
 595     result = []
 596     pieces = accept_language_re.split(lang_string)
 597     if pieces[-1]:
 598         return []
 599     for i in range(0, len(pieces) - 1, 3):
 600         first, lang, priority = pieces[i : i + 3]
 601         if first:
 602             return []
 603         priority = priority and float(priority) or 1.0
 604         result.append((lang, priority))
 605     result.sort(key=lambda k: k[1], reverse=True)
 606     return result