1 """Translation helper functions."""
7 import gettext
as gettext_module
8 from threading
import local
11 from cStringIO
import StringIO
13 from StringIO
import StringIO
15 from django
.utils
.importlib
import import_module
16 from django
.utils
.safestring
import mark_safe
, SafeData
19 # Translations are cached in a dictionary for every language+app tuple.
20 # The active translations are stored by threadid to make them thread local.
24 # The default translation is based on the settings file.
27 # This is a cache for normalized accept-header languages to prevent multiple
28 # file lookups when checking the same locale on repeated requests.
# Magic gettext character used to separate a message context from the message.
CONTEXT_SEPARATOR = u"\x04"
# Format of Accept-Language header values. From RFC 2616, section 14.4 and 3.9.
# Each match captures two groups: the language range and, when present, the
# q-value. The pattern is compiled in verbose mode, so the whitespace and the
# trailing "#" notes inside it are not part of the expression.
accept_language_re = re.compile(r'''
        ([A-Za-z]{1,8}(?:-[A-Za-z]{1,8})*|\*)         # "en", "en-au", "x-y-z", "*"
        (?:\s*;\s*q=(0(?:\.\d{,3})?|1(?:\.0{,3})?))?   # Optional "q=1.00", "q=0.8"
        (?:\s*,\s*|$)                                 # Multiple accepts per header.
        ''', re.VERBOSE)
# Matches the first segment of a URL path (e.g. "/en-us/" or "/de") and
# captures it as a candidate language code.
language_code_prefix_re = re.compile(r'^/([\w-]+)(/|$)')
def to_locale(language, to_lower=False):
    """
    Turns a language name (en-us) into a locale name (en_US). If 'to_lower' is
    True, the last component is lower-cased (en_us).

    A name without a '-' is simply lower-cased.
    """
    p = language.find('-')
    if p < 0:
        # No second component to convert.
        return language.lower()
    lang, tail = language[:p].lower(), language[p+1:]
    if to_lower:
        return lang + '_' + tail.lower()
    # Get correct locale for sr-latn: a second component longer than two
    # characters is title-cased instead of fully upper-cased.
    if len(tail) > 2:
        return lang + '_' + tail[0].upper() + tail[1:].lower()
    return lang + '_' + tail.upper()
def to_language(locale):
    """Turns a locale name (en_US) into a language name (en-us)."""
    p = locale.find('_')
    if p >= 0:
        return locale[:p].lower() + '-' + locale[p+1:].lower()
    # No territory part: the language name is just the lower-cased locale.
    return locale.lower()
class DjangoTranslation(gettext_module.GNUTranslations):
    """
    This class sets up the GNUTranslations context with regard to output
    charset.
    """
    def __init__(self, *args, **kw):
        gettext_module.GNUTranslations.__init__(self, *args, **kw)
        self.set_output_charset('utf-8')
        self.django_output_charset = 'utf-8'
        # Placeholder until set_language() is called.
        self.__language = '??'

    def merge(self, other):
        """Copies every entry of other's catalog into this one; other wins on conflicts."""
        self._catalog.update(other._catalog)

    def set_language(self, language):
        """Records the given language and its to_language() form."""
        self.__language = language
        # Resolves to the module-level to_language() function, not the
        # method of the same name defined below.
        self.__to_language = to_language(language)

    def language(self):
        """Returns the language as it was passed to set_language()."""
        return self.__language

    def to_language(self):
        """Returns the to_language() form stored by set_language()."""
        return self.__to_language

    def __repr__(self):
        return "<DjangoTranslation lang:%s>" % self.__language
95 def translation(language
):
97 Returns a translation object.
99 This translation object will be constructed out of multiple GNUTranslations
100 objects by merging their catalogs. It will construct a object for the
101 requested language and add a fallback to the default language, if it's
102 different from the requested language.
106 t
= _translations
.get(language
, None)
110 from django
.conf
import settings
112 globalpath
= os
.path
.join(os
.path
.dirname(sys
.modules
[settings
.__module
__].__file
__), 'locale')
114 if settings
.SETTINGS_MODULE
is not None:
115 parts
= settings
.SETTINGS_MODULE
.split('.')
116 project
= import_module(parts
[0])
117 projectpath
= os
.path
.join(os
.path
.dirname(project
.__file
__), 'locale')
121 def _fetch(lang
, fallback
=None):
125 res
= _translations
.get(lang
, None)
129 loc
= to_locale(lang
)
131 def _translation(path
):
133 t
= gettext_module
.translation('django', path
, [loc
], DjangoTranslation
)
139 res
= _translation(globalpath
)
141 # We want to ensure that, for example, "en-gb" and "en-us" don't share
142 # the same translation object (thus, merging en-us with a local update
143 # doesn't affect en-gb), even though they will both use the core "en"
144 # translation. So we have to subvert Python's internal gettext caching.
145 base_lang
= lambda x
: x
.split('-', 1)[0]
146 if base_lang(lang
) in [base_lang(trans
) for trans
in _translations
]:
147 res
._info
= res
._info
.copy()
148 res
._catalog
= res
._catalog
.copy()
151 t
= _translation(path
)
159 for appname
in reversed(settings
.INSTALLED_APPS
):
160 app
= import_module(appname
)
161 apppath
= os
.path
.join(os
.path
.dirname(app
.__file
__), 'locale')
163 if os
.path
.isdir(apppath
):
164 res
= _merge(apppath
)
166 localepaths
= [os
.path
.normpath(path
) for path
in settings
.LOCALE_PATHS
]
167 if (projectpath
and os
.path
.isdir(projectpath
) and
168 os
.path
.normpath(projectpath
) not in localepaths
):
169 res
= _merge(projectpath
)
171 for localepath
in reversed(settings
.LOCALE_PATHS
):
172 if os
.path
.isdir(localepath
):
173 res
= _merge(localepath
)
176 if fallback
is not None:
179 return gettext_module
.NullTranslations()
180 _translations
[lang
] = res
183 default_translation
= _fetch(settings
.LANGUAGE_CODE
)
184 current_translation
= _fetch(language
, fallback
=default_translation
)
186 return current_translation
def activate(language):
    """
    Looks up the translation object for the given language via translation()
    and stores it on ``_active`` as the currently active translation object.
    """
    selected = translation(language)
    _active.value = selected
198 Deinstalls the currently active translation object so that further _ calls
199 will resolve against the default translation object, again.
201 if hasattr(_active
, "value"):
def deactivate_all():
    """
    Replaces the active translation object with a new NullTranslations()
    instance. This is useful when we want delayed translations to appear as
    the original string.
    """
    null_translation = gettext_module.NullTranslations()
    _active.value = null_translation
213 """Returns the currently selected language."""
214 t
= getattr(_active
, "value", None)
217 return t
.to_language()
218 except AttributeError:
220 # If we don't have a real translation object, assume it's the default language.
221 from django
.conf
import settings
222 return settings
.LANGUAGE_CODE
def get_language_bidi():
    """
    Returns selected language's BiDi layout.

    * False = left-to-right layout
    * True = right-to-left layout
    """
    from django.conf import settings

    # Only the part before the first '-' is compared against LANGUAGES_BIDI.
    primary = get_language().split('-')[0]
    return primary in settings.LANGUAGES_BIDI
238 Returns the current active catalog for further processing.
239 This can be used if you need to modify the catalog or want to access the
240 whole message catalog instead of just translating one string.
244 t
= getattr(_active
, "value", None)
248 from django
.conf
import settings
249 _default
= translation(settings
.LANGUAGE_CODE
)
252 def do_translate(message
, translation_function
):
254 Translates 'message' using the given 'translation_function' name -- which
255 will be either gettext or ugettext. It uses the current thread to find the
256 translation object to use. If no current translation is activated, the
257 message will be run through the default translation object.
261 eol_message
= message
.replace('\r\n', '\n').replace('\r', '\n')
262 t
= getattr(_active
, "value", None)
264 result
= getattr(t
, translation_function
)(eol_message
)
267 from django
.conf
import settings
268 _default
= translation(settings
.LANGUAGE_CODE
)
269 result
= getattr(_default
, translation_function
)(eol_message
)
270 if isinstance(message
, SafeData
):
271 return mark_safe(result
)
def gettext(message):
    """Translates 'message' through do_translate() using the 'gettext' function."""
    function_name = 'gettext'
    return do_translate(message, function_name)
def ugettext(message):
    """Translates 'message' through do_translate() using the 'ugettext' function."""
    function_name = 'ugettext'
    return do_translate(message, function_name)
280 def pgettext(context
, message
):
281 result
= do_translate(
282 u
"%s%s%s" % (context
, CONTEXT_SEPARATOR
, message
), 'ugettext')
283 if CONTEXT_SEPARATOR
in result
:
284 # Translation not found
288 def gettext_noop(message
):
290 Marks strings for translation but doesn't translate them now. This can be
291 used to store strings in global variables that should stay in the base
292 language (because they might be used externally) and will be translated
297 def do_ntranslate(singular
, plural
, number
, translation_function
):
300 t
= getattr(_active
, "value", None)
302 return getattr(t
, translation_function
)(singular
, plural
, number
)
304 from django
.conf
import settings
305 _default
= translation(settings
.LANGUAGE_CODE
)
306 return getattr(_default
, translation_function
)(singular
, plural
, number
)
def ngettext(singular, plural, number):
    """
    Returns a UTF-8 bytestring of the translation of either the singular or
    plural, based on the number.
    """
    function_name = 'ngettext'
    return do_ntranslate(singular, plural, number, function_name)
def ungettext(singular, plural, number):
    """
    Returns a unicode string of the translation of either the singular or
    plural, based on the number.
    """
    function_name = 'ungettext'
    return do_ntranslate(singular, plural, number, function_name)
322 def npgettext(context
, singular
, plural
, number
):
323 result
= do_ntranslate(u
"%s%s%s" % (context
, CONTEXT_SEPARATOR
, singular
),
324 u
"%s%s%s" % (context
, CONTEXT_SEPARATOR
, plural
),
326 if CONTEXT_SEPARATOR
in result
:
327 # Translation not found
328 result
= do_ntranslate(singular
, plural
, number
, 'ungettext')
def all_locale_paths():
    """
    Returns a list of paths to user-provided language files.

    The first entry is the 'locale' directory next to the module that defines
    the settings object; the entries of settings.LOCALE_PATHS follow in order.
    """
    from django.conf import settings
    defining_module = sys.modules[settings.__module__]
    module_dir = os.path.dirname(defining_module.__file__)
    paths = [os.path.join(module_dir, 'locale')]
    paths.extend(settings.LOCALE_PATHS)
    return paths
def check_for_language(lang_code):
    """
    Checks whether there is a global language file for the given language
    code. This is used to decide whether a user-provided language is
    available. This is only used for language codes from either the cookies
    or session and during format localization.

    Returns True as soon as one of the paths from all_locale_paths() holds a
    'django' catalog for the locale, and False when none of them does.
    """
    # The locale name does not depend on the path, so compute it once.
    locale_name = to_locale(lang_code)
    for path in all_locale_paths():
        if gettext_module.find('django', path, [locale_name]) is not None:
            return True
    return False
352 def get_language_from_path(path
, supported
=None):
354 Returns the language-code if there is a valid language-code
357 if supported
is None:
358 from django
.conf
import settings
359 supported
= dict(settings
.LANGUAGES
)
360 regex_match
= language_code_prefix_re
.match(path
)
362 lang_code
= regex_match
.group(1)
363 if lang_code
in supported
and check_for_language(lang_code
):
366 def get_language_from_request(request
, check_path
=False):
368 Analyzes the request to find what language the user wants the system to
369 show. Only languages listed in settings.LANGUAGES are taken into account.
370 If the user requests a sublanguage where we have a main language, we send
371 out the main language.
373 If check_path is True, the URL path prefix will be checked for a language
374 code, otherwise this is skipped for backwards compatibility.
377 from django
.conf
import settings
378 supported
= dict(settings
.LANGUAGES
)
381 lang_code
= get_language_from_path(request
.path_info
, supported
)
382 if lang_code
is not None:
385 if hasattr(request
, 'session'):
386 lang_code
= request
.session
.get('django_language', None)
387 if lang_code
in supported
and lang_code
is not None and check_for_language(lang_code
):
390 lang_code
= request
.COOKIES
.get(settings
.LANGUAGE_COOKIE_NAME
)
392 if lang_code
and lang_code
not in supported
:
393 lang_code
= lang_code
.split('-')[0] # e.g. if fr-ca is not supported fallback to fr
395 if lang_code
and lang_code
in supported
and check_for_language(lang_code
):
398 accept
= request
.META
.get('HTTP_ACCEPT_LANGUAGE', '')
399 for accept_lang
, unused
in parse_accept_lang_header(accept
):
400 if accept_lang
== '*':
403 # We have a very restricted form for our language files (no encoding
404 # specifier, since they all must be UTF-8 and only one possible
405 # language each time. So we avoid the overhead of gettext.find() and
406 # work out the MO file manually.
408 # 'normalized' is the root name of the locale in POSIX format (which is
409 # the format used for the directories holding the MO files).
410 normalized
= locale
.locale_alias
.get(to_locale(accept_lang
, True))
413 # Remove the default encoding from locale_alias.
414 normalized
= normalized
.split('.')[0]
416 if normalized
in _accepted
:
417 # We've seen this locale before and have an MO file for it, so no
418 # need to check again.
419 return _accepted
[normalized
]
421 for lang
, dirname
in ((accept_lang
, normalized
),
422 (accept_lang
.split('-')[0], normalized
.split('_')[0])):
423 if lang
.lower() not in supported
:
425 for path
in all_locale_paths():
426 if os
.path
.exists(os
.path
.join(path
, dirname
, 'LC_MESSAGES', 'django.mo')):
427 _accepted
[normalized
] = lang
430 return settings
.LANGUAGE_CODE
# Matches any single non-whitespace character.
dot_re = re.compile(r'\S')
def blankout(src, char):
    """
    Changes every non-whitespace character to the given char.
    Used in the templatize function.

    Whitespace (including newlines) is left untouched.
    """
    return dot_re.sub(char, src)
# Patterns used to recognise translation constructs in template tokens.

# A "context" keyword followed by a quoted string (the captured group).
context_re = re.compile(r"""^\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?'))\s*""")
# "trans" followed by a quoted string, optionally with a context clause.
inline_re = re.compile(r"""^\s*trans\s+((?:"[^"]*?")|(?:'[^']*?'))(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?\s*""")
# "blocktrans", optionally with a context clause.
block_re = re.compile(r"""^\s*blocktrans(\s+.*context\s+((?:"[^"]*?")|(?:'[^']*?')))?(?:\s+|$)""")
# The "endblocktrans" and "plural" keywords on their own.
endblock_re = re.compile(r"""^\s*endblocktrans$""")
plural_re = re.compile(r"""^\s*plural$""")
# A quoted string wrapped in _( ... ); the quoted string is captured.
constant_re = re.compile(r"""_\(((?:".*?")|(?:'.*?'))\)""")
# A single "%" that is neither preceded nor followed by another "%".
one_percent_re = re.compile(r"""(?<!%)%(?!%)""")
449 def templatize(src
, origin
=None):
451 Turns a Django template into something that is understood by xgettext. It
452 does so by translating the Django translation tags into standard gettext
453 function invocations.
455 from django
.template
import (Lexer
, TOKEN_TEXT
, TOKEN_VAR
, TOKEN_BLOCK
,
456 TOKEN_COMMENT
, TRANSLATOR_COMMENT_MARK
)
458 message_context
= None
465 for t
in Lexer(src
, origin
).tokenize():
467 if t
.token_type
== TOKEN_BLOCK
and t
.contents
== 'endcomment':
468 content
= ''.join(comment
)
469 translators_comment_start
= None
470 for lineno
, line
in enumerate(content
.splitlines(True)):
471 if line
.lstrip().startswith(TRANSLATOR_COMMENT_MARK
):
472 translators_comment_start
= lineno
473 for lineno
, line
in enumerate(content
.splitlines(True)):
474 if translators_comment_start
is not None and lineno
>= translators_comment_start
:
475 out
.write(' # %s' % line
)
481 comment
.append(t
.contents
)
483 if t
.token_type
== TOKEN_BLOCK
:
484 endbmatch
= endblock_re
.match(t
.contents
)
485 pluralmatch
= plural_re
.match(t
.contents
)
489 out
.write(' npgettext(%r, %r, %r,count) ' % (message_context
, ''.join(singular
), ''.join(plural
)))
491 out
.write(' ngettext(%r, %r, count) ' % (''.join(singular
), ''.join(plural
)))
492 for part
in singular
:
493 out
.write(blankout(part
, 'S'))
495 out
.write(blankout(part
, 'P'))
498 out
.write(' pgettext(%r, %r) ' % (message_context
, ''.join(singular
)))
500 out
.write(' gettext(%r) ' % ''.join(singular
))
501 for part
in singular
:
502 out
.write(blankout(part
, 'S'))
503 message_context
= None
513 filemsg
= 'file %s, ' % origin
514 raise SyntaxError("Translation blocks must not include other block tags: %s (%sline %d)" % (t
.contents
, filemsg
, t
.lineno
))
515 elif t
.token_type
== TOKEN_VAR
:
517 plural
.append('%%(%s)s' % t
.contents
)
519 singular
.append('%%(%s)s' % t
.contents
)
520 elif t
.token_type
== TOKEN_TEXT
:
521 contents
= one_percent_re
.sub('%%', t
.contents
)
523 plural
.append(contents
)
525 singular
.append(contents
)
527 if t
.token_type
== TOKEN_BLOCK
:
528 imatch
= inline_re
.match(t
.contents
)
529 bmatch
= block_re
.match(t
.contents
)
530 cmatches
= constant_re
.findall(t
.contents
)
537 g
= one_percent_re
.sub('%%', g
)
539 # A context is provided
540 context_match
= context_re
.match(imatch
.group(2))
541 message_context
= context_match
.group(1)
542 if message_context
[0] == '"':
543 message_context
= message_context
.strip('"')
544 elif message_context
[0] == "'":
545 message_context
= message_context
.strip("'")
546 out
.write(' pgettext(%r, %r) ' % (message_context
, g
))
547 message_context
= None
549 out
.write(' gettext(%r) ' % g
)
551 for fmatch
in constant_re
.findall(t
.contents
):
552 out
.write(' _(%s) ' % fmatch
)
554 # A context is provided
555 context_match
= context_re
.match(bmatch
.group(1))
556 message_context
= context_match
.group(1)
557 if message_context
[0] == '"':
558 message_context
= message_context
.strip('"')
559 elif message_context
[0] == "'":
560 message_context
= message_context
.strip("'")
566 for cmatch
in cmatches
:
567 out
.write(' _(%s) ' % cmatch
)
568 elif t
.contents
== 'comment':
571 out
.write(blankout(t
.contents
, 'B'))
572 elif t
.token_type
== TOKEN_VAR
:
573 parts
= t
.contents
.split('|')
574 cmatch
= constant_re
.match(parts
[0])
576 out
.write(' _(%s) ' % cmatch
.group(1))
578 if p
.find(':_(') >= 0:
579 out
.write(' %s ' % p
.split(':',1)[1])
581 out
.write(blankout(p
, 'F'))
582 elif t
.token_type
== TOKEN_COMMENT
:
583 out
.write(' # %s' % t
.contents
)
585 out
.write(blankout(t
.contents
, 'X'))
586 return out
.getvalue()
def parse_accept_lang_header(lang_string):
    """
    Parses the lang_string, which is the body of an HTTP Accept-Language
    header, and returns a list of (lang, q-value), ordered by 'q' values.

    Any format errors in lang_string results in an empty list being returned.
    """
    result = []
    pieces = accept_language_re.split(lang_string)
    # A non-empty last piece is trailing text the pattern could not consume.
    if pieces[-1]:
        return []
    # split() with two capturing groups yields repeating triples of
    # (text before the match, language, q-value or None).
    for i in range(0, len(pieces) - 1, 3):
        first, lang, priority = pieces[i : i + 3]
        # Text between two matches means the header is malformed.
        if first:
            return []
        # A missing q-value defaults to 1.0. An explicit "q=0" must stay 0.0,
        # which the "x and y or z" idiom would silently turn into 1.0.
        try:
            priority = float(priority) if priority else 1.0
        except ValueError:
            # A q-value that is not a number is a format error as well.
            return []
        result.append((lang, priority))
    result.sort(key=lambda k: k[1], reverse=True)
    return result