dev: format code using "garden fmt" (black)
[git-cola.git] / cola / polib.py
blob6caf058eedaa5495acabeab3938a00a53a90c33f
1 # -* coding: utf-8 -*-
3 # License: MIT (see extras/polib/LICENSE file provided)
4 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
5 # pylint: disable=consider-using-with,no-else-return
7 """
8 **polib** allows you to manipulate, create, modify gettext files (pot, po and
9 mo files). You can load existing files, iterate through their entries, add,
10 modify entries, comments or metadata, etc. or create new po files from scratch.
12 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
13 :func:`~polib.mofile` convenience functions.
14 """
15 from __future__ import absolute_import, division, print_function
16 import array
17 import codecs
18 import os
19 import re
20 import struct
21 import sys
22 import textwrap
23 import io
25 from . import compat
__author__ = 'David Jean Louis <izimobil@gmail.com>'
__version__ = '1.1.1'

# names exported by ``from polib import *``
__all__ = [
    'pofile',
    'POFile',
    'POEntry',
    'mofile',
    'MOFile',
    'MOEntry',
    'default_encoding',
    'escape',
    'unescape',
    'detect_encoding',
]

# fallback encoding, used whenever the encoding of a file cannot be detected
default_encoding = 'utf-8'
47 # python 2/3 compatibility helpers {{{
# Decide once which major version of the interpreter is running; the helpers
# below hide the bytes/text differences between Python 2 and Python 3.
PY3 = sys.version_info >= (3,)

if PY3:
    text_type = str

    def b(s):
        """Return the text ``s`` encoded as UTF-8 bytes."""
        return s.encode("utf-8")

    def u(s):
        """Return ``s`` unchanged."""
        return s

else:
    text_type = compat.ustr

    def b(s):
        """Return ``s`` unchanged."""
        return s

    def u(s):
        """Return ``s`` converted by ``compat.ustr`` with "unicode_escape"."""
        return compat.ustr(s, "unicode_escape")
71 # }}}
72 # _pofile_or_mofile {{{
def _pofile_or_mofile(f, filetype, **kwargs):
    """
    Shared implementation behind :func:`polib.pofile` and
    :func:`polib.mofile`, so that both stay in sync.

    ``filetype`` equal to ``'pofile'`` selects the po parser, any other value
    selects the mo parser. The parsed instance is returned with its
    ``wrapwidth`` attribute taken from the keyword arguments (default: 78).
    """
    # an explicitly requested encoding wins, otherwise detect it from the data
    encoding = kwargs.get('encoding')
    if encoding is None:
        encoding = detect_encoding(f, filetype == 'mofile')

    # pick the parser matching the file type and run it
    if filetype == 'pofile':
        parser_class = _POFileParser
    else:
        parser_class = _MOFileParser
    parser = parser_class(
        f,
        encoding=encoding,
        check_for_duplicates=kwargs.get('check_for_duplicates', False),
        klass=kwargs.get('klass'),
    )
    result = parser.parse()
    result.wrapwidth = kwargs.get('wrapwidth', 78)
    return result
98 # }}}
99 # _is_file {{{
102 def _is_file(filename_or_contents):
104 Safely returns the value of os.path.exists(filename_or_contents).
106 Arguments:
108 ``filename_or_contents``
109 either a filename, or a string holding the contents of some file.
110 In the latter case, this function will always return False.
112 try:
113 return os.path.isfile(filename_or_contents)
114 except (TypeError, ValueError, UnicodeEncodeError):
115 return False
118 # }}}
119 # function pofile() {{{
122 # pylint: disable=redefined-outer-name
def pofile(pofile, **kwargs):
    """
    Parse the po or pot file ``pofile`` and return the resulting
    :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file or its content (data).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.POFile`
        instance).
    """
    filetype = 'pofile'
    return _pofile_or_mofile(pofile, filetype, **kwargs)
153 # }}}
154 # function mofile() {{{
157 # pylint: disable=redefined-outer-name
def mofile(mofile, **kwargs):
    """
    Parse the mo file ``mofile`` and return the resulting
    :class:`~polib.MOFile` instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file or its content (string
        or bytes).

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext to generate the po file that was used to format the mo file
        (optional, default: ``78``).

    ``encoding``
        string, the encoding to use (e.g. "utf-8") (default: ``None``, the
        encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.MOFile`
        instance).
    """
    filetype = 'mofile'
    return _pofile_or_mofile(mofile, filetype, **kwargs)
190 # }}}
191 # function detect_encoding() {{{
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
    rxt = re.compile(u(PATTERN))
    rxb = re.compile(b(PATTERN))

    def valid_charset(match):
        """Return the charset captured by ``match`` if a codec exists for it,
        ``None`` otherwise."""
        enc = match.group(1).strip()
        if not isinstance(enc, text_type):
            enc = enc.decode('utf-8')
        try:
            codecs.lookup(enc)
        except LookupError:
            return None
        return enc

    if not _is_file(file):
        # ``file`` holds the contents: try the text pattern first and fall
        # back to the bytes pattern when the contents are bytes
        try:
            match = rxt.search(file)
        except TypeError:
            match = rxb.search(file)
        if match:
            enc = valid_charset(match)
            if enc is not None:
                return enc
    else:
        # For PY3, always treat as binary
        if binary_mode or PY3:
            mode, rx = 'rb', rxb
        else:
            mode, rx = 'r', rxt
        # the context manager closes the file on every exit path, including
        # the early return and any exception raised while reading
        with open(file, mode) as f:
            for line in f:
                match = rx.search(line)
                if match:
                    enc = valid_charset(match)
                    if enc is not None:
                        return enc
                    # unknown charset: keep looking at the following lines
    return default_encoding
254 # }}}
255 # function escape() {{{
def escape(st):
    """
    Returns ``st`` with the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and
    the double quote replaced by their backslash-escaped form.
    """
    # the backslash comes first so that the backslashes introduced by the
    # following replacements are not escaped a second time
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    escaped = st
    for raw, quoted in replacements:
        escaped = escaped.replace(raw, quoted)
    return escaped
272 # }}}
273 # function unescape() {{{
def unescape(st):
    """
    Returns ``st`` with the escape sequences for ``\\\\``, ``\\t``, ``\\n``,
    ``\\r`` and the double quote replaced by the characters they stand for.
    """
    # character following the backslash -> character it stands for
    decoded = {'n': '\n', 't': '\t', 'r': '\r', '\\': '\\'}

    def unescape_repl(m):
        char = m.group(1)
        # the escaped double quote is not in the table and maps to itself
        return decoded.get(char, char)

    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
297 # }}}
298 # function natural_sort() {{{
def natural_sort(lst):
    """
    Sort naturally the given list: runs of ASCII digits are compared as
    numbers, everything else is compared case-insensitively.
    Credits: http://stackoverflow.com/a/4836734

    Returns a new sorted list, ``lst`` itself is left untouched.
    """

    def alphanum_key(key):
        # re.split() with a capturing group alternates text and digit runs:
        # even positions hold text, odd positions hold the captured digits.
        # Deciding by position (instead of str.isdigit()) keeps non-ASCII
        # digits such as a superscript two from being fed to int() or from
        # putting a number into a text slot.
        chunks = re.split('([0-9]+)', key)
        return [
            int(chunk) if position % 2 else chunk.lower()
            for position, chunk in enumerate(chunks)
        ]

    return sorted(lst, key=alphanum_key)
316 # }}}
317 # class _BaseFile {{{
class _BaseFile(list):
    """
    Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
    classes. This class should **not** be instantiated directly.
    """

    def __init__(self, *_args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``pofile``
            string, the path to the po or mo file, or its content as a string.

        ``wrapwidth``
            integer, the wrap width, only useful when the ``-w`` option was
            passed to xgettext (optional, default: ``78``).

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file, (optional, default: ``False``).
        """
        list.__init__(self)
        # remember the path only when ``pofile`` names an existing file,
        # otherwise fall back to the ``fpath`` keyword (possibly None)
        pofile = kwargs.get('pofile', None)  # pylint: disable=redefined-outer-name
        if pofile and _is_file(pofile):
            self.fpath = pofile
        else:
            self.fpath = kwargs.get('fpath')
        # the width at which lines should be wrapped
        self.wrapwidth = kwargs.get('wrapwidth', 78)
        # the file encoding
        self.encoding = kwargs.get('encoding', default_encoding)
        # whether to check for duplicate entries or not
        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __unicode__(self):
        """
        Returns the unicode representation of the file: the metadata entry,
        then the non obsolete entries, then the obsolete ones.
        """
        entries = [self.metadata_as_entry()]
        entries += [e for e in self if not e.obsolete]
        entries += self.obsolete_entries()  # pylint: disable=no-member
        return u('\n').join([e.__unicode__(self.wrapwidth) for e in entries])

    if PY3:

        def __str__(self):
            return self.__unicode__()

    else:

        def __str__(self):
            """
            Returns the string representation of the file.
            """
            return compat.ustr(self).encode(self.encoding)

    def __contains__(self, entry):
        """
        Overridden ``list`` method to implement the membership test (in and
        not in).
        The method considers that an entry is in the file if it finds an entry
        that has the same msgid (the test is **case sensitive**) and the same
        msgctxt (or none for both entries).

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) is not None

    def __eq__(self, other):
        return str(self) == str(other)

    def __hash__(self):
        return hash(str(self))

    def append(self, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        # check_for_duplicates may not be defined (yet) when unpickling.
        # But if pickling, we never want to check for duplicates anyway.
        if getattr(self, 'check_for_duplicates', False) and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).append(entry)

    def insert(self, index, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Arguments:

        ``index``
            index at which the entry should be inserted.

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        if self.check_for_duplicates and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).insert(index, entry)

    def metadata_as_entry(self):
        """
        Returns the file metadata as a :class:`~polib.POEntry` instance with
        an empty msgid; the entry is flagged fuzzy when the metadata is.
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            # one "name: value" line per metadata item
            strs = ['%s: %s' % (name, value) for name, value in mdata]
            e.msgstr = '\n'.join(strs) + '\n'
        if self.metadata_is_fuzzy:
            e.flags.append('fuzzy')
        return e

    def save(self, fpath=None, repr_method='__unicode__', newline=None):
        """
        Saves the po file to ``fpath``.
        If it is an existing file and no ``fpath`` is provided, then the
        existing file is rewritten with the modified data.

        Keyword arguments:

        ``fpath``
            string, full or relative path to the file.

        ``repr_method``
            string, the method to use for output.

        ``newline``
            string, controls how universal newlines works

        Raises ``IOError`` when neither ``fpath`` nor ``self.fpath`` is set.
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        if repr_method == 'to_binary':
            with open(fpath, 'wb') as fhandle:
                fhandle.write(contents)
        else:
            # decode before opening: opening in 'w' mode truncates the file,
            # so a decoding error must surface while the old data is intact
            if not isinstance(contents, text_type):
                contents = contents.decode(self.encoding)
            with io.open(
                fpath, 'w', encoding=self.encoding, newline=newline
            ) as fhandle:
                fhandle.write(contents)
        # set the file path if not set
        if self.fpath is None and fpath:
            self.fpath = fpath

    def find(self, st, by='msgid', include_obsolete_entries=False, msgctxt=False):
        """
        Find the entry which msgid (or property identified by the ``by``
        argument) matches the string ``st``.

        Keyword arguments:

        ``st``
            string, the string to search for.

        ``by``
            string, the property to use for comparison (default: ``msgid``).

        ``include_obsolete_entries``
            boolean, whether to also search in entries that are obsolete.

        ``msgctxt``
            string, allows specifying a specific message context for the
            search.

        Returns the matching entry, or ``None`` when nothing matches.
        """
        if include_obsolete_entries:
            entries = self[:]
        else:
            entries = [e for e in self if not e.obsolete]
        matches = []
        for e in entries:
            if getattr(e, by) == st:
                # ``False`` means "no context filter", ``None`` is a real value
                if msgctxt is not False and e.msgctxt != msgctxt:
                    continue
                matches.append(e)
        if len(matches) == 1:
            return matches[0]
        elif len(matches) > 1:
            if not msgctxt:
                # find the entry with no msgctx
                e = None
                for m in matches:
                    if not m.msgctxt:
                        e = m
                if e:
                    return e
            # fallback to the first entry found
            return matches[0]
        return None

    def ordered_metadata(self):
        """
        Convenience method that returns an ordered version of the metadata
        dictionary. The return value is list of tuples (metadata name,
        metadata_value).
        """
        # copy the dict first
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'Language',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding',
            'Plural-Forms',
        ]
        ordered_data = []
        for data in data_order:
            try:
                value = metadata.pop(data)
                ordered_data.append((data, value))
            except KeyError:
                pass
        # the rest of the metadata will be alphabetically ordered since there
        # are no specs for this AFAIK
        for data in natural_sort(metadata.keys()):
            value = metadata[data]
            ordered_data.append((data, value))
        return ordered_data

    def to_binary(self):
        """
        Return the binary representation of the file.
        """
        offsets = []
        entries = self.translated_entries()  # pylint: disable=no-member

        # the keys are sorted in the .mo file
        entries.sort(key=lambda o: o.msgid_with_context.encode('utf-8'))
        # add metadata entry
        mentry = self.metadata_as_entry()
        entries = [mentry] + entries
        entries_len = len(entries)
        ids, strs = b(''), b('')
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            msgid = b('')
            if e.msgctxt:
                # Contexts are stored by storing the concatenation of the
                # context, a <EOT> byte, and the original string
                msgid = self._encode(e.msgctxt + '\4')
            if e.msgid_plural:
                msgstr = []
                for index in sorted(e.msgstr_plural.keys()):
                    msgstr.append(e.msgstr_plural[index])
                msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
                msgstr = self._encode('\0'.join(msgstr))
            else:
                msgid += self._encode(e.msgid)
                msgstr = self._encode(e.msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            ids += msgid + b('\0')
            strs += msgstr + b('\0')

        # The header is 7 32-bit unsigned integers.
        keystart = 7 * 4 + 16 * entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1 + keystart]
            voffsets += [l2, o2 + valuestart]
        offsets = koffsets + voffsets

        output = struct.pack(
            "Iiiiiii",
            # Magic number
            MOFile.MAGIC,
            # Version
            0,
            # number of entries
            entries_len,
            # start of key index
            7 * 4,
            # start of value index
            7 * 4 + entries_len * 8,
            # size and offset of hash table, we don't use hash tables
            0,
            keystart,
        )
        if PY3 and sys.version_info.minor > 1:  # python 3.2 or superior
            output += array.array("i", offsets).tobytes()
        else:
            output += array.array("i", offsets).tostring()  # pylint: disable=no-member
        output += ids
        output += strs
        return output

    def _encode(self, mixed):
        """
        Encodes the given ``mixed`` argument with the file encoding if and
        only if it's an unicode string and returns the encoded string.
        """
        if isinstance(mixed, text_type):
            mixed = mixed.encode(self.encoding)
        return mixed
669 # }}}
670 # class POFile {{{
class POFile(_BaseFile):
    """
    Po (or Pot) file reader/writer.
    This class inherits the :class:`~polib._BaseFile` class and, by extension,
    the python ``list`` type.
    """

    def __unicode__(self):
        """
        Returns the unicode representation of the po file: the header
        rendered as comment lines, followed by the entries.
        """
        comment_lines = []
        for header in self.header.split('\n'):
            if not header:
                comment_lines.append("#\n")
            elif header[:1] in [',', ':']:
                # no space after the "#" for these two markers
                comment_lines.append('#%s\n' % header)
            else:
                comment_lines.append('# %s\n' % header)
        ret = ''.join(comment_lines)

        if not isinstance(ret, text_type):
            ret = ret.decode(self.encoding)

        return ret + _BaseFile.__unicode__(self)

    def save_as_mofile(self, fpath):
        """
        Saves the binary representation of the file to given ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the mo file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Convenience method that returns the percentage of translated
        messages, as an integer (``100`` when the file has no non obsolete
        entry).
        """
        total = len([e for e in self if not e.obsolete])
        if total == 0:
            return 100
        translated = len(self.translated_entries())
        return int(translated * 100 / float(total))

    def translated_entries(self):
        """
        Convenience method that returns the list of translated entries.
        """
        return [entry for entry in self if entry.translated()]

    def untranslated_entries(self):
        """
        Convenience method that returns the list of untranslated entries
        (obsolete and fuzzy entries are left out).
        """
        return [
            entry
            for entry in self
            if not (entry.translated() or entry.obsolete or entry.fuzzy)
        ]

    def fuzzy_entries(self):
        """
        Convenience method that returns the list of fuzzy entries.
        """
        return [entry for entry in self if entry.fuzzy and not entry.obsolete]

    def obsolete_entries(self):
        """
        Convenience method that returns the list of obsolete entries.
        """
        return [entry for entry in self if entry.obsolete]

    def merge(self, refpot):
        """
        Convenience method that merges the current pofile with the pot file
        provided. It behaves exactly as the gettext msgmerge utility:

        * comments of this file will be preserved, but extracted comments and
          occurrences will be discarded;
        * any translations or comments in the file will be discarded, however,
          dot comments and file positions will be preserved;
        * the fuzzy flags are preserved.

        Keyword argument:

        ``refpot``
            object POFile, the reference catalog.
        """
        # index both catalogs by msgid (with context) for faster access
        own_entries = {}
        for entry in self:
            own_entries[entry.msgid_with_context] = entry
        reference_ids = set()
        for entry in refpot:
            reference_ids.add(entry.msgid_with_context)
        # update the entries found in the reference, creating missing ones
        for entry in refpot:
            target = own_entries.get(entry.msgid_with_context)
            if target is None:
                target = POEntry()
                self.append(target)
            target.merge(entry)
        # ok, now we must "obsolete" entries that are not in the refpot anymore
        for entry in self:
            if entry.msgid_with_context not in reference_ids:
                entry.obsolete = True
778 # }}}
779 # class MOFile {{{
class MOFile(_BaseFile):
    """
    Mo file reader/writer.
    This class inherits the :class:`~polib._BaseFile` class and, by
    extension, the python ``list`` type.
    """

    # magic numbers of the mo format, in both byte orders
    MAGIC = 0x950412DE
    MAGIC_SWAPPED = 0xDE120495

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts all keywords arguments accepted by
        :class:`~polib._BaseFile` class.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Saves the mofile as a pofile to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath)

    # pylint: disable=arguments-differ
    def save(self, fpath=None):
        """
        Saves the mofile to ``fpath``, using the binary representation.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, 'to_binary')

    def percent_translated(self):
        """
        Always ``100``; kept for interface parity with POFile instances.
        """
        return 100

    def translated_entries(self):
        """
        Returns the file itself; kept for interface parity with POFile
        instances.
        """
        return self

    def untranslated_entries(self):
        """
        Always an empty list; kept for interface parity with POFile instances.
        """
        return []

    def fuzzy_entries(self):
        """
        Always an empty list; kept for interface parity with POFile instances.
        """
        return []

    def obsolete_entries(self):
        """
        Always an empty list; kept for interface parity with POFile instances.
        """
        return []
855 # }}}
856 # class _BaseEntry {{{
class _BaseEntry(object):
    """
    Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
    This class should **not** be instantiated directly.
    """

    def __init__(self, *_args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``msgid``
            string, the entry msgid.

        ``msgstr``
            string, the entry msgstr.

        ``msgid_plural``
            string, the entry msgid_plural.

        ``msgstr_plural``
            dict, the entry msgstr_plural lines.

        ``msgctxt``
            string, the entry context (msgctxt).

        ``obsolete``
            bool, whether the entry is "obsolete" or not.

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).
        """
        option = kwargs.get
        self.msgid = option('msgid', '')
        self.msgstr = option('msgstr', '')
        self.msgid_plural = option('msgid_plural', '')
        self.msgstr_plural = option('msgstr_plural', {})
        self.msgctxt = option('msgctxt', None)
        self.obsolete = option('obsolete', False)
        self.encoding = option('encoding', default_encoding)

    def __unicode__(self, wrapwidth=78):
        """
        Returns the unicode representation of the entry.
        """
        # every line of an obsolete entry carries the "#~ " marker
        delflag = '#~ ' if self.obsolete else ''
        ret = []
        # the context is written first, when there is one
        if self.msgctxt is not None:
            ret.extend(
                self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
            )
        # then the msgid
        ret.extend(self._str_field("msgid", delflag, "", self.msgid, wrapwidth))
        # then the msgid_plural, when there is one
        if self.msgid_plural:
            ret.extend(
                self._str_field(
                    "msgid_plural", delflag, "", self.msgid_plural, wrapwidth
                )
            )
        if self.msgstr_plural:
            # one msgstr[index] field per plural form, in index order
            for index in sorted(self.msgstr_plural):
                ret.extend(
                    self._str_field(
                        "msgstr",
                        delflag,
                        '[%s]' % index,
                        self.msgstr_plural[index],
                        wrapwidth,
                    )
                )
        else:
            # no plural forms: a single msgstr field
            ret.extend(self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth))
        ret.append('')
        return u('\n').join(ret)

    if PY3:

        def __str__(self):
            return self.__unicode__()

    else:

        def __str__(self):
            """
            Returns the string representation of the entry.
            """
            return compat.ustr(self).encode(self.encoding)

    def __eq__(self, other):
        return str(self) == str(other)

    def __hash__(self):
        return hash(str(self))

    def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
        """
        Returns the list of output lines for one field of the entry.

        ``fieldname`` is the keyword to write (a ``previous_`` prefix is
        removed), ``delflag`` is put in front of every line, ``plural_index``
        is appended to the keyword, and ``field`` is the value, wrapped at
        ``wrapwidth`` when that width is positive.
        """
        lines = field.splitlines(True)
        if len(lines) > 1:
            # multi-line values start with an initial empty line
            lines.insert(0, '')
        else:
            specialchars_count = sum(
                field.count(char) for char in ('\\', '\n', '\r', '\t', '"')
            )
            # comparison must take into account fieldname length + one space
            # + 2 quotes (eg. msgid "<string>")
            flength = len(fieldname) + 3
            if plural_index:
                flength += len(plural_index)
            real_wrapwidth = wrapwidth - flength + specialchars_count
            if wrapwidth > 0 and len(field) > real_wrapwidth:
                # wrap the escaped text, then undo the escaping of each piece
                # (every piece is escaped again when it is written below)
                wrapped = textwrap.wrap(
                    escape(field),
                    wrapwidth - 2,  # 2 for quotes ""
                    drop_whitespace=False,
                    break_long_words=False,
                )
                lines = [''] + [unescape(item) for item in wrapped]
            else:
                lines = [field]
        if fieldname.startswith('previous_'):
            # quick and dirty trick to get the real field name
            fieldname = fieldname[9:]

        first, rest = lines[0], lines[1:]
        ret = ['%s%s%s "%s"' % (delflag, fieldname, plural_index, escape(first))]
        ret.extend('%s"%s"' % (delflag, escape(line)) for line in rest)
        return ret

    @property
    def msgid_with_context(self):
        """
        The msgid, preceded by the context and an EOT character (``\\x04``)
        when the entry has a context.
        """
        if not self.msgctxt:
            return self.msgid
        return '%s%s%s' % (self.msgctxt, "\x04", self.msgid)
999 # }}}
1000 # class POEntry {{{
1003 class POEntry(_BaseEntry):
1005 Represents a po file entry.
1008 def __init__(self, *args, **kwargs):
1010 Constructor, accepts the following keyword arguments:
1012 ``comment``
1013 string, the entry comment.
1015 ``tcomment``
1016 string, the entry translator comment.
1018 ``occurrences``
1019 list, the entry occurrences.
1021 ``flags``
1022 list, the entry flags.
1024 ``previous_msgctxt``
1025 string, the entry previous context.
1027 ``previous_msgid``
1028 string, the entry previous msgid.
1030 ``previous_msgid_plural``
1031 string, the entry previous msgid_plural.
1033 ``linenum``
1034 integer, the line number of the entry
1036 _BaseEntry.__init__(self, *args, **kwargs)
1037 self.comment = kwargs.get('comment', '')
1038 self.tcomment = kwargs.get('tcomment', '')
1039 self.occurrences = kwargs.get('occurrences', [])
1040 self.flags = kwargs.get('flags', [])
1041 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
1042 self.previous_msgid = kwargs.get('previous_msgid', None)
1043 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
1044 self.linenum = kwargs.get('linenum', None)
1046 def __unicode__(self, wrapwidth=78):
1048 Returns the unicode representation of the entry.
1050 ret = []
1051 # comments first, if any (with text wrapping as xgettext does)
1052 if self.obsolete:
1053 comments = [('tcomment', '# ')]
1054 else:
1055 comments = [('comment', '#. '), ('tcomment', '# ')]
1056 for c in comments:
1057 val = getattr(self, c[0])
1058 if val:
1059 for comment in val.split('\n'):
1060 if len(comment) + len(c[1]) > wrapwidth > 0:
1061 ret += textwrap.wrap(
1062 comment,
1063 wrapwidth,
1064 initial_indent=c[1],
1065 subsequent_indent=c[1],
1066 break_long_words=False,
1068 else:
1069 ret.append('%s%s' % (c[1], comment))
1071 # occurrences (with text wrapping as xgettext does)
1072 if not self.obsolete and self.occurrences:
1073 filelist = []
1074 for fpath, lineno in self.occurrences:
1075 if lineno:
1076 filelist.append('%s:%s' % (fpath, lineno))
1077 else:
1078 filelist.append(fpath)
1079 filestr = ' '.join(filelist)
1080 if len(filestr) + 3 > wrapwidth > 0:
1081 # textwrap split words that contain hyphen, this is not
1082 # what we want for filenames, so the dirty hack is to
1083 # temporally replace hyphens with a char that a file cannot
1084 # contain, like "*"
1085 ret += [
1086 line.replace('*', '-')
1087 for line in textwrap.wrap(
1088 filestr.replace('-', '*'),
1089 wrapwidth,
1090 initial_indent='#: ',
1091 subsequent_indent='#: ',
1092 break_long_words=False,
1095 else:
1096 ret.append('#: ' + filestr)
1098 # flags (TODO: wrapping ?)
1099 if self.flags:
1100 ret.append('#, %s' % ', '.join(self.flags))
1102 # previous context and previous msgid/msgid_plural
1103 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
1104 if self.obsolete:
1105 prefix = "#~| "
1106 else:
1107 prefix = "#| "
1108 for f in fields:
1109 val = getattr(self, f)
1110 if val is not None:
1111 ret += self._str_field(f, prefix, "", val, wrapwidth)
1113 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
1114 ret = u('\n').join(ret)
1115 return ret
1117 # pylint: disable=too-many-return-statements
1118 def __cmp__(self, other):
1120 Called by comparison operations if rich comparison is not defined.
1122 # First: Obsolete test
1123 if self.obsolete != other.obsolete:
1124 if self.obsolete:
1125 return -1
1126 else:
1127 return 1
1128 # Work on a copy to protect original
1129 occ1 = sorted(self.occurrences[:])
1130 occ2 = sorted(other.occurrences[:])
1131 if occ1 > occ2:
1132 return 1
1133 if occ1 < occ2:
1134 return -1
1135 # Compare context
1136 msgctxt = self.msgctxt or '0'
1137 othermsgctxt = other.msgctxt or '0'
1138 if msgctxt > othermsgctxt:
1139 return 1
1140 elif msgctxt < othermsgctxt:
1141 return -1
1142 # Compare msgid_plural
1143 msgid_plural = self.msgid_plural or '0'
1144 othermsgid_plural = other.msgid_plural or '0'
1145 if msgid_plural > othermsgid_plural:
1146 return 1
1147 elif msgid_plural < othermsgid_plural:
1148 return -1
1149 # Compare msgstr_plural
1150 if self.msgstr_plural and isinstance(self.msgstr_plural, dict):
1151 msgstr_plural = list(self.msgstr_plural.values())
1152 else:
1153 msgstr_plural = []
1154 if other.msgstr_plural and isinstance(other.msgstr_plural, dict):
1155 othermsgstr_plural = list(other.msgstr_plural.values())
1156 else:
1157 othermsgstr_plural = []
1158 if msgstr_plural > othermsgstr_plural:
1159 return 1
1160 elif msgstr_plural < othermsgstr_plural:
1161 return -1
1162 # Compare msgid
1163 if self.msgid > other.msgid:
1164 return 1
1165 elif self.msgid < other.msgid:
1166 return -1
1167 # Compare msgstr
1168 if self.msgstr > other.msgstr:
1169 return 1
1170 elif self.msgstr < other.msgstr:
1171 return -1
1172 return 0
1174 def __gt__(self, other):
1175 return self.__cmp__(other) > 0
1177 def __lt__(self, other):
1178 return self.__cmp__(other) < 0
1180 def __ge__(self, other):
1181 return self.__cmp__(other) >= 0
1183 def __le__(self, other):
1184 return self.__cmp__(other) <= 0
1186 def __eq__(self, other):
1187 return self.__cmp__(other) == 0
1189 def __ne__(self, other):
1190 return self.__cmp__(other) != 0
1192 def translated(self):
1194 Returns ``True`` if the entry has been translated or ``False``
1195 otherwise.
1197 if self.obsolete or self.fuzzy:
1198 return False
1199 if self.msgstr != '':
1200 return True
1201 if self.msgstr_plural:
1202 for pos in self.msgstr_plural:
1203 if self.msgstr_plural[pos] == '':
1204 return False
1205 return True
1206 return False
def merge(self, other):
    """
    Merge the current entry with the given pot entry.

    Reference data (ids, context, occurrences, comment, obsolete state
    and "previous" fields) is taken from ``other``.  The flags are
    replaced by a copy of ``other.flags``, with ``'fuzzy'`` appended when
    this entry was fuzzy before the merge.  Plural slots present in
    ``other`` but missing here are created empty; translations already
    stored in ``self.msgstr_plural`` are left untouched.
    """
    # must be read before the flags are overwritten below
    was_fuzzy = self.fuzzy

    copied_attributes = (
        'msgid',
        'msgctxt',
        'occurrences',
        'comment',
        'msgid_plural',
        'obsolete',
        'previous_msgctxt',
        'previous_msgid',
        'previous_msgid_plural',
    )
    for name in copied_attributes:
        setattr(self, name, getattr(other, name))

    self.flags = list(other.flags)  # clone flags
    if was_fuzzy:
        self.flags.append('fuzzy')

    if other.msgstr_plural:
        for pos in other.msgstr_plural:
            # keep existing translation at pos if any
            self.msgstr_plural.setdefault(pos, '')
@property
def fuzzy(self):
    """``True`` when ``'fuzzy'`` is one of the entry's flags."""
    has_fuzzy_flag = 'fuzzy' in self.flags
    return has_fuzzy_flag
def __hash__(self):
    """Hash the entry on its ``(msgid, msgstr)`` pair."""
    identity = (self.msgid, self.msgstr)
    return hash(identity)
1241 # }}}
1242 # class MOEntry {{{
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments,
        for consistency with :class:`~polib.POEntry`:

        ``comment``
        ``tcomment``
        ``occurrences``
        ``flags``
        ``previous_msgctxt``
        ``previous_msgid``
        ``previous_msgid_plural``

        Note: even though these keyword arguments are accepted,
        they hold no real meaning in the context of MO files
        and are simply ignored: each attribute is reset to an empty
        value once the base constructor has run.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        # textual attributes
        self.comment = self.tcomment = ''
        # list attributes (each one gets its own fresh list)
        self.occurrences, self.flags = [], []
        # "previous" attributes
        self.previous_msgctxt = None
        self.previous_msgid = None
        self.previous_msgid_plural = None

    def __hash__(self):
        """Hash the entry on its ``(msgid, msgstr)`` pair."""
        identity = (self.msgid, self.msgstr)
        return hash(identity)
1280 # }}}
1281 # class _POFileParser {{{
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.

    ``parse()`` reads the input line by line, classifies each line as a
    two-letter symbol and hands it to ``process()``, which looks up the
    ``(symbol, current state)`` transition registered with ``add()`` and
    runs the matching ``handle_*`` method.
    """

    # a double quote that is not preceded by a backslash
    _UNESCAPED_QUOTE = re.compile(r'([^\\]|^)"')
    # the numeric index at the start of a ``msgstr[N]`` token
    _PLURAL_INDEX = re.compile(r'msgstr\[(\d+)')

    # pylint: disable=redefined-outer-name
    def __init__(self, pofile, *_args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``pofile``
            string, path to the po file or its content

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class instantiated to hold the parsed entries (optional,
            defaults to ``POFile``).
        """
        enc = kwargs.get('encoding', default_encoding)
        if _is_file(pofile):
            try:
                self.fhandle = io.open(pofile, 'rt', encoding=enc)
            except LookupError:
                # unknown encoding name: fall back to the default one
                enc = default_encoding
                self.fhandle = io.open(pofile, 'rt', encoding=enc)
        else:
            self.fhandle = pofile.splitlines()

        klass = kwargs.get('klass')
        if klass is None:
            klass = POFile
        self.instance = klass(
            pofile=pofile,
            encoding=enc,
            check_for_duplicates=kwargs.get('check_for_duplicates', False),
        )
        self.transitions = {}
        self.current_line = 0
        self.current_entry = POEntry(linenum=self.current_line)
        self.current_state = 'st'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Signification of symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurrence
        #     * FL: a flags line
        #     * CT: a message context
        #     * PC: a previous msgctxt
        #     * PM: a previous msgid
        #     * PP: a previous msgid_plural
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_states = [
            'st',
            'he',
            'gc',
            'oc',
            'fl',
            'ct',
            'pc',
            'pm',
            'pp',
            'tc',
            'ms',
            'mp',
            'mx',
            'mi',
        ]

        self.add('tc', ['st', 'he'], 'he')
        self.add(
            'tc',
            ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mp', 'mx', 'mi'],
            'tc',
        )
        self.add('gc', all_states, 'gc')
        self.add('oc', all_states, 'oc')
        self.add('fl', all_states, 'fl')
        self.add('pc', all_states, 'pc')
        self.add('pm', all_states, 'pm')
        self.add('pp', all_states, 'pp')
        self.add(
            'ct',
            ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mx'],
            'ct',
        )
        self.add(
            'mi',
            ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc', 'pm', 'pp', 'ms', 'mx'],
            'mi',
        )
        self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
        self.add('ms', ['mi', 'mp', 'tc'], 'ms')
        self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
        self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')

    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the populated file instance.  Raises ``IOError`` on a
        syntax error.  The underlying file handle (if any) is closed
        whether parsing succeeds or fails.
        """
        try:
            tokens = self._parse_lines()
            if self.current_entry and len(tokens) > 0 and not tokens[0].startswith('#'):
                # since entries are added when another entry is found, we must
                # add the last entry here (only if there are lines). Trailing
                # comments are ignored
                self.instance.append(self.current_entry)
            self._extract_metadata()
        finally:
            # close opened file, also when a syntax error aborted the parsing
            self._close()
        return self.instance

    # pylint: disable=too-many-branches
    def _parse_lines(self):
        """
        Feed every non-blank input line to the state machine.

        Returns the tokens of the last non-blank line that was read (an
        empty list when there was none).
        """
        keywords = {
            'msgctxt': 'ct',
            'msgid': 'mi',
            'msgstr': 'ms',
            'msgid_plural': 'mp',
        }
        prev_keywords = {
            'msgid_plural': 'pp',
            'msgid': 'pm',
            'msgctxt': 'pc',
        }
        has_unescaped_quote = self._UNESCAPED_QUOTE.search
        tokens = []
        for line in self.fhandle:
            self.current_line += 1
            if self.current_line == 1:
                BOM = codecs.BOM_UTF8.decode('utf-8')
                if line.startswith(BOM):
                    line = line[len(BOM) :]
            line = line.strip()
            if line == '':
                continue

            tokens = line.split(None, 2)
            nb_tokens = len(tokens)

            if tokens[0] == '#~|':
                continue

            if tokens[0] == '#~' and nb_tokens > 1:
                line = line[3:].strip()
                tokens = tokens[1:]
                nb_tokens -= 1
                self.entry_obsolete = 1
            else:
                self.entry_obsolete = 0

            # Take care of keywords like
            # msgid, msgid_plural, msgctxt & msgstr.
            if tokens[0] in keywords and nb_tokens > 1:
                line = line[len(tokens[0]) :].lstrip()
                if has_unescaped_quote(line[1:-1]):
                    raise self._syntax_error('unescaped double quote found')
                self.current_token = line
                self.process(keywords[tokens[0]])
                continue

            self.current_token = line

            if tokens[0] == '#:':
                if nb_tokens <= 1:
                    continue
                # we are on a occurrences line
                self.process('oc')

            elif line[:1] == '"':
                # we are on a continuation line
                if has_unescaped_quote(line[1:-1]):
                    raise self._syntax_error('unescaped double quote found')
                self.process('mc')

            elif line[:7] == 'msgstr[':
                # we are on a msgstr plural
                self.process('mx')

            elif tokens[0] == '#,':
                if nb_tokens <= 1:
                    continue
                # we are on a flags line
                self.process('fl')

            elif tokens[0] == '#' or tokens[0].startswith('##'):
                # we are on a translator comment line
                self.process('tc')

            elif tokens[0] == '#.':
                if nb_tokens <= 1:
                    continue
                # we are on a generated comment line
                self.process('gc')

            elif tokens[0] == '#|':
                if nb_tokens <= 1:
                    raise self._syntax_error()

                # Remove the marker and any whitespace right after that.
                line = line[2:].lstrip()
                self.current_token = line

                if tokens[1].startswith('"'):
                    # Continuation of previous metadata.
                    self.process('mc')
                    continue

                if nb_tokens == 2:
                    # Invalid continuation line.
                    raise self._syntax_error('invalid continuation line')

                # we are on a "previous translation" comment line,
                if tokens[1] not in prev_keywords:
                    # Unknown keyword in previous translation comment.
                    raise self._syntax_error('unknown keyword %s' % tokens[1])

                # Remove the keyword and any whitespace
                # between it and the starting quote.
                line = line[len(tokens[1]) :].lstrip()
                self.current_token = line
                self.process(prev_keywords[tokens[1]])

            else:
                raise self._syntax_error()
        return tokens

    def _extract_metadata(self):
        """
        Move the entry with an empty msgid, if any, into the instance
        metadata dict.
        """
        metadataentry = self.instance.find('')
        if not metadataentry:
            return
        # metadata found: remove the entry
        self.instance.remove(metadataentry)
        self.instance.metadata_is_fuzzy = metadataentry.flags
        key = None
        for msg in metadataentry.msgstr.splitlines():
            try:
                key, val = msg.split(':', 1)
                self.instance.metadata[key] = val.strip()
            except (ValueError, KeyError):
                # no "key: value" pair on this line: treat it as the
                # continuation of the previous key's value
                if key is not None:
                    self.instance.metadata[key] += '\n' + msg.strip()

    def _syntax_error(self, detail=None):
        """
        Build the ``IOError`` reported for the current line, optionally
        suffixed with ``detail``.
        """
        fpath = '%s ' % self.instance.fpath if self.instance.fpath else ''
        message = 'Syntax error in po file %s(line %s)' % (fpath, self.current_line)
        if detail:
            message += ': ' + detail
        return IOError(message)

    def _close(self):
        """Close the input if it is a file object (a list has no ``close``)."""
        close = getattr(self.fhandle, 'close', None)
        if close is not None:
            close()

    def _flush_entry(self):
        """
        When the previous entry is complete (the machine is in a msgstr
        state), store it and start a new entry at the current line.
        """
        if self.current_state in ['mc', 'ms', 'mx']:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry(linenum=self.current_line)

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.

        Keywords arguments:

        ``symbol``
            string, the matched token (two chars symbol).

        ``states``
            list, a list of states (two chars symbols).

        ``next_state``
            the next state the fsm will have after the action.
        """
        action = getattr(self, 'handle_%s' % next_state)
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:

        ``symbol``
            string, the matched token (two chars symbol).

        Any exception raised while looking up or running the transition
        is reported as an ``IOError`` (syntax error) after closing the
        input file.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            if action():
                self.current_state = state
        except Exception:
            self._close()
            raise self._syntax_error()

    # state handlers

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return True

    def handle_tc(self):
        """Handle a translator comment."""
        self._flush_entry()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        tcomment = self.current_token.lstrip('#')
        if tcomment.startswith(' '):
            tcomment = tcomment[1:]
        self.current_entry.tcomment += tcomment
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._flush_entry()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """Handle a file:num occurrence."""
        self._flush_entry()
        occurrences = self.current_token[3:].split()
        for occurrence in occurrences:
            if occurrence != '':
                try:
                    fil, line = occurrence.rsplit(':', 1)
                    if not line.isdigit():
                        # the colon is part of the file name
                        fil = occurrence
                        line = ''
                    self.current_entry.occurrences.append((fil, line))
                except (ValueError, AttributeError):
                    self.current_entry.occurrences.append((occurrence, ''))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._flush_entry()
        self.current_entry.flags += [
            c.strip() for c in self.current_token[3:].split(',')
        ]
        return True

    def handle_pp(self):
        """Handle a previous msgid_plural line."""
        self._flush_entry()
        self.current_entry.previous_msgid_plural = unescape(self.current_token[1:-1])
        return True

    def handle_pm(self):
        """Handle a previous msgid line."""
        self._flush_entry()
        self.current_entry.previous_msgid = unescape(self.current_token[1:-1])
        return True

    def handle_pc(self):
        """Handle a previous msgctxt line."""
        self._flush_entry()
        self.current_entry.previous_msgctxt = unescape(self.current_token[1:-1])
        return True

    def handle_ct(self):
        """Handle a msgctxt."""
        self._flush_entry()
        self.current_entry.msgctxt = unescape(self.current_token[1:-1])
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._flush_entry()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[1:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[1:-1])
        return True

    def handle_mx(self):
        """
        Handle a msgstr plural.

        The index may span several digits (``msgstr[10]``).  A token
        without a numeric index raises ``ValueError``, which ``process()``
        reports as a syntax error.
        """
        token = self.current_token
        match = self._PLURAL_INDEX.match(token)
        if match is None:
            raise ValueError('invalid msgstr plural index: %r' % (token,))
        index = int(match.group(1))
        value = token[token.find('"') + 1 : -1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """Handle a msgid or msgstr continuation line."""
        token = unescape(self.current_token[1:-1])
        if self.current_state == 'ct':
            self.current_entry.msgctxt += token
        elif self.current_state == 'mi':
            self.current_entry.msgid += token
        elif self.current_state == 'mp':
            self.current_entry.msgid_plural += token
        elif self.current_state == 'ms':
            self.current_entry.msgstr += token
        elif self.current_state == 'mx':
            self.current_entry.msgstr_plural[self.msgstr_index] += token
        elif self.current_state == 'pp':
            self.current_entry.previous_msgid_plural += token
        elif self.current_state == 'pm':
            self.current_entry.previous_msgid += token
        elif self.current_state == 'pc':
            self.current_entry.previous_msgctxt += token
        # don't change the current state
        return False
1746 # }}}
1747 # class _MOFileParser {{{
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """

    # pylint: disable=unused-argument,redefined-outer-name
    def __init__(self, mofile, *_args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``mofile``
            string, path to the mo file or its content

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class instantiated to hold the parsed entries (optional,
            defaults to ``MOFile``).
        """
        # set first so that __del__ stays safe if opening the file fails
        self.fhandle = None
        if _is_file(mofile):
            self.fhandle = open(mofile, 'rb')
        else:
            self.fhandle = io.BytesIO(mofile)

        klass = kwargs.get('klass')
        if klass is None:
            klass = MOFile
        self.instance = klass(
            fpath=mofile,
            encoding=kwargs.get('encoding', default_encoding),
            check_for_duplicates=kwargs.get('check_for_duplicates', False),
        )

    def __del__(self):
        """
        Make sure the file is closed, this prevents warnings on unclosed file
        when running tests with python >= 3.2.
        """
        # the attribute is missing when the object was never initialised
        fhandle = getattr(self, 'fhandle', None)
        if fhandle and hasattr(fhandle, 'close'):
            fhandle.close()

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated file instance.  Raises ``IOError`` when the
        magic number or the major revision number is not an expected one.
        The file handle is closed whether parsing succeeds or fails.
        """
        try:
            self._parse()
        finally:
            # close opened file
            self.fhandle.close()
        return self.instance

    def _parse(self):
        """Read the header, both index tables and every entry of the file."""
        # parse magic number
        magic_number = self._readbinary('<I', 4)
        if magic_number == MOFile.MAGIC:
            ii = '<II'
        elif magic_number == MOFile.MAGIC_SWAPPED:
            ii = '>II'
        else:
            raise IOError('Invalid mo file, magic number is incorrect !')
        self.instance.magic_number = magic_number
        # parse the version number and the number of strings
        version, numofstrings = self._readbinary(ii, 8)
        # from MO file format specs: "A program seeing an unexpected major
        # revision number should stop reading the MO file entirely"
        if version >> 16 not in (0, 1):
            raise IOError('Invalid mo file, unexpected major revision number')
        self.instance.version = version
        # original strings and translation strings hash table offset
        msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
        # move to msgid hash table and read length and offset of msgids
        self.fhandle.seek(msgids_hash_offset)
        msgids_index = [self._readbinary(ii, 8) for _ in range(numofstrings)]
        # move to msgstr hash table and read length and offset of msgstrs
        self.fhandle.seek(msgstrs_hash_offset)
        msgstrs_index = [self._readbinary(ii, 8) for _ in range(numofstrings)]
        # build entries
        encoding = self.instance.encoding
        for i in range(numofstrings):
            msgid_length, msgid_offset = msgids_index[i]
            self.fhandle.seek(msgid_offset)
            msgid = self.fhandle.read(msgid_length)

            msgstr_length, msgstr_offset = msgstrs_index[i]
            self.fhandle.seek(msgstr_offset)
            msgstr = self.fhandle.read(msgstr_length)
            if i == 0 and not msgid:  # metadata
                raw_metadata, metadata = msgstr.split(b('\n')), {}
                for line in raw_metadata:
                    tokens = line.split(b(':'), 1)
                    if tokens[0] != b(''):
                        try:
                            k = tokens[0].decode(encoding)
                            v = tokens[1].decode(encoding)
                            metadata[k] = v.strip()
                        except IndexError:
                            # line without a colon: key with an empty value
                            metadata[k] = u('')
                self.instance.metadata = metadata
                continue
            # test if we have a plural entry
            msgid_tokens = msgid.split(b('\0'))
            if len(msgid_tokens) > 1:
                entry = self._build_entry(
                    msgid=msgid_tokens[0],
                    msgid_plural=msgid_tokens[1],
                    msgstr_plural=dict(enumerate(msgstr.split(b('\0')))),
                )
            else:
                entry = self._build_entry(msgid=msgid, msgstr=msgstr)
            self.instance.append(entry)

    def _build_entry(self, msgid, msgstr=None, msgid_plural=None, msgstr_plural=None):
        """
        Decode the raw byte strings with the instance encoding and return
        the matching ``MOEntry``.

        A ``\\x04`` byte in ``msgid`` separates the message context from
        the message id.  ``msgstr``, ``msgid_plural`` and ``msgstr_plural``
        are passed on only when they are non-empty.
        """
        msgctxt_msgid = msgid.split(b('\x04'))
        encoding = self.instance.encoding
        if len(msgctxt_msgid) > 1:
            kwargs = {
                'msgctxt': msgctxt_msgid[0].decode(encoding),
                'msgid': msgctxt_msgid[1].decode(encoding),
            }
        else:
            kwargs = {'msgid': msgid.decode(encoding)}
        if msgstr:
            kwargs['msgstr'] = msgstr.decode(encoding)
        if msgid_plural:
            kwargs['msgid_plural'] = msgid_plural.decode(encoding)
        if msgstr_plural:
            # decode into a new dict rather than mutating the caller's one
            kwargs['msgstr_plural'] = dict(
                (k, v.decode(encoding)) for k, v in msgstr_plural.items()
            )
        return MOEntry(**kwargs)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        content = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, content)
        if len(tup) == 1:
            return tup[0]
        return tup
1898 # }}}