prefs: return the resolved editor variable
[git-cola.git] / cola / polib.py
blobcd8405eb3f08cb01c33a039b6fafb5147e13cc49
1 # -* coding: utf-8 -*-
3 # License: MIT (see extras/polib/LICENSE file provided)
4 # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4:
6 """
7 **polib** allows you to manipulate, create, modify gettext files (pot, po and
8 mo files). You can load existing files, iterate through its entries, add,
9 modify entries, comments or metadata, etc. or create new po files from scratch.
11 **polib** provides a simple and pythonic API via the :func:`~polib.pofile` and
12 :func:`~polib.mofile` convenience functions.
13 """
14 from __future__ import absolute_import, division, print_function
15 import array
16 import codecs
17 import os
18 import re
19 import struct
20 import sys
21 import textwrap
22 import io
24 from . import compat
27 __author__ = 'David Jean Louis <izimobil@gmail.com>'
28 __version__ = '1.1.1'
29 __all__ = [
30 'pofile',
31 'POFile',
32 'POEntry',
33 'mofile',
34 'MOFile',
35 'MOEntry',
36 'default_encoding',
37 'escape',
38 'unescape',
39 'detect_encoding',
43 # the default encoding to use when encoding cannot be detected
44 default_encoding = 'utf-8'
46 # python 2/3 compatibility helpers {{{
# ``PY3`` is True when running on Python 3 or later.
PY3 = sys.version_info >= (3,)

if PY3:
    # on Python 3 the native ``str`` is already the text type
    text_type = str

    def b(s):
        """Return the text ``s`` encoded to UTF-8 bytes."""
        return s.encode("utf-8")

    def u(s):
        """Return ``s`` unchanged."""
        return s

else:
    # on Python 2 the text type is provided by the compat module
    text_type = compat.ustr

    def b(s):
        """Return ``s`` unchanged."""
        return s

    def u(s):
        """Return ``s`` converted with ``compat.ustr`` and "unicode_escape"."""
        return compat.ustr(s, "unicode_escape")
71 # }}}
72 # _pofile_or_mofile {{{
def _pofile_or_mofile(f, filetype, **kwargs):
    """
    Shared implementation behind :func:`polib.pofile` and :func:`polib.mofile`.

    Arguments:

    ``f``
        the value handed to the parser as its first argument (the po/mo file
        path or content received by the public functions).

    ``filetype``
        string, ``'pofile'`` selects the po parser, anything else selects the
        mo parser.

    The ``encoding``, ``check_for_duplicates``, ``klass`` and ``wrapwidth``
    keyword arguments are read from ``kwargs``.
    """
    # an explicit encoding wins, otherwise it is detected from the input
    enc = kwargs.get('encoding')
    if enc is None:
        enc = detect_encoding(f, filetype == 'mofile')

    # pick the parser matching the requested file type
    if filetype == 'pofile':
        parser_class = _POFileParser
    else:
        parser_class = _MOFileParser
    parser = parser_class(
        f,
        encoding=enc,
        check_for_duplicates=kwargs.get('check_for_duplicates', False),
        klass=kwargs.get('klass'),
    )

    # parse the file and apply the requested wrap width to the result
    instance = parser.parse()
    instance.wrapwidth = kwargs.get('wrapwidth', 78)
    return instance
98 # }}}
99 # _is_file {{{
102 def _is_file(filename_or_contents):
104 Safely returns the value of os.path.exists(filename_or_contents).
106 Arguments:
108 ``filename_or_contents``
109 either a filename, or a string holding the contents of some file.
110 In the latter case, this function will always return False.
112 try:
113 return os.path.isfile(filename_or_contents)
114 except (TypeError, ValueError, UnicodeEncodeError):
115 return False
118 # }}}
119 # function pofile() {{{
122 # pylint: disable=redefined-outer-name
def pofile(pofile, **kwargs):
    """
    Parse the po or pot file ``pofile`` and return it as a
    :class:`~polib.POFile` instance.

    Arguments:

    ``pofile``
        string, full or relative path to the po/pot file, or the content
        (data) of such a file.

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext (optional, default: ``78``).

    ``encoding``
        string, the encoding to use, e.g. "utf-8" (optional, default:
        ``None``, the encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.POFile`
        instance).
    """
    return _pofile_or_mofile(pofile, filetype='pofile', **kwargs)
153 # }}}
154 # function mofile() {{{
157 # pylint: disable=redefined-outer-name
def mofile(mofile, **kwargs):
    """
    Parse the mo file ``mofile`` and return it as a :class:`~polib.MOFile`
    instance.

    Arguments:

    ``mofile``
        string, full or relative path to the mo file, or the content (string
        or bytes) of such a file.

    ``wrapwidth``
        integer, the wrap width, only useful when the ``-w`` option was passed
        to xgettext to generate the po file that was used to format the mo
        file (optional, default: ``78``).

    ``encoding``
        string, the encoding to use, e.g. "utf-8" (optional, default:
        ``None``, the encoding will be auto-detected).

    ``check_for_duplicates``
        whether to check for duplicate entries when adding entries to the
        file (optional, default: ``False``).

    ``klass``
        class which is used to instantiate the return value (optional,
        default: ``None``, the return value will be a :class:`~polib.MOFile`
        instance).
    """
    return _pofile_or_mofile(mofile, filetype='mofile', **kwargs)
190 # }}}
191 # function detect_encoding() {{{
def detect_encoding(file, binary_mode=False):
    """
    Try to detect the encoding used by the ``file``. The ``file`` argument can
    be a PO or MO file path or a string containing the contents of the file.
    If the encoding cannot be detected, the function will return the value of
    ``default_encoding``.

    Arguments:

    ``file``
        string, full or relative path to the po/mo file or its content.

    ``binary_mode``
        boolean, set this to True if ``file`` is a mo file.
    """
    PATTERN = r'"?Content-Type:.+? charset=([\w_\-:\.]+)'
    rxt = re.compile(u(PATTERN))
    rxb = re.compile(b(PATTERN))

    def charset_exists(charset):
        """Check whether ``charset`` is valid or not."""
        try:
            codecs.lookup(charset)
        except LookupError:
            return False
        return True

    def charset_from(match):
        """Return the charset captured by ``match`` if it is a known codec,
        ``None`` otherwise."""
        enc = match.group(1).strip()
        if not isinstance(enc, text_type):
            enc = enc.decode('utf-8')
        if charset_exists(enc):
            return enc
        return None

    if not _is_file(file):
        # ``file`` holds the content itself: try it as text first and fall
        # back to the bytes pattern when the text pattern rejects its type
        try:
            match = rxt.search(file)
        except TypeError:
            match = rxb.search(file)
        if match:
            enc = charset_from(match)
            if enc is not None:
                return enc
    else:
        # For PY3, always treat as binary
        if binary_mode or PY3:
            mode, rx = 'rb', rxb
        else:
            mode, rx = 'r', rxt
        # the context manager closes the handle on every exit path, including
        # an exception raised while scanning; lines are read lazily so the
        # scan stops at the first usable charset declaration
        with open(file, mode) as f:
            for line in f:
                match = rx.search(line)
                if match:
                    enc = charset_from(match)
                    if enc is not None:
                        return enc
    return default_encoding
254 # }}}
255 # function escape() {{{
def escape(st):
    """
    Escapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    # the backslash is handled first so that the backslashes introduced by the
    # following replacements are not doubled in turn
    replacements = (
        ('\\', r'\\'),
        ('\t', r'\t'),
        ('\r', r'\r'),
        ('\n', r'\n'),
        ('\"', r'\"'),
    )
    for raw, escaped in replacements:
        st = st.replace(raw, escaped)
    return st
272 # }}}
273 # function unescape() {{{
def unescape(st):
    """
    Unescapes the characters ``\\\\``, ``\\t``, ``\\n``, ``\\r`` and ``"`` in
    the given string ``st`` and returns it.
    """
    # escape letters that stand for a control character; the backslash and
    # the double quote are not listed because they map to themselves
    control_chars = {'n': '\n', 't': '\t', 'r': '\r'}

    def unescape_repl(match):
        escaped_char = match.group(1)
        return control_chars.get(escaped_char, escaped_char)

    return re.sub(r'\\(\\|n|t|r|")', unescape_repl, st)
297 # }}}
298 # function natural_sort() {{{
def natural_sort(lst):
    """
    Sort naturally the given list: runs of digits are compared as integers
    and the other parts are compared case-insensitively.
    Credits: http://stackoverflow.com/a/4836734
    """
    digit_runs = re.compile('([0-9]+)')

    def alphanum_key(key):
        # the capturing group keeps the digit runs in the split result
        return [
            int(chunk) if chunk.isdigit() else chunk.lower()
            for chunk in digit_runs.split(key)
        ]

    return sorted(lst, key=alphanum_key)
316 # }}}
317 # class _BaseFile {{{
class _BaseFile(list):
    """
    Common base class for the :class:`~polib.POFile` and :class:`~polib.MOFile`
    classes. This class should **not** be instantiated directly.
    """

    def __init__(self, *_args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``pofile``
            string, the path to the po or mo file, or its content as a string.

        ``fpath``
            string, the file path to record when ``pofile`` is not the path
            of an existing file (optional, default: ``None``).

        ``wrapwidth``
            integer, the wrap width, only useful when the ``-w`` option was
            passed to xgettext (optional, default: ``78``).

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file, (optional, default: ``False``).
        """
        list.__init__(self)
        # the file path: ``pofile`` when it names an existing file, otherwise
        # whatever was given as ``fpath``
        pofile = kwargs.get('pofile', None)  # pylint: disable=redefined-outer-name
        if pofile and _is_file(pofile):
            self.fpath = pofile
        else:
            self.fpath = kwargs.get('fpath')
        # the width at which lines should be wrapped
        self.wrapwidth = kwargs.get('wrapwidth', 78)
        # the file encoding
        self.encoding = kwargs.get('encoding', default_encoding)
        # whether to check for duplicate entries or not
        self.check_for_duplicates = kwargs.get('check_for_duplicates', False)
        # header
        self.header = ''
        # both po and mo files have metadata
        self.metadata = {}
        self.metadata_is_fuzzy = 0

    def __unicode__(self):
        """
        Returns the unicode representation of the file: the metadata entry,
        then the non obsolete entries, then the obsolete entries.
        """
        ret = []
        entries = [self.metadata_as_entry()] + [e for e in self if not e.obsolete]
        for entry in entries:
            ret.append(entry.__unicode__(self.wrapwidth))
        for entry in self.obsolete_entries():  # pylint: disable=no-member
            ret.append(entry.__unicode__(self.wrapwidth))
        ret = u('\n').join(ret)
        return ret

    if PY3:

        def __str__(self):
            return self.__unicode__()

    else:

        def __str__(self):
            """
            Returns the string representation of the file.
            """
            return compat.ustr(self).encode(self.encoding)

    def __contains__(self, entry):
        """
        Overridden ``list`` method to implement the membership test (in and
        not in).
        The method considers that an entry is in the file if it finds an entry
        that has the same msgid (the test is **case sensitive**) and the same
        msgctxt (or none for both entries).

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        return self.find(entry.msgid, by='msgid', msgctxt=entry.msgctxt) is not None

    def __eq__(self, other):
        return str(self) == str(other)

    def __hash__(self):
        return hash(str(self))

    def append(self, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Argument:

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        # check_for_duplicates may not be defined (yet) when unpickling.
        # But if pickling, we never want to check for duplicates anyway.
        if getattr(self, 'check_for_duplicates', False) and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).append(entry)

    def insert(self, index, entry):
        """
        Overridden method to check for duplicates entries, if a user tries to
        add an entry that is already in the file, the method will raise a
        ``ValueError`` exception.

        Arguments:

        ``index``
            index at which the entry should be inserted.

        ``entry``
            an instance of :class:`~polib._BaseEntry`.
        """
        # same guard as append(): the attribute may not be set yet when the
        # instance has not gone through __init__ (e.g. while unpickling)
        if getattr(self, 'check_for_duplicates', False) and entry in self:
            raise ValueError('Entry "%s" already exists' % entry.msgid)
        super(_BaseFile, self).insert(index, entry)

    def metadata_as_entry(self):
        """
        Returns the file metadata as a :class:`~polib.POEntry` instance with
        an empty msgid, one ``name: value`` line per metadata item in its
        msgstr, and the ``fuzzy`` flag when ``metadata_is_fuzzy`` is set.
        """
        e = POEntry(msgid='')
        mdata = self.ordered_metadata()
        if mdata:
            strs = []
            for name, value in mdata:
                # one "name: value" line per metadata item, values are
                # written as they are
                strs.append('%s: %s' % (name, value))
            e.msgstr = '\n'.join(strs) + '\n'
        if self.metadata_is_fuzzy:
            e.flags.append('fuzzy')
        return e

    def save(self, fpath=None, repr_method='__unicode__', newline=None):
        """
        Saves the po file to ``fpath``.
        If it is an existing file and no ``fpath`` is provided, then the
        existing file is rewritten with the modified data.

        Raises ``IOError`` when neither ``fpath`` nor ``self.fpath`` is set.

        Keyword arguments:

        ``fpath``
            string, full or relative path to the file.

        ``repr_method``
            string, the method to use for output.

        ``newline``
            string, controls how universal newlines works
        """
        if self.fpath is None and fpath is None:
            raise IOError('You must provide a file path to save() method')
        contents = getattr(self, repr_method)()
        if fpath is None:
            fpath = self.fpath
        # the context managers close the handle even when the write fails
        if repr_method == 'to_binary':
            with open(fpath, 'wb') as fhandle:
                fhandle.write(contents)
        else:
            # decode before opening so that a decoding failure does not leave
            # a truncated file behind
            if not isinstance(contents, text_type):
                contents = contents.decode(self.encoding)
            with io.open(
                fpath, 'w', encoding=self.encoding, newline=newline
            ) as fhandle:
                fhandle.write(contents)
        # set the file path if not set
        if self.fpath is None and fpath:
            self.fpath = fpath

    def find(self, st, by='msgid', include_obsolete_entries=False, msgctxt=False):
        """
        Find the entry which msgid (or property identified by the ``by``
        argument) matches the string ``st``.

        Returns the matching entry, or ``None`` when there is none. When
        several entries match and no ``msgctxt`` was requested, an entry
        without msgctxt is preferred; otherwise the first match is returned.

        Keyword arguments:

        ``st``
            string, the string to search for.

        ``by``
            string, the property to use for comparison (default: ``msgid``).

        ``include_obsolete_entries``
            boolean, whether to also search in entries that are obsolete.

        ``msgctxt``
            string, allows specifying a specific message context for the
            search (default: ``False``, the context is not compared).
        """
        if include_obsolete_entries:
            entries = self[:]
        else:
            entries = [e for e in self if not e.obsolete]
        matches = []
        for e in entries:
            if getattr(e, by) == st:
                if msgctxt is not False and e.msgctxt != msgctxt:
                    continue
                matches.append(e)
        if len(matches) == 1:
            return matches[0]
        elif len(matches) > 1:
            if not msgctxt:
                # find the entry with no msgctx
                e = None
                for m in matches:
                    if not m.msgctxt:
                        e = m
                if e:
                    return e
            # fallback to the first entry found
            return matches[0]
        return None

    def ordered_metadata(self):
        """
        Convenience method that returns an ordered version of the metadata
        dictionary. The return value is list of tuples (metadata name,
        metadata_value).
        """
        # copy the dict first
        metadata = self.metadata.copy()
        data_order = [
            'Project-Id-Version',
            'Report-Msgid-Bugs-To',
            'POT-Creation-Date',
            'PO-Revision-Date',
            'Last-Translator',
            'Language-Team',
            'Language',
            'MIME-Version',
            'Content-Type',
            'Content-Transfer-Encoding',
            'Plural-Forms',
        ]
        ordered_data = []
        for data in data_order:
            try:
                value = metadata.pop(data)
                ordered_data.append((data, value))
            except KeyError:
                pass
        # the rest of the metadata will be alphabetically ordered since there
        # are no specs for this AFAIK
        for data in natural_sort(metadata.keys()):
            value = metadata[data]
            ordered_data.append((data, value))
        return ordered_data

    def to_binary(self):
        """
        Return the binary representation of the file.
        """
        offsets = []
        # NOTE: the list returned by translated_entries() is sorted in place
        # below
        entries = self.translated_entries()  # pylint: disable=no-member

        # the keys are sorted in the .mo file
        entries.sort(key=lambda o: o.msgid_with_context.encode('utf-8'))
        # add metadata entry
        mentry = self.metadata_as_entry()
        entries = [mentry] + entries
        entries_len = len(entries)
        ids, strs = b(''), b('')
        for e in entries:
            # For each string, we need size and file offset.  Each string is
            # NUL terminated; the NUL does not count into the size.
            msgid = b('')
            if e.msgctxt:
                # Contexts are stored by storing the concatenation of the
                # context, a <EOT> byte, and the original string
                msgid = self._encode(e.msgctxt + '\4')
            if e.msgid_plural:
                msgstr = []
                for index in sorted(e.msgstr_plural.keys()):
                    msgstr.append(e.msgstr_plural[index])
                msgid += self._encode(e.msgid + '\0' + e.msgid_plural)
                msgstr = self._encode('\0'.join(msgstr))
            else:
                msgid += self._encode(e.msgid)
                msgstr = self._encode(e.msgstr)
            offsets.append((len(ids), len(msgid), len(strs), len(msgstr)))
            ids += msgid + b('\0')
            strs += msgstr + b('\0')

        # The header is 7 32-bit unsigned integers.
        keystart = 7 * 4 + 16 * entries_len
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets += [l1, o1 + keystart]
            voffsets += [l2, o2 + valuestart]
        offsets = koffsets + voffsets

        output = struct.pack(
            "Iiiiiii",
            # Magic number
            MOFile.MAGIC,
            # Version
            0,
            # number of entries
            entries_len,
            # start of key index
            7 * 4,
            # start of value index
            7 * 4 + entries_len * 8,
            # size and offset of hash table, we don't use hash tables
            0,
            keystart,
        )
        # array.tobytes() exists from python 3.2 on; the whole version tuple
        # is compared so that the major version is taken into account too
        if sys.version_info >= (3, 2):
            output += array.array("i", offsets).tobytes()
        else:
            output += array.array("i", offsets).tostring()  # pylint: disable=no-member
        output += ids
        output += strs
        return output

    def _encode(self, mixed):
        """
        Encodes the given ``mixed`` argument with the file encoding if and
        only if it's an unicode string and returns the encoded string.
        """
        if isinstance(mixed, text_type):
            mixed = mixed.encode(self.encoding)
        return mixed
669 # }}}
670 # class POFile {{{
class POFile(_BaseFile):
    """
    Po (or Pot) file reader/writer.
    This class inherits the :class:`~polib._BaseFile` class and, by extension,
    the python ``list`` type.
    """

    def __unicode__(self):
        """
        Returns the unicode representation of the po file: the header written
        as comment lines, followed by the entries.
        """
        comment_lines = []
        for line in self.header.split('\n'):
            if not line:
                comment_lines.append("#\n")
            elif line[:1] in (',', ':'):
                # no space between the "#" and a leading "," or ":"
                comment_lines.append('#%s\n' % line)
            else:
                comment_lines.append('# %s\n' % line)
        header_text = ''.join(comment_lines)

        if not isinstance(header_text, text_type):
            header_text = header_text.decode(self.encoding)

        return header_text + _BaseFile.__unicode__(self)

    def save_as_mofile(self, fpath):
        """
        Saves the binary representation of the file to given ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the mo file.
        """
        _BaseFile.save(self, fpath, repr_method='to_binary')

    def percent_translated(self):
        """
        Convenience method that returns the percentage of translated
        messages, as an integer. A file without any non obsolete entry is
        reported as ``100``.
        """
        total = sum(1 for entry in self if not entry.obsolete)
        if not total:
            return 100
        translated = len(self.translated_entries())
        return int(translated * 100 / float(total))

    def translated_entries(self):
        """
        Convenience method that returns the list of translated entries.
        """
        return [entry for entry in self if entry.translated()]

    def untranslated_entries(self):
        """
        Convenience method that returns the list of untranslated entries
        (neither translated, nor obsolete, nor fuzzy).
        """
        return [
            entry
            for entry in self
            if not (entry.translated() or entry.obsolete or entry.fuzzy)
        ]

    def fuzzy_entries(self):
        """
        Convenience method that returns the list of fuzzy entries that are
        not obsolete.
        """
        return [entry for entry in self if entry.fuzzy and not entry.obsolete]

    def obsolete_entries(self):
        """
        Convenience method that returns the list of obsolete entries.
        """
        return [entry for entry in self if entry.obsolete]

    def merge(self, refpot):
        """
        Convenience method that merges the current pofile with the pot file
        provided:

        * every entry of ``refpot`` is merged (see :meth:`POEntry.merge`) into
          the entry of this file that has the same msgid and context, a new
          entry being appended to this file when there is none;
        * the entries of this file that are not in ``refpot`` are marked as
          obsolete.

        Keyword argument:

        ``refpot``
            object POFile, the reference catalog.
        """
        # Index the entries by msgid/context for faster access
        own_entries = {entry.msgid_with_context: entry for entry in self}
        reference_ids = {entry.msgid_with_context for entry in refpot}
        # Merge entries that are in the refpot
        for reference in refpot:
            target = own_entries.get(reference.msgid_with_context)
            if target is None:
                target = POEntry()
                self.append(target)
            target.merge(reference)
        # ok, now we must "obsolete" entries that are not in the refpot anymore
        for entry in self:
            if entry.msgid_with_context not in reference_ids:
                entry.obsolete = True
778 # }}}
779 # class MOFile {{{
class MOFile(_BaseFile):
    """
    Mo file reader/writer.
    This class inherits the :class:`~polib._BaseFile` class and, by
    extension, the python ``list`` type.
    """

    # magic number written in the header of the binary representation
    MAGIC = 0x950412DE
    # the same value with its bytes in the reverse order
    MAGIC_SWAPPED = 0xDE120495

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts all keywords arguments accepted by
        :class:`~polib._BaseFile` class.
        """
        _BaseFile.__init__(self, *args, **kwargs)
        self.magic_number = None
        self.version = 0

    def save_as_pofile(self, fpath):
        """
        Saves the mofile as a pofile to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        # the base class method is called directly so that the text
        # representation is written, not the binary one used by save()
        _BaseFile.save(self, fpath)

    # pylint: disable=no-self-use,arguments-differ
    def save(self, fpath=None):
        """
        Saves the mofile (its binary representation) to ``fpath``.

        Keyword argument:

        ``fpath``
            string, full or relative path to the file.
        """
        _BaseFile.save(self, fpath, repr_method='to_binary')

    # pylint: disable=no-self-use
    def percent_translated(self):
        """
        Always ``100``; kept for interface parity with POFile instances.
        """
        return 100

    # pylint: disable=no-self-use
    def translated_entries(self):
        """
        Returns the file itself; kept for interface parity with POFile
        instances.
        """
        return self

    # pylint: disable=no-self-use
    def untranslated_entries(self):
        """
        Always an empty list; kept for interface parity with POFile instances.
        """
        return []

    # pylint: disable=no-self-use
    def fuzzy_entries(self):
        """
        Always an empty list; kept for interface parity with POFile instances.
        """
        return []

    # pylint: disable=no-self-use
    def obsolete_entries(self):
        """
        Always an empty list; kept for interface parity with POFile instances.
        """
        return []
860 # }}}
861 # class _BaseEntry {{{
class _BaseEntry(object):
    """
    Base class for :class:`~polib.POEntry` and :class:`~polib.MOEntry` classes.
    This class should **not** be instantiated directly.
    """

    def __init__(self, *_args, **kwargs):
        """
        Constructor, accepts the following keyword arguments:

        ``msgid``
            string, the entry msgid.

        ``msgstr``
            string, the entry msgstr.

        ``msgid_plural``
            string, the entry msgid_plural.

        ``msgstr_plural``
            dict, the entry msgstr_plural lines.

        ``msgctxt``
            string, the entry context (msgctxt).

        ``obsolete``
            bool, whether the entry is "obsolete" or not.

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).
        """
        option = kwargs.get
        self.msgid = option('msgid', '')
        self.msgstr = option('msgstr', '')
        self.msgid_plural = option('msgid_plural', '')
        self.msgstr_plural = option('msgstr_plural', {})
        self.msgctxt = option('msgctxt', None)
        self.obsolete = option('obsolete', False)
        self.encoding = option('encoding', default_encoding)

    def __unicode__(self, wrapwidth=78):
        """
        Returns the unicode representation of the entry.
        """
        # every line of an obsolete entry starts with this marker
        delflag = '#~ ' if self.obsolete else ''
        lines = []
        # the msgctxt, if any, comes first
        if self.msgctxt is not None:
            lines.extend(
                self._str_field("msgctxt", delflag, "", self.msgctxt, wrapwidth)
            )
        # then the msgid
        lines.extend(self._str_field("msgid", delflag, "", self.msgid, wrapwidth))
        # then the msgid_plural, if any
        if self.msgid_plural:
            lines.extend(
                self._str_field(
                    "msgid_plural", delflag, "", self.msgid_plural, wrapwidth
                )
            )
        if self.msgstr_plural:
            # one msgstr[index] field per plural form, ordered by index
            plurals = self.msgstr_plural
            for index in sorted(plurals):
                lines.extend(
                    self._str_field(
                        "msgstr", delflag, '[%s]' % index, plurals[index], wrapwidth
                    )
                )
        else:
            # otherwise the single msgstr
            lines.extend(
                self._str_field("msgstr", delflag, "", self.msgstr, wrapwidth)
            )
        # makes the joined text end with a newline
        lines.append('')
        return u('\n').join(lines)

    if PY3:

        def __str__(self):
            return self.__unicode__()

    else:

        def __str__(self):
            """
            Returns the string representation of the entry.
            """
            return compat.ustr(self).encode(self.encoding)

    def __eq__(self, other):
        return str(self) == str(other)

    def __hash__(self):
        return hash(str(self))

    # pylint: disable=no-self-use
    def _str_field(self, fieldname, delflag, plural_index, field, wrapwidth=78):
        """
        Returns the list of lines that represent the ``field`` value for the
        ``fieldname`` keyword, every line being prefixed with ``delflag``.
        """
        lines = field.splitlines(True)
        if len(lines) > 1:
            # multi-line values start with an initial empty line
            lines = [''] + lines
        else:
            escaped_field = escape(field)
            specialchars_count = sum(
                field.count(char) for char in ('\\', '\n', '\r', '\t', '"')
            )
            # comparison must take into account fieldname length + one space
            # + 2 quotes (eg. msgid "<string>")
            flength = len(fieldname) + 3
            if plural_index:
                flength += len(plural_index)
            real_wrapwidth = wrapwidth - flength + specialchars_count
            if wrapwidth > 0 and len(field) > real_wrapwidth:
                # Wrap the line but take field name into account
                wrapped = textwrap.wrap(
                    escaped_field,
                    wrapwidth - 2,  # 2 for quotes ""
                    drop_whitespace=False,
                    break_long_words=False,
                )
                lines = [''] + [unescape(item) for item in wrapped]
            else:
                lines = [field]
        if fieldname.startswith('previous_'):
            # quick and dirty trick to get the real field name
            fieldname = fieldname[9:]

        first_line = '%s%s%s "%s"' % (
            delflag,
            fieldname,
            plural_index,
            escape(lines[0]),
        )
        continuation = ['%s"%s"' % (delflag, escape(line)) for line in lines[1:]]
        return [first_line] + continuation

    @property
    def msgid_with_context(self):
        """
        The msgid, prefixed with the msgctxt and an EOT character when the
        entry has a context.
        """
        if not self.msgctxt:
            return self.msgid
        return '%s%s%s' % (self.msgctxt, "\x04", self.msgid)
1005 # }}}
1006 # class POEntry {{{
1009 class POEntry(_BaseEntry):
1011 Represents a po file entry.
1014 def __init__(self, *args, **kwargs):
1016 Constructor, accepts the following keyword arguments:
1018 ``comment``
1019 string, the entry comment.
1021 ``tcomment``
1022 string, the entry translator comment.
1024 ``occurrences``
1025 list, the entry occurrences.
1027 ``flags``
1028 list, the entry flags.
1030 ``previous_msgctxt``
1031 string, the entry previous context.
1033 ``previous_msgid``
1034 string, the entry previous msgid.
1036 ``previous_msgid_plural``
1037 string, the entry previous msgid_plural.
1039 ``linenum``
1040 integer, the line number of the entry
1042 _BaseEntry.__init__(self, *args, **kwargs)
1043 self.comment = kwargs.get('comment', '')
1044 self.tcomment = kwargs.get('tcomment', '')
1045 self.occurrences = kwargs.get('occurrences', [])
1046 self.flags = kwargs.get('flags', [])
1047 self.previous_msgctxt = kwargs.get('previous_msgctxt', None)
1048 self.previous_msgid = kwargs.get('previous_msgid', None)
1049 self.previous_msgid_plural = kwargs.get('previous_msgid_plural', None)
1050 self.linenum = kwargs.get('linenum', None)
1052 def __unicode__(self, wrapwidth=78):
1054 Returns the unicode representation of the entry.
1056 ret = []
1057 # comments first, if any (with text wrapping as xgettext does)
1058 if self.obsolete:
1059 comments = [('tcomment', '# ')]
1060 else:
1061 comments = [('comment', '#. '), ('tcomment', '# ')]
1062 for c in comments:
1063 val = getattr(self, c[0])
1064 if val:
1065 for comment in val.split('\n'):
1066 if len(comment) + len(c[1]) > wrapwidth > 0:
1067 ret += textwrap.wrap(
1068 comment,
1069 wrapwidth,
1070 initial_indent=c[1],
1071 subsequent_indent=c[1],
1072 break_long_words=False,
1074 else:
1075 ret.append('%s%s' % (c[1], comment))
1077 # occurrences (with text wrapping as xgettext does)
1078 if not self.obsolete and self.occurrences:
1079 filelist = []
1080 for fpath, lineno in self.occurrences:
1081 if lineno:
1082 filelist.append('%s:%s' % (fpath, lineno))
1083 else:
1084 filelist.append(fpath)
1085 filestr = ' '.join(filelist)
1086 if len(filestr) + 3 > wrapwidth > 0:
1087 # textwrap split words that contain hyphen, this is not
1088 # what we want for filenames, so the dirty hack is to
1089 # temporally replace hyphens with a char that a file cannot
1090 # contain, like "*"
1091 ret += [
1092 line.replace('*', '-')
1093 for line in textwrap.wrap(
1094 filestr.replace('-', '*'),
1095 wrapwidth,
1096 initial_indent='#: ',
1097 subsequent_indent='#: ',
1098 break_long_words=False,
1101 else:
1102 ret.append('#: ' + filestr)
1104 # flags (TODO: wrapping ?)
1105 if self.flags:
1106 ret.append('#, %s' % ', '.join(self.flags))
1108 # previous context and previous msgid/msgid_plural
1109 fields = ['previous_msgctxt', 'previous_msgid', 'previous_msgid_plural']
1110 if self.obsolete:
1111 prefix = "#~| "
1112 else:
1113 prefix = "#| "
1114 for f in fields:
1115 val = getattr(self, f)
1116 if val is not None:
1117 ret += self._str_field(f, prefix, "", val, wrapwidth)
1119 ret.append(_BaseEntry.__unicode__(self, wrapwidth))
1120 ret = u('\n').join(ret)
1121 return ret
1123 # pylint: disable=cmp-method,too-many-return-statements
1124 def __cmp__(self, other):
1126 Called by comparison operations if rich comparison is not defined.
1128 # First: Obsolete test
1129 if self.obsolete != other.obsolete:
1130 if self.obsolete:
1131 return -1
1132 else:
1133 return 1
1134 # Work on a copy to protect original
1135 occ1 = sorted(self.occurrences[:])
1136 occ2 = sorted(other.occurrences[:])
1137 if occ1 > occ2:
1138 return 1
1139 if occ1 < occ2:
1140 return -1
1141 # Compare context
1142 msgctxt = self.msgctxt or '0'
1143 othermsgctxt = other.msgctxt or '0'
1144 if msgctxt > othermsgctxt:
1145 return 1
1146 elif msgctxt < othermsgctxt:
1147 return -1
1148 # Compare msgid_plural
1149 msgid_plural = self.msgid_plural or '0'
1150 othermsgid_plural = other.msgid_plural or '0'
1151 if msgid_plural > othermsgid_plural:
1152 return 1
1153 elif msgid_plural < othermsgid_plural:
1154 return -1
1155 # Compare msgstr_plural
1156 if self.msgstr_plural and isinstance(self.msgstr_plural, dict):
1157 msgstr_plural = list(self.msgstr_plural.values())
1158 else:
1159 msgstr_plural = []
1160 if other.msgstr_plural and isinstance(other.msgstr_plural, dict):
1161 othermsgstr_plural = list(other.msgstr_plural.values())
1162 else:
1163 othermsgstr_plural = []
1164 if msgstr_plural > othermsgstr_plural:
1165 return 1
1166 elif msgstr_plural < othermsgstr_plural:
1167 return -1
1168 # Compare msgid
1169 if self.msgid > other.msgid:
1170 return 1
1171 elif self.msgid < other.msgid:
1172 return -1
1173 # Compare msgstr
1174 if self.msgstr > other.msgstr:
1175 return 1
1176 elif self.msgstr < other.msgstr:
1177 return -1
1178 return 0
1180 def __gt__(self, other):
1181 return self.__cmp__(other) > 0
1183 def __lt__(self, other):
1184 return self.__cmp__(other) < 0
1186 def __ge__(self, other):
1187 return self.__cmp__(other) >= 0
1189 def __le__(self, other):
1190 return self.__cmp__(other) <= 0
1192 def __eq__(self, other):
1193 return self.__cmp__(other) == 0
1195 def __ne__(self, other):
1196 return self.__cmp__(other) != 0
1198 def translated(self):
1200 Returns ``True`` if the entry has been translated or ``False``
1201 otherwise.
1203 if self.obsolete or self.fuzzy:
1204 return False
1205 if self.msgstr != '':
1206 return True
1207 if self.msgstr_plural:
1208 for pos in self.msgstr_plural:
1209 if self.msgstr_plural[pos] == '':
1210 return False
1211 return True
1212 return False
def merge(self, other):
    """
    Merge the current entry with the given pot entry.

    All descriptive fields (msgid, context, occurrences, comment, flags,
    plural msgid, obsolete marker and "previous" fields) are taken from
    ``other``.  Existing translations are kept: a plural slot present in
    ``other`` but missing here is created empty, slots already present
    are left untouched.  If the entry was fuzzy before the merge it stays
    fuzzy afterwards.
    """
    self.msgid = other.msgid
    self.msgctxt = other.msgctxt
    # Copy the list (like the flags below) so that later edits to one
    # entry's occurrences do not silently change the other entry.
    self.occurrences = list(other.occurrences)
    self.comment = other.comment
    # must be read before the flags are replaced
    was_fuzzy = self.fuzzy
    self.flags = other.flags[:]  # clone flags
    if was_fuzzy and 'fuzzy' not in self.flags:
        # avoid a duplicated flag when the pot entry is fuzzy as well
        self.flags.append('fuzzy')
    self.msgid_plural = other.msgid_plural
    self.obsolete = other.obsolete
    self.previous_msgctxt = other.previous_msgctxt
    self.previous_msgid = other.previous_msgid
    self.previous_msgid_plural = other.previous_msgid_plural
    if other.msgstr_plural:
        for pos in other.msgstr_plural:
            # keep existing translation at pos if any
            self.msgstr_plural.setdefault(pos, '')
@property
def fuzzy(self):
    """``True`` when ``'fuzzy'`` is one of the entry's flags."""
    flags = self.flags
    return 'fuzzy' in flags
def __hash__(self):
    """Hash the entry on its ``(msgid, msgstr)`` pair."""
    identity = (self.msgid, self.msgstr)
    return hash(identity)
1247 # }}}
1248 # class MOEntry {{{
class MOEntry(_BaseEntry):
    """
    Represents a mo file entry.
    """

    def __init__(self, *args, **kwargs):
        """
        Constructor, accepts the following keyword arguments,
        for consistency with :class:`~polib.POEntry`:

            ``comment``
            ``tcomment``
            ``occurrences``
            ``flags``
            ``previous_msgctxt``
            ``previous_msgid``
            ``previous_msgid_plural``

        Note: even though these keyword arguments are accepted,
        they hold no real meaning in the context of MO files
        and are simply ignored.
        """
        _BaseEntry.__init__(self, *args, **kwargs)
        # Reset the PO-only attributes to their neutral values, whatever
        # was passed in.
        self.comment = self.tcomment = ''
        self.occurrences, self.flags = [], []
        self.previous_msgctxt = None
        self.previous_msgid = None
        self.previous_msgid_plural = None

    def __hash__(self):
        """Hash the entry on its ``(msgid, msgstr)`` pair."""
        identity = (self.msgid, self.msgstr)
        return hash(identity)
1286 # }}}
1287 # class _POFileParser {{{
class _POFileParser(object):
    """
    A finite state machine to parse efficiently and correctly po
    file format.
    """

    # pylint: disable=redefined-outer-name
    def __init__(self, pofile, *_args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``pofile``
            string, path to the po file or its content

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class instantiated to hold the parsed entries (optional,
            defaults to ``POFile``).
        """
        enc = kwargs.get('encoding', default_encoding)
        if _is_file(pofile):
            try:
                self.fhandle = io.open(pofile, 'rt', encoding=enc)
            except LookupError:
                # unknown codec name: fall back to the default encoding
                enc = default_encoding
                self.fhandle = io.open(pofile, 'rt', encoding=enc)
        else:
            # not a file: treat the argument as the po content itself
            self.fhandle = pofile.splitlines()

        klass = kwargs.get('klass')
        if klass is None:
            klass = POFile
        self.instance = klass(
            pofile=pofile,
            encoding=enc,
            check_for_duplicates=kwargs.get('check_for_duplicates', False),
        )
        self.transitions = {}
        self.current_line = 0
        self.current_entry = POEntry(linenum=self.current_line)
        self.current_state = 'st'
        self.current_token = None
        # two memo flags used in handlers
        self.msgstr_index = 0
        self.entry_obsolete = 0
        # Configure the state machine, by adding transitions.
        # Signification of symbols:
        #     * ST: Beginning of the file (start)
        #     * HE: Header
        #     * TC: a translation comment
        #     * GC: a generated comment
        #     * OC: a file/line occurrence
        #     * FL: a flags line
        #     * CT: a message context
        #     * PC: a previous msgctxt
        #     * PM: a previous msgid
        #     * PP: a previous msgid_plural
        #     * MI: a msgid
        #     * MP: a msgid plural
        #     * MS: a msgstr
        #     * MX: a msgstr plural
        #     * MC: a msgid or msgstr continuation line
        all_states = [
            'st',
            'he',
            'gc',
            'oc',
            'fl',
            'ct',
            'pc',
            'pm',
            'pp',
            'tc',
            'ms',
            'mp',
            'mx',
            'mi',
        ]

        self.add('tc', ['st', 'he'], 'he')
        self.add(
            'tc',
            ['gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mp', 'mx', 'mi'],
            'tc',
        )
        self.add('gc', all_states, 'gc')
        self.add('oc', all_states, 'oc')
        self.add('fl', all_states, 'fl')
        self.add('pc', all_states, 'pc')
        self.add('pm', all_states, 'pm')
        self.add('pp', all_states, 'pp')
        self.add(
            'ct',
            ['st', 'he', 'gc', 'oc', 'fl', 'tc', 'pc', 'pm', 'pp', 'ms', 'mx'],
            'ct',
        )
        self.add(
            'mi',
            ['st', 'he', 'gc', 'oc', 'fl', 'ct', 'tc', 'pc', 'pm', 'pp', 'ms', 'mx'],
            'mi',
        )
        self.add('mp', ['tc', 'gc', 'pc', 'pm', 'pp', 'mi'], 'mp')
        self.add('ms', ['mi', 'mp', 'tc'], 'ms')
        self.add('mx', ['mi', 'mx', 'mp', 'tc'], 'mx')
        self.add('mc', ['ct', 'mi', 'mp', 'ms', 'mx', 'pm', 'pp', 'pc'], 'mc')

    # pylint: disable=too-many-branches
    def parse(self):
        """
        Run the state machine, parse the file line by line and call process()
        with the current matched symbol.

        Returns the populated file instance.  Raises ``IOError`` on any
        syntax error.  When the input is a real file, the handle is closed
        whether parsing succeeds or fails.
        """
        keywords = {
            'msgctxt': 'ct',
            'msgid': 'mi',
            'msgstr': 'ms',
            'msgid_plural': 'mp',
        }
        prev_keywords = {
            'msgid_plural': 'pp',
            'msgid': 'pm',
            'msgctxt': 'pc',
        }
        tokens = []
        fpath = '%s ' % self.instance.fpath if self.instance.fpath else ''
        try:
            for line in self.fhandle:
                self.current_line += 1
                if self.current_line == 1:
                    # drop a leading UTF-8 byte order mark, if any
                    BOM = codecs.BOM_UTF8.decode('utf-8')
                    if line.startswith(BOM):
                        line = line[len(BOM):]
                line = line.strip()
                if line == '':
                    continue

                tokens = line.split(None, 2)
                nb_tokens = len(tokens)

                if tokens[0] == '#~|':
                    continue

                if tokens[0] == '#~' and nb_tokens > 1:
                    # obsolete entry: strip the marker and parse the rest
                    # like a regular line
                    line = line[3:].strip()
                    tokens = tokens[1:]
                    nb_tokens -= 1
                    self.entry_obsolete = 1
                else:
                    self.entry_obsolete = 0

                # Take care of keywords like
                # msgid, msgid_plural, msgctxt & msgstr.
                if tokens[0] in keywords and nb_tokens > 1:
                    line = line[len(tokens[0]):].lstrip()
                    self._check_unescaped_quote(line, fpath)
                    self.current_token = line
                    self.process(keywords[tokens[0]])
                    continue

                self.current_token = line

                if tokens[0] == '#:':
                    if nb_tokens <= 1:
                        continue
                    # we are on a occurrences line
                    self.process('oc')

                elif line[:1] == '"':
                    # we are on a continuation line
                    self._check_unescaped_quote(line, fpath)
                    self.process('mc')

                elif line[:7] == 'msgstr[':
                    # we are on a msgstr plural
                    self.process('mx')

                elif tokens[0] == '#,':
                    if nb_tokens <= 1:
                        continue
                    # we are on a flags line
                    self.process('fl')

                elif tokens[0] == '#' or tokens[0].startswith('##'):
                    # we are on a translator comment line
                    self.process('tc')

                elif tokens[0] == '#.':
                    if nb_tokens <= 1:
                        continue
                    # we are on a generated comment line
                    self.process('gc')

                elif tokens[0] == '#|':
                    if nb_tokens <= 1:
                        raise IOError(
                            'Syntax error in po file %s(line %s)'
                            % (fpath, self.current_line)
                        )

                    # Remove the marker and any whitespace right after that.
                    line = line[2:].lstrip()
                    self.current_token = line

                    if tokens[1].startswith('"'):
                        # Continuation of previous metadata.
                        self.process('mc')
                        continue

                    if nb_tokens == 2:
                        # Invalid continuation line.
                        raise IOError(
                            'Syntax error in po file %s(line %s): '
                            'invalid continuation line' % (fpath, self.current_line)
                        )

                    # we are on a "previous translation" comment line,
                    if tokens[1] not in prev_keywords:
                        # Unknown keyword in previous translation comment.
                        raise IOError(
                            'Syntax error in po file %s(line %s): '
                            'unknown keyword %s' % (fpath, self.current_line, tokens[1])
                        )

                    # Remove the keyword and any whitespace
                    # between it and the starting quote.
                    line = line[len(tokens[1]):].lstrip()
                    self.current_token = line
                    self.process(prev_keywords[tokens[1]])

                else:
                    raise IOError(
                        'Syntax error in po file %s(line %s)' % (fpath, self.current_line)
                    )

            if self.current_entry and len(tokens) > 0 and not tokens[0].startswith('#'):
                # since entries are added when another entry is found, we must add
                # the last entry here (only if there are lines). Trailing comments
                # are ignored
                self.instance.append(self.current_entry)

            # before returning the instance, check if there's metadata and if
            # so extract it in a dict
            metadataentry = self.instance.find('')
            if metadataentry:  # metadata found
                # remove the entry
                self.instance.remove(metadataentry)
                self.instance.metadata_is_fuzzy = metadataentry.flags
                key = None
                for msg in metadataentry.msgstr.splitlines():
                    try:
                        key, val = msg.split(':', 1)
                        self.instance.metadata[key] = val.strip()
                    except (ValueError, KeyError):
                        # no "key: value" shape: treat the line as a
                        # continuation of the previous key's value
                        if key is not None:
                            self.instance.metadata[key] += '\n' + msg.strip()
        finally:
            # close opened file, also when a syntax error aborts parsing
            if not isinstance(self.fhandle, list):  # must be file
                self.fhandle.close()
        return self.instance

    def add(self, symbol, states, next_state):
        """
        Add a transition to the state machine.

        Keywords arguments:

        ``symbol``
            string, the matched token (two chars symbol).

        ``states``
            list, a list of states (two chars symbols).

        ``next_state``
            the next state the fsm will have after the action.
        """
        # the handler only depends on the target state
        action = getattr(self, 'handle_%s' % next_state)
        for state in states:
            self.transitions[(symbol, state)] = (action, next_state)

    def process(self, symbol):
        """
        Process the transition corresponding to the current state and the
        symbol provided.

        Keywords arguments:

        ``symbol``
            string, the matched token (two chars symbol).

        Any failure (unknown transition or handler error) is reported as an
        ``IOError`` carrying the current line number.
        """
        try:
            (action, state) = self.transitions[(symbol, self.current_state)]
            if action():
                self.current_state = state
        except Exception:
            fpath = '%s ' % self.instance.fpath if self.instance.fpath else ''
            if hasattr(self.fhandle, 'close'):
                self.fhandle.close()
            raise IOError(
                'Syntax error in po file %s(line %s)' % (fpath, self.current_line)
            )

    # private helpers

    def _check_unescaped_quote(self, quoted, fpath):
        """Raise ``IOError`` if the quoted string holds an unescaped quote."""
        if re.search(r'([^\\]|^)"', quoted[1:-1]):
            raise IOError(
                'Syntax error in po file %s(line %s): '
                'unescaped double quote found' % (fpath, self.current_line)
            )

    def _flush_entry_if_complete(self):
        """Store the entry being built and start a new one after a msgstr."""
        if self.current_state in ['mc', 'ms', 'mx']:
            self.instance.append(self.current_entry)
            self.current_entry = POEntry(linenum=self.current_line)

    @staticmethod
    def _plural_index(token):
        """Return the integer N of a ``msgstr[N] "..."`` token."""
        match = re.match(r'msgstr\[(\d+)', token)
        if match:
            # read all the digits so indexes above 9 are not truncated
            return int(match.group(1))
        # Malformed index: keep the historical single-character lookup so
        # the failure is raised (and reported) exactly as before.
        return int(token[7])

    # state handlers

    def handle_he(self):
        """Handle a header comment."""
        if self.instance.header != '':
            self.instance.header += '\n'
        self.instance.header += self.current_token[2:]
        return True

    def handle_tc(self):
        """Handle a translator comment."""
        self._flush_entry_if_complete()
        if self.current_entry.tcomment != '':
            self.current_entry.tcomment += '\n'
        tcomment = self.current_token.lstrip('#')
        if tcomment.startswith(' '):
            tcomment = tcomment[1:]
        self.current_entry.tcomment += tcomment
        return True

    def handle_gc(self):
        """Handle a generated comment."""
        self._flush_entry_if_complete()
        if self.current_entry.comment != '':
            self.current_entry.comment += '\n'
        self.current_entry.comment += self.current_token[3:]
        return True

    def handle_oc(self):
        """Handle a file:num occurrence."""
        self._flush_entry_if_complete()
        occurrences = self.current_token[3:].split()
        for occurrence in occurrences:
            if occurrence != '':
                try:
                    fil, line = occurrence.rsplit(':', 1)
                    if not line.isdigit():
                        # the colon was part of the path, not a line number
                        fil = occurrence
                        line = ''
                    self.current_entry.occurrences.append((fil, line))
                except (ValueError, AttributeError):
                    self.current_entry.occurrences.append((occurrence, ''))
        return True

    def handle_fl(self):
        """Handle a flags line."""
        self._flush_entry_if_complete()
        self.current_entry.flags += [
            c.strip() for c in self.current_token[3:].split(',')
        ]
        return True

    def handle_pp(self):
        """Handle a previous msgid_plural line."""
        self._flush_entry_if_complete()
        self.current_entry.previous_msgid_plural = unescape(self.current_token[1:-1])
        return True

    def handle_pm(self):
        """Handle a previous msgid line."""
        self._flush_entry_if_complete()
        self.current_entry.previous_msgid = unescape(self.current_token[1:-1])
        return True

    def handle_pc(self):
        """Handle a previous msgctxt line."""
        self._flush_entry_if_complete()
        self.current_entry.previous_msgctxt = unescape(self.current_token[1:-1])
        return True

    def handle_ct(self):
        """Handle a msgctxt."""
        self._flush_entry_if_complete()
        self.current_entry.msgctxt = unescape(self.current_token[1:-1])
        return True

    def handle_mi(self):
        """Handle a msgid."""
        self._flush_entry_if_complete()
        self.current_entry.obsolete = self.entry_obsolete
        self.current_entry.msgid = unescape(self.current_token[1:-1])
        return True

    def handle_mp(self):
        """Handle a msgid plural."""
        self.current_entry.msgid_plural = unescape(self.current_token[1:-1])
        return True

    def handle_ms(self):
        """Handle a msgstr."""
        self.current_entry.msgstr = unescape(self.current_token[1:-1])
        return True

    def handle_mx(self):
        """Handle a msgstr plural."""
        token = self.current_token
        index = self._plural_index(token)
        value = token[token.find('"') + 1:-1]
        self.current_entry.msgstr_plural[index] = unescape(value)
        # remembered so continuation lines extend the right plural form
        self.msgstr_index = index
        return True

    def handle_mc(self):
        """Handle a msgid or msgstr continuation line."""
        token = unescape(self.current_token[1:-1])
        if self.current_state == 'ct':
            self.current_entry.msgctxt += token
        elif self.current_state == 'mi':
            self.current_entry.msgid += token
        elif self.current_state == 'mp':
            self.current_entry.msgid_plural += token
        elif self.current_state == 'ms':
            self.current_entry.msgstr += token
        elif self.current_state == 'mx':
            self.current_entry.msgstr_plural[self.msgstr_index] += token
        elif self.current_state == 'pp':
            self.current_entry.previous_msgid_plural += token
        elif self.current_state == 'pm':
            self.current_entry.previous_msgid += token
        elif self.current_state == 'pc':
            self.current_entry.previous_msgctxt += token
        # don't change the current state
        return False
1752 # }}}
1753 # class _MOFileParser {{{
class _MOFileParser(object):
    """
    A class to parse binary mo files.
    """

    # pylint: disable=unused-argument,redefined-outer-name
    def __init__(self, mofile, *_args, **kwargs):
        """
        Constructor.

        Keyword arguments:

        ``mofile``
            string, path to the mo file or its content

        ``encoding``
            string, the encoding to use, defaults to ``default_encoding``
            global variable (optional).

        ``check_for_duplicates``
            whether to check for duplicate entries when adding entries to the
            file (optional, default: ``False``).

        ``klass``
            class instantiated to hold the parsed entries (optional,
            defaults to ``MOFile``).
        """
        # defined first so that __del__ stays safe if open() fails below
        self.fhandle = None
        if _is_file(mofile):
            self.fhandle = open(mofile, 'rb')
        else:
            # not a file: treat the argument as the raw mo content
            self.fhandle = io.BytesIO(mofile)

        klass = kwargs.get('klass')
        if klass is None:
            klass = MOFile
        self.instance = klass(
            fpath=mofile,
            encoding=kwargs.get('encoding', default_encoding),
            check_for_duplicates=kwargs.get('check_for_duplicates', False),
        )

    def __del__(self):
        """
        Make sure the file is closed, this prevents warnings on unclosed file
        when running tests with python >= 3.2.
        """
        # the attribute is missing when the object was never initialised
        fhandle = getattr(self, 'fhandle', None)
        if fhandle and hasattr(fhandle, 'close'):
            fhandle.close()

    def parse(self):
        """
        Build the instance with the file handle provided in the
        constructor.

        Returns the populated file instance.  Raises ``IOError`` when the
        magic number or the major revision number is not a supported one.
        The handle is closed whether parsing succeeds or fails.
        """
        try:
            # parse magic number
            magic_number = self._readbinary('<I', 4)
            if magic_number == MOFile.MAGIC:
                ii = '<II'
            elif magic_number == MOFile.MAGIC_SWAPPED:
                ii = '>II'
            else:
                raise IOError('Invalid mo file, magic number is incorrect !')
            self.instance.magic_number = magic_number
            # parse the version number and the number of strings
            version, numofstrings = self._readbinary(ii, 8)
            # from MO file format specs: "A program seeing an unexpected major
            # revision number should stop reading the MO file entirely"
            if version >> 16 not in (0, 1):
                raise IOError('Invalid mo file, unexpected major revision number')
            self.instance.version = version
            # original strings and translation strings hash table offset
            msgids_hash_offset, msgstrs_hash_offset = self._readbinary(ii, 8)
            # move to msgid hash table and read length and offset of msgids
            self.fhandle.seek(msgids_hash_offset)
            msgids_index = [self._readbinary(ii, 8) for _ in range(numofstrings)]
            # move to msgstr hash table and read length and offset of msgstrs
            self.fhandle.seek(msgstrs_hash_offset)
            msgstrs_index = [self._readbinary(ii, 8) for _ in range(numofstrings)]
            # build entries
            encoding = self.instance.encoding
            for i in range(numofstrings):
                msgid_length, msgid_offset = msgids_index[i]
                self.fhandle.seek(msgid_offset)
                msgid = self.fhandle.read(msgid_length)

                msgstr_length, msgstr_offset = msgstrs_index[i]
                self.fhandle.seek(msgstr_offset)
                msgstr = self.fhandle.read(msgstr_length)
                if i == 0 and not msgid:  # metadata
                    raw_metadata, metadata = msgstr.split(b('\n')), {}
                    for line in raw_metadata:
                        tokens = line.split(b(':'), 1)
                        if tokens[0] != b(''):
                            try:
                                k = tokens[0].decode(encoding)
                                v = tokens[1].decode(encoding)
                                metadata[k] = v.strip()
                            except IndexError:
                                # line without a colon: key with no value
                                metadata[k] = u('')
                    self.instance.metadata = metadata
                    continue
                # test if we have a plural entry
                msgid_tokens = msgid.split(b('\0'))
                if len(msgid_tokens) > 1:
                    entry = self._build_entry(
                        msgid=msgid_tokens[0],
                        msgid_plural=msgid_tokens[1],
                        msgstr_plural=dict(enumerate(msgstr.split(b('\0')))),
                    )
                else:
                    entry = self._build_entry(msgid=msgid, msgstr=msgstr)
                self.instance.append(entry)
        finally:
            # close opened file, also when the content is rejected
            self.fhandle.close()
        return self.instance

    def _build_entry(self, msgid, msgstr=None, msgid_plural=None, msgstr_plural=None):
        """
        Decode the raw byte strings read from the mo file and return the
        matching ``MOEntry``.  A msgid holding an EOT (``\\x04``) byte is
        split into msgctxt and msgid around it.
        """
        msgctxt_msgid = msgid.split(b('\x04'))
        encoding = self.instance.encoding
        if len(msgctxt_msgid) > 1:
            kwargs = {
                'msgctxt': msgctxt_msgid[0].decode(encoding),
                'msgid': msgctxt_msgid[1].decode(encoding),
            }
        else:
            kwargs = {'msgid': msgid.decode(encoding)}
        if msgstr:
            kwargs['msgstr'] = msgstr.decode(encoding)
        if msgid_plural:
            kwargs['msgid_plural'] = msgid_plural.decode(encoding)
        if msgstr_plural:
            # build a new dict instead of rewriting the caller's one
            kwargs['msgstr_plural'] = dict(
                (k, v.decode(encoding)) for k, v in msgstr_plural.items()
            )
        return MOEntry(**kwargs)

    def _readbinary(self, fmt, numbytes):
        """
        Private method that unpack n bytes of data using format <fmt>.
        It returns a tuple or a mixed value if the tuple length is 1.
        """
        content = self.fhandle.read(numbytes)
        tup = struct.unpack(fmt, content)
        if len(tup) == 1:
            return tup[0]
        return tup
1904 # }}}