doc: Add getreceivedbyaddress release notes
[bitcoinplatinum.git] / contrib / devtools / update-translations.py
blobe1924749d21f192c30af1902e98fafadcf14576f
1 #!/usr/bin/env python
2 # Copyright (c) 2014 Wladimir J. van der Laan
3 # Distributed under the MIT software license, see the accompanying
4 # file COPYING or http://www.opensource.org/licenses/mit-license.php.
5 '''
6 Run this script from the root of the repository to update all translations from
7 transifex.
8 It will do the following automatically:
10 - fetch all translations using the tx tool
11 - post-process them into valid and committable format
12 - remove invalid control characters
13 - remove location tags (makes diffs less noisy)
15 TODO:
16 - auto-add new translations to the build system according to the translation process
17 '''
18 from __future__ import division, print_function
19 import subprocess
20 import re
21 import sys
22 import os
23 import io
24 import xml.etree.ElementTree as ET
# Executable name of the transifex command-line client
TX = 'tx'
# File name of the source-language translation file (excluded from post-processing)
SOURCE_LANG = 'bitcoin_en.ts'
# Directory that holds the locale (.ts) files
LOCALE_DIR = 'src/qt/locale'
# A translation with fewer messages than this is not written out at all
MIN_NUM_MESSAGES = 10
def check_at_repository_root():
    '''
    Abort unless the current working directory looks like the repository root.

    The check is the presence of a '.git' entry in the current directory.
    When it is missing, both diagnostic lines are written to stderr and the
    process exits with status 1. Returns None otherwise.
    '''
    if not os.path.exists('.git'):
        # Both lines are diagnostics, so both go to stderr (keeps stdout clean).
        print('No .git directory found', file=sys.stderr)
        print('Execute this script at the root of the repository', file=sys.stderr)
        sys.exit(1)
def fetch_all_translations():
    '''
    Pull all translations from transifex with the tx command-line tool.

    Runs `tx pull -f -a`. If the tool cannot be started, or it returns a
    non-zero exit status, an error is printed to stderr and the process
    exits with status 1.
    '''
    try:
        status = subprocess.call([TX, 'pull', '-f', '-a'])
    except OSError as e:
        # subprocess raises OSError when the executable is missing or cannot
        # be run; report it like any other fetch failure instead of a traceback.
        print('Error while running %s: %s' % (TX, e), file=sys.stderr)
        sys.exit(1)
    if status:
        print('Error while fetching translations', file=sys.stderr)
        sys.exit(1)
def find_format_specifiers(s):
    '''
    Return the list of characters that directly follow each '%' in s.

    Scanning resumes two characters after every '%', so an escaped '%%'
    contributes a single '%' entry. A '%' that is the very last character
    raises IndexError.
    '''
    found = []
    cursor = s.find('%')
    while cursor >= 0:
        # Indexing (not slicing) on purpose: a trailing '%' must raise IndexError
        found.append(s[cursor + 1])
        cursor = s.find('%', cursor + 2)
    return found
def split_format_specifiers(specifiers):
    '''
    Partition format specifiers into numeric (Qt) ones and all others (strprintf).

    Returns a tuple (numeric, other): numeric is a set, because Qt placeholders
    may appear in any order; other is a list, because strprintf specifiers must
    keep their order.
    '''
    qt_digits = {'1', '2', '3', '4', '5', '6', '7', '8', '9'}
    numeric = [spec for spec in specifiers if spec in qt_digits]

    if numeric:
        # Any numeric specifier means the message is treated as Qt-formatted
        # (see https://doc.qt.io/qt-5/qstring.html#arg). Qt only substitutes
        # numeric placeholders, so "(percentage: %1%)" is valid without any
        # escaping; dropping the rest keeps '%)' from being mistaken for a
        # printf format specifier.
        other = []
    else:
        other = [spec for spec in specifiers if spec not in qt_digits]

    return set(numeric), other
def sanitize_string(s):
    '''Return s with every newline replaced by a space, for single-line printing'''
    pieces = s.split('\n')
    return ' '.join(pieces)
def check_format_specifiers(source, translation, errors, numerus):
    '''
    Check that translation uses the same format specifiers as source.

    Returns True when the translation is acceptable and False otherwise; on
    failure a human-readable description is appended to the errors list.
    numerus tells whether the message is a numerus (plural) form, in which
    case the translation may leave out a lone %n specifier.
    '''
    expected = split_format_specifiers(find_format_specifiers(source))
    # assert that no source messages contain both Qt and strprintf format specifiers
    # if this fails, go change the source as this is hacky and confusing!
    assert(not(expected[0] and expected[1]))

    try:
        actual = split_format_specifiers(find_format_specifiers(translation))
    except IndexError:
        # find_format_specifiers raises this when the translation ends in '%'
        errors.append("Parse error in translation for '%s': '%s'" % (sanitize_string(source), sanitize_string(translation)))
        return False

    if expected == actual:
        return True

    # Allow numerus translations to omit %n specifier (usually when it only has one possible value)
    if numerus and expected == (set(), ['n']) and actual == (set(), []) and '%' not in translation:
        return True

    errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation)))
    return False
def all_ts_files(suffix=''):
    '''
    Generate (filename, filepath) pairs for the translation files in LOCALE_DIR.

    Only directory entries ending in '.ts' + suffix are considered, and the
    source language file is skipped. The suffix is stripped from the yielded
    filename, and filepath is that stripped name joined onto LOCALE_DIR.
    '''
    wanted_ending = '.ts' + suffix
    source_entry = SOURCE_LANG + suffix
    for entry in os.listdir(LOCALE_DIR):
        # process only language files, and do not process source language
        if entry.endswith(wanted_ending) and entry != source_entry:
            # remove provided suffix, if any
            name = entry[:-len(suffix)] if suffix else entry
            yield (name, os.path.join(LOCALE_DIR, name))
# Matches every byte in 0x00-0x1f except line feed (0x0a) and carriage
# return (0x0d); note that this includes the tab character (0x09).
FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]')


def remove_invalid_characters(s):
    '''Return the bytes string s with every byte matched by FIX_RE deleted'''
    cleaned = re.sub(FIX_RE, b'', s)
    return cleaned
# Replacement for ElementTree's cdata escape function that makes our output
# match Qt's. It is optional (it only gives cleaner diffs for comparison) and
# disabled by default: _orig_escape_cdata stays None until
# postprocess_translations() installs the override.
_orig_escape_cdata = None


def escape_cdata(text):
    '''
    Escape text with the saved original escape function, then also replace
    single and double quotes by &apos; and &quot;.
    '''
    escaped = _orig_escape_cdata(text)
    return escaped.replace("'", '&apos;').replace('"', '&quot;')
def postprocess_translations(reduce_diff_hacks=False):
    '''
    Check and clean up every fetched translation file in LOCALE_DIR.

    Each translation file is first renamed to '<name>.orig'. The '.orig' file
    is then read, stripped of the control bytes matched by FIX_RE, parsed as
    XML and processed message by message:

    - translations whose format specifiers do not match the source are
      cleared and marked as unfinished (the problem is printed)
    - location tags are removed
    - messages whose translation is marked unfinished are removed entirely

    A file that ends up with fewer than MIN_NUM_MESSAGES messages is not
    written back; all others are written to their original path.

    reduce_diff_hacks: when true, adjust the XML output (quote escaping and
    '/>' instead of ' />') to follow Qt's formatting, for smaller diffs.

    Returns True if at least one translation failed the format check.
    '''
    global _orig_escape_cdata

    print('Checking and postprocessing...')

    if reduce_diff_hacks and ET._escape_cdata is not escape_cdata:
        # Install the override only once. Saving ET._escape_cdata again after
        # it has been replaced would make escape_cdata call itself forever.
        _orig_escape_cdata = ET._escape_cdata
        ET._escape_cdata = escape_cdata

    for (filename,filepath) in all_ts_files():
        os.rename(filepath, filepath+'.orig')

    have_errors = False
    for (filename,filepath) in all_ts_files('.orig'):
        # pre-fixups to cope with transifex output
        parser = ET.XMLParser(encoding='utf-8') # need to override encoding because 'utf8' is not understood only 'utf-8'
        with open(filepath + '.orig', 'rb') as f:
            data = f.read()
        # remove control characters; this must be done over the entire file otherwise the XML parser will fail
        data = remove_invalid_characters(data)
        tree = ET.parse(io.BytesIO(data), parser=parser)

        # iterate over all messages in file
        # (findall returns a list, so removing elements inside these loops is safe)
        root = tree.getroot()
        for context in root.findall('context'):
            for message in context.findall('message'):
                numerus = message.get('numerus') == 'yes'
                source = message.find('source').text
                translation_node = message.find('translation')
                # pick all numerusforms
                if numerus:
                    translations = [i.text for i in translation_node.findall('numerusform')]
                else:
                    translations = [translation_node.text]

                for translation in translations:
                    if translation is None:
                        continue
                    errors = []
                    valid = check_format_specifiers(source, translation, errors, numerus)

                    for error in errors:
                        print('%s: %s' % (filename, error))

                    if not valid: # set type to unfinished and clear string if invalid
                        translation_node.clear()
                        translation_node.set('type', 'unfinished')
                        have_errors = True

                # Remove location tags
                for location in message.findall('location'):
                    message.remove(location)

                # Remove entire message if it is an unfinished translation
                if translation_node.get('type') == 'unfinished':
                    context.remove(message)

        # check if document is (virtually) empty, and remove it if so
        # (the file was already renamed to .orig; skipping the write leaves
        # nothing at the original path)
        num_messages = sum(len(context.findall('message')) for context in root.findall('context'))
        if num_messages < MIN_NUM_MESSAGES:
            print('Removing %s, as it contains only %i messages' % (filepath, num_messages))
            continue

        # write fixed-up tree
        # if diff reduction requested, replace some XML to 'sanitize' to qt formatting
        if reduce_diff_hacks:
            out = io.BytesIO()
            tree.write(out, encoding='utf-8')
            out = out.getvalue()
            out = out.replace(b' />', b'/>')
            with open(filepath, 'wb') as f:
                f.write(out)
        else:
            tree.write(filepath, encoding='utf-8')
    return have_errors
if __name__ == '__main__':
    # Pipeline, in order: verify the working directory, pull the translations
    # with the tx tool, then check and post-process the fetched files.
    for step in (check_at_repository_root, fetch_all_translations, postprocess_translations):
        step()