Markup command indexing: don't use internal function names.
[lilypond.git] / buildscripts / translations-status.py
blob1c62ee56d200b9a94e987a710ef5552a0c68ccb4
1 #!/usr/bin/env python
"""
USAGE: translations-status.py BUILDSCRIPT-DIR LOCALEDIR

  This script must be run from Documentation/

  Reads the template file translations.template.html.in and, for each
  enabled language LANG, LANG/translations.template.html.in
  Writes translations.html.in and, for each enabled language LANG,
  LANG/translations.html.in
  Writes out/translations-status.txt
  Updates word counts in TRANSLATION
"""
16 import sys
17 import re
18 import string
19 import os
21 import langdefs
def progress(str):
    """Write the message *str* to standard error, followed by a newline."""
    line = str + '\n'
    sys.stderr.write(line)
progress("translations-status.py")

# The first command-line argument is the build scripts directory.
buildscript_dir = sys.argv[1]


def _doc(s):
    """Return *s* unchanged.

    Wraps the user-visible strings defined below, which are later passed
    through a per-language translation callable.
    NOTE(review): presumably a marker for a message extraction tool --
    confirm.
    """
    return s


# buildlib is looked up in the build scripts directory given on the
# command line, so that directory must be on the module search path first.
sys.path.append(buildscript_dir)
import buildlib

# load gettext messages catalogs
translation = langdefs.translation
# Patterns used to pick apart Texinfo sources.  Unless noted otherwise they
# are compiled in multi-line mode, so ^ and $ anchor at line boundaries.
language_re = re.compile(r'^@documentlanguage (.+)', re.MULTILINE)
# An @ignore ... @end ignore block, or a "@c " comment up to end of line.
comments_re = re.compile(
    r'^@ignore\n(.|\n)*?\n@end ignore$|@c .*?$', re.MULTILINE)
space_re = re.compile(r'\s+', re.MULTILINE)
# Inline @lilypond{...} or a block terminated by "@end lilypond".
lilypond_re = re.compile(
    r'@lilypond({.*?}|(.|\n)*?\n@end lilypond$)', re.MULTILINE)
node_re = re.compile(r'^@node .*?$', re.MULTILINE)
# Group 1 is the sectioning command, group 2 the title text.
title_re = re.compile(
    r'^@(top|chapter|(?:sub){0,2}section|'
    r'(?:unnumbered|appendix)(?:(?:sub){0,2}sec)?) (.*?)$',
    re.MULTILINE)
include_re = re.compile(r'^@include (.*?)$', re.MULTILINE)

# "@c Key: value" comment headers carrying translation metadata; the key
# is matched case-insensitively and group 1 is the value.
translators_re = re.compile(
    r'^@c\s+Translators\s*:\s*(.*?)$', re.MULTILINE | re.IGNORECASE)
checkers_re = re.compile(
    r'^@c\s+Translation\s*checkers\s*:\s*(.*?)$',
    re.MULTILINE | re.IGNORECASE)
status_re = re.compile(
    r'^@c\s+Translation\s*status\s*:\s*(.*?)$',
    re.MULTILINE | re.IGNORECASE)
# Searched for inside the translation status value (not multi-line).
post_gdp_re = re.compile(r'post.GDP', re.IGNORECASE)
# Literal markers looked up with "in" rather than with a regex.
untranslated_node_str = '@untranslated'
skeleton_str = '-- SKELETON FILE --'
# User-visible strings.  Each is wrapped in _doc () here and passed through
# a per-language translation callable where it is used.
section_titles_string = _doc('Section titles')
last_updated_string = _doc(' <p><i>Last updated %s</i></p>\n')
# Column heads of the detailed, per-language status table.
detailed_status_heads = [
    _doc('Translators'),
    _doc('Translation checkers'),
    _doc('Translated'),
    _doc('Up to date'),
    _doc('Other info'),
]
# Maps a status name to its renderings.  Values are %-formatted with a
# mapping that provides the percentage as 'p', hence the doubled '%%'.
# Not every status defines every rendering key.
format_table = {
    'not translated': {
        'color': 'd0f0f8',
        'short': _doc('no'),
        'abbr': 'NT',
        'long': _doc('not translated'),
    },
    'partially translated': {
        'color': 'dfef77',
        'short': _doc('partially (%(p)d %%)'),
        'abbr': '%(p)d%%',
        'long': _doc('partially translated (%(p)d %%)'),
    },
    'fully translated': {
        'color': '1fff1f',
        'short': _doc('yes'),
        'abbr': 'FT',
        'long': _doc('translated'),
    },
    'up to date': {
        'short': _doc('yes'),
        'long': _doc('up to date'),
        'abbr': '100%%',
        'vague': _doc('up to date'),
    },
    'outdated': {
        'short': _doc('partially'),
        'abbr': '%(p)d%%',
        'vague': _doc('partially up to date'),
    },
    'N/A': {
        'short': _doc('N/A'),
        'abbr': 'N/A',
        'color': 'd587ff',
        'vague': '',
    },
    'pre-GDP': _doc('pre-GDP'),
    'post-GDP': _doc('post-GDP'),
}
# Maps a Texinfo sectioning command to (numbering type, nesting level).
# Numbering type is 'u' (unnumbered), 'n' (numbered) or 'l' (lettered).
# Every command that title_re can match has an entry, so looking up the
# command of a matched title cannot raise KeyError.  Sections nested in an
# appendix are numbered below the appendix letter (A.1, A.1.1, ...).
texi_level = {
    'top': ('u', 0),
    'unnumbered': ('u', 1),
    'unnumberedsec': ('u', 2),
    'unnumberedsubsec': ('u', 3),
    'unnumberedsubsubsec': ('u', 4),
    'chapter': ('n', 1),
    'section': ('n', 2),
    'subsection': ('n', 3),
    'subsubsection': ('n', 4),
    'appendix': ('l', 1),
    'appendixsec': ('n', 2),
    'appendixsubsec': ('n', 3),
    'appendixsubsubsec': ('n', 4),
}

# string.maketrans only exists in Python 2; Python 3 provides the
# equivalent as str.maketrans.
try:
    _maketrans = string.maketrans
except AttributeError:
    _maketrans = str.maketrans

# Translation table mapping an appendix letter to the next one.  '@' is the
# character just before 'A' and stands for "no appendix yet"; 'Z' has no
# successor and is left unchanged.
appendix_number_trans = _maketrans('@ABCDEFGHIJKLMNOPQRSTUVWXY',
                                   'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
class SectionNumber(object):
    """Running section counter used to number titles.

    The state is a list with one [index, kind] entry per nesting level,
    where kind is 'u' (unnumbered), 'n' (numbered) or 'l' (lettered).
    Entry 0 is always the unnumbered top level.
    """

    def __init__(self):
        self.__data = [[0, 'u']]

    def __increase_last_index(self):
        """Advance the index of the deepest level according to its kind."""
        kind = self.__data[-1][1]
        if kind == 'l':
            # Lettered: step to the next letter through the module-level
            # translation table.
            self.__data[-1][0] = \
                self.__data[-1][0].translate(appendix_number_trans)
        elif kind == 'n':
            self.__data[-1][0] += 1
        # Unnumbered levels keep their index untouched.

    def format(self):
        """Return the current number followed by a space.

        Returns the empty string when the deepest level is unnumbered.
        Indices of unnumbered ancestor levels are left out of the number.
        """
        if self.__data[-1][1] == 'u':
            return ''
        return '.'.join([str(i[0]) for i in self.__data if i[1] != 'u']) + ' '

    def increase(self, level_spec):
        """Move on to the next section and return its formatted number.

        level_spec is a (kind, level) pair as stored in texi_level.  It is
        taken as one positional argument and unpacked here, because tuple
        parameters in the signature are a syntax error in Python 3.
        """
        kind, level = level_spec
        if level == 0:
            # A new top level restarts all numbering.
            self.__data = [[0, 'u']]
        # Drop counters of levels deeper than the requested one.
        while level + 1 < len(self.__data):
            del self.__data[-1]
        if level + 1 > len(self.__data):
            # Entering a deeper level: start one step before its first
            # value, since the index is advanced just below.
            self.__data.append([0, kind])
            if kind == 'l':
                self.__data[-1][0] = '@'
        if kind == self.__data[-1][1]:
            self.__increase_last_index()
        else:
            # The numbering kind changed at this level: restart at the
            # first value of the new kind.
            self.__data[-1] = [0, kind]
            if kind == 'l':
                self.__data[-1][0] = 'A'
            elif kind == 'n':
                self.__data[-1][0] = 1
        return self.format()
def percentage_color(percent):
    """Return a 6-digit hexadecimal RGB color for a percentage.

    The color is interpolated linearly over three segments, running from
    ff5c5c at 0 % to 1fff1f at 100 %.  percent is clamped to 0..100 and
    each channel is written as two hex digits, so the result is always a
    well-formed color.  Slicing hex() output does not pad, and breaks on
    the negative channels that out-of-range input produces.
    """
    p = min(max(percent, 0), 100) / 100.0
    # Each branch picks the (start, end) value of the R, G and B channels
    # for its segment and the weights given to start (wa) and end (wb).
    if p < 0.33:
        ramp = [(0xff, 0xff), (0x5c, 0xa6), (0x5c, 0x4c)]
        wa, wb = 1 - 3 * p, 3 * p
    elif p < 0.67:
        ramp = [(0xff, 0xff), (0xa6, 0xff), (0x4c, 0x3d)]
        wa, wb = 2 - 3 * p, 3 * p - 1
    else:
        ramp = [(0xff, 0x1f), (0xff, 0xff), (0x3d, 0x1f)]
        wa, wb = 3 * (1 - p), 3 * p - 2
    return ''.join(['%02x' % int(wb * b + wa * a) for (a, b) in ramp])
def update_word_count(text, filename, word_count):
    """Return text with the word count listed for filename updated.

    Every line of text that starts with a number, optional spaces and then
    filename gets that prefix replaced by word_count, left-justified in a
    6-character column, followed by filename.

    filename is matched literally.  It is escaped before being put in the
    pattern, so '.' or '+' in it are not regex operators.  The replacement
    is returned from a function, so backslashes in it are not expanded as
    template escapes.
    """
    replacement = str(word_count).ljust(6) + filename
    pattern = r'(?m)^(\d+) *' + re.escape(filename)
    return re.sub(pattern, lambda m: replacement, text)
# Group 1 is the string on the "msgid" line itself, group 2 the raw run of
# quoted continuation lines that follow it (possibly empty).  Capturing the
# run as a whole is needed because a repeated group only keeps its last
# iteration, which would drop all but the last continuation line.
po_msgid_re = re.compile(r'^msgid "(.*?)"((?:\n".*?")*)', re.M)
# One quoted string at the start of a line, used on group 2 above.
po_continuation_re = re.compile(r'^"(.*?)"', re.M)


def po_word_count(po_content):
    """Return the word count of all msgid strings in a PO file.

    Take the contents of the PO file as string po_content.
    The pieces of a multi-line msgid are concatenated without separator,
    and distinct msgids are joined with one space.  The result is the
    number of fields obtained by splitting on whitespace, as in the other
    word counting functions.
    """
    msgids = []
    for first, continuation in po_msgid_re.findall(po_content):
        pieces = [first] + po_continuation_re.findall(continuation)
        msgids.append(''.join(pieces))
    return len(space_re.split(' '.join(msgids)))
# A tag is anything between '<' and the next '>', newlines included.
sgml_tag_re = re.compile(r'<.*?>', re.DOTALL)


def sgml_word_count(sgml_doc):
    """Return the word count of an SGML/HTML document with tags removed.

    Take the document as string sgml_doc.  The count is the number of
    fields obtained by splitting the remaining text on whitespace.
    """
    text_only = sgml_tag_re.sub('', sgml_doc)
    fields = space_re.split(text_only)
    return len(fields)
def tely_word_count(tely_doc):
    """
    Calculate word count of a Texinfo document node by node.

    Take string tely_doc as an argument.
    Return a list of integers: one count for the text before the first
    @node line, then one per node.

    Texinfo comments and @lilypond blocks are not included in word counts.
    """
    for ignored_re in (comments_re, lilypond_re):
        tely_doc = ignored_re.sub('', tely_doc)
    return [len(space_re.split(chunk)) for chunk in node_re.split(tely_doc)]
class TelyDocument(object):
    """A Texinfo source file, with its title and its included files.

    Attributes set by the constructor:
      filename        path the document was read from
      contents        whole text of the file
      title, level    title and texi_level entry of the first sectioning
                      command, or 'Untitled' and ('u', 1) if there is none
      language        argument of @documentlanguage; the attribute is NOT
                      set at all when the file has no such line
      included_files  paths of the @include'd files that exist on disk
    """

    def __init__(self, filename):
        self.filename = filename
        # Close the file deterministically instead of leaving the open
        # handle to the garbage collector.
        with open(filename) as f:
            self.contents = f.read()

        ## record title and sectioning level of first Texinfo section
        m = title_re.search(self.contents)
        if m:
            self.title = m.group(2)
            self.level = texi_level[m.group(1)]
        else:
            self.title = 'Untitled'
            self.level = ('u', 1)

        m = language_re.search(self.contents)
        if m:
            self.language = m.group(1)

        # @include arguments are resolved relative to this file's
        # directory; files that do not exist are silently dropped.
        included_files = [os.path.join(os.path.dirname(filename), t)
                          for t in include_re.findall(self.contents)]
        self.included_files = [p for p in included_files if os.path.exists(p)]

    def print_title(self, section_number):
        """Advance section_number to this document and return the title.

        The title is prefixed by the number that section_number returns
        for this document's level.  Calling this changes the state of
        section_number.
        """
        return section_number.increase(self.level) + self.title
class TranslatedTelyDocument(TelyDocument):
    """A translation of a master Texinfo document.

    In addition to what TelyDocument records, the constructor determines
    the translators and checkers, whether the translation is marked
    post-GDP, which nodes are translated, the percentage of the master
    document's words that are translated, and how up to date the
    translation is.
    """

    def __init__(self, filename, masterdocument, parent_translation=None):
        """Read the translation in filename of masterdocument.

        parent_translation is the translation that includes this file, if
        any; language, translators and checkers default to its values when
        this file does not state its own.
        """
        TelyDocument.__init__(self, filename)

        self.masterdocument = masterdocument
        if not hasattr(self, 'language') \
                and hasattr(parent_translation, 'language'):
            self.language = parent_translation.language
        if hasattr(self, 'language'):
            self.translation = translation[self.language]
        else:
            # No known language: leave strings untranslated.
            self.translation = lambda x: x
        self.title = self.translation(self.title)

        ## record authoring information
        m = translators_re.search(self.contents)
        if m:
            self.translators = [n.strip() for n in m.group(1).split(',')]
        else:
            # Inherit from the including translation.  A top-level
            # translation has none (parent_translation is None); it gets an
            # empty list instead of raising AttributeError.
            self.translators = getattr(parent_translation, 'translators', [])
        m = checkers_re.search(self.contents)
        if m:
            self.checkers = [n.strip() for n in m.group(1).split(',')]
        elif isinstance(parent_translation, TranslatedTelyDocument):
            self.checkers = parent_translation.checkers
        else:
            self.checkers = []

        ## check whether translation is pre- or post-GDP
        m = status_re.search(self.contents)
        if m:
            self.post_gdp = bool(post_gdp_re.search(m.group(1)))
        else:
            self.post_gdp = False

        ## record which parts (nodes) of the file are actually translated
        self.partially_translated = skeleton_str not in self.contents
        nodes = node_re.split(self.contents)
        self.translated_nodes = [untranslated_node_str not in n
                                 for n in nodes]

        ## calculate translation percentage
        # Nodes are paired with the master's word counts by position; extra
        # entries on either side are ignored.
        master_total_word_count = sum(masterdocument.word_count)
        translation_word_count = sum(
            [count for (count, is_translated)
             in zip(masterdocument.word_count, self.translated_nodes)
             if is_translated])
        # Floor division keeps the percentage an integer on Python 3 too.
        self.translation_percentage = \
            100 * translation_word_count // master_total_word_count

        ## calculate how much the file is outdated
        (diff_string, error) = \
            buildlib.check_translated_doc(masterdocument.filename,
                                          self.contents)
        if error:
            sys.stderr.write('warning: %s: %s' % (self.filename, error))
            self.uptodate_percentage = None
        else:
            diff = diff_string.splitlines()
            # Count changed characters, leaving out the leading +/- marker
            # and the +++/--- file header lines.
            insertions = sum([len(l) - 1 for l in diff
                              if l.startswith('+')
                              and not l.startswith('+++')])
            deletions = sum([len(l) - 1 for l in diff
                             if l.startswith('-')
                             and not l.startswith('---')])
            outdateness_percentage = 50.0 * (deletions + insertions) / \
                (masterdocument.size + 0.5 * (deletions - insertions))
            percentage = 100 - int(outdateness_percentage)
            # Out-of-range values are replaced by a fixed fallback.
            alternative = None
            if percentage > 100:
                alternative = 50
            elif percentage < 1:
                alternative = 1
            if alternative is not None:
                progress("%s: strange uptodateness percentage %d %%, "
                         "setting to %d %%"
                         % (self.filename, percentage, alternative))
                percentage = alternative
            self.uptodate_percentage = percentage

    def completeness(self, formats=('long',), translated=False):
        """Return renderings of the translation completeness.

        formats is a format_table key or a sequence of such keys; the
        result maps each requested key to its rendering.  The renderings
        are translated into the document language if translated is true.
        """
        if translated:
            translate = self.translation
        else:
            translate = lambda x: x

        if isinstance(formats, str):
            formats = [formats]
        p = self.translation_percentage
        if p == 0:
            status = 'not translated'
        elif p == 100:
            status = 'fully translated'
        else:
            status = 'partially translated'
        # Explicit mapping instead of locals (): inside a comprehension
        # locals () does not contain p on Python 3.
        values = {'p': p}
        return dict([(f, translate(format_table[status][f]) % values)
                     for f in formats])

    def uptodateness(self, formats=('long',), translated=False):
        """Return renderings of how up to date the translation is.

        Same conventions as completeness ().  When the percentage is
        known, 'color' is computed by percentage_color () instead of being
        looked up in format_table.
        """
        if translated:
            translate = self.translation
        else:
            translate = lambda x: x

        if isinstance(formats, str):
            formats = [formats]
        p = self.uptodate_percentage
        if p is None:
            status = 'N/A'
        elif p == 100:
            status = 'up to date'
        else:
            status = 'outdated'
        values = {'p': p}
        result = {}
        for f in formats:
            if f == 'color' and p is not None:
                result['color'] = percentage_color(p)
            else:
                result[f] = translate(format_table[status][f]) % values
        return result

    def gdp_status(self):
        """Return the translated 'post-GDP' or 'pre-GDP' label."""
        if self.post_gdp:
            return self.translation(format_table['post-GDP'])
        else:
            return self.translation(format_table['pre-GDP'])

    def short_html_status(self):
        """Return one HTML table cell summarizing this translation."""
        s = ' <td>'
        if self.partially_translated:
            s += '<br>\n '.join(self.translators) + '<br>\n'
            if self.checkers:
                s += ' <small>' + \
                    '<br>\n '.join(self.checkers) + '</small><br>\n'

        c = self.completeness(['color', 'long'])
        s += (' <span style="background-color: #%(color)s">'
              '%(long)s</span><br>\n') % c

        if self.partially_translated:
            u = self.uptodateness(['vague', 'color'])
            s += (' <span style="background-color: #%(color)s">'
                  '%(vague)s</span><br>\n') % u

        s += ' </td>\n'
        return s

    def text_status(self):
        """Return the abbreviated status used in the plain text report."""
        s = self.completeness('abbr')['abbr'] + ' '

        if self.partially_translated:
            s += self.uptodateness('abbr')['abbr'] + ' '
        return s

    def html_status(self, numbering=None):
        """Return the detailed HTML status rows of this translation.

        The rows of the translations of all included files follow,
        recursively.  A top-level document (level 0) is wrapped in a
        complete table.  numbering is the SectionNumber used for titles; a
        fresh one is created per call when it is omitted, so numbering
        state cannot leak from one call to the next.
        """
        if self.title == 'Untitled':
            return ''
        if numbering is None:
            numbering = SectionNumber()

        if self.level[1] == 0:  # if self is a master document
            s = ('<table align="center" border="2">\n'
                 ' <tr align="center">\n'
                 ' <th>%s</th>') % self.print_title(numbering)
            s += ''.join([' <th>%s</th>\n' % self.translation(h)
                          for h in detailed_status_heads])
            s += ' </tr>\n'
            s += ' <tr align="left">\n <td>%s<br>(%d)</td>\n' \
                % (self.translation(section_titles_string),
                   sum(self.masterdocument.word_count))

        else:
            s = ' <tr align="left">\n <td>%s<br>(%d)</td>\n' \
                % (self.print_title(numbering),
                   sum(self.masterdocument.word_count))

        if self.partially_translated:
            s += ' <td>' + '<br>\n '.join(self.translators) + '</td>\n'
            s += ' <td>' + '<br>\n '.join(self.checkers) + '</td>\n'
        else:
            s += ' <td></td>\n' * 2

        c = self.completeness(['color', 'short'], translated=True)
        s += (' <td><span style="background-color: #%(color)s">'
              '%(short)s</span></td>\n') % {'color': c['color'],
                                            'short': c['short']}

        if self.partially_translated:
            u = self.uptodateness(['short', 'color'], translated=True)
            s += (' <td><span style="background-color: #%(color)s">'
                  '%(short)s</span></td>\n') % {'color': u['color'],
                                                'short': u['short']}
        else:
            s += ' <td></td>\n'

        s += ' <td>' + self.gdp_status() + '</td>\n </tr>\n'
        # Recurse into the same-language translations of the files that
        # the master document includes.
        s += ''.join([i.translations[self.language].html_status(numbering)
                      for i in self.masterdocument.includes
                      if self.language in i.translations])

        if self.level[1] == 0:  # if self is a master document
            s += '</table>\n<p></p>\n'
        return s
class MasterTelyDocument(TelyDocument):
    """A master Texinfo document together with its translations.

    Attributes set by the constructor, in addition to TelyDocument's:
      size          length of the file contents
      word_count    list of word counts as returned by tely_word_count ()
      translations  maps a language to the TranslatedTelyDocument found at
                    LANG/filename, for the languages where that file exists
      includes      MasterTelyDocument of each included file; empty when
                    the document has no translation
    """

    def __init__(self, filename, parent_translations=None):
        """Read filename and its translations, then its included files.

        parent_translations maps a language to the translation of the
        including document in that language.  It is only read, never
        modified; when omitted, no language has a parent translation.
        """
        if parent_translations is None:
            parent_translations = {}
        TelyDocument.__init__(self, filename)
        self.size = len(self.contents)
        self.word_count = tely_word_count(self.contents)
        self.translations = {}
        for lang in langdefs.LANGDICT:
            translated_path = os.path.join(lang, filename)
            if os.path.exists(translated_path):
                self.translations[lang] = TranslatedTelyDocument(
                    translated_path, self, parent_translations.get(lang))
        # Included files are only looked at when this document has at
        # least one translation.
        if self.translations:
            self.includes = [MasterTelyDocument(f, self.translations)
                             for f in self.included_files]
        else:
            self.includes = []

    def update_word_counts(self, s):
        """Return s with the word counts of this document updated.

        The counts of all included files are updated too, recursively, by
        way of update_word_count ().
        """
        s = update_word_count(s, self.filename, sum(self.word_count))
        for i in self.includes:
            s = i.update_word_counts(s)
        return s

    def html_status(self, numbering=None):
        """Return the HTML overview rows of this document.

        Each row has one cell per translation.  Rows of the included files
        follow, recursively, and a top-level document (level 0) is wrapped
        in a complete table.  numbering is the SectionNumber used for
        titles; a fresh one is created per call when it is omitted, so
        numbering state cannot leak from one call to the next.
        """
        if self.title == 'Untitled' or not self.translations:
            return ''
        if numbering is None:
            numbering = SectionNumber()
        if self.level[1] == 0:  # if self is a master document
            s = ('<table align="center" border="2">\n'
                 ' <tr align="center">\n'
                 ' <th>%s</th>') % self.print_title(numbering)
            s += ''.join([' <th>%s</th>\n' % l for l in self.translations])
            s += ' </tr>\n'
            s += ' <tr align="left">\n <td>Section titles<br>(%d)</td>\n' \
                % sum(self.word_count)

        else:  # if self is an included file
            s = ' <tr align="left">\n <td>%s<br>(%d)</td>\n' \
                % (self.print_title(numbering), sum(self.word_count))

        s += ''.join([t.short_html_status()
                      for t in self.translations.values()])
        s += ' </tr>\n'
        s += ''.join([i.html_status(numbering) for i in self.includes])

        if self.level[1] == 0:  # if self is a master document
            s += '</table>\n<p></p>\n'
        return s

    def text_status(self, numbering=None, colspec=(48, 12)):
        """Return the plain text status lines of this document.

        Lines of the included files follow, recursively.  colspec gives
        the width of the title column and of each language column.
        numbering is handled as in html_status ().
        """
        if self.title == 'Untitled' or not self.translations:
            return ''
        if numbering is None:
            numbering = SectionNumber()

        s = ''
        if self.level[1] == 0:  # if self is a master document
            s += (self.print_title(numbering) + ' ').ljust(colspec[0])
            s += ''.join(['%s'.ljust(colspec[1]) % l
                          for l in self.translations])
            s += '\n'
            s += ('Section titles (%d)' %
                  sum(self.word_count)).ljust(colspec[0])

        else:
            s = '%s (%d) ' \
                % (self.print_title(numbering), sum(self.word_count))
            s = s.ljust(colspec[0])

        s += ''.join([t.text_status().ljust(colspec[1])
                      for t in self.translations.values()])
        s += '\n\n'
        s += ''.join([i.text_status(numbering) for i in self.includes])

        if self.level[1] == 0:
            s += '\n'
        return s
# One category: a "-N-" heading line, the listing that follows it (group
# 2, up to and including the newline before the total), then the
# "<number> total" text.
update_category_word_counts_re = re.compile(
    r'(?ms)^-(\d+)-(.*?\n)\d+ *total')

# The count at the start of a listing line.
counts_re = re.compile(r'(?m)^(\d+) ')


def update_category_word_counts_sub(m):
    """Return the category matched by m with its total recomputed.

    m is a match of update_category_word_counts_re.  The total is the sum
    of the counts that start the lines of the listing, left-justified in
    a 6-character column.
    """
    category = m.group(1)
    listing = m.group(2)
    total = sum([int(count) for count in counts_re.findall(listing)])
    return '-%s-%s%stotal' % (category, listing, str(total).ljust(6))
def _read_file(path):
    """Return the whole contents of the file at path, then close it."""
    with open(path) as f:
        return f.read()


def _write_file(path, contents):
    """Overwrite the file at path with contents, then close it."""
    with open(path, 'w') as f:
        f.write(contents)


progress("Reading documents...")

tely_files = \
    buildlib.read_pipe("find -maxdepth 2 -name '*.tely'")[0].splitlines()
tely_files.sort()
master_docs = [MasterTelyDocument(os.path.normpath(filename))
               for filename in tely_files]
# Documents without any translation are left out of all reports.
master_docs = [doc for doc in master_docs if doc.translations]

main_status_page = _read_file('translations.template.html.in')

enabled_languages = [lang for lang in langdefs.LANGDICT
                     if langdefs.LANGDICT[lang].enabled
                     and lang != 'en']
lang_status_pages = \
    dict([(lang,
           _read_file(os.path.join(lang, 'translations.template.html.in')))
          for lang in enabled_languages])

progress("Generating status pages...")

date_time = buildlib.read_pipe('LANG= date -u')[0]

main_status_html = last_updated_string % date_time
main_status_html += '\n'.join([doc.html_status() for doc in master_docs])

html_re = re.compile('<html>', re.I)
end_body_re = re.compile('</body>', re.I)

html_header = '''<html>
<!-- This page is automatically generated by translation-status.py from
translations.template.html.in; DO NOT EDIT !-->'''

main_status_page = html_re.sub(html_header, main_status_page)

# The generated HTML is inserted through a function: as a plain
# replacement string, backslashes in it (e.g. from section titles) would
# be interpreted as template escapes by re.sub.
main_status_page = end_body_re.sub(
    lambda m: main_status_html + '\n</body>', main_status_page)

_write_file('translations.html.in', main_status_page)

for lang in enabled_languages:
    # Kept in its own variable so that date_time, which is used for the
    # text report below, is not overwritten with a localized date.
    lang_date_time = buildlib.read_pipe('LANG=%s date -u' % lang)[0]
    # NOTE(review): this puts the "last updated" line in front of the whole
    # template, whereas the main page gets it just before </body> --
    # confirm that this is intended.
    lang_status_pages[lang] = \
        translation[lang](last_updated_string) % lang_date_time \
        + lang_status_pages[lang]
    lang_status_page = html_re.sub(html_header, lang_status_pages[lang])
    html_status = '\n'.join([doc.translations[lang].html_status()
                             for doc in master_docs
                             if lang in doc.translations])
    lang_status_page = end_body_re.sub(
        lambda m: html_status + '\n</body>', lang_status_page)
    _write_file(os.path.join(lang, 'translations.html.in'),
                lang_status_page)

main_status_txt = '''Documentation translations status
Generated %s
NT = not translated
FT = fully translated

''' % date_time

main_status_txt += '\n'.join([doc.text_status() for doc in master_docs])

status_txt_file = 'out/translations-status.txt'
progress("Writing %s..." % status_txt_file)
_write_file(status_txt_file, main_status_txt)

translation_instructions_file = 'TRANSLATION'
progress("Updating %s..." % translation_instructions_file)
translation_instructions = _read_file(translation_instructions_file)

for doc in master_docs:
    translation_instructions = \
        doc.update_word_counts(translation_instructions)

# HTML and PO files are not covered by the master documents above; their
# names are taken from the listing lines of the instructions file itself.
for html_file in re.findall(r'(?m)^\d+ *(\S+?\.html\S*?)(?: |$)',
                            translation_instructions):
    word_count = sgml_word_count(_read_file(html_file))
    translation_instructions = update_word_count(translation_instructions,
                                                 html_file,
                                                 word_count)

for po_file in re.findall(r'(?m)^\d+ *(\S+?\.po\S*?)(?: |$)',
                          translation_instructions):
    word_count = po_word_count(_read_file(po_file))
    translation_instructions = update_word_count(translation_instructions,
                                                 po_file,
                                                 word_count)

# Recompute the total of each category from the updated counts.
translation_instructions = \
    update_category_word_counts_re.sub(update_category_word_counts_sub,
                                       translation_instructions)

_write_file(translation_instructions_file, translation_instructions)