buildscripts/html-gettext.py

   1 #!@PYTHON@
   2 # html-gettext.py
   3
   4 # USAGE:  html-gettext.py [-o OUTDIR] LANG FILES
   5 #
   6 # -o OUTDIR specifies that output files should be written in OUTDIR
   7 #    rather than be overwritten
   8 #
   9
  10 import sys
  11 import re
  12 import os
  13 import getopt
  14
  15 import langdefs
  16
  17 optlist, args = getopt.getopt(sys.argv[1:],'o:')
  18 lang = args[0]
  19 files = args [1:]
  20
  21 outdir = '.'
  22 for x in optlist:
  23     if x[0] == '-o':
  24         outdir = x[1]
  25
  26 double_punct_char_separator = langdefs.LANGDICT[lang].double_punct_char_sep
  27 my_gettext = langdefs.translation[lang]
  28
  29 html_codes = ((' -- ', ' &ndash; '),
  30               (' --- ', ' &mdash; '),
  31               ("'", '&rsquo;'))
  32 html2texi = {'command':
  33                  (re.compile (r'<samp><span class="command">(.*?)</span></samp>'),
  34                   r'@command{\1}'),
  35              'code':
  36                  (re.compile (r'<code>(.*?)</code>'),
  37                   r'@code{\1}')
  38              }
  39 texi2html = {'command':
  40                  (re.compile (r'@command{(.*?)}'),
  41                   r'<samp><span class="command">\1</span></samp>'),
  42              'code':
  43                  (re.compile (r'@code{(.*?)}'),
  44                   r'<code>\1</code>')
  45              }
  46 whitespaces = re.compile (r'\s+')
  47
  48
  49 def _ (s):
  50     if not s:
  51         return ''
  52     s = whitespaces.sub (' ', s)
  53     for c in html_codes:
  54         s = s.replace (c[1], c[0])
  55     for u in html2texi.values():
  56         s = u[0].sub (u[1], s)
  57     s = my_gettext (s)
  58     for u in texi2html.values():
  59         s = u[0].sub (u[1], s)
  60     for c in html_codes:
  61         s = s.replace (c[0], c[1])
  62     return s
  63
  64 link_re =  re.compile (r'<link rel="(up|prev|next)" (.*?) title="([^"]*?)">')
  65
  66 def link_gettext (m):
  67     return '<link rel="' + m.group (1) + '" ' + m.group (2) \
  68         + ' title="' + _ (m.group (3)) + '">'
  69
  70 makeinfo_title_re = re.compile (r'<title>([^<]*?) - ([^<]*?)</title>')
  71
  72 def makeinfo_title_gettext (m):
  73     return '<title>' + _ (m.group (1)) + ' - ' + m.group (2) + '</title>'
  74
  75 texi2html_title_re = re.compile (r'<title>(.+?): ([A-Z\d.]+ |)(.+?)</title>')
  76
  77 def texi2html_title_gettext (m):
  78     return '<title>' + _ (m.group (1)) + double_punct_char_separator + ': ' \
  79         + m.group (2) + _ (m.group (3)) + '</title>'
  80
  81 a_href_re = re.compile ('(?s)<a ([^>]*?href="[\\w.#-_]+"[^>]*>(?:<code>|))\
  82 (Appendix |)([A-Z0-9.]+ | (?:&lt;){1,2} |&nbsp;[^:<]+?:&nbsp;|&nbsp;|)\
  83 (.+?)(</code>| (?:&gt;){1,2} |&nbsp;|)</a>:?')
  84
  85 def a_href_gettext (m):
  86     s = ''
  87     if m.group(0)[-1] == ':':
  88         s = double_punct_char_separator + ':'
  89     t = ''
  90     if m.group (2):
  91         t = _ (m.group (2))
  92     return '<a ' + m.group (1) + t + m.group (3) + _ (m.group (4)) + \
  93         m.group (5) + '</a>' + s
  94
  95 h_re = re.compile (r'<h(\d)( class="\w+"|)>\s*(Appendix |)([A-Z\d.]+ |)?([^<]+)\s*</h\1>')
  96
  97 def h_gettext (m):
  98     if m.group (3):
  99         s = _ (m.group (3))
 100     else:
 101         s= ''
 102     return '<h' + m.group (1) + m.group (2) + '>' + s +\
 103            m.group (4) + _ (m.group (5)) + '</h' + m.group (1) + '>'
 104
 105 for filename in files:
 106     f = open (filename, 'r')
 107     page = f.read ()
 108     f.close ()
 109     page = link_re.sub (link_gettext, page)
 110     page = makeinfo_title_re.sub (makeinfo_title_gettext, page)
 111     page = texi2html_title_re.sub (texi2html_title_gettext, page)
 112     page = a_href_re.sub (a_href_gettext, page)
 113     page = h_re.sub (h_gettext, page)
 114     for w in ('Next:', 'Previous:', 'Up:'):
 115         page = page.replace (w, _ (w))
 116     page = langdefs.LANGDICT[lang].html_filter (page)
 117     f = open (os.path.join (outdir, filename), 'w')
 118     f.write (page)
 119     f.close ()