remove duplicates from the list
[TortoiseGit.git] / doc / xml2po-modes / docbook.py
blobb44ae081cc37898ccbba7557369b8b7494349758
1 # -*- coding: utf-8 -*-
2 # Copyright (c) 2004 Danilo Segan <danilo@kvota.net>.
4 # This file is part of xml2po.
6 # xml2po is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # xml2po is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with xml2po; if not, write to the Free Software Foundation, Inc.,
18 # 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 # This implements special instructions for handling DocBook XML documents
22 # in a better way.
24 # This means:
25 # — better handling of nested complicated tags (i.e. definitions of
26 # ignored-tags and final-tags)
27 # — support for merging translator-credits back into DocBook articles
28 # — support for setting a language
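# We use "currentXmlMode" class name for all modes
# -- it might be better to have it named docbookXmlMode, but it will make loading harder;
# it is also not necessary until we start supporting extracting strings from more
# than one document type at the same time.
# NOTE(review): the class defined in this file is named docbookXmlMode, so the
# statement above appears to be out of date -- confirm against the mode loader.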
36 import re
37 import libxml2
38 import os
39 import md5
40 import sys
class docbookXmlMode:
    """Special handling of DocBook document types.

    Sets the ``lang`` attribute on the ``article``/``book`` element and adds
    translator credits to its ``articleinfo``/``bookinfo`` element.
    """

    def __init__(self):
        # Tag groups shared by getIgnoredTags() and getFinalTags().
        self.lists = ['itemizedlist', 'orderedlist', 'variablelist',
                      'segmentedlist', 'simplelist', 'calloutlist', 'varlistentry']
        self.objects = ['table', 'figure', 'textobject', 'imageobject', 'mediaobject',
                        'screenshot']

    def getIgnoredTags(self):
        "Returns array of tags to be ignored."
        return self.objects + self.lists

    def getFinalTags(self):
        "Returns array of tags to be considered 'final'."
        return ['para', 'formalpara', 'simpara',
                'releaseinfo', 'revnumber', 'title',
                'date', 'term', 'programlisting'] + self.objects + self.lists

    def getSpacePreserveTags(self):
        "Returns array of tags in which spaces are to be preserved."
        return [
            'classsynopsisinfo',
            'computeroutput',
            'funcsynopsisinfo',
            'literallayout',
            'programlisting',
            'screen',
            'synopsis',
            'userinput',
        ]

    def getStringForTranslators(self):
        """Returns string which will be used to credit translators."""
        return "translator-credits"

    def getCommentForTranslators(self):
        """Returns a comment to be added next to string for crediting translators."""
        return """Put one translator per line, in the form of NAME <EMAIL>."""

    def getStringForTranslation(self):
        """Returns translation of 'translation'."""
        return "translator-translation"

    def getCommentForTranslation(self):
        """Returns a string that explains how 'translation' is to be translated."""
        return """Place the translation of 'translation' here."""

    def _find_articleinfo(self, node):
        """Depth-first search for the first 'articleinfo' or 'bookinfo' node.

        Checks ``node`` itself first, then each child subtree in document
        order. Returns the matching node, or None when there is none.
        """
        if node.name == 'articleinfo' or node.name == 'bookinfo':
            return node
        child = node.children
        while child:
            ret = self._find_articleinfo(child)
            if ret:
                return ret
            child = child.next
        return None

    def _find_lastcopyright(self, node):
        """Returns the node after which a new credit should be inserted.

        Walks the children of ``node`` backwards and returns the last
        'copyright' child. When no 'copyright' child exists, the last child
        of ``node`` is returned instead; None is returned only when ``node``
        has no children at all.
        """
        if not node.children:
            return None
        last = node.lastChild()
        tmp = last
        while tmp:
            if tmp.name == "copyright":
                last = tmp
                break
            tmp = tmp.prev
        return last

    def _md5_for_file(self, filename):
        """Returns the hexadecimal MD5 digest of the contents of ``filename``.

        The file is read in 4096-byte chunks. I/O errors from opening or
        reading the file propagate to the caller.
        """
        # hashlib replaces the removed 'md5' module; imported locally so the
        # method does not depend on a change to the file-level imports.
        import hashlib

        digest = hashlib.md5()
        # 'with' guarantees the handle is closed even when a read fails.
        with open(filename, "rb") as stream:
            chunk = stream.read(4096)
            while chunk:
                digest.update(chunk)
                chunk = stream.read(4096)
        return digest.hexdigest()

    def _output_images(self, node, msg):
        """Emits one message per 'imagedata' element found under ``node``.

        For every 'imagedata' element with a non-empty 'fileref' attribute,
        the referenced path is resolved relative to the directory of
        ``msg.filename`` and a message containing the reference and the MD5
        of the file is passed to ``msg.outputMessage``. If the file does not
        exist, a placeholder replaces the MD5 and a warning is written to
        stderr. Any other node is searched recursively through its children.
        """
        if node and node.type == 'element' and node.name == 'imagedata':
            # Use .fileref to construct new message
            attr = node.prop("fileref")
            if attr:
                base_dir = os.path.dirname(msg.filename)
                fullpath = os.path.join(base_dir, attr)
                if os.path.exists(fullpath):
                    digest = self._md5_for_file(fullpath)
                else:
                    digest = "THIS FILE DOESN'T EXIST"
                    # write() instead of the Python-2-only "print >>" statement.
                    sys.stderr.write("Warning: image file '%s' not found.\n" % fullpath)

                msg.outputMessage("@@image: '%s'; md5=%s" % (attr, digest), node.lineNo(),
                                  "When image changes, this message will be marked fuzzy or untranslated for you.\n" +
                                  "It doesn't matter what you translate it to: it's not used at all.")
        elif node and node.children:
            child = node.children
            while child:
                self._output_images(child, msg)
                child = child.next

    def preProcessXml(self, doc, msg):
        """Add additional messages of interest here."""
        root = doc.getRootElement()
        self._output_images(root, msg)

    def postProcessXmlTranslation(self, doc, language, translators, translation):
        """Sets a language and translators in "doc" tree.

        "language" is stored in the 'lang' attribute of the first 'article'
        or 'book' element found among the root element and its following
        siblings; nothing is done when there is no such element.

        "translators" is a string consisting of "Name <email>" pairs
        of each translator, separated by newlines. When it still equals the
        untranslated credits placeholder, no credits are added.

        "translation" is the text stored in the 'contrib' child of every
        'othercredit' element that is created.
        """
        root = doc.getRootElement()
        # DocBook documents can be something other than article, handle that as well in the future
        while root and root.name != 'article' and root.name != 'book':
            root = root.next
        if not root:
            # The loop only stops on an article/book element or on exhaustion.
            return
        root.setProp('lang', language)

        if translators == self.getStringForTranslators():
            return

        # Now, lets find 'articleinfo' (it can be something else, but this goes along with 'article')
        ai = self._find_articleinfo(root)
        if not ai:
            return

        # Now, lets do one translator at a time
        transgroup = libxml2.newNode("authorgroup")
        for line in translators.split("\n"):
            line = line.strip()
            match = re.match(r"^([^<,]+)\s*(?:<([^>,]+)>)?$", line)
            if not match:
                continue

            last = self._find_lastcopyright(ai)
            copy = libxml2.newNode("othercredit")
            if last:
                # Insert right after the last copyright (or last child) of the info element.
                copy = last.addNextSibling(copy)
            else:
                # Info element is empty: start an authorgroup holding the credit.
                transgroup.addChild(copy)
                ai.addChild(transgroup)
            copy.newChild(None, "contrib", translation.encode('utf-8'))

            # Group 1 (the name) is mandatory in the pattern and the line is
            # stripped, so it is never empty; only the e-mail is optional.
            name = match.group(1)
            email = match.group(2)
            if email:
                holder = name + "(%s)" % email
            else:
                holder = name
            copy.newChild(None, "othername", holder.encode('utf-8'))
# Perform some tests when ran standalone: print every static value the mode
# exposes. Each print() takes a single pre-formatted string, so the output is
# the same under Python 2 (parenthesised expression) and Python 3 (function).
if __name__ == '__main__':
    test = docbookXmlMode()
    print("Ignored tags : " + repr(test.getIgnoredTags()))
    print("Final tags : " + repr(test.getFinalTags()))
    print("Space-preserve tags: " + repr(test.getSpacePreserveTags()))

    print("Credits from string: '%s'" % test.getStringForTranslators())
    print("Explanation for credits:\n\t'%s'" % test.getCommentForTranslators())

    print("String for translation: '%s'" % test.getStringForTranslation())
    print("Explanation for translation:\n\t'%s'" % test.getCommentForTranslation())