doc/svnbook/xml2po/xml2po.py

   1 #!/usr/bin/python -u
   2 # -*- encoding: utf-8 -*-
   3 # Copyright (c) 2004, 2005, 2006 Danilo Šegan <danilo@gnome.org>.
   4 #
   5 # This file is part of xml2po.
   6 #
   7 # xml2po is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # xml2po is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with xml2po; if not, write to the Free Software Foundation, Inc.,
  19 # 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  20 #
  21
  22 # xml2po -- translate XML documents
  23 VERSION = "1.0.5"
  24
  25 # Versioning system (I use this for a long time, so lets explain it to
  26 # those Linux-versioning-scheme addicts):
  27 #   1.0.* are unstable, development versions
  28 #   1.1 will be first stable release (release 1), and 1.1.* bugfix releases
  29 #   2.0.* will be unstable-feature-development stage (milestone 1)
  30 #   2.1.* unstable development betas (milestone 2)
  31 #   2.2 second stable release (release 2), and 2.2.* bugfix releases
  32 #   ...
  33 #
  34 import sys
  35 import libxml2
  36 import gettext
  37 import os
  38 import re
  39
  40 class NoneTranslations:
  41     def gettext(self, message):
  42         return None
  43
  44     def lgettext(self, message):
  45         return None
  46
  47     def ngettext(self, msgid1, msgid2, n):
  48         return None
  49
  50     def lngettext(self, msgid1, msgid2, n):
  51         return None
  52
  53     def ugettext(self, message):
  54         return None
  55
  56     def ungettext(self, msgid1, msgid2, n):
  57         return None
  58
  59
  60
  61 class MessageOutput:
  62     def __init__(self, with_translations = 0):
  63         self.messages = []
  64         self.comments = {}
  65         self.linenos = {}
  66         self.nowrap = {}
  67         if with_translations:
  68             self.translations = []
  69         self.do_translations = with_translations
  70         self.output_msgstr = 0 # this is msgid mode for outputMessage; 1 is for msgstr mode
  71
  72     def translationsFollow(self):
  73         """Indicate that what follows are translations."""
  74         self.output_msgstr = 1
  75
  76     def setFilename(self, filename):
  77         self.filename = filename
  78
  79     def outputMessage(self, text, lineno = 0, comment = None, spacepreserve = 0, tag = None):
  80         """Adds a string to the list of messages."""
  81         if (text.strip() != ''):
  82             t = escapePoString(normalizeString(text, not spacepreserve))
  83             if self.output_msgstr:
  84                 self.translations.append(t)
  85                 return
  86
  87             if self.do_translations or (not t in self.messages):
  88                 self.messages.append(t)
  89                 if spacepreserve:
  90                     self.nowrap[t] = 1
  91                 if t in self.linenos.keys():
  92                     self.linenos[t].append((self.filename, tag, lineno))
  93                 else:
  94                     self.linenos[t] = [ (self.filename, tag, lineno) ]
  95                 if (not self.do_translations) and comment and not t in self.comments:
  96                     self.comments[t] = comment
  97             else:
  98                 if t in self.linenos.keys():
  99                     self.linenos[t].append((self.filename, tag, lineno))
 100                 else:
 101                     self.linenos[t] = [ (self.filename, tag, lineno) ]
 102                 if comment and not t in self.comments:
 103                     self.comments[t] = comment
 104
 105     def outputHeader(self, out):
 106         from time import gmtime, strftime
 107         out.write("""msgid ""
 108 msgstr ""
 109 "Project-Id-Version: PACKAGE VERSION\\n"
 110 "POT-Creation-Date: %s\\n"
 111 "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
 112 "Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
 113 "Language-Team: LANGUAGE <LL@li.org>\\n"
 114 "MIME-Version: 1.0\\n"
 115 "Content-Type: text/plain; charset=UTF-8\\n"
 116 "Content-Transfer-Encoding: 8bit\\n"
 117
 118 """ % (strftime("%Y-%m-%d %H:%M +0000", gmtime())))
 119
 120     def outputAll(self, out):
 121         self.outputHeader(out)
 122
 123         for k in self.messages:
 124             if k in self.comments:
 125                 out.write("#. %s\n" % (self.comments[k].replace("\n","\n#. ")))
 126             references = ""
 127             tagstr = ""
 128             tags = []
 129             for reference in self.linenos[k]:
 130                 references += "%s:%d " % (reference[0], reference[2])
 131                 if(reference[1] not in tags):
 132                     tags.append(reference[1])
 133                     tagstr += "(" + str(reference[1]) + "), "
 134             out.write("#.%s\n" % (tagstr[0 : len(tagstr) - 2]))
 135             out.write("#: %s\n" % (references[0 : len(references) - 1]))
 136             if k in self.nowrap and self.nowrap[k]:
 137                 out.write("#, no-wrap\n")
 138             out.write("msgid \"%s\"\n" % (k))
 139             translation = ""
 140             if self.do_translations:
 141                 if len(self.translations)>0:
 142                     translation = self.translations.pop(0)
 143             if translation == k:
 144                 translation = ""
 145             out.write("msgstr \"%s\"\n\n" % (translation))
 146
 147
 148 def normalizeNode(node):
 149     #print >>sys.stderr, "<%s> (%s) [%s]" % (node.name, node.type, node.serialize('utf-8'))
 150     if not node:
 151         return
 152     elif isSpacePreserveNode(node):
 153         return
 154     elif node.isText():
 155         if node.isBlankNode():
 156             if expand_entities or ( not (node.prev and not node.prev.isBlankNode()
 157                                          and node.next and not node.next.isBlankNode()) ):
 158                 #print >>sys.stderr, "BLANK"
 159                 node.setContent('')
 160         else:
 161             node.setContent(re.sub('\s+',' ', node.content))
 162
 163     elif node.children and node.type == 'element':
 164         child = node.children
 165         while child:
 166             normalizeNode(child)
 167             child = child.next
 168
 169 def normalizeString(text, ignorewhitespace = 1):
 170     """Normalizes string to be used as key for gettext lookup.
 171
 172     Removes all unnecessary whitespace."""
 173     if not ignorewhitespace:
 174         return text
 175     try:
 176         # Lets add document DTD so entities are resolved
 177         dtd = doc.intSubset()
 178         tmp = dtd.serialize('utf-8')
 179         tmp = tmp + '<norm>%s</norm>' % text
 180     except:
 181         tmp = '<norm>%s</norm>' % text
 182
 183     try:
 184         ctxt = libxml2.createDocParserCtxt(tmp)
 185         if expand_entities:
 186             ctxt.replaceEntities(1)
 187         ctxt.parseDocument()
 188         tree = ctxt.doc()
 189         newnode = tree.getRootElement()
 190     except:
 191         print >> sys.stderr, """Error while normalizing string as XML:\n"%s"\n""" % (text)
 192         return text
 193
 194     normalizeNode(newnode)
 195
 196     result = ''
 197     child = newnode.children
 198     while child:
 199         result += child.serialize('utf-8')
 200         child = child.next
 201
 202     result = re.sub('^ ','', result)
 203     result = re.sub(' $','', result)
 204
 205     return result
 206
 207 def stringForEntity(node):
 208     """Replaces entities in the node."""
 209     text = node.serialize('utf-8')
 210     try:
 211         # Lets add document DTD so entities are resolved
 212         dtd = node.doc.intSubset()
 213         tmp = dtd.serialize('utf-8') + '<norm>%s</norm>' % text
 214         next = 1
 215     except:
 216         tmp = '<norm>%s</norm>' % text
 217         next = 0
 218
 219     ctxt = libxml2.createDocParserCtxt(tmp)
 220     if expand_entities:
 221         ctxt.replaceEntities(1)
 222     ctxt.parseDocument()
 223     tree = ctxt.doc()
 224     if next:
 225         newnode = tree.children.next
 226     else:
 227         newnode = tree.children
 228
 229     result = ''
 230     child = newnode.children
 231     while child:
 232         result += child.serialize('utf-8')
 233         child = child.next
 234
 235     return result
 236
 237
 238 def escapePoString(text):
 239     return text.replace('\\','\\\\').replace('"', "\\\"").replace("\n","\\n").replace("\t","\\t")
 240
 241 def unEscapePoString(text):
 242     return text.replace('\\"', '"').replace('\\\\','\\')
 243
 244 def getTranslation(text, spacepreserve = 0):
 245     """Returns a translation via gettext for specified snippet.
 246
 247     text should be a string to look for, spacepreserve set to 1
 248     when spaces should be preserved.
 249     """
 250     #print >>sys.stderr,"getTranslation('%s')" % (text.encode('utf-8'))
 251     text = normalizeString(text, not spacepreserve)
 252     if (text.strip() == ''):
 253         return text
 254     global gt
 255     if gt:
 256         res = gt.ugettext(text.decode('utf-8'))
 257         return res
 258
 259     return text
 260
 261 def myAttributeSerialize(node):
 262     result = ''
 263     if node.children:
 264         child = node.children
 265         while child:
 266             if child.type=='text':
 267                 result += doc.encodeEntitiesReentrant(child.content)
 268             elif child.type=='entity_ref':
 269                 if not expand_entities:
 270                     result += '&' + child.name + ';'
 271                 else:
 272                     result += child.content.decode('utf-8')
 273             else:
 274                 result += myAttributeSerialize(child)
 275             child = child.next
 276     else:
 277         result = node.serialize('utf-8')
 278     return result
 279
 280 def startTagForNode(node):
 281     if not node:
 282         return 0
 283
 284     result = node.name
 285     params = ''
 286     if node.properties:
 287         for p in node.properties:
 288             if p.type == 'attribute':
 289                 try:
 290                     nsprop = p.ns().name + ":" + p.name
 291                 except:
 292                     nsprop = p.name
 293                 params += " %s=\"%s\"" % (nsprop, myAttributeSerialize(p))
 294     return result+params
 295
 296 def endTagForNode(node):
 297     if not node:
 298         return 0
 299
 300     result = node.name
 301     return result
 302
 303 def isFinalNode(node):
 304     if automatic:
 305         auto = autoNodeIsFinal(node)
 306         # Check if any of the parents is also autoNodeIsFinal,
 307         # and if it is, don't consider this node a final one
 308         parent = node.parent
 309         while parent and auto:
 310             auto = not autoNodeIsFinal(parent)
 311             parent = parent.parent
 312         return auto
 313     #node.type =='text' or not node.children or
 314     if node.type == 'element' and node.name in ultimate_tags:
 315         return 1
 316     elif node.children:
 317         final_children = 1
 318         child = node.children
 319         while child and final_children:
 320             if not child.isBlankNode() and child.type != 'comment' and not isFinalNode(child):
 321                 final_children = 0
 322             child = child.next
 323         if final_children:
 324             return 1
 325     return 0
 326
 327 def ignoreNode(node):
 328     if automatic:
 329         if node.type in ('dtd', 'comment'):
 330             return 1
 331         else:
 332             return 0
 333     else:
 334         if isFinalNode(node):
 335             return 0
 336         if node.name in ignored_tags or node.type in ('dtd', 'comment'):
 337             return 1
 338         return 0
 339
 340 def isSpacePreserveNode(node):
 341     pres = node.getSpacePreserve()
 342     if pres == 1:
 343         return 1
 344     else:
 345         if CurrentXmlMode and (node.name in CurrentXmlMode.getSpacePreserveTags()):
 346             return 1
 347         else:
 348             return 0
 349
 350 def getCommentForNode(node):
 351     """Walk through previous siblings until a comment is found, or other element.
 352
 353     Only whitespace is allowed between comment and current node."""
 354     prev = node.prev
 355     while prev and prev.type == 'text' and prev.content.strip() == '':
 356         prev = prev.prev
 357     if prev and prev.type == 'comment':
 358         return prev.content.strip()
 359     else:
 360         return None
 361
 362 def replaceAttributeContentsWithText(node,text):
 363     node.setContent(text)
 364
 365 def replaceNodeContentsWithText(node,text):
 366     """Replaces all subnodes of a node with contents of text treated as XML."""
 367
 368     if node.children:
 369         starttag = startTagForNode(node)
 370         endtag = endTagForNode(node)
 371
 372         # Lets add document DTD so entities are resolved
 373         tmp = '<?xml version="1.0" encoding="utf-8" ?>'
 374         try:
 375             dtd = doc.intSubset()
 376             tmp = tmp + dtd.serialize('utf-8')
 377         except libxml2.treeError:
 378             pass
 379
 380         content = '<%s>%s</%s>' % (starttag, text, endtag)
 381         tmp = tmp + content.encode('utf-8')
 382
 383         newnode = None
 384         try:
 385             ctxt = libxml2.createDocParserCtxt(tmp)
 386             ctxt.replaceEntities(0)
 387             ctxt.parseDocument()
 388             newnode = ctxt.doc()
 389         except:
 390             pass
 391
 392         if not newnode:
 393             print >> sys.stderr, """Error while parsing translation as XML:\n"%s"\n""" % (text.encode('utf-8'))
 394             return
 395
 396         newelem = newnode.getRootElement()
 397
 398         if newelem and newelem.children:
 399             free = node.children
 400             while free:
 401                 next = free.next
 402                 free.unlinkNode()
 403                 free = next
 404
 405             if node:
 406                 copy = newelem.copyNodeList()
 407                 next = node.next
 408                 node.replaceNode(newelem.copyNodeList())
 409                 node.next = next
 410
 411         else:
 412             # In practice, this happens with tags such as "<para>    </para>" (only whitespace in between)
 413             pass
 414     else:
 415         node.setContent(text)
 416
 417 def autoNodeIsFinal(node):
 418     """Returns 1 if node is text node, contains non-whitespace text nodes or entities."""
 419     if hasattr(node, '__autofinal__'):
 420         return node.__autofinal__
 421     if node.name in ignored_tags:
 422         node.__autofinal__ = 0
 423         return 0
 424     if node.isText() and node.content.strip()!='':
 425         node.__autofinal__ = 1
 426         return 1
 427     final = 0
 428     child = node.children
 429     while child:
 430         if child.type in ['text'] and  child.content.strip()!='':
 431             final = 1
 432             break
 433         child = child.next
 434
 435     node.__autofinal__ = final
 436     return final
 437
 438
 439 def worthOutputting(node, noauto = 0):
 440     """Returns 1 if node is "worth outputting", otherwise 0.
 441
 442     Node is "worth outputting", if none of the parents
 443     isFinalNode, and it contains non-blank text and entities.
 444     """
 445     if noauto and hasattr(node, '__worth__'):
 446         return node.__worth__
 447     elif not noauto and hasattr(node, '__autoworth__'):
 448         return node.__autoworth__
 449     worth = 1
 450     parent = node.parent
 451     final = isFinalNode(node) and node.name not in ignored_tags
 452     while not final and parent:
 453         if isFinalNode(parent):
 454             final = 1 # reset if we've got to one final tag
 455         if final and (parent.name not in ignored_tags) and worthOutputting(parent):
 456             worth = 0
 457             break
 458         parent = parent.parent
 459     if not worth:
 460         node.__worth__ = 0
 461         return 0
 462
 463     if noauto:
 464         node.__worth__ = worth
 465         return worth
 466     else:
 467         node.__autoworth__ = autoNodeIsFinal(node)
 468         return node.__autoworth__
 469
 470 def processAttribute(node, attr):
 471     if not node or not attr or not worthOutputting(node=node, noauto=1):
 472         return
 473
 474     outtxt = attr.content
 475     if mode=='merge':
 476         translation = getTranslation(outtxt, 0)
 477         replaceAttributeContentsWithText(attr, translation.encode('utf-8'))
 478     else:
 479         msg.outputMessage(outtxt, node.lineNo(),  "", 0,
 480                           node.name + ":" + attr.name)
 481
 482 def processElementTag(node, replacements, restart = 0):
 483     """Process node with node.type == 'element'."""
 484     if node.type == 'element':
 485         # Translate attributes if needed
 486         if node.properties and len(treated_attributes):
 487             for p in node.properties:
 488                 if p.name in treated_attributes:
 489                     processAttribute(node, p)
 490
 491         outtxt = ''
 492         if restart:
 493             myrepl = []
 494         else:
 495             myrepl = replacements
 496
 497         submsgs = []
 498
 499         child = node.children
 500         while child:
 501             if (isFinalNode(child)) or (child.type == 'element' and worthOutputting(child)):
 502                 myrepl.append(processElementTag(child, myrepl, 1))
 503                 outtxt += '<placeholder-%d/>' % (len(myrepl))
 504             else:
 505                 if child.type == 'element':
 506                     (starttag, content, endtag, translation) = processElementTag(child, myrepl, 0)
 507                     outtxt += '<%s>%s</%s>' % (starttag, content, endtag)
 508                 else:
 509                     outtxt += doSerialize(child)
 510
 511             child = child.next
 512
 513         if mode == 'merge':
 514             translation = getTranslation(outtxt, isSpacePreserveNode(node))
 515         else:
 516             translation = outtxt.decode('utf-8')
 517
 518         starttag = startTagForNode(node)
 519         endtag = endTagForNode(node)
 520
 521         worth = worthOutputting(node)
 522         if not translation:
 523             translation = outtxt.decode('utf-8')
 524             if worth and mark_untranslated: node.setLang('C')
 525
 526         if restart or worth:
 527             i = 0
 528             while i < len(myrepl):
 529                 replacement = '<%s>%s</%s>' % (myrepl[i][0], myrepl[i][3], myrepl[i][2])
 530                 i += 1
 531                 translation = translation.replace('<placeholder-%d/>' % (i), replacement)
 532
 533             if worth:
 534                 if mode == 'merge':
 535                     replaceNodeContentsWithText(node, translation)
 536                 else:
 537                     msg.outputMessage(outtxt, node.lineNo(), getCommentForNode(node), isSpacePreserveNode(node), tag = node.name)
 538
 539         return (starttag, outtxt, endtag, translation)
 540     else:
 541         raise Exception("You must pass node with node.type=='element'.")
 542
 543
 544 def isExternalGeneralParsedEntity(node):
 545     if (node and node.type=='entity_ref'):
 546         try:
 547             # it would be nice if debugDumpNode could use StringIO, but it apparently cannot
 548             tmp = file(".xml2po-entitychecking","w+")
 549             node.debugDumpNode(tmp,0)
 550             tmp.seek(0)
 551             tmpstr = tmp.read()
 552             tmp.close()
 553             os.remove(".xml2po-entitychecking")
 554         except:
 555             # We fail silently, and replace all entities if we cannot
 556             # write .xml2po-entitychecking
 557             # !!! This is not very nice thing to do, but I don't know if
 558             #     raising an exception is any better
 559             return 0
 560         if tmpstr.find('EXTERNAL_GENERAL_PARSED_ENTITY') != -1:
 561             return 1
 562         else:
 563             return 0
 564     else:
 565         return 0
 566
 567 def doSerialize(node):
 568     """Serializes a node and its children, emitting PO messages along the way.
 569
 570     node is the node to serialize, first indicates whether surrounding
 571     tags should be emitted as well.
 572     """
 573
 574     if ignoreNode(node):
 575         return ''
 576     elif not node.children:
 577         return node.serialize("utf-8")
 578     elif node.type == 'entity_ref':
 579         if isExternalGeneralParsedEntity(node):
 580             return node.serialize('utf-8')
 581         else:
 582             return stringForEntity(node) #content #content #serialize("utf-8")
 583     elif node.type == 'entity_decl':
 584         return node.serialize('utf-8') #'<%s>%s</%s>' % (startTagForNode(node), node.content, node.name)
 585     elif node.type == 'text':
 586         return node.serialize('utf-8')
 587     elif node.type == 'element':
 588         repl = []
 589         (starttag, content, endtag, translation) = processElementTag(node, repl, 1)
 590         return '<%s>%s</%s>' % (starttag, content, endtag)
 591     else:
 592         child = node.children
 593         outtxt = ''
 594         while child:
 595             outtxt += doSerialize(child)
 596             child = child.next
 597         return outtxt
 598
 599
 600 def read_finaltags(filelist):
 601     if CurrentXmlMode:
 602         return CurrentXmlMode.getFinalTags()
 603     else:
 604         defaults = ['para', 'title', 'releaseinfo', 'revnumber',
 605                     'date', 'itemizedlist', 'orderedlist',
 606                     'variablelist', 'varlistentry', 'term' ]
 607         return defaults
 608
 609 def read_ignoredtags(filelist):
 610     if CurrentXmlMode:
 611         return CurrentXmlMode.getIgnoredTags()
 612     else:
 613         defaults = ['itemizedlist', 'orderedlist', 'variablelist',
 614                     'varlistentry' ]
 615         return defaults
 616
 617 def read_treatedattributes(filelist):
 618     if CurrentXmlMode:
 619         return CurrentXmlMode.getTreatedAttributes()
 620     else:
 621         return []
 622
 623
 624 def tryToUpdate(allargs, lang):
 625     # Remove "-u" and "--update-translation"
 626     print >>sys.stderr, "OVDI!"
 627     command = allargs[0]
 628     args = allargs[1:]
 629     opts, args = getopt.getopt(args, 'avhm:ket:o:p:u:',
 630                                ['automatic-tags','version', 'help', 'keep-entities', 'extract-all-entities', 'merge', 'translation=',
 631                                 'output=', 'po-file=', 'update-translation=' ])
 632     for opt, arg in opts:
 633         if opt in ('-a', '--automatic-tags'):
 634             command += " -a"
 635         elif opt in ('-k', '--keep-entities'):
 636             command += " -k"
 637         elif opt in ('-e', '--extract-all-entities'):
 638             command += " -e"
 639         elif opt in ('-m', '--mode'):
 640             command += " -m %s" % arg
 641         elif opt in ('-o', '--output'):
 642             sys.stderr.write("Error: Option '-o' is not yet supported when updating translations directly.\n")
 643             sys.exit(8)
 644         elif opt in ('-v', '--version'):
 645             print VERSION
 646             sys.exit(0)
 647         elif opt in ('-h', '--help'):
 648             sys.stderr.write("Error: If you want help, please use `%s --help' without '-u' option.\n" % (allargs[0]))
 649             sys.exit(9)
 650         elif opt in ('-u', '--update-translation'):
 651             pass
 652         else:
 653             sys.stderr.write("Error: Option `%s' is not supported with option `-u'.\n" % (opt))
 654             sys.exit(9)
 655
 656     while args:
 657         command += " " + args.pop()
 658
 659     file = lang
 660
 661     sys.stderr.write("Merging translations for %s: " % (lang))
 662     result = os.system("%s | msgmerge -o .tmp.%s.po %s -" % (command, lang, file))
 663     if result:
 664         sys.exit(10)
 665     else:
 666         result = os.system("mv .tmp.%s.po %s" % (lang, file))
 667         if result:
 668             sys.stderr.write("Error: cannot rename file.\n")
 669             sys.exit(11)
 670         else:
 671             os.system("msgfmt -cv -o %s %s" % (NULL_STRING, file))
 672             sys.exit(0)
 673
 674 def load_mode(modename):
 675     #import imp
 676     #found = imp.find_module(modename, submodes_path)
 677     #module = imp.load_module(modename, found[0], found[1], found[2])
 678     try:
 679         sys.path.append(submodes_path)
 680         module = __import__(modename)
 681         modeModule = '%sXmlMode' % modename
 682         return getattr(module, modeModule)
 683     except:
 684         return None
 685
 686 def xml_error_handler(arg, ctxt):
 687     pass
 688
# Register the do-nothing callback above as libxml2's error handler.
libxml2.registerErrorHandler(xml_error_handler, None)


# Main program start
# Everything below is script code; importing this file as a module is refused.
if __name__ != '__main__': raise NotImplementedError

# Parameters
# Mode modules are searched in "xml2po-modes" next to the (resolved) script.
submodes_path = os.path.dirname(os.path.realpath(sys.argv[0])) + "/xml2po-modes"
default_mode = 'docbook'

# Defaults for the values the command-line options below may override.
filename = ''
origxml = ''
mofile = ''
gt = None
ultimate = [ ]
ignored = [ ]
filenames = [ ]
translationlanguage = ''

mode = 'pot' # 'pot' or 'merge'
automatic = 0
expand_entities = 1
mark_untranslated = 0
expand_all_entities = 0

output  = '-' # this means to stdout

# Where shell commands discard their output; 'NUL' is used when /dev/null
# does not exist.
NULL_STRING = '/dev/null'
if not os.path.exists('/dev/null'): NULL_STRING = 'NUL'
 718
 719 import getopt, fileinput
 720
 721 def usage (with_help = False):
 722         print >> sys.stderr, "Usage:  %s [OPTIONS] [XMLFILE]..." % (sys.argv[0])
 723         if (with_help):
 724                 print >> sys.stderr, """
 725 OPTIONS may be some of:
 726     -a    --automatic-tags     Automatically decides if tags are to be considered
 727                                  "final" or not
 728     -k    --keep-entities      Don't expand entities
 729     -e    --expand-all-entities  Expand ALL entities (including SYSTEM ones)
 730     -m    --mode=TYPE          Treat tags as type TYPE (default: docbook)
 731     -o    --output=FILE        Print resulting text (XML or POT) to FILE
 732     -p    --po-file=FILE       Specify PO file containing translation, and merge
 733                                  Overwrites temporary file .xml2po.mo.
 734     -r    --reuse=FILE         Specify translated XML file with the same structure
 735     -t    --translation=FILE   Specify MO file containing translation, and merge
 736     -u    --update-translation=LANG.po   Updates a PO file using msgmerge program
 737
 738     -l    --language=LANG      Set language of the translation to LANG
 739           --mark-untranslated  Set 'xml:lang="C"' on untranslated tags
 740
 741     -v    --version            Output version of the xml2po program
 742
 743     -h    --help               Output this message
 744
 745 EXAMPLES:
 746     To create a POTemplate book.pot from input files chapter1.xml and
 747     chapter2.xml, run the following:
 748         %s -o book.pot chapter1.xml chapter2.xml
 749
 750     After translating book.pot into de.po, merge the translations back,
 751     using -p option for each XML file:
 752         %s -p de.po chapter1.xml > chapter1.de.xml
 753         %s -p de.po chapter2.xml > chapter2.de.xml
 754 """ % (sys.argv[0], sys.argv[0], sys.argv[0])
 755         sys.exit(0)
 756
# Without any argument there is nothing to do: show the usage line and exit.
if len(sys.argv) < 2: usage()

# Parse the command line; an unknown option prints the full help and exits.
args = sys.argv[1:]
try: opts, args = getopt.getopt(args, 'avhkem:t:o:p:u:r:l:',
                           ['automatic-tags','version', 'help', 'keep-entities', 'expand-all-entities', 'mode=', 'translation=',
                            'output=', 'po-file=', 'update-translation=', 'reuse=', 'language=', 'mark-untranslated' ])
except getopt.GetoptError: usage(True)

# Options are applied in command-line order, so e.g. -l only affects the
# language guessed for a -t/-p that comes after it.
for opt, arg in opts:
    if opt in ('-m', '--mode'):
        default_mode = arg
    if opt in ('-a', '--automatic-tags'):
        automatic = 1
    elif opt in ('-k', '--keep-entities'):
        expand_entities = 0
    elif opt in ('--mark-untranslated',):
        mark_untranslated = 1
    elif opt in ('-e', '--expand-all-entities'):
        expand_all_entities = 1
    elif opt in ('-l', '--language'):
        translationlanguage = arg
    elif opt in ('-t', '--translation'):
        mofile = arg
        mode = 'merge'
        # Default language: the MO file name without directory and extension.
        if translationlanguage == '': translationlanguage = os.path.split(os.path.splitext(mofile)[0])[1]
    elif opt in ('-r', '--reuse'):
        origxml = arg
    elif opt in ('-u', '--update-translation'):
        # tryToUpdate() always ends in sys.exit(), so nothing after it runs.
        tryToUpdate(sys.argv, arg)
    elif opt in ('-p', '--po-file'):
        # The PO file is compiled into the scratch catalog .xml2po.mo first;
        # a non-zero msgfmt status ends the program with status 7.
        mofile = ".xml2po.mo"
        pofile = arg
        if translationlanguage == '': translationlanguage = os.path.split(os.path.splitext(pofile)[0])[1]
        os.system("msgfmt -o %s %s >%s" % (mofile, pofile, NULL_STRING)) and sys.exit(7)
        mode = 'merge'
    elif opt in ('-o', '--output'):
        output = arg
    elif opt in ('-v', '--version'):
        print VERSION
        sys.exit(0)
    elif opt in ('-h', '--help'):
        usage(True)
 799
# Treat remaining arguments as XML files
# (pop() takes them from the end, so filenames is in reverse order)
while args:
    filenames.append(args.pop())

if len(filenames) > 1 and mode=='merge':
    print  >> sys.stderr, "Error: You can merge translations with only one XML file at a time."
    sys.exit(2)

# Instantiate the mode class; load_mode() returns None on failure, and
# calling None lands in the except branch, which switches to automatic mode.
try:
    CurrentXmlMode = load_mode(default_mode)()
except:
    CurrentXmlMode = None
    print >> sys.stderr, "Warning: cannot load module '%s', using automatic detection (-a)." % (default_mode)
    automatic = 1

if mode=='merge' and mofile=='':
    print >> sys.stderr, "Error: You must specify MO file when merging translations."
    sys.exit(3)

# Load the catalog.  The NoneTranslations fallback answers None for lookups
# that reach it.  A catalog that cannot be loaded is only reported; gt then
# keeps its previous value.
if mofile:
    try:
        mfile = open(mofile, "rb")

        gt = gettext.GNUTranslations(mfile)
        gt.add_fallback(NoneTranslations())
    except:
        print >> sys.stderr, "Can't open MO file '%s'." % (mofile)

# The argument passed to the read_* helpers is not used by them.
ultimate_tags = read_finaltags(ultimate)
ignored_tags = read_ignoredtags(ignored)
treated_attributes = read_treatedattributes(ignored)

# I'm not particularly happy about making any of these global,
# but I don't want to bother too much with it right now
semitrans = {}
PlaceHolder = 0
# With --reuse the translated XML file is processed last, and the message
# buffer is created in "pair with translations" mode.
if origxml == '':
    msg = MessageOutput()
else:
    filenames.append(origxml)
    msg = MessageOutput(1)
 841
 842 for filename in filenames:
 843     try:
 844         if filename == origxml:
 845             msg.translationsFollow()
 846         ctxt = libxml2.createFileParserCtxt(filename)
 847         ctxt.lineNumbers(1)
 848         if expand_all_entities:
 849             ctxt.replaceEntities(1)
 850         ctxt.parseDocument()
 851         doc = ctxt.doc()
 852         if doc.name != filename:
 853             print >> sys.stderr, "Error: I tried to open '%s' but got '%s' -- how did that happen?" % (filename, doc.name)
 854             sys.exit(4)
 855     except:
 856         print >> sys.stderr, "Error: cannot open file '%s'." % (filename)
 857         sys.exit(1)
 858
 859     msg.setFilename(filename)
 860     if CurrentXmlMode and origxml=='':
 861         CurrentXmlMode.preProcessXml(doc,msg)
 862     doSerialize(doc)
 863
# Pick the destination: '-' is standard output, anything else a file name.
if output == '-':
    out = sys.stdout
else:
    try:
        out = file(output, 'w')
    except:
        print >> sys.stderr, "Error: cannot open file %s for writing." % (output)
        sys.exit(5)

if mode != 'merge':
    # Extraction: add the mode's message for translators (if it has one),
    # then write all collected messages in PO format.
    if CurrentXmlMode:
        tcmsg = CurrentXmlMode.getStringForTranslators()
        tccom = CurrentXmlMode.getCommentForTranslators()
        if tcmsg:
            msg.outputMessage(tcmsg, 0, tccom)

    msg.outputAll(out)
else:
    # Merge: let the mode post-process the tree with the translated
    # translators' string, then write the document.  `doc` is whatever the
    # file loop assigned last; NOTE(review): it is undefined if no input
    # file was given -- confirm whether that case needs its own error.
    if CurrentXmlMode:
        tcmsg = CurrentXmlMode.getStringForTranslators()
        if tcmsg:
            outtxt = getTranslation(tcmsg)
        else:
            outtxt = ''
        CurrentXmlMode.postProcessXmlTranslation(doc, translationlanguage, outtxt)
    out.write(doc.serialize('utf-8', 1))