add FM namespace ('http://booki.cc/' for now)
[objavi2.git] / html2odt
blobbc7cd9d0a48bac7914e4cb607332e5a6c45cb7f2
1 #!/usr/bin/python
2 """Convert html files to ODF.
4 html2odt workdir source.html destination.odt
5 """
7 from __future__ import with_statement
8 import sys, os, subprocess, time
10 import uno
11 from com.sun.star.beans import PropertyValue
12 from com.sun.star.connection import NoConnectException
14 def _inspect_obj(o):
15 print >> sys.stderr, 'inspecting %r' % o
16 for a in dir(o):
17 try:
18 print >> sys.stderr, "%25s %s" % (a, getattr(o, a))
19 except Exception, e:
20 print >> sys.stderr, "%s DOES NOT WORK! (%s)" % (a, e)
def file_url(path):
    """Return <path> as a file:// URL.

    A string that already starts with 'file:///' is returned untouched;
    anything else is made absolute (relative to the current directory)
    and prefixed with 'file://'.  No URL quoting is performed.
    """
    if not path.startswith('file:///'):
        path = "file://" + os.path.abspath(path)
    return path
27 class Oo(object):
28 def __init__(self):
29 """Start up an open office and connect to it."""
30 accept_string = "socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
32 self.ooffice = subprocess.Popen(["ooffice", "-nologo", "-nodefault",
33 "-norestore", "-nofirststartwizard",
34 "-headless", "-invisible", "-nolockcheck",
35 "-accept=%s" % accept_string])
37 for i in range(10):
38 time.sleep(0.5)
39 try:
40 local = uno.getComponentContext()
41 self.resolver = local.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", local)
42 self.context = self.resolver.resolve("uno:" + accept_string)
43 self.desktop = self.unobject("com.sun.star.frame.Desktop", self.context)
44 return
46 except NoConnectException:
47 print >> sys.stderr, '.',
49 def unobject(self, klass, context=None):
50 """get an instance of the class named by <klass>. It will
51 probably be a string that looks like
52 'com.sun.something.SomeThing'."""
53 if context is None:
54 return self.context.ServiceManager.createInstance(klass)
55 return self.context.ServiceManager.createInstanceWithContext(klass, context)
57 def load(self, src):
58 """Attempt to load as TextDocument format, but fall back to
59 WebDocument if that doesn't work. (WebDocument is called
60 writer/web in the gui and is less exportable."""
61 # import property values:
62 # http://api.openoffice.org/docs/common/ref/com/sun/star/document/MediaDescriptor.html
63 try:
64 return self.desktop.loadComponentFromURL(src, "_blank", 0,
65 (PropertyValue("Hidden" , 0 , True, 0),
66 PropertyValue("FilterName" , 0 , 'HTML (StarWriter)', 0),
68 except Exception, e:
69 print >> sys.stderr, e
70 #fall back on default WebDocument format
71 return self.desktop.loadComponentFromURL(src, "_blank", 0,
72 (PropertyValue("Hidden" , 0 , True, 0),
75 def embed_graphics(self, doc):
76 """Reset each graphic object to an embedded copy of itself."""
77 gp = self.unobject("com.sun.star.graphic.GraphicProvider")
78 for i in range(doc.GraphicObjects.Count):
79 g = doc.GraphicObjects.getByIndex(i)
80 props = (PropertyValue("URL", 0, g.GraphicURL, 0),)
81 g.setPropertyValue("Graphic", gp.queryGraphic(props))
83 def convert(self, src, dest):
84 """Use the connected open office instance to convert the file
85 named by <src> into odf and save it as <dest>.
87 The main trick here is forcing the images to be stored inline."""
88 src = file_url(src)
89 dest = file_url(dest)
90 print >> sys.stderr, src
91 print >> sys.stderr, dest
93 doc = self.load(src)
94 self.embed_graphics(doc)
95 doc.storeToURL(dest, (PropertyValue("FilterName", 0, 'writer8', 0),
96 PropertyValue("Overwrite", 0, True, 0 )))
97 doc.dispose()
99 def __enter__(self):
100 return self
102 def __exit__(self, exc_type, exc_value, traceback):
103 self.desktop.dispose()
104 self.context.dispose()
105 for x in range(10):
106 os.kill(self.ooffice.pid, 15)
107 time.sleep(0.25)
108 if self.ooffice.poll():
109 break
110 print >> sys.stderr, '*',
111 else:
112 os.kill(self.ooffice.pid, 9)
def set_env(workdir):
    """Make <workdir> both $HOME and the current directory.

    The path is made absolute first.  The resulting environment is
    printed to stderr.
    """
    home = os.path.abspath(workdir)
    os.environ['HOME'] = home
    os.chdir(home)
    print >> sys.stderr, os.environ
if __name__ == '__main__':
    # Command line: <workdir> <source html> <destination odt>.
    args = sys.argv[1:4]
    workdir, src, dest = args
    set_env(workdir)

    # The with block guarantees the office process is shut down again.
    with Oo() as office:
        office.convert(src, dest)