1 # -*- tab-width: 4; indent-tabs-mode: nil; py-indent-offset: 4 -*-
3 # This file is part of the LibreOffice project.
5 # This Source Code Form is subject to the terms of the Mozilla Public
6 # License, v. 2.0. If a copy of the MPL was not distributed with this
7 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 # This file incorporates work covered by the following license notice:
11 # Licensed to the Apache Software Foundation (ASF) under one or more
12 # contributor license agreements. See the NOTICE file distributed
13 # with this work for additional information regarding copyright
14 # ownership. The ASF licenses this file to you under the Apache
15 # License, Version 2.0 (the "License"); you may not use this file
16 # except in compliance with the License. You may obtain a copy of
17 # the License at http://www.apache.org/licenses/LICENSE-2.0 .
23 from unohelper
import Base
,systemPathToFileUrl
25 from com
.sun
.star
.beans
import PropertyValue
26 from com
.sun
.star
.uno
import Exception as UnoException
27 from com
.sun
.star
.io
import IOException
, XOutputStream
class OutputStream(Base, XOutputStream):
    """UNO output stream that forwards everything written to it to stdout.

    An instance is handed to the office as the "OutputStream" property so
    that the exported document is printed rather than stored in a file.

    NOTE(review): XOutputStream also declares flush(); no implementation is
    visible in this extract -- confirm that one exists in the full file.
    """

    def closeOutput(self):
        """Handle the end-of-stream notification.

        NOTE(review): no body is visible for this method in this extract;
        confirm against the full file before relying on it being a no-op.
        """

    def writeBytes(self, seq):
        """Write the payload of *seq* to standard output.

        seq -- object whose ``value`` attribute holds the data to emit
               (presumably a uno.ByteSequence -- confirm against the caller).
        """
        # On Python 3 sys.stdout is a text stream and rejects bytes with a
        # TypeError; its underlying binary stream is exposed as ``buffer``.
        # Streams without that attribute (Python 2, or a replaced stdout)
        # are written to directly, exactly as before.
        target = getattr(sys.stdout, "buffer", sys.stdout)
        target.write(seq.value)
# NOTE(review): this extract is missing lines -- the enclosing def/try/loop
# headers and several branch bodies are not visible here.  The comments
# below describe only what the visible statements do.
#
# Parse the command line: short options "-h" and "-c <arg>", long options
# "--help", "--connection-string=<arg>" and "--html".
47 opts
, args
= getopt
.getopt(sys
.argv
[1:], "hc:", ["help", "connection-string=", "html"])
# Defaults: connect to an office listening on localhost:2002 and export
# with the "Text (Encoded)" filter.
49 url
= "uno:socket,host=localhost,port=2002;urp;StarOffice.ComponentContext"
50 filterName
= "Text (Encoded)"
# Help option (the branch body is not visible in this extract).
52 if o
in ("-h", "--help"):
# A user-supplied connection string is wrapped into a complete UNO URL.
55 if o
in ("-c", "--connection-string"):
56 url
= "uno:" + a
+ ";urp;StarOffice.ComponentContext"
# Switch to the Writer HTML export filter (the guarding condition is not
# visible here; presumably the "--html" option -- TODO confirm).
58 filterName
= "HTML (StarWriter)"
# Bootstrap: use the local component context to create a UnoUrlResolver,
# then resolve `url` to obtain the component context of the running office.
65 ctxLocal
= uno
.getComponentContext()
66 smgrLocal
= ctxLocal
.ServiceManager
68 resolver
= smgrLocal
.createInstanceWithContext(
69 "com.sun.star.bridge.UnoUrlResolver", ctxLocal
)
70 ctx
= resolver
.resolve(url
)
71 smgr
= ctx
.ServiceManager
# The Desktop service of the resolved context is used to load documents.
73 desktop
= smgr
.createInstanceWithContext("com.sun.star.frame.Desktop", ctx
)
# Current working directory as a file URL; used as the base for the
# document paths below.
75 cwd
= systemPathToFileUrl(getcwd())
# Store properties: the chosen export filter plus an OutputStream instance
# as the target.  The assignment target is not visible in this extract;
# presumably this is the `outProps` tuple used further down -- TODO confirm.
77 PropertyValue("FilterName" , 0, filterName
, 0),
78 PropertyValue("OutputStream", 0, OutputStream(), 0))
# The trailing comma makes inProps a one-element tuple: load hidden.
79 inProps
= PropertyValue("Hidden", 0 , True, 0),
# Resolve `path` against cwd and load the document into a new "_blank"
# frame using inProps.
82 fileUrl
= uno
.absolutize(cwd
, systemPathToFileUrl(path
))
83 doc
= desktop
.loadComponentFromURL(fileUrl
, "_blank", 0, inProps
)
# Raised when loading failed (the guarding condition is not visible here).
86 raise UnoException("Could not open stream for unknown reason", None)
# Export through the "private:stream" URL so that the output goes to the
# stream supplied in outProps instead of a file.
88 doc
.storeToURL("private:stream", outProps
)
# Conversion errors are reported on stderr.
89 except IOException
as e
:
90 sys
.stderr
.write("Error during conversion: " + e
.Message
+ "\n")
92 except UnoException
as e
:
93 sys
.stderr
.write("Error (" + repr(e
.__class
__) + ") during conversion: " + e
.Message
+ "\n")
# A second UnoException handler with a different message; presumably it
# belongs to an outer try covering the connection setup -- TODO confirm.
98 except UnoException
as e
:
99 sys
.stderr
.write("Error (" + repr(e
.__class
__) + "): " + e
.Message
+ "\n")
# Invalid command-line options: print the getopt error message to stderr.
101 except getopt
.GetoptError
as e
:
102 sys
.stderr
.write(str(e
) + "\n")
# Write the command-line help text to stderr as one concatenated string.
# NOTE(review): the enclosing def, some of the string literals and the
# closing parenthesis of this call are not visible in this extract.
109 sys
.stderr
.write("usage: ooextract.py --help |\n"+
110 " [-c <connection-string> | --connection-string=<connection-string>\n"+
111 " file1 file2 ...\n"+
113 "Extracts plain text from documents and prints it to stdout.\n" +
114 "Requires an OpenOffice.org instance to be running. The script and the\n"+
115 "running OpenOffice.org instance must be able to access the file with\n"+
116 "by the same system path.\n"
118 "-c <connection-string> | --connection-string=<connection-string>\n" +
119 " The connection-string part of a UNO URL to where the\n" +
120 " the script should connect to in order to do the conversion.\n" +
121 " The strings defaults to socket,host=localhost,port=2002\n"
123 " Instead of the text filter, the writer html filter is used\n"
128 # vim: set shiftwidth=4 softtabstop=4 expandtab: