Update to version V0.5.0.
[docutils/kirr.git] / sandbox / xml2rst / xml2rst.py
blob956edba859c8c802b52f589a2d834b6eb66a6432
1 #! /usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
4 # Based on sample.py,v 4.1.2.6 2006/04/14 13:59:26 cvs Exp
6 # Copyright (C) 2009 Stefan Merten
8 # xml2rst.py is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published
10 # by the Free Software Foundation; either version 2 of the License,
11 # or (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 # 02111-1307, USA.
23 """
24 Convert a docutils XML file to reStructuredText syntax.
28 perldoc xml2rst.py
30 for a man page.
31 """
33 """
34 =head1 NAME
36 xml2rst.py -- convert a docutils XML file to reStructuredText syntax
38 =head1 SYNOPSIS
40 B<xml2rst.py> [B<-v>] I<xml> [I<reST>]
42 B<xml2rst.py> B<--help>
44 =head1 DESCRIPTION
46 Converts a docutils XML input file to reStructuredText source.
48 This can be used to transform another format to reStructuredText given you have
49 a transformation to docutils XML.
51 =cut
52 """
54 ###############################################################################
55 ###############################################################################
56 # Import
58 import sys
59 import os.path
60 import re
62 from optparse import OptionParser, OptionGroup, OptionValueError, Option
63 from copy import copy
try:
    from lxml import etree
except ImportError:
    # errorExit() is defined further down in this file and is therefore not
    # bound yet when this statement runs; calling it here would raise a
    # NameError and hide the hint below. Report directly in the same
    # "<script>: <message>" format and leave with exit code 2.
    for _line in (
        "Python package 'lxml' is not available",
        "You may try to use 'xml2rst.xsl' with a standalone XSLT processor like 'xalan' or 'xsltproc'",
    ):
        sys.stderr.write("%s: %s\n" % (os.path.basename(sys.argv[0]), _line))
    sys.exit(2)
71 ###############################################################################
72 ###############################################################################
73 # Constants
# @var MainXsltNm: Name of the main XSLT source file
# @type MainXsltNm: str
MainXsltNm = "xml2rst.xsl"

# @var ScriptNm: Name of the script
# @type ScriptNm: str
ScriptNm = sys.argv[0]

###############################################################################
###############################################################################
# Variables

# @var options: Options given on the command line
# @type options: optparse.Values
#
# A `global' statement at module level binds nothing; the name only comes
# into existence once parseOptions() assigns it.
global options
97 ###############################################################################
98 ###############################################################################
99 # General functions
def pod2Head(pod):
    """
    Extract the title of the first heading command in a POD snippet.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Text following the first `=headX' command with surrounding
             white-space removed, or the empty string if the snippet
             contains no such command.
    @rtype: str
    """
    # All heading commands (`=head1', `=head2', ...) have the same length.
    commandLen = len("=headX")
    titles = (row[commandLen:].strip()
              for row in pod.split("\n")
              if row.startswith("=head"))
    return next(titles, "")
115 ###############################################################################
def pod2Description(pod):
    """
    Collect the plain text of a POD snippet.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: The stripped content of all lines which are not POD commands
             (i.e. do not start with `='), joined by single blanks. Lines
             which are empty after stripping contribute nothing.
    @rtype: str
    """
    stripped = (row.strip()
                for row in pod.split("\n")
                if not row.startswith("="))
    return " ".join(text for text in stripped if text)
131 ###############################################################################
def pod2OptionList(pod):
    """
    Return option names found in POD snippet. Option names are recognized in
    `=item B<option>' constructs.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: All option names contained in POD snippet as a list, in the
             order of their appearance and including the leading dash(es).
    @rtype: [ str, ..., ]
    """
    # Raw string: `\s' is not a valid escape sequence in an ordinary string
    # literal.
    itemRe = re.compile(r"^=item\s*B<(-[^>]+)>")
    matches = (itemRe.search(line) for line in pod.split("\n"))
    return [ found.group(1) for found in matches if found ]
151 ###############################################################################
def pod2OptionKeywords(pod):
    """
    Return a dict mapping `OptionParser.add_option' keywords to values found in
    POD snippet.

    Lines are processed up to the first `=cut' command. Every
    `=item B<option>' line restarts the help text, so the help finally
    returned is the text following the last such line.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Mapping for all values found. Currently `help' and `dest' are
             filled. Dashes in `dest' are replaced by underscores.
    @rtype: { keyword: value, ..., }
    """
    # Raw strings: `\s' is not a valid escape sequence in an ordinary string
    # literal.
    optionRe = re.compile(r"^=item\s*B<--?([^>]+)>(?:=|\s*)")
    valueRe = re.compile(r"I<([^>]+)>")

    result = { }
    helpLines = [ ]
    for line in pod.split("\n"):
        if line.startswith("=cut"):
            break
        found = optionRe.search(line)
        if not found:
            helpLines.append(line)
            continue
        # A new option line: forget the help text collected so far
        helpLines = [ ]
        optionName = found.group(1)
        found = valueRe.search(line)
        if found:
            # The name of the option value is the preferred destination
            result['dest'] = found.group(1)
        elif len(optionName) > 1:
            # Only a long option name is usable as destination
            result['dest'] = optionName
    result['help'] = "\n".join(helpLines).strip()

    # `in' instead of `dict.has_key' which is not available in Python 3
    if 'dest' in result:
        result['dest'] = result['dest'].replace("-", "_")
    else:
        errorExit(1, ( "Internal error: Missing `dest' in documentation string:",
                       pod, ))
    return result
188 ###############################################################################
def pod2Argument(pod):
    """
    Return a list of two strings for `OptionGroup.__init__' describing the
    argument found in POD snippet.

    Lines are processed up to the first `=cut' command. Every `=item I<name>'
    line restarts the description, so the description finally returned is
    the text following the last such line.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Name of the argument and its description. Both are empty
             strings if nothing was found.
    @rtype: [ argument, description, ]
    """
    # Raw string: `\s' is not a valid escape sequence in an ordinary string
    # literal.
    itemRe = re.compile(r"^=item\s*I<([^>]+)>")

    argument = ""
    descriptionLines = [ ]
    for line in pod.split("\n"):
        if line.startswith("=cut"):
            break
        found = itemRe.search(line)
        if found:
            argument = found.group(1)
            descriptionLines = [ ]
        else:
            descriptionLines.append(line)
    return [ argument, "\n".join(descriptionLines).strip(), ]
215 ###############################################################################
def parseOptions():
    """
    Sets options and returns arguments.

    The option parser is built from the POD snippets embedded below, so the
    same text serves as the `perldoc' man page and as the `--help' output.
    The POD text must start in the first column to be recognized as POD.

    The parsed options are stored in the global `options'. Wrong usage
    (missing input file, too many arguments, malformed adornment string) is
    reported through `OptionParser.error'.

    @return: Name of input file and optionally of output file.
    @rtype: ( str, [str,] )
    """
    global options
    pod = """

=head1 OPTIONS

=cut
    """
    optionParser = OptionParser("usage: %prog [option]... <xml> [<rst>]")

    pod = """

=head2 General options

=over 4

=cut
    """
    generalGroup = OptionGroup(optionParser, pod2Head(pod),
                               pod2Description(pod))

    pod = """

=item B<-a> I<adornment>

=item B<--adornment>=I<adornment>

Configures title markup to use so different styles can be requested
easily.

The value of the parameter must be a string made up of a sequence of
character pairs. The first character of a pair is C<o> (overline) or
C<u> (underline) and the second character is the character to use for
the markup.

The first and the second character pair is used for document title and
subtitle, the following pairs are used for section titles where the
third pair is used for the top level section title.

Defaults to C<o=o-u=u-u~u:u.u`>.

=cut
    """
    generalGroup.add_option(default=None, *pod2OptionList(pod),
                            **pod2OptionKeywords(pod))

    pod = """

=item B<-f> I<fold>

=item B<--fold>=I<fold>

Configures whether long text lines in paragraphs should be folded and
to which length. This option is for input not coming from reST which
may have no internal line feeds in plain text strings.

If folding is enabled text strings not in a line feed preserving
context are first white-space normalized and then broken according to
the folding rules. Folding rules put out the first word and continue
to do so with the following words unless the next word would cross
the folding boundary. Words are delimited by white-space.

Defaults to C<0>, i.e. no folding.

=cut
    """
    generalGroup.add_option(type="int", default=None,
                            *pod2OptionList(pod), **pod2OptionKeywords(pod))

    pod = """

=item B<-v>

=item B<--verbose>

Operate verbose.

=cut
    """
    generalGroup.add_option(action="store_true",
                            *pod2OptionList(pod), **pod2OptionKeywords(pod))
    optionParser.add_option_group(generalGroup)

    pod = """

=back

=head2 Arguments

=over 4

=cut
    """
    argumentGroup = OptionGroup(optionParser, pod2Head(pod),
                                pod2Description(pod))
    optionParser.add_option_group(argumentGroup)

    pod = """

=item I<xml>

The XML input file containing docutils XML.

=cut
    """

    argument1Group = OptionGroup(optionParser, *pod2Argument(pod))
    optionParser.add_option_group(argument1Group)

    pod = """

=item I<rst>

The optional output file containing reStructuredText.

If not given output is put to C<STDOUT>.

=cut
    """
    argument2Group = OptionGroup(optionParser, *pod2Argument(pod))
    optionParser.add_option_group(argument2Group)

    # This snippet is not evaluated; it only closes the POD list for perldoc
    pod = """

=back

=cut
    """
    ( options, args, ) = optionParser.parse_args()

    if len(args) < 1:
        optionParser.error("An input file is required")
    if len(args) > 2:
        optionParser.error("At most two arguments are allowed")
    # An adornment string is a non-empty sequence of pairs made up of `o' or
    # `u' followed by one punctuation character. The pattern is given as a
    # raw string because `\-' is not a valid escape sequence in an ordinary
    # string literal; the resulting pattern is unchanged.
    if (options.adornment is not None
        and re.search(r'''^([ou][]!"#$%&'()*+,\-./:;<=>?@[\^_`{|}~])+$''',
                      options.adornment) is None):
        optionParser.error("Invalid adornment string given")

    return args
364 ###############################################################################
def errorOut(lines):
    """
    Outputs messages as error.

    Every message is written to standard error on a line of its own,
    prefixed by the base name of the script.

    @param lines: Messages to be output as single lines.
    @type lines: ( str, ..., )

    @return: 0
    @rtype: int
    """
    scriptName = os.path.basename(sys.argv[0])
    for line in lines:
        # `print >>sys.stderr, ...' is Python 2 only syntax; writing to the
        # stream directly works the same way in Python 2 and Python 3.
        sys.stderr.write("%s: %s\n" % ( scriptName, line, ))
    return 0
381 ###############################################################################
def verboseOut(lines):
    """
    Outputs messages as a verbose message.

    Nothing is output unless the `verbose' option is set. Verbose messages
    are marked by a leading `## ' and go through `errorOut'.

    @param lines: Messages to be output as single lines.
    @type lines: ( str, ..., )

    @return: 0
    @rtype: int
    """
    if not options.verbose:
        return 0
    marked = [ ]
    for text in lines:
        marked.append("## " + text)
    errorOut(marked)
    return 0
398 ###############################################################################
def errorExit(code, lines):
    """
    Exit program with an error message.

    The messages are output through `errorOut' before the program is left.

    @param code: Exit Code to use.
    @type code: int

    @param lines: Strings to output as error message.
    @type lines: ( str, ..., )

    @return: Does not return.
    """
    errorOut(lines)
    # Raising `SystemExit' is what `sys.exit' does
    raise SystemExit(code)
415 ###############################################################################
416 ###############################################################################
417 # Specialized functions
def _openOrExit(fileNm, mode, what):
    """
    Open a file or leave the program with an error message.

    @param fileNm: Name of the file to open.
    @type fileNm: str

    @param mode: Mode string handed to `open'.
    @type mode: str

    @param what: Role of the file as named in the error message.
    @type what: str

    @return: The opened file object.
    @rtype: file
    """
    try:
        return open(fileNm, mode)
    except IOError:
        errorExit(1, ( "Can't open %s file %r" % ( what, fileNm, ), ))

def convert(inNm, outNm):
    """
    Do the conversion.

    The main XSLT sheet is looked up in the directory of the script, applied
    to the input document and the result is written to the output file or to
    standard output. Any failure to open a file, to parse the input or to
    run the transformation ends the program through `errorExit' with exit
    code 1.

    @param inNm: Filename of input file.
    @type inNm: str

    @param outNm: Filename of output file or None.
    @type outNm: str | None
    """
    # XML files are opened in binary mode so the parser decodes the bytes
    # itself instead of receiving text decoded with the locale's encoding.
    inF = _openOrExit(inNm, "rb", "input")
    try:
        scriptP = os.path.dirname(os.path.realpath(ScriptNm))
        mainXsltNm = os.path.join(scriptP, MainXsltNm)
        mainXsltF = _openOrExit(mainXsltNm, "rb", "main XSLT")
        try:
            mainXsltDoc = etree.parse(mainXsltF, etree.XMLParser())
        finally:
            mainXsltF.close()
        mainXslt = etree.XSLT(mainXsltDoc)

        try:
            inDoc = etree.parse(inF, etree.XMLParser())
        # `except ... as' works from Python 2.6 on and in Python 3
        except Exception as e:
            errorExit(1, ( "Error parsing input file %r: %s" % ( inNm, e, ), ))
    finally:
        # Also reached when `errorExit' leaves the program
        inF.close()

    # XSLT parameters are XPath expressions: the number can be given
    # literally, the string must be quoted.
    xsltParams = { }
    if options.fold is not None:
        xsltParams['fold'] = str(options.fold)
    if options.adornment is not None:
        # `strparam' also copes with a value containing quote characters;
        # a plain "'" + value + "'" yields a broken expression for an
        # adornment using `'' as markup character.
        xsltParams['adornment'] = etree.XSLT.strparam(options.adornment)
    try:
        result = mainXslt(inDoc, **xsltParams)
    except Exception as e:
        errorExit(1, ( "Error transforming input file %r: %s" % ( inNm, e, ), ))
    # Chop off trailing linefeed - added somehow
    outS = str(result)[:-1]
    if outNm:
        outF = _openOrExit(outNm, "w", "output")
        try:
            outF.write(outS)
        finally:
            outF.close()
    else:
        print(outS)
474 ###############################################################################
475 ###############################################################################
476 # Classes
478 ########################################################################
479 ##############################################################################
480 # Now work
if __name__ == '__main__':
    # The first argument names the input file, the optional second one the
    # output file; without it the result goes to standard output.
    arguments = parseOptions()
    inNm = arguments[0]
    outNm = arguments[1] if len(arguments) > 1 else None
    convert(inNm, outNm)
491 ##############################################################################
492 ##############################################################################
494 # TODO Accept additional XSLT sheets to create a transformation pipeline
496 # TODO Move from XSLT to Python implementation step by step by replacing
497 # XSLT-code by Python code through extensions and other means