2 # -*- coding: iso-8859-1 -*-
4 # Based on sample.py,v 4.1.2.6 2006/04/14 13:59:26 cvs Exp
6 # Copyright (C) 2009 Stefan Merten
8 # xml2rst.py is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published
10 # by the Free Software Foundation; either version 2 of the License,
11 # or (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
24 Convert a docutils XML file to reStructuredText syntax.
36 xml2rst.py -- convert a docutils XML file to reStructuredText syntax
40 B<xml2rst.py> [B<-v>] I<xml> [I<reST>]
42 B<xml2rst.py> B<--help>
46 Converts a docutils XML input file to reStructuredText source.
48 This can be used to transform another format to reStructuredText given you have
49 a transformation to docutils XML.
54 ###############################################################################
55 ###############################################################################
62 from optparse
import OptionParser
, OptionGroup
, OptionValueError
, Option
66 from lxml
import etree
68 errorExit(2, ( "Python package 'lxml' is not available",
69 "You may try to use 'xml2rst.xsl' with a standalone XSLT processor like 'xalan' or 'xsltproc'", ))
71 ###############################################################################
72 ###############################################################################
76 @var MainXsltNm: Name of the main XSLT source file
79 MainXsltNm
= "xml2rst.xsl"
82 @var ScriptNm: Name of the script
85 ScriptNm
= sys
.argv
[0]
87 ###############################################################################
88 ###############################################################################
92 @var options: Options given on the command line
93 @type options: optparse.Values
97 ###############################################################################
98 ###############################################################################
103 @param pod: Snippet in POD format to be analyzed.
106 @return: String of first `=headX' entry in POD snippet or empty string if
110 for line
in pod
.split("\n"):
111 if line
.startswith("=head"):
112 return line
[len("=headX"):].strip()
115 ###############################################################################
def pod2Description(pod):
    """
    Return the descriptive text of a POD snippet folded into a single line.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Stripped text from all lines not being a POD line command.
    @rtype: str
    """
    # A POD line command has `=' in the very first column, so the test is
    # made on the unstripped line.
    textLines = [line.strip()
                 for line in pod.split("\n")
                 if not line.startswith("=")]
    # Empty lines are dropped so paragraphs end up separated by exactly one
    # blank, just like repeatedly stripping and appending would do.
    return " ".join([text for text in textLines if text])
131 ###############################################################################
def pod2OptionList(pod):
    """
    Return option names found in POD snippet. Option names are recognized in
    `=item B<option>' constructs.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: All option names contained in POD snippet as a list.
    @rtype: [ str, ..., ]
    """
    # Raw string so `\s' reaches the regex engine as written instead of being
    # treated as a (invalid) string escape.
    matches = (re.search(r"^=item\s*B<(-[^>]+)>", line)
               for line in pod.split("\n"))
    # Lines which are no option item yield None and are skipped.
    return [found.group(1) for found in matches if found]
151 ###############################################################################
def pod2OptionKeywords(pod):
    """
    Return a dict mapping `OptionParser.add_option' keywords to values found
    in POD snippet.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Mapping for all values found. Currently `help' and `dest' are
             filled in.
    @rtype: { keyword: value, ..., }
    """
    result = {'help': ""}
    helpLines = []
    for line in pod.split("\n"):
        if line.startswith("=cut"):
            # Everything after `=cut' is no longer POD.
            break
        found = re.search(r"^=item\s*B<--?([^>]+)>(?:=|\s*)", line)
        if found:
            # NOTE(review): one statement at this point is missing from the
            # reviewed source; discarding text collected before the item is
            # presumed - confirm against the original.
            helpLines = []
            optionName = found.group(1)
            found = re.search(r"I<([^>]+)>", line)
            if found:
                # An explicit I<value> names the destination.
                result['dest'] = found.group(1)
            elif len(optionName) > 1:
                # A long option without a value names the destination
                # itself; single character options never do.
                result['dest'] = optionName
        else:
            helpLines.append(line)
    result['help'] = "\n".join(helpLines).strip()
    if 'dest' in result:
        # Attribute names can not contain a dash.
        result['dest'] = result['dest'].replace("-", "_")
    else:
        # NOTE(review): the trailing element of the message tuple is missing
        # from the reviewed source; the offending snippet is presumed.
        errorExit(1, ("Internal error: Missing `dest' in documentation string:",
                      pod))
    return result
188 ###############################################################################
def pod2Argument(pod):
    """
    Return a list of two strings for `OptionGroup.__init__' describing the
    argument found in POD snippet.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Name of the argument and its description.
    @rtype: [ argument, description, ]
    """
    argument = ""
    descriptionLines = []
    for line in pod.split("\n"):
        if line.startswith("=cut"):
            # Everything after `=cut' is no longer POD.
            break
        found = re.search(r"^=item\s*I<([^>]+)>", line)
        if found:
            # NOTE(review): one statement at this point is missing from the
            # reviewed source; discarding text collected before the item is
            # presumed - confirm against the original.
            descriptionLines = []
            argument = found.group(1)
        else:
            descriptionLines.append(line)
    description = "\n".join(descriptionLines).strip()
    return [argument, description]
215 ###############################################################################
219 Sets options and returns arguments.
221 @return: Name of input file and optionally of output file.
222 @rtype: ( str, [str,] )
231 optionParser
= OptionParser("usage: %prog [option]... <xml> [<rst>]")
235 =head2 General options
241 generalGroup
= OptionGroup(optionParser
, pod2Head(pod
),
242 pod2Description(pod
))
246 =item B<-a> I<adornment>
248 =item B<--adornment>=I<adornment>
250 Configures title markup to use so different styles can be requested
253 The value of the parameter must be a string made up of a sequence of
254 character pairs. The first character of a pair is C<o> (overline) or
255 C<u> (underline) and the second character is the character to use for
258 The first and the second character pair is used for document title and
259 subtitle, the following pairs are used for section titles where the
260 third pair is used for the top level section title.
262 Defaults to C<o=o-u=u-u~u:u.u`>.
266 generalGroup
.add_option(default
=None, *pod2OptionList(pod
),
267 **pod2OptionKeywords(pod
))
273 =item B<--fold>=I<fold>
275 Configures whether long text lines in paragraphs should be folded and
276 to which length. This option is for input not coming from reST which
277 may have no internal line feeds in plain text strings.
279 If folding is enabled text strings not in a line feed preserving
280 context are first white-space normalized and then broken according to
281 the folding rules. Folding rules put out the first word and continue
282 to do so with the following words unless the next word would cross
283 the folding boundary. Words are delimited by white-space.
285 Defaults to C<0>, i.e. no folding.
289 generalGroup
.add_option(type="int", default
=None,
290 *pod2OptionList(pod
), **pod2OptionKeywords(pod
))
302 generalGroup
.add_option(action
="store_true",
303 *pod2OptionList(pod
), **pod2OptionKeywords(pod
))
304 optionParser
.add_option_group(generalGroup
)
316 argumentGroup
= OptionGroup(optionParser
, pod2Head(pod
),
317 pod2Description(pod
))
318 optionParser
.add_option_group(argumentGroup
)
324 The XML input file containing docutils XML.
329 argument1Group
= OptionGroup(optionParser
, *pod2Argument(pod
))
330 optionParser
.add_option_group(argument1Group
)
336 The optional output file containing reStructuredText.
338 If not given output is put to C<STDOUT>.
342 argument2Group
= OptionGroup(optionParser
, *pod2Argument(pod
))
343 optionParser
.add_option_group(argument2Group
)
351 ( options
, args
, ) = optionParser
.parse_args()
354 optionParser
.error("An input file is required")
356 optionParser
.error("At most two arguments are allowed")
357 if (options
.adornment
is not None
358 and re
.search('^([ou][]!"#$%&\'()*+,\-./:;<=>?@[\\^_`{|}~])+$',
359 options
.adornment
) is None):
360 optionParser
.error("Invalid adornment string given")
364 ###############################################################################
368 Outputs messages as error.
370 @param lines: Messages to be output as single lines.
371 @type lines: ( str, ..., )
376 scriptName
= os
.path
.basename(sys
.argv
[0])
378 print >>sys
.stderr
, ("%s: %s" % ( scriptName
, line
, ))
381 ###############################################################################
def verboseOut(lines):
    """
    Outputs messages as a verbose message.

    @param lines: Messages to be output as single lines.
    @type lines: ( str, ..., )
    """
    # NOTE(review): the guard is missing from the reviewed source; it is
    # presumed that output depends on the verbose flag of the global
    # `options' - confirm against the original.
    if options.verbose:
        # Verbose output shares the error channel and is marked by a prefix.
        errorOut(["## " + line for line in lines])
398 ###############################################################################
def errorExit(code, lines):
    """
    Exit program with an error message.

    @param code: Exit Code to use.
    @type code: int

    @param lines: Strings to output as error message.
    @type lines: ( str, ..., )

    @return: Does not return.
    """
    # Report first, then terminate; `sys.exit' raises `SystemExit' so
    # enclosing `finally' clauses of callers still run.
    errorOut(lines)
    sys.exit(code)
415 ###############################################################################
416 ###############################################################################
417 # Specialized functions
def convert(inNm, outNm):
    """
    Transform a docutils XML file to reStructuredText using the main XSLT
    sheet located beside the script.

    @param inNm: Filename of input file.
    @type inNm: str

    @param outNm: Filename of output file or None.
    @type outNm: str | None
    """
    try:
        inF = open(inNm)
    except IOError:
        errorExit(1, ("Can't open input file %r" % (inNm, ), ))

    try:
        # The XSLT sheet is looked up in the directory the script really
        # lives in, i.e. symbolic links to the script are resolved.
        scriptP = os.path.dirname(os.path.realpath(ScriptNm))
        mainXsltNm = os.path.join(scriptP, MainXsltNm)
        try:
            mainXsltF = open(mainXsltNm)
        except IOError:
            errorExit(1, ("Can't open main XSLT file %r" % (mainXsltNm, ), ))
        try:
            mainXsltDoc = etree.parse(mainXsltF, etree.XMLParser())
        finally:
            mainXsltF.close()
        mainXslt = etree.XSLT(mainXsltDoc)

        try:
            inDoc = etree.parse(inF, etree.XMLParser())
        except Exception as e:
            errorExit(1, ("Error parsing input file %r: %s" % (inNm, e, ), ))
    finally:
        # Also reached when `errorExit' terminates through `SystemExit'.
        inF.close()

    xsltParams = {}
    if options.fold is not None:
        # Numbers are valid XPath expressions as they are.
        xsltParams['fold'] = str(options.fold)
    if options.adornment is not None:
        # Adornment strings may legally contain quote characters, so the
        # XPath string literal is built by lxml instead of adding quotes by
        # hand.
        xsltParams['adornment'] = etree.XSLT.strparam(options.adornment)
    try:
        result = mainXslt(inDoc, **xsltParams)
    except Exception as e:
        errorExit(1, ("Error transforming input file %r: %s" % (inNm, e, ), ))

    # Chop off trailing linefeed - added somehow. Only a linefeed is removed
    # so real content is never lost.
    outS = str(result)
    if outS.endswith("\n"):
        outS = outS[:-1]

    # NOTE(review): the statements writing the result are missing from the
    # reviewed source; writing to the named file or else to standard output
    # is presumed from the documented behaviour - confirm against the
    # original.
    if outNm:
        try:
            outF = open(outNm, "w")
        except IOError:
            errorExit(1, ("Can't open output file %r" % (outNm, ), ))
        try:
            outF.write(outS)
        finally:
            outF.close()
    else:
        # Standard output is owned by the interpreter and is not closed.
        sys.stdout.write(outS)
474 ###############################################################################
475 ###############################################################################
478 ########################################################################
479 ##############################################################################
if __name__ == '__main__':
    # `parseOptions' sets the global options and returns the name of the
    # input file optionally followed by the name of the output file.
    arguments = parseOptions()
    # NOTE(review): the statements unpacking the arguments and starting the
    # conversion are missing from the reviewed source; they are presumed from
    # the documented return value of `parseOptions' and the parameters of
    # `convert' - confirm against the original.
    inNm = arguments[0]
    if len(arguments) > 1:
        outNm = arguments[1]
    else:
        # Without an output file name `convert' is given None.
        outNm = None
    convert(inNm, outNm)
491 ##############################################################################
492 ##############################################################################
494 # TODO Accept additional XSLT sheets to create a transformation pipeline
496 # TODO Move from XSLT to Python implementation step by step by replacing
497 # XSLT-code by Python code through extensions and other means