Release 0.9.1: set version number to 0.9.1
[docutils/kirr.git] / sandbox / xml2rst / xml2rst.py
blob4c2289d807644f48bb583baa8c4fa73f5daa9cca
1 #! /usr/bin/env python
2 # -*- coding: iso-8859-1 -*-
4 # Based on sample.py,v 4.1.2.6 2006/04/14 13:59:26 cvs Exp
6 # Copyright (C) 2009 Stefan Merten
8 # xml2rst.py is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published
10 # by the Free Software Foundation; either version 2 of the License,
11 # or (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 # General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 # 02111-1307, USA.
23 """
24 Convert a docutils XML file to reStructuredText syntax.
28 perldoc xml2rst.py
30 for a man page.
31 """
33 """
34 =head1 NAME
36 xml2rst.py -- convert a docutils XML file to reStructuredText syntax
38 =head1 SYNOPSIS
40 B<xml2rst.py> [B<-v>] I<xml> [I<reST>]
42 B<xml2rst.py> B<--help>
44 =head1 DESCRIPTION
46 Converts a docutils XML input file to reStructuredText source.
48 This can be used to transform another format to reStructuredText given you have
49 a transformation to docutils XML.
51 =cut
52 """
54 ###############################################################################
55 ###############################################################################
56 # Import
58 import sys
59 import os.path
60 import re
62 from optparse import OptionParser, OptionGroup, OptionValueError, Option
64 from rst import rst_xslt
66 ###############################################################################
67 ###############################################################################
68 # Variables
# @var options: Options given on the command line. Bound to C{None} here so
#               the name always exists; `parseOptions' rebinds it to the
#               parsed values.
# @type options: optparse.Values
#
# A `global' statement at module level has no effect, so the name is bound
# by an explicit assignment instead.
options = None
76 ###############################################################################
77 ###############################################################################
78 # General functions
def pod2Head(pod):
    """
    Extract the heading text of the first `=headX' command in a POD snippet.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Stripped text following the first `=headX' command or the empty
             string if the snippet contains no such command.
    @rtype: str
    """
    # The command itself (`=head' plus one level character) is cut off.
    commandLength = len("=headX")
    headings = ( podLine[commandLength:].strip()
                 for podLine in pod.split("\n")
                 if podLine.startswith("=head") )
    return next(headings, "")
94 ###############################################################################
def pod2Description(pod):
    """
    Collect the plain text of a POD snippet.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Stripped text from all lines not being a POD line command. The
             non-empty lines are joined by a single space.
    @rtype: str
    """
    # Lines starting with `=' are POD commands and carry no description text.
    stripped = [ line.strip()
                 for line in pod.split("\n")
                 if not line.startswith("=") ]
    # Blank lines must not produce additional separators.
    return " ".join([ text for text in stripped if text ])
110 ###############################################################################
def pod2OptionList(pod):
    """
    Return option names found in POD snippet. Option names are recognized in
    `=item B<option>' constructs.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: All option names contained in POD snippet as a list. The names
             include their leading dash(es).
    @rtype: [ str, ..., ]
    """
    # Raw string so `\s' reaches the regex engine instead of being treated as
    # an (invalid) string escape.
    itemPattern = re.compile(r"^=item\s*B<(-[^>]+)>")
    result = [ ]
    for line in pod.split("\n"):
        found = itemPattern.search(line)
        if found:
            result.append(found.group(1))
    return result
130 ###############################################################################
def pod2OptionKeywords(pod):
    """
    Return a dict mapping `OptionParser.add_option' keywords to values found in
    POD snippet.

    The snippet is read up to the first `=cut' command. Every
    `=item B<option>' line restarts the help text, so the help consists of the
    lines following the last such item. The destination is taken from an
    `I<name>' on an item line or else from a long option name; dashes in it
    are replaced by underscores.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Mapping for all values found. Currently `help' and `dest' are
             filled.
    @rtype: { keyword: value, ..., }
    """
    result = { 'help': "", }
    for line in pod.split("\n"):
        if line.startswith("=cut"):
            break
        # Raw string so `\s' is passed on to the regex engine unchanged.
        found = re.search(r"^=item\s*B<--?([^>]+)>(?:=|\s*)", line)
        if found:
            # A new item invalidates whatever text was collected so far.
            result['help'] = ""
            optionName = found.group(1)
            found = re.search("I<([^>]+)>", line)
            if found:
                result['dest'] = found.group(1)
            elif len(optionName) > 1:
                # Only long option names are usable as a destination.
                result['dest'] = optionName
        else:
            result['help'] += line + "\n"
    result['help'] = result['help'].strip()
    # `in' works on every Python version; `dict.has_key' is gone in Python 3.
    if 'dest' in result:
        result['dest'] = result['dest'].replace("-", "_")
    else:
        errorExit(1, ( "Internal error: Missing `dest' in documentation string:",
                       pod, ))
    return result
167 ###############################################################################
def pod2Argument(pod):
    """
    Return a list of two strings for `OptionGroup.__init__' describing the
    argument found in POD snippet.

    The snippet is read up to the first `=cut' command. Every `=item I<name>'
    line restarts the description, so the result describes the last such item.

    @param pod: Snippet in POD format to be analyzed.
    @type pod: str

    @return: Name of the argument and its description. Both are empty strings
             if nothing was found.
    @rtype: [ argument, description, ]
    """
    argument = ""
    description = ""
    # Raw string so `\s' is passed on to the regex engine unchanged.
    itemPattern = re.compile(r"^=item\s*I<([^>]+)>")
    for line in pod.split("\n"):
        if line.startswith("=cut"):
            break
        found = itemPattern.search(line)
        if found:
            # Text seen before the item does not belong to its description.
            description = ""
            argument = found.group(1)
        else:
            description += line + "\n"
    description = description.strip()
    return [ argument, description, ]
194 ###############################################################################
def parseOptions():
    """
    Sets options and returns arguments.

    The POD snippets assigned to `pod' serve two purposes: they form the man
    page shown by `perldoc' and they are the source from which the option
    names, destinations and help texts for the option parser are derived. For
    that reason their content starts in the first column and snippets which
    are not evaluated here are kept nevertheless.

    Exits through `OptionParser.error' if the number of arguments is wrong or
    if the adornment string is malformed.

    @return: Name of input file and optionally of output file.
    @rtype: ( str, [str,] )
    """
    global options
    pod = """

=head1 OPTIONS

=cut
"""
    optionParser = OptionParser("usage: %prog [option]... <xml> [<rst>]")

    pod = """

=head2 General options

=over 4

=cut
"""
    generalGroup = OptionGroup(optionParser, pod2Head(pod),
                               pod2Description(pod))

    pod = """

=item B<-a> I<adornment>

=item B<--adornment>=I<adornment>

Configures title markup to use so different styles can be requested
easily.

The value of the parameter must be a string made up of a sequence of
character pairs. The first character of a pair is C<o> (overline) or
C<u> (underline) and the second character is the character to use for
the markup.

The first and the second character pair is used for document title and
subtitle, the following pairs are used for section titles where the
third pair is used for the top level section title.

Defaults to C<o=o-u=u-u~u:u.u`>.

=cut
"""
    generalGroup.add_option(default=None, *pod2OptionList(pod),
                            **pod2OptionKeywords(pod))

    pod = """

=item B<-f> I<fold>

=item B<--fold>=I<fold>

Configures whether long text lines in paragraphs should be folded and
to which length. This option is for input not coming from reST which
may have no internal line feeds in plain text strings.

If folding is enabled text strings not in a line feed preserving
context are first white-space normalized and then broken according to
the folding rules. Folding rules put out the first word and continue
to do so with the following words unless the next word would cross
the folding boundary. Words are delimited by white-space.

Defaults to C<0>, i.e. no folding.

=cut
"""
    generalGroup.add_option(type="int", default=None,
                            *pod2OptionList(pod), **pod2OptionKeywords(pod))

    pod = """

=item B<-v>

=item B<--verbose>

Operate verbose.

=cut
"""
    generalGroup.add_option(action="store_true",
                            *pod2OptionList(pod), **pod2OptionKeywords(pod))
    optionParser.add_option_group(generalGroup)

    pod = """

=back

=head2 Arguments

=over 4

=cut
"""
    argumentGroup = OptionGroup(optionParser, pod2Head(pod),
                                pod2Description(pod))
    optionParser.add_option_group(argumentGroup)

    pod = """

=item I<xml>

The XML input file containing docutils XML.

=cut
"""

    argument1Group = OptionGroup(optionParser, *pod2Argument(pod))
    optionParser.add_option_group(argument1Group)

    pod = """

=item I<rst>

The optional output file containing reStructuredText.

If not given output is put to C<STDOUT>.

=cut
"""
    argument2Group = OptionGroup(optionParser, *pod2Argument(pod))
    optionParser.add_option_group(argument2Group)

    pod = """

=back

=cut
"""
    ( options, args, ) = optionParser.parse_args()

    if len(args) < 1:
        optionParser.error("An input file is required")
    if len(args) > 2:
        optionParser.error("At most two arguments are allowed")
    # One or more pairs of `o' or `u' followed by a punctuation character.
    # Written as a raw string so `\-' and `\^' reach the regex engine
    # unchanged instead of being (invalid) string escapes.
    # NOTE(review): the backslash is not part of the accepted characters --
    # confirm whether this omission is intended.
    adornmentPattern = r'''^([ou][]!"#$%&'()*+,\-./:;<=>?@[\^_`{|}~])+$'''
    if (options.adornment is not None
        and re.search(adornmentPattern, options.adornment) is None):
        optionParser.error("Invalid adornment string given")

    return args
343 ###############################################################################
def errorOut(lines):
    """
    Outputs messages as error.

    Every message is written to standard error on a line of its own prefixed
    by the base name of the running script.

    @param lines: Messages to be output as single lines.
    @type lines: ( str, ..., )

    @return: 0
    @rtype: int
    """
    scriptName = os.path.basename(sys.argv[0])
    for line in lines:
        # `write' behaves the same on Python 2 and Python 3 whereas the
        # `print >>file' statement exists in Python 2 only.
        sys.stderr.write("%s: %s\n" % ( scriptName, line, ))
    return 0
360 ###############################################################################
def verboseOut(lines):
    """
    Outputs messages as a verbose message.

    Nothing is output unless the `verbose' option is set. Otherwise every
    message is marked by a leading `## ' and passed on to `errorOut'.

    @param lines: Messages to be output as single lines.
    @type lines: ( str, ..., )

    @return: 0
    @rtype: int
    """
    if not options.verbose:
        return 0
    marked = [ ]
    for message in lines:
        marked.append("## " + message)
    errorOut(marked)
    return 0
377 ###############################################################################
def errorExit(code, lines):
    """
    Report an error and terminate the program.

    @param code: Exit Code to use.
    @type code: int

    @param lines: Strings to output as error message.
    @type lines: ( str, ..., )

    @return: Does not return.
    """
    errorOut(lines)
    # Raising `SystemExit' is what `sys.exit' does.
    raise SystemExit(code)
394 ###############################################################################
395 ###############################################################################
396 # Specialized functions
398 ###############################################################################
399 ###############################################################################
400 # Classes
402 ########################################################################
403 ##############################################################################
404 # Now work
if __name__ == '__main__':
    # The first argument is the XML input file, the optional second one is the
    # output file.
    arguments = parseOptions()
    inF = arguments[0]
    if len(arguments) > 1:
        outF = arguments[1]
    else:
        outF = None
    try:
        rst_xslt.convert(inF, outF, options)
    except Exception as e:
        # Top-level boundary: any failure of the conversion becomes an error
        # message and exit code 1. `errorExit' expects a sequence of message
        # lines, so the exception is turned into a single line of text.
        errorExit(1, [ str(e), ])
418 ##############################################################################
419 ##############################################################################
421 # TODO Accept additional XSLT sheets to create a transformation pipeline
423 # TODO Move from XSLT to Python implementation step by step by replacing
424 # XSLT-code by Python code through extensions and other means
427 # TODO The docutils XML reader must be used