Fix #338: re.sub() flag argument at wrong position.
[docutils.git] / sandbox / docpy-writer / rst2docpy.py
bloba39cffd1d8476a36ab2c62fbe1b409848172e950
1 #!/usr/bin/python
3 # Convert the reStructuredText docs to LaTeX for use in Python docs
5 # This script is a hacked version taken from the Optik SVN repository.
7 import sys, os
8 import re
9 import rfc822
10 from distutils.dep_util import newer_group, newer
11 from docutils.core import Publisher
12 from docutils.readers.standalone import Reader as StandaloneReader
13 from docutils.transforms import Transform
14 from docutils.writers.latex2e import Writer as LaTeXWriter, LaTeXTranslator
15 from docutils import nodes
class OptikReader(StandaloneReader):
    """Standalone reader used by this converter.

    It currently adds nothing to ``StandaloneReader``; the extra transform
    below is kept commented out.
    """
    #default_transforms = (StandaloneReader.default_transforms +
    #                      (ReplacementTransform,))
# Python 2.3 compatibility: provide ``set`` and ``sorted`` when the
# interpreter does not have them as builtins.  The names are probed directly
# (instead of via ``hasattr(__builtins__, ...)``) because ``__builtins__`` is
# a plain dict in imported modules, where ``hasattr`` would always fail.
try:
    set
except NameError:
    import sets
    set = sets.Set

try:
    sorted
except NameError:
    def sorted(iterable):
        """Return a new list with the items of `iterable` in ascending order.

        Works for any iterable (lists, sets, ...) and never modifies the
        argument, like the builtin of the same name.
        """
        items = list(iterable)
        items.sort()
        return items
33 from markup import codemarkup
34 missing = set()
class PyLaTeXWriter(LaTeXWriter):
    """LaTeX writer that renders documents with ``PyLaTeXTranslator``."""

    def __init__(self):
        """Initialise the base writer, then swap in the custom translator."""
        LaTeXWriter.__init__(self)
        # The base writer instantiates whatever class is stored here.
        self.translator_class = PyLaTeXTranslator
class PyLaTeXTranslator(LaTeXTranslator):
    """Translate a docutils document tree into Python-documentation LaTeX.

    Docinfo fields are collected in ``self.docinfo`` and written out as the
    module header when the docinfo block ends; definition lists become
    ``xxxdesc`` environments selected by the item's classifier; tables become
    ``tableii`` ... ``tablev`` environments.
    """

    # Section titles to replace (title text -> replacement text).
    remap_title = {
        }

    # Suffix of the ``table<suffix>`` environment and ``\line<suffix>`` macro,
    # indexed by the number of columns.
    roman = (None, None, "ii", "iii", "iv", "v")

    # Reference targets whose label differs from the file's base name.
    refuri_override = {
        "reference": "reference-guide",
        "callbacks": "option-callbacks",
        }

    # Classifiers accepted as environment names by visit_classifier().
    _desc_environments = (
        'datadesc', 'datadescni', 'excdesc', 'classdesc*',
        'csimplemacrodesc', 'ctypedesc', 'memberdesc',
        'memberdescni', 'cvardesc', 'excclassdesc',
        'funcdesc', 'funcdescni', 'methoddesc',
        'methoddescni', 'cmemberdesc', 'classdesc',
        'cfuncdesc')

    # Optional docinfo entries in output order: (required key, template).
    _docinfo_templates = (
        ("moduletype", "\\declaremodule{%(moduletype)s}{%(module)s}\n"),
        ("moduleauthor",
         "\\moduleauthor{%(moduleauthor)s}{%(moduleauthoremail)s}\n"),
        ("synopsis", "\\modulesynopsis{%(synopsis)s}\n"),
        ("release", "\\release{%(release)s}\n"),
        ("shortversion", "\\setshortversion{%(shortversion)s}\n"),
        ("sectionauthor",
         "\\sectionauthor{%(sectionauthor)s}{%(sectionauthoremail)s}\n"),
        ("versionadded", "\\versionadded{%(versionadded)s}\n"),
        )

    def __init__(self, document):
        """Set up the translator state for `document`."""
        LaTeXTranslator.__init__(self, document)
        self.label_prefix = ""
        self.docinfo = {}
        self.head_prefix = []
        self.head = []
        self.body_prefix = []
        self.in_title = False
        # Name of the xxxdesc environment whose title is being formatted;
        # the title markup differs between environments (e.g. funcdesc).
        self.in_anydesc = False
        self.admonition_stack = []
        # Terms of the current definition list item not yet written out.
        self._dl_term = []

        # Disable a bunch of methods from the base class.
        def ignore_node(node):
            return None
        for nodetype in ('field_argument',
                         'field_body',
                         'field_list',
                         'field_name'):
            setattr(self, 'visit_' + nodetype, ignore_node)
            setattr(self, 'depart_' + nodetype, ignore_node)
        # definitions must be guarded if multiple modules are included
        self.definitions = [
            "\\ifx\\locallinewidth\\undefined\\newlength{\\locallinewidth}\\fi\n"
            "\\setlength{\\locallinewidth}{\\linewidth}\n"
            ]

    def astext(self):
        """Return the complete LaTeX output as one string."""
        parts = (self.definitions +
                 self.head_prefix +
                 self.head +
                 self.body_prefix +
                 self.body +
                 self.body_suffix)
        return ''.join(parts)

    def set_label_prefix(self, text):
        """Use `text`, with blanks turned into dashes, as the label prefix."""
        self.label_prefix = text.replace(" ", "-")

    def generate_section_label(self, title):
        """Return the LaTeX label for the section named `title`.

        The title is lowercased; parenthesised text, punctuation and a few
        filler words are dropped; anything after "example N" is cut off.
        The remaining words are joined with dashes behind the label prefix.
        """
        title = title.lower()
        title = re.sub(r'\([^\)]*\)', '', title)
        title = re.sub(r'[^\w\s\-]', '', title)
        title = re.sub(r'\b(the|an?|and|your|are)\b', '', title)
        title = re.sub(r'(example \d+).*', r'\1', title)
        return self.label_prefix + "-" + "-".join(title.split())

    def visit_document(self, node):
        pass

    def depart_document(self, node):
        pass

    def visit_docinfo(self, node):
        pass

    def depart_docinfo(self, node):
        """Write the module header built from the collected docinfo.

        Raises KeyError when the mandatory "module" or "summary" entry was
        not given.
        """
        # module and summary are mandatory
        self.body.append(
            "\\section{\\module{%(module)s} --- %(summary)s}\n"
            % self.docinfo)
        for (key, template) in self._docinfo_templates:
            if key in self.docinfo:
                self.body.append(template % self.docinfo)

    def visit_docinfo_item(self, node, name):
        """Record the author as module author; emit nothing for the item."""
        if name == "author":
            (ename, email) = rfc822.parseaddr(node.astext())
            self.docinfo["moduleauthor"] = ename
            self.docinfo["moduleauthoremail"] = email
        raise nodes.SkipNode

    def depart_docinfo_item(self, node):
        pass

    def visit_field(self, node):
        """Store a docinfo field in ``self.docinfo`` instead of emitting it.

        Author-like fields are split into name and e-mail address; the
        "module" field additionally sets the label prefix.  Fields outside
        the docinfo block are left alone.
        """
        if not isinstance(node.parent, nodes.docinfo):
            return
        # The field name is matched case-insensitively and without blanks.
        name = node[0].astext().lower().replace(" ", "")
        value = node[1].astext()
        if name == "moduleauthor":
            (ename, email) = rfc822.parseaddr(value)
            self.docinfo["moduleauthor"] = ename
            self.docinfo["moduleauthoremail"] = email
        elif name in ("author", "sectionauthor"):
            (ename, email) = rfc822.parseaddr(value)
            self.docinfo["sectionauthor"] = ename
            self.docinfo["sectionauthoremail"] = email
        else:
            if name == "module":
                self.set_label_prefix(value)
            self.docinfo[name] = value
        raise nodes.SkipNode

    _quoted_string_re = re.compile(r'\"[^\"]*\"')
    _short_opt_string_re = re.compile(r'-[a-zA-Z]')
    _long_opt_string_re = re.compile(r'--[a-zA-Z-]+')
    _identifier_re = re.compile(r'[a-zA-Z_][a-zA-Z_0-9]*'
                                r'(\.[a-zA-Z_][a-zA-Z_0-9]*)*'
                                r'(\(\))?$')

    def visit_literal(self, node):
        """Open the LaTeX command that fits the literal's text.

        Quoted strings use ``code``, options ``programopt`` or
        ``longprogramopt`` (without the leading dashes), identifiers the
        command listed in ``codemarkup``.  Identifiers not listed there are
        collected in ``missing`` and set as ``code``.  Inside a title no
        command is written at all.
        """
        assert isinstance(node[0], nodes.Text)
        text = node[0].data
        if self.in_title:
            cmd = None
        elif self._quoted_string_re.match(text):
            cmd = 'code'
        elif self._short_opt_string_re.match(text):
            cmd = 'programopt'
        elif self._long_opt_string_re.match(text):
            cmd = 'longprogramopt'
            text = text[2:]
        elif self._identifier_re.match(text):
            cmd = codemarkup.get(text)
            if cmd is None:
                ## print "warning: unrecognized code word %r" % text
                missing.add(text)
                cmd = 'code'
        else:
            cmd = 'code'

        self.literal = 1
        node[0].data = text
        if cmd is not None:
            self.body.append('\\%s{' % cmd)

    # use definition lists for special environments
    #
    # definition_list
    #   definition_list_item
    #     term
    #     classifier
    #     definition
    #       paragraph ?
    def visit_definition_list(self, node):
        pass

    def depart_definition_list(self, node):
        pass

    def visit_definition_list_item(self, node):
        self._dl_term = []

    def depart_definition_list_item(self, node):
        """Close the environment opened for this item, if there is one."""
        try:
            self.body.append(self.context.pop())
        except IndexError:
            # Nothing was pushed: no environment had been opened.
            self.body.append("% WARN definition list without classifier\n")

    def visit_term(self, node):
        """Remember the term; it is written once the environment is known."""
        self._dl_term.append(node.astext())
        raise nodes.SkipNode

    def depart_term(self, node):
        pass

    def _begin_desc(self, classifier):
        """Open environment `classifier` titled with the pending term.

        The matching ``\\end`` is pushed on the context stack for
        depart_definition_list_item().
        """
        self.body.append('\n\\begin{%s}' % classifier)
        self.in_anydesc = classifier
        self.body.append(self.anydesc_title(self._dl_term.pop()))
        self.context.append('\\end{%s}\n' % classifier)
        self.in_anydesc = None

    def visit_classifier(self, node):
        """Open the environment named by the classifier.

        Unknown classifiers fall back to ``datadescni``.
        """
        # TODO here it should be decided if it is latex or python
        classifier = node.astext()
        if classifier not in self._desc_environments:
            classifier = 'datadescni'
        self._begin_desc(classifier)
        raise nodes.SkipNode

    def depart_classifier(self, node):
        pass

    def visit_definition(self, node):
        """Open a ``datadescni`` environment if the term is still pending."""
        if len(self._dl_term) > 0:
            # no classifier, fake it (maybe make a plain latex description).
            self._begin_desc('datadescni')

    def depart_definition(self, node):
        pass

    def depart_literal(self, node):
        """Close the command opened by visit_literal()."""
        # In titles visit_literal() writes no command, so nothing to close.
        if not self.in_title:
            self.body.append('}')
        self.literal = 0

    def visit_literal_block(self, node):
        self.body.append("\\begin{verbatim}\n")
        self.verbatim = 1

    def depart_literal_block(self, node):
        self.verbatim = 0
        self.body.append("\n\\end{verbatim}\n")

    def anydesc_title(self, title):
        """Return `title` as the arguments of the current xxxdesc environment.

        The layout depends on ``self.in_anydesc``; environments not handled
        explicitly get the whole title as a single ``{...}`` argument.
        """
        def markup_optional_parameters(s):
            return s.replace('[', '\\optional{').replace(']', '}')

        def with_params(s):
            # "funcname(arguments)" to "{funcname}{arguments}"
            # "funcname([arguments])" to "{funcname}{\optional{arguments}}"
            return markup_optional_parameters(
                '{%s}' % s.replace('(', '}{').replace(')', ''))

        def split_tag_or_typename(s, braces):
            # "name", "tag name", "name(params)", "type name(params)"
            param_pos = s.find("(")
            blank_pos = s.find(" ")
            # A leading word is a tag only if it ends before the parameters.
            if blank_pos > 0 and (param_pos < 0 or blank_pos < param_pos):
                (tag, rest) = s.split(None, 1)
                return (braces[0] + tag + braces[1], rest)
            return ('', s)

        def with_tag_or_typename(s, braces):
            # "name", "tag name", "name(params)", "type name(params)"
            (tag, rest) = split_tag_or_typename(s, braces)
            return tag + with_params(rest)

        env = self.in_anydesc
        if env in ('datadesc', 'datadescni', 'excdesc', 'classdesc*',
                   'csimplemacrodesc'):
            # \begin{xdesc}{name}
            return '{%s}' % title
        elif env in ('ctypedesc', 'memberdesc', 'memberdescni',):
            # \begin{ctypedesc} [tag]{name}
            return with_tag_or_typename(title, '[]')
        elif env in ('classdesc', 'cvardesc', 'excclassdesc',
                     'funcdesc', 'funcdescni'):
            return with_params(title)
        elif env in ('methoddesc', 'methoddescni'):
            # \begin{methoddesc} [type name]{name}{parameters}
            return with_tag_or_typename(title, '[]')
        elif env in ('cfuncdesc',):
            return with_tag_or_typename(title, '{}')
        elif env in ('cmemberdesc',):
            # \begin{cmemberdesc} {container}{type}{name}
            (tag, rest) = split_tag_or_typename(title, '{}')
            return tag + with_tag_or_typename(rest, '{}')
        # fallback
        return "{%s}" % title

    def visit_title(self, node):
        """Open the sectioning command; its label is added on departure."""
        title = node.astext()
        if self.in_anydesc:
            self.body.append(self.anydesc_title(title))
            raise nodes.SkipNode
        title = self.remap_title.get(title, title)
        # TODO label_prefix might not be set yet.
        label = self.generate_section_label(title)
        section_name = self.d_class.section(self.section_level + 1)
        self.body.append("\n\n\\%s{" % section_name)
        self.context.append("\\label{%s}}\n" % label)
        self.in_title = True

    def depart_title(self, node):
        self.in_title = False
        self.body.append(self.context.pop())

    def visit_target(self, node):
        pass

    def depart_target(self, node):
        pass

    def visit_admonition(self, node, name=''):
        """Open a ``notice`` for notes and warnings, else use the base class."""
        self.admonition_stack.append(name)
        if name in ('note', 'warning'):
            self.body.append('\\begin{notice}[%s]' % name)
        else:
            LaTeXTranslator.visit_admonition(self, node, name)

    def depart_admonition(self, node=None):
        """Close what visit_admonition() opened for this admonition."""
        name = self.admonition_stack.pop()
        # Must mirror visit_admonition(): both kinds were opened as notice.
        if name in ('note', 'warning'):
            self.body.append('\\end{notice}\n')
        else:
            LaTeXTranslator.depart_admonition(self, node)

    def bookmark(self, node):
        pass

    def visit_reference(self, node):
        """Write a reference as "section~\\ref{label}, " before its text.

        References with neither ``refuri`` nor ``refid`` are reported and
        handed to the base class unchanged.
        """
        if node.has_key('refuri'):
            refuri = node['refuri']
            basename = os.path.splitext(refuri)[0]
            label = (self.label_prefix + "-"
                     + self.refuri_override.get(basename, basename))
            print("got refuri=%r, label=%r" % (refuri, label))
        elif node.has_key('refid'):
            label = self.generate_section_label(node['refid'])
            print("got refid=%r, label=%r" % (node['refid'], label))
        else:
            print("warning: unhandled reference: node=%r" % node)
            LaTeXTranslator.visit_reference(self, node)
            # No label is known here; return so that the base class's
            # departure handler still runs for what it just opened.
            return

        self.body.append("section~\\ref{%s}, " % label)
        raise nodes.SkipDeparture

    _quoted_phrase_re = re.compile(r'"([^"]+)"')
    _em_dash_re = re.compile(r'\s+\-\-\s+')

    def visit_Text(self, node):
        """Append the node's text, with typographic fixes, to the body."""
        text = node.astext()
        if self.in_title:
            text = self.remap_title.get(text, text)

        # Literal and verbatim text is passed through without these fixes.
        if not (self.literal or self.verbatim):
            text = self._em_dash_re.sub(u"\u2014", text)
            text = self._quoted_phrase_re.sub(u"\u201C\\1\u201D", text)
            text = re.sub(r'\bdocument\b', "section", text)
        text = self.encode(text)

        # A couple of transformations are easiest if they go direct
        # to LaTeX, so do them *after* encode().
        text = text.replace("UNIX", "\\UNIX{}")

        self.body.append(text)

    def depart_Text(self, node):
        pass

    # table handling
    # TODO move table handling into latex2e writer Table class.
    def _table_suffix(self):
        """Return the ``roman`` entry for the active table's column count."""
        # TODO use roman to map name ? only i ... iv is supported
        return self.roman[len(self.active_table._col_specs)]

    def visit_table(self, node):
        self.active_table.open()

    def depart_table(self, node):
        self.body.append('\\end{table%s}\n' % (self._table_suffix(),))
        self.active_table.close()

    def visit_thead(self, node):
        """Open the table environment with one ``l`` column per column."""
        column_count = len(self.active_table._col_specs)
        self.body.append('\\begin{table%s}{l%s}{textrm}\n' %
                         (self.roman[column_count],
                          '|l' * (column_count - 1)))
        self.active_table.set('preamble written', 1)

    def depart_thead(self, node):
        pass

    def visit_row(self, node):
        # Header cells follow the table's \begin directly; every other row
        # starts with the \line macro for this column count.
        if not isinstance(node.parent, nodes.thead):
            self.body.append('\\line%s' % (self._table_suffix(),))

    def depart_row(self, node):
        # CAUTION: latex2html stuffs content outside of {} into paragraphs
        # before the table.
        pass

    def visit_entry(self, node):
        if node.has_key('morerows') or node.has_key('morecols'):
            raise NotImplementedError('Cells spanning rows or columns are not'
                                      ' supported.')
        # CAUTION: latex2html needs ``\lineii{`` the brace must follow
        # immediately
        self.body.append('{')

    def depart_entry(self, node):
        self.body.append('}\n')
def convert(infilename, outfilename):
    """Publish the reStructuredText file `infilename` as LaTeX.

    The result is written to `outfilename`; a progress line is printed
    first.
    """
    print("converting %s to %s" % (infilename, outfilename))
    publisher = Publisher()
    # Reader and writer chosen here are replaced by the custom ones below;
    # only the parser is actually used.
    publisher.set_components('standalone',        # reader
                             'restructuredtext',  # parser
                             'latex')             # writer (arg, will be discarded)
    publisher.reader = OptikReader()
    publisher.writer = PyLaTeXWriter()
    publisher.process_programmatic_settings(None, None, None)
    publisher.set_source(source_path=infilename)
    publisher.set_destination(destination_path=outfilename)
    publisher.publish()
def main():
    """Convert the file named on the command line and report unknown words.

    Expects the input and the output file name as the first two command
    line arguments.  If literals were found that ``codemarkup`` does not
    list, a template with one entry per word is written to "missing.py" in
    the current directory.
    """
    if len(sys.argv) < 3:
        sys.stderr.write("usage: %s INFILE OUTFILE\n" % sys.argv[0])
        sys.exit(2)
    convert(sys.argv[1], sys.argv[2])
    if missing:
        mod = open("missing.py", "w")
        # try/finally so the file is closed even if a write fails.
        try:
            mod.write("# possible markups:\n")
            mod.write("# module, code, method, class, function, member, var. Are there more?\n")
            mod.write("codemarkup = {\n")
            for name in sorted(missing):
                mod.write(" '%s': 'code',\n" % name)
            mod.write("}\n")
        finally:
            mod.close()
462 main()