Add <target> to one more testcase (see r8206).
[docutils.git] / sandbox / mmgilbe / rst.py
blob b2ad961c60ca10f140d1cbcd591f58165e7b46b3
1 # -*- coding: iso-8859-1 -*-
2 """
3 MoinMoin - ReStructured Text Parser
5 @copyright: 2004 by Matthew Gilbert <gilbert AT voxmea DOT net>
6 and by Alexander Schremmer <alex AT alexanderweb DOT de>
7 @license: GNU GPL, see COPYING for details.
9 REQUIRES docutils 0.3.3 or later
10 """
12 #############################################################################
13 ### ReStructured Text Parser
14 #############################################################################
16 import re
17 import new
18 import StringIO
19 import __builtin__
20 import sys
21 import copy
23 # docutils imports are below
24 import MoinMoin.parser.wiki
25 from MoinMoin.Page import Page
27 Dependencies = [] # this parser just depends on the raw text
29 # --- make docutils safe by overriding all module-scoped names related to IO ---
# TODO: Make dummyOpen report an error so that the user knows that they
# requested a docutils feature that is not supported inside MoinMoin.
def dummyOpen(x, y=None, z=None):
    """
    Stand-in for the builtin ``open``.

    The three positional slots mirror ``open(name, mode, buffering)``; all
    of them are ignored and nothing is opened. Always returns None.
    """
    return None
35 class dummyIO(StringIO.StringIO):
36 def __init__(self, destination=None, destination_path=None,
37 encoding=None, error_handler='', autoclose=1,
38 handle_io_errors=1, source_path=None):
39 StringIO.StringIO.__init__(self)
40 pass
42 class dummyUrllib2:
43 def urlopen(a):
44 return StringIO.StringIO()
45 urlopen = staticmethod(urlopen)
47 # # # All docutils imports must be contained below here
48 import docutils
49 from docutils.core import publish_parts
50 from docutils.writers import html4css1
51 from docutils.nodes import fully_normalize_name, reference
52 from docutils.parsers import rst
53 from docutils.parsers.rst import directives, roles
54 # # # All docutils imports must be contained above here
def safe_import(name, globals=None, locals=None, fromlist=None):
    """
    Import a module through the builtin ``__import__`` and replace its
    ``open`` and ``urllib2`` attributes with the dummy implementations.

    Takes the same arguments as ``__import__`` and returns whatever
    ``__import__`` returned.
    """
    imported = __builtin__.__import__(name, globals, locals, fromlist)
    if not imported:
        return imported
    for attr_name, dummy in (('open', dummyOpen), ('urllib2', dummyUrllib2)):
        setattr(imported, attr_name, dummy)
    return imported
# Walk over every module that is already loaded and give each docutils module
# the dummy ``open`` and ``urllib2`` replacements. Installing safe_import as
# the module's ``__import__`` makes sure that anything those modules import
# later on receives the dummy implementations as well.
for i in sys.modules.keys():
    if not i.startswith('docutils'):
        continue
    if not sys.modules[i]:
        # Falsy entry: there is no module object to patch.
        continue
    setattr(sys.modules[i], 'open', dummyOpen)
    setattr(sys.modules[i], 'urllib2', dummyUrllib2)
    setattr(sys.modules[i], '__import__', safe_import)

# docutils file input and output both become in-memory buffers.
docutils.io.FileInput = dummyIO
docutils.io.FileOutput = dummyIO
75 # --- End of dummy-code --------------------------------------------------------
def html_escape_unicode(node):
    """
    Replace every character whose ordinal is above 127 with a decimal
    HTML character reference (``&#NNN;``).

    node is the string to escape. The return value is the escaped string;
    when nothing needs escaping the input object itself is returned.

    The string is processed in a single pass instead of calling
    ``replace`` on the whole string once per non-ASCII character.
    """
    # string.encode('ascii', 'xmlcharrefreplace') would do this, but it is
    # only available in Python 2.3 and above.
    pieces = []
    changed = False
    for char in node:
        code = ord(char)
        if code > 127:
            pieces.append('&#%d;' % code)
            changed = True
        else:
            pieces.append(char)
    if not changed:
        return node
    return ''.join(pieces)
class MoinWriter(html4css1.Writer):
    """
    html4css1 writer that renders through a MoinTranslator and routes
    unresolved references to the wiki instead of reporting them as errors.
    """

    config_section = 'MoinMoin writer'
    config_section_dependencies = ('writers',)

    # Final translated form of `document`.
    output = None

    def wiki_resolver(self, node):
        """
        Normally an unknown reference would be an error in a reST document.
        However, this is how new documents are created in the wiki. This
        passes on unknown references to eventually be handled by the
        MoinMoin formatter.

        Returns 1 when the reference was given a 'refuri' here, 0 when it
        is left unresolved.
        """
        # TODO: Need to better document the attributes here.
        if getattr(node, 'indirect_reference_name', None):
            node['refuri'] = node.indirect_reference_name
            return 1
        elif 'id' in node.attributes:
            # I'm pretty sure the first test should catch any targets or
            # references with the "id" attribute. Therefore, if we get to
            # here it's probably an internal link that didn't work, so we
            # let it go through as an error.
            return 0
        # Use the reference name itself as the URI and remember the node.
        node['refuri'] = node['refname']
        del node['refname']
        self.nodes.append(node)
        return 1

    # Plain decimal 1; the former spelling 001 was a Python-2-only octal
    # literal with the same value.
    wiki_resolver.priority = 1

    def __init__(self, formatter, request):
        """
        formatter is the MoinMoin formatter used for wiki markup; request
        is the MoinMoin request whose ``write`` receives the output.
        """
        html4css1.Writer.__init__(self)
        self.formatter = formatter
        self.request = request
        # Add our wiki unknown_reference_resolver to our list of functions
        # to run when a target isn't found
        self.unknown_reference_resolvers = [self.wiki_resolver]
        # We create a new parser to process MoinMoin wiki style links in
        # the reST.
        self.wikiparser = MoinMoin.parser.wiki.Parser('', self.request)
        self.wikiparser.formatter = self.formatter
        self.wikiparser.hilite_re = None
        # Reference nodes that wiki_resolver rewrote.
        self.nodes = []

    def translate(self):
        """
        Walk the document with a MoinTranslator and store the escaped
        result in ``self.output``.
        """
        visitor = MoinTranslator(self.document,
                                 self.formatter,
                                 self.request,
                                 self.wikiparser,
                                 self)
        try:
            self.document.walkabout(visitor)
            self.visitor = visitor
            self.output = html_escape_unicode(visitor.astext())
        finally:
            # MoinTranslator replaces request.write while it is alive and
            # only puts it back in astext(). Restore it here as well so
            # that an exception during the walk does not leave the request
            # writing into the translator's capture buffer.
            self.request.write = visitor.original_write
class Parser:
    """
    MoinMoin parser entry point: renders reStructuredText through docutils
    and writes the resulting HTML to the request.
    """

    # allow caching - This should be turned off when testing.
    caching = 1

    def __init__(self, raw, request, **kw):
        """
        raw is the reST source text, request the MoinMoin request object.
        Extra keyword arguments are accepted and ignored.
        """
        self.raw = raw
        self.request = request
        self.form = request.form

    def format(self, formatter):
        """
        Publish ``self.raw`` and write title, subtitle, docinfo and body
        fragment (in that order) to ``self.request``.
        """
        # Instantiating MoinDirectives is done for its side effect of
        # registering the custom directives before the source is parsed.
        MoinDirectives(self.request)

        moin_writer = MoinWriter(formatter, self.request)
        parts = publish_parts(source=self.raw, writer=moin_writer)

        pieces = []
        if parts['title']:
            pieces.append('<h2>' + parts['title'] + '</h2>')
        # If there is only one subtitle then it is held in parts['subtitle'].
        # However, if there is more than one subtitle then this is empty and
        # fragment contains all of the subtitles.
        if parts['subtitle']:
            pieces.append('<h3>' + parts['subtitle'] + '</h3>')
        if parts['docinfo']:
            pieces.append(parts['docinfo'])
        pieces.append(parts['fragment'])
        self.request.write(html_escape_unicode(''.join(pieces)))
class MoinTranslator(html4css1.HTMLTranslator):
    """
    HTML translator that delegates wiki-specific markup (links, inline
    images, macros and some basic formatting) to the MoinMoin formatter.
    """

    def __init__(self, document, formatter, request, parser, writer):
        """
        document is the docutils document, formatter the MoinMoin
        formatter, request the MoinMoin request and parser the wiki parser
        used to render wiki markup. writer is accepted but not used.
        """
        html4css1.HTMLTranslator.__init__(self, document)
        self.formatter = formatter
        self.request = request
        # MMG: Using our own writer when needed. Save the old one to restore
        # after the page has been processed by the html4css1 parser.
        self.original_write, self.request.write = self.request.write, self.capture_wiki_formatting
        self.wikiparser = parser
        self.wikiparser.request = request
        # MoinMoin likes to start the initial headers at level 3 and the
        # title gets level 2, so to comply with their styles, we do here
        # also.
        # TODO: Could this be fixed by passing this value in
        # settings_overrides?
        self.initial_header_level = 3
        # Temporary place for wiki returned markup. This will be filled when
        # replacing the default writer with the capture_wiki_formatting
        # function (see visit_image for an example).
        self.wiki_text = ''
        self.setup_wiki_handlers()

    def capture_wiki_formatting(self, text):
        """
        Captures MoinMoin generated markup to the instance variable
        wiki_text.
        """
        # This is called multiple times per MoinMoin format call (sometimes
        # with empty strings), so the text is accumulated rather than
        # assigned.
        self.wiki_text += text

    def process_wiki_text(self, text):
        """
        This sequence is repeated numerous times, so it's captured as a
        single call here. It's important that wiki_text is blanked before
        we make the format call. format will call request.write which we've
        hooked to capture_wiki_formatting. If wiki_text is not blanked
        before a call to request.write we will get the old markup as well
        as the newly generated markup.

        TODO: Could implement this as a list so that it acts as a stack. I
        don't like having to remember to blank wiki_text.
        """
        self.wiki_text = ''
        self.wikiparser.raw = text
        self.wikiparser.format(self.formatter)

    def add_wiki_markup(self):
        """
        Place holder in case this becomes more elaborate someday. For now
        it only appends the MoinMoin generated markup to the html body and
        raises SkipNode.
        """
        self.body.append(self.wiki_text)
        self.wiki_text = ''
        raise docutils.nodes.SkipNode

    def astext(self):
        """Restore the original request.write and return the document text."""
        self.request.write = self.original_write
        return html4css1.HTMLTranslator.astext(self)

    def process_inline(self, node, uri_string):
        """
        Process the "inline:" link scheme. This can either come from
        visit_reference or from visit_image. The uri_string changes
        depending on the caller. The uri is passed to MoinMoin to handle
        the inline link. If it is an image, the src line is extracted and
        passed to the html4css1 writer to allow the reST image attributes.
        Otherwise, the html from MoinMoin is inserted into the reST
        document and SkipNode is raised.
        """
        self.process_wiki_text(node[uri_string])
        # Only pass the src and alt parts to the writer. The reST writer
        # inserts its own tags so we don't need the MoinMoin html markup.
        src = re.search('src="([^"]+)"', self.wiki_text)
        if src:
            node['uri'] = src.groups()[0]
            if not 'alt' in node.attributes:
                alt = re.search('alt="([^"]*)"', self.wiki_text)
                if alt:
                    node['alt'] = alt.groups()[0]
        else:
            # Image doesn't exist yet for the page so just use what's
            # returned from MoinMoin verbatim
            self.add_wiki_markup()

    def process_wiki_target(self, target):
        """Render target as wiki markup and insert the result verbatim."""
        self.process_wiki_text(target)
        # MMG: May need a call to fixup_wiki_formatting here but I
        # don't think so.
        self.add_wiki_markup()

    def fixup_wiki_formatting(self, text):
        """
        Strip paragraph tags, newlines, the space following a '>' and one
        trailing space from MoinMoin generated markup; return the result.
        """
        # The replacements interact ('\n' removal can create a new '> '),
        # so they are applied in this fixed order instead of in the
        # unspecified iteration order of a dict.
        replacements = (('<p>', ''), ('</p>', ''), ('\n', ''), ('> ', '>'))
        for src, dst in replacements:
            text = text.replace(src, dst)
        # Everything seems to have a space ending the text block. We want
        # to get rid of this
        if text and text[-1] == ' ':
            text = text[:-1]
        return text

    def visit_reference(self, node):
        """
        Pass links to MoinMoin to get the correct wiki space url. Extract
        the url and pass it on to the html4css1 writer to handle. Inline
        images are also handled by visit_image. Not sure what the
        "drawing:" link scheme is used for, so for now it is handled here.

        Also included here is a hack to allow MoinMoin macros. This routine
        checks for a link which starts with "[[". This link is passed to
        the MoinMoin formatter and the resulting markup is inserted into
        the document in the place of the original link reference.
        """
        moin_link_schemes = ['wiki:', 'attachment:', 'drawing:', '[[',
                             'inline:']

        if 'refuri' in node.attributes:
            target = None
            refuri = node['refuri']

            # MMG: Fix this line
            if [scheme for scheme in moin_link_schemes if
                    refuri.lstrip().startswith(scheme)]:
                # For a macro, we want the actual text from the user in
                # target, not the fully normalized version that is
                # contained in refuri.
                if refuri.startswith('[['):
                    target = node['name']
                else:
                    target = refuri
            # TODO: Figure out the following two elif's and comment
            # appropriately.
            # The node should have a whitespace normalized name if the
            # docutils reStructuredText parser would normally fully
            # normalize the name.
            elif ('name' in node.attributes and
                  fully_normalize_name(node['name']) == refuri):
                target = ':%s:' % (node['name'])
            # If it's not a uri containing a ':' then it's probably
            # destined for wiki space.
            elif ':' not in refuri:
                target = ':%s:' % (refuri)

            if target:
                if target.startswith('inline:'):
                    self.process_inline(node, 'refuri')
                elif target.startswith('[[') and target.endswith(']]'):
                    self.process_wiki_target(target)
                else:
                    # Not a macro or inline so hopefully it's a link. Put
                    # the target in brackets so that MoinMoin knows it's a
                    # link. Extract the href, if it exists, and let
                    # docutils handle it from there. If there is no href
                    # just add whatever MoinMoin returned.
                    node_text = node.astext().replace('\n', ' ')
                    self.process_wiki_text('[%s %s]' % (target, node_text))
                    href = re.search('href="([^"]+)"', self.wiki_text)
                    if href:
                        # dirty hack in order to undo the HTML entity
                        # quoting
                        node['refuri'] = href.groups()[0].replace("&amp;", "&")
                    else:
                        self.wiki_text = self.fixup_wiki_formatting(self.wiki_text)
                        self.add_wiki_markup()
        html4css1.HTMLTranslator.visit_reference(self, node)

    def visit_image(self, node):
        """
        Need to intervene in the case of inline images. We need MoinMoin to
        give us the actual src line to the image and then we can feed this
        to the default html4css1 writer. NOTE: Since the writer can't
        "open" this image the scale attribute doesn't work without directly
        specifying the height or width (or both).

        TODO: Need to handle figures similarly.
        """
        uri = node['uri'].lstrip()
        prefix = ''  # assume no prefix
        if ':' in uri:
            prefix = uri.split(':', 1)[0]
        # if prefix isn't URL, try to display in page
        if not prefix.lower() in ('file', 'http', 'https', 'ftp'):
            # no prefix given, so fake "inline:"
            if not prefix:
                node['uri'] = 'inline:' + uri
            self.process_inline(node, 'uri')
        html4css1.HTMLTranslator.visit_image(self, node)

    def create_wiki_functor(self, moin_func):
        """
        Build a (visit, depart) pair of functions that emit the opening and
        closing markup of the formatter method named moin_func.
        """
        moin_callable = getattr(self.formatter, moin_func)

        def visit_func(self, node):
            # request.write is hooked, so the markup lands in wiki_text.
            self.wiki_text = ''
            self.request.write(moin_callable(1))
            self.body.append(self.wiki_text)

        def depart_func(self, node):
            self.wiki_text = ''
            self.request.write(moin_callable(0))
            self.body.append(self.wiki_text)

        return visit_func, depart_func

    def setup_wiki_handlers(self):
        """
        Have the MoinMoin formatter handle markup when it makes sense.
        These are portions of the document that do not contain reST
        specific markup. This allows these portions of the document to look
        consistent with other wiki pages.

        Setup dispatch routines to handle basic document markup. The
        handlers dict is the html4css1 handler name followed by the wiki
        handler name.
        """
        handlers = {
            # Text Markup
            'emphasis': 'emphasis',
            'strong': 'strong',
            'literal': 'code',
            # Blocks
            'literal_block': 'preformatted',
            # Simple Lists
            'bullet_list': 'bullet_list',
            'list_item': 'listitem',
            # Definition List
            'definition_list': 'definition_list',
            # Admonitions
            'warning': 'highlight'}
        for rest_func, moin_func in handlers.items():
            visit_func, depart_func = self.create_wiki_functor(moin_func)
            # Bind the plain functions to this instance so that they
            # override the html4css1 visit_*/depart_* methods.
            visit_func = new.instancemethod(visit_func, self, MoinTranslator)
            depart_func = new.instancemethod(depart_func, self, MoinTranslator)
            setattr(self, 'visit_%s' % (rest_func), visit_func)
            setattr(self, 'depart_%s' % (rest_func), depart_func)

    # Enumerated list takes an extra parameter so we handle this differently
    def visit_enumerated_list(self, node):
        """Emit the formatter's list opening, honouring the 'start' value."""
        self.wiki_text = ''
        self.request.write(self.formatter.number_list(1, start=node.get('start', None)))
        self.body.append(self.wiki_text)

    def depart_enumerated_list(self, node):
        """Emit the formatter's list closing markup."""
        self.wiki_text = ''
        self.request.write(self.formatter.number_list(0))
        self.body.append(self.wiki_text)
class MoinDirectives:
    """
    Class to handle all custom directive handling. This code is called as
    part of the parsing stage.
    """

    def __init__(self, request):
        """Register the custom directives; request is the MoinMoin request."""
        self.request = request

        # include MoinMoin pages
        directives.register_directive('include', self.include)

        # used for MoinMoin macros
        directives.register_directive('macro', self.macro)

        # disallow a few directives in order to prevent XSS
        # disallowed include because it suffers from these bugs:
        #  * recursive includes are possible
        #
        # for directive in ('meta', 'include', 'raw'):
        for directive in ('meta', 'raw'):
            directives.register_directive(directive, None)

        # disable the raw role
        roles._roles['raw'] = None

        # As a quick fix to handle recursive includes we limit the times a
        # document can be included to one.
        self.included_documents = []

    # Handle the include directive rather than letting the default docutils
    # parser handle it. This allows the inclusion of MoinMoin pages instead
    # of something from the filesystem.
    def include(self, name, arguments, options, content, lineno,
                content_offset, block_text, state, state_machine):
        """
        Insert the raw body of the wiki page named by content[0] into the
        parser input. A message is inserted instead when the page was
        already included once or does not exist.

        Always returns an empty node list: the included text is fed to
        state_machine rather than returned as nodes.
        """
        # content contains the included file name

        _ = self.request.getText

        if not content:
            return []

        if content[0] in self.included_documents:
            lines = [_("**Duplicate included files are not permitted**")]
            state_machine.insert_input(lines, 'MoinDirectives')
            return []
        self.included_documents.append(content[0])
        # NOTE(review): no explicit read-permission check is visible here;
        # confirm that the included page's ACLs are enforced elsewhere.
        page = Page(page_name = content[0], request = self.request)
        if page.exists():
            text = page.get_raw_body()
            lines = text.split('\n')
            # Remove the "#format rst" line
            if lines[0].startswith("#format"):
                del lines[0]
        else:
            lines = [_("**Could not find the referenced page: %s**") % (content[0],)]
        # Insert the text from the included document and then continue
        # parsing
        state_machine.insert_input(lines, 'MoinDirectives')
        return []

    include.content = True

    # Add additional macro directive.
    # This allows MoinMoin macros to be used either by using the directive
    # directly or by using the substitution syntax. Much cleaner than using
    # the reference hack (`[[SomeMacro]]`_). This however simply adds a node
    # to the document tree which is a reference, but through a much better
    # user interface.
    def macro(self, name, arguments, options, content, lineno,
              content_offset, block_text, state, state_machine):
        """
        Turn the macro named in content[0] into a reference node whose
        refuri and name are the bracketed macro text.

        Returns a one-element list with that node, or an empty list when
        the directive has no content.
        """
        # content contains macro to be called
        if not content:
            return []
        # Allow either with or without brackets
        if content[0].startswith('[['):
            macro = content[0]
        else:
            macro = '[[%s]]' % content[0]
        ref = reference(macro, refuri = macro)
        ref['name'] = macro
        return [ref]

    macro.content = True