sandbox/mmgilbe/rst.py

   1 # -*- coding: iso-8859-1 -*-
   2 """
   3     MoinMoin - ReStructured Text Parser
   4
   5     @copyright: 2004 by Matthew Gilbert <gilbert AT voxmea DOT net>
   6         and by Alexander Schremmer <alex AT alexanderweb DOT de>
   7     @license: GNU GPL, see COPYING for details.
   8
   9     REQUIRES docutils 0.3.3 or later
  10 """
  11
  12 #############################################################################
  13 ### ReStructured Text Parser
  14 #############################################################################
  15
  16 import re
  17 import new
  18 import StringIO
  19 import __builtin__
  20 import sys
  21 import copy
  22
  23 # docutils imports are below
  24 import MoinMoin.parser.wiki
  25 from MoinMoin.Page import Page
  26
# Intentionally empty: the rendered output depends on nothing but the raw
# page text.
# NOTE(review): presumably read by MoinMoin's caching machinery -- confirm.
Dependencies = [] # this parser just depends on the raw text
  28
  29 # --- make docutils safe by overriding all module-scoped names related to IO ---
  30
  31 # TODO: Add an error message to dummyOpen so that the user knows what they did
  32 # requested an unsupported feature of docutils in MoinMoin.
  33 def dummyOpen(x, y=None, z=None): return
  34
  35 class dummyIO(StringIO.StringIO):
  36     def __init__(self, destination=None, destination_path=None,
  37                  encoding=None, error_handler='', autoclose=1,
  38                  handle_io_errors=1, source_path=None):
  39         StringIO.StringIO.__init__(self)
  40         pass
  41
  42 class dummyUrllib2:
  43     def urlopen(a):
  44         return StringIO.StringIO()
  45     urlopen = staticmethod(urlopen)
  46
  47 # # # All docutils imports must be contained below here
  48 import docutils
  49 from docutils.core import publish_parts
  50 from docutils.writers import html4css1
  51 from docutils.nodes import fully_normalize_name, reference
  52 from docutils.parsers import rst
  53 from docutils.parsers.rst import directives, roles
  54 # # # All docutils imports must be contained above here
  55
  56 def safe_import(name, globals = None, locals = None, fromlist = None):
  57     mod = __builtin__.__import__(name, globals, locals, fromlist)
  58     if mod:
  59         mod.open = dummyOpen
  60         mod.urllib2 = dummyUrllib2
  61     return mod
  62
  63 # Go through and change all docutils modules to use a dummyOpen and dummyUrllib2
  64 # module. Also make sure that any docutils imported modules also get the dummy
  65 # implementations.
  66 for i in sys.modules.keys():
  67     if i.startswith('docutils') and sys.modules[i]:
  68         sys.modules[i].open = dummyOpen
  69         sys.modules[i].urllib2 = dummyUrllib2
  70         sys.modules[i].__import__ = safe_import
  71
  72 docutils.io.FileInput = dummyIO
  73 docutils.io.FileOutput = dummyIO
  74
  75 # --- End of dummy-code --------------------------------------------------------
  76
  77 def html_escape_unicode(node):
  78     # Find Python function that does this for me. string.encode('ascii',
  79     # 'xmlcharrefreplace') only 2.3 and above.
  80     for i in node:
  81         if ord(i) > 127:
  82             node = node.replace(i, '&#%d;' % (ord(i)))
  83     return node
  84
  85 class MoinWriter(html4css1.Writer):
  86
  87     config_section = 'MoinMoin writer'
  88     config_section_dependencies = ('writers',)
  89
  90     #"""Final translated form of `document`."""
  91     output = None
  92
  93     def wiki_resolver(self, node):
  94         """
  95             Normally an unknown reference would be an error in an reST document.
  96             However, this is how new documents are created in the wiki. This
  97             passes on unknown references to eventually be handled by the
  98             MoinMoin formatter.
  99         """
 100         # TODO: Need to better document the attributes here.
 101         if getattr(node, 'indirect_reference_name', None):
 102             node['refuri'] = node.indirect_reference_name
 103             return 1
 104         elif 'id' in node.attributes:
 105             # I'm pretty sure the first test should catch any targets or
 106             # references with the "id" attribute. Therefore, if we get to here
 107             # its probably an internal link that didn't work so we let it go
 108             # through as an error.
 109             return 0
 110         node['refuri'] = node['refname']
 111         del node['refname']
 112         self.nodes.append(node)
 113         return 1
 114
 115     wiki_resolver.priority = 001
 116
 117     def __init__(self, formatter, request):
 118         html4css1.Writer.__init__(self)
 119         self.formatter = formatter
 120         self.request = request
 121         # Add our wiki unknown_reference_resolver to our list of functions to
 122         # run when a target isn't found
 123         self.unknown_reference_resolvers = [self.wiki_resolver]
 124         # We create a new parser to process MoinMoin wiki style links in the
 125         # reST.
 126         self.wikiparser = MoinMoin.parser.wiki.Parser('', self.request)
 127         self.wikiparser.formatter = self.formatter
 128         self.wikiparser.hilite_re = None
 129         self.nodes = []
 130
 131
 132     def translate(self):
 133         visitor = MoinTranslator(self.document,
 134                                  self.formatter,
 135                                  self.request,
 136                                  self.wikiparser,
 137                                  self)
 138         self.document.walkabout(visitor)
 139         self.visitor = visitor
 140         self.output = html_escape_unicode(visitor.astext())
 141
 142
 143 class Parser:
 144
 145     # allow caching - This should be turned off when testing.
 146     caching = 1
 147
 148     def __init__(self, raw, request, **kw):
 149         self.raw = raw
 150         self.request = request
 151         self.form = request.form
 152
 153     def format(self, formatter):
 154         # Create our simple parser
 155         parser = MoinDirectives(self.request)
 156
 157         parts =  publish_parts(source = self.raw,
 158                                writer = MoinWriter(formatter, self.request))
 159
 160         text = ''
 161         if parts['title']:
 162             text += '<h2>' + parts['title'] + '</h2>'
 163         # If there is only one subtitle then it is held in parts['subtitle'].
 164         # However, if there is more than one subtitle then this is empty and
 165         # fragment contains all of the subtitles.
 166         if parts['subtitle']:
 167             text += '<h3>' + parts['subtitle'] + '</h3>'
 168         if parts['docinfo']:
 169             text += parts['docinfo']
 170         text += parts['fragment']
 171         self.request.write(html_escape_unicode(text))
 172
 173
 174 class MoinTranslator(html4css1.HTMLTranslator):
 175
 176     def __init__(self, document, formatter, request, parser, writer):
 177         html4css1.HTMLTranslator.__init__(self, document)
 178         self.formatter = formatter
 179         self.request = request
 180         # MMG: Using our own writer when needed. Save the old one to restore
 181         # after the page has been processed by the html4css1 parser.
 182         self.original_write, self.request.write = self.request.write, self.capture_wiki_formatting
 183         self.wikiparser = parser
 184         self.wikiparser.request = request
 185         # MoinMoin likes to start the initial headers at level 3 and the title
 186         # gets level 2, so to comply with their styles, we do here also.
 187         # TODO: Could this be fixed by passing this value in settings_overrides?
 188         self.initial_header_level = 3
 189         # Temporary place for wiki returned markup. This will be filled when
 190         # replacing the default writer with the capture_wiki_formatting
 191         # function (see visit_image for an example).
 192         self.wiki_text = ''
 193         self.setup_wiki_handlers()
 194
 195     def capture_wiki_formatting(self, text):
 196         """
 197             Captures MoinMoin generated markup to the instance variable
 198             wiki_text.
 199         """
 200         # For some reason getting empty strings here which of course overwrites
 201         # what we really want (this is called multiple times per MoinMoin
 202         # format call, which I don't understand).
 203         self.wiki_text += text
 204
 205     def process_wiki_text(self, text):
 206         """
 207             This sequence is repeated numerous times, so its captured as a
 208             single call here. Its important that wiki_text is blanked before we
 209             make the format call. format will call request.write which we've
 210             hooked to capture_wiki_formatting. If wiki_text is not blanked
 211             before a call to request.write we will get the old markup as well as
 212             the newly generated markup.
 213
 214             TODO: Could implement this as a list so that it acts as a stack. I
 215             don't like having to remember to blank wiki_text.
 216         """
 217         self.wiki_text = ''
 218         self.wikiparser.raw = text
 219         self.wikiparser.format(self.formatter)
 220
 221     def add_wiki_markup(self):
 222         """
 223             Place holder in case this becomes more elaborate someday. For now it
 224             only appends the MoinMoin generated markup to the html body and
 225             raises SkipNode.
 226         """
 227         self.body.append(self.wiki_text)
 228         self.wiki_text = ''
 229         raise docutils.nodes.SkipNode
 230
 231     def astext(self):
 232         self.request.write = self.original_write
 233         return html4css1.HTMLTranslator.astext(self)
 234
 235     def process_inline(self, node, uri_string):
 236         """
 237             Process the "inline:" link scheme. This can either ome from
 238             visit_reference or from visit_image. The uri_string changes
 239             depending on the caller. The uri is passed to MoinMoin to handle the
 240             inline link. If it is an image, the src line is extracted and passed
 241             to the html4css1 writer to allow the reST image attributes.
 242             Otherwise, the html from MoinMoin is inserted into the reST document
 243             and SkipNode is raised.
 244         """
 245         self.process_wiki_text(node[uri_string])
 246         # Only pass the src and alt parts to the writer. The reST writer
 247         # inserts its own tags so we don't need the MoinMoin html markup.
 248         src = re.search('src="([^"]+)"', self.wiki_text)
 249         if src:
 250             node['uri'] = src.groups()[0]
 251             if not 'alt' in node.attributes:
 252                 alt = re.search('alt="([^"]*)"', self.wiki_text)
 253                 if alt:
 254                     node['alt'] = alt.groups()[0]
 255         else:
 256             # Image doesn't exist yet for the page so just use what's
 257             # returned from MoinMoin verbatim
 258             self.add_wiki_markup()
 259
    def process_wiki_target(self, target):
        """
            Render `target` with the wiki parser and insert the resulting
            markup into the body. add_wiki_markup ends by raising SkipNode,
            so this method does not return normally.
        """
        self.process_wiki_text(target)
        # MMG: May need a call to fixup_wiki_formatting here but I
        # don't think so.
        self.add_wiki_markup()
 265
 266     def fixup_wiki_formatting(self, text):
 267         replacement = {'<p>': '', '</p>': '', '\n': '', '> ': '>'}
 268         for src, dst in replacement.items():
 269             text = text.replace(src, dst)
 270         # Everything seems to have a space ending the text block. We want to
 271         # get rid of this
 272         if text and text[-1] == ' ':
 273             text = text[:-1]
 274         return text
 275
 276     def visit_reference(self, node):
 277         """
 278             Pass links to MoinMoin to get the correct wiki space url. Extract
 279             the url and pass it on to the html4css1 writer to handle. Inline
 280             images are also handled by visit_image. Not sure what the "drawing:"
 281             link scheme is used for, so for now it is handled here.
 282
 283             Also included here is a hack to allow MoinMoin macros. This routine
 284             checks for a link which starts with "[[". This link is passed to the
 285             MoinMoin formatter and the resulting markup is inserted into the
 286             document in the place of the original link reference.
 287         """
 288         moin_link_schemes = ['wiki:', 'attachment:', 'drawing:', '[[',
 289                              'inline:']
 290
 291         if 'refuri' in node.attributes:
 292             target = None
 293             refuri = node['refuri']
 294
 295             # MMG: Fix this line
 296             if [scheme for scheme in moin_link_schemes if
 297                     refuri.lstrip().startswith(scheme)]:
 298                 # For a macro, We want the actuall text from the user in target,
 299                 # not the fully normalized version that is contained in refuri.
 300                 if refuri.startswith('[['):
 301                     target = node['name']
 302                 else:
 303                     target = refuri
 304             # TODO: Figure out the following two elif's and comment
 305             # appropriately.
 306             # The node should have a whitespace normalized name if the docutlis
 307             # reStructuredText parser would normally fully normalize the name.
 308             elif ('name' in node.attributes and
 309                   fully_normalize_name(node['name']) == refuri):
 310                 target = ':%s:' % (node['name'])
 311             # If its not a uri containing a ':' then its probably destined for
 312             # wiki space.
 313             elif ':' not in refuri:
 314                 target = ':%s:' % (refuri)
 315
 316             if target:
 317                 if target.startswith('inline:'):
 318                     self.process_inline(node, 'refuri')
 319                 elif target.startswith('[[') and target.endswith(']]'):
 320                     self.process_wiki_target(target)
 321                 else:
 322                     # Not a macro or inline so hopefully its a link. Put the target in
 323                     # brackets so that MoinMoin knows its a link. Extract the
 324                     # href, if it exists, and let docutils handle it from there.
 325                     # If there is no href just add whatever MoinMoin returned.
 326                     node_text = node.astext().replace('\n', ' ')
 327                     self.process_wiki_text('[%s %s]' % (target, node_text))
 328                     href = re.search('href="([^"]+)"', self.wiki_text)
 329                     if href:
 330                         # dirty hack in order to undo the HTML entity quoting
 331                         node['refuri'] = href.groups()[0].replace("&amp;", "&")
 332                     else:
 333                         self.wiki_text = self.fixup_wiki_formatting(self.wiki_text)
 334                         self.add_wiki_markup()
 335         html4css1.HTMLTranslator.visit_reference(self, node)
 336
 337     def visit_image(self, node):
 338         """
 339             Need to intervene in the case of inline images. We need MoinMoin to
 340             give us the actual src line to the image and then we can feed this
 341             to the default html4css1 writer. NOTE: Since the writer can't "open"
 342             this image the scale attribute doesn't work without directly
 343             specifying the height or width (or both).
 344
 345             TODO: Need to handle figures similarly.
 346         """
 347         uri = node['uri'].lstrip()
 348         prefix = ''       # assume no prefix
 349         if ':' in uri:
 350             prefix = uri.split(':',1)[0]
 351         # if prefix isn't URL, try to display in page
 352         if not prefix.lower() in ('file', 'http', 'https', 'ftp'):
 353             # no prefix given, so fake "inline:"
 354             if not prefix:
 355                 node['uri'] = 'inline:' + uri
 356             self.process_inline(node, 'uri')
 357         html4css1.HTMLTranslator.visit_image(self, node)
 358
 359     def create_wiki_functor(self, moin_func):
 360         moin_callable = getattr(self.formatter, moin_func)
 361         def visit_func(self, node):
 362             self.wiki_text = ''
 363             self.request.write(moin_callable(1))
 364             self.body.append(self.wiki_text)
 365         def depart_func(self, node):
 366             self.wiki_text = ''
 367             self.request.write(moin_callable(0))
 368             self.body.append(self.wiki_text)
 369         return visit_func, depart_func
 370
 371     def setup_wiki_handlers(self):
 372         """
 373             Have the MoinMoin formatter handle markup when it makes sense. These
 374             are portions of the document that do not contain reST specific
 375             markup. This allows these portions of the document to look
 376             consistent with other wiki pages.
 377
 378             Setup dispatch routines to handle basic document markup. The
 379             hanlders dict is the html4css1 handler name followed by the wiki
 380             handler name.
 381         """
 382         handlers = {
 383             # Text Markup
 384             'emphasis': 'emphasis',
 385             'strong': 'strong',
 386             'literal': 'code',
 387             # Blocks
 388             'literal_block': 'preformatted',
 389             # Simple Lists
 390             'bullet_list': 'bullet_list',
 391             'list_item': 'listitem',
 392             # Definition List
 393             'definition_list': 'definition_list',
 394             # Admonitions
 395             'warning': 'highlight'}
 396         for rest_func, moin_func in handlers.items():
 397             visit_func, depart_func = self.create_wiki_functor(moin_func)
 398             visit_func = new.instancemethod(visit_func, self, MoinTranslator)
 399             depart_func = new.instancemethod(depart_func, self, MoinTranslator)
 400             setattr(self, 'visit_%s' % (rest_func), visit_func)
 401             setattr(self, 'depart_%s' % (rest_func), depart_func)
 402
 403     # Enumerated list takes an extra paramter so we handle this differently
 404     def visit_enumerated_list(self, node):
 405         self.wiki_text = ''
 406         self.request.write(self.formatter.number_list(1, start=node.get('start', None)))
 407         self.body.append(self.wiki_text)
 408
 409     def depart_enumerated_list(self, node):
 410         self.wiki_text = ''
 411         self.request.write(self.formatter.number_list(0))
 412         self.body.append(self.wiki_text)
 413
 414
 415 class MoinDirectives:
 416     """
 417         Class to handle all custom directive handling. This code is called as
 418         part of the parsing stage.
 419     """
 420
 421     def __init__(self, request):
 422         self.request = request
 423
 424         # include MoinMoin pages
 425         directives.register_directive('include', self.include)
 426
 427         # used for MoinMoin macros
 428         directives.register_directive('macro', self.macro)
 429
 430         # disallow a few directives in order to prevent XSS
 431         # disallowed include because it suffers from these bugs:
 432         #  * recursive includes are possible
 433
 434         # for directive in ('meta', 'include', 'raw'):
 435         for directive in ('meta', 'raw'):
 436             directives.register_directive(directive, None)
 437
 438         # disable the raw role
 439         roles._roles['raw'] = None
 440
 441         # As a quick fix to handle recursive includes we limit the times a
 442         # document can be included to one.
 443         self.included_documents = []
 444
 445     # Handle the include directive rather than letting the default docutils
 446     # parser handle it. This allows the inclusion of MoinMoin pages instead of
 447     # something from the filesystem.
 448     def include(self, name, arguments, options, content, lineno,
 449                 content_offset, block_text, state, state_machine):
 450         # content contains the included file name
 451
 452         _ = self.request.getText
 453
 454         if len(content):
 455             if content[0] in self.included_documents:
 456                 lines = [_("**Duplicate included files are not permitted**")]
 457                 state_machine.insert_input(lines, 'MoinDirectives')
 458                 return
 459             self.included_documents.append(content[0])
 460             page = Page(page_name = content[0], request = self.request)
 461             if page.exists():
 462                 text = page.get_raw_body()
 463                 lines = text.split('\n')
 464                 # Remove the "#format rst" line
 465                 if lines[0].startswith("#format"):
 466                     del lines[0]
 467             else:
 468                 lines = [_("**Could not find the referenced page: %s**") % (content[0],)]
 469             # Insert the text from the included document and then continue
 470             # parsing
 471             state_machine.insert_input(lines, 'MoinDirectives')
 472         return
 473
 474     include.content = True
 475
 476     # Add additional macro directive.
 477     # This allows MoinMoin macros to be used either by using the directive
 478     # directly or by using the substitution syntax. Much cleaner than using the
 479     # reference hack (`[[SomeMacro]]`_). This however simply adds a node to the
 480     # document tree which is a reference, but through a much better user
 481     # interface.
 482     def macro(self, name, arguments, options, content, lineno,
 483                 content_offset, block_text, state, state_machine):
 484         # content contains macro to be called
 485         if len(content):
 486             # Allow either with or without brackets
 487             if content[0].startswith('[['):
 488                 macro = content[0]
 489             else:
 490                 macro = '[[%s]]' % content[0]
 491             ref = reference(macro, refuri = macro)
 492             ref['name'] = macro
 493             return [ref]
 494         return
 495
 496     macro.content = True
 497