docutils/transforms/universal.py

   1 # $Id$
   2 # -*- coding: utf-8 -*-
   3 # Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
   4 # Maintainer: docutils-develop@lists.sourceforge.net
   5 # Copyright: This module has been placed in the public domain.
   6
   7 """
   8 Transforms needed by most or all documents:
   9
  10 - `Decorations`: Generate a document's header & footer.
  11 - `Messages`: Placement of system messages stored in
  12   `nodes.document.transform_messages`.
  13 - `TestMessages`: Like `Messages`, used on test runs.
  14 - `FinalReferences`: Resolve remaining references.
  15 """
  16
  17 __docformat__ = 'reStructuredText'
  18
  19 import re
  20 import sys
  21 import time
  22 from docutils import nodes, utils
  23 from docutils.transforms import TransformError, Transform
  24 from docutils.utils import smartquotes
  25
  26 class Decorations(Transform):
  27
  28     """
  29     Populate a document's decoration element (header, footer).
  30     """
  31
  32     default_priority = 820
  33
  34     def apply(self):
  35         header_nodes = self.generate_header()
  36         if header_nodes:
  37             decoration = self.document.get_decoration()
  38             header = decoration.get_header()
  39             header.extend(header_nodes)
  40         footer_nodes = self.generate_footer()
  41         if footer_nodes:
  42             decoration = self.document.get_decoration()
  43             footer = decoration.get_footer()
  44             footer.extend(footer_nodes)
  45
  46     def generate_header(self):
  47         return None
  48
  49     def generate_footer(self):
  50         # @@@ Text is hard-coded for now.
  51         # Should be made dynamic (language-dependent).
  52         settings = self.document.settings
  53         if settings.generator or settings.datestamp or settings.source_link \
  54                or settings.source_url:
  55             text = []
  56             if settings.source_link and settings._source \
  57                    or settings.source_url:
  58                 if settings.source_url:
  59                     source = settings.source_url
  60                 else:
  61                     source = utils.relative_path(settings._destination,
  62                                                  settings._source)
  63                 text.extend([
  64                     nodes.reference('', 'View document source',
  65                                     refuri=source),
  66                     nodes.Text('.\n')])
  67             if settings.datestamp:
  68                 datestamp = time.strftime(settings.datestamp, time.gmtime())
  69                 text.append(nodes.Text('Generated on: ' + datestamp + '.\n'))
  70             if settings.generator:
  71                 text.extend([
  72                     nodes.Text('Generated by '),
  73                     nodes.reference('', 'Docutils', refuri=
  74                                     'http://docutils.sourceforge.net/'),
  75                     nodes.Text(' from '),
  76                     nodes.reference('', 'reStructuredText', refuri='http://'
  77                                     'docutils.sourceforge.net/rst.html'),
  78                     nodes.Text(' source.\n')])
  79             return [nodes.paragraph('', '', *text)]
  80         else:
  81             return None
  82
  83
  84 class ExposeInternals(Transform):
  85
  86     """
  87     Expose internal attributes if ``expose_internals`` setting is set.
  88     """
  89
  90     default_priority = 840
  91
  92     def not_Text(self, node):
  93         return not isinstance(node, nodes.Text)
  94
  95     def apply(self):
  96         if self.document.settings.expose_internals:
  97             for node in self.document.traverse(self.not_Text):
  98                 for att in self.document.settings.expose_internals:
  99                     value = getattr(node, att, None)
 100                     if value is not None:
 101                         node['internal:' + att] = value
 102
 103
 104 class Messages(Transform):
 105
 106     """
 107     Place any system messages generated after parsing into a dedicated section
 108     of the document.
 109     """
 110
 111     default_priority = 860
 112
 113     def apply(self):
 114         unfiltered = self.document.transform_messages
 115         threshold = self.document.reporter.report_level
 116         messages = []
 117         for msg in unfiltered:
 118             if msg['level'] >= threshold and not msg.parent:
 119                 messages.append(msg)
 120         if messages:
 121             section = nodes.section(classes=['system-messages'])
 122             # @@@ get this from the language module?
 123             section += nodes.title('', 'Docutils System Messages')
 124             section += messages
 125             self.document.transform_messages[:] = []
 126             self.document += section
 127
 128
 129 class FilterMessages(Transform):
 130
 131     """
 132     Remove system messages below verbosity threshold.
 133     """
 134
 135     default_priority = 870
 136
 137     def apply(self):
 138         for node in self.document.traverse(nodes.system_message):
 139             if node['level'] < self.document.reporter.report_level:
 140                 node.parent.remove(node)
 141
 142
 143 class TestMessages(Transform):
 144
 145     """
 146     Append all post-parse system messages to the end of the document.
 147
 148     Used for testing purposes.
 149     """
 150
 151     default_priority = 880
 152
 153     def apply(self):
 154         for msg in self.document.transform_messages:
 155             if not msg.parent:
 156                 self.document += msg
 157
 158
 159 class StripComments(Transform):
 160
 161     """
 162     Remove comment elements from the document tree (only if the
 163     ``strip_comments`` setting is enabled).
 164     """
 165
 166     default_priority = 740
 167
 168     def apply(self):
 169         if self.document.settings.strip_comments:
 170             for node in self.document.traverse(nodes.comment):
 171                 node.parent.remove(node)
 172
 173
 174 class StripClassesAndElements(Transform):
 175
 176     """
 177     Remove from the document tree all elements with classes in
 178     `self.document.settings.strip_elements_with_classes` and all "classes"
 179     attribute values in `self.document.settings.strip_classes`.
 180     """
 181
 182     default_priority = 420
 183
 184     def apply(self):
 185         if not (self.document.settings.strip_elements_with_classes
 186                 or self.document.settings.strip_classes):
 187             return
 188         # prepare dicts for lookup (not sets, for Python 2.2 compatibility):
 189         self.strip_elements = dict(
 190             [(key, None)
 191              for key in (self.document.settings.strip_elements_with_classes
 192                          or [])])
 193         self.strip_classes = dict(
 194             [(key, None) for key in (self.document.settings.strip_classes
 195                                      or [])])
 196         for node in self.document.traverse(self.check_classes):
 197             node.parent.remove(node)
 198
 199     def check_classes(self, node):
 200         if isinstance(node, nodes.Element):
 201             for class_value in node['classes'][:]:
 202                 if class_value in self.strip_classes:
 203                     node['classes'].remove(class_value)
 204                 if class_value in self.strip_elements:
 205                     return 1
 206
 207 class SmartQuotes(Transform):
 208
 209     """
 210     Replace ASCII quotation marks with typographic form.
 211
 212     Also replace multiple dashes with em-dash/en-dash characters.
 213     """
 214
 215     default_priority = 850
 216
 217     def __init__(self, document, startnode):
 218         Transform.__init__(self, document, startnode=startnode)
 219         self.unsupported_languages = set()
 220
 221     def get_tokens(self, txtnodes):
 222         # A generator that yields ``(texttype, nodetext)`` tuples for a list
 223         # of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
 224
 225         texttype = {True: 'literal', # "literal" text is not changed:
 226                     False: 'plain'}
 227         for txtnode in txtnodes:
 228             nodetype = texttype[isinstance(txtnode.parent,
 229                                            (nodes.literal,
 230                                             nodes.math,
 231                                             nodes.image,
 232                                             nodes.raw,
 233                                             nodes.problematic))]
 234             yield (nodetype, txtnode.astext())
 235
 236
 237     def apply(self):
 238         smart_quotes = self.document.settings.smart_quotes
 239         if not smart_quotes:
 240             return
 241         try:
 242             alternative = smart_quotes.startswith('alt')
 243         except AttributeError:
 244             alternative = False
 245         # print repr(alternative)
 246
 247         document_language = self.document.settings.language_code
 248
 249         # "Educate" quotes in normal text. Handle each block of text
 250         # (TextElement node) as a unit to keep context around inline nodes:
 251         for node in self.document.traverse(nodes.TextElement):
 252             # skip preformatted text blocks and special elements:
 253             if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
 254                 continue
 255             # nested TextElements are not "block-level" elements:
 256             if isinstance(node.parent, nodes.TextElement):
 257                 continue
 258
 259             # list of text nodes in the "text block":
 260             txtnodes = [txtnode for txtnode in node.traverse(nodes.Text)
 261                         if not isinstance(txtnode.parent,
 262                                           nodes.option_string)]
 263
 264             # language: use typographical quotes for language "lang"
 265             lang = node.get_language_code(document_language)
 266             # use alternative form if `smart-quotes` setting starts with "alt":
 267             if alternative:
 268                 if '-x-altquot' in lang:
 269                     lang = lang.replace('-x-altquot', '')
 270                 else:
 271                     lang += '-x-altquot'
 272             # drop subtags missing in quotes:
 273             for tag in utils.normalize_language_tag(lang):
 274                 if tag in smartquotes.smartchars.quotes:
 275                     lang = tag
 276                     break
 277             else: # language not supported: (keep ASCII quotes)
 278                 if lang not in self.unsupported_languages:
 279                     self.document.reporter.warning('No smart quotes '
 280                         'defined for language "%s".'%lang, base_node=node)
 281                 self.unsupported_languages.add(lang)
 282                 lang = ''
 283
 284             # Iterator educating quotes in plain text:
 285             # '2': set all, using old school en- and em- dash shortcuts
 286             teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
 287                                                  attr='2', language=lang)
 288
 289             for txtnode, newtext in zip(txtnodes, teacher):
 290                 txtnode.parent.replace(txtnode, nodes.Text(newtext))
 291
 292             self.unsupported_languages = set() # reset