docutils/transforms/universal.py

   1 # $Id$
   2 # -*- coding: utf-8 -*-
   3 # Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
   4 # Maintainer: docutils-develop@lists.sourceforge.net
   5 # Copyright: This module has been placed in the public domain.
   6
   7 """
   8 Transforms needed by most or all documents:
   9
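- `Decorations`: Generate a document's header & footer.
- `ExposeInternals`: Expose internal attributes if the
  ``expose_internals`` setting is set.
- `Messages`: Placement of system messages stored in
  `nodes.document.transform_messages`.
- `FilterMessages`: Remove system messages below verbosity threshold.
- `TestMessages`: Like `Messages`, used on test runs.
- `StripComments`: Remove comment elements from the document tree.
- `StripClassesAndElements`: Remove elements and "classes" attribute
  values selected by the ``strip_elements_with_classes`` and
  ``strip_classes`` settings.
- `SmartQuotes`: Replace ASCII quotation marks with typographic form.
- `FinalReferences`: Resolve remaining references.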
  15 """
  16
  17 __docformat__ = 'reStructuredText'
  18
  19 import re
  20 import sys
  21 import time
  22 from docutils import nodes, utils
  23 from docutils.transforms import TransformError, Transform
  24 from docutils.utils import smartquotes
  25
  26 class Decorations(Transform):
  27
  28     """
  29     Populate a document's decoration element (header, footer).
  30     """
  31
  32     default_priority = 820
  33
  34     def apply(self):
  35         header_nodes = self.generate_header()
  36         if header_nodes:
  37             decoration = self.document.get_decoration()
  38             header = decoration.get_header()
  39             header.extend(header_nodes)
  40         footer_nodes = self.generate_footer()
  41         if footer_nodes:
  42             decoration = self.document.get_decoration()
  43             footer = decoration.get_footer()
  44             footer.extend(footer_nodes)
  45
  46     def generate_header(self):
  47         return None
  48
  49     def generate_footer(self):
  50         # @@@ Text is hard-coded for now.
  51         # Should be made dynamic (language-dependent).
  52         # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
  53         # for the datestamp?
  54         # See https://sourceforge.net/p/docutils/patches/132/
  55         # and https://reproducible-builds.org/specs/source-date-epoch/
  56         settings = self.document.settings
  57         if settings.generator or settings.datestamp or settings.source_link \
  58                or settings.source_url:
  59             text = []
  60             if settings.source_link and settings._source \
  61                    or settings.source_url:
  62                 if settings.source_url:
  63                     source = settings.source_url
  64                 else:
  65                     source = utils.relative_path(settings._destination,
  66                                                  settings._source)
  67                 text.extend([
  68                     nodes.reference('', 'View document source',
  69                                     refuri=source),
  70                     nodes.Text('.\n')])
  71             if settings.datestamp:
  72                 datestamp = time.strftime(settings.datestamp, time.gmtime())
  73                 text.append(nodes.Text('Generated on: ' + datestamp + '.\n'))
  74             if settings.generator:
  75                 text.extend([
  76                     nodes.Text('Generated by '),
  77                     nodes.reference('', 'Docutils', refuri=
  78                                     'http://docutils.sourceforge.net/'),
  79                     nodes.Text(' from '),
  80                     nodes.reference('', 'reStructuredText', refuri='http://'
  81                                     'docutils.sourceforge.net/rst.html'),
  82                     nodes.Text(' source.\n')])
  83             return [nodes.paragraph('', '', *text)]
  84         else:
  85             return None
  86
  87
  88 class ExposeInternals(Transform):
  89
  90     """
  91     Expose internal attributes if ``expose_internals`` setting is set.
  92     """
  93
  94     default_priority = 840
  95
  96     def not_Text(self, node):
  97         return not isinstance(node, nodes.Text)
  98
  99     def apply(self):
 100         if self.document.settings.expose_internals:
 101             for node in self.document.traverse(self.not_Text):
 102                 for att in self.document.settings.expose_internals:
 103                     value = getattr(node, att, None)
 104                     if value is not None:
 105                         node['internal:' + att] = value
 106
 107
 108 class Messages(Transform):
 109
 110     """
 111     Place any system messages generated after parsing into a dedicated section
 112     of the document.
 113     """
 114
 115     default_priority = 860
 116
 117     def apply(self):
 118         unfiltered = self.document.transform_messages
 119         threshold = self.document.reporter.report_level
 120         messages = []
 121         for msg in unfiltered:
 122             if msg['level'] >= threshold and not msg.parent:
 123                 messages.append(msg)
 124         if messages:
 125             section = nodes.section(classes=['system-messages'])
 126             # @@@ get this from the language module?
 127             section += nodes.title('', 'Docutils System Messages')
 128             section += messages
 129             self.document.transform_messages[:] = []
 130             self.document += section
 131
 132
 133 class FilterMessages(Transform):
 134
 135     """
 136     Remove system messages below verbosity threshold.
 137     """
 138
 139     default_priority = 870
 140
 141     def apply(self):
 142         for node in self.document.traverse(nodes.system_message):
 143             if node['level'] < self.document.reporter.report_level:
 144                 node.parent.remove(node)
 145
 146
 147 class TestMessages(Transform):
 148
 149     """
 150     Append all post-parse system messages to the end of the document.
 151
 152     Used for testing purposes.
 153     """
 154
 155     default_priority = 880
 156
 157     def apply(self):
 158         for msg in self.document.transform_messages:
 159             if not msg.parent:
 160                 self.document += msg
 161
 162
 163 class StripComments(Transform):
 164
 165     """
 166     Remove comment elements from the document tree (only if the
 167     ``strip_comments`` setting is enabled).
 168     """
 169
 170     default_priority = 740
 171
 172     def apply(self):
 173         if self.document.settings.strip_comments:
 174             for node in self.document.traverse(nodes.comment):
 175                 node.parent.remove(node)
 176
 177
 178 class StripClassesAndElements(Transform):
 179
 180     """
 181     Remove from the document tree all elements with classes in
 182     `self.document.settings.strip_elements_with_classes` and all "classes"
 183     attribute values in `self.document.settings.strip_classes`.
 184     """
 185
 186     default_priority = 420
 187
 188     def apply(self):
 189         if not (self.document.settings.strip_elements_with_classes
 190                 or self.document.settings.strip_classes):
 191             return
 192         # prepare dicts for lookup (not sets, for Python 2.2 compatibility):
 193         self.strip_elements = dict(
 194             [(key, None)
 195              for key in (self.document.settings.strip_elements_with_classes
 196                          or [])])
 197         self.strip_classes = dict(
 198             [(key, None) for key in (self.document.settings.strip_classes
 199                                      or [])])
 200         for node in self.document.traverse(self.check_classes):
 201             node.parent.remove(node)
 202
 203     def check_classes(self, node):
 204         if isinstance(node, nodes.Element):
 205             for class_value in node['classes'][:]:
 206                 if class_value in self.strip_classes:
 207                     node['classes'].remove(class_value)
 208                 if class_value in self.strip_elements:
 209                     return 1
 210
 211 class SmartQuotes(Transform):
 212
 213     """
 214     Replace ASCII quotation marks with typographic form.
 215
 216     Also replace multiple dashes with em-dash/en-dash characters.
 217     """
 218
 219     default_priority = 850
 220
 221     def __init__(self, document, startnode):
 222         Transform.__init__(self, document, startnode=startnode)
 223         self.unsupported_languages = set()
 224
 225     def get_tokens(self, txtnodes):
 226         # A generator that yields ``(texttype, nodetext)`` tuples for a list
 227         # of "Text" nodes (interface to ``smartquotes.educate_tokens()``).
 228
 229         texttype = {True: 'literal', # "literal" text is not changed:
 230                     False: 'plain'}
 231         for txtnode in txtnodes:
 232             nodetype = texttype[isinstance(txtnode.parent,
 233                                            (nodes.literal,
 234                                             nodes.math,
 235                                             nodes.image,
 236                                             nodes.raw,
 237                                             nodes.problematic))]
 238             yield (nodetype, txtnode.astext())
 239
 240
 241     def apply(self):
 242         smart_quotes = self.document.settings.smart_quotes
 243         if not smart_quotes:
 244             return
 245         try:
 246             alternative = smart_quotes.startswith('alt')
 247         except AttributeError:
 248             alternative = False
 249         # print repr(alternative)
 250
 251         document_language = self.document.settings.language_code
 252
 253         # "Educate" quotes in normal text. Handle each block of text
 254         # (TextElement node) as a unit to keep context around inline nodes:
 255         for node in self.document.traverse(nodes.TextElement):
 256             # skip preformatted text blocks and special elements:
 257             if isinstance(node, (nodes.FixedTextElement, nodes.Special)):
 258                 continue
 259             # nested TextElements are not "block-level" elements:
 260             if isinstance(node.parent, nodes.TextElement):
 261                 continue
 262
 263             # list of text nodes in the "text block":
 264             txtnodes = [txtnode for txtnode in node.traverse(nodes.Text)
 265                         if not isinstance(txtnode.parent,
 266                                           nodes.option_string)]
 267
 268             # language: use typographical quotes for language "lang"
 269             lang = node.get_language_code(document_language)
 270             # use alternative form if `smart-quotes` setting starts with "alt":
 271             if alternative:
 272                 if '-x-altquot' in lang:
 273                     lang = lang.replace('-x-altquot', '')
 274                 else:
 275                     lang += '-x-altquot'
 276             # drop subtags missing in quotes:
 277             for tag in utils.normalize_language_tag(lang):
 278                 if tag in smartquotes.smartchars.quotes:
 279                     lang = tag
 280                     break
 281             else: # language not supported: (keep ASCII quotes)
 282                 if lang not in self.unsupported_languages:
 283                     self.document.reporter.warning('No smart quotes '
 284                         'defined for language "%s".'%lang, base_node=node)
 285                 self.unsupported_languages.add(lang)
 286                 lang = ''
 287
 288             # Iterator educating quotes in plain text:
 289             # '2': set all, using old school en- and em- dash shortcuts
 290             teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
 291                                                  attr='qDe', language=lang)
 292
 293             for txtnode, newtext in zip(txtnodes, teacher):
 294                 txtnode.parent.replace(txtnode, nodes.Text(newtext))
 295
 296             self.unsupported_languages = set() # reset