Update smartquotes:
[docutils.git] / docutils / transforms / universal.py
blob8a03258ea27a07b6cdeb5cfbbfa2f5eda52fb860
1 # $Id$
2 # -*- coding: utf-8 -*-
3 # Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
4 # Maintainer: docutils-develop@lists.sourceforge.net
5 # Copyright: This module has been placed in the public domain.
7 """
8 Transforms needed by most or all documents:
10 - `Decorations`: Generate a document's header & footer.
11 - `Messages`: Placement of system messages stored in
12 `nodes.document.transform_messages`.
13 - `TestMessages`: Like `Messages`, used on test runs.
14 - `FinalReferences`: Resolve remaining references.
15 """
17 __docformat__ = 'reStructuredText'
19 import re
20 import sys
21 import time
22 from docutils import nodes, utils
23 from docutils.transforms import TransformError, Transform
24 from docutils.utils import smartquotes
class Decorations(Transform):

    """
    Populate a document's decoration element (header, footer).
    """

    default_priority = 820

    def apply(self):
        """Add the generated header and footer nodes to the decoration.

        The decoration element is only requested from the document when
        there is something to put into it.
        """
        header_nodes = self.generate_header()
        if header_nodes:
            self.document.get_decoration().get_header().extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            self.document.get_decoration().get_footer().extend(footer_nodes)

    def generate_header(self):
        """Return the nodes for the header; this implementation has none."""
        return None

    def generate_footer(self):
        """Return the footer content as a one-paragraph list, or None.

        None is returned when none of the `generator`, `datestamp`,
        `source_link` and `source_url` settings is set.  Otherwise a list
        holding a single paragraph is returned; the paragraph collects
        the "View document source" link, the datestamp and the generator
        credits, each only if the corresponding setting asks for it.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
        # for the datestamp?
        # See https://sourceforge.net/p/docutils/patches/132/
        # and https://reproducible-builds.org/specs/source-date-epoch/
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None

        parts = []

        # An explicit URL takes precedence over the path computed from
        # the source and destination files.
        if (settings.source_link and settings._source
                or settings.source_url):
            source = settings.source_url or utils.relative_path(
                settings._destination, settings._source)
            parts.append(
                nodes.reference('', 'View document source', refuri=source))
            parts.append(nodes.Text('.\n'))

        if settings.datestamp:
            # The setting is a `time.strftime` format, applied to UTC "now".
            datestamp = time.strftime(settings.datestamp, time.gmtime())
            parts.append(nodes.Text('Generated on: ' + datestamp + '.\n'))

        if settings.generator:
            docutils_link = nodes.reference(
                '', 'Docutils', refuri='http://docutils.sourceforge.net/')
            rst_link = nodes.reference(
                '', 'reStructuredText',
                refuri='http://docutils.sourceforge.net/rst.html')
            parts.extend([nodes.Text('Generated by '),
                          docutils_link,
                          nodes.Text(' from '),
                          rst_link,
                          nodes.Text(' source.\n')])

        return [nodes.paragraph('', '', *parts)]
class ExposeInternals(Transform):

    """
    Expose internal attributes if ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node):
        """Traversal condition: accept every node except `nodes.Text`."""
        if isinstance(node, nodes.Text):
            return False
        return True

    def apply(self):
        """Copy the requested internal attributes into node attributes.

        For every non-Text node and every name listed in the
        ``expose_internals`` setting, an attribute of the node that is
        not None is stored under the key ``internal:<name>``.
        """
        internals = self.document.settings.expose_internals
        if not internals:
            return
        for node in self.document.traverse(self.not_Text):
            for name in internals:
                value = getattr(node, name, None)
                if value is None:
                    continue
                node['internal:' + name] = value
class Messages(Transform):

    """
    Place any system messages generated after parsing into a dedicated section
    of the document.
    """

    default_priority = 860

    def apply(self):
        """Append a "system-messages" section holding the pending messages.

        Only messages at or above the reporter's report level that have
        no parent yet are moved; if there are none, the document and the
        list of transform messages are left as they are.
        """
        threshold = self.document.reporter.report_level
        pending = [message
                   for message in self.document.transform_messages
                   if message['level'] >= threshold and not message.parent]
        if not pending:
            return
        section = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        section += nodes.title('', 'Docutils System Messages')
        section += pending
        # Empty the list in place so that other references to it see the
        # change, too.
        self.document.transform_messages[:] = []
        self.document += section
class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.
    """

    default_priority = 870

    def apply(self):
        """Drop every system message below the reporter's report level."""
        threshold = self.document.reporter.report_level
        for message in self.document.traverse(nodes.system_message):
            if message['level'] >= threshold:
                continue
            message.parent.remove(message)
class TestMessages(Transform):

    """
    Append all post-parse system messages to the end of the document.

    Used for testing purposes.
    """

    default_priority = 880

    def apply(self):
        """Append each transform message without a parent to the document."""
        for message in self.document.transform_messages:
            # Checked at append time: messages that already have a parent
            # are skipped.
            if message.parent:
                continue
            self.document += message
class StripComments(Transform):

    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self):
        """Detach every comment node when ``strip_comments`` is set."""
        if not self.document.settings.strip_comments:
            return
        for comment in self.document.traverse(nodes.comment):
            comment.parent.remove(comment)
class StripClassesAndElements(Transform):

    """
    Remove from the document tree all elements with classes in
    `self.document.settings.strip_elements_with_classes` and all "classes"
    attribute values in `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self):
        """Strip the configured classes and elements from the document.

        Does nothing when neither the ``strip_elements_with_classes`` nor
        the ``strip_classes`` setting has a value.  Otherwise the document
        is traversed with `check_classes` as condition (which strips the
        class values as it goes) and every node it accepts is removed
        from its parent.
        """
        settings = self.document.settings
        if not (settings.strip_elements_with_classes
                or settings.strip_classes):
            return
        # Sets for fast membership tests; either setting may be unset,
        # hence the fallback to an empty list.
        self.strip_elements = set(settings.strip_elements_with_classes or [])
        self.strip_classes = set(settings.strip_classes or [])
        for node in self.document.traverse(self.check_classes):
            node.parent.remove(node)

    def check_classes(self, node):
        """Strip unwanted class values; tell whether to remove `node`.

        Side effect: values listed in `self.strip_classes` are removed
        from the "classes" attribute of `node`.

        Return True if `node` is an element carrying a class listed in
        `self.strip_elements`, False otherwise (including for nodes that
        are not elements).
        """
        if not isinstance(node, nodes.Element):
            return False
        # Iterate over a copy: the list is modified inside the loop.
        for class_value in node['classes'][:]:
            if class_value in self.strip_classes:
                node['classes'].remove(class_value)
            if class_value in self.strip_elements:
                return True
        return False
class SmartQuotes(Transform):

    """
    Replace ASCII quotation marks with typographic form.

    Also replace multiple dashes with em-dash/en-dash characters.
    """

    default_priority = 850

    def __init__(self, document, startnode):
        Transform.__init__(self, document, startnode=startnode)
        # Languages already warned about; see `apply()`.
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """Yield a ``(texttype, nodetext)`` tuple for each "Text" node.

        This is the interface to ``smartquotes.educate_tokens()``.  Text
        whose parent is a literal, math, image, raw or problematic node
        is tagged 'literal', all other text is tagged 'plain'.
        """
        literal_parents = (nodes.literal,
                           nodes.math,
                           nodes.image,
                           nodes.raw,
                           nodes.problematic)
        for txtnode in txtnodes:
            if isinstance(txtnode.parent, literal_parents):
                kind = 'literal'
            else:
                kind = 'plain'
            yield (kind, txtnode.astext())

    def apply(self):
        """Replace the text of the document by its "educated" form.

        Does nothing unless the ``smart_quotes`` setting is set.  A
        warning is issued once per language for which no smart quotes
        are defined; text in such a language is processed with an empty
        language code.
        """
        smart_quotes = self.document.settings.smart_quotes
        if not smart_quotes:
            return
        # The setting may be a value without string methods; only a
        # string starting with "alt" selects the alternative quotes.
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False

        document_language = self.document.settings.language_code

        # "Educate" quotes in normal text.  Each block of text
        # (TextElement node) is handled as a unit to keep the context
        # around inline nodes.
        for block in self.document.traverse(nodes.TextElement):
            # Preformatted text blocks and special elements are skipped.
            if isinstance(block, (nodes.FixedTextElement, nodes.Special)):
                continue
            # Nested TextElements are not "block-level" elements.
            if isinstance(block.parent, nodes.TextElement):
                continue

            # The text nodes of the "text block" (option strings excluded).
            txtnodes = []
            for txtnode in block.traverse(nodes.Text):
                if isinstance(txtnode.parent, nodes.option_string):
                    continue
                txtnodes.append(txtnode)

            # Use the typographical quotes for language `lang`.
            lang = block.get_language_code(document_language)
            if alternative:
                # Toggle the "-x-altquot" subtag.
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'

            # Use the first normalized form of the tag with known quotes.
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else:
                # Language not supported: warn once, then continue with
                # an empty language code.
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=block)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator over the educated text of the tokens.
            teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
                                                 attr='qDe', language=lang)

            for txtnode, newtext in zip(txtnodes, teacher):
                txtnode.parent.replace(txtnode, nodes.Text(newtext))

        # Reset, so that a later run starts without remembered languages.
        self.unsupported_languages = set()