Consistently use "utf-8" (not "utf8") in magic comment giving source encoding.
[docutils.git] / docutils / transforms / universal.py
blob36c9be149a844cc9b540e73cc65e3e8dd07d63ca
1 # $Id$
2 # -*- coding: utf-8 -*-
3 # Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
4 # Maintainer: docutils-develop@lists.sourceforge.net
5 # Copyright: This module has been placed in the public domain.
7 """
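Transforms needed by most or all documents:

- `Decorations`: Generate a document's header & footer.
- `ExposeInternals`: Expose internal attributes (``expose_internals``
  setting).
- `Messages`: Placement of system messages stored in
  `nodes.document.transform_messages`.
- `FilterMessages`: Remove system messages below the verbosity threshold.
- `TestMessages`: Like `Messages`, used on test runs.
- `StripComments`: Remove comment elements (``strip_comments`` setting).
- `StripClassesAndElements`: Remove elements and "classes" attribute values
  selected by the ``strip_elements_with_classes`` and ``strip_classes``
  settings.
- `SmartQuotes`: Replace ASCII quotation marks with typographic form.
- `FinalReferences`: Resolve remaining references.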
15 """
17 __docformat__ = 'reStructuredText'
19 import re
20 import sys
21 import time
22 from docutils import nodes, utils
23 from docutils.transforms import TransformError, Transform
24 from docutils.utils import smartquotes
class Decorations(Transform):

    """
    Fill the document's decoration element with generated header and
    footer nodes.
    """

    default_priority = 820

    def apply(self):
        """Attach the generated header/footer nodes, if there are any."""
        header_nodes = self.generate_header()
        if header_nodes:
            self.document.get_decoration().get_header().extend(header_nodes)
        footer_nodes = self.generate_footer()
        if footer_nodes:
            self.document.get_decoration().get_footer().extend(footer_nodes)

    def generate_header(self):
        """Return the header nodes; this implementation produces none."""
        return None

    def generate_footer(self):
        """
        Return a one-element list holding the footer paragraph, or None
        when none of the ``generator``, ``datestamp``, ``source_link`` and
        ``source_url`` settings is set.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None

        parts = []

        # Link to the document source; an explicit URL takes precedence
        # over the path computed relative to the destination.
        if (settings.source_link and settings._source) or settings.source_url:
            if settings.source_url:
                target = settings.source_url
            else:
                target = utils.relative_path(settings._destination,
                                             settings._source)
            parts.append(nodes.reference('', 'View document source',
                                         refuri=target))
            parts.append(nodes.Text('.\n'))

        # Time stamp, formatted from the current time in UTC.
        if settings.datestamp:
            stamp = time.strftime(settings.datestamp, time.gmtime())
            parts.append(nodes.Text('Generated on: ' + stamp + '.\n'))

        # Credits for the generator.
        if settings.generator:
            parts.append(nodes.Text('Generated by '))
            parts.append(nodes.reference(
                '', 'Docutils', refuri='http://docutils.sourceforge.net/'))
            parts.append(nodes.Text(' from '))
            parts.append(nodes.reference(
                '', 'reStructuredText',
                refuri='http://docutils.sourceforge.net/rst.html'))
            parts.append(nodes.Text(' source.\n'))

        return [nodes.paragraph('', '', *parts)]
class ExposeInternals(Transform):

    """
    Copy selected internal (Python-level) node attributes into node
    attributes when the ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node):
        """Traversal condition: true for every node that is not a `nodes.Text`."""
        return not isinstance(node, nodes.Text)

    def apply(self):
        """Store each listed attribute that is not None as ``internal:<name>``."""
        exposed = self.document.settings.expose_internals
        if not exposed:
            return
        for node in self.document.traverse(self.not_Text):
            for att in exposed:
                value = getattr(node, att, None)
                if value is None:
                    continue
                node['internal:' + att] = value
class Messages(Transform):

    """
    Gather the system messages stored in `document.transform_messages`
    into a dedicated section appended to the document.
    """

    default_priority = 860

    def apply(self):
        """
        Move all still unattached messages at or above the reporter's
        ``report_level`` into a new "system-messages" section.

        When no message qualifies, nothing is changed.
        """
        threshold = self.document.reporter.report_level
        pending = [msg for msg in self.document.transform_messages
                   if msg['level'] >= threshold and not msg.parent]
        if not pending:
            return
        section = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        section += nodes.title('', 'Docutils System Messages')
        section += pending
        # Empty the list in place so other references to it see the change.
        self.document.transform_messages[:] = []
        self.document += section
class FilterMessages(Transform):

    """
    Drop system messages whose level is below the verbosity threshold.
    """

    default_priority = 870

    def apply(self):
        """Remove every `system_message` below the reporter's ``report_level``."""
        reporter = self.document.reporter
        for message in self.document.traverse(nodes.system_message):
            if message['level'] >= reporter.report_level:
                continue
            message.parent.remove(message)
class TestMessages(Transform):

    """
    Append all post-parse system messages to the end of the document.

    Used for testing purposes.
    """

    default_priority = 880

    def apply(self):
        """Append each message in `transform_messages` that has no parent yet."""
        for message in self.document.transform_messages:
            if message.parent:
                # already placed somewhere in the tree
                continue
            self.document += message
class StripComments(Transform):

    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self):
        """Detach every `nodes.comment` from its parent if the setting is on."""
        if not self.document.settings.strip_comments:
            return
        for comment in self.document.traverse(nodes.comment):
            comment.parent.remove(comment)
class StripClassesAndElements(Transform):

    """
    Remove from the document tree all elements with classes in
    `self.document.settings.strip_elements_with_classes` and all "classes"
    attribute values in `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self):
        """
        Strip the configured class values and elements.

        Does nothing when neither ``strip_elements_with_classes`` nor
        ``strip_classes`` is set.
        """
        settings = self.document.settings
        if not (settings.strip_elements_with_classes
                or settings.strip_classes):
            return
        # Lookup tables for `check_classes`; an unset setting becomes an
        # empty set.
        self.strip_elements = set(settings.strip_elements_with_classes or [])
        self.strip_classes = set(settings.strip_classes or [])
        # Snapshot the matches first: nodes are detached from their parents
        # inside the loop.
        for node in list(self.document.traverse(self.check_classes)):
            node.parent.remove(node)

    def check_classes(self, node):
        """
        Traversal condition that also strips unwanted class values.

        Every value of ``node['classes']`` found in `self.strip_classes`
        is removed as a side effect.  Return True as soon as a value found
        in `self.strip_elements` is seen (the element is to be removed),
        False otherwise and for nodes that are not `nodes.Element`
        instances.
        """
        if not isinstance(node, nodes.Element):
            return False
        # Iterate over a copy, the list itself is modified in the loop.
        for class_value in node['classes'][:]:
            if class_value in self.strip_classes:
                node['classes'].remove(class_value)
            if class_value in self.strip_elements:
                return True
        return False
class SmartQuotes(Transform):

    """
    Convert ASCII quotation marks to their typographic form.

    Runs of dashes are replaced with em-dash/en-dash characters, too.
    """

    default_priority = 850

    def __init__(self, document, startnode):
        Transform.__init__(self, document, startnode=startnode)
        # languages a "no smart quotes" warning was already issued for
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        # Generator yielding a ``(texttype, nodetext)`` tuple for each node
        # in a list of "Text" nodes (interface to
        # ``smartquotes.educate_tokens()``).

        # Text below these parents is tagged "literal" (not changed),
        # everything else is "plain":
        literal_parents = (nodes.literal,
                           nodes.math,
                           nodes.image,
                           nodes.raw,
                           nodes.problematic)
        for txtnode in txtnodes:
            if isinstance(txtnode.parent, literal_parents):
                yield ('literal', txtnode.astext())
            else:
                yield ('plain', txtnode.astext())

    def apply(self):
        smart_quotes = self.document.settings.smart_quotes
        if not smart_quotes:
            return
        # The setting may be a non-string value (no ``startswith``):
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False

        document_language = self.document.settings.language_code

        # "Educate" quotes in normal text. Each block of text (TextElement
        # node) is handled as a unit to keep context around inline nodes:
        for block in self.document.traverse(nodes.TextElement):
            # Skip preformatted text blocks and special elements as well as
            # nested TextElements (these are not "block-level" elements):
            if (isinstance(block, (nodes.FixedTextElement, nodes.Special))
                or isinstance(block.parent, nodes.TextElement)):
                continue

            # Text nodes of the "text block" (option strings are left out):
            txtnodes = [candidate
                        for candidate in block.traverse(nodes.Text)
                        if not isinstance(candidate.parent,
                                          nodes.option_string)]

            # Language whose typographical quotes are to be used:
            lang = block.get_language_code(document_language)
            # Toggle the alternative form if the `smart-quotes` setting
            # starts with "alt":
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'
            # Use the first normalized tag that has quotes defined:
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else:
                # Language not supported: keep ASCII quotes and warn once
                # per language.
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=block)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text:
            # '2': set all, using old school en- and em- dash shortcuts
            teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes),
                                                 attr='2', language=lang)

            for old, newtext in zip(txtnodes, teacher):
                old.parent.replace(old, nodes.Text(newtext))

        self.unsupported_languages = set()  # reset