Ensure `rawsource` in "unicode" directive.
[docutils.git] / docutils / docutils / parsers / rst / directives / misc.py
blob c9a069c2d8c1bb265b630c083c12a2d1d89f7c3a
1 # $Id$
2 # Authors: David Goodger <goodger@python.org>; Dethe Elza
3 # Copyright: This module has been placed in the public domain.
5 """Miscellaneous directives."""
7 __docformat__ = 'reStructuredText'
9 import sys
10 import os.path
11 import re
12 import time
13 from docutils import io, nodes, statemachine, utils
14 from docutils.utils.error_reporting import SafeString, ErrorString
15 from docutils.utils.error_reporting import locale_encoding
16 from docutils.parsers.rst import Directive, convert_directive_function
17 from docutils.parsers.rst import directives, roles, states
18 from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
19 from docutils.parsers.rst.roles import set_classes
20 from docutils.transforms import misc
class Include(Directive):

    """
    Include content read from a separate source file.

    Content may be parsed by the parser, or included as a literal
    block.  The encoding of the included file can be specified.  Only
    a part of the given file argument may be included by specifying
    start and end line or text to match before and/or after the text
    to be used.
    """

    # The single argument is the path of the file to include; it may
    # contain whitespace.
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'literal': directives.flag,
                   'code': directives.unchanged,
                   'encoding': directives.encoding,
                   'tab-width': int,
                   'start-line': int,
                   'end-line': int,
                   'start-after': directives.unchanged_required,
                   'end-before': directives.unchanged_required,
                   # ignored except for 'literal' or 'code':
                   'number-lines': directives.unchanged, # integer or None
                   'class': directives.class_option,
                   'name': directives.unchanged}

    # Directory searched for include files given as "<name>":
    # the "include" directory next to the `states` module.
    standard_include_path = os.path.join(os.path.dirname(states.__file__),
                                         'include')

    def run(self):
        """Include a file as part of the content of this reST file.

        Returns a list with one literal block (option "literal"), the
        result of `CodeBlock.run()` (option "code"), or an empty list
        after inserting the included lines into the state machine input.

        Raises the objects returned by `self.warning()` (file insertion
        disabled), `self.severe()` (path, I/O, decoding, or text-match
        problems) and `self.error()` (invalid "number-lines" value).
        """
        if not self.state.document.settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)
        # Source of the input line holding this directive; relative
        # include paths are resolved against its directory.
        source = self.state_machine.input_lines.source(
            self.lineno - self.state_machine.input_offset - 1)
        source_dir = os.path.dirname(os.path.abspath(source))
        path = directives.path(self.arguments[0])
        # "<name>" selects a file below `standard_include_path`.
        if path.startswith('<') and path.endswith('>'):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(source_dir, path))
        path = utils.relative_path(None, path)
        path = nodes.reprunicode(path)
        # Directive options take precedence over the document settings.
        encoding = self.options.get(
            'encoding', self.state.document.settings.input_encoding)
        e_handler=self.state.document.settings.input_encoding_error_handler
        tab_width = self.options.get(
            'tab-width', self.state.document.settings.tab_width)
        try:
            # The dependency is recorded before the file is opened.
            self.state.document.settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=e_handler)
        except UnicodeEncodeError, error:
            raise self.severe(u'Problems with "%s" directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' %
                              (self.name, SafeString(path)))
        except IOError, error:
            raise self.severe(u'Problems with "%s" directive path:\n%s.' %
                      (self.name, ErrorString(error)))
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            # Read line-wise only if a slice is requested (a start line
            # of 0 without end line selects the complete file anyway).
            if startline or (endline is not None):
                lines = include_file.readlines()
                rawtext = ''.join(lines[startline:endline])
            else:
                rawtext = include_file.read()
        except UnicodeError, error:
            raise self.severe(u'Problem with "%s" directive:\n%s' %
                              (self.name, ErrorString(error)))
        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get('start-after', None)
        if after_text:
            # skip content in rawtext before *and incl.* a matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[after_index + len(after_text):]
        before_text = self.options.get('end-before', None)
        if before_text:
            # skip content in rawtext after *and incl.* a matching text
            # ("end-before" is searched in the text left by "start-after")
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[:before_index]

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        if 'literal' in self.options:
            # Convert tabs to spaces, unless `tab_width` is negative.
            if tab_width >= 0:
                text = rawtext.expandtabs(tab_width)
            else:
                text = rawtext
            # The unexpanded `rawtext` is passed as first argument of
            # the literal block, the (expanded) `text` becomes its content.
            literal_block = nodes.literal_block(rawtext, source=path,
                                    classes=self.options.get('class', []))
            literal_block.line = 1
            self.add_name(literal_block)
            if 'number-lines' in self.options:
                try:
                    # An empty option value means: start numbering at 1.
                    startline = int(self.options['number-lines'] or 1)
                except ValueError:
                    raise self.error(':number-lines: with non-integer '
                                     'start value')
                endline = startline + len(include_lines)
                # Drop one trailing newline before numbering the lines.
                if text.endswith('\n'):
                    text = text[:-1]
                tokens = NumberLines([([], text)], startline, endline)
                for classes, value in tokens:
                    if classes:
                        literal_block += nodes.inline(value, value,
                                                      classes=classes)
                    else:
                        literal_block += nodes.Text(value, value)
            else:
                literal_block += nodes.Text(text, text)
            return [literal_block]
        if 'code' in self.options:
            # Delegate to the "code" directive: the value of the "code"
            # option is its argument, the remaining options are passed on.
            self.options['source'] = path
            codeblock = CodeBlock(self.name,
                                  [self.options.pop('code')], # arguments
                                  self.options,
                                  include_lines, # content
                                  self.lineno,
                                  self.content_offset,
                                  self.block_text,
                                  self.state,
                                  self.state_machine)
            return codeblock.run()
        # Default: hand the included lines to the state machine as input.
        self.state_machine.insert_input(include_lines, path)
        return []
class Raw(Directive):

    """
    Pass through content unchanged

    Content is included in output based on type argument

    Content may be included inline (content section of directive) or
    imported from a file or url.
    """

    # The single argument (may contain whitespace) names the output
    # format(s); it is stored lower-cased and whitespace-normalized in
    # the "format" attribute of the raw node.
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'file': directives.path,
                   'url': directives.uri,
                   'encoding': directives.encoding}
    has_content = True

    def run(self):
        """Return a list with one `raw` node holding the content.

        The text is taken from exactly one of: the directive content,
        the "file" option, or the "url" option.

        Raises the objects returned by `self.warning()` (directive
        disabled by the settings), `self.error()` (conflicting content
        sources) and `self.severe()` (I/O or decoding problems).
        """
        # Disabled if raw content is not allowed at all, or if external
        # content is requested while file insertion is not allowed.
        if (not self.state.document.settings.raw_enabled
            or (not self.state.document.settings.file_insertion_enabled
                and ('file' in self.options
                     or 'url' in self.options))):
            raise self.warning('"%s" directive disabled.' % self.name)
        attributes = {'format': ' '.join(self.arguments[0].lower().split())}
        # The "encoding" option takes precedence over the document setting.
        encoding = self.options.get(
            'encoding', self.state.document.settings.input_encoding)
        e_handler=self.state.document.settings.input_encoding_error_handler
        if self.content:
            if 'file' in self.options or 'url' in self.options:
                raise self.error(
                    '"%s" directive may not both specify an external file '
                    'and have content.' % self.name)
            text = '\n'.join(self.content)
        elif 'file' in self.options:
            if 'url' in self.options:
                raise self.error(
                    'The "file" and "url" options may not be simultaneously '
                    'specified for the "%s" directive.' % self.name)
            # The file path is resolved relative to the directory of the
            # current source.
            source_dir = os.path.dirname(
                os.path.abspath(self.state.document.current_source))
            path = os.path.normpath(os.path.join(source_dir,
                                                 self.options['file']))
            path = utils.relative_path(None, path)
            try:
                raw_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=e_handler)
                # TODO: currently, raw input files are recorded as
                # dependencies even if not used for the chosen output format.
                self.state.document.settings.record_dependencies.add(path)
            except IOError, error:
                raise self.severe(u'Problems with "%s" directive path:\n%s.'
                                  % (self.name, ErrorString(error)))
            try:
                text = raw_file.read()
            except UnicodeError, error:
                raise self.severe(u'Problem with "%s" directive:\n%s'
                    % (self.name, ErrorString(error)))
            attributes['source'] = path
        elif 'url' in self.options:
            source = self.options['url']
            # Do not import urllib2 at the top of the module because
            # it may fail due to broken SSL dependencies, and it takes
            # about 0.15 seconds to load.
            import urllib2
            try:
                raw_text = urllib2.urlopen(source).read()
            except (urllib2.URLError, IOError, OSError), error:
                raise self.severe(u'Problems with "%s" directive URL "%s":\n%s.'
                    % (self.name, self.options['url'], ErrorString(error)))
            # Wrap the downloaded data in a `StringInput` so that it is
            # read with the same encoding and error handler as a file.
            raw_file = io.StringInput(source=raw_text, source_path=source,
                                      encoding=encoding,
                                      error_handler=e_handler)
            try:
                text = raw_file.read()
            except UnicodeError, error:
                raise self.severe(u'Problem with "%s" directive:\n%s'
                                  % (self.name, ErrorString(error)))
            attributes['source'] = source
        else:
            # This will always fail because there is no content.
            self.assert_has_content()
        raw_node = nodes.raw('', text, **attributes)
        # Attach the source and line of the directive to the new node.
        (raw_node.source,
         raw_node.line) = self.state_machine.get_source_and_line(self.lineno)
        return [raw_node]
class Replace(Directive):

    """
    Supply the replacement content of a substitution definition.

    The content is parsed; it may consist of a single paragraph
    (optionally accompanied by system messages).
    """

    has_content = True

    def run(self):
        """Return system messages followed by the paragraph's children.

        If the parsed content holds anything but one paragraph and
        system messages, a single error system message is returned.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        self.assert_has_content()
        container = nodes.Element('\n'.join(self.content))
        self.state.nested_parse(self.content, self.content_offset,
                                container)
        # `container` might hold [paragraph] + system_message(s)
        paragraph = None
        system_messages = []
        for child in container:
            if not paragraph and isinstance(child, nodes.paragraph):
                # the first paragraph supplies the replacement
                paragraph = child
                continue
            if isinstance(child, nodes.system_message):
                child['backrefs'] = []
                system_messages.append(child)
                continue
            # a second paragraph or any other element is not allowed
            report_error = self.state_machine.reporter.error
            return [report_error(
                'Error in "%s" directive: may contain a single paragraph '
                'only.' % (self.name), line=self.lineno)]
        if not paragraph:
            return system_messages
        return system_messages + paragraph.children
283 class Unicode(Directive):
285 r"""
286 Convert Unicode character codes (numbers) to characters. Codes may be
287 decimal numbers, hexadecimal numbers (prefixed by ``0x``, ``x``, ``\x``,
288 ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style numeric character
289 entities (e.g. ``&#x262E;``). Text following ".." is a comment and is
290 ignored. Spaces are ignored, and any other text remains as-is.
293 required_arguments = 1
294 optional_arguments = 0
295 final_argument_whitespace = True
296 option_spec = {'trim': directives.flag,
297 'ltrim': directives.flag,
298 'rtrim': directives.flag}
300 comment_pattern = re.compile(r'( |\n|^)\.\. ')
302 def run(self):
303 if not isinstance(self.state, states.SubstitutionDef):
304 raise self.error(
305 'Invalid context: the "%s" directive can only be used within '
306 'a substitution definition.' % self.name)
307 substitution_definition = self.state_machine.node
308 if 'trim' in self.options:
309 substitution_definition.attributes['ltrim'] = 1
310 substitution_definition.attributes['rtrim'] = 1
311 if 'ltrim' in self.options:
312 substitution_definition.attributes['ltrim'] = 1
313 if 'rtrim' in self.options:
314 substitution_definition.attributes['rtrim'] = 1
315 codes = self.comment_pattern.split(self.arguments[0])[0].split()
316 element = nodes.Element()
317 for code in codes:
318 try:
319 decoded = directives.unicode_code(code)
320 except ValueError, error:
321 raise self.error(u'Invalid character code: %s\n%s'
322 % (code, ErrorString(error)))
323 element += nodes.Text(utils.unescape_rawsource(decoded), decoded)
324 return element.children
class Class(Directive):

    """
    Add "class" attribute values to the directive content or, if there
    is no content, to the next element.  In the latter case a "pending"
    element is inserted and a transform does the work later.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True

    def run(self):
        """Return the parsed content nodes or a single pending node."""
        raw_value = self.arguments[0]
        try:
            class_value = directives.class_option(raw_value)
        except ValueError:
            raise self.error(
                'Invalid class attribute value for "%s" directive: "%s".'
                % (self.name, raw_value))
        if not self.content:
            # no content: leave the work to the `ClassAttribute` transform
            details = {'class': class_value, 'directive': self.name}
            pending = nodes.pending(misc.ClassAttribute, details,
                                    self.block_text)
            self.state_machine.document.note_pending(pending)
            return [pending]
        # content: parse it and extend the classes of every resulting node
        wrapper = nodes.Element()
        self.state.nested_parse(self.content, self.content_offset, wrapper)
        for child in wrapper:
            child['classes'].extend(class_value)
        return list(wrapper.children)
class Role(Directive):

    """
    Define a custom interpreted text role, optionally based on an
    existing role: the first line holds ``new-role`` or
    ``new-role(base-role)``.
    """

    has_content = True

    # Matches "name" or "name(base)"; group 1 is the new role name,
    # group 3 the (optional) base role name.
    argument_pattern = re.compile(r'(%s)\s*(\(\s*(%s)\s*\)\s*)?$'
                                  % ((states.Inliner.simplename,) * 2))

    def run(self):
        """Dynamically create and register a custom interpreted text role.

        Returns the list of system messages collected on the way; on
        failure the list ends with an error system message and no role
        is registered.
        """
        # The role names are read from the first content line, which
        # must be on the first line of the directive.
        if self.content_offset > self.lineno or not self.content:
            raise self.error('"%s" directive requires arguments on the first '
                             'line.' % self.name)
        args = self.content[0]
        match = self.argument_pattern.match(args)
        if not match:
            raise self.error('"%s" directive arguments not valid role names: '
                             '"%s".' % (self.name, args))
        new_role_name = match.group(1)
        base_role_name = match.group(3)
        messages = []
        if base_role_name:
            base_role, messages = roles.role(
                base_role_name, self.state_machine.language, self.lineno,
                self.state.reporter)
            if base_role is None:
                error = self.state.reporter.error(
                    'Unknown interpreted text role "%s".' % base_role_name,
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [error]
        else:
            # Without explicit base role, use the generic custom role.
            base_role = roles.generic_custom_role
        assert not hasattr(base_role, 'arguments'), (
            'Supplemental directive arguments for "%s" directive not '
            'supported (specified by "%r" role).' % (self.name, base_role))
        try:
            # The remaining content lines are parsed like a directive
            # block, using the specification of the base role.
            converted_role = convert_directive_function(base_role)
            (arguments, options, content, content_offset) = (
                self.state.parse_directive_block(
                self.content[1:], self.content_offset, converted_role,
                option_presets={}))
        except states.MarkupError, detail:
            error = self.state_machine.reporter.error(
                'Error in "%s" directive:\n%s.' % (self.name, detail),
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return messages + [error]
        if 'class' not in options:
            # Default "class" option: derived from the new role name.
            try:
                options['class'] = directives.class_option(new_role_name)
            except ValueError, detail:
                error = self.state_machine.reporter.error(
                    u'Invalid argument for "%s" directive:\n%s.'
                    % (self.name, SafeString(detail)), nodes.literal_block(
                    self.block_text, self.block_text), line=self.lineno)
                return messages + [error]
        role = roles.CustomRole(new_role_name, base_role, options, content)
        roles.register_local_role(new_role_name, role)
        return messages
class DefaultRole(Directive):

    """Select the interpreted text role used when none is given."""

    optional_arguments = 1
    final_argument_whitespace = False

    def run(self):
        """Store the named role under the key '' of `roles._roles`.

        Without argument, an existing '' entry is removed instead.
        Returns the system messages from the role lookup, followed by
        an error message if the role is unknown.
        """
        if not self.arguments:
            # restore the "default" default role
            roles._roles.pop('', None)
            return []
        requested = self.arguments[0]
        found, messages = roles.role(requested, self.state_machine.language,
                                     self.lineno, self.state.reporter)
        if found is not None:
            roles._roles[''] = found
            # @@@ should this be local to the document, not the parser?
            return messages
        block = nodes.literal_block(self.block_text, self.block_text)
        error = self.state.reporter.error(
            'Unknown interpreted text role "%s".' % requested,
            block, line=self.lineno)
        return messages + [error]
class Title(Directive):

    """Set the "title" attribute of the document."""

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True

    def run(self):
        """Store the directive argument as document title; add no nodes."""
        document = self.state_machine.document
        title = self.arguments[0]
        document['title'] = title
        return []
class Date(Directive):

    """
    Insert the current date/time, formatted with `time.strftime()`.

    The directive content is the format string (default: ``%Y-%m-%d``).
    The directive can only be used within a substitution definition.
    """

    has_content = True

    def run(self):
        """Return a list with one Text node holding the formatted time.

        Raises the object returned by `self.error()` outside of a
        substitution definition and the object returned by
        `self.warning()` if (under Python 2) the format string cannot be
        encoded or the formatted result cannot be decoded.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        format_str = '\n'.join(self.content) or '%Y-%m-%d'
        # Encoding actually used for the Python 2 byte string round trip;
        # it is also the one reported in the warnings below.
        encoding = locale_encoding or 'utf-8'
        if sys.version_info < (3, 0):
            try:
                format_str = format_str.encode(encoding)
            except UnicodeEncodeError:
                raise self.warning(u'Cannot encode date format string '
                    u'with locale encoding "%s".' % encoding)
        # @@@
        # Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable?
        # Pro: Docutils-generated documentation
        #      can easily be part of `reproducible software builds`__
        #
        #      __ https://reproducible-builds.org/
        #
        # Con: Changes the specs, hard to predict behaviour,
        #      no actual use case!
        #
        # See also the discussion about \date \time \year in TeX
        # http://tug.org/pipermail/tex-k/2016-May/002704.html
        # source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
        # if (source_date_epoch
        #     and self.state.document.settings.use_source_date_epoch):
        #     text = time.strftime(format_str,
        #                          time.gmtime(int(source_date_epoch)))
        # else:
        text = time.strftime(format_str)
        if sys.version_info < (3, 0):
            # `text` is a byte string that may contain non-ASCII characters:
            try:
                text = text.decode(encoding)
            except UnicodeDecodeError:
                # decode leniently, so that the text can be shown in
                # the warning
                text = text.decode(encoding, 'replace')
                raise self.warning(u'Error decoding "%s" '
                    u'with locale encoding "%s".' % (text, encoding))
        return [nodes.Text(text)]
class TestDirective(Directive):

    """This directive is useful only for testing purposes."""

    optional_arguments = 1
    final_argument_whitespace = True
    option_spec = {'option': directives.unchanged_required}
    has_content = True

    def run(self):
        """Return one "info" system message describing the directive call.

        Any content is attached to the message as a literal block.
        """
        report = self.state_machine.reporter.info
        details = (self.name, self.arguments, self.options)
        if not self.content:
            return [report(
                'Directive processed. Type="%s", arguments=%r, options=%r, '
                'content: None' % details,
                line=self.lineno)]
        text = '\n'.join(self.content)
        return [report(
            'Directive processed. Type="%s", arguments=%r, options=%r, '
            'content:' % details,
            nodes.literal_block(text, text), line=self.lineno)]
533 # Old-style, functional definition:
535 # def directive_test_function(name, arguments, options, content, lineno,
536 # content_offset, block_text, state, state_machine):
537 # """This directive is useful only for testing purposes."""
538 # if content:
539 # text = '\n'.join(content)
540 # info = state_machine.reporter.info(
541 # 'Directive processed. Type="%s", arguments=%r, options=%r, '
542 # 'content:' % (name, arguments, options),
543 # nodes.literal_block(text, text), line=lineno)
544 # else:
545 # info = state_machine.reporter.info(
546 # 'Directive processed. Type="%s", arguments=%r, options=%r, '
547 # 'content: None' % (name, arguments, options), line=lineno)
548 # return [info]
550 # directive_test_function.arguments = (0, 1, 1)
551 # directive_test_function.options = {'option': directives.unchanged_required}
552 # directive_test_function.content = 1