Fix bug #424 Wrong circular inclusion detection.
[docutils.git] / docutils / docutils / parsers / rst / directives / misc.py
blob9c68db489e179ac40a5d0438287c609578647be3
1 # $Id$
2 # Authors: David Goodger <goodger@python.org>; Dethe Elza
3 # Copyright: This module has been placed in the public domain.
5 """Miscellaneous directives."""
7 __docformat__ = 'reStructuredText'
9 import sys
10 import os.path
11 import re
12 import time
13 from docutils import io, nodes, statemachine, utils
14 from docutils.utils.error_reporting import SafeString, ErrorString
15 from docutils.utils.error_reporting import locale_encoding
16 from docutils.parsers.rst import Directive, convert_directive_function
17 from docutils.parsers.rst import directives, roles, states
18 from docutils.parsers.rst.directives.body import CodeBlock, NumberLines
19 from docutils.parsers.rst.roles import set_classes
20 from docutils.transforms import misc
class Include(Directive):

    """
    Pull the content of another file into the document being parsed.

    The file content is either fed back to the reST parser, handed to
    another parser (``parser`` option), or wrapped in a literal or code
    block (``literal`` / ``code`` options).  The part of the file that
    is used can be narrowed with ``start-line`` / ``end-line`` and with
    ``start-after`` / ``end-before`` match texts; the file encoding can
    be overridden with ``encoding``.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'literal': directives.flag,
                   'code': directives.unchanged,
                   'encoding': directives.encoding,
                   'parser': directives.parser_name,
                   'tab-width': int,
                   'start-line': int,
                   'end-line': int,
                   'start-after': directives.unchanged_required,
                   'end-before': directives.unchanged_required,
                   # ignored except for 'literal' or 'code':
                   'number-lines': directives.unchanged,  # integer or None
                   'class': directives.class_option,
                   'name': directives.unchanged}

    # Directory searched for arguments written as ``<name>``.
    standard_include_path = os.path.join(os.path.dirname(states.__file__),
                                         'include')

    def run(self):
        """Read the requested file (or a clipping of it) and include it.

        Returns a list of nodes for the ``literal``, ``code`` and
        ``parser`` variants.  For a plain inclusion the lines are
        inserted into the state machine's input and an empty list is
        returned.

        Raises (via ``self.warning`` / ``self.error`` / ``self.severe``)
        when file insertion is disabled, the file cannot be opened or
        decoded, a match text is missing, a line exceeds the
        line-length limit, or the inclusion is circular.
        """
        settings = self.state.document.settings
        if not settings.file_insertion_enabled:
            raise self.warning('"%s" directive disabled.' % self.name)

        # Resolve the argument relative to the file holding the directive.
        current_source = self.state_machine.input_lines.source(
            self.lineno - self.state_machine.input_offset - 1)
        base_dir = os.path.dirname(os.path.abspath(current_source))
        path = directives.path(self.arguments[0])
        if path.startswith('<') and path.endswith('>'):
            path = os.path.join(self.standard_include_path, path[1:-1])
        path = os.path.normpath(os.path.join(base_dir, path))
        path = utils.relative_path(None, path)
        path = nodes.reprunicode(path)

        encoding = self.options.get('encoding', settings.input_encoding)
        e_handler = settings.input_encoding_error_handler
        tab_width = self.options.get('tab-width', settings.tab_width)
        try:
            settings.record_dependencies.add(path)
            include_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=e_handler)
        except UnicodeEncodeError:
            raise self.severe(u'Problems with "%s" directive path:\n'
                              'Cannot encode input file path "%s" '
                              '(wrong locale?).' %
                              (self.name, SafeString(path)))
        except IOError as error:
            raise self.severe(u'Problems with "%s" directive path:\n%s.' %
                              (self.name, ErrorString(error)))

        # Get to-be-included content
        startline = self.options.get('start-line', None)
        endline = self.options.get('end-line', None)
        try:
            if startline or (endline is not None):
                file_lines = include_file.readlines()
                rawtext = ''.join(file_lines[startline:endline])
            else:
                rawtext = include_file.read()
        except UnicodeError as error:
            raise self.severe(u'Problem with "%s" directive:\n%s' %
                              (self.name, ErrorString(error)))

        # start-after/end-before: no restrictions on newlines in match-text,
        # and no restrictions on matching inside lines vs. line boundaries
        after_text = self.options.get('start-after', None)
        if after_text:
            # drop everything up to *and including* the matching text
            after_index = rawtext.find(after_text)
            if after_index < 0:
                raise self.severe('Problem with "start-after" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[after_index + len(after_text):]
        before_text = self.options.get('end-before', None)
        if before_text:
            # drop everything from the matching text onwards
            before_index = rawtext.find(before_text)
            if before_index < 0:
                raise self.severe('Problem with "end-before" option of "%s" '
                                  'directive:\nText not found.' % self.name)
            rawtext = rawtext[:before_index]

        include_lines = statemachine.string2lines(rawtext, tab_width,
                                                  convert_whitespace=True)
        for number, line in enumerate(include_lines, 1):
            if len(line) > settings.line_length_limit:
                raise self.warning('"%s": line %d exceeds the'
                                   ' line-length-limit.' % (path, number))

        if 'literal' in self.options:
            # Don't convert tabs to spaces, if `tab_width` is negative.
            text = rawtext.expandtabs(tab_width) if tab_width >= 0 else rawtext
            literal_block = nodes.literal_block(
                rawtext, source=path, classes=self.options.get('class', []))
            literal_block.line = 1
            self.add_name(literal_block)
            if 'number-lines' not in self.options:
                literal_block += nodes.Text(text)
                return [literal_block]
            try:
                first_number = int(self.options['number-lines'] or 1)
            except ValueError:
                raise self.error(':number-lines: with non-integer '
                                 'start value')
            last_number = first_number + len(include_lines)
            if text.endswith('\n'):
                text = text[:-1]
            numbered = NumberLines([([], text)], first_number, last_number)
            for classes, value in numbered:
                if classes:
                    literal_block += nodes.inline(value, value,
                                                  classes=classes)
                else:
                    literal_block += nodes.Text(value)
            return [literal_block]

        if 'code' in self.options:
            self.options['source'] = path
            # Don't convert tabs to spaces, if `tab_width` is negative:
            if tab_width < 0:
                include_lines = rawtext.splitlines()
            codeblock = CodeBlock(self.name,
                                  [self.options.pop('code')],  # arguments
                                  self.options,
                                  include_lines,  # content
                                  self.lineno,
                                  self.content_offset,
                                  self.block_text,
                                  self.state,
                                  self.state_machine)
            return codeblock.run()

        # Prevent circular inclusion:
        # log entries are tuples (<source>, <clip-options>)
        clip_options = (startline, endline, before_text, after_text)
        include_log = self.state.document.include_log
        if not include_log:  # new document: seed the log with its source
            include_log.append((utils.relative_path(None, current_source),
                                (None, None, None, None)))
        if (path, clip_options) in include_log:
            chain = [path] + [entry[0] for entry in reversed(include_log)]
            raise self.warning('circular inclusion in "%s" directive: %s'
                               % (self.name, ' < '.join(chain)))

        if 'parser' in self.options:
            # parse into a dummy document and return created nodes
            parser = self.options['parser']()
            document = utils.new_document(path, settings)
            document.include_log = include_log + [(path, clip_options)]
            parser.parse('\n'.join(include_lines), document)
            return document.children

        # Include as rST source:
        #
        # mark end (cf. parsers.rst.states.Body.comment())
        include_lines += ['', '.. end of inclusion from "%s"' % path]
        self.state_machine.insert_input(include_lines, path)
        # update include-log
        include_log.append((path, clip_options))
        return []
class Raw(Directive):

    """
    Pass through content unchanged

    Content is included in output based on type argument

    Content may be included inline (content section of directive) or
    imported from a file or url.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'file': directives.path,
                   'url': directives.uri,
                   'encoding': directives.encoding,
                   'class': directives.class_option}
    has_content = True

    def run(self):
        """Build a single ``raw`` node from inline, file or URL content.

        The directive argument becomes the node's ``format`` attribute
        (lower-cased, whitespace-normalized).  Exactly one content
        source is allowed: the directive body, the ``file`` option or
        the ``url`` option.

        Raises (via ``self.warning`` / ``self.error`` / ``self.severe``)
        when the directive is disabled by the settings, when content
        sources are combined, or when the external source cannot be
        read or decoded.
        """
        settings = self.state.document.settings
        if (not settings.raw_enabled
            or (not settings.file_insertion_enabled
                and ('file' in self.options
                     or 'url' in self.options))):
            raise self.warning('"%s" directive disabled.' % self.name)
        attributes = {'format': ' '.join(self.arguments[0].lower().split())}
        encoding = self.options.get('encoding', settings.input_encoding)
        e_handler = settings.input_encoding_error_handler
        if self.content:
            if 'file' in self.options or 'url' in self.options:
                raise self.error(
                    '"%s" directive may not both specify an external file '
                    'and have content.' % self.name)
            text = '\n'.join(self.content)
        elif 'file' in self.options:
            if 'url' in self.options:
                raise self.error(
                    'The "file" and "url" options may not be simultaneously '
                    'specified for the "%s" directive.' % self.name)
            # The file path is taken relative to the current source file.
            source_dir = os.path.dirname(
                os.path.abspath(self.state.document.current_source))
            path = os.path.normpath(os.path.join(source_dir,
                                                 self.options['file']))
            path = utils.relative_path(None, path)
            try:
                raw_file = io.FileInput(source_path=path,
                                        encoding=encoding,
                                        error_handler=e_handler)
                # TODO: currently, raw input files are recorded as
                # dependencies even if not used for the chosen output format.
                settings.record_dependencies.add(path)
            except IOError as error:
                raise self.severe(u'Problems with "%s" directive path:\n%s.'
                                  % (self.name, ErrorString(error)))
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.severe(u'Problem with "%s" directive:\n%s'
                                  % (self.name, ErrorString(error)))
            attributes['source'] = path
        elif 'url' in self.options:
            source = self.options['url']
            # Do not import urllib2 at the top of the module because
            # it may fail due to broken SSL dependencies, and it takes
            # about 0.15 seconds to load.
            if sys.version_info >= (3, 0):
                from urllib.request import urlopen
                from urllib.error import URLError
            else:
                from urllib2 import urlopen, URLError
            try:
                response = urlopen(source)
                # Close the connection explicitly instead of relying on
                # garbage collection (works on Python 2 and 3 alike).
                try:
                    raw_text = response.read()
                finally:
                    response.close()
            except (URLError, IOError, OSError) as error:
                raise self.severe(u'Problems with "%s" directive URL "%s":\n%s.'
                    % (self.name, self.options['url'], ErrorString(error)))
            # Decode the downloaded bytes with the requested encoding.
            raw_file = io.StringInput(source=raw_text, source_path=source,
                                      encoding=encoding,
                                      error_handler=e_handler)
            try:
                text = raw_file.read()
            except UnicodeError as error:
                raise self.severe(u'Problem with "%s" directive:\n%s'
                                  % (self.name, ErrorString(error)))
            attributes['source'] = source
        else:
            # This will always fail because there is no content.
            self.assert_has_content()
        raw_node = nodes.raw('', text, classes=self.options.get('class', []),
                             **attributes)
        (raw_node.source,
         raw_node.line) = self.state_machine.get_source_and_line(self.lineno)
        return [raw_node]
class Replace(Directive):

    """Supply the replacement nodes of a substitution definition.

    The directive content is parsed as nested reST; it may hold one
    paragraph (whose children are returned) plus system messages.
    """

    has_content = True

    def run(self):
        """Return system messages followed by the paragraph's children.

        Raises (via ``self.error``) outside a substitution definition.
        Returns a single error message node when the content holds
        anything other than one paragraph and system messages.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        self.assert_has_content()
        container = nodes.Element('\n'.join(self.content))
        self.state.nested_parse(self.content, self.content_offset,
                                container)
        # container might contain [paragraph] + system_message(s)
        paragraph = None
        messages = []
        for child in container:
            if not paragraph and isinstance(child, nodes.paragraph):
                paragraph = child
                continue
            if isinstance(child, nodes.system_message):
                child['backrefs'] = []
                messages.append(child)
                continue
            # anything else (e.g. a second paragraph) is not allowed
            problem = self.state_machine.reporter.error(
                'Error in "%s" directive: may contain a single paragraph '
                'only.' % (self.name), line=self.lineno)
            return [problem]
        if paragraph:
            return messages + paragraph.children
        return messages
class Unicode(Directive):

    r"""
    Convert Unicode character codes (numbers) to characters.  Codes may be
    decimal numbers, hexadecimal numbers (prefixed by ``0x``, ``x``, ``\x``,
    ``U+``, ``u``, or ``\u``; e.g. ``U+262E``), or XML-style numeric character
    entities (e.g. ``&#x262E;``).  Text following ".." is a comment and is
    ignored.  Spaces are ignored, and any other text remains as-is.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    option_spec = {'trim': directives.flag,
                   'ltrim': directives.flag,
                   'rtrim': directives.flag}

    # Splits the argument at the first ".. " comment marker that follows
    # a space, a newline, or the start of the string.
    comment_pattern = re.compile(r'( |\n|^)\.\. ')

    def run(self):
        """Return one Text node per whitespace-separated code.

        The ``trim`` / ``ltrim`` / ``rtrim`` options are recorded as
        attributes on the enclosing substitution definition node.

        Raises (via ``self.error``) outside a substitution definition
        and when a code is rejected by ``directives.unicode_code``.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        substitution_definition = self.state_machine.node
        if 'trim' in self.options:
            # "trim" is shorthand for trimming on both sides
            substitution_definition.attributes['ltrim'] = 1
            substitution_definition.attributes['rtrim'] = 1
        if 'ltrim' in self.options:
            substitution_definition.attributes['ltrim'] = 1
        if 'rtrim' in self.options:
            substitution_definition.attributes['rtrim'] = 1
        # Only the text before the first comment marker holds codes.
        codes = self.comment_pattern.split(self.arguments[0])[0].split()
        element = nodes.Element()
        for code in codes:
            try:
                decoded = directives.unicode_code(code)
            except ValueError as error:
                raise self.error(u'Invalid character code: %s\n%s'
                    % (code, ErrorString(error)))
            element += nodes.Text(decoded)
        return element.children
class Class(Directive):

    """
    Set a "class" attribute on the directive content or the next element.
    When applied to the next element, a "pending" element is inserted, and a
    transform does the work later.
    """

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True

    def run(self):
        """Apply the class value to the content or defer it to a transform.

        With content: the content is parsed and the class value is added
        to the ``classes`` of every top-level node, and those nodes are
        returned.  Without content: a ``pending`` node referring to
        ``misc.ClassAttribute`` is registered with the document and
        returned.

        Raises (via ``self.error``) when the argument is not a valid
        class value.
        """
        try:
            class_value = directives.class_option(self.arguments[0])
        except ValueError:
            raise self.error(
                'Invalid class attribute value for "%s" directive: "%s".'
                % (self.name, self.arguments[0]))
        node_list = []
        if self.content:
            container = nodes.Element()
            self.state.nested_parse(self.content, self.content_offset,
                                    container)
            for node in container:
                node['classes'].extend(class_value)
            node_list.extend(container.children)
        else:
            pending = nodes.pending(
                misc.ClassAttribute,
                {'class': class_value, 'directive': self.name},
                self.block_text)
            self.state_machine.document.note_pending(pending)
            node_list.append(pending)
        return node_list
class Role(Directive):

    """Define a new interpreted text role, optionally based on another."""

    has_content = True

    # "<new role name>" optionally followed by "(<base role name>)"
    argument_pattern = re.compile(r'(%s)\s*(\(\s*(%s)\s*\)\s*)?$'
                                  % ((states.Inliner.simplename,) * 2))

    def run(self):
        """Dynamically create and register a custom interpreted text role."""
        if self.content_offset > self.lineno or not self.content:
            raise self.error('"%s" directive requires arguments on the first '
                             'line.' % self.name)
        first_line = self.content[0]
        parsed = self.argument_pattern.match(first_line)
        if not parsed:
            raise self.error('"%s" directive arguments not valid role names: '
                             '"%s".' % (self.name, first_line))
        new_role_name = parsed.group(1)
        base_role_name = parsed.group(3)
        messages = []
        if not base_role_name:
            base_role = roles.generic_custom_role
        else:
            base_role, messages = roles.role(
                base_role_name, self.state_machine.language, self.lineno,
                self.state.reporter)
            if base_role is None:
                unknown = self.state.reporter.error(
                    'Unknown interpreted text role "%s".' % base_role_name,
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [unknown]
        assert not hasattr(base_role, 'arguments'), (
            'Supplemental directive arguments for "%s" directive not '
            'supported (specified by "%r" role).' % (self.name, base_role))
        try:
            # Parse the remaining content lines as the base role's
            # directive block.
            converted_role = convert_directive_function(base_role)
            (arguments, options, content, content_offset) = (
                self.state.parse_directive_block(
                    self.content[1:], self.content_offset, converted_role,
                    option_presets={}))
        except states.MarkupError as detail:
            markup_error = self.state_machine.reporter.error(
                'Error in "%s" directive:\n%s.' % (self.name, detail),
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
            return messages + [markup_error]
        if 'class' not in options:
            # default the class to the new role's name
            try:
                options['class'] = directives.class_option(new_role_name)
            except ValueError as detail:
                class_error = self.state_machine.reporter.error(
                    u'Invalid argument for "%s" directive:\n%s.'
                    % (self.name, SafeString(detail)),
                    nodes.literal_block(self.block_text, self.block_text),
                    line=self.lineno)
                return messages + [class_error]
        custom_role = roles.CustomRole(new_role_name, base_role, options,
                                       content)
        roles.register_local_role(new_role_name, custom_role)
        return messages
class DefaultRole(Directive):

    """Set the default interpreted text role."""

    optional_arguments = 1
    final_argument_whitespace = False

    def run(self):
        """Register the named role under the empty role name.

        Without an argument, any entry stored under the empty name is
        removed again.  Returns the messages from the role lookup, plus
        an error message node when the role is unknown.
        """
        if not self.arguments:
            # restore the "default" default role
            roles._roles.pop('', None)
            return []
        role_name = self.arguments[0]
        role, messages = roles.role(role_name, self.state_machine.language,
                                    self.lineno, self.state.reporter)
        if role is not None:
            roles._roles[''] = role
            return messages
        unknown = self.state.reporter.error(
            'Unknown interpreted text role "%s".' % role_name,
            nodes.literal_block(self.block_text, self.block_text),
            line=self.lineno)
        return messages + [unknown]
class Title(Directive):

    """Store the directive argument as the document's "title" attribute."""

    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True

    def run(self):
        """Set ``document['title']`` and add no nodes to the tree."""
        title_text = self.arguments[0]
        self.state_machine.document['title'] = title_text
        return []
class MetaBody(states.SpecializedBody):

    """State used by the "meta" directive: each field becomes a meta node."""

    def field_marker(self, match, context, next_state):
        """Meta element."""
        result, blank_finish = self.parsemeta(match)
        self.parent += result
        return [], next_state, []

    def parsemeta(self, match):
        """Turn one field into a ``meta`` node.

        Returns ``(node, blank_finish)``.  ``node`` is a system message
        instead of a ``meta`` node when the field has no content or one
        of its attribute tokens cannot be parsed.
        """
        name = utils.unescape(
            utils.escape2null(self.parse_field_marker(match)))
        (indented, indent, line_offset, blank_finish) = (
            self.state_machine.get_first_known_indented(match.end()))
        meta_node = nodes.meta()
        meta_node['content'] = utils.unescape(
            utils.escape2null(' '.join(indented)))
        if not indented:
            current = self.state_machine.line
            notice = self.reporter.info(
                'No content for meta tag "%s".' % name,
                nodes.literal_block(current, current))
            return notice, blank_finish
        tokens = name.split()
        # First token: either a name=value pair or the plain meta name.
        try:
            attname, val = utils.extract_name_value(tokens[0])[0]
            meta_node[attname.lower()] = val
        except utils.NameValueError:
            meta_node['name'] = tokens[0]
        # Remaining tokens must all be name=value pairs.
        for token in tokens[1:]:
            try:
                attname, val = utils.extract_name_value(token)[0]
                meta_node[attname.lower()] = val
            except utils.NameValueError as detail:
                current = self.state_machine.line
                failure = self.reporter.error(
                    'Error parsing meta tag attribute "%s": %s.'
                    % (token, detail),
                    nodes.literal_block(current, current))
                return failure, blank_finish
        return meta_node, blank_finish
class Meta(Directive):

    """Parse the directive content with `MetaBody` and insert the result."""

    has_content = True

    # Restrict the nested state machine to the MetaBody state.
    SMkwargs = {'state_classes': (MetaBody,)}

    def run(self):
        """Insert the parsed nodes near the start of the document.

        The nodes go before the first document child that is neither
        ``nodes.Titular`` nor ``nodes.meta``.  An error message node is
        added when the content was not consumed completely.  Always
        returns an empty list.
        """
        self.assert_has_content()
        holder = nodes.Element()
        new_line_offset, blank_finish = self.state.nested_list_parse(
            self.content, self.content_offset, holder,
            initial_state='MetaBody', blank_finish=True,
            state_machine_kwargs=self.SMkwargs)
        consumed = new_line_offset - self.content_offset
        if consumed != len(self.content):
            # incomplete parse of block?
            holder += self.state_machine.reporter.error(
                'Invalid meta directive.',
                nodes.literal_block(self.block_text, self.block_text),
                line=self.lineno)
        # insert at begin of document
        document = self.state.document
        index = document.first_child_not_matching_class(
            (nodes.Titular, nodes.meta)) or 0
        document[index:index] = holder.children
        return []
class Date(Directive):

    """Insert the current date/time, formatted with `time.strftime`.

    The directive content is the format string; it defaults to
    ``%Y-%m-%d``.
    """

    has_content = True

    def run(self):
        """Return a single Text node holding the formatted current time.

        Raises (via ``self.error``) outside a substitution definition.
        On Python 2 only, raises (via ``self.warning``) when the format
        string cannot be encoded, or the result cannot be decoded, with
        the locale encoding.
        """
        if not isinstance(self.state, states.SubstitutionDef):
            raise self.error(
                'Invalid context: the "%s" directive can only be used within '
                'a substitution definition.' % self.name)
        format_str = '\n'.join(self.content) or '%Y-%m-%d'
        if sys.version_info < (3, 0):
            # Encode the format string before passing it to strftime.
            try:
                format_str = format_str.encode(locale_encoding or 'utf-8')
            except UnicodeEncodeError:
                raise self.warning(u'Cannot encode date format string '
                    u'with locale encoding "%s".' % locale_encoding)
        # @@@
        # Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable?
        # Pro: Docutils-generated documentation
        #      can easily be part of `reproducible software builds`__
        #
        #      __ https://reproducible-builds.org/
        #
        # Con: Changes the specs, hard to predict behaviour,
        #
        # See also the discussion about \date \time \year in TeX
        # http://tug.org/pipermail/tex-k/2016-May/002704.html
        # source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
        # if (source_date_epoch):
        #     text = time.strftime(format_str,
        #                          time.gmtime(int(source_date_epoch)))
        # else:
        text = time.strftime(format_str)
        if sys.version_info < (3, 0):
            # `text` is a byte string that may contain non-ASCII characters:
            try:
                text = text.decode(locale_encoding or 'utf-8')
            except UnicodeDecodeError:
                text = text.decode(locale_encoding or 'utf-8', 'replace')
                # A space separates the quoted text from the rest of
                # the message.
                raise self.warning(u'Error decoding "%s" '
                    u'with locale encoding "%s".' % (text, locale_encoding))
        return [nodes.Text(text)]
class TestDirective(Directive):

    """This directive is useful only for testing purposes."""

    optional_arguments = 1
    final_argument_whitespace = True
    option_spec = {'option': directives.unchanged_required}
    has_content = True

    def run(self):
        """Report name, arguments, options and content as an info message."""
        reporter = self.state_machine.reporter
        if not self.content:
            return [reporter.info(
                'Directive processed. Type="%s", arguments=%r, options=%r, '
                'content: None' % (self.name, self.arguments, self.options),
                line=self.lineno)]
        text = '\n'.join(self.content)
        return [reporter.info(
            'Directive processed. Type="%s", arguments=%r, options=%r, '
            'content:' % (self.name, self.arguments, self.options),
            nodes.literal_block(text, text), line=self.lineno)]
647 # Old-style, functional definition:
649 # def directive_test_function(name, arguments, options, content, lineno,
650 # content_offset, block_text, state, state_machine):
651 # """This directive is useful only for testing purposes."""
652 # if content:
653 # text = '\n'.join(content)
654 # info = state_machine.reporter.info(
655 # 'Directive processed. Type="%s", arguments=%r, options=%r, '
656 # 'content:' % (name, arguments, options),
657 # nodes.literal_block(text, text), line=lineno)
658 # else:
659 # info = state_machine.reporter.info(
660 # 'Directive processed. Type="%s", arguments=%r, options=%r, '
661 # 'content: None' % (name, arguments, options), line=lineno)
662 # return [info]
664 # directive_test_function.arguments = (0, 1, 1)
665 # directive_test_function.options = {'option': directives.unchanged_required}
666 # directive_test_function.content = 1