3 # Author: David Goodger <goodger@python.org>
4 # Copyright: This module has been placed in the public domain.
7 Miscellaneous utilities for the documentation utilities.
10 __docformat__
= 'reStructuredText'
19 from docutils
import ApplicationError
, DataError
, __version_info__
20 from docutils
import nodes
21 from docutils
.nodes
import unescape
23 from docutils
.utils
.error_reporting
import ErrorOutput
, SafeString
25 if sys
.version_info
>= (3, 0):
29 class SystemMessage(ApplicationError
):
31 def __init__(self
, system_message
, level
):
32 Exception.__init
__(self
, system_message
.astext())
class SystemMessagePropagation(ApplicationError):
    """An `ApplicationError` subclass that adds no behaviour of its own."""
39 class Reporter(object):
42 Info/warning/error reporter and ``system_message`` element generator.
44 Five levels of system messages are defined, along with corresponding
45 methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
47 There is typically one Reporter object per process. A Reporter object is
48 instantiated with thresholds for reporting (generating warnings) and
49 halting processing (raising exceptions), a switch to turn debug output on
50 or off, and an I/O stream for warnings. These are stored as instance
53 When a system message is generated, its level is compared to the stored
54 thresholds, and a warning or error is generated as appropriate. Debug
55 messages are produced if the stored debug switch is on, independently of
56 other thresholds. Message output is sent to the stored warning stream if
59 The Reporter class also employs a modified form of the "Observer" pattern
60 [GoF95]_ to track system messages generated. The `attach_observer` method
61 should be called before parsing, with a bound method or function which
62 accepts system messages. The observer can be removed with
63 `detach_observer`, and another added in its place.
65 .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
66 Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
70 levels
= 'DEBUG INFO WARNING ERROR SEVERE'.split()
71 """List of names for system message levels, indexed by level."""
73 # system message level constants:
78 SEVERE_LEVEL
) = range(5)
80 def __init__(self
, source
, report_level
, halt_level
, stream
=None,
81 debug
=False, encoding
=None, error_handler
='backslashreplace'):
84 - `source`: The path to or description of the source data.
85 - `report_level`: The level at or above which warning output will
87 - `halt_level`: The level at or above which `SystemMessage`
88 exceptions will be raised, halting execution.
89 - `debug`: Show debug (level=0) system messages?
90 - `stream`: Where warning output is sent. Can be file-like (has a
91 ``.write`` method), a string (file name, opened for writing),
92 '' (empty string) or `False` (for discarding all stream messages)
93 or `None` (implies `sys.stderr`; default).
94 - `encoding`: The output encoding.
95 - `error_handler`: The error handler for stderr output encoding.
99 """The path to or description of the source data."""
101 self
.error_handler
= error_handler
102 """The character encoding error handler."""
104 self
.debug_flag
= debug
105 """Show debug (level=0) system messages?"""
107 self
.report_level
= report_level
108 """The level at or above which warning output will be sent
111 self
.halt_level
= halt_level
112 """The level at or above which `SystemMessage` exceptions
113 will be raised, halting execution."""
115 if not isinstance(stream
, ErrorOutput
):
116 stream
= ErrorOutput(stream
, encoding
, error_handler
)
119 """Where warning output is sent."""
121 self
.encoding
= encoding
or getattr(stream
, 'encoding', 'ascii')
122 """The output character encoding."""
125 """List of bound methods or functions to call with each system_message
129 """The highest level system message generated so far."""
131 def set_conditions(self
, category
, report_level
, halt_level
,
132 stream
=None, debug
=False):
133 warnings
.warn('docutils.utils.Reporter.set_conditions deprecated; '
134 'set attributes via configuration settings or directly',
135 DeprecationWarning, stacklevel
=2)
136 self
.report_level
= report_level
137 self
.halt_level
= halt_level
138 if not isinstance(stream
, ErrorOutput
):
139 stream
= ErrorOutput(stream
, self
.encoding
, self
.error_handler
)
141 self
.debug_flag
= debug
def attach_observer(self, observer):
    """
    Append `observer` to the list of attached observers.

    The `observer` parameter is a function or bound method which takes one
    argument, a `nodes.system_message` instance.
    """
    self.observers.append(observer)
def detach_observer(self, observer):
    """
    Remove `observer` from the list of attached observers.

    Raises `ValueError` (from ``list.remove``) if `observer` is not
    currently attached.
    """
    self.observers.remove(observer)
def notify_observers(self, message):
    """Call each attached observer with `message`, in list order."""
    for observer in self.observers:
        observer(message)
157 def system_message(self
, level
, message
, *children
, **kwargs
):
159 Return a system_message object.
161 Raise an exception or generate a warning if appropriate.
163 # `message` can be a `string`, `unicode`, or `Exception` instance.
164 if isinstance(message
, Exception):
165 message
= SafeString(message
)
167 attributes
= kwargs
.copy()
168 if 'base_node' in kwargs
:
169 source
, line
= get_source_line(kwargs
['base_node'])
170 del attributes
['base_node']
171 if source
is not None:
172 attributes
.setdefault('source', source
)
174 attributes
.setdefault('line', line
)
175 # assert source is not None, "node has line- but no source-argument"
176 if not 'source' in attributes
: # 'line' is absolute line number
177 try: # look up (source, line-in-source)
178 source
, line
= self
.get_source_and_line(attributes
.get('line'))
179 except AttributeError:
180 source
, line
= None, None
181 if source
is not None:
182 attributes
['source'] = source
184 attributes
['line'] = line
185 # assert attributes['line'] is not None, (message, kwargs)
186 # assert attributes['source'] is not None, (message, kwargs)
187 attributes
.setdefault('source', self
.source
)
189 msg
= nodes
.system_message(message
, level
=level
,
190 type=self
.levels
[level
],
191 *children
, **attributes
)
192 if self
.stream
and (level
>= self
.report_level
193 or self
.debug_flag
and level
== self
.DEBUG_LEVEL
194 or level
>= self
.halt_level
):
195 self
.stream
.write(msg
.astext() + '\n')
196 if level
>= self
.halt_level
:
197 raise SystemMessage(msg
, level
)
198 if level
> self
.DEBUG_LEVEL
or self
.debug_flag
:
199 self
.notify_observers(msg
)
200 self
.max_level
= max(level
, self
.max_level
)
def debug(self, *args, **kwargs):
    """
    Level-0, "DEBUG": an internal reporting issue. Typically, there is no
    effect on the processing. Level-0 system messages are handled
    separately from the others.

    All arguments are forwarded to `system_message()`; its result is
    returned.
    """
    return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
def info(self, *args, **kwargs):
    """
    Level-1, "INFO": a minor issue that can be ignored. Typically there is
    no effect on processing, and level-1 system messages are not reported.

    All arguments are forwarded to `system_message()`; its result is
    returned.
    """
    return self.system_message(self.INFO_LEVEL, *args, **kwargs)
def warning(self, *args, **kwargs):
    """
    Level-2, "WARNING": an issue that should be addressed. If ignored,
    there may be unpredictable problems with the output.

    All arguments are forwarded to `system_message()`; its result is
    returned.
    """
    return self.system_message(self.WARNING_LEVEL, *args, **kwargs)
def error(self, *args, **kwargs):
    """
    Level-3, "ERROR": an error that should be addressed. If ignored, the
    output will contain errors.

    All arguments are forwarded to `system_message()`; its result is
    returned.
    """
    return self.system_message(self.ERROR_LEVEL, *args, **kwargs)
def severe(self, *args, **kwargs):
    """
    Level-4, "SEVERE": a severe error that must be addressed. If ignored,
    the output will contain severe errors. Typically level-4 system
    messages are turned into exceptions which halt processing.

    All arguments are forwarded to `system_message()`; its result is
    returned.
    """
    return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):
    """Base class for the extension-option errors defined in this module."""
class BadOptionError(ExtensionOptionError):
    """Raised for invalid extension option fields."""
class BadOptionDataError(ExtensionOptionError):
    """Raised for invalid option data (missing name, missing data, bad
    quotes, etc.)."""
class DuplicateOptionError(ExtensionOptionError):
    """Raised when the same extension option is given more than once."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
          function).
        - `TypeError` for invalid option value types (raised by the
          conversion function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    # Two stages: pull raw (name, value) pairs out of the field list, then
    # validate and convert them against `options_spec`.
    option_list = extract_options(field_list)
    option_dict = assemble_option_dict(option_list, options_spec)
    return option_dict
273 def extract_options(field_list
):
275 Return a list of option (name, value) pairs from field names & bodies.
278 `field_list`: A flat field list, where each field name is a single
279 word and each field body consists of a single paragraph only.
282 - `BadOptionError` for invalid fields.
283 - `BadOptionDataError` for invalid option data (missing name,
284 missing data, bad quotes, etc.).
287 for field
in field_list
:
288 if len(field
[0].astext().split()) != 1:
289 raise BadOptionError(
290 'extension option field name may not contain multiple words')
291 name
= str(field
[0].astext().lower())
295 elif len(body
) > 1 or not isinstance(body
[0], nodes
.paragraph
) \
296 or len(body
[0]) != 1 or not isinstance(body
[0][0], nodes
.Text
):
297 raise BadOptionDataError(
298 'extension option field body may contain\n'
299 'a single paragraph only (option "%s")' % name
)
301 data
= body
[0][0].astext()
302 option_list
.append((name
, data
))
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Re-raise the same exception type, prefixed with the option
            # name and raw value so the caller can tell which option failed.
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, ' '.join(detail.args)))
    return options
class NameValueError(DataError):
    """Raised by `extract_name_value()` for invalid "name=value" input."""
341 def decode_path(path
):
343 Ensure `path` is Unicode. Return `nodes.reprunicode` object.
345 Decode file/path string in a failsafe manner if not already done.
347 # see also http://article.gmane.org/gmane.text.docutils.user/2905
348 if isinstance(path
, unicode):
351 path
= path
.decode(sys
.getfilesystemencoding(), 'strict')
352 except AttributeError: # default value None has no decode method
354 return nodes
.reprunicode('')
355 raise ValueError('`path` value must be a String or ``None``, not %r'
357 except UnicodeDecodeError:
359 path
= path
.decode('utf-8', 'strict')
360 except UnicodeDecodeError:
361 path
= path
.decode('ascii', 'replace')
362 return nodes
.reprunicode(path
)
365 def extract_name_value(line
):
367 Return a list of (name, value) from a line of the form "name=value ...".
370 `NameValueError` for invalid input (missing name, missing data, bad
375 equals
= line
.find('=')
377 raise NameValueError('missing "="')
378 attname
= line
[:equals
].strip()
379 if equals
== 0 or not attname
:
380 raise NameValueError(
381 'missing attribute name before "="')
382 line
= line
[equals
+1:].lstrip()
384 raise NameValueError(
385 'missing value after "%s="' % attname
)
387 endquote
= line
.find(line
[0], 1)
389 raise NameValueError(
390 'attribute "%s" missing end quote (%s)'
391 % (attname
, line
[0]))
392 if len(line
) > endquote
+ 1 and line
[endquote
+ 1].strip():
393 raise NameValueError(
394 'attribute "%s" end quote (%s) not followed by '
395 'whitespace' % (attname
, line
[0]))
396 data
= line
[1:endquote
]
397 line
= line
[endquote
+1:].lstrip()
399 space
= line
.find(' ')
405 line
= line
[space
+1:].lstrip()
406 attlist
.append((attname
.lower(), data
))
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Must provide the attributes `report_level`, `halt_level`,
            `warning_stream`, `debug`, `error_encoding` and
            `error_encoding_error_handler`, which are passed on to the
            `Reporter` constructor.
    """
    reporter = Reporter(
        source_path, settings.report_level, settings.halt_level,
        stream=settings.warning_stream, debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
    return reporter
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.  For
            example, if parsing, at least provide the parser settings,
            obtainable as follows::

                settings = docutils.frontend.OptionParser(
                    components=(docutils.parsers.rst.Parser,)
                    ).get_default_values()
    """
    # Imported inside the function, as in the original block.
    from docutils import frontend
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    source_path = decode_path(source_path)
    reporter = new_reporter(source_path, settings)
    document = nodes.document(settings, reporter, source=source_path)
    document.note_source(source_path, -1)
    return document
453 def clean_rcs_keywords(paragraph
, keyword_substitutions
):
454 if len(paragraph
) == 1 and isinstance(paragraph
[0], nodes
.Text
):
455 textnode
= paragraph
[0]
456 for pattern
, substitution
in keyword_substitutions
:
457 match
= pattern
.search(textnode
)
459 paragraph
[0] = nodes
.Text(pattern
.sub(substitution
, textnode
))
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator (suitable for URLs).  A false
    `source` (e.g. ``None``) is replaced by a dummy file name of the same
    string type as `target`, resolved against the current directory.
    """
    source_parts = os.path.abspath(source or type(target)('dummy_file')
                                   ).split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components the two paths have in common:
    common = 0
    for source_part, target_part in zip(source_parts, target_parts):
        if source_part != target_part:
            break
        common += 1
    source_rest = source_parts[common:]
    target_rest = target_parts[common:]
    # One '..' per remaining source directory; the last remaining source
    # component is the file itself, hence the "- 1".
    parts = ['..'] * (len(source_rest) - 1) + target_rest
    return '/'.join(parts)
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.

    If `settings.stylesheet_path` is set, return it rewritten relative to
    `relative_to` (default: `settings._destination`); otherwise return
    `settings.stylesheet` unchanged.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    return settings.stylesheet
504 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
506 # The original settings arguments are kept unchanged: you can test
507 # with e.g. ``if settings.stylesheet_path:``
509 # Differences to ``get_stylesheet_reference``:
510 # * return value is a list
511 # * no re-writing of the path (and therefore no optional argument)
512 # (if required, use ``utils.relative_path(source, target)``
513 # in the calling script)
def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    `settings.stylesheet_path` takes precedence over `settings.stylesheet`;
    a string value is split on commas.  Each entry is passed through
    `find_file_in_dirs()` with `settings.stylesheet_dirs`.
    """
    assert not (settings.stylesheet and settings.stylesheet_path), (
        'stylesheet and stylesheet_path are mutually exclusive.')
    stylesheets = settings.stylesheet_path or settings.stylesheet or []
    # programmatically set default can be string or unicode:
    if not isinstance(stylesheets, list):
        stylesheets = [path.strip() for path in stylesheets.split(',')]
    # expand relative paths if found in stylesheet-dirs:
    return [find_file_in_dirs(path, settings.stylesheet_dirs)
            for path in stylesheets]
528 def find_file_in_dirs(path
, dirs
):
530 Search for `path` in the list of directories `dirs`.
532 Return the first expansion that matches an existing file.
534 if os
.path
.isabs(path
):
540 d
= os
.path
.expanduser(d
)
541 f
= os
.path
.join(d
, path
)
542 if os
.path
.exists(f
):
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    # `setdefault` is called on the settings object itself, so a missing
    # attribute is treated like an explicit None.
    if settings.setdefault('trim_footnote_reference_space', None) is None:
        return getattr(settings, 'footnote_references', None) == 'superscript'
    return settings.trim_footnote_reference_space
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The first node (walking up via ``.parent``) with a true `source` or
    `line` wins; ``(None, None)`` is returned if there is none.
    """
    while node:
        if node.source or node.line:
            return node.source, node.line
        node = node.parent
    return None, None
def escape2null(text):
    """Return a string with escape-backslashes converted to nulls."""
    parts = []
    start = 0
    while True:
        found = text.find('\\', start)
        if found == -1:
            # No further backslash: the remainder is copied verbatim.
            parts.append(text[start:])
            return ''.join(parts)
        parts.append(text[start:found])
        # Replace the backslash by a null and keep the escaped character
        # (if any) so that it is not itself treated as an escape.
        parts.append('\x00' + text[found+1:found+2])
        start = found + 2               # skip character after escape
584 # `unescape` definition moved to `nodes` to avoid circular import dependency.
def split_escaped_whitespace(text):
    """
    Split `text` on escaped whitespace (null+space or null+newline).
    Return a list of strings.
    """
    strings = text.split('\x00 ')
    strings = [string.split('\x00\n') for string in strings]
    # flatten list of lists of strings to list of strings:
    return list(itertools.chain.from_iterable(strings))
596 def strip_combining_chars(text
):
597 if isinstance(text
, str) and sys
.version_info
< (3, 0):
599 return u
''.join([c
for c
in text
if not unicodedata
.combining(c
)])
601 def find_combining_chars(text
):
602 """Return indices of all combining chars in Unicode string `text`.
604 >>> from docutils.utils import find_combining_chars
605 >>> find_combining_chars(u'A t̆ab̆lĕ')
609 if isinstance(text
, str) and sys
.version_info
< (3, 0):
611 return [i
for i
,c
in enumerate(text
) if unicodedata
.combining(c
)]
def column_indices(text):
    """Indices of Unicode string `text` when skipping combining characters.

    >>> from docutils.utils import column_indices
    >>> column_indices(u'A t̆ab̆lĕ')
    [0, 1, 2, 4, 5, 7, 8]

    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    string_indices = list(range(len(text)))
    # Blank out the positions of combining characters, then drop them.
    for index in find_combining_chars(text):
        string_indices[index] = None
    return [i for i in string_indices if i is not None]
628 east_asian_widths
= {'W': 2, # Wide
629 'F': 2, # Full-width (wide)
631 'H': 1, # Half-width (narrow)
632 'N': 1, # Neutral (not East Asian, treated as narrow)
633 'A': 1} # Ambiguous (s/b wide in East Asian context,
634 # narrow otherwise, but that doesn't work)
635 """Mapping of result codes from `unicodedata.east_asian_width()` to character
638 def column_width(text
):
639 """Return the column width of text.
641 Correct ``len(text)`` for wide East Asian and combining Unicode chars.
643 if isinstance(text
, str) and sys
.version_info
< (3, 0):
645 width
= sum([east_asian_widths
[unicodedata
.east_asian_width(c
)]
647 # correction for combining chars:
648 width
-= len(find_combining_chars(text
))
def unique_combinations(items, n):
    """Return `itertools.combinations`.

    Deprecated: emits a `DeprecationWarning` and delegates to
    ``itertools.combinations(items, n)``.
    """
    warnings.warn('docutils.utils.unique_combinations is deprecated; '
                  'use itertools.combinations directly.',
                  DeprecationWarning, stacklevel=2)
    return itertools.combinations(items, n)
def normalize_language_tag(tag):
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The result is ordered from the most specific combination to the bare
    base tag.

    Example:

    >>> from docutils.utils import normalize_language_tag
    >>> normalize_language_tag('de_AT-1901')
    ['de-at-1901', 'de-at', 'de-1901', 'de']
    >>> normalize_language_tag('de-CH-x_altquot')
    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']

    """
    # normalize:
    tag = tag.lower().replace('-', '_')
    # split (except singletons, which mark the following tag as non-standard):
    tag = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', tag)
    subtags = tag.split('_')
    base_tag = (subtags.pop(0),)
    # find all combinations of subtags
    taglist = []
    for n in range(len(subtags), 0, -1):
        for tags in itertools.combinations(subtags, n):
            taglist.append('-'.join(base_tag + tags))
    # the bare base tag is always the last (least specific) entry
    taglist += base_tag
    return taglist
692 class DependencyList(object):
695 List of dependencies, with file recording support.
697 Note that the output file is not automatically closed. You have
698 to explicitly call the close() method.
701 def __init__(self
, output_file
=None, dependencies
=[]):
703 Initialize the dependency list, automatically setting the
704 output file to `output_file` (see `set_output()`) and adding
705 all supplied dependencies.
707 self
.set_output(output_file
)
708 for i
in dependencies
:
711 def set_output(self
, output_file
):
713 Set the output file and clear the list of already added
716 `output_file` must be a string. The specified file is
717 immediately overwritten.
719 If output_file is '-', the output will be written to stdout.
720 If it is None, no file output is done when calling add().
724 if output_file
== '-':
728 self
.file = docutils
.io
.FileOutput(destination_path
=of
,
729 encoding
='utf8', autoclose
=False)
def add(self, *filenames):
    """
    If the dependency `filename` has not already been added,
    append it to self.list and print it to self.file if self.file
    is not None.
    """
    for filename in filenames:
        if filename not in self.list:
            self.list.append(filename)
            if self.file is not None:
                self.file.write(filename+'\n')
747 Close the output file.
754 output_file
= self
.file.name
755 except AttributeError:
757 return '%s(%r, %s)' % (self
.__class
__.__name
__, output_file
, self
.list)
760 release_level_abbreviations
= {
766 def version_identifier(version_info
=None):
768 Return a version identifier string built from `version_info`, a
769 `docutils.VersionInfo` namedtuple instance or compatible tuple. If
770 `version_info` is not provided, by default return a version identifier
771 string based on `docutils.__version_info__` (i.e. the current Docutils
774 if version_info
is None:
775 version_info
= __version_info__
776 if version_info
.micro
:
777 micro
= '.%s' % version_info
.micro
781 releaselevel
= release_level_abbreviations
[version_info
.releaselevel
]
782 if version_info
.serial
:
783 serial
= version_info
.serial
787 if version_info
.release
:
791 version
= '%s.%s%s%s%s%s' % (