3 # Author: David Goodger <goodger@python.org>
4 # Copyright: This module has been placed in the public domain.
7 Miscellaneous utilities for the documentation utilities.
10 __docformat__
= 'reStructuredText'
19 from docutils
import ApplicationError
, DataError
, __version_info__
20 from docutils
import nodes
22 from docutils
.utils
.error_reporting
import ErrorOutput
, SafeString
class SystemMessage(ApplicationError):

    """
    Exception raised for a system message that halts processing.

    The exception text is the ``astext()`` rendering of the
    `system_message` object passed in; the numeric severity is kept in
    the `level` attribute.
    """

    def __init__(self, system_message, level):
        Exception.__init__(self, system_message.astext())
        # Keep the severity so handlers can inspect it.
        self.level = level
class SystemMessagePropagation(ApplicationError):
    """`ApplicationError` subclass that adds no behaviour of its own."""
38 Info/warning/error reporter and ``system_message`` element generator.
40 Five levels of system messages are defined, along with corresponding
41 methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
There is typically one Reporter object per process.  A Reporter object is
instantiated with thresholds for reporting (generating warnings) and
halting processing (raising exceptions), a switch to turn debug output on
or off, and an I/O stream for warnings.  These are stored as instance
variables.
When a system message is generated, its level is compared to the stored
thresholds, and a warning or error is generated as appropriate.  Debug
messages are produced if the stored debug switch is on, independently of
other thresholds.  Message output is sent to the stored warning stream if
not set to ''.
55 The Reporter class also employs a modified form of the "Observer" pattern
56 [GoF95]_ to track system messages generated. The `attach_observer` method
57 should be called before parsing, with a bound method or function which
58 accepts system messages. The observer can be removed with
59 `detach_observer`, and another added in its place.
.. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
   Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
   1995.
levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
"""List of names for system message levels, indexed by level."""

# system message level constants (each is the index of its name in `levels`):
(DEBUG_LEVEL,
 INFO_LEVEL,
 WARNING_LEVEL,
 ERROR_LEVEL,
 SEVERE_LEVEL) = range(5)
def __init__(self, source, report_level, halt_level, stream=None,
             debug=False, encoding=None, error_handler='backslashreplace'):
    """
    :Parameters:
        - `source`: The path to or description of the source data.
        - `report_level`: The level at or above which warning output will
          be sent to `stream`.
        - `halt_level`: The level at or above which `SystemMessage`
          exceptions will be raised, halting execution.
        - `debug`: Show debug (level=0) system messages?
        - `stream`: Where warning output is sent.  Can be file-like (has a
          ``.write`` method), a string (file name, opened for writing),
          '' (empty string) or `False` (for discarding all stream messages)
          or `None` (implies `sys.stderr`; default).
        - `encoding`: The output encoding.
        - `error_handler`: The error handler for stderr output encoding.
    """

    self.source = source
    """The path to or description of the source data."""

    self.error_handler = error_handler
    """The character encoding error handler."""

    self.debug_flag = debug
    """Show debug (level=0) system messages?"""

    self.report_level = report_level
    """The level at or above which warning output will be sent
    to `self.stream`."""

    self.halt_level = halt_level
    """The level at or above which `SystemMessage` exceptions
    will be raised, halting execution."""

    # Anything that is not already an `ErrorOutput` is wrapped in one.
    if not isinstance(stream, ErrorOutput):
        stream = ErrorOutput(stream, encoding, error_handler)

    self.stream = stream
    """Where warning output is sent."""

    # Fall back to the wrapped stream's encoding, then to ASCII.
    self.encoding = encoding or getattr(stream, 'encoding', 'ascii')
    """The output character encoding."""

    self.observers = []
    """List of bound methods or functions to call with each system_message
    created."""

    # Below the lowest level (0), so the first message always raises it.
    self.max_level = -1
    """The highest level system message generated so far."""
def set_conditions(self, category, report_level, halt_level,
                   stream=None, debug=False):
    """
    Deprecated: reset the thresholds, warning stream and debug switch.

    Emits a `DeprecationWarning`.  `category` is accepted but not used.
    A `stream` that is not already an `ErrorOutput` is wrapped in one
    using the stored encoding and error handler.
    """
    warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                  'set attributes via configuration settings or directly',
                  DeprecationWarning, stacklevel=2)
    self.report_level = report_level
    self.halt_level = halt_level
    if not isinstance(stream, ErrorOutput):
        stream = ErrorOutput(stream, self.encoding, self.error_handler)
    # Store the (possibly wrapped) stream; otherwise the wrapper is lost.
    self.stream = stream
    self.debug_flag = debug
def attach_observer(self, observer):
    """
    Add `observer` to the end of the list of observers.

    `observer` is a function or bound method taking a single argument,
    a `nodes.system_message` instance.
    """
    registered = self.observers
    registered.append(observer)
def detach_observer(self, observer):
    """Remove the first occurrence of `observer` from the observer list."""
    registered = self.observers
    registered.remove(observer)
def notify_observers(self, message):
    """Call each attached observer, in list order, with `message`."""
    for observer in self.observers:
        observer(message)
def system_message(self, level, message, *children, **kwargs):
    """
    Return a system_message object.

    Raise an exception or generate a warning if appropriate.

    `kwargs` become attributes of the message, except for ``base_node``,
    which is only used to look up default ``source`` and ``line`` values.
    Raises `SystemMessage` when `level` reaches the halt level.
    """
    # `message` can be a `string`, `unicode`, or `Exception` instance.
    if isinstance(message, Exception):
        message = SafeString(message)

    attributes = kwargs.copy()
    if 'base_node' in kwargs:
        source, line = get_source_line(kwargs['base_node'])
        del attributes['base_node']
        # Explicit 'source'/'line' keyword arguments take precedence.
        if source is not None:
            attributes.setdefault('source', source)
        if line is not None:
            attributes.setdefault('line', line)
    if 'source' not in attributes:  # 'line' is absolute line number
        try:  # look up (source, line-in-source)
            source, line = self.get_source_and_line(attributes.get('line'))
        except AttributeError:
            # no `get_source_and_line` locator available
            source, line = None, None
        if source is not None:
            attributes['source'] = source
        if line is not None:
            attributes['line'] = line
    # Last resort: the source this reporter was created for.
    attributes.setdefault('source', self.source)

    msg = nodes.system_message(message, level=level,
                               type=self.levels[level],
                               *children, **attributes)
    if self.stream and (level >= self.report_level
                        or self.debug_flag and level == self.DEBUG_LEVEL
                        or level >= self.halt_level):
        self.stream.write(msg.astext() + '\n')
    if level >= self.halt_level:
        raise SystemMessage(msg, level)
    # Debug messages reach the observers only when the debug switch is on.
    if level > self.DEBUG_LEVEL or self.debug_flag:
        self.notify_observers(msg)
    self.max_level = max(level, self.max_level)
    return msg
def debug(self, *args, **kwargs):
    """
    Level-0, "DEBUG": an internal reporting issue. Typically, there is no
    effect on the processing. Level-0 system messages are handled
    separately from the others.

    A message is only generated (and returned) while the debug switch is
    on; otherwise nothing is done and `None` is returned.
    """
    if self.debug_flag:
        return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
def info(self, *args, **kwargs):
    """
    Generate a level-1 ("INFO") system message: a minor issue that can be
    ignored.  Typically there is no effect on processing, and level-1
    system messages are not reported.
    """
    emit = self.system_message
    return emit(self.INFO_LEVEL, *args, **kwargs)
def warning(self, *args, **kwargs):
    """
    Generate a level-2 ("WARNING") system message: an issue that should
    be addressed.  If ignored, there may be unpredictable problems with
    the output.
    """
    emit = self.system_message
    return emit(self.WARNING_LEVEL, *args, **kwargs)
def error(self, *args, **kwargs):
    """
    Generate a level-3 ("ERROR") system message: an error that should be
    addressed.  If ignored, the output will contain errors.
    """
    emit = self.system_message
    return emit(self.ERROR_LEVEL, *args, **kwargs)
def severe(self, *args, **kwargs):
    """
    Generate a level-4 ("SEVERE") system message: a severe error that
    must be addressed.  If ignored, the output will contain severe
    errors.  Typically level-4 system messages are turned into exceptions
    which halt processing.
    """
    emit = self.system_message
    return emit(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):
    """Base class of the extension option errors defined in this module."""
class BadOptionError(ExtensionOptionError):
    """Raised for an invalid extension option field."""
class BadOptionDataError(ExtensionOptionError):
    """Raised for invalid extension option data."""
class DuplicateOptionError(ExtensionOptionError):
    """Raised when an extension option is given more than once."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    option_list = extract_options(field_list)
    return assemble_option_dict(option_list, options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    option_list = []
    for field in field_list:
        # field[0] is the field name, field[1] the field body.
        if len(field[0].astext().split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(field[0].astext().lower())
        body = field[1]
        if len(body) == 0:
            # an option given without a value
            data = None
        elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
              or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
            raise BadOptionDataError(
                  'extension option field body may contain\n'
                  'a single paragraph only (option "%s")' % name)
        else:
            data = body[0][0].astext()
        option_list.append((name, data))
    return option_list
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Re-raise the same exception type, with the offending option
            # and value prepended to the original message.
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, ' '.join(detail.args)))
    return options
class NameValueError(DataError):
    """Raised by `extract_name_value()` for invalid input."""
def decode_path(path):
    """
    Ensure `path` is Unicode. Return `nodes.reprunicode` object.

    Decode file/path string in a failsafe manner if not already done:
    the file system encoding is tried first, then UTF-8, and finally
    ASCII with undecodable bytes replaced.  A `unicode` instance is
    returned unchanged.
    """
    # see also http://article.gmane.org/gmane.text.docutils.user/2905
    if isinstance(path, unicode):
        return path
    try:
        path = path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:  # default value None has no decode method
        return nodes.reprunicode(path)
    except UnicodeDecodeError:
        try:
            path = path.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            path = path.decode('ascii', 'replace')
    return nodes.reprunicode(path)
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  A value is either quoted (single or double
    quotes, closing quote followed by whitespace or end of line) or runs
    up to the next space.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    attlist = []
    while line:
        equals = line.find('=')
        if equals == -1:
            raise NameValueError('missing "="')
        attname = line[:equals].strip()
        if equals == 0 or not attname:
            raise NameValueError(
                  'missing attribute name before "="')
        line = line[equals+1:].lstrip()
        if not line:
            raise NameValueError(
                  'missing value after "%s="' % attname)
        if line[0] in '\'"':
            # quoted value: find the matching closing quote
            endquote = line.find(line[0], 1)
            if endquote == -1:
                raise NameValueError(
                      'attribute "%s" missing end quote (%s)'
                      % (attname, line[0]))
            if len(line) > endquote + 1 and line[endquote + 1].strip():
                raise NameValueError(
                      'attribute "%s" end quote (%s) not followed by '
                      'whitespace' % (attname, line[0]))
            data = line[1:endquote]
            line = line[endquote+1:].lstrip()
        else:
            # unquoted value: up to the next space or the end of the line
            space = line.find(' ')
            if space == -1:
                data = line
                line = ''
            else:
                data = line[:space]
                line = line[space+1:].lstrip()
        attlist.append((attname.lower(), data))
    return attlist
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The reporter is configured from its
            `report_level`, `halt_level`, `warning_stream`, `debug`,
            `error_encoding` and `error_encoding_error_handler` attributes.
    """
    reporter = Reporter(
        source_path, settings.report_level, settings.halt_level,
        stream=settings.warning_stream, debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
    return reporter
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.  For
            example, if parsing, at least provide the parser settings,
            obtainable as follows::

                settings = docutils.frontend.OptionParser(
                    components=(docutils.parsers.rst.Parser,)
                    ).get_default_values()
    """
    # Imported inside the function rather than at module level.
    from docutils import frontend
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    source_path = decode_path(source_path)
    reporter = new_reporter(source_path, settings)
    document = nodes.document(settings, reporter, source=source_path)
    document.note_source(source_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`.

    Only a paragraph consisting of a single `nodes.Text` child is
    processed.  `keyword_substitutions` is a sequence of
    (compiled pattern, substitution) pairs; the text node is replaced
    using the first pattern that matches and the remaining pairs are
    skipped.
    """
    if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
        textnode = paragraph[0]
        for pattern, substitution in keyword_substitutions:
            match = pattern.search(textnode)
            if match:
                paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
                return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.
    """
    # A false `source` is replaced by a dummy file name of the same string
    # type as `target`, i.e. a file in the current working directory.
    source_parts = os.path.abspath(source or type(target)('dummy_file')
                                  ).split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Reverse so the leading components can be popped from the end.
    source_parts.reverse()
    target_parts.reverse()
    while (source_parts and target_parts
           and source_parts[-1] == target_parts[-1]):
        # Remove path components in common:
        source_parts.pop()
        target_parts.pop()
    target_parts.reverse()
    # One '..' per remaining source directory (the last part is the file).
    parts = ['..'] * (len(source_parts) - 1) + target_parts
    return '/'.join(parts)
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.

    If `settings.stylesheet_path` is set, return it as a path relative to
    `relative_to` (default: `settings._destination`); otherwise return
    `settings.stylesheet` unchanged.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    return settings.stylesheet
498 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
500 # The original settings arguments are kept unchanged: you can test
501 # with e.g. ``if settings.stylesheet_path:``
503 # Differences to ``get_stylesheet_reference``:
504 # * return value is a list
505 # * no re-writing of the path (and therefore no optional argument)
506 # (if required, use ``utils.relative_path(source, target)``
507 # in the calling script)
def get_stylesheet_list(settings):
    """
    Return the stylesheet references from `settings` as a list.

    `settings.stylesheet_path` takes precedence over
    `settings.stylesheet`; setting both is an error.  A value that is
    not a list is split at commas.  Every entry is passed through
    `find_file_in_dirs()` with `settings.stylesheet_dirs`.
    """
    assert not settings.stylesheet or not settings.stylesheet_path, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    sheets = settings.stylesheet_path or settings.stylesheet or []
    if not isinstance(sheets, list):
        # a programmatically set default can be a string or unicode
        sheets = [entry.strip() for entry in sheets.split(',')]
    resolved = []
    for entry in sheets:
        # expand relative paths if found in stylesheet-dirs
        resolved.append(find_file_in_dirs(entry, settings.stylesheet_dirs))
    return resolved
def find_file_in_dirs(path, dirs):
    """
    Search for `path` in the list of directories `dirs`.

    Return the first expansion that matches an existing file.
    An absolute `path`, or one that is not found in any directory,
    is returned unchanged.
    """
    if os.path.isabs(path):
        return path
    for d in dirs:
        if d == '.':
            # keep the bare name instead of producing './name'
            f = path
        else:
            d = os.path.expanduser(d)
            f = os.path.join(d, path)
        if os.path.exists(f):
            return f
    return path
def get_trim_footnote_ref_space(settings):
    """
    Tell whether the space before a footnote reference is to be trimmed.

    An explicit (non-None) `trim_footnote_reference_space` setting is
    returned as is.  If it is None, the result is True only when a
    `footnote_references` setting exists and equals 'superscript'.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    if not hasattr(settings, 'footnote_references'):
        return False
    return settings.footnote_references == 'superscript'
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The first node (walking up via ``.parent``) with a true `source` or
    `line` supplies both values; ``(None, None)`` is returned if there
    is none.
    """
    while node:
        if node.source or node.line:
            return node.source, node.line
        node = node.parent
    return None, None
def escape2null(text):
    """Return a string with escape-backslashes converted to nulls."""
    parts = []
    start = 0
    while True:
        found = text.find('\\', start)
        if found == -1:
            # no more backslashes: keep the rest and finish
            parts.append(text[start:])
            return ''.join(parts)
        parts.append(text[start:found])
        # The backslash becomes a null; the escaped character is kept
        # verbatim (so an escaped backslash is not processed again).
        parts.append('\x00' + text[found+1:found+2])
        start = found + 2               # skip character after escape
def unescape(text, restore_backslashes=False, respect_whitespace=False):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.

    With `restore_backslashes`, every null is turned back into a
    backslash and nothing is removed.  `respect_whitespace` is accepted
    but has no effect here.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Remove escaped whitespace first, so that the null and the whitespace
    # character go together, then any remaining nulls.
    for sep in ['\x00 ', '\x00\n', '\x00']:
        text = text.replace(sep, '')
    return text
def split_escaped_whitespace(text):
    """
    Return the list of strings obtained by splitting `text` at every
    escaped whitespace (a null followed by a space or by a newline).
    """
    return [piece
            for chunk in text.split('\x00 ')
            for piece in chunk.split('\x00\n')]
def strip_combining_chars(text):
    """Return `text` as Unicode string with all combining chars removed.

    Under Python 2, a byte string is decoded as UTF-8 first.
    """
    if isinstance(text, str) and sys.version_info < (3,0):
        text = text.decode('utf8')
    return u''.join([c for c in text if not unicodedata.combining(c)])
def find_combining_chars(text):
    """Return indices of all combining chars in Unicode string `text`.

    Under Python 2, a byte string is decoded as UTF-8 first.

    >>> from docutils.utils import find_combining_chars
    >>> find_combining_chars(u'A t̆ab̆lĕ')
    [3, 6, 9]

    """
    if isinstance(text, str) and sys.version_info < (3,0):
        text = text.decode('utf8')
    return [i for i,c in enumerate(text) if unicodedata.combining(c)]
def column_indices(text):
    """Indices of Unicode string `text` when skipping combining characters.

    >>> from docutils.utils import column_indices
    >>> column_indices(u'A t̆ab̆lĕ')
    [0, 1, 2, 4, 5, 7, 8]

    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    # Filter instead of assigning into range(): a Python 3 range object
    # does not support item assignment.
    combining = set(find_combining_chars(text))
    return [i for i in range(len(text)) if i not in combining]
east_asian_widths = {'W': 2,   # Wide
                     'F': 2,   # Full-width (wide)
                     'Na': 1,  # Narrow
                     'H': 1,   # Half-width (narrow)
                     'N': 1,   # Neutral (not East Asian, treated as narrow)
                     'A': 1}   # Ambiguous (s/b wide in East Asian context,
                               # narrow otherwise, but that doesn't work)
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
def column_width(text):
    """Return the column width of text.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    Under Python 2, a byte string is measured with plain ``len()``.
    """
    if isinstance(text, str) and sys.version_info < (3,0):
        return len(text)
    width = sum([east_asian_widths[unicodedata.east_asian_width(c)]
                 for c in text])
    # correction for combining chars:
    width -= len(find_combining_chars(text))
    return width
def unique_combinations(items, n):
    """Deprecated: return ``itertools.combinations(items, n)``.

    A `DeprecationWarning` is issued on every call.
    """
    notice = ('docutils.utils.unique_combinations is deprecated; '
              'use itertools.combinations directly.')
    warnings.warn(notice, DeprecationWarning, stacklevel=2)
    combos = itertools.combinations(items, n)
    return combos
def normalize_language_tag(tag):
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The result starts with the most specific combination and ends with
    the bare primary subtag.

    Example:

    >>> from docutils.utils import normalize_language_tag
    >>> normalize_language_tag('de_AT-1901')
    ['de-at-1901', 'de-at', 'de-1901', 'de']
    >>> normalize_language_tag('de-CH-x_altquot')
    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']

    """
    # normalize:
    tag = tag.lower().replace('-','_')
    # split (except singletons, which mark the following tag as non-standard):
    tag = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', tag)
    subtags = tag.split('_')
    base_tag = (subtags.pop(0),)
    # find all combinations of subtags
    taglist = []
    for n in range(len(subtags), 0, -1):
        for tags in itertools.combinations(subtags, n):
            taglist.append('-'.join(base_tag+tags))
    # the primary subtag alone is the last fallback
    taglist.extend(base_tag)
    return taglist
class DependencyList(object):

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        # The default for `dependencies` is an immutable empty tuple;
        # the argument is only iterated.
        self.set_output(output_file)
        for i in dependencies:
            self.add(i)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file:
            if output_file == '-':
                # no destination path: `FileOutput` is left to pick the
                # default destination
                of = None
            else:
                of = output_file
            self.file = docutils.io.FileOutput(destination_path=of,
                                               encoding='utf8',
                                               autoclose=False)
        else:
            self.file = None

    def add(self, *filenames):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        for filename in filenames:
            if filename not in self.list:
                self.list.append(filename)
                if self.file is not None:
                    self.file.write(filename+'\n')

    def close(self):
        """
        Close the output file.

        Does nothing if there is no output file.
        """
        if self.file is not None:
            self.file.close()
            self.file = None

    def __repr__(self):
        try:
            output_file = self.file.name
        except AttributeError:
            # no output file, or one without a `name` attribute
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)
766 release_level_abbreviations
= {
772 def version_identifier(version_info
=None):
774 Return a version identifier string built from `version_info`, a
775 `docutils.VersionInfo` namedtuple instance or compatible tuple. If
776 `version_info` is not provided, by default return a version identifier
777 string based on `docutils.__version_info__` (i.e. the current Docutils
780 if version_info
is None:
781 version_info
= __version_info__
782 if version_info
.micro
:
783 micro
= '.%s' % version_info
.micro
787 releaselevel
= release_level_abbreviations
[version_info
.releaselevel
]
788 if version_info
.serial
:
789 serial
= version_info
.serial
793 if version_info
.release
:
797 version
= '%s.%s%s%s%s%s' % (