3 # Author: David Goodger <goodger@python.org>
4 # Copyright: This module has been placed in the public domain.
7 Miscellaneous utilities for the documentation utilities.
10 __docformat__
= 'reStructuredText'
18 from docutils
import ApplicationError
, DataError
19 from docutils
import nodes
20 from docutils
.io
import FileOutput
21 from docutils
.utils
.error_reporting
import ErrorOutput
, SafeString
class SystemMessage(ApplicationError):

    """
    Exception carrying a system message and its severity level.

    The exception text is the plain-text rendering (``astext()``) of the
    `system_message` node passed in.
    """

    def __init__(self, system_message, level):
        """
        :Parameters:
            - `system_message`: object providing an ``astext()`` method
              (`Reporter.system_message` passes the message node it created).
            - `level`: the numeric level the message was generated at.
        """
        Exception.__init__(self, system_message.astext())
        # Keep the level so that handlers can inspect the severity.
        self.level = level
class SystemMessagePropagation(ApplicationError):

    """`ApplicationError` subclass that adds no behaviour of its own."""
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced if the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream
    unless that stream is false.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # List of names for system message levels, indexed by level.
    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()

    # system message level constants:
    (DEBUG_LEVEL,
     INFO_LEVEL,
     WARNING_LEVEL,
     ERROR_LEVEL,
     SEVERE_LEVEL) = range(5)

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=False, encoding=None, error_handler='backslashreplace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string) or `False` (for discarding all stream messages)
              or `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """
        # The path to or description of the source data.
        self.source = source

        # The character encoding error handler.
        self.error_handler = error_handler

        # Show debug (level=0) system messages?
        self.debug_flag = debug

        # The level at or above which warning output will be sent to
        # `self.stream`.
        self.report_level = report_level

        # The level at or above which `SystemMessage` exceptions will be
        # raised, halting execution.
        self.halt_level = halt_level

        # Anything that is not already an `ErrorOutput` gets wrapped in one.
        if not isinstance(stream, ErrorOutput):
            stream = ErrorOutput(stream, encoding, error_handler)

        # Where warning output is sent.
        self.stream = stream

        # The output character encoding; falls back to the stream's own
        # ``encoding`` attribute and finally to 'ascii'.
        self.encoding = encoding or getattr(stream, 'encoding', 'ascii')

        # List of bound methods or functions to call with each system_message
        # created.
        self.observers = []

        # The highest level system message generated so far.
        self.max_level = -1

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=False):
        """
        Deprecated: reset thresholds, output stream and debug switch.

        Emits a `DeprecationWarning`.  `category` is accepted but not used.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if not isinstance(stream, ErrorOutput):
            stream = ErrorOutput(stream, self.encoding, self.error_handler)
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove a previously attached `observer`."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer with `message`."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.
        """
        # `message` can be a `string`, `unicode`, or `Exception` instance.
        if isinstance(message, Exception):
            message = SafeString(message)

        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            # Take source/line from the node (or its closest ancestor);
            # explicitly passed 'source'/'line' keywords win.
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        if 'source' not in attributes:  # 'line' is absolute line number
            try:  # look up (source, line-in-source)
                source, line = self.get_source_and_line(
                    attributes.get('line'))
            except AttributeError:
                # No `get_source_and_line` locator has been set on this
                # reporter.
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        should_write = (level >= self.report_level
                        or (self.debug_flag and level == self.DEBUG_LEVEL)
                        or level >= self.halt_level)
        if self.stream and should_write:
            self.stream.write(msg.astext() + '\n')
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Returns `None` without generating anything when the debug switch
        is off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):

    """Base class for the extension option errors defined below."""


class BadOptionError(ExtensionOptionError):

    """Raised by `extract_options()` for an invalid option field."""


class BadOptionDataError(ExtensionOptionError):

    """Raised by `extract_options()` for invalid option data."""


class DuplicateOptionError(ExtensionOptionError):

    """Raised by `assemble_option_dict()` when an option is repeated."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    # Two stages: pull (name, value) pairs out of the field list, then
    # convert and collect them.
    return assemble_option_dict(extract_options(field_list), options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    option_list = []
    for field in field_list:
        field_name = field[0].astext()
        if len(field_name.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(field_name.lower())
        body = field[1]
        if len(body) == 0:
            # An empty field body yields a value of None.
            data = None
        else:
            # The body must be exactly one paragraph holding exactly one
            # Text node.
            is_plain_paragraph = (
                len(body) == 1
                and isinstance(body[0], nodes.paragraph)
                and len(body[0]) == 1
                and isinstance(body[0][0], nodes.Text))
            if not is_plain_paragraph:
                raise BadOptionDataError(
                    'extension option field body may contain\n'
                    'a single paragraph only (option "%s")' % name)
            data = body[0][0].astext()
        option_list.append((name, data))
    return option_list
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Re-raise the same exception type with the option context
            # prepended.  Arguments are formatted one by one so that a
            # non-string argument cannot break the join.
            original = ' '.join(['%s' % (arg,) for arg in detail.args])
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, original))
    return options
class NameValueError(DataError):

    """Raised by `extract_name_value()` for invalid "name=value" input."""
def decode_path(path):
    """
    Ensure `path` is Unicode. Return `nodes.reprunicode` object.

    Decode file/path string in a failsafe manner if not already done.
    """
    # see also http://article.gmane.org/gmane.text.docutils.user/2905
    if isinstance(path, unicode):
        return path
    try:
        decoded = path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:
        # default value None has no decode method
        return nodes.reprunicode(path)
    except UnicodeDecodeError:
        # Filesystem encoding failed: try UTF-8, then fall back to ASCII
        # with replacement of undecodable bytes.
        try:
            decoded = path.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            decoded = path.decode('ascii', 'replace')
    return nodes.reprunicode(decoded)
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  A value is either a quoted string (single or
    double quotes, which are stripped) or runs up to the next space.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    remaining = line
    while remaining:
        raw_name, separator, rest = remaining.partition('=')
        if not separator:
            raise NameValueError('missing "="')
        attname = raw_name.strip()
        if not attname:
            raise NameValueError(
                'missing attribute name before "="')
        remaining = rest.lstrip()
        if not remaining:
            raise NameValueError(
                'missing value after "%s="' % attname)
        quote = remaining[0]
        if quote in '\'"':
            closing = remaining.find(quote, 1)
            if closing == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (attname, quote))
            # The character right after the closing quote, if any, must be
            # whitespace.
            if remaining[closing + 1:closing + 2].strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (attname, quote))
            data = remaining[1:closing]
            remaining = remaining[closing + 1:].lstrip()
        else:
            # Unquoted value: everything up to the first space.
            data, _, tail = remaining.partition(' ')
            remaining = tail.lstrip()
        pairs.append((attname.lower(), data))
    return pairs
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings object; the attributes read are `report_level`,
            `halt_level`, `warning_stream`, `debug`, `error_encoding` and
            `error_encoding_error_handler`.
    """
    return Reporter(
        source_path,
        settings.report_level,
        settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.  For
            example, if parsing, at least provide the parser settings,
            obtainable as follows::

                settings = docutils.frontend.OptionParser(
                    components=(docutils.parsers.rst.Parser,)
                    ).get_default_values()
    """
    # Imported at call time rather than at module level.
    from docutils import frontend
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    unicode_path = decode_path(source_path)
    document = nodes.document(settings,
                              new_reporter(unicode_path, settings),
                              source=unicode_path)
    document.note_source(unicode_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` consists of exactly one `nodes.Text`
    child.  `keyword_substitutions` is a sequence of
    (compiled pattern, substitution) pairs; only the first pattern that
    matches is applied.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode):
            paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
            return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.
    """
    # A false `source` is replaced by a dummy file name of the same string
    # type as `target`.
    source_parts = os.path.abspath(
        source or type(target)('dummy_file')).split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components the two paths share.
    shared = 0
    shortest = min(len(source_parts), len(target_parts))
    while shared < shortest and source_parts[shared] == target_parts[shared]:
        shared += 1
    # One '..' for every remaining source directory (the last remaining
    # source component is the file itself, hence the -1).
    climbs = ['..'] * (len(source_parts) - shared - 1)
    return '/'.join(climbs + target_parts[shared:])
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.

    If `settings.stylesheet_path` is set, return it rewritten relative to
    `relative_to` (default: `settings._destination`); otherwise return
    `settings.stylesheet` unchanged.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        # Identity test: `== None` could be fooled by a custom __eq__.
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    return settings.stylesheet
497 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
499 # The original settings arguments are kept unchanged: you can test
500 # with e.g. ``if settings.stylesheet_path:``
502 # Differences to ``get_stylesheet_reference``:
503 # * return value is a list
504 # * no re-writing of the path (and therefore no optional argument)
505 # (if required, use ``utils.relative_path(source, target)``
506 # in the calling script)
def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    `settings.stylesheet_path` takes precedence over `settings.stylesheet`;
    a value that is not already a list is split at commas and each entry
    is stripped of surrounding whitespace.
    """
    assert not (settings.stylesheet and settings.stylesheet_path), (
        'stylesheet and stylesheet_path are mutually exclusive.')
    chosen = settings.stylesheet_path or settings.stylesheet or []
    if isinstance(chosen, list):
        return chosen
    # programmatically set default can be string or unicode:
    return [entry.strip() for entry in chosen.split(',')]
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    if not hasattr(settings, 'footnote_references'):
        return False
    return settings.footnote_references == 'superscript'
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    Returns ``(None, None)`` if no node in the parent chain has either
    attribute set to a true value.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None
def escape2null(text):
    """Return a string with escape-backslashes converted to nulls."""
    pieces = []
    position = 0
    backslash = text.find('\\', position)
    while backslash != -1:
        # Copy the text before the backslash, then a null byte followed by
        # the escaped character (if there is one).
        pieces.append(text[position:backslash])
        pieces.append('\x00' + text[backslash + 1:backslash + 2])
        position = backslash + 2  # skip character after escape
        backslash = text.find('\\', position)
    pieces.append(text[position:])
    return ''.join(pieces)
def unescape(text, restore_backslashes=False):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Order matters: escaped space and escaped newline must be dropped
    # before the remaining bare nulls.
    for marker in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(marker, '')
    return text
def strip_combining_chars(text):
    """Return `text` without Unicode combining characters.

    A byte string under Python 2 is returned unchanged.
    """
    if sys.version_info < (3, 0) and isinstance(text, str):
        return text
    return u''.join(char for char in text
                    if unicodedata.combining(char) == 0)
def find_combining_chars(text):
    """Return indices of all combining chars in Unicode string `text`.

    >>> find_combining_chars(u'A t\u0306ab\u0306le\u0306')
    [3, 6, 9]

    A byte string under Python 2 yields an empty list.
    """
    if sys.version_info < (3, 0) and isinstance(text, str):
        return []
    positions = []
    for position, char in enumerate(text):
        if unicodedata.combining(char):
            positions.append(position)
    return positions
def column_indices(text):
    """Indices of Unicode string `text` when skipping combining characters.

    >>> column_indices(u'A t\u0306ab\u0306le\u0306')
    [0, 1, 2, 4, 5, 7, 8]

    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    #
    # Filter with a set instead of assigning into ``range(...)``: on
    # Python 3 a range object does not support item assignment.
    combining = set(find_combining_chars(text))
    return [index for index in range(len(text)) if index not in combining]
# Mapping of result codes from `unicodedata.east_asian_width()` to character
# column widths.
east_asian_widths = {'W': 2,   # Wide
                     'F': 2,   # Full-width (wide)
                     'Na': 1,  # Narrow
                     'H': 1,   # Half-width (narrow)
                     'N': 1,   # Neutral (not East Asian, treated as narrow)
                     'A': 1}   # Ambiguous (s/b wide in East Asian context,
                               # narrow otherwise, but that doesn't work)
def column_width(text):
    """Return the column width of text.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    A byte string under Python 2 is measured with plain ``len()``.
    """
    if sys.version_info < (3, 0) and isinstance(text, str):
        return len(text)
    try:
        width = 0
        for char in text:
            width += east_asian_widths[unicodedata.east_asian_width(char)]
    except AttributeError:  # east_asian_width() New in version 2.4.
        width = len(text)
    # correction for combining chars:
    return width - len(find_combining_chars(text))
# Thin wrapper around itertools.combinations() (available since Python 2.6)
# that keeps the historical interface: a generator of lists.
def unique_combinations(items, n):
    """Return n-length lists, in sorted order, no repeated elements.

    Generator.  Each combination is yielded as a new list whose elements
    keep the order they have in `items`.  ``n == 0`` yields one empty list;
    ``n`` larger than ``len(items)`` yields nothing.
    """
    # Local import: the module-level import block is not touched.
    from itertools import combinations
    for combination in combinations(items, n):
        yield list(combination)
def normalize_language_tag(tag):
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The tag is lower-cased and underscores are treated like hyphens.  The
    result lists the full tag first, then every shorter combination of the
    base tag with a subset of the remaining subtags, and finally the base
    tag alone.

    Example:

    >>> normalize_language_tag('de_AT-1901')
    ['de-at-1901', 'de-at', 'de-1901', 'de']
    >>> normalize_language_tag('de-CH-x_altquot')
    ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']

    """
    # Local import: the module-level import block is not touched.
    from itertools import combinations
    # normalize:
    tag = tag.lower().replace('_', '-')
    # split (except singletons, which mark the following tag as non-standard):
    # the hyphen after a one-character subtag is temporarily protected as '_'.
    tag = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', tag)
    subtags = [subtag.replace('_', '-') for subtag in tag.split('-')]
    base_tag = subtags.pop(0)
    taglist = []
    # find all combinations of subtags, longest first
    for size in range(len(subtags), 0, -1):
        for selection in combinations(subtags, size):
            taglist.append('-'.join((base_tag,) + selection))
    taglist.append(base_tag)
    return taglist
class DependencyList(object):

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        # The default is an immutable empty tuple rather than a shared
        # mutable list; `dependencies` is only iterated.
        self.set_output(output_file)
        for dependency in dependencies:
            self.add(dependency)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file:
            # '-' is passed to FileOutput as "no destination path".
            destination = None if output_file == '-' else output_file
            self.file = FileOutput(destination_path=destination,
                                   encoding='utf8', autoclose=False)
        else:
            self.file = None

    def add(self, *filenames):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        for filename in filenames:
            if filename not in self.list:
                self.list.append(filename)
                if self.file is not None:
                    self.file.write(filename + '\n')

    def close(self):
        """
        Close the output file.

        Does nothing if no output file is set.
        """
        if self.file is not None:
            self.file.close()
            self.file = None

    def __repr__(self):
        try:
            output_file = self.file.name
        except AttributeError:
            # self.file is None, or the output object has no name.
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file,
                               self.list)