3 # Author: David Goodger <goodger@python.org>
4 # Copyright: This module has been placed in the public domain.
7 Miscellaneous utilities for the documentation utilities.
10 __docformat__
= 'reStructuredText'
17 from docutils
import ApplicationError
, DataError
18 from docutils
import nodes
19 from docutils
.io
import FileOutput
20 from docutils
.utils
.error_reporting
import ErrorOutput
, SafeString
23 class SystemMessage(ApplicationError
):
25 def __init__(self
, system_message
, level
):
26 Exception.__init
__(self
, system_message
.astext())
class SystemMessagePropagation(ApplicationError):
    """Exception derived from `ApplicationError`; adds no behavior of its own."""
36 Info/warning/error reporter and ``system_message`` element generator.
38 Five levels of system messages are defined, along with corresponding
39 methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
41 There is typically one Reporter object per process. A Reporter object is
42 instantiated with thresholds for reporting (generating warnings) and
43 halting processing (raising exceptions), a switch to turn debug output on
44 or off, and an I/O stream for warnings. These are stored as instance
47 When a system message is generated, its level is compared to the stored
48 thresholds, and a warning or error is generated as appropriate. Debug
49 messages are produced if the stored debug switch is on, independently of
50 other thresholds. Message output is sent to the stored warning stream if
53 The Reporter class also employs a modified form of the "Observer" pattern
54 [GoF95]_ to track system messages generated. The `attach_observer` method
55 should be called before parsing, with a bound method or function which
56 accepts system messages. The observer can be removed with
57 `detach_observer`, and another added in its place.
59 .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
60 Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
64 levels
= 'DEBUG INFO WARNING ERROR SEVERE'.split()
65 """List of names for system message levels, indexed by level."""
67 # system message level constants:
72 SEVERE_LEVEL
) = range(5)
74 def __init__(self
, source
, report_level
, halt_level
, stream
=None,
75 debug
=False, encoding
=None, error_handler
='backslashreplace'):
78 - `source`: The path to or description of the source data.
79 - `report_level`: The level at or above which warning output will
81 - `halt_level`: The level at or above which `SystemMessage`
82 exceptions will be raised, halting execution.
83 - `debug`: Show debug (level=0) system messages?
84 - `stream`: Where warning output is sent. Can be file-like (has a
85 ``.write`` method), a string (file name, opened for writing),
86 '' (empty string) or `False` (for discarding all stream messages)
87 or `None` (implies `sys.stderr`; default).
88 - `encoding`: The output encoding.
89 - `error_handler`: The error handler for stderr output encoding.
93 """The path to or description of the source data."""
95 self
.error_handler
= error_handler
96 """The character encoding error handler."""
98 self
.debug_flag
= debug
99 """Show debug (level=0) system messages?"""
101 self
.report_level
= report_level
102 """The level at or above which warning output will be sent
105 self
.halt_level
= halt_level
106 """The level at or above which `SystemMessage` exceptions
107 will be raised, halting execution."""
109 if not isinstance(stream
, ErrorOutput
):
110 stream
= ErrorOutput(stream
, encoding
, error_handler
)
113 """Where warning output is sent."""
115 self
.encoding
= encoding
or getattr(stream
, 'encoding', 'ascii')
116 """The output character encoding."""
119 """List of bound methods or functions to call with each system_message
123 """The highest level system message generated so far."""
125 def set_conditions(self
, category
, report_level
, halt_level
,
126 stream
=None, debug
=False):
127 warnings
.warn('docutils.utils.Reporter.set_conditions deprecated; '
128 'set attributes via configuration settings or directly',
129 DeprecationWarning, stacklevel
=2)
130 self
.report_level
= report_level
131 self
.halt_level
= halt_level
132 if not isinstance(stream
, ErrorOutput
):
133 stream
= ErrorOutput(stream
, self
.encoding
, self
.error_handler
)
135 self
.debug_flag
= debug
def attach_observer(self, observer):
    """
    Append `observer` to this reporter's list of observers.

    The `observer` parameter is a function or bound method which takes one
    argument, a `nodes.system_message` instance.
    """
    self.observers.append(observer)
def detach_observer(self, observer):
    """
    Remove `observer` from this reporter's list of observers.

    Only the first matching entry is removed.  The removal is delegated to
    the ``remove`` method of ``self.observers``; for a list this raises
    `ValueError` when `observer` is not present.
    """
    self.observers.remove(observer)
147 def notify_observers(self
, message
):
148 for observer
in self
.observers
:
151 def system_message(self
, level
, message
, *children
, **kwargs
):
153 Return a system_message object.
155 Raise an exception or generate a warning if appropriate.
157 # `message` can be a `string`, `unicode`, or `Exception` instance.
158 if isinstance(message
, Exception):
159 message
= SafeString(message
)
161 attributes
= kwargs
.copy()
162 if 'base_node' in kwargs
:
163 source
, line
= get_source_line(kwargs
['base_node'])
164 del attributes
['base_node']
165 if source
is not None:
166 attributes
.setdefault('source', source
)
168 attributes
.setdefault('line', line
)
169 # assert source is not None, "node has line- but no source-argument"
170 if not 'source' in attributes
: # 'line' is absolute line number
171 try: # look up (source, line-in-source)
172 source
, line
= self
.get_source_and_line(attributes
.get('line'))
173 # print "locator lookup", kwargs.get('line'), "->", source, line
174 except AttributeError:
175 source
, line
= None, None
176 if source
is not None:
177 attributes
['source'] = source
179 attributes
['line'] = line
180 # assert attributes['line'] is not None, (message, kwargs)
181 # assert attributes['source'] is not None, (message, kwargs)
182 attributes
.setdefault('source', self
.source
)
184 msg
= nodes
.system_message(message
, level
=level
,
185 type=self
.levels
[level
],
186 *children
, **attributes
)
187 if self
.stream
and (level
>= self
.report_level
188 or self
.debug_flag
and level
== self
.DEBUG_LEVEL
189 or level
>= self
.halt_level
):
190 self
.stream
.write(msg
.astext() + '\n')
191 if level
>= self
.halt_level
:
192 raise SystemMessage(msg
, level
)
193 if level
> self
.DEBUG_LEVEL
or self
.debug_flag
:
194 self
.notify_observers(msg
)
195 self
.max_level
= max(level
, self
.max_level
)
198 def debug(self
, *args
, **kwargs
):
200 Level-0, "DEBUG": an internal reporting issue. Typically, there is no
201 effect on the processing. Level-0 system messages are handled
202 separately from the others.
205 return self
.system_message(self
.DEBUG_LEVEL
, *args
, **kwargs
)
def info(self, *args, **kwargs):
    """
    Level-1, "INFO": a minor issue that can be ignored. Typically there is
    no effect on processing, and level-1 system messages are not reported.

    All arguments are passed on to `system_message()` after the level;
    its result is returned.
    """
    return self.system_message(self.INFO_LEVEL, *args, **kwargs)
def warning(self, *args, **kwargs):
    """
    Level-2, "WARNING": an issue that should be addressed. If ignored,
    there may be unpredictable problems with the output.

    All arguments are passed on to `system_message()` after the level;
    its result is returned.
    """
    return self.system_message(self.WARNING_LEVEL, *args, **kwargs)
def error(self, *args, **kwargs):
    """
    Level-3, "ERROR": an error that should be addressed. If ignored, the
    output will contain errors.

    All arguments are passed on to `system_message()` after the level;
    its result is returned.
    """
    return self.system_message(self.ERROR_LEVEL, *args, **kwargs)
def severe(self, *args, **kwargs):
    """
    Level-4, "SEVERE": a severe error that must be addressed. If ignored,
    the output will contain severe errors. Typically level-4 system
    messages are turned into exceptions which halt processing.

    All arguments are passed on to `system_message()` after the level;
    its result is returned.
    """
    return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):
    """Base class of the extension-option errors defined in this module."""
class BadOptionError(ExtensionOptionError):
    """Raised by `extract_options()` for invalid fields."""
class BadOptionDataError(ExtensionOptionError):
    """Raised by `extract_options()` for invalid option data."""
class DuplicateOptionError(ExtensionOptionError):
    """Raised by `assemble_option_dict()` for duplicate options."""
243 def extract_extension_options(field_list
, options_spec
):
245 Return a dictionary mapping extension option names to converted values.
248 - `field_list`: A flat field list without field arguments, where each
249 field body consists of a single paragraph only.
250 - `options_spec`: Dictionary mapping known option names to a
251 conversion function such as `int` or `float`.
254 - `KeyError` for unknown option names.
255 - `ValueError` for invalid option values (raised by the conversion
257 - `TypeError` for invalid option value types (raised by conversion
259 - `DuplicateOptionError` for duplicate options.
260 - `BadOptionError` for invalid fields.
261 - `BadOptionDataError` for invalid option data (missing name,
262 missing data, bad quotes, etc.).
264 option_list
= extract_options(field_list
)
265 option_dict
= assemble_option_dict(option_list
, options_spec
)
268 def extract_options(field_list
):
270 Return a list of option (name, value) pairs from field names & bodies.
273 `field_list`: A flat field list, where each field name is a single
274 word and each field body consists of a single paragraph only.
277 - `BadOptionError` for invalid fields.
278 - `BadOptionDataError` for invalid option data (missing name,
279 missing data, bad quotes, etc.).
282 for field
in field_list
:
283 if len(field
[0].astext().split()) != 1:
284 raise BadOptionError(
285 'extension option field name may not contain multiple words')
286 name
= str(field
[0].astext().lower())
290 elif len(body
) > 1 or not isinstance(body
[0], nodes
.paragraph
) \
291 or len(body
[0]) != 1 or not isinstance(body
[0][0], nodes
.Text
):
292 raise BadOptionDataError(
293 'extension option field body may contain\n'
294 'a single paragraph only (option "%s")' % name
)
296 data
= body
[0][0].astext()
297 option_list
.append((name
, data
))
300 def assemble_option_dict(option_list
, options_spec
):
302 Return a mapping of option names to values.
305 - `option_list`: A list of (name, value) pairs (the output of
306 `extract_options()`).
307 - `options_spec`: Dictionary mapping known option names to a
308 conversion function such as `int` or `float`.
311 - `KeyError` for unknown option names.
312 - `DuplicateOptionError` for duplicate options.
313 - `ValueError` for invalid option values (raised by conversion
315 - `TypeError` for invalid option value types (raised by conversion
319 for name
, value
in option_list
:
320 convertor
= options_spec
[name
] # raises KeyError if unknown
321 if convertor
is None:
322 raise KeyError(name
) # or if explicitly disabled
324 raise DuplicateOptionError('duplicate option "%s"' % name
)
326 options
[name
] = convertor(value
)
327 except (ValueError, TypeError), detail
:
328 raise detail
.__class
__('(option: "%s"; value: %r)\n%s'
329 % (name
, value
, ' '.join(detail
.args
)))
class NameValueError(DataError):
    """Raised by `extract_name_value()` for invalid input."""
336 def decode_path(path
):
338 Ensure `path` is Unicode. Return `nodes.reprunicode` object.
340 Decode file/path string in a failsafe manner if not already done.
342 # see also http://article.gmane.org/gmane.text.docutils.user/2905
343 if isinstance(path
, unicode):
346 path
= path
.decode(sys
.getfilesystemencoding(), 'strict')
347 except AttributeError: # default value None has no decode method
348 return nodes
.reprunicode(path
)
349 except UnicodeDecodeError:
351 path
= path
.decode('utf-8', 'strict')
352 except UnicodeDecodeError:
353 path
= path
.decode('ascii', 'replace')
354 return nodes
.reprunicode(path
)
357 def extract_name_value(line
):
359 Return a list of (name, value) from a line of the form "name=value ...".
362 `NameValueError` for invalid input (missing name, missing data, bad
367 equals
= line
.find('=')
369 raise NameValueError('missing "="')
370 attname
= line
[:equals
].strip()
371 if equals
== 0 or not attname
:
372 raise NameValueError(
373 'missing attribute name before "="')
374 line
= line
[equals
+1:].lstrip()
376 raise NameValueError(
377 'missing value after "%s="' % attname
)
379 endquote
= line
.find(line
[0], 1)
381 raise NameValueError(
382 'attribute "%s" missing end quote (%s)'
383 % (attname
, line
[0]))
384 if len(line
) > endquote
+ 1 and line
[endquote
+ 1].strip():
385 raise NameValueError(
386 'attribute "%s" end quote (%s) not followed by '
387 'whitespace' % (attname
, line
[0]))
388 data
= line
[1:endquote
]
389 line
= line
[endquote
+1:].lstrip()
391 space
= line
.find(' ')
397 line
= line
[space
+1:].lstrip()
398 attlist
.append((attname
.lower(), data
))
401 def new_reporter(source_path
, settings
):
403 Return a new Reporter object.
407 The path to or description of the source text of the document.
408 `settings` : optparse.Values object
412 source_path
, settings
.report_level
, settings
.halt_level
,
413 stream
=settings
.warning_stream
, debug
=settings
.debug
,
414 encoding
=settings
.error_encoding
,
415 error_handler
=settings
.error_encoding_error_handler
)
418 def new_document(source_path
, settings
=None):
420 Return a new empty document object.
423 `source_path` : string
424 The path to or description of the source text of the document.
425 `settings` : optparse.Values object
426 Runtime settings. If none are provided, a default core set will
427 be used. If you will use the document object with any Docutils
428 components, you must provide their default settings as well. For
429 example, if parsing, at least provide the parser settings,
430 obtainable as follows::
432 settings = docutils.frontend.OptionParser(
433 components=(docutils.parsers.rst.Parser,)
434 ).get_default_values()
436 from docutils
import frontend
438 settings
= frontend
.OptionParser().get_default_values()
439 source_path
= decode_path(source_path
)
440 reporter
= new_reporter(source_path
, settings
)
441 document
= nodes
.document(settings
, reporter
, source
=source_path
)
442 document
.note_source(source_path
, -1)
445 def clean_rcs_keywords(paragraph
, keyword_substitutions
):
446 if len(paragraph
) == 1 and isinstance(paragraph
[0], nodes
.Text
):
447 textnode
= paragraph
[0]
448 for pattern
, substitution
in keyword_substitutions
:
449 match
= pattern
.search(textnode
)
451 paragraph
[0] = nodes
.Text(pattern
.sub(substitution
, textnode
))
454 def relative_path(source
, target
):
456 Build and return a path to `target`, relative to `source` (both files).
458 If there is no common prefix, return the absolute path to `target`.
460 source_parts
= os
.path
.abspath(source
or type(target
)('dummy_file')
462 target_parts
= os
.path
.abspath(target
).split(os
.sep
)
463 # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
464 if source_parts
[:2] != target_parts
[:2]:
465 # Nothing in common between paths.
466 # Return absolute path, using '/' for URLs:
467 return '/'.join(target_parts
)
468 source_parts
.reverse()
469 target_parts
.reverse()
470 while (source_parts
and target_parts
471 and source_parts
[-1] == target_parts
[-1]):
472 # Remove path components in common:
475 target_parts
.reverse()
476 parts
= ['..'] * (len(source_parts
) - 1) + target_parts
477 return '/'.join(parts
)
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.

    If `settings.stylesheet_path` is set, return it rewritten by
    `relative_path()` relative to `relative_to` (which defaults to
    `settings._destination`).  Otherwise return `settings.stylesheet`
    unchanged.

    An `AssertionError` is raised if both `settings.stylesheet_path` and
    `settings.stylesheet` are set.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        # Identity test: only a real ``None`` selects the default.
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    return settings.stylesheet
496 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
498 # The original settings arguments are kept unchanged: you can test
499 # with e.g. ``if settings.stylesheet_path:``
501 # Differences to ``get_stylesheet_reference``:
502 # * return value is a list
503 # * no re-writing of the path (and therefore no optional argument)
504 # (if required, use ``utils.relative_path(source, target)``
505 # in the calling script)
506 def get_stylesheet_list(settings
):
508 Retrieve list of stylesheet references from the settings object.
510 assert not (settings
.stylesheet
and settings
.stylesheet_path
), (
511 'stylesheet and stylesheet_path are mutually exclusive.')
512 if settings
.stylesheet_path
:
513 sheets
= settings
.stylesheet_path
.split(",")
514 elif settings
.stylesheet
:
515 sheets
= settings
.stylesheet
.split(",")
518 # strip whitespace (frequently occurring in config files)
519 return [sheet
.strip(u
' \t\n') for sheet
in sheets
]
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    if settings.trim_footnote_reference_space is not None:
        return settings.trim_footnote_reference_space
    # A settings object without `footnote_references` counts as
    # "not superscript".
    return getattr(settings, 'footnote_references', None) == 'superscript'
536 def get_source_line(node
):
538 Return the "source" and "line" attributes from the `node` given or from
539 its closest ancestor.
542 if node
.source
or node
.line
:
543 return node
.source
, node
.line
547 def escape2null(text
):
548 """Return a string with escape-backslashes converted to nulls."""
552 found
= text
.find('\\', start
)
554 parts
.append(text
[start
:])
555 return ''.join(parts
)
556 parts
.append(text
[start
:found
])
557 parts
.append('\x00' + text
[found
+1:found
+2])
558 start
= found
+ 2 # skip character after escape
560 def unescape(text
, restore_backslashes
=False):
562 Return a string with nulls removed or restored to backslashes.
563 Backslash-escaped spaces are also removed.
565 if restore_backslashes
:
566 return text
.replace('\x00', '\\')
568 for sep
in ['\x00 ', '\x00\n', '\x00']:
569 text
= ''.join(text
.split(sep
))
572 def strip_combining_chars(text
):
573 if isinstance(text
, str) and sys
.version_info
< (3,0):
575 return u
''.join([c
for c
in text
if not unicodedata
.combining(c
)])
577 def find_combining_chars(text
):
578 """Return indices of all combining chars in Unicode string `text`.
580 >>> find_combining_chars(u'A t̆ab̆lĕ')
583 if isinstance(text
, str) and sys
.version_info
< (3,0):
585 return [i
for i
,c
in enumerate(text
) if unicodedata
.combining(c
)]
def column_indices(text):
    """Indices of Unicode string `text` when skipping combining characters.

    >>> column_indices(u'A t̆ab̆lĕ')
    [0, 1, 2, 4, 5, 7, 8]

    The indices to skip are those reported by `find_combining_chars()`.
    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    # Filter by membership instead of assigning into the result of range(),
    # which is only a mutable list on Python 2.
    combining = set(find_combining_chars(text))
    return [i for i in range(len(text)) if i not in combining]
600 east_asian_widths
= {'W': 2, # Wide
601 'F': 2, # Full-width (wide)
603 'H': 1, # Half-width (narrow)
604 'N': 1, # Neutral (not East Asian, treated as narrow)
605 'A': 1} # Ambiguous (s/b wide in East Asian context,
606 # narrow otherwise, but that doesn't work)
607 """Mapping of result codes from `unicodedata.east_asian_width()` to character
610 def column_width(text
):
611 """Return the column width of text.
613 Correct ``len(text)`` for wide East Asian and combining Unicode chars.
615 if isinstance(text
, str) and sys
.version_info
< (3,0):
618 width
= sum([east_asian_widths
[unicodedata
.east_asian_width(c
)]
620 except AttributeError: # east_asian_width() New in version 2.4.
622 # correction for combining chars:
623 width
-= len(find_combining_chars(text
))
633 # by Li Daobing http://code.activestate.com/recipes/190465/
634 # since Python 2.6 there is also itertools.combinations()
635 def unique_combinations(items
, n
):
636 """Return n-length tuples, in sorted order, no repeated elements"""
639 for i
in xrange(len(items
)-n
+1):
640 for cc
in unique_combinations(items
[i
+1:],n
-1):
643 def normalize_language_tag(tag
):
644 """Return a list of normalized combinations for a `BCP 47` language tag.
648 >>> normalize_language_tag('de-AT-1901')
649 ['de_at_1901', 'de_at', 'de_1901', 'de']
652 tag
= tag
.lower().replace('-','_')
653 # find all combinations of subtags
655 base_tag
= tag
.split('_')[:1]
656 subtags
= tag
.split('_')[1:]
657 # print base_tag, subtags
658 for n
in range(len(subtags
), 0, -1):
659 for tags
in unique_combinations(subtags
, n
):
661 taglist
.append('_'.join(base_tag
+ tags
))
666 class DependencyList(object):
669 List of dependencies, with file recording support.
671 Note that the output file is not automatically closed. You have
672 to explicitly call the close() method.
675 def __init__(self
, output_file
=None, dependencies
=[]):
677 Initialize the dependency list, automatically setting the
678 output file to `output_file` (see `set_output()`) and adding
679 all supplied dependencies.
681 self
.set_output(output_file
)
682 for i
in dependencies
:
685 def set_output(self
, output_file
):
687 Set the output file and clear the list of already added
690 `output_file` must be a string. The specified file is
691 immediately overwritten.
693 If output_file is '-', the output will be written to stdout.
694 If it is None, no file output is done when calling add().
698 if output_file
== '-':
702 self
.file = FileOutput(destination_path
=of
,
703 encoding
='utf8', autoclose
=False)
def add(self, *filenames):
    """
    If the dependency `filename` has not already been added,
    append it to self.list and print it to self.file if self.file
    is set.

    Each name is written to self.file on its own line (a newline is
    appended).  Names already in self.list are skipped entirely.
    """
    for filename in filenames:
        if filename not in self.list:
            self.list.append(filename)
            if self.file is not None:
                self.file.write(filename + '\n')
721 Close the output file.
728 output_file
= self
.file.name
729 except AttributeError:
731 return '%s(%r, %s)' % (self
.__class
__.__name
__, output_file
, self
.list)