# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.

"""
Miscellaneous utilities for the documentation utilities.
"""
10 __docformat__
= 'reStructuredText'
import os
import os.path
import sys
import unicodedata
import warnings

from docutils import ApplicationError, DataError
from docutils import nodes
from docutils.io import FileOutput
from docutils.error_reporting import ErrorOutput, SafeString
class SystemMessage(ApplicationError):

    """
    Exception carrying a system message and its severity level.

    The exception text is the plain-text rendering of the system message;
    the numeric severity is kept in `self.level`.
    """

    def __init__(self, system_message, level):
        """
        :Parameters:
            - `system_message`: An object with an ``astext()`` method; its
              result becomes the exception message.
            - `level`: The severity level of the message.
        """
        Exception.__init__(self, system_message.astext())
        # Keep the level so handlers can inspect the severity.
        self.level = level
class SystemMessagePropagation(ApplicationError):
    """`ApplicationError` subclass that adds no behaviour of its own."""
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced if the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    (DEBUG_LEVEL,
     INFO_LEVEL,
     WARNING_LEVEL,
     ERROR_LEVEL,
     SEVERE_LEVEL) = range(5)

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0, encoding=None, error_handler='backslashreplace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string) or `False` (for discarding all stream messages)
              or `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        # Anything that is not already an ErrorOutput gets wrapped in one.
        if not isinstance(stream, ErrorOutput):
            stream = ErrorOutput(stream, encoding, error_handler)

        self.stream = stream
        """Where warning output is sent."""

        self.encoding = encoding or getattr(stream, 'encoding', 'ascii')
        """The output character encoding."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level = -1
        """The highest level system message generated so far."""

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Deprecated: reset thresholds, output stream and debug switch.

        Emits a `DeprecationWarning`.  `category` is accepted but not used.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if not isinstance(stream, ErrorOutput):
            stream = ErrorOutput(stream, self.encoding, self.error_handler)
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer`; `ValueError` if it was never attached."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer with `message`, in attach order."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        The keyword argument `base_node`, if given, is not passed on; it is
        only used to look up default "source" and "line" attributes.
        """
        # `message` can be a `string`, `unicode`, or `Exception` instance.
        if isinstance(message, Exception):
            message = SafeString(message)

        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            # Explicit keyword arguments win over values from the node.
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
                # assert source is not None, "node has line- but no source-argument"
        if 'source' not in attributes:  # 'line' is absolute line number
            try:  # look up (source, line-in-source)
                source, line = self.locator(attributes.get('line'))
            except AttributeError:
                # No `locator` attribute set on this reporter (or the
                # locator itself raised AttributeError).
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        # assert attributes['line'] is not None, (message, kwargs)
        # assert attributes['source'] is not None, (message, kwargs)
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL
                            or level >= self.halt_level):
            self.stream.write(msg.astext() + '\n')
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Returns `None` without creating a message when debugging is off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):
    """Base class of the extension-option errors defined below."""


class BadOptionError(ExtensionOptionError):
    """Extension-option error; raised by `extract_options()` for invalid fields."""


class BadOptionDataError(ExtensionOptionError):
    """Extension-option error; raised by `extract_options()` for invalid option data."""


class DuplicateOptionError(ExtensionOptionError):
    """Extension-option error; raised by `assemble_option_dict()` for duplicates."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    # Two steps: pull (name, value) pairs out of the field list, then
    # convert and de-duplicate them against the specification.
    option_list = extract_options(field_list)
    return assemble_option_dict(option_list, options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    option_list = []
    for field in field_list:
        name_text = field[0].astext()
        if len(name_text.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(name_text.lower())
        body = field[1]
        if len(body) == 0:
            # An option without a body carries no data.
            data = None
        elif (len(body) > 1 or not isinstance(body[0], nodes.paragraph)
              or len(body[0]) != 1
              or not isinstance(body[0][0], nodes.Text)):
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        else:
            data = body[0][0].astext()
        option_list.append((name, data))
    return option_list
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError):
            # sys.exc_info() instead of "except X, detail" / "except X as
            # detail": this spelling parses on both Python 2 and Python 3.
            detail = sys.exc_info()[1]
            # Re-raise the same exception type, prefixed with the option
            # name and raw value.
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, ' '.join(detail.args)))
    return options
class NameValueError(DataError):
    """Data error raised by `extract_name_value()` for invalid input."""
def decode_path(path):
    """
    Ensure `path` is Unicode. Return `nodes.reprunicode` object.

    Decode file/path string in a failsafe manner if not already done:
    try the file system encoding first, then UTF-8, and finally ASCII with
    replacement of undecodable bytes.
    """
    # see also http://article.gmane.org/gmane.text.docutils.user/2905
    if isinstance(path, unicode):
        return path
    try:
        path = path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:  # default value None has no decode method
        return nodes.reprunicode(path)
    except UnicodeDecodeError:
        try:
            path = path.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            path = path.decode('ascii', 'replace')
    return nodes.reprunicode(path)
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  A value is either a run of non-space characters
    or a string enclosed in matching single or double quotes.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    attlist = []
    # Consume one name=value pair per iteration until the line is used up.
    while line:
        equals = line.find('=')
        if equals == -1:
            raise NameValueError('missing "="')
        attname = line[:equals].strip()
        if equals == 0 or not attname:
            raise NameValueError(
                'missing attribute name before "="')
        line = line[equals+1:].lstrip()
        if not line:
            raise NameValueError(
                'missing value after "%s="' % attname)
        if line[0] in '\'"':
            # Quoted value: runs to the next occurrence of the same quote.
            endquote = line.find(line[0], 1)
            if endquote == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (attname, line[0]))
            if len(line) > endquote + 1 and line[endquote + 1].strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (attname, line[0]))
            data = line[1:endquote]
            line = line[endquote+1:].lstrip()
        else:
            # Unquoted value: runs to the next space or the end of line.
            space = line.find(' ')
            if space == -1:
                data = line
                line = ''
            else:
                data = line[:space]
                line = line[space+1:].lstrip()
        attlist.append((attname.lower(), data))
    return attlist
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes read are ``report_level``,
            ``halt_level``, ``warning_stream``, ``debug``,
            ``error_encoding`` and ``error_encoding_error_handler``.
    """
    reporter = Reporter(
        source_path, settings.report_level, settings.halt_level,
        stream=settings.warning_stream, debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
    return reporter
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.  For
            example, if parsing, at least provide the parser settings,
            obtainable as follows::

                settings = docutils.frontend.OptionParser(
                    components=(docutils.parsers.rst.Parser,)
                    ).get_default_values()
    """
    from docutils import frontend
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    source_path = decode_path(source_path)
    reporter = new_reporter(source_path, settings)
    document = nodes.document(settings, reporter, source=source_path)
    document.note_source(source_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Only paragraphs consisting of a single `nodes.Text` child are touched.

    :Parameters:
        - `paragraph`: The node whose sole text child may be replaced.
        - `keyword_substitutions`: Sequence of (pattern, substitution)
          pairs; each pattern provides ``search()`` and ``sub()``.
    """
    if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
        textnode = paragraph[0]
        for pattern, substitution in keyword_substitutions:
            match = pattern.search(textnode)
            if match:
                paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
                # Stop after the first pattern that matches.
                return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.  An empty or `None` `source`
    is treated as a file in the current working directory.
    """
    source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components both paths have in common:
    common = 0
    for source_part, target_part in zip(source_parts, target_parts):
        if source_part != target_part:
            break
        common += 1
    # One '..' per remaining source directory; the final source component
    # is the file name itself, hence the "- 1".
    ups = ['..'] * (len(source_parts) - common - 1)
    return '/'.join(ups + target_parts[common:])
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.

    If ``settings.stylesheet_path`` is set, it is returned rewritten
    relative to `relative_to` (default: ``settings._destination``);
    otherwise ``settings.stylesheet`` is returned unchanged.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    return settings.stylesheet
495 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
497 # The original settings arguments are kept unchanged: you can test
498 # with e.g. ``if settings.stylesheet_path:``
500 # Differences to ``get_stylesheet_reference``:
501 # * return value is a list
502 # * no re-writing of the path (and therefore no optional argument)
503 # (if required, use ``utils.relative_path(source, target)``
504 # in the calling script)
def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    ``settings.stylesheet_path`` takes precedence over
    ``settings.stylesheet``; the chosen value is split at commas.  An empty
    list is returned if neither is set.
    """
    assert not (settings.stylesheet and settings.stylesheet_path), (
        'stylesheet and stylesheet_path are mutually exclusive.')
    if settings.stylesheet_path:
        sheets = settings.stylesheet_path.split(",")
    elif settings.stylesheet:
        sheets = settings.stylesheet.split(",")
    else:
        sheets = []
    # strip whitespace (frequently occurring in config files)
    return [sheet.strip(u' \t\n') for sheet in sheets]
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    # No explicit choice: decide from the footnote reference style, if the
    # settings object has one at all.
    return (hasattr(settings, 'footnote_references')
            and settings.footnote_references == 'superscript')
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The first node (walking up via ``parent``) that has a true ``source``
    or ``line`` supplies both values.  Return ``(None, None)`` if no such
    node is found.
    """
    while node:
        if node.source or node.line:
            return node.source, node.line
        node = node.parent
    return None, None
def escape2null(text):
    """Return a string with escape-backslashes converted to nulls.

    Each backslash is replaced by a null character; the character following
    it is copied unchanged and is never itself treated as an escape.
    """
    parts = []
    start = 0
    while True:
        found = text.find('\\', start)
        if found == -1:
            # No more backslashes: keep the remainder and finish.
            parts.append(text[start:])
            return ''.join(parts)
        parts.append(text[start:found])
        parts.append('\x00' + text[found+1:found+2])
        start = found + 2               # skip character after escape
def unescape(text, restore_backslashes=0):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.

    With a true `restore_backslashes`, every null becomes a backslash and
    nothing is removed.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Order matters: drop escaped spaces and newlines together with their
    # null marker before dropping the remaining bare nulls.
    for sep in ['\x00 ', '\x00\n', '\x00']:
        text = text.replace(sep, '')
    return text
def strip_combining_chars(text):
    """Return Unicode string `text` without its combining characters.

    Python 2 byte strings are returned unchanged.
    """
    if isinstance(text, str) and sys.version_info < (3,0):
        return text
    return u''.join([c for c in text if not unicodedata.combining(c)])
def find_combining_chars(text):
    """Return indices of all combining chars in Unicode string `text`.

    >>> find_combining_chars(u'A t̆ab̆lĕ')
    [3, 6, 9]

    For a Python 2 byte string, an empty list is returned.
    """
    if isinstance(text, str) and sys.version_info < (3,0):
        return []
    return [i for i,c in enumerate(text) if unicodedata.combining(c)]
def column_indices(text):
    """Indices of Unicode string `text` when skipping combining characters.

    >>> column_indices(u'A t̆ab̆lĕ')
    [0, 1, 2, 4, 5, 7, 8]
    """
    # TODO: account for asian wide chars here instead of using dummy
    # replacements in the tableparser?
    # Filter rather than assign into range(): a Python 3 range object does
    # not support item assignment.
    combining = set(find_combining_chars(text))
    return [i for i in range(len(text)) if i not in combining]
east_asian_widths = {'W': 2,   # Wide
                     'F': 2,   # Full-width (wide)
                     'Na': 1,  # Narrow
                     'H': 1,   # Half-width (narrow)
                     'N': 1,   # Neutral (not East Asian, treated as narrow)
                     'A': 1}   # Ambiguous (s/b wide in East Asian context,
                               # narrow otherwise, but that doesn't work)
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""
def column_width(text):
    """Return the column width of text.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    Python 2 byte strings are measured with plain ``len()``.
    """
    if isinstance(text, str) and sys.version_info < (3,0):
        return len(text)
    try:
        width = sum([east_asian_widths[unicodedata.east_asian_width(c)]
                     for c in text])
    except AttributeError:  # east_asian_width() New in version 2.4.
        width = len(text)
    # correction for combining chars:
    width -= len(find_combining_chars(text))
    return width
632 # by Li Daobing http://code.activestate.com/recipes/190465/
633 # since Python 2.6 there is also itertools.combinations()
def unique_combinations(items, n):
    """Return n-length tuples, in sorted order, no repeated elements

    This is a generator; each combination is yielded as a list that keeps
    the order of `items`.
    """
    if n == 0:
        yield []
    else:
        # range() rather than xrange(): works on Python 2 and Python 3.
        for i in range(len(items)-n+1):
            for cc in unique_combinations(items[i+1:], n-1):
                yield [items[i]] + cc
def normalize_language_tag(tag):
    """Return a list of normalized combinations for a `BCP 47` language tag.

    Example:

    >>> normalize_language_tag('de-AT-1901')
    ['de_at_1901', 'de_at', 'de_1901', 'de']
    """
    # normalize:
    tag = tag.lower().replace('-','_')
    # find all combinations of subtags
    taglist = []
    base_tag = tag.split('_')[:1]
    subtags = tag.split('_')[1:]
    # Longest combinations first, so the most specific tags lead the list.
    for n in range(len(subtags), 0, -1):
        for tags in unique_combinations(subtags, n):
            taglist.append('_'.join(base_tag + tags))
    # The bare base tag comes last.
    taglist += base_tag
    return taglist
class DependencyList(object):

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        # The default for `dependencies` is an immutable empty tuple, so no
        # mutable object is shared between calls.
        self.set_output(output_file)
        for dependency in dependencies:
            self.add(dependency)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file:
            if output_file == '-':
                # No destination path: FileOutput is created without one.
                of = None
            else:
                of = output_file
            self.file = FileOutput(destination_path=of,
                                   encoding='utf8', autoclose=False)
        else:
            self.file = None

    def add(self, *filenames):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        for filename in filenames:
            if filename not in self.list:
                self.list.append(filename)
                if self.file is not None:
                    self.file.write(filename+'\n')

    def close(self):
        """
        Close the output file.

        Does nothing if no output file is set.
        """
        if self.file is not None:
            self.file.close()
            self.file = None

    def __repr__(self):
        try:
            output_file = self.file.name
        except AttributeError:
            # No file set, or the output object has no ``name``.
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)