2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 Miscellaneous utilities for the documentation utilities.
9 __docformat__
= 'reStructuredText'
16 from docutils
import ApplicationError
, DataError
17 from docutils
import nodes
18 from docutils
._compat
import bytes
21 class SystemMessage(ApplicationError
):
23 def __init__(self
, system_message
, level
):
24 Exception.__init
__(self
, system_message
.astext())
class SystemMessagePropagation(ApplicationError):
    """`ApplicationError` subclass that adds no attributes or methods."""
34 Info/warning/error reporter and ``system_message`` element generator.
36 Five levels of system messages are defined, along with corresponding
37 methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
39 There is typically one Reporter object per process. A Reporter object is
40 instantiated with thresholds for reporting (generating warnings) and
41 halting processing (raising exceptions), a switch to turn debug output on
42 or off, and an I/O stream for warnings. These are stored as instance
45 When a system message is generated, its level is compared to the stored
46 thresholds, and a warning or error is generated as appropriate. Debug
47 messages are produced if the stored debug switch is on, independently of
48 other thresholds. Message output is sent to the stored warning stream if
51 The Reporter class also employs a modified form of the "Observer" pattern
52 [GoF95]_ to track system messages generated. The `attach_observer` method
53 should be called before parsing, with a bound method or function which
54 accepts system messages. The observer can be removed with
55 `detach_observer`, and another added in its place.
57 .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
58 Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
62 levels
= 'DEBUG INFO WARNING ERROR SEVERE'.split()
63 """List of names for system message levels, indexed by level."""
65 # system message level constants:
70 SEVERE_LEVEL
) = range(5)
72 def __init__(self
, source
, report_level
, halt_level
, stream
=None,
73 debug
=0, encoding
=None, error_handler
='backslashreplace'):
76 - `source`: The path to or description of the source data.
77 - `report_level`: The level at or above which warning output will
79 - `halt_level`: The level at or above which `SystemMessage`
80 exceptions will be raised, halting execution.
81 - `debug`: Show debug (level=0) system messages?
82 - `stream`: Where warning output is sent. Can be file-like (has a
83 ``.write`` method), a string (file name, opened for writing),
84 '' (empty string, for discarding all stream messages) or
85 `None` (implies `sys.stderr`; default).
86 - `encoding`: The output encoding.
87 - `error_handler`: The error handler for stderr output encoding.
91 """The path to or description of the source data."""
93 self
.error_handler
= error_handler
94 """The character encoding error handler."""
96 self
.debug_flag
= debug
97 """Show debug (level=0) system messages?"""
99 self
.report_level
= report_level
100 """The level at or above which warning output will be sent
103 self
.halt_level
= halt_level
104 """The level at or above which `SystemMessage` exceptions
105 will be raised, halting execution."""
109 elif stream
and type(stream
) in (unicode, bytes
):
110 # if `stream` is a file name, open it
111 if type(stream
) is bytes
:
112 stream
= open(stream
, 'w')
114 stream
= open(stream
.encode(), 'w')
117 """Where warning output is sent."""
119 self
.encoding
= encoding
or getattr(stream
, 'encoding', 'ascii')
120 """The output character encoding."""
123 """List of bound methods or functions to call with each system_message
127 """The highest level system message generated so far."""
129 def set_conditions(self
, category
, report_level
, halt_level
,
130 stream
=None, debug
=0):
131 warnings
.warn('docutils.utils.Reporter.set_conditions deprecated; '
132 'set attributes via configuration settings or directly',
133 DeprecationWarning, stacklevel
=2)
134 self
.report_level
= report_level
135 self
.halt_level
= halt_level
139 self
.debug_flag
= debug
def attach_observer(self, observer):
    """
    Register `observer` by appending it to ``self.observers``.

    `observer` is a function or bound method accepting a single argument,
    a `nodes.system_message` instance.
    """
    registered = self.observers
    registered.append(observer)
def detach_observer(self, observer):
    """
    Unregister `observer` by removing it from ``self.observers``.

    The removal is a plain list ``remove()``, so an observer that was never
    attached raises `ValueError`.
    """
    registered = self.observers
    registered.remove(observer)
151 def notify_observers(self
, message
):
152 for observer
in self
.observers
:
155 def system_message(self
, level
, message
, *children
, **kwargs
):
157 Return a system_message object.
159 Raise an exception or generate a warning if appropriate.
161 # `message` can be a `string`, `unicode`, or `Exception` instance.
162 # Convert now to detect errors:
164 message
= unicode(message
)
165 except UnicodeError, err
:
166 # In Python < 2.6, # unicode(<exception instance>) uses __str__
167 # and fails with non-ASCII chars in arguments
168 if sys
.version_info
< (2,6):
170 message
= u
', '.join(message
.args
)
171 except AttributeError:
176 attributes
= kwargs
.copy()
177 if 'base_node' in kwargs
:
178 source
, line
= get_source_line(kwargs
['base_node'])
179 del attributes
['base_node']
180 if source
is not None:
181 attributes
.setdefault('source', source
)
183 attributes
.setdefault('line', line
)
184 # assert source is not None, "node has line- but no source-argument"
185 if not 'source' in attributes
: # 'line' is absolute line number
186 try: # look up (source, line-in-source)
187 source
, line
= self
.locator(attributes
.get('line'))
188 # print "locator lookup", kwargs.get('line'), "->", source, line
189 except AttributeError:
190 source
, line
= None, None
191 if source
is not None:
192 attributes
['source'] = source
194 attributes
['line'] = line
195 # assert attributes['line'] is not None, (message, kwargs)
196 # assert attributes['source'] is not None, (message, kwargs)
197 attributes
.setdefault('source', self
.source
)
199 msg
= nodes
.system_message(message
, level
=level
,
200 type=self
.levels
[level
],
201 *children
, **attributes
)
202 if self
.stream
and (level
>= self
.report_level
203 or self
.debug_flag
and level
== self
.DEBUG_LEVEL
204 or level
>= self
.halt_level
):
205 msgtext
= msg
.astext() + '\n'
207 self
.stream
.write(msgtext
)
208 except UnicodeEncodeError:
209 self
.stream
.write(msgtext
.encode(self
.encoding
,
211 if level
>= self
.halt_level
:
212 raise SystemMessage(msg
, level
)
213 if level
> self
.DEBUG_LEVEL
or self
.debug_flag
:
214 self
.notify_observers(msg
)
215 self
.max_level
= max(level
, self
.max_level
)
218 def debug(self
, *args
, **kwargs
):
220 Level-0, "DEBUG": an internal reporting issue. Typically, there is no
221 effect on the processing. Level-0 system messages are handled
222 separately from the others.
225 return self
.system_message(self
.DEBUG_LEVEL
, *args
, **kwargs
)
def info(self, *args, **kwargs):
    """
    Level-1, "INFO": a minor issue that can be ignored. Typically there is
    no effect on processing, and level-1 system messages are not reported.

    All arguments are handed on to `system_message()`; its result is
    returned.
    """
    level = self.INFO_LEVEL
    return self.system_message(level, *args, **kwargs)
def warning(self, *args, **kwargs):
    """
    Level-2, "WARNING": an issue that should be addressed. If ignored,
    there may be unpredictable problems with the output.

    All arguments are handed on to `system_message()`; its result is
    returned.
    """
    level = self.WARNING_LEVEL
    return self.system_message(level, *args, **kwargs)
def error(self, *args, **kwargs):
    """
    Level-3, "ERROR": an error that should be addressed. If ignored, the
    output will contain errors.

    All arguments are handed on to `system_message()`; its result is
    returned.
    """
    level = self.ERROR_LEVEL
    return self.system_message(level, *args, **kwargs)
def severe(self, *args, **kwargs):
    """
    Level-4, "SEVERE": a severe error that must be addressed. If ignored,
    the output will contain severe errors. Typically level-4 system
    messages are turned into exceptions which halt processing.

    All arguments are handed on to `system_message()`; its result is
    returned.
    """
    level = self.SEVERE_LEVEL
    return self.system_message(level, *args, **kwargs)
class ExtensionOptionError(DataError):
    """Common base class of the extension-option errors in this module."""
class BadOptionError(ExtensionOptionError):
    """Raised for invalid extension option fields."""
class BadOptionDataError(ExtensionOptionError):
    """Raised for invalid option data (missing name, missing data, etc.)."""
class DuplicateOptionError(ExtensionOptionError):
    """Raised when an extension option is given more than once."""
263 def extract_extension_options(field_list
, options_spec
):
265 Return a dictionary mapping extension option names to converted values.
268 - `field_list`: A flat field list without field arguments, where each
269 field body consists of a single paragraph only.
270 - `options_spec`: Dictionary mapping known option names to a
271 conversion function such as `int` or `float`.
274 - `KeyError` for unknown option names.
275 - `ValueError` for invalid option values (raised by the conversion
277 - `TypeError` for invalid option value types (raised by conversion
279 - `DuplicateOptionError` for duplicate options.
280 - `BadOptionError` for invalid fields.
281 - `BadOptionDataError` for invalid option data (missing name,
282 missing data, bad quotes, etc.).
284 option_list
= extract_options(field_list
)
285 option_dict
= assemble_option_dict(option_list
, options_spec
)
288 def extract_options(field_list
):
290 Return a list of option (name, value) pairs from field names & bodies.
293 `field_list`: A flat field list, where each field name is a single
294 word and each field body consists of a single paragraph only.
297 - `BadOptionError` for invalid fields.
298 - `BadOptionDataError` for invalid option data (missing name,
299 missing data, bad quotes, etc.).
302 for field
in field_list
:
303 if len(field
[0].astext().split()) != 1:
304 raise BadOptionError(
305 'extension option field name may not contain multiple words')
306 name
= str(field
[0].astext().lower())
310 elif len(body
) > 1 or not isinstance(body
[0], nodes
.paragraph
) \
311 or len(body
[0]) != 1 or not isinstance(body
[0][0], nodes
.Text
):
312 raise BadOptionDataError(
313 'extension option field body may contain\n'
314 'a single paragraph only (option "%s")' % name
)
316 data
= body
[0][0].astext()
317 option_list
.append((name
, data
))
320 def assemble_option_dict(option_list
, options_spec
):
322 Return a mapping of option names to values.
325 - `option_list`: A list of (name, value) pairs (the output of
326 `extract_options()`).
327 - `options_spec`: Dictionary mapping known option names to a
328 conversion function such as `int` or `float`.
331 - `KeyError` for unknown option names.
332 - `DuplicateOptionError` for duplicate options.
333 - `ValueError` for invalid option values (raised by conversion
335 - `TypeError` for invalid option value types (raised by conversion
339 for name
, value
in option_list
:
340 convertor
= options_spec
[name
] # raises KeyError if unknown
341 if convertor
is None:
342 raise KeyError(name
) # or if explicitly disabled
344 raise DuplicateOptionError('duplicate option "%s"' % name
)
346 options
[name
] = convertor(value
)
347 except (ValueError, TypeError), detail
:
348 raise detail
.__class
__('(option: "%s"; value: %r)\n%s'
349 % (name
, value
, ' '.join(detail
.args
)))
class NameValueError(DataError):
    """Raised for invalid "name=value" input (e.g. a missing "=")."""
356 def decode_path(path
):
358 Ensure `path` is Unicode. Return `nodes.reprunicode` object.
360 Decode file/path string in a failsave manner if not already done.
362 # see also http://article.gmane.org/gmane.text.docutils.user/2905
363 if isinstance(path
, unicode):
366 path
= path
.decode(sys
.getfilesystemencoding(), 'strict')
367 except AttributeError: # default value None has no decode method
368 return nodes
.reprunicode(path
)
369 except UnicodeDecodeError:
371 path
= path
.decode('utf-8', 'strict')
372 except UnicodeDecodeError:
373 path
= path
.decode('ascii', 'replace')
374 return nodes
.reprunicode(path
)
377 def extract_name_value(line
):
379 Return a list of (name, value) from a line of the form "name=value ...".
382 `NameValueError` for invalid input (missing name, missing data, bad
387 equals
= line
.find('=')
389 raise NameValueError('missing "="')
390 attname
= line
[:equals
].strip()
391 if equals
== 0 or not attname
:
392 raise NameValueError(
393 'missing attribute name before "="')
394 line
= line
[equals
+1:].lstrip()
396 raise NameValueError(
397 'missing value after "%s="' % attname
)
399 endquote
= line
.find(line
[0], 1)
401 raise NameValueError(
402 'attribute "%s" missing end quote (%s)'
403 % (attname
, line
[0]))
404 if len(line
) > endquote
+ 1 and line
[endquote
+ 1].strip():
405 raise NameValueError(
406 'attribute "%s" end quote (%s) not followed by '
407 'whitespace' % (attname
, line
[0]))
408 data
= line
[1:endquote
]
409 line
= line
[endquote
+1:].lstrip()
411 space
= line
.find(' ')
417 line
= line
[space
+1:].lstrip()
418 attlist
.append((attname
.lower(), data
))
421 def new_reporter(source_path
, settings
):
423 Return a new Reporter object.
427 The path to or description of the source text of the document.
428 `settings` : optparse.Values object
432 source_path
, settings
.report_level
, settings
.halt_level
,
433 stream
=settings
.warning_stream
, debug
=settings
.debug
,
434 encoding
=settings
.error_encoding
,
435 error_handler
=settings
.error_encoding_error_handler
)
438 def new_document(source_path
, settings
=None):
440 Return a new empty document object.
443 `source_path` : string
444 The path to or description of the source text of the document.
445 `settings` : optparse.Values object
446 Runtime settings. If none are provided, a default core set will
447 be used. If you will use the document object with any Docutils
448 components, you must provide their default settings as well. For
449 example, if parsing, at least provide the parser settings,
450 obtainable as follows::
452 settings = docutils.frontend.OptionParser(
453 components=(docutils.parsers.rst.Parser,)
454 ).get_default_values()
456 from docutils
import frontend
458 settings
= frontend
.OptionParser().get_default_values()
459 source_path
= decode_path(source_path
)
460 reporter
= new_reporter(source_path
, settings
)
461 document
= nodes
.document(settings
, reporter
, source
=source_path
)
462 document
.note_source(source_path
, -1)
465 def clean_rcs_keywords(paragraph
, keyword_substitutions
):
466 if len(paragraph
) == 1 and isinstance(paragraph
[0], nodes
.Text
):
467 textnode
= paragraph
[0]
468 for pattern
, substitution
in keyword_substitutions
:
469 match
= pattern
.search(textnode
)
471 paragraph
[0] = nodes
.Text(pattern
.sub(substitution
, textnode
))
474 def relative_path(source
, target
):
476 Build and return a path to `target`, relative to `source` (both files).
478 If there is no common prefix, return the absolute path to `target`.
480 source_parts
= os
.path
.abspath(source
or 'dummy_file').split(os
.sep
)
481 target_parts
= os
.path
.abspath(target
).split(os
.sep
)
482 # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
483 if source_parts
[:2] != target_parts
[:2]:
484 # Nothing in common between paths.
485 # Return absolute path, using '/' for URLs:
486 return '/'.join(target_parts
)
487 source_parts
.reverse()
488 target_parts
.reverse()
489 while (source_parts
and target_parts
490 and source_parts
[-1] == target_parts
[-1]):
491 # Remove path components in common:
494 target_parts
.reverse()
495 parts
= ['..'] * (len(source_parts
) - 1) + target_parts
496 return '/'.join(parts
)
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated. Use `get_stylesheet_list()` instead to enable
    specification of multiple stylesheets as a comma-separated list.

    Parameters:

    - `settings`: object providing the `stylesheet` and `stylesheet_path`
      attributes (`_destination` is read only when `stylesheet_path` is
      set and `relative_to` is not given).
    - `relative_to`: file the stylesheet path is made relative to;
      `None` (the default) means `settings._destination`.

    Return `settings.stylesheet` unchanged when `stylesheet_path` is not
    set; otherwise return
    ``relative_path(relative_to, settings.stylesheet_path)``.
    """
    if not settings.stylesheet_path:
        return settings.stylesheet
    assert not settings.stylesheet, (
        'stylesheet and stylesheet_path are mutually exclusive.')
    # Identity test: only an omitted argument falls back to the default,
    # regardless of how the passed object implements ``==``.
    if relative_to is None:
        relative_to = settings._destination
    return relative_path(relative_to, settings.stylesheet_path)
515 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
517 # The original settings arguments are kept unchanged: you can test
518 # with e.g. ``if settings.stylesheet_path:``
520 # Differences to ``get_stylesheet_reference``:
521 # * return value is a list
522 # * no re-writing of the path (and therefore no optional argument)
523 # (if required, use ``utils.relative_path(source, target)``
524 # in the calling script)
def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    `settings` must provide the `stylesheet` and `stylesheet_path`
    attributes; they are mutually exclusive. Whichever one is set is
    split at commas and each item is stripped of surrounding spaces,
    tabs and newlines. If neither is set, an empty list is returned.
    """
    assert not (settings.stylesheet and settings.stylesheet_path), (
        'stylesheet and stylesheet_path are mutually exclusive.')
    # At most one of the two is set (see the assertion above);
    # `stylesheet_path` keeps precedence.
    spec = settings.stylesheet_path or settings.stylesheet
    if not spec:
        return []
    # strip whitespace (frequently occurring in config files)
    return [sheet.strip(u' \t\n') for sheet in spec.split(",")]
def get_trim_footnote_ref_space(settings):
    """
    Tell whether the space before footnote references is to be trimmed.

    An explicit `settings.trim_footnote_reference_space` (anything but
    None) is returned as is. When it is None, the answer is True only if
    `settings` has a `footnote_references` attribute equal to
    'superscript'; otherwise it is False.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    if not hasattr(settings, 'footnote_references'):
        return False
    return settings.footnote_references == 'superscript'
555 def get_source_line(node
):
557 Return the "source" and "line" attributes from the `node` given or from
558 its closest ancestor.
561 if node
.source
or node
.line
:
562 return node
.source
, node
.line
566 def escape2null(text
):
567 """Return a string with escape-backslashes converted to nulls."""
571 found
= text
.find('\\', start
)
573 parts
.append(text
[start
:])
574 return ''.join(parts
)
575 parts
.append(text
[start
:found
])
576 parts
.append('\x00' + text
[found
+1:found
+2])
577 start
= found
+ 2 # skip character after escape
579 def unescape(text
, restore_backslashes
=0):
581 Return a string with nulls removed or restored to backslashes.
582 Backslash-escaped spaces are also removed.
584 if restore_backslashes
:
585 return text
.replace('\x00', '\\')
587 for sep
in ['\x00 ', '\x00\n', '\x00']:
588 text
= ''.join(text
.split(sep
))
591 east_asian_widths
= {'W': 2, # Wide
592 'F': 2, # Full-width (wide)
594 'H': 1, # Half-width (narrow)
595 'N': 1, # Neutral (not East Asian, treated as narrow)
596 'A': 1} # Ambiguous (s/b wide in East Asian context,
597 # narrow otherwise, but that doesn't work)
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
601 def column_width(text
):
602 """Return the column width of text.
604 Correct ``len(text)`` for wide East Asian and combining Unicode chars.
606 if isinstance(text
, str) and sys
.version_info
< (3,0):
608 combining_correction
= sum([-1 for c
in text
609 if unicodedata
.combining(c
)])
611 width
= sum([east_asian_widths
[unicodedata
.east_asian_width(c
)]
613 except AttributeError: # east_asian_width() New in version 2.4.
615 return width
+ combining_correction
624 # by Li Daobing http://code.activestate.com/recipes/190465/
625 # since Python 2.6 there is also itertools.combinations()
626 def unique_combinations(items
, n
):
627 """Return r-length tuples, in sorted order, no repeated elements"""
630 for i
in xrange(len(items
)-n
+1):
631 for cc
in unique_combinations(items
[i
+1:],n
-1):
634 def normalize_language_tag(tag
):
635 """Return a list of normalized combinations for a `BCP 47` language tag.
639 >>> normalize_language_tag('de-AT-1901')
640 ['de_at_1901', 'de_at', 'de_1901', 'de']
643 tag
= tag
.lower().replace('-','_')
644 # find all combinations of subtags
646 base_tag
= tag
.split('_')[:1]
647 subtags
= tag
.split('_')[1:]
648 # print base_tag, subtags
649 for n
in range(len(subtags
), 0, -1):
650 for tags
in unique_combinations(subtags
, n
):
652 taglist
.append('_'.join(base_tag
+ tags
))
656 class DependencyList
:
659 List of dependencies, with file recording support.
661 Note that the output file is not automatically closed. You have
662 to explicitly call the close() method.
665 def __init__(self
, output_file
=None, dependencies
=[]):
667 Initialize the dependency list, automatically setting the
668 output file to `output_file` (see `set_output()`) and adding
669 all supplied dependencies.
671 self
.set_output(output_file
)
672 for i
in dependencies
:
675 def set_output(self
, output_file
):
677 Set the output file and clear the list of already added
680 `output_file` must be a string. The specified file is
681 immediately overwritten.
683 If output_file is '-', the output will be written to stdout.
684 If it is None, no file output is done when calling add().
687 if output_file
== '-':
688 self
.file = sys
.stdout
690 self
.file = open(output_file
, 'w')
694 def add(self
, *filenames
):
696 If the dependency `filename` has not already been added,
697 append it to self.list and print it to self.file if self.file
700 for filename
in filenames
:
701 if not filename
in self
.list:
702 self
.list.append(filename
)
703 if self
.file is not None:
704 print >>self
.file, filename
708 Close the output file.
715 output_file
= self
.file.name
718 return '%s(%r, %s)' % (self
.__class
__.__name
__, output_file
, self
.list)