2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
6 Miscellaneous utilities for the documentation utilities.
9 __docformat__
= 'reStructuredText'
16 from docutils
import ApplicationError
, DataError
17 from docutils
import nodes
18 from docutils
._compat
import bytes
class SystemMessage(ApplicationError):

    """
    Exception carrying the text and the level of a system message.

    The exception message is the text of the `system_message` node
    (``system_message.astext()``); the numeric level is stored in the
    `level` attribute.
    """

    def __init__(self, system_message, level):
        """
        :Parameters:
            - `system_message`: Object providing an ``astext()`` method
              (presumably a `nodes.system_message` instance).
            - `level`: The level of the message.
        """
        Exception.__init__(self, system_message.astext())
        # Keep the level so that handlers can inspect the severity.
        self.level = level
class SystemMessagePropagation(ApplicationError):

    """Exception type adding no behaviour of its own to `ApplicationError`."""
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced iff the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    that stream is not false (e.g. not ``''``).

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    (DEBUG_LEVEL,
     INFO_LEVEL,
     WARNING_LEVEL,
     ERROR_LEVEL,
     SEVERE_LEVEL) = range(5)

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0, encoding=None, error_handler='backslashreplace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string, for discarding all stream messages) or
              `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        if stream is None:
            stream = sys.stderr
        elif stream and type(stream) in (unicode, bytes):
            # if `stream` is a file name, open it
            if type(stream) is bytes:
                stream = open(stream, 'w')
            else:
                stream = open(stream.encode(), 'w')

        self.stream = stream
        """Where warning output is sent."""

        if encoding is None:
            # Fall back on the encoding advertised by the stream, if any.
            try:
                encoding = stream.encoding
            except AttributeError:
                pass

        self.encoding = encoding or 'ascii'
        """The output character encoding."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level = -1
        """The highest level system message generated so far."""

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Deprecated: reset thresholds, stream and debug switch in one call.

        Emits a `DeprecationWarning`.  `category` is accepted but not used.
        A `stream` of `None` selects `sys.stderr`.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if stream is None:
            stream = sys.stderr
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer`; raises `ValueError` if it is not attached."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer with `message`."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.

        The keyword argument `base_node`, if given, is not passed on to the
        node; it is only used to look up default "source" and "line"
        attributes via `get_source_line()`.
        """
        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        if 'source' not in attributes:  # 'line' is absolute line number
            try:  # look up (source, line-in-source)
                source, line = self.locator(attributes.get('line'))
            except AttributeError:
                # No `locator` attribute has been set on this reporter.
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL
                            or level >= self.halt_level):
            msgtext = msg.astext() + '\n'
            try:
                self.stream.write(msgtext)
            except UnicodeEncodeError:
                # Retry with an explicitly encoded message.
                self.stream.write(msgtext.encode(self.encoding,
                                                 self.error_handler))
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Returns `None` without creating a message if the debug switch is off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):
    """Base class of the extension option errors defined below."""


class BadOptionError(ExtensionOptionError):
    """Invalid extension option field."""


class BadOptionDataError(ExtensionOptionError):
    """Invalid extension option data."""


class DuplicateOptionError(ExtensionOptionError):
    """An extension option was given more than once."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    # Two stages: pull raw (name, value) pairs out of the field list, then
    # validate and convert them against the specification.
    option_list = extract_options(field_list)
    return assemble_option_dict(option_list, options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).

    Option names are lower-cased; the value of a field with an empty body
    is `None`.
    """
    option_list = []
    for field in field_list:
        if len(field[0].astext().split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(field[0].astext().lower())
        body = field[1]
        if len(body) == 0:
            data = None
        elif (len(body) > 1 or not isinstance(body[0], nodes.paragraph)
              or len(body[0]) != 1
              or not isinstance(body[0][0], nodes.Text)):
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        else:
            data = body[0][0].astext()
        option_list.append((name, data))
    return option_list
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
          function).
        - `TypeError` for invalid option value types (raised by conversion
          function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Re-raise the same exception type with the offending option
            # name and value prepended to the original message.
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, ' '.join(detail.args)))
    return options
class NameValueError(DataError):
    """Invalid "name=value" input."""
def decode_path(path):
    """
    Ensure `path` is Unicode. Return `nodes.reprunicode` object.

    Decode file/path string in a failsafe manner if not already done.
    A `path` that already is a `unicode` instance is returned unchanged.
    """
    # see also http://article.gmane.org/gmane.text.docutils.user/2905
    if isinstance(path, unicode):
        return path
    try:
        path = path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:  # default value None has no decode method
        return nodes.reprunicode(path)
    except UnicodeDecodeError:
        # Not valid in the file system encoding: try UTF-8, and as a last
        # resort decode as ASCII, replacing undecodable bytes.
        try:
            path = path.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            path = path.decode('ascii', 'replace')
    return nodes.reprunicode(path)
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Values may be enclosed in single or double quotes, in which case they
    may contain spaces.  Names are lower-cased.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    attlist = []
    while line:
        equals = line.find('=')
        if equals == -1:
            raise NameValueError('missing "="')
        attname = line[:equals].strip()
        if equals == 0 or not attname:
            raise NameValueError(
                'missing attribute name before "="')
        line = line[equals+1:].lstrip()
        if not line:
            raise NameValueError(
                'missing value after "%s="' % attname)
        if line[0] in '\'"':
            # Quoted value: runs up to the matching quote character.
            endquote = line.find(line[0], 1)
            if endquote == -1:
                raise NameValueError(
                    'attribute "%s" missing end quote (%s)'
                    % (attname, line[0]))
            if len(line) > endquote + 1 and line[endquote + 1].strip():
                raise NameValueError(
                    'attribute "%s" end quote (%s) not followed by '
                    'whitespace' % (attname, line[0]))
            data = line[1:endquote]
            line = line[endquote+1:].lstrip()
        else:
            # Bare value: runs up to the next space or the end of the line.
            space = line.find(' ')
            if space == -1:
                data = line
                line = ''
            else:
                data = line[:space]
                line = line[space+1:].lstrip()
        attlist.append((attname.lower(), data))
    return attlist
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  The attributes `report_level`, `halt_level`,
            `warning_stream`, `debug`, `error_encoding` and
            `error_encoding_error_handler` are read.
    """
    return Reporter(
        source_path, settings.report_level, settings.halt_level,
        stream=settings.warning_stream, debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.  For
            example, if parsing, at least provide the parser settings,
            obtainable as follows::

                settings = docutils.frontend.OptionParser(
                    components=(docutils.parsers.rst.Parser,)
                    ).get_default_values()
    """
    # Imported inside the function, as in the original code.
    from docutils import frontend
    if settings is None:
        settings = frontend.OptionParser().get_default_values()
    source_path = decode_path(source_path)
    reporter = new_reporter(source_path, settings)
    document = nodes.document(settings, reporter, source=source_path)
    document.note_source(source_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing is done unless `paragraph` consists of a single `nodes.Text`
    child.  `keyword_substitutions` is a sequence of (pattern, substitution)
    pairs, where `pattern` provides ``search()`` and ``sub()`` (e.g. a
    compiled regular expression).  Only the first pattern that matches is
    applied.  Returns `None`.
    """
    if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
        textnode = paragraph[0]
        for pattern, substitution in keyword_substitutions:
            match = pattern.search(textnode)
            if match:
                paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
                return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.  A false `source` (e.g.
    `None`) is treated as a file in the current working directory.
    """
    source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Reverse so that the leading components can be popped off the end.
    source_parts.reverse()
    target_parts.reverse()
    while (source_parts and target_parts
           and source_parts[-1] == target_parts[-1]):
        # Remove path components in common:
        source_parts.pop()
        target_parts.pop()
    target_parts.reverse()
    # One '..' per remaining source directory (the last remaining source
    # part is the file name itself, hence the -1).
    parts = ['..'] * (len(source_parts) - 1) + target_parts
    return '/'.join(parts)
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    Deprecated. Use get_stylesheet_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.

    If `settings.stylesheet_path` is set, it is returned as a path relative
    to `relative_to` (default: `settings._destination`); otherwise
    `settings.stylesheet` is returned unchanged.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    else:
        return settings.stylesheet
506 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
508 # The original settings arguments are kept unchanged: you can test
509 # with e.g. ``if settings.stylesheet_path:``
511 # Differences to ``get_stylesheet_reference``:
512 # * return value is a list
513 # * no re-writing of the path (and therefore no optional argument)
514 # (if required, use ``utils.relative_path(source, target)``
515 # in the calling script)
def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    `settings.stylesheet_path` or `settings.stylesheet` (they are mutually
    exclusive) is split at commas; an empty list is returned if neither is
    set.
    """
    assert not (settings.stylesheet and settings.stylesheet_path), (
        'stylesheet and stylesheet_path are mutually exclusive.')
    if settings.stylesheet_path:
        sheets = settings.stylesheet_path.split(",")
    elif settings.stylesheet:
        sheets = settings.stylesheet.split(",")
    else:
        sheets = []
    # strip whitespace (frequently occurring in config files)
    return [sheet.strip(u' \t\n') for sheet in sheets]
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    if settings.trim_footnote_reference_space is None:
        # `footnote_references` may be absent from the settings object.
        return (hasattr(settings, 'footnote_references')
                and settings.footnote_references == 'superscript')
    else:
        return settings.trim_footnote_reference_space
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    Ancestors are visited via the `parent` attribute.  ``(None, None)`` is
    returned if no node in the chain has a true `source` or `line`.
    """
    while node:
        if node.source or node.line:
            return node.source, node.line
        node = node.parent
    return None, None
def escape2null(text):
    """Return a string with escape-backslashes converted to nulls."""
    parts = []
    start = 0
    while True:
        found = text.find('\\', start)
        if found == -1:
            # No further backslash: append the remainder and finish.
            parts.append(text[start:])
            return ''.join(parts)
        parts.append(text[start:found])
        # Replace the backslash by a null and keep the escaped character
        # (empty if the backslash is the last character of `text`).
        parts.append('\x00' + text[found+1:found+2])
        start = found + 2               # skip character after escape
def unescape(text, restore_backslashes=0):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.

    If `restore_backslashes` is true, every null is turned back into a
    backslash and nothing is removed.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    else:
        # Order matters: null+space and null+newline must be removed
        # before the remaining bare nulls.
        for sep in ['\x00 ', '\x00\n', '\x00']:
            text = ''.join(text.split(sep))
        return text
east_asian_widths = {'W': 2,   # Wide
                     'F': 2,   # Full-width (wide)
                     'Na': 1,  # Narrow
                     'H': 1,   # Half-width (narrow)
                     'N': 1,   # Neutral (not East Asian, treated as narrow)
                     'A': 1}   # Ambiguous (s/b wide in East Asian context,
                               # narrow otherwise, but that doesn't work)
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""


def column_width(text):
    """Return the column width of text.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    """
    if isinstance(text, str) and sys.version_info < (3,0):
        # Byte string under Python 2: no Unicode information available.
        return len(text)
    # Each combining character is counted as occupying no column.
    combining_correction = sum([-1 for c in text
                                if unicodedata.combining(c)])
    try:
        width = sum([east_asian_widths[unicodedata.east_asian_width(c)]
                     for c in text])
    except AttributeError:  # east_asian_width() New in version 2.4.
        width = len(text)
    return width + combining_correction
615 # by Li Daobing http://code.activestate.com/recipes/190465/
616 # since Python 2.6 there is also itertools.combinations()
def unique_combinations(items, n):
    """Return r-length tuples, in sorted order, no repeated elements

    Generator yielding every `n`-element combination of `items` as a list,
    keeping the order of `items`.  For ``n == 0`` a single empty list is
    yielded.
    """
    if n == 0:
        yield []
    else:
        # `range` instead of `xrange`: available on Python 2 and 3 alike.
        for i in range(len(items) - n + 1):
            # Combine items[i] with every (n-1)-combination of the tail.
            for cc in unique_combinations(items[i+1:], n - 1):
                yield [items[i]] + cc
def normalize_language_tag(tag):
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The tag is lower-cased and hyphens are replaced by underscores.  The
    result lists the base tag combined with all combinations of sub-tags,
    longest first, and ends with the bare base tag.

    Example:

    >>> normalize_language_tag('de-AT-1901')
    ['de_at_1901', 'de_at', 'de_1901', 'de']
    """
    # normalize:
    tag = tag.lower().replace('-', '_')
    # find all combinations of subtags
    taglist = []
    base_tag = tag.split('_')[:1]
    subtags = tag.split('_')[1:]
    for n in range(len(subtags), 0, -1):
        for tags in unique_combinations(subtags, n):
            taglist.append('_'.join(base_tag + tags))
    taglist += base_tag
    return taglist
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        self.set_output(output_file)
        for i in dependencies:
            self.add(i)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file is not None:
            if output_file == '-':
                self.file = sys.stdout
            else:
                self.file = open(output_file, 'w')
        else:
            self.file = None

    def add(self, *filenames):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        for filename in filenames:
            if filename not in self.list:
                self.list.append(filename)
                if self.file is not None:
                    # One dependency per line; works on Python 2 and 3.
                    self.file.write(filename + '\n')

    def close(self):
        """
        Close the output file.

        Does nothing if no output file is set.  `sys.stdout` is released
        but not closed.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self):
        if self.file:
            # File-like objects without a `name` are reported as None.
            output_file = getattr(self.file, 'name', None)
        else:
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)