Minor fixes for Python 3.1
[docutils.git] / docutils / utils.py
blob61cff5d16eb3efe019e173692fc0309d37185db6
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Miscellaneous utilities for the documentation utilities.
7 """
9 __docformat__ = 'reStructuredText'
11 import sys
12 import os
13 import os.path
14 import warnings
15 import unicodedata
16 from docutils import ApplicationError, DataError
17 from docutils import nodes
18 from docutils._compat import bytes
class SystemMessage(ApplicationError):

    """
    Exception carrying a system message and its severity level.

    The exception text is the result of ``system_message.astext()``; the
    numeric level is stored in the `level` attribute.
    """

    def __init__(self, system_message, level):
        message_text = system_message.astext()
        Exception.__init__(self, message_text)
        self.level = level
class SystemMessagePropagation(ApplicationError):

    """`ApplicationError` subclass that adds no behaviour of its own."""
class Reporter:

    """
    Info/warning/error reporter and ``system_message`` element generator.

    Five levels of system messages are defined, along with corresponding
    methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.

    There is typically one Reporter object per process.  A Reporter object is
    instantiated with thresholds for reporting (generating warnings) and
    halting processing (raising exceptions), a switch to turn debug output on
    or off, and an I/O stream for warnings.  These are stored as instance
    attributes.

    When a system message is generated, its level is compared to the stored
    thresholds, and a warning or error is generated as appropriate.  Debug
    messages are produced iff the stored debug switch is on, independently of
    other thresholds.  Message output is sent to the stored warning stream if
    not set to ''.

    The Reporter class also employs a modified form of the "Observer" pattern
    [GoF95]_ to track system messages generated.  The `attach_observer` method
    should be called before parsing, with a bound method or function which
    accepts system messages.  The observer can be removed with
    `detach_observer`, and another added in its place.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
    """List of names for system message levels, indexed by level."""

    # system message level constants:
    (DEBUG_LEVEL,
     INFO_LEVEL,
     WARNING_LEVEL,
     ERROR_LEVEL,
     SEVERE_LEVEL) = range(5)

    def __init__(self, source, report_level, halt_level, stream=None,
                 debug=0, encoding=None, error_handler='backslashreplace'):
        """
        :Parameters:
            - `source`: The path to or description of the source data.
            - `report_level`: The level at or above which warning output will
              be sent to `stream`.
            - `halt_level`: The level at or above which `SystemMessage`
              exceptions will be raised, halting execution.
            - `debug`: Show debug (level=0) system messages?
            - `stream`: Where warning output is sent.  Can be file-like (has a
              ``.write`` method), a string (file name, opened for writing),
              '' (empty string, for discarding all stream messages) or
              `None` (implies `sys.stderr`; default).
            - `encoding`: The output encoding.
            - `error_handler`: The error handler for stderr output encoding.
        """

        self.source = source
        """The path to or description of the source data."""

        self.error_handler = error_handler
        """The character encoding error handler."""

        self.debug_flag = debug
        """Show debug (level=0) system messages?"""

        self.report_level = report_level
        """The level at or above which warning output will be sent
        to `self.stream`."""

        self.halt_level = halt_level
        """The level at or above which `SystemMessage` exceptions
        will be raised, halting execution."""

        if stream is None:
            stream = sys.stderr
        elif stream and isinstance(stream, (type(u''), bytes)):
            # `stream` is a file name: open it.  ``isinstance`` (rather than
            # an exact type comparison) also accepts string subclasses, and
            # ``type(u'')`` names the text type on Python 2 and 3 alike.
            if isinstance(stream, bytes):
                stream = open(stream, 'w')
            else:
                stream = open(stream.encode(), 'w')

        self.stream = stream
        """Where warning output is sent."""

        if encoding is None:
            # Fall back to the stream's own encoding, if it declares one.
            encoding = getattr(stream, 'encoding', None)

        self.encoding = encoding or 'ascii'
        """The output character encoding."""

        self.observers = []
        """List of bound methods or functions to call with each system_message
        created."""

        self.max_level = -1
        """The highest level system message generated so far."""

    def set_conditions(self, category, report_level, halt_level,
                       stream=None, debug=0):
        """
        Deprecated: reset thresholds, stream and debug switch in one call.

        Emits a `DeprecationWarning`.  `category` is accepted but unused.
        """
        warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
                      'set attributes via configuration settings or directly',
                      DeprecationWarning, stacklevel=2)
        self.report_level = report_level
        self.halt_level = halt_level
        if stream is None:
            stream = sys.stderr
        self.stream = stream
        self.debug_flag = debug

    def attach_observer(self, observer):
        """
        The `observer` parameter is a function or bound method which takes one
        argument, a `nodes.system_message` instance.
        """
        self.observers.append(observer)

    def detach_observer(self, observer):
        """Remove `observer`; raises `ValueError` if it was not attached."""
        self.observers.remove(observer)

    def notify_observers(self, message):
        """Call every attached observer with `message`."""
        for observer in self.observers:
            observer(message)

    def system_message(self, level, message, *children, **kwargs):
        """
        Return a system_message object.

        Raise an exception or generate a warning if appropriate.
        """
        attributes = kwargs.copy()
        if 'base_node' in kwargs:
            # Derive missing source/line information from the given node.
            source, line = get_source_line(kwargs['base_node'])
            del attributes['base_node']
            if source is not None:
                attributes.setdefault('source', source)
            if line is not None:
                attributes.setdefault('line', line)
        if 'source' not in attributes:  # 'line' is absolute line number
            try:  # look up (source, line-in-source)
                source, line = self.locator(attributes.get('line'))
            except AttributeError:
                # No `locator` attribute has been set on this reporter.
                source, line = None, None
            if source is not None:
                attributes['source'] = source
            if line is not None:
                attributes['line'] = line
        attributes.setdefault('source', self.source)

        msg = nodes.system_message(message, level=level,
                                   type=self.levels[level],
                                   *children, **attributes)
        if self.stream and (level >= self.report_level
                            or self.debug_flag and level == self.DEBUG_LEVEL
                            or level >= self.halt_level):
            msgtext = msg.astext() + '\n'
            try:
                self.stream.write(msgtext)
            except UnicodeEncodeError:
                encoded = msgtext.encode(self.encoding, self.error_handler)
                try:
                    self.stream.write(encoded)
                except TypeError:
                    # Text-only streams reject bytes; write the escaped
                    # message as text instead of failing.
                    self.stream.write(encoded.decode(self.encoding))
        if level >= self.halt_level:
            raise SystemMessage(msg, level)
        if level > self.DEBUG_LEVEL or self.debug_flag:
            self.notify_observers(msg)
        self.max_level = max(level, self.max_level)
        return msg

    def debug(self, *args, **kwargs):
        """
        Level-0, "DEBUG": an internal reporting issue. Typically, there is no
        effect on the processing. Level-0 system messages are handled
        separately from the others.

        Returns `None` when the debug switch is off.
        """
        if self.debug_flag:
            return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)

    def info(self, *args, **kwargs):
        """
        Level-1, "INFO": a minor issue that can be ignored. Typically there is
        no effect on processing, and level-1 system messages are not reported.
        """
        return self.system_message(self.INFO_LEVEL, *args, **kwargs)

    def warning(self, *args, **kwargs):
        """
        Level-2, "WARNING": an issue that should be addressed. If ignored,
        there may be unpredictable problems with the output.
        """
        return self.system_message(self.WARNING_LEVEL, *args, **kwargs)

    def error(self, *args, **kwargs):
        """
        Level-3, "ERROR": an error that should be addressed. If ignored, the
        output will contain errors.
        """
        return self.system_message(self.ERROR_LEVEL, *args, **kwargs)

    def severe(self, *args, **kwargs):
        """
        Level-4, "SEVERE": a severe error that must be addressed. If ignored,
        the output will contain severe errors. Typically level-4 system
        messages are turned into exceptions which halt processing.
        """
        return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
class ExtensionOptionError(DataError):

    """Base class for the extension option errors defined in this module."""
class BadOptionError(ExtensionOptionError):

    """Raised by `extract_options()` for invalid extension option fields."""
class BadOptionDataError(ExtensionOptionError):

    """Raised by `extract_options()` for invalid extension option data."""
class DuplicateOptionError(ExtensionOptionError):

    """Raised by `assemble_option_dict()` when an option occurs twice."""
def extract_extension_options(field_list, options_spec):
    """
    Return a dictionary mapping extension option names to converted values.

    The (name, value) pairs are read from `field_list` by `extract_options()`
    and converted according to `options_spec` by `assemble_option_dict()`.

    :Parameters:
        - `field_list`: A flat field list without field arguments, where each
          field body consists of a single paragraph only.
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `ValueError` for invalid option values (raised by the conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
        - `DuplicateOptionError` for duplicate options.
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    return assemble_option_dict(extract_options(field_list), options_spec)
def extract_options(field_list):
    """
    Return a list of option (name, value) pairs from field names & bodies.

    Names are lower-cased; the value is `None` for an empty field body.

    :Parameter:
        `field_list`: A flat field list, where each field name is a single
        word and each field body consists of a single paragraph only.

    :Exceptions:
        - `BadOptionError` for invalid fields.
        - `BadOptionDataError` for invalid option data (missing name,
          missing data, bad quotes, etc.).
    """
    pairs = []
    for field in field_list:
        raw_name = field[0].astext()
        if len(raw_name.split()) != 1:
            raise BadOptionError(
                'extension option field name may not contain multiple words')
        name = str(raw_name.lower())
        body = field[1]
        if len(body) == 0:
            data = None
        elif (len(body) == 1 and isinstance(body[0], nodes.paragraph)
              and len(body[0]) == 1 and isinstance(body[0][0], nodes.Text)):
            # Exactly one paragraph holding exactly one text node.
            data = body[0][0].astext()
        else:
            raise BadOptionDataError(
                'extension option field body may contain\n'
                'a single paragraph only (option "%s")' % name)
        pairs.append((name, data))
    return pairs
def assemble_option_dict(option_list, options_spec):
    """
    Return a mapping of option names to values.

    :Parameters:
        - `option_list`: A list of (name, value) pairs (the output of
          `extract_options()`).
        - `options_spec`: Dictionary mapping known option names to a
          conversion function such as `int` or `float`.

    :Exceptions:
        - `KeyError` for unknown option names.
        - `DuplicateOptionError` for duplicate options.
        - `ValueError` for invalid option values (raised by conversion
           function).
        - `TypeError` for invalid option value types (raised by conversion
           function).
    """
    options = {}
    for name, value in option_list:
        convertor = options_spec[name]  # raises KeyError if unknown
        if convertor is None:
            raise KeyError(name)        # or if explicitly disabled
        if name in options:
            raise DuplicateOptionError('duplicate option "%s"' % name)
        try:
            options[name] = convertor(value)
        except (ValueError, TypeError) as detail:
            # Re-raise the same exception class with the option name and
            # value prepended.  Arguments are formatted individually so that
            # non-string exception arguments cannot break the join.
            reason = ' '.join(['%s' % (arg,) for arg in detail.args])
            raise detail.__class__('(option: "%s"; value: %r)\n%s'
                                   % (name, value, reason))
    return options
class NameValueError(DataError):

    """Raised by `extract_name_value()` for invalid "name=value" input."""
def decode_path(path):
    """
    Ensure `path` is Unicode.

    A Unicode string is returned unchanged; any other value is returned as a
    `nodes.reprunicode` object.

    Decode file/path string in a failsafe manner if not already done: the
    file system encoding is tried first, then UTF-8, then ASCII with
    replacement of undecodable bytes.
    """
    # see also http://article.gmane.org/gmane.text.docutils.user/2905
    # ``type(u'')`` names the text type on both Python 2 and Python 3.
    if isinstance(path, type(u'')):
        return path
    try:
        path = path.decode(sys.getfilesystemencoding(), 'strict')
    except AttributeError:  # default value None has no decode method
        return nodes.reprunicode(path)
    except UnicodeDecodeError:
        try:
            path = path.decode('utf-8', 'strict')
        except UnicodeDecodeError:
            path = path.decode('ascii', 'replace')
    return nodes.reprunicode(path)
def extract_name_value(line):
    """
    Return a list of (name, value) from a line of the form "name=value ...".

    Names are lower-cased.  A value is either quoted (single or double
    quotes, closing quote followed by whitespace or end of line) or runs up
    to the next space character.

    :Exception:
        `NameValueError` for invalid input (missing name, missing data, bad
        quotes, etc.).
    """
    pairs = []
    rest = line
    while rest:
        if '=' not in rest:
            raise NameValueError('missing "="')
        raw_name, rest = rest.split('=', 1)
        attname = raw_name.strip()
        if not attname:
            raise NameValueError(
                  'missing attribute name before "="')
        rest = rest.lstrip()
        if not rest:
            raise NameValueError(
                  'missing value after "%s="' % attname)
        quote = rest[0]
        if quote in '\'"':
            closing = rest.find(quote, 1)
            if closing == -1:
                raise NameValueError(
                      'attribute "%s" missing end quote (%s)'
                      % (attname, quote))
            # Character right after the closing quote ('' at end of line).
            following = rest[closing + 1:closing + 2]
            if following.strip():
                raise NameValueError(
                      'attribute "%s" end quote (%s) not followed by '
                      'whitespace' % (attname, quote))
            data = rest[1:closing]
            rest = rest[closing + 1:].lstrip()
        else:
            pieces = rest.split(' ', 1)
            data = pieces[0]
            if len(pieces) == 2:
                rest = pieces[1].lstrip()
            else:
                rest = ''
        pairs.append((attname.lower(), data))
    return pairs
def new_reporter(source_path, settings):
    """
    Return a new Reporter object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.
    """
    return Reporter(
        source_path,
        settings.report_level,
        settings.halt_level,
        stream=settings.warning_stream,
        debug=settings.debug,
        encoding=settings.error_encoding,
        error_handler=settings.error_encoding_error_handler)
def new_document(source_path, settings=None):
    """
    Return a new empty document object.

    :Parameters:
        `source_path` : string
            The path to or description of the source text of the document.
        `settings` : optparse.Values object
            Runtime settings.  If none are provided, a default core set will
            be used.  If you will use the document object with any Docutils
            components, you must provide their default settings as well.  For
            example, if parsing, at least provide the parser settings,
            obtainable as follows::

                settings = docutils.frontend.OptionParser(
                    components=(docutils.parsers.rst.Parser,)
                    ).get_default_values()
    """
    from docutils import frontend
    if settings is None:
        default_parser = frontend.OptionParser()
        settings = default_parser.get_default_values()
    decoded_path = decode_path(source_path)
    document = nodes.document(settings,
                              new_reporter(decoded_path, settings),
                              source=decoded_path)
    document.note_source(decoded_path, -1)
    return document
def clean_rcs_keywords(paragraph, keyword_substitutions):
    """
    Apply the first matching keyword substitution to `paragraph`, in place.

    Nothing happens unless `paragraph` consists of a single `nodes.Text`
    child.  `keyword_substitutions` is a sequence of (compiled pattern,
    substitution) pairs; only the first pattern that matches is applied.
    """
    if len(paragraph) != 1 or not isinstance(paragraph[0], nodes.Text):
        return
    textnode = paragraph[0]
    for pattern, substitution in keyword_substitutions:
        if pattern.search(textnode):
            paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
            return
def relative_path(source, target):
    """
    Build and return a path to `target`, relative to `source` (both files).

    If there is no common prefix, return the absolute path to `target`.
    The result always uses '/' as separator.
    """
    source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
    target_parts = os.path.abspath(target).split(os.sep)
    # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
    if source_parts[:2] != target_parts[:2]:
        # Nothing in common between paths.
        # Return absolute path, using '/' for URLs:
        return '/'.join(target_parts)
    # Count the leading path components both paths share.
    shared = 0
    limit = min(len(source_parts), len(target_parts))
    while shared < limit and source_parts[shared] == target_parts[shared]:
        shared += 1
    # One '..' per remaining source directory (the last part is the file).
    climb = ['..'] * (len(source_parts) - shared - 1)
    return '/'.join(climb + target_parts[shared:])
def get_stylesheet_reference(settings, relative_to=None):
    """
    Retrieve a stylesheet reference from the settings object.

    If ``settings.stylesheet_path`` is set, return it as a path relative to
    `relative_to` (default: ``settings._destination``); otherwise return
    ``settings.stylesheet`` unchanged.

    Deprecated. Use get_stylesheet_reference_list() instead to
    enable specification of multiple stylesheets as a comma-separated
    list.
    """
    if settings.stylesheet_path:
        assert not settings.stylesheet, (
            'stylesheet and stylesheet_path are mutually exclusive.')
        if relative_to is None:
            relative_to = settings._destination
        return relative_path(relative_to, settings.stylesheet_path)
    else:
        return settings.stylesheet
506 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
508 # The original settings arguments are kept unchanged: you can test
509 # with e.g. ``if settings.stylesheet_path:``
511 # Differences to ``get_stylesheet_reference``:
512 # * return value is a list
513 # * no re-writing of the path (and therefore no optional argument)
514 # (if required, use ``utils.relative_path(source, target)``
515 # in the calling script)
def get_stylesheet_list(settings):
    """
    Retrieve list of stylesheet references from the settings object.

    The comma-separated value of ``settings.stylesheet_path`` (preferred) or
    ``settings.stylesheet`` is split and each entry stripped of surrounding
    whitespace.  An empty list is returned if neither is set.
    """
    assert not (settings.stylesheet and settings.stylesheet_path), (
            'stylesheet and stylesheet_path are mutually exclusive.')
    raw_value = settings.stylesheet_path or settings.stylesheet
    if not raw_value:
        return []
    # strip whitespace (frequently occurring in config files)
    return [sheet.strip(u' \t\n') for sheet in raw_value.split(",")]
def get_trim_footnote_ref_space(settings):
    """
    Return whether or not to trim footnote space.

    If trim_footnote_reference_space is not None, return it.

    If trim_footnote_reference_space is None, return False unless the
    footnote reference style is 'superscript'.
    """
    explicit = settings.trim_footnote_reference_space
    if explicit is not None:
        return explicit
    return (hasattr(settings, 'footnote_references')
            and settings.footnote_references == 'superscript')
def get_source_line(node):
    """
    Return the "source" and "line" attributes from the `node` given or from
    its closest ancestor.

    The first node (walking up via ``parent``) with a true `source` or
    `line` wins; ``(None, None)`` is returned if there is none.
    """
    current = node
    while current:
        source, line = current.source, current.line
        if source or line:
            return source, line
        current = current.parent
    return None, None
def escape2null(text):
    """Return a string with escape-backslashes converted to nulls.

    The character following each escape-backslash is kept as is, so an
    escaped backslash yields a null followed by a literal backslash.
    """
    pieces = []
    position = 0
    found = text.find('\\', position)
    while found != -1:
        pieces.append(text[position:found])
        pieces.append('\x00' + text[found + 1:found + 2])
        position = found + 2            # skip character after escape
        found = text.find('\\', position)
    pieces.append(text[position:])
    return ''.join(pieces)
def unescape(text, restore_backslashes=0):
    """
    Return a string with nulls removed or restored to backslashes.
    Backslash-escaped spaces are also removed.
    """
    if restore_backslashes:
        return text.replace('\x00', '\\')
    # Remove escaped spaces and newlines first, then any remaining null.
    for escaped in ('\x00 ', '\x00\n', '\x00'):
        text = text.replace(escaped, '')
    return text
east_asian_widths = {
    'W': 2,   # Wide
    'F': 2,   # Full-width (wide)
    'Na': 1,  # Narrow
    'H': 1,   # Half-width (narrow)
    'N': 1,   # Neutral (not East Asian, treated as narrow)
    'A': 1,   # Ambiguous (s/b wide in East Asian context,
              # narrow otherwise, but that doesn't work)
}
"""Mapping of result codes from `unicodedata.east_asian_width()` to character
column widths."""

def column_width(text):
    """Return the column width of text.

    Correct ``len(text)`` for wide East Asian and combining Unicode chars.
    Under Python 2, byte strings are measured with plain ``len()``.
    """
    if sys.version_info < (3,0) and isinstance(text, str):
        return len(text)
    # Combining characters occupy no column of their own.
    combining_count = 0
    for char in text:
        if unicodedata.combining(char):
            combining_count += 1
    try:
        width = 0
        for char in text:
            width += east_asian_widths[unicodedata.east_asian_width(char)]
    except AttributeError:  # east_asian_width() New in version 2.4.
        width = len(text)
    return width - combining_count
def uniq(L):
    """Return a list of the items of `L` without duplicates, in first-seen
    order.  Items are compared by equality and need not be hashable."""
    seen = []
    for candidate in L:
        if candidate in seen:
            continue
        seen.append(candidate)
    return seen
615 # by Li Daobing http://code.activestate.com/recipes/190465/
616 # since Python 2.6 there is also itertools.combinations()
def unique_combinations(items, n):
    """Yield the `n`-element combinations of `items` as lists.

    Elements keep their order from `items` and no position is used twice
    within a combination.  For ``n == 0`` a single empty list is yielded;
    nothing is yielded if `n` exceeds ``len(items)``.
    """
    if n == 0:
        yield []
    else:
        # ``range`` (not the Python 2-only ``xrange``) so the generator also
        # runs unmodified on Python 3.
        for i in range(len(items) - n + 1):
            for cc in unique_combinations(items[i+1:], n - 1):
                yield [items[i]] + cc
def normalize_language_tag(tag):
    """Return a list of normalized combinations for a `BCP 47` language tag.

    The tag is lower-cased and hyphens become underscores.  The base tag is
    combined with every combination of the subtags, longest first, and the
    bare base tag comes last.

    Example:

    >>> normalize_language_tag('de-AT-1901')
    ['de_at_1901', 'de_at', 'de_1901', 'de']
    """
    # normalize:
    parts = tag.lower().replace('-','_').split('_')
    base_tag, subtags = parts[:1], parts[1:]
    # find all combinations of subtags
    taglist = []
    for n in range(len(subtags), 0, -1):
        taglist.extend(['_'.join(base_tag + tags)
                        for tags in unique_combinations(subtags, n)])
    taglist += base_tag
    return taglist
class DependencyList:

    """
    List of dependencies, with file recording support.

    Note that the output file is not automatically closed.  You have
    to explicitly call the close() method.
    """

    def __init__(self, output_file=None, dependencies=()):
        """
        Initialize the dependency list, automatically setting the
        output file to `output_file` (see `set_output()`) and adding
        all supplied dependencies.
        """
        self.set_output(output_file)
        for i in dependencies:
            self.add(i)

    def set_output(self, output_file):
        """
        Set the output file and clear the list of already added
        dependencies.

        `output_file` must be a string.  The specified file is
        immediately overwritten.

        If output_file is '-', the output will be written to stdout.
        If it is None, no file output is done when calling add().
        """
        self.list = []
        if output_file == '-':
            self.file = sys.stdout
        elif output_file:
            self.file = open(output_file, 'w')
        else:
            self.file = None

    def add(self, *filenames):
        """
        If the dependency `filename` has not already been added,
        append it to self.list and print it to self.file if self.file
        is not None.
        """
        for filename in filenames:
            if filename not in self.list:
                self.list.append(filename)
                if self.file is not None:
                    # One dependency per line; ``write`` works unchanged on
                    # Python 2 and 3 (unlike ``print >>file``).
                    self.file.write('%s\n' % (filename,))

    def close(self):
        """
        Close the output file.

        Safe to call when no output file is set; `sys.stdout` is released
        but never closed.
        """
        if self.file is not None and self.file is not sys.stdout:
            self.file.close()
        self.file = None

    def __repr__(self):
        if self.file:
            output_file = self.file.name
        else:
            output_file = None
        return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)