docutils/utils.py

   1 # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 Miscellaneous utilities for the documentation utilities.
   7 """
   8
   9 __docformat__ = 'reStructuredText'
  10
  11 import sys
  12 import os
  13 import os.path
  14 import types
  15 import warnings
  16 import unicodedata
  17 from types import StringType, UnicodeType
  18 from docutils import ApplicationError, DataError
  19 from docutils import frontend, nodes
  20
  21
  22 class SystemMessage(ApplicationError):
  23
  24     def __init__(self, system_message, level):
  25         Exception.__init__(self, system_message.astext())
  26         self.level = level
  27
  28
  29 class SystemMessagePropagation(ApplicationError): pass
  30
  31
  32 class Reporter:
  33
  34     """
  35     Info/warning/error reporter and ``system_message`` element generator.
  36
  37     Five levels of system messages are defined, along with corresponding
  38     methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
  39
  40     There is typically one Reporter object per process.  A Reporter object is
  41     instantiated with thresholds for reporting (generating warnings) and
  42     halting processing (raising exceptions), a switch to turn debug output on
  43     or off, and an I/O stream for warnings.  These are stored as instance
  44     attributes.
  45
  46     When a system message is generated, its level is compared to the stored
  47     thresholds, and a warning or error is generated as appropriate.  Debug
  48     messages are produced iff the stored debug switch is on, independently of
  49     other thresholds.  Message output is sent to the stored warning stream if
  50     not set to ''.
  51
  52     The Reporter class also employs a modified form of the "Observer" pattern
  53     [GoF95]_ to track system messages generated.  The `attach_observer` method
  54     should be called before parsing, with a bound method or function which
  55     accepts system messages.  The observer can be removed with
  56     `detach_observer`, and another added in its place.
  57
  58     .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
  59        Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
  60        1995.
  61     """
  62
  63     levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
  64     """List of names for system message levels, indexed by level."""
  65
  66     def __init__(self, source, report_level, halt_level, stream=None,
  67                  debug=0, encoding='ascii', error_handler='replace'):
  68         """
  69         :Parameters:
  70             - `source`: The path to or description of the source data.
  71             - `report_level`: The level at or above which warning output will
  72               be sent to `stream`.
  73             - `halt_level`: The level at or above which `SystemMessage`
  74               exceptions will be raised, halting execution.
  75             - `debug`: Show debug (level=0) system messages?
  76             - `stream`: Where warning output is sent.  Can be file-like (has a
  77               ``.write`` method), a string (file name, opened for writing),
  78               '' (empty string, for discarding all stream messages) or
  79               `None` (implies `sys.stderr`; default).
  80             - `encoding`: The encoding for stderr output.
  81             - `error_handler`: The error handler for stderr output encoding.
  82         """
  83
  84         self.source = source
  85         """The path to or description of the source data."""
  86
  87         self.encoding = encoding
  88         """The character encoding for the stderr output."""
  89
  90         self.error_handler = error_handler
  91         """The character encoding error handler."""
  92
  93         self.debug_flag = debug
  94         """Show debug (level=0) system messages?"""
  95
  96         self.report_level = report_level
  97         """The level at or above which warning output will be sent
  98         to `self.stream`."""
  99
 100         self.halt_level = halt_level
 101         """The level at or above which `SystemMessage` exceptions
 102         will be raised, halting execution."""
 103
 104         if stream is None:
 105             stream = sys.stderr
 106         elif type(stream) in (StringType, UnicodeType):
 107             # Leave stream untouched if it's ''.
 108             if stream != '':
 109                 if type(stream) == StringType:
 110                     stream = open(stream, 'w')
 111                 elif type(stream) == UnicodeType:
 112                     stream = open(stream.encode(), 'w')
 113
 114         self.stream = stream
 115         """Where warning output is sent."""
 116
 117         self.observers = []
 118         """List of bound methods or functions to call with each system_message
 119         created."""
 120
 121         self.max_level = -1
 122         """The highest level system message generated so far."""
 123
 124     def set_conditions(self, category, report_level, halt_level,
 125                        stream=None, debug=0):
 126         warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
 127                       'set attributes via configuration settings or directly',
 128                       DeprecationWarning, stacklevel=2)
 129         self.report_level = report_level
 130         self.halt_level = halt_level
 131         if stream is None:
 132             stream = sys.stderr
 133         self.stream = stream
 134         self.debug_flag = debug
 135
 136     def attach_observer(self, observer):
 137         """
 138         The `observer` parameter is a function or bound method which takes one
 139         argument, a `nodes.system_message` instance.
 140         """
 141         self.observers.append(observer)
 142
 143     def detach_observer(self, observer):
 144         self.observers.remove(observer)
 145
 146     def notify_observers(self, message):
 147         for observer in self.observers:
 148             observer(message)
 149
 150     def system_message(self, level, message, *children, **kwargs):
 151         """
 152         Return a system_message object.
 153
 154         Raise an exception or generate a warning if appropriate.
 155         """
 156         attributes = kwargs.copy()
 157         if kwargs.has_key('base_node'):
 158             source, line = get_source_line(kwargs['base_node'])
 159             del attributes['base_node']
 160             if source is not None:
 161                 attributes.setdefault('source', source)
 162             if line is not None:
 163                 attributes.setdefault('line', line)
 164         attributes.setdefault('source', self.source)
 165         msg = nodes.system_message(message, level=level,
 166                                    type=self.levels[level],
 167                                    *children, **attributes)
 168         if self.stream and (level >= self.report_level
 169                             or self.debug_flag and level == 0):
 170             msgtext = msg.astext().encode(self.encoding, self.error_handler)
 171             print >>self.stream, msgtext
 172         if level >= self.halt_level:
 173             raise SystemMessage(msg, level)
 174         if level > 0 or self.debug_flag:
 175             self.notify_observers(msg)
 176         self.max_level = max(level, self.max_level)
 177         return msg
 178
 179     def debug(self, *args, **kwargs):
 180         """
 181         Level-0, "DEBUG": an internal reporting issue. Typically, there is no
 182         effect on the processing. Level-0 system messages are handled
 183         separately from the others.
 184         """
 185         if self.debug_flag:
 186             return self.system_message(0, *args, **kwargs)
 187
 188     def info(self, *args, **kwargs):
 189         """
 190         Level-1, "INFO": a minor issue that can be ignored. Typically there is
 191         no effect on processing, and level-1 system messages are not reported.
 192         """
 193         return self.system_message(1, *args, **kwargs)
 194
 195     def warning(self, *args, **kwargs):
 196         """
 197         Level-2, "WARNING": an issue that should be addressed. If ignored,
 198         there may be unpredictable problems with the output.
 199         """
 200         return self.system_message(2, *args, **kwargs)
 201
 202     def error(self, *args, **kwargs):
 203         """
 204         Level-3, "ERROR": an error that should be addressed. If ignored, the
 205         output will contain errors.
 206         """
 207         return self.system_message(3, *args, **kwargs)
 208
 209     def severe(self, *args, **kwargs):
 210         """
 211         Level-4, "SEVERE": a severe error that must be addressed. If ignored,
 212         the output will contain severe errors. Typically level-4 system
 213         messages are turned into exceptions which halt processing.
 214         """
 215         return self.system_message(4, *args, **kwargs)
 216
 217
 218 class ExtensionOptionError(DataError): pass
 219 class BadOptionError(ExtensionOptionError): pass
 220 class BadOptionDataError(ExtensionOptionError): pass
 221 class DuplicateOptionError(ExtensionOptionError): pass
 222
 223
 224 def extract_extension_options(field_list, options_spec):
 225     """
 226     Return a dictionary mapping extension option names to converted values.
 227
 228     :Parameters:
 229         - `field_list`: A flat field list without field arguments, where each
 230           field body consists of a single paragraph only.
 231         - `options_spec`: Dictionary mapping known option names to a
 232           conversion function such as `int` or `float`.
 233
 234     :Exceptions:
 235         - `KeyError` for unknown option names.
 236         - `ValueError` for invalid option values (raised by the conversion
 237            function).
 238         - `TypeError` for invalid option value types (raised by conversion
 239            function).
 240         - `DuplicateOptionError` for duplicate options.
 241         - `BadOptionError` for invalid fields.
 242         - `BadOptionDataError` for invalid option data (missing name,
 243           missing data, bad quotes, etc.).
 244     """
 245     option_list = extract_options(field_list)
 246     option_dict = assemble_option_dict(option_list, options_spec)
 247     return option_dict
 248
 249 def extract_options(field_list):
 250     """
 251     Return a list of option (name, value) pairs from field names & bodies.
 252
 253     :Parameter:
 254         `field_list`: A flat field list, where each field name is a single
 255         word and each field body consists of a single paragraph only.
 256
 257     :Exceptions:
 258         - `BadOptionError` for invalid fields.
 259         - `BadOptionDataError` for invalid option data (missing name,
 260           missing data, bad quotes, etc.).
 261     """
 262     option_list = []
 263     for field in field_list:
 264         if len(field[0].astext().split()) != 1:
 265             raise BadOptionError(
 266                 'extension option field name may not contain multiple words')
 267         name = str(field[0].astext().lower())
 268         body = field[1]
 269         if len(body) == 0:
 270             data = None
 271         elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
 272               or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
 273             raise BadOptionDataError(
 274                   'extension option field body may contain\n'
 275                   'a single paragraph only (option "%s")' % name)
 276         else:
 277             data = body[0][0].astext()
 278         option_list.append((name, data))
 279     return option_list
 280
 281 def assemble_option_dict(option_list, options_spec):
 282     """
 283     Return a mapping of option names to values.
 284
 285     :Parameters:
 286         - `option_list`: A list of (name, value) pairs (the output of
 287           `extract_options()`).
 288         - `options_spec`: Dictionary mapping known option names to a
 289           conversion function such as `int` or `float`.
 290
 291     :Exceptions:
 292         - `KeyError` for unknown option names.
 293         - `DuplicateOptionError` for duplicate options.
 294         - `ValueError` for invalid option values (raised by conversion
 295            function).
 296         - `TypeError` for invalid option value types (raised by conversion
 297            function).
 298     """
 299     options = {}
 300     for name, value in option_list:
 301         convertor = options_spec[name]  # raises KeyError if unknown
 302         if convertor is None:
 303             raise KeyError(name)        # or if explicitly disabled
 304         if options.has_key(name):
 305             raise DuplicateOptionError('duplicate option "%s"' % name)
 306         try:
 307             options[name] = convertor(value)
 308         except (ValueError, TypeError), detail:
 309             raise detail.__class__('(option: "%s"; value: %r)\n%s'
 310                                    % (name, value, ' '.join(detail.args)))
 311     return options
 312
 313
 314 class NameValueError(DataError): pass
 315
 316
 317 def extract_name_value(line):
 318     """
 319     Return a list of (name, value) from a line of the form "name=value ...".
 320
 321     :Exception:
 322         `NameValueError` for invalid input (missing name, missing data, bad
 323         quotes, etc.).
 324     """
 325     attlist = []
 326     while line:
 327         equals = line.find('=')
 328         if equals == -1:
 329             raise NameValueError('missing "="')
 330         attname = line[:equals].strip()
 331         if equals == 0 or not attname:
 332             raise NameValueError(
 333                   'missing attribute name before "="')
 334         line = line[equals+1:].lstrip()
 335         if not line:
 336             raise NameValueError(
 337                   'missing value after "%s="' % attname)
 338         if line[0] in '\'"':
 339             endquote = line.find(line[0], 1)
 340             if endquote == -1:
 341                 raise NameValueError(
 342                       'attribute "%s" missing end quote (%s)'
 343                       % (attname, line[0]))
 344             if len(line) > endquote + 1 and line[endquote + 1].strip():
 345                 raise NameValueError(
 346                       'attribute "%s" end quote (%s) not followed by '
 347                       'whitespace' % (attname, line[0]))
 348             data = line[1:endquote]
 349             line = line[endquote+1:].lstrip()
 350         else:
 351             space = line.find(' ')
 352             if space == -1:
 353                 data = line
 354                 line = ''
 355             else:
 356                 data = line[:space]
 357                 line = line[space+1:].lstrip()
 358         attlist.append((attname.lower(), data))
 359     return attlist
 360
 361 def new_reporter(source_path, settings):
 362     """
 363     Return a new Reporter object.
 364
 365     :Parameters:
 366         `source` : string
 367             The path to or description of the source text of the document.
 368         `settings` : optparse.Values object
 369             Runtime settings.
 370     """
 371     reporter = Reporter(
 372         source_path, settings.report_level, settings.halt_level,
 373         stream=settings.warning_stream, debug=settings.debug,
 374         encoding=settings.error_encoding,
 375         error_handler=settings.error_encoding_error_handler)
 376     return reporter
 377
 378 def new_document(source_path, settings=None):
 379     """
 380     Return a new empty document object.
 381
 382     :Parameters:
 383         `source_path` : string
 384             The path to or description of the source text of the document.
 385         `settings` : optparse.Values object
 386             Runtime settings.  If none provided, a default set will be used.
 387     """
 388     if settings is None:
 389         settings = frontend.OptionParser().get_default_values()
 390     reporter = new_reporter(source_path, settings)
 391     document = nodes.document(settings, reporter, source=source_path)
 392     document.note_source(source_path, -1)
 393     return document
 394
 395 def clean_rcs_keywords(paragraph, keyword_substitutions):
 396     if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
 397         textnode = paragraph[0]
 398         for pattern, substitution in keyword_substitutions:
 399             match = pattern.search(textnode.data)
 400             if match:
 401                 textnode.data = pattern.sub(substitution, textnode.data)
 402                 return
 403
 404 def relative_path(source, target):
 405     """
 406     Build and return a path to `target`, relative to `source` (both files).
 407
 408     If there is no common prefix, return the absolute path to `target`.
 409     """
 410     source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
 411     target_parts = os.path.abspath(target).split(os.sep)
 412     # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
 413     if source_parts[:2] != target_parts[:2]:
 414         # Nothing in common between paths.
 415         # Return absolute path, using '/' for URLs:
 416         return '/'.join(target_parts)
 417     source_parts.reverse()
 418     target_parts.reverse()
 419     while (source_parts and target_parts
 420            and source_parts[-1] == target_parts[-1]):
 421         # Remove path components in common:
 422         source_parts.pop()
 423         target_parts.pop()
 424     target_parts.reverse()
 425     parts = ['..'] * (len(source_parts) - 1) + target_parts
 426     return '/'.join(parts)
 427
 428 def get_stylesheet_reference(settings, relative_to=None):
 429     """
 430     Retrieve a stylesheet reference from the settings object.
 431     """
 432     if settings.stylesheet_path:
 433         assert not settings.stylesheet, \
 434                'stylesheet and stylesheet_path are mutually exclusive.'
 435         if relative_to == None:
 436             relative_to = settings._destination
 437         return relative_path(relative_to, settings.stylesheet_path)
 438     else:
 439         return settings.stylesheet
 440
 441 def get_trim_footnote_ref_space(settings):
 442     """
 443     Return whether or not to trim footnote space.
 444
 445     If trim_footnote_reference_space is not None, return it.
 446
 447     If trim_footnote_reference_space is None, return False unless the
 448     footnote reference style is 'superscript'.
 449     """
 450     if settings.trim_footnote_reference_space is None:
 451         return hasattr(settings, 'footnote_references') and \
 452                settings.footnote_references == 'superscript'
 453     else:
 454         return settings.trim_footnote_reference_space
 455
 456 def get_source_line(node):
 457     """
 458     Return the "source" and "line" attributes from the `node` given or from
 459     its closest ancestor.
 460     """
 461     while node:
 462         if node.source or node.line:
 463             return node.source, node.line
 464         node = node.parent
 465     return None, None
 466
 467 def escape2null(text):
 468     """Return a string with escape-backslashes converted to nulls."""
 469     parts = []
 470     start = 0
 471     while 1:
 472         found = text.find('\\', start)
 473         if found == -1:
 474             parts.append(text[start:])
 475             return ''.join(parts)
 476         parts.append(text[start:found])
 477         parts.append('\x00' + text[found+1:found+2])
 478         start = found + 2               # skip character after escape
 479
 480 def unescape(text, restore_backslashes=0):
 481     """
 482     Return a string with nulls removed or restored to backslashes.
 483     Backslash-escaped spaces are also removed.
 484     """
 485     if restore_backslashes:
 486         return text.replace('\x00', '\\')
 487     else:
 488         for sep in ['\x00 ', '\x00\n', '\x00']:
 489             text = ''.join(text.split(sep))
 490         return text
 491
 492 east_asian_widths = {'W': 2,   # Wide
 493                      'F': 2,   # Full-width (wide)
 494                      'Na': 1,  # Narrow
 495                      'H': 1,   # Half-width (narrow)
 496                      'N': 1,   # Neutral (not East Asian, treated as narrow)
 497                      'A': 1}   # Ambiguous (s/b wide in East Asian context,
 498                                # narrow otherwise, but that doesn't work)
 499 """Mapping of result codes from `unicodedata.east_asian_width()` to character
 500 column widths."""
 501
 502 def east_asian_column_width(text):
 503     if isinstance(text, types.UnicodeType):
 504         total = 0
 505         for c in text:
 506             total += east_asian_widths[unicodedata.east_asian_width(c)]
 507         return total
 508     else:
 509         return len(text)
 510
 511 if hasattr(unicodedata, 'east_asian_width'):
 512     column_width = east_asian_column_width
 513 else:
 514     column_width = len
 515
 516
 517 class DependencyList:
 518
 519     """
 520     List of dependencies, with file recording support.
 521
 522     Note that the output file is not automatically closed.  You have
 523     to explicitly call the close() method.
 524     """
 525
 526     def __init__(self, output_file=None, dependencies=[]):
 527         """
 528         Initialize the dependency list, automatically setting the
 529         output file to `output_file` (see `set_output()`) and adding
 530         all supplied dependencies.
 531         """
 532         self.set_output(output_file)
 533         for i in dependencies:
 534             self.add(i)
 535
 536     def set_output(self, output_file):
 537         """
 538         Set the output file and clear the list of already added
 539         dependencies.
 540
 541         `output_file` must be a string.  The specified file is
 542         immediately overwritten.
 543
 544         If output_file is '-', the output will be written to stdout.
 545         If it is None, no file output is done when calling add().
 546         """
 547         self.list = []
 548         if output_file == '-':
 549             self.file = sys.stdout
 550         elif output_file:
 551             self.file = open(output_file, 'w')
 552         else:
 553             self.file = None
 554
 555     def add(self, filename):
 556         """
 557         If the dependency `filename` has not already been added,
 558         append it to self.list and print it to self.file if self.file
 559         is not None.
 560         """
 561         if not filename in self.list:
 562             self.list.append(filename)
 563             if self.file is not None:
 564                 print >>self.file, filename
 565
 566     def close(self):
 567         """
 568         Close the output file.
 569         """
 570         self.file.close()
 571         self.file = None
 572
 573     def __repr__(self):
 574         if self.file:
 575             output_file = self.file.name
 576         else:
 577             output_file = None
 578         return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)