docutils/utils.py

   1 # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 Miscellaneous utilities for the documentation utilities.
   7 """
   8
   9 __docformat__ = 'reStructuredText'
  10
  11 import sys
  12 import os
  13 import os.path
  14 import types
  15 import warnings
  16 import unicodedata
  17 from types import StringType, UnicodeType
  18 from docutils import ApplicationError, DataError
  19 from docutils import nodes
  20
  21
  22 class SystemMessage(ApplicationError):
  23
  24     def __init__(self, system_message, level):
  25         Exception.__init__(self, system_message.astext())
  26         self.level = level
  27
  28
  29 class SystemMessagePropagation(ApplicationError): pass
  30
  31
  32 class Reporter:
  33
  34     """
  35     Info/warning/error reporter and ``system_message`` element generator.
  36
  37     Five levels of system messages are defined, along with corresponding
  38     methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
  39
  40     There is typically one Reporter object per process.  A Reporter object is
  41     instantiated with thresholds for reporting (generating warnings) and
  42     halting processing (raising exceptions), a switch to turn debug output on
  43     or off, and an I/O stream for warnings.  These are stored as instance
  44     attributes.
  45
  46     When a system message is generated, its level is compared to the stored
  47     thresholds, and a warning or error is generated as appropriate.  Debug
  48     messages are produced iff the stored debug switch is on, independently of
  49     other thresholds.  Message output is sent to the stored warning stream if
  50     not set to ''.
  51
  52     The Reporter class also employs a modified form of the "Observer" pattern
  53     [GoF95]_ to track system messages generated.  The `attach_observer` method
  54     should be called before parsing, with a bound method or function which
  55     accepts system messages.  The observer can be removed with
  56     `detach_observer`, and another added in its place.
  57
  58     .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
  59        Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
  60        1995.
  61     """
  62
  63     levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
  64     """List of names for system message levels, indexed by level."""
  65
  66     # system message level constants:
  67     (DEBUG_LEVEL,
  68      INFO_LEVEL,
  69      WARNING_LEVEL,
  70      ERROR_LEVEL,
  71      SEVERE_LEVEL) = range(5)
  72
  73     def __init__(self, source, report_level, halt_level, stream=None,
  74                  debug=0, encoding='ascii', error_handler='replace'):
  75         """
  76         :Parameters:
  77             - `source`: The path to or description of the source data.
  78             - `report_level`: The level at or above which warning output will
  79               be sent to `stream`.
  80             - `halt_level`: The level at or above which `SystemMessage`
  81               exceptions will be raised, halting execution.
  82             - `debug`: Show debug (level=0) system messages?
  83             - `stream`: Where warning output is sent.  Can be file-like (has a
  84               ``.write`` method), a string (file name, opened for writing),
  85               '' (empty string, for discarding all stream messages) or
  86               `None` (implies `sys.stderr`; default).
  87             - `encoding`: The encoding for stderr output.
  88             - `error_handler`: The error handler for stderr output encoding.
  89         """
  90
  91         self.source = source
  92         """The path to or description of the source data."""
  93
  94         self.encoding = encoding
  95         """The character encoding for the stderr output."""
  96
  97         self.error_handler = error_handler
  98         """The character encoding error handler."""
  99
 100         self.debug_flag = debug
 101         """Show debug (level=0) system messages?"""
 102
 103         self.report_level = report_level
 104         """The level at or above which warning output will be sent
 105         to `self.stream`."""
 106
 107         self.halt_level = halt_level
 108         """The level at or above which `SystemMessage` exceptions
 109         will be raised, halting execution."""
 110
 111         if stream is None:
 112             stream = sys.stderr
 113         elif type(stream) in (StringType, UnicodeType):
 114             # Leave stream untouched if it's ''.
 115             if stream != '':
 116                 if type(stream) == StringType:
 117                     stream = open(stream, 'w')
 118                 elif type(stream) == UnicodeType:
 119                     stream = open(stream.encode(), 'w')
 120
 121         self.stream = stream
 122         """Where warning output is sent."""
 123
 124         self.observers = []
 125         """List of bound methods or functions to call with each system_message
 126         created."""
 127
 128         self.max_level = -1
 129         """The highest level system message generated so far."""
 130
 131     def set_conditions(self, category, report_level, halt_level,
 132                        stream=None, debug=0):
 133         warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
 134                       'set attributes via configuration settings or directly',
 135                       DeprecationWarning, stacklevel=2)
 136         self.report_level = report_level
 137         self.halt_level = halt_level
 138         if stream is None:
 139             stream = sys.stderr
 140         self.stream = stream
 141         self.debug_flag = debug
 142
 143     def attach_observer(self, observer):
 144         """
 145         The `observer` parameter is a function or bound method which takes one
 146         argument, a `nodes.system_message` instance.
 147         """
 148         self.observers.append(observer)
 149
 150     def detach_observer(self, observer):
 151         self.observers.remove(observer)
 152
 153     def notify_observers(self, message):
 154         for observer in self.observers:
 155             observer(message)
 156
 157     def system_message(self, level, message, *children, **kwargs):
 158         """
 159         Return a system_message object.
 160
 161         Raise an exception or generate a warning if appropriate.
 162         """
 163         attributes = kwargs.copy()
 164         if kwargs.has_key('base_node'):
 165             source, line = get_source_line(kwargs['base_node'])
 166             del attributes['base_node']
 167             if source is not None:
 168                 attributes.setdefault('source', source)
 169             if line is not None:
 170                 attributes.setdefault('line', line)
 171         attributes.setdefault('source', self.source)
 172         msg = nodes.system_message(message, level=level,
 173                                    type=self.levels[level],
 174                                    *children, **attributes)
 175         if self.stream and (level >= self.report_level
 176                             or self.debug_flag and level == self.DEBUG_LEVEL):
 177             msgtext = msg.astext().encode(self.encoding, self.error_handler)
 178             print >>self.stream, msgtext
 179         if level >= self.halt_level:
 180             raise SystemMessage(msg, level)
 181         if level > self.DEBUG_LEVEL or self.debug_flag:
 182             self.notify_observers(msg)
 183         self.max_level = max(level, self.max_level)
 184         return msg
 185
 186     def debug(self, *args, **kwargs):
 187         """
 188         Level-0, "DEBUG": an internal reporting issue. Typically, there is no
 189         effect on the processing. Level-0 system messages are handled
 190         separately from the others.
 191         """
 192         if self.debug_flag:
 193             return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
 194
 195     def info(self, *args, **kwargs):
 196         """
 197         Level-1, "INFO": a minor issue that can be ignored. Typically there is
 198         no effect on processing, and level-1 system messages are not reported.
 199         """
 200         return self.system_message(self.INFO_LEVEL, *args, **kwargs)
 201
 202     def warning(self, *args, **kwargs):
 203         """
 204         Level-2, "WARNING": an issue that should be addressed. If ignored,
 205         there may be unpredictable problems with the output.
 206         """
 207         return self.system_message(self.WARNING_LEVEL, *args, **kwargs)
 208
 209     def error(self, *args, **kwargs):
 210         """
 211         Level-3, "ERROR": an error that should be addressed. If ignored, the
 212         output will contain errors.
 213         """
 214         return self.system_message(self.ERROR_LEVEL, *args, **kwargs)
 215
 216     def severe(self, *args, **kwargs):
 217         """
 218         Level-4, "SEVERE": a severe error that must be addressed. If ignored,
 219         the output will contain severe errors. Typically level-4 system
 220         messages are turned into exceptions which halt processing.
 221         """
 222         return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
 223
 224
 225 class ExtensionOptionError(DataError): pass
 226 class BadOptionError(ExtensionOptionError): pass
 227 class BadOptionDataError(ExtensionOptionError): pass
 228 class DuplicateOptionError(ExtensionOptionError): pass
 229
 230
 231 def extract_extension_options(field_list, options_spec):
 232     """
 233     Return a dictionary mapping extension option names to converted values.
 234
 235     :Parameters:
 236         - `field_list`: A flat field list without field arguments, where each
 237           field body consists of a single paragraph only.
 238         - `options_spec`: Dictionary mapping known option names to a
 239           conversion function such as `int` or `float`.
 240
 241     :Exceptions:
 242         - `KeyError` for unknown option names.
 243         - `ValueError` for invalid option values (raised by the conversion
 244            function).
 245         - `TypeError` for invalid option value types (raised by conversion
 246            function).
 247         - `DuplicateOptionError` for duplicate options.
 248         - `BadOptionError` for invalid fields.
 249         - `BadOptionDataError` for invalid option data (missing name,
 250           missing data, bad quotes, etc.).
 251     """
 252     option_list = extract_options(field_list)
 253     option_dict = assemble_option_dict(option_list, options_spec)
 254     return option_dict
 255
 256 def extract_options(field_list):
 257     """
 258     Return a list of option (name, value) pairs from field names & bodies.
 259
 260     :Parameter:
 261         `field_list`: A flat field list, where each field name is a single
 262         word and each field body consists of a single paragraph only.
 263
 264     :Exceptions:
 265         - `BadOptionError` for invalid fields.
 266         - `BadOptionDataError` for invalid option data (missing name,
 267           missing data, bad quotes, etc.).
 268     """
 269     option_list = []
 270     for field in field_list:
 271         if len(field[0].astext().split()) != 1:
 272             raise BadOptionError(
 273                 'extension option field name may not contain multiple words')
 274         name = str(field[0].astext().lower())
 275         body = field[1]
 276         if len(body) == 0:
 277             data = None
 278         elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
 279               or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
 280             raise BadOptionDataError(
 281                   'extension option field body may contain\n'
 282                   'a single paragraph only (option "%s")' % name)
 283         else:
 284             data = body[0][0].astext()
 285         option_list.append((name, data))
 286     return option_list
 287
 288 def assemble_option_dict(option_list, options_spec):
 289     """
 290     Return a mapping of option names to values.
 291
 292     :Parameters:
 293         - `option_list`: A list of (name, value) pairs (the output of
 294           `extract_options()`).
 295         - `options_spec`: Dictionary mapping known option names to a
 296           conversion function such as `int` or `float`.
 297
 298     :Exceptions:
 299         - `KeyError` for unknown option names.
 300         - `DuplicateOptionError` for duplicate options.
 301         - `ValueError` for invalid option values (raised by conversion
 302            function).
 303         - `TypeError` for invalid option value types (raised by conversion
 304            function).
 305     """
 306     options = {}
 307     for name, value in option_list:
 308         convertor = options_spec[name]  # raises KeyError if unknown
 309         if convertor is None:
 310             raise KeyError(name)        # or if explicitly disabled
 311         if options.has_key(name):
 312             raise DuplicateOptionError('duplicate option "%s"' % name)
 313         try:
 314             options[name] = convertor(value)
 315         except (ValueError, TypeError), detail:
 316             raise detail.__class__('(option: "%s"; value: %r)\n%s'
 317                                    % (name, value, ' '.join(detail.args)))
 318     return options
 319
 320
 321 class NameValueError(DataError): pass
 322
 323
 324 def extract_name_value(line):
 325     """
 326     Return a list of (name, value) from a line of the form "name=value ...".
 327
 328     :Exception:
 329         `NameValueError` for invalid input (missing name, missing data, bad
 330         quotes, etc.).
 331     """
 332     attlist = []
 333     while line:
 334         equals = line.find('=')
 335         if equals == -1:
 336             raise NameValueError('missing "="')
 337         attname = line[:equals].strip()
 338         if equals == 0 or not attname:
 339             raise NameValueError(
 340                   'missing attribute name before "="')
 341         line = line[equals+1:].lstrip()
 342         if not line:
 343             raise NameValueError(
 344                   'missing value after "%s="' % attname)
 345         if line[0] in '\'"':
 346             endquote = line.find(line[0], 1)
 347             if endquote == -1:
 348                 raise NameValueError(
 349                       'attribute "%s" missing end quote (%s)'
 350                       % (attname, line[0]))
 351             if len(line) > endquote + 1 and line[endquote + 1].strip():
 352                 raise NameValueError(
 353                       'attribute "%s" end quote (%s) not followed by '
 354                       'whitespace' % (attname, line[0]))
 355             data = line[1:endquote]
 356             line = line[endquote+1:].lstrip()
 357         else:
 358             space = line.find(' ')
 359             if space == -1:
 360                 data = line
 361                 line = ''
 362             else:
 363                 data = line[:space]
 364                 line = line[space+1:].lstrip()
 365         attlist.append((attname.lower(), data))
 366     return attlist
 367
 368 def new_reporter(source_path, settings):
 369     """
 370     Return a new Reporter object.
 371
 372     :Parameters:
 373         `source` : string
 374             The path to or description of the source text of the document.
 375         `settings` : optparse.Values object
 376             Runtime settings.
 377     """
 378     reporter = Reporter(
 379         source_path, settings.report_level, settings.halt_level,
 380         stream=settings.warning_stream, debug=settings.debug,
 381         encoding=settings.error_encoding,
 382         error_handler=settings.error_encoding_error_handler)
 383     return reporter
 384
 385 def new_document(source_path, settings=None):
 386     """
 387     Return a new empty document object.
 388
 389     :Parameters:
 390         `source_path` : string
 391             The path to or description of the source text of the document.
 392         `settings` : optparse.Values object
 393             Runtime settings.  If none provided, a default set will be used.
 394     """
 395     from docutils import frontend
 396     if settings is None:
 397         settings = frontend.OptionParser().get_default_values()
 398     reporter = new_reporter(source_path, settings)
 399     document = nodes.document(settings, reporter, source=source_path)
 400     document.note_source(source_path, -1)
 401     return document
 402
 403 def clean_rcs_keywords(paragraph, keyword_substitutions):
 404     if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
 405         textnode = paragraph[0]
 406         for pattern, substitution in keyword_substitutions:
 407             match = pattern.search(textnode.data)
 408             if match:
 409                 textnode.data = pattern.sub(substitution, textnode.data)
 410                 return
 411
 412 def relative_path(source, target):
 413     """
 414     Build and return a path to `target`, relative to `source` (both files).
 415
 416     If there is no common prefix, return the absolute path to `target`.
 417     """
 418     source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
 419     target_parts = os.path.abspath(target).split(os.sep)
 420     # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
 421     if source_parts[:2] != target_parts[:2]:
 422         # Nothing in common between paths.
 423         # Return absolute path, using '/' for URLs:
 424         return '/'.join(target_parts)
 425     source_parts.reverse()
 426     target_parts.reverse()
 427     while (source_parts and target_parts
 428            and source_parts[-1] == target_parts[-1]):
 429         # Remove path components in common:
 430         source_parts.pop()
 431         target_parts.pop()
 432     target_parts.reverse()
 433     parts = ['..'] * (len(source_parts) - 1) + target_parts
 434     return '/'.join(parts)
 435
 436 def get_stylesheet_reference(settings, relative_to=None):
 437     """
 438     Retrieve a stylesheet reference from the settings object.
 439     """
 440     if settings.stylesheet_path:
 441         assert not settings.stylesheet, \
 442                'stylesheet and stylesheet_path are mutually exclusive.'
 443         if relative_to == None:
 444             relative_to = settings._destination
 445         return relative_path(relative_to, settings.stylesheet_path)
 446     else:
 447         return settings.stylesheet
 448
 449 def get_trim_footnote_ref_space(settings):
 450     """
 451     Return whether or not to trim footnote space.
 452
 453     If trim_footnote_reference_space is not None, return it.
 454
 455     If trim_footnote_reference_space is None, return False unless the
 456     footnote reference style is 'superscript'.
 457     """
 458     if settings.trim_footnote_reference_space is None:
 459         return hasattr(settings, 'footnote_references') and \
 460                settings.footnote_references == 'superscript'
 461     else:
 462         return settings.trim_footnote_reference_space
 463
 464 def get_source_line(node):
 465     """
 466     Return the "source" and "line" attributes from the `node` given or from
 467     its closest ancestor.
 468     """
 469     while node:
 470         if node.source or node.line:
 471             return node.source, node.line
 472         node = node.parent
 473     return None, None
 474
 475 def escape2null(text):
 476     """Return a string with escape-backslashes converted to nulls."""
 477     parts = []
 478     start = 0
 479     while 1:
 480         found = text.find('\\', start)
 481         if found == -1:
 482             parts.append(text[start:])
 483             return ''.join(parts)
 484         parts.append(text[start:found])
 485         parts.append('\x00' + text[found+1:found+2])
 486         start = found + 2               # skip character after escape
 487
 488 def unescape(text, restore_backslashes=0):
 489     """
 490     Return a string with nulls removed or restored to backslashes.
 491     Backslash-escaped spaces are also removed.
 492     """
 493     if restore_backslashes:
 494         return text.replace('\x00', '\\')
 495     else:
 496         for sep in ['\x00 ', '\x00\n', '\x00']:
 497             text = ''.join(text.split(sep))
 498         return text
 499
 500 east_asian_widths = {'W': 2,   # Wide
 501                      'F': 2,   # Full-width (wide)
 502                      'Na': 1,  # Narrow
 503                      'H': 1,   # Half-width (narrow)
 504                      'N': 1,   # Neutral (not East Asian, treated as narrow)
 505                      'A': 1}   # Ambiguous (s/b wide in East Asian context,
 506                                # narrow otherwise, but that doesn't work)
 507 """Mapping of result codes from `unicodedata.east_asian_width()` to character
 508 column widths."""
 509
 510 def east_asian_column_width(text):
 511     if isinstance(text, types.UnicodeType):
 512         total = 0
 513         for c in text:
 514             total += east_asian_widths[unicodedata.east_asian_width(c)]
 515         return total
 516     else:
 517         return len(text)
 518
 519 if hasattr(unicodedata, 'east_asian_width'):
 520     column_width = east_asian_column_width
 521 else:
 522     column_width = len
 523
 524 def uniq(L):
 525      r = []
 526      for item in L:
 527          if not item in r:
 528              r.append(item)
 529      return r
 530
 531
 532 class DependencyList:
 533
 534     """
 535     List of dependencies, with file recording support.
 536
 537     Note that the output file is not automatically closed.  You have
 538     to explicitly call the close() method.
 539     """
 540
 541     def __init__(self, output_file=None, dependencies=[]):
 542         """
 543         Initialize the dependency list, automatically setting the
 544         output file to `output_file` (see `set_output()`) and adding
 545         all supplied dependencies.
 546         """
 547         self.set_output(output_file)
 548         for i in dependencies:
 549             self.add(i)
 550
 551     def set_output(self, output_file):
 552         """
 553         Set the output file and clear the list of already added
 554         dependencies.
 555
 556         `output_file` must be a string.  The specified file is
 557         immediately overwritten.
 558
 559         If output_file is '-', the output will be written to stdout.
 560         If it is None, no file output is done when calling add().
 561         """
 562         self.list = []
 563         if output_file == '-':
 564             self.file = sys.stdout
 565         elif output_file:
 566             self.file = open(output_file, 'w')
 567         else:
 568             self.file = None
 569
 570     def add(self, filename):
 571         """
 572         If the dependency `filename` has not already been added,
 573         append it to self.list and print it to self.file if self.file
 574         is not None.
 575         """
 576         if not filename in self.list:
 577             self.list.append(filename)
 578             if self.file is not None:
 579                 print >>self.file, filename
 580
 581     def close(self):
 582         """
 583         Close the output file.
 584         """
 585         self.file.close()
 586         self.file = None
 587
 588     def __repr__(self):
 589         if self.file:
 590             output_file = self.file.name
 591         else:
 592             output_file = None
 593         return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)