docutils/utils.py

   1 # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 Miscellaneous utilities for the documentation utilities.
   7 """
   8
   9 __docformat__ = 'reStructuredText'
  10
  11 import sys
  12 import os
  13 import os.path
  14 import warnings
  15 import unicodedata
  16 from docutils import ApplicationError, DataError
  17 from docutils import nodes
  18
  19
  20 class SystemMessage(ApplicationError):
  21
  22     def __init__(self, system_message, level):
  23         Exception.__init__(self, system_message.astext())
  24         self.level = level
  25
  26
  27 class SystemMessagePropagation(ApplicationError): pass
  28
  29
  30 class Reporter:
  31
  32     """
  33     Info/warning/error reporter and ``system_message`` element generator.
  34
  35     Five levels of system messages are defined, along with corresponding
  36     methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
  37
  38     There is typically one Reporter object per process.  A Reporter object is
  39     instantiated with thresholds for reporting (generating warnings) and
  40     halting processing (raising exceptions), a switch to turn debug output on
  41     or off, and an I/O stream for warnings.  These are stored as instance
  42     attributes.
  43
  44     When a system message is generated, its level is compared to the stored
  45     thresholds, and a warning or error is generated as appropriate.  Debug
  46     messages are produced iff the stored debug switch is on, independently of
  47     other thresholds.  Message output is sent to the stored warning stream if
  48     not set to ''.
  49
  50     The Reporter class also employs a modified form of the "Observer" pattern
  51     [GoF95]_ to track system messages generated.  The `attach_observer` method
  52     should be called before parsing, with a bound method or function which
  53     accepts system messages.  The observer can be removed with
  54     `detach_observer`, and another added in its place.
  55
  56     .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
  57        Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
  58        1995.
  59     """
  60
  61     levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
  62     """List of names for system message levels, indexed by level."""
  63
  64     # system message level constants:
  65     (DEBUG_LEVEL,
  66      INFO_LEVEL,
  67      WARNING_LEVEL,
  68      ERROR_LEVEL,
  69      SEVERE_LEVEL) = range(5)
  70
  71     def __init__(self, source, report_level, halt_level, stream=None,
  72                  debug=0, encoding='ascii', error_handler='replace'):
  73         """
  74         :Parameters:
  75             - `source`: The path to or description of the source data.
  76             - `report_level`: The level at or above which warning output will
  77               be sent to `stream`.
  78             - `halt_level`: The level at or above which `SystemMessage`
  79               exceptions will be raised, halting execution.
  80             - `debug`: Show debug (level=0) system messages?
  81             - `stream`: Where warning output is sent.  Can be file-like (has a
  82               ``.write`` method), a string (file name, opened for writing),
  83               '' (empty string, for discarding all stream messages) or
  84               `None` (implies `sys.stderr`; default).
  85             - `encoding`: The encoding for stderr output.
  86             - `error_handler`: The error handler for stderr output encoding.
  87         """
  88
  89         self.source = source
  90         """The path to or description of the source data."""
  91
  92         self.encoding = encoding
  93         """The character encoding for the stderr output."""
  94
  95         self.error_handler = error_handler
  96         """The character encoding error handler."""
  97
  98         self.debug_flag = debug
  99         """Show debug (level=0) system messages?"""
 100
 101         self.report_level = report_level
 102         """The level at or above which warning output will be sent
 103         to `self.stream`."""
 104
 105         self.halt_level = halt_level
 106         """The level at or above which `SystemMessage` exceptions
 107         will be raised, halting execution."""
 108
 109         if stream is None:
 110             stream = sys.stderr
 111         elif type(stream) in (str, unicode):
 112             # Leave stream untouched if it's ''.
 113             if stream != '':
 114                 if type(stream) == str:
 115                     stream = open(stream, 'w')
 116                 elif type(stream) == unicode:
 117                     stream = open(stream.encode(), 'w')
 118
 119         self.stream = stream
 120         """Where warning output is sent."""
 121
 122         self.observers = []
 123         """List of bound methods or functions to call with each system_message
 124         created."""
 125
 126         self.max_level = -1
 127         """The highest level system message generated so far."""
 128
 129     def set_conditions(self, category, report_level, halt_level,
 130                        stream=None, debug=0):
 131         warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
 132                       'set attributes via configuration settings or directly',
 133                       DeprecationWarning, stacklevel=2)
 134         self.report_level = report_level
 135         self.halt_level = halt_level
 136         if stream is None:
 137             stream = sys.stderr
 138         self.stream = stream
 139         self.debug_flag = debug
 140
 141     def attach_observer(self, observer):
 142         """
 143         The `observer` parameter is a function or bound method which takes one
 144         argument, a `nodes.system_message` instance.
 145         """
 146         self.observers.append(observer)
 147
 148     def detach_observer(self, observer):
 149         self.observers.remove(observer)
 150
 151     def notify_observers(self, message):
 152         for observer in self.observers:
 153             observer(message)
 154
 155     def system_message(self, level, message, *children, **kwargs):
 156         """
 157         Return a system_message object.
 158
 159         Raise an exception or generate a warning if appropriate.
 160         """
 161         attributes = kwargs.copy()
 162         if 'base_node' in kwargs:
 163             source, line = get_source_line(kwargs['base_node'])
 164             del attributes['base_node']
 165             if source is not None:
 166                 attributes.setdefault('source', source)
 167             if line is not None:
 168                 attributes.setdefault('line', line)
 169         attributes.setdefault('source', self.source)
 170         msg = nodes.system_message(message, level=level,
 171                                    type=self.levels[level],
 172                                    *children, **attributes)
 173         if self.stream and (level >= self.report_level
 174                             or self.debug_flag and level == self.DEBUG_LEVEL):
 175             msgtext = msg.astext().encode(self.encoding, self.error_handler)
 176             print >>self.stream, msgtext
 177         if level >= self.halt_level:
 178             raise SystemMessage(msg, level)
 179         if level > self.DEBUG_LEVEL or self.debug_flag:
 180             self.notify_observers(msg)
 181         self.max_level = max(level, self.max_level)
 182         return msg
 183
 184     def debug(self, *args, **kwargs):
 185         """
 186         Level-0, "DEBUG": an internal reporting issue. Typically, there is no
 187         effect on the processing. Level-0 system messages are handled
 188         separately from the others.
 189         """
 190         if self.debug_flag:
 191             return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
 192
 193     def info(self, *args, **kwargs):
 194         """
 195         Level-1, "INFO": a minor issue that can be ignored. Typically there is
 196         no effect on processing, and level-1 system messages are not reported.
 197         """
 198         return self.system_message(self.INFO_LEVEL, *args, **kwargs)
 199
 200     def warning(self, *args, **kwargs):
 201         """
 202         Level-2, "WARNING": an issue that should be addressed. If ignored,
 203         there may be unpredictable problems with the output.
 204         """
 205         return self.system_message(self.WARNING_LEVEL, *args, **kwargs)
 206
 207     def error(self, *args, **kwargs):
 208         """
 209         Level-3, "ERROR": an error that should be addressed. If ignored, the
 210         output will contain errors.
 211         """
 212         return self.system_message(self.ERROR_LEVEL, *args, **kwargs)
 213
 214     def severe(self, *args, **kwargs):
 215         """
 216         Level-4, "SEVERE": a severe error that must be addressed. If ignored,
 217         the output will contain severe errors. Typically level-4 system
 218         messages are turned into exceptions which halt processing.
 219         """
 220         return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
 221
 222
 223 class ExtensionOptionError(DataError): pass
 224 class BadOptionError(ExtensionOptionError): pass
 225 class BadOptionDataError(ExtensionOptionError): pass
 226 class DuplicateOptionError(ExtensionOptionError): pass
 227
 228
 229 def extract_extension_options(field_list, options_spec):
 230     """
 231     Return a dictionary mapping extension option names to converted values.
 232
 233     :Parameters:
 234         - `field_list`: A flat field list without field arguments, where each
 235           field body consists of a single paragraph only.
 236         - `options_spec`: Dictionary mapping known option names to a
 237           conversion function such as `int` or `float`.
 238
 239     :Exceptions:
 240         - `KeyError` for unknown option names.
 241         - `ValueError` for invalid option values (raised by the conversion
 242            function).
 243         - `TypeError` for invalid option value types (raised by conversion
 244            function).
 245         - `DuplicateOptionError` for duplicate options.
 246         - `BadOptionError` for invalid fields.
 247         - `BadOptionDataError` for invalid option data (missing name,
 248           missing data, bad quotes, etc.).
 249     """
 250     option_list = extract_options(field_list)
 251     option_dict = assemble_option_dict(option_list, options_spec)
 252     return option_dict
 253
 254 def extract_options(field_list):
 255     """
 256     Return a list of option (name, value) pairs from field names & bodies.
 257
 258     :Parameter:
 259         `field_list`: A flat field list, where each field name is a single
 260         word and each field body consists of a single paragraph only.
 261
 262     :Exceptions:
 263         - `BadOptionError` for invalid fields.
 264         - `BadOptionDataError` for invalid option data (missing name,
 265           missing data, bad quotes, etc.).
 266     """
 267     option_list = []
 268     for field in field_list:
 269         if len(field[0].astext().split()) != 1:
 270             raise BadOptionError(
 271                 'extension option field name may not contain multiple words')
 272         name = str(field[0].astext().lower())
 273         body = field[1]
 274         if len(body) == 0:
 275             data = None
 276         elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
 277               or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
 278             raise BadOptionDataError(
 279                   'extension option field body may contain\n'
 280                   'a single paragraph only (option "%s")' % name)
 281         else:
 282             data = body[0][0].astext()
 283         option_list.append((name, data))
 284     return option_list
 285
 286 def assemble_option_dict(option_list, options_spec):
 287     """
 288     Return a mapping of option names to values.
 289
 290     :Parameters:
 291         - `option_list`: A list of (name, value) pairs (the output of
 292           `extract_options()`).
 293         - `options_spec`: Dictionary mapping known option names to a
 294           conversion function such as `int` or `float`.
 295
 296     :Exceptions:
 297         - `KeyError` for unknown option names.
 298         - `DuplicateOptionError` for duplicate options.
 299         - `ValueError` for invalid option values (raised by conversion
 300            function).
 301         - `TypeError` for invalid option value types (raised by conversion
 302            function).
 303     """
 304     options = {}
 305     for name, value in option_list:
 306         convertor = options_spec[name]  # raises KeyError if unknown
 307         if convertor is None:
 308             raise KeyError(name)        # or if explicitly disabled
 309         if name in options:
 310             raise DuplicateOptionError('duplicate option "%s"' % name)
 311         try:
 312             options[name] = convertor(value)
 313         except (ValueError, TypeError), detail:
 314             raise detail.__class__('(option: "%s"; value: %r)\n%s'
 315                                    % (name, value, ' '.join(detail.args)))
 316     return options
 317
 318
 319 class NameValueError(DataError): pass
 320
 321
 322 def extract_name_value(line):
 323     """
 324     Return a list of (name, value) from a line of the form "name=value ...".
 325
 326     :Exception:
 327         `NameValueError` for invalid input (missing name, missing data, bad
 328         quotes, etc.).
 329     """
 330     attlist = []
 331     while line:
 332         equals = line.find('=')
 333         if equals == -1:
 334             raise NameValueError('missing "="')
 335         attname = line[:equals].strip()
 336         if equals == 0 or not attname:
 337             raise NameValueError(
 338                   'missing attribute name before "="')
 339         line = line[equals+1:].lstrip()
 340         if not line:
 341             raise NameValueError(
 342                   'missing value after "%s="' % attname)
 343         if line[0] in '\'"':
 344             endquote = line.find(line[0], 1)
 345             if endquote == -1:
 346                 raise NameValueError(
 347                       'attribute "%s" missing end quote (%s)'
 348                       % (attname, line[0]))
 349             if len(line) > endquote + 1 and line[endquote + 1].strip():
 350                 raise NameValueError(
 351                       'attribute "%s" end quote (%s) not followed by '
 352                       'whitespace' % (attname, line[0]))
 353             data = line[1:endquote]
 354             line = line[endquote+1:].lstrip()
 355         else:
 356             space = line.find(' ')
 357             if space == -1:
 358                 data = line
 359                 line = ''
 360             else:
 361                 data = line[:space]
 362                 line = line[space+1:].lstrip()
 363         attlist.append((attname.lower(), data))
 364     return attlist
 365
 366 def new_reporter(source_path, settings):
 367     """
 368     Return a new Reporter object.
 369
 370     :Parameters:
 371         `source` : string
 372             The path to or description of the source text of the document.
 373         `settings` : optparse.Values object
 374             Runtime settings.
 375     """
 376     reporter = Reporter(
 377         source_path, settings.report_level, settings.halt_level,
 378         stream=settings.warning_stream, debug=settings.debug,
 379         encoding=settings.error_encoding,
 380         error_handler=settings.error_encoding_error_handler)
 381     return reporter
 382
 383 def new_document(source_path, settings=None):
 384     """
 385     Return a new empty document object.
 386
 387     :Parameters:
 388         `source_path` : string
 389             The path to or description of the source text of the document.
 390         `settings` : optparse.Values object
 391             Runtime settings.  If none provided, a default set will be used.
 392     """
 393     from docutils import frontend
 394     if settings is None:
 395         settings = frontend.OptionParser().get_default_values()
 396     reporter = new_reporter(source_path, settings)
 397     document = nodes.document(settings, reporter, source=source_path)
 398     document.note_source(source_path, -1)
 399     return document
 400
 401 def clean_rcs_keywords(paragraph, keyword_substitutions):
 402     if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
 403         textnode = paragraph[0]
 404         for pattern, substitution in keyword_substitutions:
 405             match = pattern.search(textnode)
 406             if match:
 407                 paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
 408                 return
 409
 410 def relative_path(source, target):
 411     """
 412     Build and return a path to `target`, relative to `source` (both files).
 413
 414     If there is no common prefix, return the absolute path to `target`.
 415     """
 416     source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
 417     target_parts = os.path.abspath(target).split(os.sep)
 418     # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
 419     if source_parts[:2] != target_parts[:2]:
 420         # Nothing in common between paths.
 421         # Return absolute path, using '/' for URLs:
 422         return '/'.join(target_parts)
 423     source_parts.reverse()
 424     target_parts.reverse()
 425     while (source_parts and target_parts
 426            and source_parts[-1] == target_parts[-1]):
 427         # Remove path components in common:
 428         source_parts.pop()
 429         target_parts.pop()
 430     target_parts.reverse()
 431     parts = ['..'] * (len(source_parts) - 1) + target_parts
 432     return '/'.join(parts)
 433
 434 def get_stylesheet_reference(settings, relative_to=None):
 435     """
 436     Retrieve a stylesheet reference from the settings object.
 437     """
 438     if settings.stylesheet_path:
 439         assert not settings.stylesheet, \
 440                'stylesheet and stylesheet_path are mutually exclusive.'
 441         if relative_to == None:
 442             relative_to = settings._destination
 443         return relative_path(relative_to, settings.stylesheet_path)
 444     else:
 445         return settings.stylesheet
 446
 447 def get_trim_footnote_ref_space(settings):
 448     """
 449     Return whether or not to trim footnote space.
 450
 451     If trim_footnote_reference_space is not None, return it.
 452
 453     If trim_footnote_reference_space is None, return False unless the
 454     footnote reference style is 'superscript'.
 455     """
 456     if settings.trim_footnote_reference_space is None:
 457         return hasattr(settings, 'footnote_references') and \
 458                settings.footnote_references == 'superscript'
 459     else:
 460         return settings.trim_footnote_reference_space
 461
 462 def get_source_line(node):
 463     """
 464     Return the "source" and "line" attributes from the `node` given or from
 465     its closest ancestor.
 466     """
 467     while node:
 468         if node.source or node.line:
 469             return node.source, node.line
 470         node = node.parent
 471     return None, None
 472
 473 def escape2null(text):
 474     """Return a string with escape-backslashes converted to nulls."""
 475     parts = []
 476     start = 0
 477     while 1:
 478         found = text.find('\\', start)
 479         if found == -1:
 480             parts.append(text[start:])
 481             return ''.join(parts)
 482         parts.append(text[start:found])
 483         parts.append('\x00' + text[found+1:found+2])
 484         start = found + 2               # skip character after escape
 485
 486 def unescape(text, restore_backslashes=0):
 487     """
 488     Return a string with nulls removed or restored to backslashes.
 489     Backslash-escaped spaces are also removed.
 490     """
 491     if restore_backslashes:
 492         return text.replace('\x00', '\\')
 493     else:
 494         for sep in ['\x00 ', '\x00\n', '\x00']:
 495             text = ''.join(text.split(sep))
 496         return text
 497
 498 east_asian_widths = {'W': 2,   # Wide
 499                      'F': 2,   # Full-width (wide)
 500                      'Na': 1,  # Narrow
 501                      'H': 1,   # Half-width (narrow)
 502                      'N': 1,   # Neutral (not East Asian, treated as narrow)
 503                      'A': 1}   # Ambiguous (s/b wide in East Asian context,
 504                                # narrow otherwise, but that doesn't work)
 505 """Mapping of result codes from `unicodedata.east_asian_width()` to character
 506 column widths."""
 507
 508 def east_asian_column_width(text):
 509     if isinstance(text, unicode):
 510         total = 0
 511         for c in text:
 512             total += east_asian_widths[unicodedata.east_asian_width(c)]
 513         return total
 514     else:
 515         return len(text)
 516
 517 if hasattr(unicodedata, 'east_asian_width'):
 518     column_width = east_asian_column_width
 519 else:
 520     column_width = len
 521
 522 def uniq(L):
 523      r = []
 524      for item in L:
 525          if not item in r:
 526              r.append(item)
 527      return r
 528
 529
 530 class DependencyList:
 531
 532     """
 533     List of dependencies, with file recording support.
 534
 535     Note that the output file is not automatically closed.  You have
 536     to explicitly call the close() method.
 537     """
 538
 539     def __init__(self, output_file=None, dependencies=[]):
 540         """
 541         Initialize the dependency list, automatically setting the
 542         output file to `output_file` (see `set_output()`) and adding
 543         all supplied dependencies.
 544         """
 545         self.set_output(output_file)
 546         for i in dependencies:
 547             self.add(i)
 548
 549     def set_output(self, output_file):
 550         """
 551         Set the output file and clear the list of already added
 552         dependencies.
 553
 554         `output_file` must be a string.  The specified file is
 555         immediately overwritten.
 556
 557         If output_file is '-', the output will be written to stdout.
 558         If it is None, no file output is done when calling add().
 559         """
 560         self.list = []
 561         if output_file == '-':
 562             self.file = sys.stdout
 563         elif output_file:
 564             self.file = open(output_file, 'w')
 565         else:
 566             self.file = None
 567
 568     def add(self, filename):
 569         """
 570         If the dependency `filename` has not already been added,
 571         append it to self.list and print it to self.file if self.file
 572         is not None.
 573         """
 574         if not filename in self.list:
 575             self.list.append(filename)
 576             if self.file is not None:
 577                 print >>self.file, filename
 578
 579     def close(self):
 580         """
 581         Close the output file.
 582         """
 583         self.file.close()
 584         self.file = None
 585
 586     def __repr__(self):
 587         if self.file:
 588             output_file = self.file.name
 589         else:
 590             output_file = None
 591         return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)