docutils/docutils/utils.py

   1 # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 Miscellaneous utilities for the documentation utilities.
   7 """
   8
   9 __docformat__ = 'reStructuredText'
  10
  11 import sys
  12 import os
  13 import os.path
  14 import warnings
  15 import unicodedata
  16 from docutils import ApplicationError, DataError
  17 from docutils import nodes
  18
  19
  20 class SystemMessage(ApplicationError):
  21
  22     def __init__(self, system_message, level):
  23         Exception.__init__(self, system_message.astext())
  24         self.level = level
  25
  26
  27 class SystemMessagePropagation(ApplicationError): pass
  28
  29
  30 class Reporter:
  31
  32     """
  33     Info/warning/error reporter and ``system_message`` element generator.
  34
  35     Five levels of system messages are defined, along with corresponding
  36     methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
  37
  38     There is typically one Reporter object per process.  A Reporter object is
  39     instantiated with thresholds for reporting (generating warnings) and
  40     halting processing (raising exceptions), a switch to turn debug output on
  41     or off, and an I/O stream for warnings.  These are stored as instance
  42     attributes.
  43
  44     When a system message is generated, its level is compared to the stored
  45     thresholds, and a warning or error is generated as appropriate.  Debug
  46     messages are produced iff the stored debug switch is on, independently of
  47     other thresholds.  Message output is sent to the stored warning stream if
  48     not set to ''.
  49
  50     The Reporter class also employs a modified form of the "Observer" pattern
  51     [GoF95]_ to track system messages generated.  The `attach_observer` method
  52     should be called before parsing, with a bound method or function which
  53     accepts system messages.  The observer can be removed with
  54     `detach_observer`, and another added in its place.
  55
  56     .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
  57        Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
  58        1995.
  59     """
  60
  61     levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
  62     """List of names for system message levels, indexed by level."""
  63
  64     # system message level constants:
  65     (DEBUG_LEVEL,
  66      INFO_LEVEL,
  67      WARNING_LEVEL,
  68      ERROR_LEVEL,
  69      SEVERE_LEVEL) = range(5)
  70
  71     def __init__(self, source, report_level, halt_level, stream=None,
  72                  debug=0, encoding='ascii', error_handler='replace'):
  73         """
  74         :Parameters:
  75             - `source`: The path to or description of the source data.
  76             - `report_level`: The level at or above which warning output will
  77               be sent to `stream`.
  78             - `halt_level`: The level at or above which `SystemMessage`
  79               exceptions will be raised, halting execution.
  80             - `debug`: Show debug (level=0) system messages?
  81             - `stream`: Where warning output is sent.  Can be file-like (has a
  82               ``.write`` method), a string (file name, opened for writing),
  83               '' (empty string, for discarding all stream messages) or
  84               `None` (implies `sys.stderr`; default).
  85             - `encoding`: The encoding for stderr output.
  86             - `error_handler`: The error handler for stderr output encoding.
  87         """
  88
  89         self.source = source
  90         """The path to or description of the source data."""
  91
  92         self.encoding = encoding
  93         """The character encoding for the stderr output."""
  94
  95         self.error_handler = error_handler
  96         """The character encoding error handler."""
  97
  98         self.debug_flag = debug
  99         """Show debug (level=0) system messages?"""
 100
 101         self.report_level = report_level
 102         """The level at or above which warning output will be sent
 103         to `self.stream`."""
 104
 105         self.halt_level = halt_level
 106         """The level at or above which `SystemMessage` exceptions
 107         will be raised, halting execution."""
 108
 109         if stream is None:
 110             stream = sys.stderr
 111         elif type(stream) in (str, unicode):
 112             # Leave stream untouched if it's ''.
 113             if stream != '':
 114                 if type(stream) == str:
 115                     stream = open(stream, 'w')
 116                 elif type(stream) == unicode:
 117                     stream = open(stream.encode(), 'w')
 118
 119         self.stream = stream
 120         """Where warning output is sent."""
 121
 122         self.observers = []
 123         """List of bound methods or functions to call with each system_message
 124         created."""
 125
 126         self.max_level = -1
 127         """The highest level system message generated so far."""
 128
 129     def set_conditions(self, category, report_level, halt_level,
 130                        stream=None, debug=0):
 131         warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
 132                       'set attributes via configuration settings or directly',
 133                       DeprecationWarning, stacklevel=2)
 134         self.report_level = report_level
 135         self.halt_level = halt_level
 136         if stream is None:
 137             stream = sys.stderr
 138         self.stream = stream
 139         self.debug_flag = debug
 140
 141     def attach_observer(self, observer):
 142         """
 143         The `observer` parameter is a function or bound method which takes one
 144         argument, a `nodes.system_message` instance.
 145         """
 146         self.observers.append(observer)
 147
 148     def detach_observer(self, observer):
 149         self.observers.remove(observer)
 150
 151     def notify_observers(self, message):
 152         for observer in self.observers:
 153             observer(message)
 154
 155     def system_message(self, level, message, *children, **kwargs):
 156         """
 157         Return a system_message object.
 158
 159         Raise an exception or generate a warning if appropriate.
 160         """
 161         attributes = kwargs.copy()
 162         if 'base_node' in kwargs:
 163             source, line = get_source_line(kwargs['base_node'])
 164             del attributes['base_node']
 165             if source is not None:
 166                 attributes.setdefault('source', source)
 167             if line is not None:
 168                 attributes.setdefault('line', line)
 169         attributes.setdefault('source', self.source)
 170         msg = nodes.system_message(message, level=level,
 171                                    type=self.levels[level],
 172                                    *children, **attributes)
 173         if self.stream and (level >= self.report_level
 174                             or self.debug_flag and level == self.DEBUG_LEVEL):
 175             msgtext = msg.astext().encode(self.encoding, self.error_handler)
 176             print >>self.stream, msgtext
 177         if level >= self.halt_level:
 178             raise SystemMessage(msg, level)
 179         if level > self.DEBUG_LEVEL or self.debug_flag:
 180             self.notify_observers(msg)
 181         self.max_level = max(level, self.max_level)
 182         return msg
 183
 184     def debug(self, *args, **kwargs):
 185         """
 186         Level-0, "DEBUG": an internal reporting issue. Typically, there is no
 187         effect on the processing. Level-0 system messages are handled
 188         separately from the others.
 189         """
 190         if self.debug_flag:
 191             return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
 192
 193     def info(self, *args, **kwargs):
 194         """
 195         Level-1, "INFO": a minor issue that can be ignored. Typically there is
 196         no effect on processing, and level-1 system messages are not reported.
 197         """
 198         return self.system_message(self.INFO_LEVEL, *args, **kwargs)
 199
 200     def warning(self, *args, **kwargs):
 201         """
 202         Level-2, "WARNING": an issue that should be addressed. If ignored,
 203         there may be unpredictable problems with the output.
 204         """
 205         return self.system_message(self.WARNING_LEVEL, *args, **kwargs)
 206
 207     def error(self, *args, **kwargs):
 208         """
 209         Level-3, "ERROR": an error that should be addressed. If ignored, the
 210         output will contain errors.
 211         """
 212         return self.system_message(self.ERROR_LEVEL, *args, **kwargs)
 213
 214     def severe(self, *args, **kwargs):
 215         """
 216         Level-4, "SEVERE": a severe error that must be addressed. If ignored,
 217         the output will contain severe errors. Typically level-4 system
 218         messages are turned into exceptions which halt processing.
 219         """
 220         return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
 221
 222
 223 class ExtensionOptionError(DataError): pass
 224 class BadOptionError(ExtensionOptionError): pass
 225 class BadOptionDataError(ExtensionOptionError): pass
 226 class DuplicateOptionError(ExtensionOptionError): pass
 227
 228
 229 def extract_extension_options(field_list, options_spec):
 230     """
 231     Return a dictionary mapping extension option names to converted values.
 232
 233     :Parameters:
 234         - `field_list`: A flat field list without field arguments, where each
 235           field body consists of a single paragraph only.
 236         - `options_spec`: Dictionary mapping known option names to a
 237           conversion function such as `int` or `float`.
 238
 239     :Exceptions:
 240         - `KeyError` for unknown option names.
 241         - `ValueError` for invalid option values (raised by the conversion
 242            function).
 243         - `TypeError` for invalid option value types (raised by conversion
 244            function).
 245         - `DuplicateOptionError` for duplicate options.
 246         - `BadOptionError` for invalid fields.
 247         - `BadOptionDataError` for invalid option data (missing name,
 248           missing data, bad quotes, etc.).
 249     """
 250     option_list = extract_options(field_list)
 251     option_dict = assemble_option_dict(option_list, options_spec)
 252     return option_dict
 253
 254 def extract_options(field_list):
 255     """
 256     Return a list of option (name, value) pairs from field names & bodies.
 257
 258     :Parameter:
 259         `field_list`: A flat field list, where each field name is a single
 260         word and each field body consists of a single paragraph only.
 261
 262     :Exceptions:
 263         - `BadOptionError` for invalid fields.
 264         - `BadOptionDataError` for invalid option data (missing name,
 265           missing data, bad quotes, etc.).
 266     """
 267     option_list = []
 268     for field in field_list:
 269         if len(field[0].astext().split()) != 1:
 270             raise BadOptionError(
 271                 'extension option field name may not contain multiple words')
 272         name = str(field[0].astext().lower())
 273         body = field[1]
 274         if len(body) == 0:
 275             data = None
 276         elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
 277               or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
 278             raise BadOptionDataError(
 279                   'extension option field body may contain\n'
 280                   'a single paragraph only (option "%s")' % name)
 281         else:
 282             data = body[0][0].astext()
 283         option_list.append((name, data))
 284     return option_list
 285
 286 def assemble_option_dict(option_list, options_spec):
 287     """
 288     Return a mapping of option names to values.
 289
 290     :Parameters:
 291         - `option_list`: A list of (name, value) pairs (the output of
 292           `extract_options()`).
 293         - `options_spec`: Dictionary mapping known option names to a
 294           conversion function such as `int` or `float`.
 295
 296     :Exceptions:
 297         - `KeyError` for unknown option names.
 298         - `DuplicateOptionError` for duplicate options.
 299         - `ValueError` for invalid option values (raised by conversion
 300            function).
 301         - `TypeError` for invalid option value types (raised by conversion
 302            function).
 303     """
 304     options = {}
 305     for name, value in option_list:
 306         convertor = options_spec[name]  # raises KeyError if unknown
 307         if convertor is None:
 308             raise KeyError(name)        # or if explicitly disabled
 309         if name in options:
 310             raise DuplicateOptionError('duplicate option "%s"' % name)
 311         try:
 312             options[name] = convertor(value)
 313         except (ValueError, TypeError), detail:
 314             raise detail.__class__('(option: "%s"; value: %r)\n%s'
 315                                    % (name, value, ' '.join(detail.args)))
 316     return options
 317
 318
 319 class NameValueError(DataError): pass
 320
 321
 322 def extract_name_value(line):
 323     """
 324     Return a list of (name, value) from a line of the form "name=value ...".
 325
 326     :Exception:
 327         `NameValueError` for invalid input (missing name, missing data, bad
 328         quotes, etc.).
 329     """
 330     attlist = []
 331     while line:
 332         equals = line.find('=')
 333         if equals == -1:
 334             raise NameValueError('missing "="')
 335         attname = line[:equals].strip()
 336         if equals == 0 or not attname:
 337             raise NameValueError(
 338                   'missing attribute name before "="')
 339         line = line[equals+1:].lstrip()
 340         if not line:
 341             raise NameValueError(
 342                   'missing value after "%s="' % attname)
 343         if line[0] in '\'"':
 344             endquote = line.find(line[0], 1)
 345             if endquote == -1:
 346                 raise NameValueError(
 347                       'attribute "%s" missing end quote (%s)'
 348                       % (attname, line[0]))
 349             if len(line) > endquote + 1 and line[endquote + 1].strip():
 350                 raise NameValueError(
 351                       'attribute "%s" end quote (%s) not followed by '
 352                       'whitespace' % (attname, line[0]))
 353             data = line[1:endquote]
 354             line = line[endquote+1:].lstrip()
 355         else:
 356             space = line.find(' ')
 357             if space == -1:
 358                 data = line
 359                 line = ''
 360             else:
 361                 data = line[:space]
 362                 line = line[space+1:].lstrip()
 363         attlist.append((attname.lower(), data))
 364     return attlist
 365
 366 def new_reporter(source_path, settings):
 367     """
 368     Return a new Reporter object.
 369
 370     :Parameters:
 371         `source` : string
 372             The path to or description of the source text of the document.
 373         `settings` : optparse.Values object
 374             Runtime settings.
 375     """
 376     reporter = Reporter(
 377         source_path, settings.report_level, settings.halt_level,
 378         stream=settings.warning_stream, debug=settings.debug,
 379         encoding=settings.error_encoding,
 380         error_handler=settings.error_encoding_error_handler)
 381     return reporter
 382
 383 def new_document(source_path, settings=None):
 384     """
 385     Return a new empty document object.
 386
 387     :Parameters:
 388         `source_path` : string
 389             The path to or description of the source text of the document.
 390         `settings` : optparse.Values object
 391             Runtime settings.  If none provided, a default set will be used.
 392     """
 393     from docutils import frontend
 394     if settings is None:
 395         settings = frontend.OptionParser().get_default_values()
 396     reporter = new_reporter(source_path, settings)
 397     document = nodes.document(settings, reporter, source=source_path)
 398     document.note_source(source_path, -1)
 399     return document
 400
 401 def clean_rcs_keywords(paragraph, keyword_substitutions):
 402     if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
 403         textnode = paragraph[0]
 404         for pattern, substitution in keyword_substitutions:
 405             match = pattern.search(textnode)
 406             if match:
 407                 paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
 408                 return
 409
 410 def relative_path(source, target):
 411     """
 412     Build and return a path to `target`, relative to `source` (both files).
 413
 414     If there is no common prefix, return the absolute path to `target`.
 415     """
 416     source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
 417     target_parts = os.path.abspath(target).split(os.sep)
 418     # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
 419     if source_parts[:2] != target_parts[:2]:
 420         # Nothing in common between paths.
 421         # Return absolute path, using '/' for URLs:
 422         return '/'.join(target_parts)
 423     source_parts.reverse()
 424     target_parts.reverse()
 425     while (source_parts and target_parts
 426            and source_parts[-1] == target_parts[-1]):
 427         # Remove path components in common:
 428         source_parts.pop()
 429         target_parts.pop()
 430     target_parts.reverse()
 431     parts = ['..'] * (len(source_parts) - 1) + target_parts
 432     return '/'.join(parts)
 433
 434 def get_stylesheet_reference(settings, relative_to=None):
 435     """
 436     Retrieve a stylesheet reference from the settings object.
 437
 438     Deprecated. Use get_stylesheet_reference_list() instead to
 439     enable specification of multiple stylesheets as a comma-separated
 440     list.
 441     """
 442     if settings.stylesheet_path:
 443         assert not settings.stylesheet, (
 444             'stylesheet and stylesheet_path are mutually exclusive.')
 445         if relative_to == None:
 446             relative_to = settings._destination
 447         return relative_path(relative_to, settings.stylesheet_path)
 448     else:
 449         return settings.stylesheet
 450
 451 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
 452 #
 453 # The original settings arguments are kept unchanged: you can test
 454 # with e.g. ``if settings.stylesheet_path:``
 455 #
 456 # Differences to ``get_stylesheet_reference``:
 457 # * return value is a list
 458 # * no re-writing of the path (and therefore no optional argument)
 459 #   (if required, use ``utils.relative_path(source, target)``
 460 #   in the calling script)
 461 def get_stylesheet_list(settings):
 462     """
 463     Retrieve list of stylesheet references from the settings object.
 464     """
 465     if settings.stylesheet_path:
 466         assert not settings.stylesheet, (
 467                'stylesheet and stylesheet_path are mutually exclusive.')
 468         return settings.stylesheet_path.split(",")
 469     elif settings.stylesheet:
 470         return settings.stylesheet.split(",")
 471     else:
 472         return []
 473
 474 def get_trim_footnote_ref_space(settings):
 475     """
 476     Return whether or not to trim footnote space.
 477
 478     If trim_footnote_reference_space is not None, return it.
 479
 480     If trim_footnote_reference_space is None, return False unless the
 481     footnote reference style is 'superscript'.
 482     """
 483     if settings.trim_footnote_reference_space is None:
 484         return hasattr(settings, 'footnote_references') and \
 485                settings.footnote_references == 'superscript'
 486     else:
 487         return settings.trim_footnote_reference_space
 488
 489 def get_source_line(node):
 490     """
 491     Return the "source" and "line" attributes from the `node` given or from
 492     its closest ancestor.
 493     """
 494     while node:
 495         if node.source or node.line:
 496             return node.source, node.line
 497         node = node.parent
 498     return None, None
 499
 500 def escape2null(text):
 501     """Return a string with escape-backslashes converted to nulls."""
 502     parts = []
 503     start = 0
 504     while 1:
 505         found = text.find('\\', start)
 506         if found == -1:
 507             parts.append(text[start:])
 508             return ''.join(parts)
 509         parts.append(text[start:found])
 510         parts.append('\x00' + text[found+1:found+2])
 511         start = found + 2               # skip character after escape
 512
 513 def unescape(text, restore_backslashes=0):
 514     """
 515     Return a string with nulls removed or restored to backslashes.
 516     Backslash-escaped spaces are also removed.
 517     """
 518     if restore_backslashes:
 519         return text.replace('\x00', '\\')
 520     else:
 521         for sep in ['\x00 ', '\x00\n', '\x00']:
 522             text = ''.join(text.split(sep))
 523         return text
 524
 525 east_asian_widths = {'W': 2,   # Wide
 526                      'F': 2,   # Full-width (wide)
 527                      'Na': 1,  # Narrow
 528                      'H': 1,   # Half-width (narrow)
 529                      'N': 1,   # Neutral (not East Asian, treated as narrow)
 530                      'A': 1}   # Ambiguous (s/b wide in East Asian context,
 531                                # narrow otherwise, but that doesn't work)
 532 """Mapping of result codes from `unicodedata.east_asian_width()` to character
 533 column widths."""
 534
 535 def east_asian_column_width(text):
 536     if isinstance(text, unicode):
 537         total = 0
 538         for c in text:
 539             total += east_asian_widths[unicodedata.east_asian_width(c)]
 540         return total
 541     else:
 542         return len(text)
 543
 544 if hasattr(unicodedata, 'east_asian_width'):
 545     column_width = east_asian_column_width
 546 else:
 547     column_width = len
 548
 549 def uniq(L):
 550      r = []
 551      for item in L:
 552          if not item in r:
 553              r.append(item)
 554      return r
 555
 556
 557 class DependencyList:
 558
 559     """
 560     List of dependencies, with file recording support.
 561
 562     Note that the output file is not automatically closed.  You have
 563     to explicitly call the close() method.
 564     """
 565
 566     def __init__(self, output_file=None, dependencies=[]):
 567         """
 568         Initialize the dependency list, automatically setting the
 569         output file to `output_file` (see `set_output()`) and adding
 570         all supplied dependencies.
 571         """
 572         self.set_output(output_file)
 573         for i in dependencies:
 574             self.add(i)
 575
 576     def set_output(self, output_file):
 577         """
 578         Set the output file and clear the list of already added
 579         dependencies.
 580
 581         `output_file` must be a string.  The specified file is
 582         immediately overwritten.
 583
 584         If output_file is '-', the output will be written to stdout.
 585         If it is None, no file output is done when calling add().
 586         """
 587         self.list = []
 588         if output_file == '-':
 589             self.file = sys.stdout
 590         elif output_file:
 591             self.file = open(output_file, 'w')
 592         else:
 593             self.file = None
 594
 595     def add(self, *filenames):
 596         """
 597         If the dependency `filename` has not already been added,
 598         append it to self.list and print it to self.file if self.file
 599         is not None.
 600         """
 601         for filename in filenames:
 602             if not filename in self.list:
 603                 self.list.append(filename)
 604                 if self.file is not None:
 605                     print >>self.file, filename
 606
 607     def close(self):
 608         """
 609         Close the output file.
 610         """
 611         self.file.close()
 612         self.file = None
 613
 614     def __repr__(self):
 615         if self.file:
 616             output_file = self.file.name
 617         else:
 618             output_file = None
 619         return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)