docutils/utils.py

   1 # Author: David Goodger
   2 # Contact: goodger@users.sourceforge.net
   3 # Revision: $Revision$
   4 # Date: $Date$
   5 # Copyright: This module has been placed in the public domain.
   6
   7 """
   8 Miscellaneous utilities for the documentation utilities.
   9 """
  10
  11 __docformat__ = 'reStructuredText'
  12
  13 import sys
  14 import os
  15 import os.path
  16 import types
  17 import warnings
  18 import unicodedata
  19 from types import StringType, UnicodeType
  20 from docutils import ApplicationError, DataError
  21 from docutils import frontend, nodes
  22
  23
  24 class SystemMessage(ApplicationError):
  25
  26     def __init__(self, system_message, level):
  27         Exception.__init__(self, system_message.astext())
  28         self.level = level
  29
  30
  31 class SystemMessagePropagation(ApplicationError): pass
  32
  33
  34 class Reporter:
  35
  36     """
  37     Info/warning/error reporter and ``system_message`` element generator.
  38
  39     Five levels of system messages are defined, along with corresponding
  40     methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
  41
  42     There is typically one Reporter object per process.  A Reporter object is
  43     instantiated with thresholds for reporting (generating warnings) and
  44     halting processing (raising exceptions), a switch to turn debug output on
  45     or off, and an I/O stream for warnings.  These are stored as instance
  46     attributes.
  47
  48     When a system message is generated, its level is compared to the stored
  49     thresholds, and a warning or error is generated as appropriate.  Debug
  50     messages are produced iff the stored debug switch is on, independently of
  51     other thresholds.  Message output is sent to the stored warning stream if
  52     not set to ''.
  53
  54     The Reporter class also employs a modified form of the "Observer" pattern
  55     [GoF95]_ to track system messages generated.  The `attach_observer` method
  56     should be called before parsing, with a bound method or function which
  57     accepts system messages.  The observer can be removed with
  58     `detach_observer`, and another added in its place.
  59
  60     .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
  61        Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
  62        1995.
  63     """
  64
  65     levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
  66     """List of names for system message levels, indexed by level."""
  67
  68     def __init__(self, source, report_level, halt_level, stream=None,
  69                  debug=0, encoding='ascii', error_handler='replace'):
  70         """
  71         :Parameters:
  72             - `source`: The path to or description of the source data.
  73             - `report_level`: The level at or above which warning output will
  74               be sent to `stream`.
  75             - `halt_level`: The level at or above which `SystemMessage`
  76               exceptions will be raised, halting execution.
  77             - `debug`: Show debug (level=0) system messages?
  78             - `stream`: Where warning output is sent.  Can be file-like (has a
  79               ``.write`` method), a string (file name, opened for writing),
  80               '' (empty string, for discarding all stream messages) or
  81               `None` (implies `sys.stderr`; default).
  82             - `encoding`: The encoding for stderr output.
  83             - `error_handler`: The error handler for stderr output encoding.
  84         """
  85
  86         self.source = source
  87         """The path to or description of the source data."""
  88
  89         self.encoding = encoding
  90         """The character encoding for the stderr output."""
  91
  92         self.error_handler = error_handler
  93         """The character encoding error handler."""
  94
  95         self.debug_flag = debug
  96         """Show debug (level=0) system messages?"""
  97
  98         self.report_level = report_level
  99         """The level at or above which warning output will be sent
 100         to `self.stream`."""
 101
 102         self.halt_level = halt_level
 103         """The level at or above which `SystemMessage` exceptions
 104         will be raised, halting execution."""
 105
 106         if stream is None:
 107             stream = sys.stderr
 108         elif type(stream) in (StringType, UnicodeType):
 109             # Leave stream untouched if it's ''.
 110             if stream != '':
 111                 if type(stream) == StringType:
 112                     stream = open(stream, 'w')
 113                 elif type(stream) == UnicodeType:
 114                     stream = open(stream.encode(), 'w')
 115
 116         self.stream = stream
 117         """Where warning output is sent."""
 118
 119         self.observers = []
 120         """List of bound methods or functions to call with each system_message
 121         created."""
 122
 123         self.max_level = -1
 124         """The highest level system message generated so far."""
 125
 126     def set_conditions(self, category, report_level, halt_level,
 127                        stream=None, debug=0):
 128         warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
 129                       'set attributes via configuration settings or directly',
 130                       DeprecationWarning, stacklevel=2)
 131         self.report_level = report_level
 132         self.halt_level = halt_level
 133         if stream is None:
 134             stream = sys.stderr
 135         self.stream = stream
 136         self.debug_flag = debug
 137
 138     def attach_observer(self, observer):
 139         """
 140         The `observer` parameter is a function or bound method which takes one
 141         argument, a `nodes.system_message` instance.
 142         """
 143         self.observers.append(observer)
 144
 145     def detach_observer(self, observer):
 146         self.observers.remove(observer)
 147
 148     def notify_observers(self, message):
 149         for observer in self.observers:
 150             observer(message)
 151
 152     def system_message(self, level, message, *children, **kwargs):
 153         """
 154         Return a system_message object.
 155
 156         Raise an exception or generate a warning if appropriate.
 157         """
 158         attributes = kwargs.copy()
 159         if kwargs.has_key('base_node'):
 160             source, line = get_source_line(kwargs['base_node'])
 161             del attributes['base_node']
 162             if source is not None:
 163                 attributes.setdefault('source', source)
 164             if line is not None:
 165                 attributes.setdefault('line', line)
 166         attributes.setdefault('source', self.source)
 167         msg = nodes.system_message(message, level=level,
 168                                    type=self.levels[level],
 169                                    *children, **attributes)
 170         if self.stream and (level >= self.report_level
 171                             or self.debug_flag and level == 0):
 172             msgtext = msg.astext().encode(self.encoding, self.error_handler)
 173             print >>self.stream, msgtext
 174         if level >= self.halt_level:
 175             raise SystemMessage(msg, level)
 176         if level > 0 or self.debug_flag:
 177             self.notify_observers(msg)
 178         self.max_level = max(level, self.max_level)
 179         return msg
 180
 181     def debug(self, *args, **kwargs):
 182         """
 183         Level-0, "DEBUG": an internal reporting issue. Typically, there is no
 184         effect on the processing. Level-0 system messages are handled
 185         separately from the others.
 186         """
 187         if self.debug_flag:
 188             return self.system_message(0, *args, **kwargs)
 189
 190     def info(self, *args, **kwargs):
 191         """
 192         Level-1, "INFO": a minor issue that can be ignored. Typically there is
 193         no effect on processing, and level-1 system messages are not reported.
 194         """
 195         return self.system_message(1, *args, **kwargs)
 196
 197     def warning(self, *args, **kwargs):
 198         """
 199         Level-2, "WARNING": an issue that should be addressed. If ignored,
 200         there may be unpredictable problems with the output.
 201         """
 202         return self.system_message(2, *args, **kwargs)
 203
 204     def error(self, *args, **kwargs):
 205         """
 206         Level-3, "ERROR": an error that should be addressed. If ignored, the
 207         output will contain errors.
 208         """
 209         return self.system_message(3, *args, **kwargs)
 210
 211     def severe(self, *args, **kwargs):
 212         """
 213         Level-4, "SEVERE": a severe error that must be addressed. If ignored,
 214         the output will contain severe errors. Typically level-4 system
 215         messages are turned into exceptions which halt processing.
 216         """
 217         return self.system_message(4, *args, **kwargs)
 218
 219
 220 class ExtensionOptionError(DataError): pass
 221 class BadOptionError(ExtensionOptionError): pass
 222 class BadOptionDataError(ExtensionOptionError): pass
 223 class DuplicateOptionError(ExtensionOptionError): pass
 224
 225
 226 def extract_extension_options(field_list, options_spec):
 227     """
 228     Return a dictionary mapping extension option names to converted values.
 229
 230     :Parameters:
 231         - `field_list`: A flat field list without field arguments, where each
 232           field body consists of a single paragraph only.
 233         - `options_spec`: Dictionary mapping known option names to a
 234           conversion function such as `int` or `float`.
 235
 236     :Exceptions:
 237         - `KeyError` for unknown option names.
 238         - `ValueError` for invalid option values (raised by the conversion
 239            function).
 240         - `TypeError` for invalid option value types (raised by conversion
 241            function).
 242         - `DuplicateOptionError` for duplicate options.
 243         - `BadOptionError` for invalid fields.
 244         - `BadOptionDataError` for invalid option data (missing name,
 245           missing data, bad quotes, etc.).
 246     """
 247     option_list = extract_options(field_list)
 248     option_dict = assemble_option_dict(option_list, options_spec)
 249     return option_dict
 250
 251 def extract_options(field_list):
 252     """
 253     Return a list of option (name, value) pairs from field names & bodies.
 254
 255     :Parameter:
 256         `field_list`: A flat field list, where each field name is a single
 257         word and each field body consists of a single paragraph only.
 258
 259     :Exceptions:
 260         - `BadOptionError` for invalid fields.
 261         - `BadOptionDataError` for invalid option data (missing name,
 262           missing data, bad quotes, etc.).
 263     """
 264     option_list = []
 265     for field in field_list:
 266         if len(field[0].astext().split()) != 1:
 267             raise BadOptionError(
 268                 'extension option field name may not contain multiple words')
 269         name = str(field[0].astext().lower())
 270         body = field[1]
 271         if len(body) == 0:
 272             data = None
 273         elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
 274               or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
 275             raise BadOptionDataError(
 276                   'extension option field body may contain\n'
 277                   'a single paragraph only (option "%s")' % name)
 278         else:
 279             data = body[0][0].astext()
 280         option_list.append((name, data))
 281     return option_list
 282
 283 def assemble_option_dict(option_list, options_spec):
 284     """
 285     Return a mapping of option names to values.
 286
 287     :Parameters:
 288         - `option_list`: A list of (name, value) pairs (the output of
 289           `extract_options()`).
 290         - `options_spec`: Dictionary mapping known option names to a
 291           conversion function such as `int` or `float`.
 292
 293     :Exceptions:
 294         - `KeyError` for unknown option names.
 295         - `DuplicateOptionError` for duplicate options.
 296         - `ValueError` for invalid option values (raised by conversion
 297            function).
 298         - `TypeError` for invalid option value types (raised by conversion
 299            function).
 300     """
 301     options = {}
 302     for name, value in option_list:
 303         convertor = options_spec[name]  # raises KeyError if unknown
 304         if convertor is None:
 305             raise KeyError(name)        # or if explicitly disabled
 306         if options.has_key(name):
 307             raise DuplicateOptionError('duplicate option "%s"' % name)
 308         try:
 309             options[name] = convertor(value)
 310         except (ValueError, TypeError), detail:
 311             raise detail.__class__('(option: "%s"; value: %r)\n%s'
 312                                    % (name, value, ' '.join(detail.args)))
 313     return options
 314
 315
 316 class NameValueError(DataError): pass
 317
 318
 319 def extract_name_value(line):
 320     """
 321     Return a list of (name, value) from a line of the form "name=value ...".
 322
 323     :Exception:
 324         `NameValueError` for invalid input (missing name, missing data, bad
 325         quotes, etc.).
 326     """
 327     attlist = []
 328     while line:
 329         equals = line.find('=')
 330         if equals == -1:
 331             raise NameValueError('missing "="')
 332         attname = line[:equals].strip()
 333         if equals == 0 or not attname:
 334             raise NameValueError(
 335                   'missing attribute name before "="')
 336         line = line[equals+1:].lstrip()
 337         if not line:
 338             raise NameValueError(
 339                   'missing value after "%s="' % attname)
 340         if line[0] in '\'"':
 341             endquote = line.find(line[0], 1)
 342             if endquote == -1:
 343                 raise NameValueError(
 344                       'attribute "%s" missing end quote (%s)'
 345                       % (attname, line[0]))
 346             if len(line) > endquote + 1 and line[endquote + 1].strip():
 347                 raise NameValueError(
 348                       'attribute "%s" end quote (%s) not followed by '
 349                       'whitespace' % (attname, line[0]))
 350             data = line[1:endquote]
 351             line = line[endquote+1:].lstrip()
 352         else:
 353             space = line.find(' ')
 354             if space == -1:
 355                 data = line
 356                 line = ''
 357             else:
 358                 data = line[:space]
 359                 line = line[space+1:].lstrip()
 360         attlist.append((attname.lower(), data))
 361     return attlist
 362
 363 def new_reporter(source_path, settings):
 364     """
 365     Return a new Reporter object.
 366
 367     :Parameters:
 368         `source` : string
 369             The path to or description of the source text of the document.
 370         `settings` : optparse.Values object
 371             Runtime settings.
 372     """
 373     reporter = Reporter(
 374         source_path, settings.report_level, settings.halt_level,
 375         stream=settings.warning_stream, debug=settings.debug,
 376         encoding=settings.error_encoding,
 377         error_handler=settings.error_encoding_error_handler)
 378     return reporter
 379
 380 def new_document(source_path, settings=None):
 381     """
 382     Return a new empty document object.
 383
 384     :Parameters:
 385         `source` : string
 386             The path to or description of the source text of the document.
 387         `settings` : optparse.Values object
 388             Runtime settings.  If none provided, a default set will be used.
 389     """
 390     if settings is None:
 391         settings = frontend.OptionParser().get_default_values()
 392     reporter = new_reporter(source_path, settings)
 393     document = nodes.document(settings, reporter, source=source_path)
 394     document.note_source(source_path, -1)
 395     return document
 396
 397 def clean_rcs_keywords(paragraph, keyword_substitutions):
 398     if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
 399         textnode = paragraph[0]
 400         for pattern, substitution in keyword_substitutions:
 401             match = pattern.search(textnode.data)
 402             if match:
 403                 textnode.data = pattern.sub(substitution, textnode.data)
 404                 return
 405
 406 def relative_path(source, target):
 407     """
 408     Build and return a path to `target`, relative to `source` (both files).
 409
 410     If there is no common prefix, return the absolute path to `target`.
 411     """
 412     source_parts = os.path.abspath(source or 'dummy_file').split(os.sep)
 413     target_parts = os.path.abspath(target).split(os.sep)
 414     # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
 415     if source_parts[:2] != target_parts[:2]:
 416         # Nothing in common between paths.
 417         # Return absolute path, using '/' for URLs:
 418         return '/'.join(target_parts)
 419     source_parts.reverse()
 420     target_parts.reverse()
 421     while (source_parts and target_parts
 422            and source_parts[-1] == target_parts[-1]):
 423         # Remove path components in common:
 424         source_parts.pop()
 425         target_parts.pop()
 426     target_parts.reverse()
 427     parts = ['..'] * (len(source_parts) - 1) + target_parts
 428     return '/'.join(parts)
 429
 430 def get_stylesheet_reference(settings, relative_to=None):
 431     """
 432     Retrieve a stylesheet reference from the settings object.
 433     """
 434     if settings.stylesheet_path:
 435         assert not settings.stylesheet, \
 436                'stylesheet and stylesheet_path are mutually exclusive.'
 437         if relative_to == None:
 438             relative_to = settings._destination
 439         return relative_path(relative_to, settings.stylesheet_path)
 440     else:
 441         return settings.stylesheet
 442
 443 def get_trim_footnote_ref_space(settings):
 444     """
 445     Return whether or not to trim footnote space.
 446
 447     If trim_footnote_reference_space is not None, return it.
 448
 449     If trim_footnote_reference_space is None, return False unless the
 450     footnote reference style is 'superscript'.
 451     """
 452     if settings.trim_footnote_reference_space is None:
 453         return hasattr(settings, 'footnote_references') and \
 454                settings.footnote_references == 'superscript'
 455     else:
 456         return settings.trim_footnote_reference_space
 457
 458 def get_source_line(node):
 459     """
 460     Return the "source" and "line" attributes from the `node` given or from
 461     its closest ancestor.
 462     """
 463     while node:
 464         if node.source or node.line:
 465             return node.source, node.line
 466         node = node.parent
 467     return None, None
 468
 469 def escape2null(text):
 470     """Return a string with escape-backslashes converted to nulls."""
 471     parts = []
 472     start = 0
 473     while 1:
 474         found = text.find('\\', start)
 475         if found == -1:
 476             parts.append(text[start:])
 477             return ''.join(parts)
 478         parts.append(text[start:found])
 479         parts.append('\x00' + text[found+1:found+2])
 480         start = found + 2               # skip character after escape
 481
 482 def unescape(text, restore_backslashes=0):
 483     """
 484     Return a string with nulls removed or restored to backslashes.
 485     Backslash-escaped spaces are also removed.
 486     """
 487     if restore_backslashes:
 488         return text.replace('\x00', '\\')
 489     else:
 490         for sep in ['\x00 ', '\x00\n', '\x00']:
 491             text = ''.join(text.split(sep))
 492         return text
 493
 494 east_asian_widths = {'W': 2,   # Wide
 495                      'F': 2,   # Full-width (wide)
 496                      'Na': 1,  # Narrow
 497                      'H': 1,   # Half-width (narrow)
 498                      'N': 1,   # Neutral (not East Asian, treated as narrow)
 499                      'A': 1}   # Ambiguous (s/b wide in East Asian context,
 500                                # narrow otherwise, but that doesn't work)
 501 """Mapping of result codes from `unicodedata.east_asian_width()` to character
 502 column widths."""
 503
 504 def east_asian_column_width(text):
 505     if isinstance(text, types.UnicodeType):
 506         total = 0
 507         for c in text:
 508             total += east_asian_widths[unicodedata.east_asian_width(c)]
 509         return total
 510     else:
 511         return len(text)
 512
 513 if hasattr(unicodedata, 'east_asian_width'):
 514     column_width = east_asian_column_width
 515 else:
 516     column_width = len
 517
 518
 519 class DependencyList:
 520
 521     """
 522     List of dependencies, with file recording support.
 523
 524     Note that the output file is not automatically closed.  You have
 525     to explicitly call the close() method.
 526     """
 527
 528     def __init__(self, output_file=None, dependencies=[]):
 529         """
 530         Initialize the dependency list, automatically setting the
 531         output file to `output_file` (see `set_output()`) and adding
 532         all supplied dependencies.
 533         """
 534         self.set_output(output_file)
 535         for i in dependencies:
 536             self.add(i)
 537
 538     def set_output(self, output_file):
 539         """
 540         Set the output file and clear the list of already added
 541         dependencies.
 542
 543         `output_file` must be a string.  The specified file is
 544         immediately overwritten.
 545
 546         If output_file is '-', the output will be written to stdout.
 547         If it is None, no file output is done when calling add().
 548         """
 549         self.list = []
 550         if output_file == '-':
 551             self.file = sys.stdout
 552         elif output_file:
 553             self.file = open(output_file, 'w')
 554         else:
 555             self.file = None
 556
 557     def add(self, filename):
 558         """
 559         If the dependency `filename` has not already been added,
 560         append it to self.list and print it to self.file if self.file
 561         is not None.
 562         """
 563         if not filename in self.list:
 564             self.list.append(filename)
 565             if self.file is not None:
 566                 print >>self.file, filename
 567
 568     def close(self):
 569         """
 570         Close the output file.
 571         """
 572         self.file.close()
 573         self.file = None
 574
 575     def __repr__(self):
 576         if self.file:
 577             output_file = self.file.name
 578         else:
 579             output_file = None
 580         return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)