docutils/docutils/utils/__init__.py

   1 # coding: utf-8
   2 # $Id$
   3 # Author: David Goodger <goodger@python.org>
   4 # Copyright: This module has been placed in the public domain.
   5
   6 """
   7 Miscellaneous utilities for the documentation utilities.
   8 """
   9
  10 __docformat__ = 'reStructuredText'
  11
  12 import sys
  13 import os
  14 import os.path
  15 import re
  16 import itertools
  17 import warnings
  18 import unicodedata
  19 from docutils import ApplicationError, DataError, __version_info__
  20 from docutils import nodes
  21 import docutils.io
  22 from docutils.utils.error_reporting import ErrorOutput, SafeString
  23
  24
  25 class SystemMessage(ApplicationError):
  26
  27     def __init__(self, system_message, level):
  28         Exception.__init__(self, system_message.astext())
  29         self.level = level
  30
  31
  32 class SystemMessagePropagation(ApplicationError): pass
  33
  34
  35 class Reporter:
  36
  37     """
  38     Info/warning/error reporter and ``system_message`` element generator.
  39
  40     Five levels of system messages are defined, along with corresponding
  41     methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`.
  42
  43     There is typically one Reporter object per process.  A Reporter object is
  44     instantiated with thresholds for reporting (generating warnings) and
  45     halting processing (raising exceptions), a switch to turn debug output on
  46     or off, and an I/O stream for warnings.  These are stored as instance
  47     attributes.
  48
  49     When a system message is generated, its level is compared to the stored
  50     thresholds, and a warning or error is generated as appropriate.  Debug
  51     messages are produced if the stored debug switch is on, independently of
  52     other thresholds.  Message output is sent to the stored warning stream if
  53     not set to ''.
  54
  55     The Reporter class also employs a modified form of the "Observer" pattern
  56     [GoF95]_ to track system messages generated.  The `attach_observer` method
  57     should be called before parsing, with a bound method or function which
  58     accepts system messages.  The observer can be removed with
  59     `detach_observer`, and another added in its place.
  60
  61     .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
  62        Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
  63        1995.
  64     """
  65
  66     levels = 'DEBUG INFO WARNING ERROR SEVERE'.split()
  67     """List of names for system message levels, indexed by level."""
  68
  69     # system message level constants:
  70     (DEBUG_LEVEL,
  71      INFO_LEVEL,
  72      WARNING_LEVEL,
  73      ERROR_LEVEL,
  74      SEVERE_LEVEL) = range(5)
  75
  76     def __init__(self, source, report_level, halt_level, stream=None,
  77                  debug=False, encoding=None, error_handler='backslashreplace'):
  78         """
  79         :Parameters:
  80             - `source`: The path to or description of the source data.
  81             - `report_level`: The level at or above which warning output will
  82               be sent to `stream`.
  83             - `halt_level`: The level at or above which `SystemMessage`
  84               exceptions will be raised, halting execution.
  85             - `debug`: Show debug (level=0) system messages?
  86             - `stream`: Where warning output is sent.  Can be file-like (has a
  87               ``.write`` method), a string (file name, opened for writing),
  88               '' (empty string) or `False` (for discarding all stream messages)
  89               or `None` (implies `sys.stderr`; default).
  90             - `encoding`: The output encoding.
  91             - `error_handler`: The error handler for stderr output encoding.
  92         """
  93
  94         self.source = source
  95         """The path to or description of the source data."""
  96
  97         self.error_handler = error_handler
  98         """The character encoding error handler."""
  99
 100         self.debug_flag = debug
 101         """Show debug (level=0) system messages?"""
 102
 103         self.report_level = report_level
 104         """The level at or above which warning output will be sent
 105         to `self.stream`."""
 106
 107         self.halt_level = halt_level
 108         """The level at or above which `SystemMessage` exceptions
 109         will be raised, halting execution."""
 110
 111         if not isinstance(stream, ErrorOutput):
 112             stream = ErrorOutput(stream, encoding, error_handler)
 113
 114         self.stream = stream
 115         """Where warning output is sent."""
 116
 117         self.encoding = encoding or getattr(stream, 'encoding', 'ascii')
 118         """The output character encoding."""
 119
 120         self.observers = []
 121         """List of bound methods or functions to call with each system_message
 122         created."""
 123
 124         self.max_level = -1
 125         """The highest level system message generated so far."""
 126
 127     def set_conditions(self, category, report_level, halt_level,
 128                        stream=None, debug=False):
 129         warnings.warn('docutils.utils.Reporter.set_conditions deprecated; '
 130                       'set attributes via configuration settings or directly',
 131                       DeprecationWarning, stacklevel=2)
 132         self.report_level = report_level
 133         self.halt_level = halt_level
 134         if not isinstance(stream, ErrorOutput):
 135             stream = ErrorOutput(stream, self.encoding, self.error_handler)
 136         self.stream = stream
 137         self.debug_flag = debug
 138
 139     def attach_observer(self, observer):
 140         """
 141         The `observer` parameter is a function or bound method which takes one
 142         argument, a `nodes.system_message` instance.
 143         """
 144         self.observers.append(observer)
 145
 146     def detach_observer(self, observer):
 147         self.observers.remove(observer)
 148
 149     def notify_observers(self, message):
 150         for observer in self.observers:
 151             observer(message)
 152
 153     def system_message(self, level, message, *children, **kwargs):
 154         """
 155         Return a system_message object.
 156
 157         Raise an exception or generate a warning if appropriate.
 158         """
 159         # `message` can be a `string`, `unicode`, or `Exception` instance.
 160         if isinstance(message, Exception):
 161             message = SafeString(message)
 162
 163         attributes = kwargs.copy()
 164         if 'base_node' in kwargs:
 165             source, line = get_source_line(kwargs['base_node'])
 166             del attributes['base_node']
 167             if source is not None:
 168                 attributes.setdefault('source', source)
 169             if line is not None:
 170                 attributes.setdefault('line', line)
 171                 # assert source is not None, "node has line- but no source-argument"
 172         if not 'source' in attributes: # 'line' is absolute line number
 173             try: # look up (source, line-in-source)
 174                 source, line = self.get_source_and_line(attributes.get('line'))
 175                 # print "locator lookup", kwargs.get('line'), "->", source, line
 176             except AttributeError:
 177                 source, line = None, None
 178             if source is not None:
 179                 attributes['source'] = source
 180             if line is not None:
 181                 attributes['line'] = line
 182         # assert attributes['line'] is not None, (message, kwargs)
 183         # assert attributes['source'] is not None, (message, kwargs)
 184         attributes.setdefault('source', self.source)
 185
 186         msg = nodes.system_message(message, level=level,
 187                                    type=self.levels[level],
 188                                    *children, **attributes)
 189         if self.stream and (level >= self.report_level
 190                             or self.debug_flag and level == self.DEBUG_LEVEL
 191                             or level >= self.halt_level):
 192             self.stream.write(msg.astext() + '\n')
 193         if level >= self.halt_level:
 194             raise SystemMessage(msg, level)
 195         if level > self.DEBUG_LEVEL or self.debug_flag:
 196             self.notify_observers(msg)
 197         self.max_level = max(level, self.max_level)
 198         return msg
 199
 200     def debug(self, *args, **kwargs):
 201         """
 202         Level-0, "DEBUG": an internal reporting issue. Typically, there is no
 203         effect on the processing. Level-0 system messages are handled
 204         separately from the others.
 205         """
 206         if self.debug_flag:
 207             return self.system_message(self.DEBUG_LEVEL, *args, **kwargs)
 208
 209     def info(self, *args, **kwargs):
 210         """
 211         Level-1, "INFO": a minor issue that can be ignored. Typically there is
 212         no effect on processing, and level-1 system messages are not reported.
 213         """
 214         return self.system_message(self.INFO_LEVEL, *args, **kwargs)
 215
 216     def warning(self, *args, **kwargs):
 217         """
 218         Level-2, "WARNING": an issue that should be addressed. If ignored,
 219         there may be unpredictable problems with the output.
 220         """
 221         return self.system_message(self.WARNING_LEVEL, *args, **kwargs)
 222
 223     def error(self, *args, **kwargs):
 224         """
 225         Level-3, "ERROR": an error that should be addressed. If ignored, the
 226         output will contain errors.
 227         """
 228         return self.system_message(self.ERROR_LEVEL, *args, **kwargs)
 229
 230     def severe(self, *args, **kwargs):
 231         """
 232         Level-4, "SEVERE": a severe error that must be addressed. If ignored,
 233         the output will contain severe errors. Typically level-4 system
 234         messages are turned into exceptions which halt processing.
 235         """
 236         return self.system_message(self.SEVERE_LEVEL, *args, **kwargs)
 237
 238
 239 class ExtensionOptionError(DataError): pass
 240 class BadOptionError(ExtensionOptionError): pass
 241 class BadOptionDataError(ExtensionOptionError): pass
 242 class DuplicateOptionError(ExtensionOptionError): pass
 243
 244
 245 def extract_extension_options(field_list, options_spec):
 246     """
 247     Return a dictionary mapping extension option names to converted values.
 248
 249     :Parameters:
 250         - `field_list`: A flat field list without field arguments, where each
 251           field body consists of a single paragraph only.
 252         - `options_spec`: Dictionary mapping known option names to a
 253           conversion function such as `int` or `float`.
 254
 255     :Exceptions:
 256         - `KeyError` for unknown option names.
 257         - `ValueError` for invalid option values (raised by the conversion
 258            function).
 259         - `TypeError` for invalid option value types (raised by conversion
 260            function).
 261         - `DuplicateOptionError` for duplicate options.
 262         - `BadOptionError` for invalid fields.
 263         - `BadOptionDataError` for invalid option data (missing name,
 264           missing data, bad quotes, etc.).
 265     """
 266     option_list = extract_options(field_list)
 267     option_dict = assemble_option_dict(option_list, options_spec)
 268     return option_dict
 269
 270 def extract_options(field_list):
 271     """
 272     Return a list of option (name, value) pairs from field names & bodies.
 273
 274     :Parameter:
 275         `field_list`: A flat field list, where each field name is a single
 276         word and each field body consists of a single paragraph only.
 277
 278     :Exceptions:
 279         - `BadOptionError` for invalid fields.
 280         - `BadOptionDataError` for invalid option data (missing name,
 281           missing data, bad quotes, etc.).
 282     """
 283     option_list = []
 284     for field in field_list:
 285         if len(field[0].astext().split()) != 1:
 286             raise BadOptionError(
 287                 'extension option field name may not contain multiple words')
 288         name = str(field[0].astext().lower())
 289         body = field[1]
 290         if len(body) == 0:
 291             data = None
 292         elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \
 293               or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text):
 294             raise BadOptionDataError(
 295                   'extension option field body may contain\n'
 296                   'a single paragraph only (option "%s")' % name)
 297         else:
 298             data = body[0][0].astext()
 299         option_list.append((name, data))
 300     return option_list
 301
 302 def assemble_option_dict(option_list, options_spec):
 303     """
 304     Return a mapping of option names to values.
 305
 306     :Parameters:
 307         - `option_list`: A list of (name, value) pairs (the output of
 308           `extract_options()`).
 309         - `options_spec`: Dictionary mapping known option names to a
 310           conversion function such as `int` or `float`.
 311
 312     :Exceptions:
 313         - `KeyError` for unknown option names.
 314         - `DuplicateOptionError` for duplicate options.
 315         - `ValueError` for invalid option values (raised by conversion
 316            function).
 317         - `TypeError` for invalid option value types (raised by conversion
 318            function).
 319     """
 320     options = {}
 321     for name, value in option_list:
 322         convertor = options_spec[name]  # raises KeyError if unknown
 323         if convertor is None:
 324             raise KeyError(name)        # or if explicitly disabled
 325         if name in options:
 326             raise DuplicateOptionError('duplicate option "%s"' % name)
 327         try:
 328             options[name] = convertor(value)
 329         except (ValueError, TypeError), detail:
 330             raise detail.__class__('(option: "%s"; value: %r)\n%s'
 331                                    % (name, value, ' '.join(detail.args)))
 332     return options
 333
 334
 335 class NameValueError(DataError): pass
 336
 337
 338 def decode_path(path):
 339     """
 340     Ensure `path` is Unicode. Return `nodes.reprunicode` object.
 341
 342     Decode file/path string in a failsave manner if not already done.
 343     """
 344     # see also http://article.gmane.org/gmane.text.docutils.user/2905
 345     if isinstance(path, unicode):
 346         return path
 347     try:
 348         path = path.decode(sys.getfilesystemencoding(), 'strict')
 349     except AttributeError: # default value None has no decode method
 350         return nodes.reprunicode(path)
 351     except UnicodeDecodeError:
 352         try:
 353             path = path.decode('utf-8', 'strict')
 354         except UnicodeDecodeError:
 355             path = path.decode('ascii', 'replace')
 356     return nodes.reprunicode(path)
 357
 358
 359 def extract_name_value(line):
 360     """
 361     Return a list of (name, value) from a line of the form "name=value ...".
 362
 363     :Exception:
 364         `NameValueError` for invalid input (missing name, missing data, bad
 365         quotes, etc.).
 366     """
 367     attlist = []
 368     while line:
 369         equals = line.find('=')
 370         if equals == -1:
 371             raise NameValueError('missing "="')
 372         attname = line[:equals].strip()
 373         if equals == 0 or not attname:
 374             raise NameValueError(
 375                   'missing attribute name before "="')
 376         line = line[equals+1:].lstrip()
 377         if not line:
 378             raise NameValueError(
 379                   'missing value after "%s="' % attname)
 380         if line[0] in '\'"':
 381             endquote = line.find(line[0], 1)
 382             if endquote == -1:
 383                 raise NameValueError(
 384                       'attribute "%s" missing end quote (%s)'
 385                       % (attname, line[0]))
 386             if len(line) > endquote + 1 and line[endquote + 1].strip():
 387                 raise NameValueError(
 388                       'attribute "%s" end quote (%s) not followed by '
 389                       'whitespace' % (attname, line[0]))
 390             data = line[1:endquote]
 391             line = line[endquote+1:].lstrip()
 392         else:
 393             space = line.find(' ')
 394             if space == -1:
 395                 data = line
 396                 line = ''
 397             else:
 398                 data = line[:space]
 399                 line = line[space+1:].lstrip()
 400         attlist.append((attname.lower(), data))
 401     return attlist
 402
 403 def new_reporter(source_path, settings):
 404     """
 405     Return a new Reporter object.
 406
 407     :Parameters:
 408         `source` : string
 409             The path to or description of the source text of the document.
 410         `settings` : optparse.Values object
 411             Runtime settings.
 412     """
 413     reporter = Reporter(
 414         source_path, settings.report_level, settings.halt_level,
 415         stream=settings.warning_stream, debug=settings.debug,
 416         encoding=settings.error_encoding,
 417         error_handler=settings.error_encoding_error_handler)
 418     return reporter
 419
 420 def new_document(source_path, settings=None):
 421     """
 422     Return a new empty document object.
 423
 424     :Parameters:
 425         `source_path` : string
 426             The path to or description of the source text of the document.
 427         `settings` : optparse.Values object
 428             Runtime settings.  If none are provided, a default core set will
 429             be used.  If you will use the document object with any Docutils
 430             components, you must provide their default settings as well.  For
 431             example, if parsing, at least provide the parser settings,
 432             obtainable as follows::
 433
 434                 settings = docutils.frontend.OptionParser(
 435                     components=(docutils.parsers.rst.Parser,)
 436                     ).get_default_values()
 437     """
 438     from docutils import frontend
 439     if settings is None:
 440         settings = frontend.OptionParser().get_default_values()
 441     source_path = decode_path(source_path)
 442     reporter = new_reporter(source_path, settings)
 443     document = nodes.document(settings, reporter, source=source_path)
 444     document.note_source(source_path, -1)
 445     return document
 446
 447 def clean_rcs_keywords(paragraph, keyword_substitutions):
 448     if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text):
 449         textnode = paragraph[0]
 450         for pattern, substitution in keyword_substitutions:
 451             match = pattern.search(textnode)
 452             if match:
 453                 paragraph[0] = nodes.Text(pattern.sub(substitution, textnode))
 454                 return
 455
 456 def relative_path(source, target):
 457     """
 458     Build and return a path to `target`, relative to `source` (both files).
 459
 460     If there is no common prefix, return the absolute path to `target`.
 461     """
 462     source_parts = os.path.abspath(source or type(target)('dummy_file')
 463                                   ).split(os.sep)
 464     target_parts = os.path.abspath(target).split(os.sep)
 465     # Check first 2 parts because '/dir'.split('/') == ['', 'dir']:
 466     if source_parts[:2] != target_parts[:2]:
 467         # Nothing in common between paths.
 468         # Return absolute path, using '/' for URLs:
 469         return '/'.join(target_parts)
 470     source_parts.reverse()
 471     target_parts.reverse()
 472     while (source_parts and target_parts
 473            and source_parts[-1] == target_parts[-1]):
 474         # Remove path components in common:
 475         source_parts.pop()
 476         target_parts.pop()
 477     target_parts.reverse()
 478     parts = ['..'] * (len(source_parts) - 1) + target_parts
 479     return '/'.join(parts)
 480
 481 def get_stylesheet_reference(settings, relative_to=None):
 482     """
 483     Retrieve a stylesheet reference from the settings object.
 484
 485     Deprecated. Use get_stylesheet_list() instead to
 486     enable specification of multiple stylesheets as a comma-separated
 487     list.
 488     """
 489     if settings.stylesheet_path:
 490         assert not settings.stylesheet, (
 491             'stylesheet and stylesheet_path are mutually exclusive.')
 492         if relative_to == None:
 493             relative_to = settings._destination
 494         return relative_path(relative_to, settings.stylesheet_path)
 495     else:
 496         return settings.stylesheet
 497
 498 # Return 'stylesheet' or 'stylesheet_path' arguments as list.
 499 #
 500 # The original settings arguments are kept unchanged: you can test
 501 # with e.g. ``if settings.stylesheet_path:``
 502 #
 503 # Differences to ``get_stylesheet_reference``:
 504 # * return value is a list
 505 # * no re-writing of the path (and therefore no optional argument)
 506 #   (if required, use ``utils.relative_path(source, target)``
 507 #   in the calling script)
 508 def get_stylesheet_list(settings):
 509     """
 510     Retrieve list of stylesheet references from the settings object.
 511     """
 512     assert not (settings.stylesheet and settings.stylesheet_path), (
 513             'stylesheet and stylesheet_path are mutually exclusive.')
 514     stylesheets = settings.stylesheet_path or settings.stylesheet or []
 515     # programmatically set default can be string or unicode:
 516     if not isinstance(stylesheets, list):
 517         stylesheets = [path.strip() for path in stylesheets.split(',')]
 518     # expand relative paths if found in stylesheet-dirs:
 519     return [find_file_in_dirs(path, settings.stylesheet_dirs)
 520             for path in stylesheets]
 521
 522 def find_file_in_dirs(path, dirs):
 523     """
 524     Search for `path` in the list of directories `dirs`.
 525
 526     Return the first expansion that matches an existing file.
 527     """
 528     if os.path.isabs(path):
 529         return path
 530     for d in dirs:
 531         if d == '.':
 532             f = path
 533         else:
 534             d = os.path.expanduser(d)
 535             f = os.path.join(d, path)
 536         if os.path.exists(f):
 537             return f
 538     return path
 539
 540 def get_trim_footnote_ref_space(settings):
 541     """
 542     Return whether or not to trim footnote space.
 543
 544     If trim_footnote_reference_space is not None, return it.
 545
 546     If trim_footnote_reference_space is None, return False unless the
 547     footnote reference style is 'superscript'.
 548     """
 549     if settings.trim_footnote_reference_space is None:
 550         return hasattr(settings, 'footnote_references') and \
 551                settings.footnote_references == 'superscript'
 552     else:
 553         return settings.trim_footnote_reference_space
 554
 555 def get_source_line(node):
 556     """
 557     Return the "source" and "line" attributes from the `node` given or from
 558     its closest ancestor.
 559     """
 560     while node:
 561         if node.source or node.line:
 562             return node.source, node.line
 563         node = node.parent
 564     return None, None
 565
 566 def escape2null(text):
 567     """Return a string with escape-backslashes converted to nulls."""
 568     parts = []
 569     start = 0
 570     while True:
 571         found = text.find('\\', start)
 572         if found == -1:
 573             parts.append(text[start:])
 574             return ''.join(parts)
 575         parts.append(text[start:found])
 576         parts.append('\x00' + text[found+1:found+2])
 577         start = found + 2               # skip character after escape
 578
 579 def unescape(text, restore_backslashes=False, respect_whitespace=False):
 580     """
 581     Return a string with nulls removed or restored to backslashes.
 582     Backslash-escaped spaces are also removed.
 583     """
 584     if restore_backslashes:
 585         return text.replace('\x00', '\\')
 586     else:
 587         for sep in ['\x00 ', '\x00\n', '\x00']:
 588             text = ''.join(text.split(sep))
 589         return text
 590
 591 def split_escaped_whitespace(text):
 592     """
 593     Split `text` on escaped whitespace (null+space or null+newline).
 594     Return a list of strings.
 595     """
 596     strings = text.split('\x00 ')
 597     strings = [string.split('\x00\n') for string in strings]
 598     # flatten list of lists of strings to list of strings:
 599     return list(itertools.chain(*strings))
 600
 601 def strip_combining_chars(text):
 602     if isinstance(text, str) and sys.version_info < (3,0):
 603         return text
 604     return u''.join([c for c in text if not unicodedata.combining(c)])
 605
 606 def find_combining_chars(text):
 607     """Return indices of all combining chars in  Unicode string `text`.
 608
 609     >>> from docutils.utils import find_combining_chars
 610     >>> find_combining_chars(u'A t̆ab̆lĕ')
 611     [3, 6, 9]
 612
 613     """
 614     if isinstance(text, str) and sys.version_info < (3,0):
 615         return []
 616     return [i for i,c in enumerate(text) if unicodedata.combining(c)]
 617
 618 def column_indices(text):
 619     """Indices of Unicode string `text` when skipping combining characters.
 620
 621     >>> from docutils.utils import column_indices
 622     >>> column_indices(u'A t̆ab̆lĕ')
 623     [0, 1, 2, 4, 5, 7, 8]
 624
 625     """
 626     # TODO: account for asian wide chars here instead of using dummy
 627     # replacements in the tableparser?
 628     string_indices = range(len(text))
 629     for index in find_combining_chars(text):
 630         string_indices[index] = None
 631     return [i for i in string_indices if i is not None]
 632
 633 east_asian_widths = {'W': 2,   # Wide
 634                      'F': 2,   # Full-width (wide)
 635                      'Na': 1,  # Narrow
 636                      'H': 1,   # Half-width (narrow)
 637                      'N': 1,   # Neutral (not East Asian, treated as narrow)
 638                      'A': 1}   # Ambiguous (s/b wide in East Asian context,
 639                                # narrow otherwise, but that doesn't work)
 640 """Mapping of result codes from `unicodedata.east_asian_widt()` to character
 641 column widths."""
 642
 643 def column_width(text):
 644     """Return the column width of text.
 645
 646     Correct ``len(text)`` for wide East Asian and combining Unicode chars.
 647     """
 648     if isinstance(text, str) and sys.version_info < (3,0):
 649         return len(text)
 650     width = sum([east_asian_widths[unicodedata.east_asian_width(c)]
 651                  for c in text])
 652     # correction for combining chars:
 653     width -= len(find_combining_chars(text))
 654     return width
 655
 656 def uniq(L):
 657      r = []
 658      for item in L:
 659          if not item in r:
 660              r.append(item)
 661      return r
 662
 663 def unique_combinations(items, n):
 664     """Return `itertools.combinations`."""
 665     warnings.warn('docutils.utils.unique_combinations is deprecated; '
 666                   'use itertools.combinations directly.',
 667                       DeprecationWarning, stacklevel=2)
 668     return itertools.combinations(items, n)
 669
 670 def normalize_language_tag(tag):
 671     """Return a list of normalized combinations for a `BCP 47` language tag.
 672
 673     Example:
 674
 675     >>> from docutils.utils import normalize_language_tag
 676     >>> normalize_language_tag('de_AT-1901')
 677     ['de-at-1901', 'de-at', 'de-1901', 'de']
 678     >>> normalize_language_tag('de-CH-x_altquot')
 679     ['de-ch-x-altquot', 'de-ch', 'de-x-altquot', 'de']
 680
 681     """
 682     # normalize:
 683     tag = tag.lower().replace('-','_')
 684     # split (except singletons, which mark the following tag as non-standard):
 685     tag = re.sub(r'_([a-zA-Z0-9])_', r'_\1-', tag)
 686     subtags = [subtag for subtag in tag.split('_')]
 687     base_tag = (subtags.pop(0),)
 688     # find all combinations of subtags
 689     taglist = []
 690     for n in range(len(subtags), 0, -1):
 691         # for tags in unique_combinations(subtags, n):
 692         for tags in itertools.combinations(subtags, n):
 693             taglist.append('-'.join(base_tag+tags))
 694     taglist += base_tag
 695     return taglist
 696
 697
 698 class DependencyList(object):
 699
 700     """
 701     List of dependencies, with file recording support.
 702
 703     Note that the output file is not automatically closed.  You have
 704     to explicitly call the close() method.
 705     """
 706
 707     def __init__(self, output_file=None, dependencies=[]):
 708         """
 709         Initialize the dependency list, automatically setting the
 710         output file to `output_file` (see `set_output()`) and adding
 711         all supplied dependencies.
 712         """
 713         self.set_output(output_file)
 714         for i in dependencies:
 715             self.add(i)
 716
 717     def set_output(self, output_file):
 718         """
 719         Set the output file and clear the list of already added
 720         dependencies.
 721
 722         `output_file` must be a string.  The specified file is
 723         immediately overwritten.
 724
 725         If output_file is '-', the output will be written to stdout.
 726         If it is None, no file output is done when calling add().
 727         """
 728         self.list = []
 729         if output_file:
 730             if output_file == '-':
 731                 of = None
 732             else:
 733                 of = output_file
 734             self.file = docutils.io.FileOutput(destination_path=of,
 735                                    encoding='utf8', autoclose=False)
 736         else:
 737             self.file = None
 738
 739     def add(self, *filenames):
 740         """
 741         If the dependency `filename` has not already been added,
 742         append it to self.list and print it to self.file if self.file
 743         is not None.
 744         """
 745         for filename in filenames:
 746             if not filename in self.list:
 747                 self.list.append(filename)
 748                 if self.file is not None:
 749                     self.file.write(filename+'\n')
 750
 751     def close(self):
 752         """
 753         Close the output file.
 754         """
 755         self.file.close()
 756         self.file = None
 757
 758     def __repr__(self):
 759         try:
 760             output_file = self.file.name
 761         except AttributeError:
 762             output_file = None
 763         return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list)
 764
 765
 766 release_level_abbreviations = {
 767     'alpha':     'a',
 768     'beta':      'b',
 769     'candidate': 'rc',
 770     'final':     '',}
 771
 772 def version_identifier(version_info=None):
 773     """
 774     Return a version identifier string built from `version_info`, a
 775     `docutils.VersionInfo` namedtuple instance or compatible tuple. If
 776     `version_info` is not provided, by default return a version identifier
 777     string based on `docutils.__version_info__` (i.e. the current Docutils
 778     version).
 779     """
 780     if version_info is None:
 781         version_info = __version_info__
 782     if version_info.micro:
 783         micro = '.%s' % version_info.micro
 784     else:
 785         # 0 is omitted:
 786         micro = ''
 787     releaselevel = release_level_abbreviations[version_info.releaselevel]
 788     if version_info.serial:
 789         serial = version_info.serial
 790     else:
 791         # 0 is omitted:
 792         serial = ''
 793     if version_info.release:
 794         dev = ''
 795     else:
 796         dev = '.dev'
 797     version = '%s.%s%s%s%s%s' % (
 798         version_info.major,
 799         version_info.minor,
 800         micro,
 801         releaselevel,
 802         serial,
 803         dev)
 804     return version