replace: fix unused variable warning
[Samba/gebeck_regimport.git] / lib / testtools / testtools / compat.py
blob 2547b88d59748bae31cf9556fa187d1d6f87fbd8
# Copyright (c) 2008-2011 testtools developers. See LICENSE for details.

"""Compatibility support for python 2 and 3."""

__metaclass__ = type
__all__ = [
    '_b',
    '_u',
    'advance_iterator',
    'all',
    'BytesIO',
    'classtypes',
    'isbaseexception',
    'istext',
    'str_is_unicode',
    'StringIO',
    'reraise',
    'unicode_output_stream',
    ]

import codecs
import linecache
import locale
import os
import re
import sys
import traceback
import unicodedata

from testtools.helpers import try_imports

BytesIO = try_imports(['StringIO.StringIO', 'io.BytesIO'])
StringIO = try_imports(['StringIO.StringIO', 'io.StringIO'])

try:
    from testtools import _compat2x as _compat
    _compat
except SyntaxError:
    from testtools import _compat3x as _compat

reraise = _compat.reraise
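
# Illustrative note (assumption, not from the original source): the bare
# ``_compat`` reference in the try block above only marks the import as used,
# which avoids an unused-variable warning; ``reraise`` is presumably called
# with the (exc_class, exc_obj, exc_tb) triple from sys.exc_info() to re-raise
# an exception with its original traceback on either Python version.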


__u_doc = """A function version of the 'u' prefix.

This is needed because the u prefix is not usable in Python 3 but is required
in Python 2 to get a unicode object.

To migrate code that was written as u'\u1234' in Python 2 to 2+3 change
it to be _u('\u1234'). The Python 3 interpreter will decode it
appropriately and the no-op _u for Python 3 lets it through, in Python
2 we then call unicode-escape in the _u function.
"""

if sys.version_info > (3, 0):
    import builtins
    def _u(s):
        return s
    _r = ascii
    def _b(s):
        """A byte literal."""
        return s.encode("latin-1")
    advance_iterator = next
    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
    def istext(x):
        return isinstance(x, str)
    def classtypes():
        return (type,)
    str_is_unicode = True
else:
    import __builtin__ as builtins
    def _u(s):
        # The double replace mangling going on prepares the string for
        # unicode-escape - \foo is preserved, \u and \U are decoded.
        return (s.replace("\\", "\\\\").replace("\\\\u", "\\u")
            .replace("\\\\U", "\\U").decode("unicode-escape"))
    _r = repr
    def _b(s):
        return s
    advance_iterator = lambda it: it.next()
    def istext(x):
        return isinstance(x, basestring)
    def classtypes():
        import types
        return (type, types.ClassType)
    str_is_unicode = sys.platform == "cli"

_u.__doc__ = __u_doc
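
# Illustrative example (based on __u_doc above): writing _u('\u1234') in
# source code yields the one-character unicode string U+1234 on both major
# versions, and _b("foo") yields a byte string (``bytes`` on Python 3, plain
# ``str`` on Python 2).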


if sys.version_info > (2, 5):
    all = all
    _error_repr = BaseException.__repr__
    def isbaseexception(exception):
        """Return whether exception inherits from BaseException only"""
        return (isinstance(exception, BaseException)
            and not isinstance(exception, Exception))
else:
    def all(iterable):
        """If contents of iterable all evaluate as boolean True"""
        for obj in iterable:
            if not obj:
                return False
        return True
    def _error_repr(exception):
        """Format an exception instance as Python 2.5 and later do"""
        return exception.__class__.__name__ + repr(exception.args)
    def isbaseexception(exception):
        """Return whether exception would inherit from BaseException only

        This approximates the hierarchy in Python 2.5 and later, compare the
        difference between the diagrams at the bottom of the pages:
        <http://docs.python.org/release/2.4.4/lib/module-exceptions.html>
        <http://docs.python.org/release/2.5.4/lib/module-exceptions.html>
        """
        return isinstance(exception, (KeyboardInterrupt, SystemExit))
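
# Illustrative example: with either definition above,
#     isbaseexception(KeyboardInterrupt())    # --> True
#     isbaseexception(ValueError("boom"))     # --> False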


# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
# there should be better ways to write code needing this.
if not issubclass(getattr(builtins, "bytes", str), str):
    def _isbytes(x):
        return isinstance(x, bytes)
else:
    # Never return True on Pythons that provide the name but not the real type
    def _isbytes(x):
        return False
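
# Illustrative example: on Python 3, _isbytes(b"data") is True and
# _isbytes("data") is False; on Python 2, where ``bytes`` is just an alias for
# ``str``, _isbytes() always returns False.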


def _slow_escape(text):
    """Escape unicode ``text`` leaving printable characters unmodified

    The behaviour emulates the Python 3 implementation of repr, see
    unicode_repr in unicodeobject.c and isprintable definition.

    Because this iterates over the input a codepoint at a time, it's slow, and
    does not handle astral characters correctly on Python builds with 16 bit
    rather than 32 bit unicode type.
    """
    output = []
    for c in text:
        o = ord(c)
        if o < 256:
            if o < 32 or 126 < o < 161:
                output.append(c.encode("unicode-escape"))
            elif o == 92:
                # Separate due to bug in unicode-escape codec in Python 2.4
                output.append("\\\\")
            else:
                output.append(c)
        else:
            # To get correct behaviour would need to pair up surrogates here
            if unicodedata.category(c)[0] in "CZ":
                output.append(c.encode("unicode-escape"))
            else:
                output.append(c)
    return "".join(output)
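
# Illustrative example (Python 2): printable characters pass through unchanged
# while control and other non-printable characters are escaped, e.g.
#     _slow_escape(u"caf\xe9 \x07") == u"caf\xe9 \\x07"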


def text_repr(text, multiline=None):
    """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``."""
    is_py3k = sys.version_info > (3, 0)
    nl = _isbytes(text) and bytes((0xA,)) or "\n"
    if multiline is None:
        multiline = nl in text
    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
        # Use normal repr for single line of unicode on Python 3 or bytes
        return repr(text)
    prefix = repr(text[:0])[:-2]
    if multiline:
        # To escape multiline strings, split and process each line in turn,
        # making sure that quotes are not escaped.
        if is_py3k:
            offset = len(prefix) + 1
            lines = []
            for l in text.split(nl):
                r = repr(l)
                q = r[-1]
                lines.append(r[offset:-1].replace("\\" + q, q))
        elif not str_is_unicode and isinstance(text, str):
            lines = [l.encode("string-escape").replace("\\'", "'")
                for l in text.split("\n")]
        else:
            lines = [_slow_escape(l) for l in text.split("\n")]
        # Combine the escaped lines and append two of the closing quotes,
        # then iterate over the result to escape triple quotes correctly.
        _semi_done = "\n".join(lines) + "''"
        p = 0
        while True:
            p = _semi_done.find("'''", p)
            if p == -1:
                break
            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
            p += 2
        return "".join([prefix, "'''\\\n", _semi_done, "'"])
    escaped_text = _slow_escape(text)
    # Determine which quote character to use and if one gets prefixed with a
    # backslash following the same logic Python uses for repr() on strings
    quote = "'"
    if "'" in text:
        if '"' in text:
            escaped_text = escaped_text.replace("'", "\\'")
        else:
            quote = '"'
    return "".join([prefix, quote, escaped_text, quote])
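
# Illustrative examples (Python 2, where plain repr() would escape non-ascii):
#     text_repr(u"caf\xe9")                    # u"u'caf\xe9'"
#     text_repr(u"one\ntwo", multiline=True)   # u"u'''\\\none\ntwo'''"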


def unicode_output_stream(stream):
    """Get wrapper for given stream that writes any unicode without exception

    Characters that can't be coerced to the encoding of the stream, or 'ascii'
    if valid encoding is not found, will be replaced. The original stream may
    be returned in situations where a wrapper is determined unneeded.

    The wrapper only allows unicode to be written, not non-ascii bytestrings,
    which is a good thing to ensure sanity and sanitation.
    """
    if sys.platform == "cli":
        # Best to never encode before writing in IronPython
        return stream
    try:
        writer = codecs.getwriter(stream.encoding or "")
    except (AttributeError, LookupError):
        # GZ 2010-06-16: Python 3 StringIO ends up here, but probably needs
        # different handling as it doesn't want bytestrings
        return codecs.getwriter("ascii")(stream, "replace")
    if writer.__module__.rsplit(".", 1)[1].startswith("utf"):
        # The current stream has a unicode encoding so no error handler is needed
        if sys.version_info > (3, 0):
            return stream
        return writer(stream)
    if sys.version_info > (3, 0):
        # Python 3 doesn't seem to make this easy, handle a common case
        try:
            return stream.__class__(stream.buffer, stream.encoding, "replace",
                stream.newlines, stream.line_buffering)
        except AttributeError:
            pass
    return writer(stream, "replace")
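
# Typical use (illustrative sketch): wrap an ascii-only stream such as
# sys.stdout so that writing arbitrary unicode never raises
# UnicodeEncodeError; unencodable characters are replaced instead.
#     out = unicode_output_stream(sys.stdout)
#     out.write(_u('ol\xe9\n'))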


# The default source encoding is actually "iso-8859-1" until Python 2.5 but
# using non-ascii causes a deprecation warning in 2.4 and it's cleaner to
# treat all versions the same way
_default_source_encoding = "ascii"

# Pattern specified in <http://www.python.org/dev/peps/pep-0263/>
_cookie_search=re.compile("coding[:=]\s*([-\w.]+)").search

def _detect_encoding(lines):
    """Get the encoding of a Python source file from a list of lines as bytes

    This function does less than tokenize.detect_encoding added in Python 3 as
    it does not attempt to raise a SyntaxError when the interpreter would, it
    just wants the encoding of a source file Python has already compiled and
    determined is valid.
    """
    if not lines:
        return _default_source_encoding
    if lines[0].startswith("\xef\xbb\xbf"):
        # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError
        return "utf-8"
    # Only the first two lines of the source file are examined
    magic = _cookie_search("".join(lines[:2]))
    if magic is None:
        return _default_source_encoding
    encoding = magic.group(1)
    try:
        codecs.lookup(encoding)
    except LookupError:
        # Some codecs raise something other than LookupError if they don't
        # support the given error handler, but not the text ones that could
        # actually be used for Python source code
        return _default_source_encoding
    return encoding
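
# Illustrative example: a PEP 263 coding cookie in the first two lines is
# honoured, while source without a valid cookie falls back to
# _default_source_encoding.
#     _detect_encoding(["# -*- coding: utf-8 -*-\n", "x = 1\n"]) == "utf-8"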


class _EncodingTuple(tuple):
    """A tuple type that can have an encoding attribute smuggled on"""


def _get_source_encoding(filename):
    """Detect, cache and return the encoding of Python source at filename"""
    try:
        return linecache.cache[filename].encoding
    except (AttributeError, KeyError):
        encoding = _detect_encoding(linecache.getlines(filename))
        if filename in linecache.cache:
            newtuple = _EncodingTuple(linecache.cache[filename])
            newtuple.encoding = encoding
            linecache.cache[filename] = newtuple
        return encoding


def _get_exception_encoding():
    """Return the encoding we expect messages from the OS to be encoded in"""
    if os.name == "nt":
        # GZ 2010-05-24: Really want the codepage number instead, the error
        # handling of standard codecs is more deterministic
        return "mbcs"
    # GZ 2010-05-23: We need this call to be after initialisation, but there's
    # no benefit in asking more than once as it's a global
    # setting that can change after the message is formatted.
    return locale.getlocale(locale.LC_MESSAGES)[1] or "ascii"


def _exception_to_text(evalue):
    """Try hard to get a sensible text value out of an exception instance"""
    try:
        return unicode(evalue)
    except KeyboardInterrupt:
        raise
    except:
        # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
        pass
    try:
        return str(evalue).decode(_get_exception_encoding(), "replace")
    except KeyboardInterrupt:
        raise
    except:
        # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
        pass
    # Okay, out of ideas, let higher level handle it
    return None
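
# Illustrative behaviour: ``unicode(evalue)`` is tried first, then a
# byte-string rendering decoded with the OS message encoding (using "replace"
# so undecodable bytes cannot raise); if both attempts fail the caller gets
# None and must provide its own fallback text.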


# GZ 2010-05-23: This function is huge and horrible and I welcome suggestions
# on the best way to break it up
_TB_HEADER = _u('Traceback (most recent call last):\n')
def _format_exc_info(eclass, evalue, tb, limit=None):
    """Format a stack trace and the exception information as unicode

    Compatibility function for Python 2 which ensures each component of a
    traceback is correctly decoded according to its origins.

    Based on traceback.format_exception and related functions.
    """
    fs_enc = sys.getfilesystemencoding()
    if tb:
        list = [_TB_HEADER]
        extracted_list = []
        for filename, lineno, name, line in traceback.extract_tb(tb, limit):
            extracted_list.append((
                filename.decode(fs_enc, "replace"),
                lineno,
                name.decode("ascii", "replace"),
                line and line.decode(
                    _get_source_encoding(filename), "replace")))
        list.extend(traceback.format_list(extracted_list))
    else:
        list = []
    if evalue is None:
        # Is a (deprecated) string exception
        list.append((eclass + "\n").decode("ascii", "replace"))
        return list
    if isinstance(evalue, SyntaxError):
        # Avoid duplicating the special formatting for SyntaxError here,
        # instead create a new instance with unicode filename and line
        # Potentially gives duff spacing, but that's a pre-existing issue
        try:
            msg, (filename, lineno, offset, line) = evalue
        except (TypeError, ValueError):
            pass # Strange exception instance, fall through to generic code
        else:
            # Errors during parsing give the line from buffer encoded as
            # latin-1 or utf-8 or the encoding of the file depending on the
            # coding and whether the patch for issue #1031213 is applied, so
            # give up on trying to decode it and just read the file again
            if line:
                bytestr = linecache.getline(filename, lineno)
                if bytestr:
                    if lineno == 1 and bytestr.startswith("\xef\xbb\xbf"):
                        bytestr = bytestr[3:]
                    line = bytestr.decode(
                        _get_source_encoding(filename), "replace")
                    del linecache.cache[filename]
                else:
                    line = line.decode("ascii", "replace")
            if filename:
                filename = filename.decode(fs_enc, "replace")
            evalue = eclass(msg, (filename, lineno, offset, line))
        list.extend(traceback.format_exception_only(eclass, evalue))
        return list
    sclass = eclass.__name__
    svalue = _exception_to_text(evalue)
    if svalue:
        list.append("%s: %s\n" % (sclass, svalue))
    elif svalue is None:
        # GZ 2010-05-24: Not a great fallback message, but keep for the moment
        list.append("%s: <unprintable %s object>\n" % (sclass, sclass))
    else:
        list.append("%s\n" % sclass)
    return list
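
# Typical use (illustrative sketch): called from an except block with the
# values from sys.exc_info(), much like traceback.format_exception:
#     try:
#         raise ValueError(u"bad \u1234")
#     except ValueError:
#         lines = _format_exc_info(*sys.exc_info())
#     # ``lines`` is a list of unicode strings ending with the exception line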