1 # Copyright (c) 2008-2011 testtools developers. See LICENSE for details.
3 """Compatibility support for python 2 and 3."""
18 'unicode_output_stream',
30 from testtools
.helpers
import try_imports
32 BytesIO
= try_imports(['StringIO.StringIO', 'io.BytesIO'])
33 StringIO
= try_imports(['StringIO.StringIO', 'io.StringIO'])
36 from testtools
import _compat2x
as _compat
38 from testtools
import _compat3x
as _compat
40 reraise
= _compat
.reraise
43 __u_doc
= """A function version of the 'u' prefix.
45 This is needed because the u prefix is not usable in Python 3 but is required
46 in Python 2 to get a unicode object.
48 To migrate code that was written as u'\u1234' in Python 2 to 2+3 change
49 it to be _u('\u1234'). The Python 3 interpreter will decode it
50 appropriately and the no-op _u for Python 3 lets it through, in Python
51 2 we then call unicode-escape in the _u function.
54 if sys
.version_info
> (3, 0):
61 return s
.encode("latin-1")
62 advance_iterator
= next
63 # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
65 return isinstance(x
, str)
70 import __builtin__
as builtins
72 # The double replace mangling going on prepares the string for
73 # unicode-escape - \foo is preserved, \u and \U are decoded.
74 return (s
.replace("\\", "\\\\").replace("\\\\u", "\\u")
75 .replace("\\\\U", "\\U").decode("unicode-escape"))
79 advance_iterator
= lambda it
: it
.next()
81 return isinstance(x
, basestring
)
84 return (type, types
.ClassType
)
85 str_is_unicode
= sys
.platform
== "cli"
90 if sys
.version_info
> (2, 5):
92 _error_repr
= BaseException
.__repr
__
def isbaseexception(exception):
    """Tell whether exception derives from BaseException but not Exception

    Instances of Exception subclasses, and objects that are not exceptions
    at all, give False.
    """
    if isinstance(exception, Exception):
        # Ordinary exceptions are never "BaseException only"
        return False
    return isinstance(exception, BaseException)
99 """If contents of iterable all evaluate as boolean True"""
104 def _error_repr(exception
):
105 """Format an exception instance as Python 2.5 and later do"""
106 return exception
.__class
__.__name
__ + repr(exception
.args
)
def isbaseexception(exception):
    """Tell whether exception would derive from BaseException only

    Stands in for the class hierarchy of Python 2.5 and later; the change
    can be seen by comparing the diagrams at the bottom of these pages:
    <http://docs.python.org/release/2.4.4/lib/module-exceptions.html>
    <http://docs.python.org/release/2.5.4/lib/module-exceptions.html>
    """
    base_only_types = (KeyboardInterrupt, SystemExit)
    return isinstance(exception, base_only_types)
118 # GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
119 # there should be better ways to write code needing this.
120 if not issubclass(getattr(builtins
, "bytes", str), str):
122 return isinstance(x
, bytes
)
124 # Never return True on Pythons that provide the name but not the real type
129 def _slow_escape(text
):
130 """Escape unicode ``text`` leaving printable characters unmodified
132 The behaviour emulates the Python 3 implementation of repr, see
133 unicode_repr in unicodeobject.c and isprintable definition.
135 Because this iterates over the input a codepoint at a time, it's slow, and
136 does not handle astral characters correctly on Python builds with 16 bit
137 rather than 32 bit unicode type.
143 if o
< 32 or 126 < o
< 161:
144 output
.append(c
.encode("unicode-escape"))
146 # Separate due to bug in unicode-escape codec in Python 2.4
147 output
.append("\\\\")
151 # To get correct behaviour would need to pair up surrogates here
152 if unicodedata
.category(c
)[0] in "CZ":
153 output
.append(c
.encode("unicode-escape"))
156 return "".join(output
)
159 def text_repr(text
, multiline
=None):
160 """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``.
162 is_py3k
= sys
.version_info
> (3, 0)
163 nl
= _isbytes(text
) and bytes((0xA,)) or "\n"
164 if multiline
is None:
165 multiline
= nl
in text
166 if not multiline
and (is_py3k
or not str_is_unicode
and type(text
) is str):
167 # Use normal repr for single line of unicode on Python 3 or bytes
169 prefix
= repr(text
[:0])[:-2]
171 # To escape multiline strings, split and process each line in turn,
172 # making sure that quotes are not escaped.
174 offset
= len(prefix
) + 1
176 for l
in text
.split(nl
):
179 lines
.append(r
[offset
:-1].replace("\\" + q
, q
))
180 elif not str_is_unicode
and isinstance(text
, str):
181 lines
= [l
.encode("string-escape").replace("\\'", "'")
182 for l
in text
.split("\n")]
184 lines
= [_slow_escape(l
) for l
in text
.split("\n")]
185 # Combine the escaped lines and append two of the closing quotes,
186 # then iterate over the result to escape triple quotes correctly.
187 _semi_done
= "\n".join(lines
) + "''"
190 p
= _semi_done
.find("'''", p
)
193 _semi_done
= "\\".join([_semi_done
[:p
], _semi_done
[p
:]])
195 return "".join([prefix
, "'''\\\n", _semi_done
, "'"])
196 escaped_text
= _slow_escape(text
)
197 # Determine which quote character to use and if one gets prefixed with a
198 # backslash following the same logic Python uses for repr() on strings
202 escaped_text
= escaped_text
.replace("'", "\\'")
205 return "".join([prefix
, quote
, escaped_text
, quote
])
208 def unicode_output_stream(stream
):
209 """Get wrapper for given stream that writes any unicode without exception
211 Characters that can't be coerced to the encoding of the stream, or 'ascii'
212 if valid encoding is not found, will be replaced. The original stream may
213 be returned in situations where a wrapper is determined unneeded.
215 The wrapper only allows unicode to be written, not non-ascii bytestrings,
216 which is a good thing to ensure sanity and sanitation.
218 if sys
.platform
== "cli":
219 # Best to never encode before writing in IronPython
222 writer
= codecs
.getwriter(stream
.encoding
or "")
223 except (AttributeError, LookupError):
224 # GZ 2010-06-16: Python 3 StringIO ends up here, but probably needs
225 # different handling as it doesn't want bytestrings
226 return codecs
.getwriter("ascii")(stream
, "replace")
227 if writer
.__module
__.rsplit(".", 1)[1].startswith("utf"):
228 # The current stream has a unicode encoding so no error handler is needed
229 if sys
.version_info
> (3, 0):
231 return writer(stream
)
232 if sys
.version_info
> (3, 0):
233 # Python 3 doesn't seem to make this easy, handle a common case
235 return stream
.__class
__(stream
.buffer, stream
.encoding
, "replace",
236 stream
.newlines
, stream
.line_buffering
)
237 except AttributeError:
239 return writer(stream
, "replace")
242 # The default source encoding is actually "iso-8859-1" until Python 2.5 but
243 # using non-ascii causes a deprecation warning in 2.4 and it's cleaner to
244 # treat all versions the same way
245 _default_source_encoding
= "ascii"
247 # Pattern specified in <http://www.python.org/dev/peps/pep-0263/>
248 _cookie_search
=re
.compile("coding[:=]\s*([-\w.]+)").search
250 def _detect_encoding(lines
):
251 """Get the encoding of a Python source file from a list of lines as bytes
253 This function does less than tokenize.detect_encoding added in Python 3 as
254 it does not attempt to raise a SyntaxError when the interpreter would, it
255 just wants the encoding of a source file Python has already compiled and
259 return _default_source_encoding
260 if lines
[0].startswith("\xef\xbb\xbf"):
261 # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError
263 # Only the first two lines of the source file are examined
264 magic
= _cookie_search("".join(lines
[:2]))
266 return _default_source_encoding
267 encoding
= magic
.group(1)
269 codecs
.lookup(encoding
)
271 # Some codecs raise something other than LookupError if they don't
272 # support the given error handler, but not the text ones that could
273 # actually be used for Python source code
274 return _default_source_encoding
278 class _EncodingTuple(tuple):
279 """A tuple type that can have an encoding attribute smuggled on"""
282 def _get_source_encoding(filename
):
283 """Detect, cache and return the encoding of Python source at filename"""
285 return linecache
.cache
[filename
].encoding
286 except (AttributeError, KeyError):
287 encoding
= _detect_encoding(linecache
.getlines(filename
))
288 if filename
in linecache
.cache
:
289 newtuple
= _EncodingTuple(linecache
.cache
[filename
])
290 newtuple
.encoding
= encoding
291 linecache
.cache
[filename
] = newtuple
295 def _get_exception_encoding():
296 """Return the encoding we expect messages from the OS to be encoded in"""
298 # GZ 2010-05-24: Really want the codepage number instead, the error
299 # handling of standard codecs is more deterministic
301 # GZ 2010-05-23: We need this call to be after initialisation, but there's
302 # no benefit in asking more than once as it's a global
303 # setting that can change after the message is formatted.
304 return locale
.getlocale(locale
.LC_MESSAGES
)[1] or "ascii"
307 def _exception_to_text(evalue
):
308 """Try hard to get a sensible text value out of an exception instance"""
310 return unicode(evalue
)
311 except KeyboardInterrupt:
314 # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
317 return str(evalue
).decode(_get_exception_encoding(), "replace")
318 except KeyboardInterrupt:
321 # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
323 # Okay, out of ideas, let higher level handle it
327 # GZ 2010-05-23: This function is huge and horrible and I welcome suggestions
328 # on the best way to break it up
329 _TB_HEADER
= _u('Traceback (most recent call last):\n')
330 def _format_exc_info(eclass
, evalue
, tb
, limit
=None):
331 """Format a stack trace and the exception information as unicode
333 Compatibility function for Python 2 which ensures each component of a
334 traceback is correctly decoded according to its origins.
336 Based on traceback.format_exception and related functions.
338 fs_enc
= sys
.getfilesystemencoding()
342 for filename
, lineno
, name
, line
in traceback
.extract_tb(tb
, limit
):
343 extracted_list
.append((
344 filename
.decode(fs_enc
, "replace"),
346 name
.decode("ascii", "replace"),
347 line
and line
.decode(
348 _get_source_encoding(filename
), "replace")))
349 list.extend(traceback
.format_list(extracted_list
))
353 # Is a (deprecated) string exception
354 list.append((eclass
+ "\n").decode("ascii", "replace"))
356 if isinstance(evalue
, SyntaxError):
357 # Avoid duplicating the special formatting for SyntaxError here,
358 # instead create a new instance with unicode filename and line
359 # Potentially gives duff spacing, but that's a pre-existing issue
361 msg
, (filename
, lineno
, offset
, line
) = evalue
362 except (TypeError, ValueError):
363 pass # Strange exception instance, fall through to generic code
365 # Errors during parsing give the line from buffer encoded as
366 # latin-1 or utf-8 or the encoding of the file depending on the
367 # coding and whether the patch for issue #1031213 is applied, so
368 # give up on trying to decode it and just read the file again
370 bytestr
= linecache
.getline(filename
, lineno
)
372 if lineno
== 1 and bytestr
.startswith("\xef\xbb\xbf"):
373 bytestr
= bytestr
[3:]
374 line
= bytestr
.decode(
375 _get_source_encoding(filename
), "replace")
376 del linecache
.cache
[filename
]
378 line
= line
.decode("ascii", "replace")
380 filename
= filename
.decode(fs_enc
, "replace")
381 evalue
= eclass(msg
, (filename
, lineno
, offset
, line
))
382 list.extend(traceback
.format_exception_only(eclass
, evalue
))
384 sclass
= eclass
.__name
__
385 svalue
= _exception_to_text(evalue
)
387 list.append("%s: %s\n" % (sclass
, svalue
))
389 # GZ 2010-05-24: Not a great fallback message, but keep for the moment
390 list.append("%s: <unprintable %s object>\n" % (sclass
, sclass
))
392 list.append("%s\n" % sclass
)