replace: fix unused variable warning
[Samba/gebeck_regimport.git] / lib / testtools / testtools / compat.py
blob 2547b88d59748bae31cf9556fa187d1d6f87fbd8
# Copyright (c) 2008-2011 testtools developers. See LICENSE for details.

"""Compatibility support for python 2 and 3."""

__metaclass__ = type
__all__ = [
    '_b',
    '_u',
    'advance_iterator',
    'all',
    'BytesIO',
    'classtypes',
    'isbaseexception',
    'istext',
    'str_is_unicode',
    'StringIO',
    'reraise',
    'unicode_output_stream',
    ]

import codecs
import linecache
import locale
import os
import re
import sys
import traceback
import unicodedata

from testtools.helpers import try_imports

BytesIO = try_imports(['StringIO.StringIO', 'io.BytesIO'])
StringIO = try_imports(['StringIO.StringIO', 'io.StringIO'])

try:
    from testtools import _compat2x as _compat
    _compat
except SyntaxError:
    from testtools import _compat3x as _compat

reraise = _compat.reraise
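
# Illustrative note (assumption, not from the original source): the bare
# ``_compat`` reference in the try block above only marks the import as used,
# which avoids an unused-variable warning; ``reraise`` is presumably called
# with the (exc_class, exc_obj, exc_tb) triple from sys.exc_info() to re-raise
# an exception with its original traceback on either Python version.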


__u_doc = """A function version of the 'u' prefix.

This is needed because the u prefix is not usable in Python 3 but is required
in Python 2 to get a unicode object.

To migrate code that was written as u'\u1234' in Python 2 to 2+3 change
it to be _u('\u1234'). The Python 3 interpreter will decode it
appropriately and the no-op _u for Python 3 lets it through, in Python
2 we then call unicode-escape in the _u function.
"""

if sys.version_info > (3, 0):
    import builtins
    def _u(s):
        return s
    _r = ascii
    def _b(s):
        """A byte literal."""
        return s.encode("latin-1")
    advance_iterator = next
    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
    def istext(x):
        return isinstance(x, str)
    def classtypes():
        return (type,)
    str_is_unicode = True
else:
    import __builtin__ as builtins
    def _u(s):
        # The double replace mangling going on prepares the string for
        # unicode-escape - \foo is preserved, \u and \U are decoded.
        return (s.replace("\\", "\\\\").replace("\\\\u", "\\u")
            .replace("\\\\U", "\\U").decode("unicode-escape"))
    _r = repr
    def _b(s):
        return s
    advance_iterator = lambda it: it.next()
    def istext(x):
        return isinstance(x, basestring)
    def classtypes():
        import types
        return (type, types.ClassType)
    str_is_unicode = sys.platform == "cli"

_u.__doc__ = __u_doc
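
# Illustrative example (based on __u_doc above): writing _u('\u1234') in
# source code yields the one-character unicode string U+1234 on both major
# versions, and _b("foo") yields a byte string (``bytes`` on Python 3, plain
# ``str`` on Python 2).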


if sys.version_info > (2, 5):
    all = all
    _error_repr = BaseException.__repr__
    def isbaseexception(exception):
        """Return whether exception inherits from BaseException only"""
        return (isinstance(exception, BaseException)
            and not isinstance(exception, Exception))
else:
    def all(iterable):
        """If contents of iterable all evaluate as boolean True"""
        for obj in iterable:
            if not obj:
                return False
        return True
    def _error_repr(exception):
        """Format an exception instance as Python 2.5 and later do"""
        return exception.__class__.__name__ + repr(exception.args)
    def isbaseexception(exception):
        """Return whether exception would inherit from BaseException only

        This approximates the hierarchy in Python 2.5 and later, compare the
        difference between the diagrams at the bottom of the pages:
        <http://docs.python.org/release/2.4.4/lib/module-exceptions.html>
        <http://docs.python.org/release/2.5.4/lib/module-exceptions.html>
        """
        return isinstance(exception, (KeyboardInterrupt, SystemExit))
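
# Illustrative example: with either definition above,
#     isbaseexception(KeyboardInterrupt())    # --> True
#     isbaseexception(ValueError("boom"))     # --> False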


# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
# there should be better ways to write code needing this.
if not issubclass(getattr(builtins, "bytes", str), str):
    def _isbytes(x):
        return isinstance(x, bytes)
else:
    # Never return True on Pythons that provide the name but not the real type
    def _isbytes(x):
        return False
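
# Illustrative example: on Python 3, _isbytes(b"data") is True and
# _isbytes("data") is False; on Python 2, where ``bytes`` is just an alias for
# ``str``, _isbytes() always returns False.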


def _slow_escape(text):
    """Escape unicode ``text`` leaving printable characters unmodified

    The behaviour emulates the Python 3 implementation of repr, see
    unicode_repr in unicodeobject.c and isprintable definition.

    Because this iterates over the input a codepoint at a time, it's slow, and
    does not handle astral characters correctly on Python builds with 16 bit
    rather than 32 bit unicode type.
    """
    output = []
    for c in text:
        o = ord(c)
        if o < 256:
            if o < 32 or 126 < o < 161:
                output.append(c.encode("unicode-escape"))
            elif o == 92:
                # Separate due to bug in unicode-escape codec in Python 2.4
                output.append("\\\\")
            else:
                output.append(c)
        else:
            # To get correct behaviour would need to pair up surrogates here
            if unicodedata.category(c)[0] in "CZ":
                output.append(c.encode("unicode-escape"))
            else:
                output.append(c)
    return "".join(output)
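
# Illustrative example (Python 2): printable characters pass through unchanged
# while control and other non-printable characters are escaped, e.g.
#     _slow_escape(u"caf\xe9 \x07") == u"caf\xe9 \\x07"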


def text_repr(text, multiline=None):
    """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``."""
    is_py3k = sys.version_info > (3, 0)
    nl = _isbytes(text) and bytes((0xA,)) or "\n"
    if multiline is None:
        multiline = nl in text
    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
        # Use normal repr for single line of unicode on Python 3 or bytes
        return repr(text)
    prefix = repr(text[:0])[:-2]
    if multiline:
        # To escape multiline strings, split and process each line in turn,
        # making sure that quotes are not escaped.
        if is_py3k:
            offset = len(prefix) + 1
            lines = []
            for l in text.split(nl):
                r = repr(l)
                q = r[-1]
                lines.append(r[offset:-1].replace("\\" + q, q))
        elif not str_is_unicode and isinstance(text, str):
            lines = [l.encode("string-escape").replace("\\'", "'")
                for l in text.split("\n")]
        else:
            lines = [_slow_escape(l) for l in text.split("\n")]
        # Combine the escaped lines and append two of the closing quotes,
        # then iterate over the result to escape triple quotes correctly.
        _semi_done = "\n".join(lines) + "''"
        p = 0
        while True:
            p = _semi_done.find("'''", p)
            if p == -1:
                break
            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
            p += 2
        return "".join([prefix, "'''\\\n", _semi_done, "'"])
    escaped_text = _slow_escape(text)
    # Determine which quote character to use and if one gets prefixed with a
    # backslash following the same logic Python uses for repr() on strings
    quote = "'"
    if "'" in text:
        if '"' in text:
            escaped_text = escaped_text.replace("'", "\\'")
        else:
            quote = '"'
    return "".join([prefix, quote, escaped_text, quote])
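
# Illustrative examples (Python 2, where plain repr() would escape non-ascii):
#     text_repr(u"caf\xe9")                    # u"u'caf\xe9'"
#     text_repr(u"one\ntwo", multiline=True)   # u"u'''\\\none\ntwo'''"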


def unicode_output_stream(stream):
    """Get wrapper for given stream that writes any unicode without exception

    Characters that can't be coerced to the encoding of the stream, or 'ascii'
    if valid encoding is not found, will be replaced. The original stream may
    be returned in situations where a wrapper is determined unneeded.

    The wrapper only allows unicode to be written, not non-ascii bytestrings,
    which is a good thing to ensure sanity and sanitation.
    """
    if sys.platform == "cli":
        # Best to never encode before writing in IronPython
        return stream
    try:
        writer = codecs.getwriter(stream.encoding or "")
    except (AttributeError, LookupError):
        # GZ 2010-06-16: Python 3 StringIO ends up here, but probably needs
        # different handling as it doesn't want bytestrings
        return codecs.getwriter("ascii")(stream, "replace")
    if writer.__module__.rsplit(".", 1)[1].startswith("utf"):
        # The current stream has a unicode encoding so no error handler is needed
        if sys.version_info > (3, 0):
            return stream
        return writer(stream)
    if sys.version_info > (3, 0):
        # Python 3 doesn't seem to make this easy, handle a common case
        try:
            return stream.__class__(stream.buffer, stream.encoding, "replace",
                stream.newlines, stream.line_buffering)
        except AttributeError:
            pass
    return writer(stream, "replace")
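
# Typical use (illustrative sketch): wrap an ascii-only stream such as
# sys.stdout so that writing arbitrary unicode never raises
# UnicodeEncodeError; unencodable characters are replaced instead.
#     out = unicode_output_stream(sys.stdout)
#     out.write(_u('ol\xe9\n'))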


# The default source encoding is actually "iso-8859-1" until Python 2.5 but
# using non-ascii causes a deprecation warning in 2.4 and it's cleaner to
# treat all versions the same way
_default_source_encoding = "ascii"

# Pattern specified in <http://www.python.org/dev/peps/pep-0263/>
_cookie_search=re.compile("coding[:=]\s*([-\w.]+)").search

def _detect_encoding(lines):
    """Get the encoding of a Python source file from a list of lines as bytes

    This function does less than tokenize.detect_encoding added in Python 3 as
    it does not attempt to raise a SyntaxError when the interpreter would, it
    just wants the encoding of a source file Python has already compiled and
    determined is valid.
    """
    if not lines:
        return _default_source_encoding
    if lines[0].startswith("\xef\xbb\xbf"):
        # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError
        return "utf-8"
    # Only the first two lines of the source file are examined
    magic = _cookie_search("".join(lines[:2]))
    if magic is None:
        return _default_source_encoding
    encoding = magic.group(1)
    try:
        codecs.lookup(encoding)
    except LookupError:
        # Some codecs raise something other than LookupError if they don't
        # support the given error handler, but not the text ones that could
        # actually be used for Python source code
        return _default_source_encoding
    return encoding
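
# Illustrative example: a PEP 263 coding cookie in the first two lines is
# honoured, while source without a valid cookie falls back to
# _default_source_encoding.
#     _detect_encoding(["# -*- coding: utf-8 -*-\n", "x = 1\n"]) == "utf-8"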


class _EncodingTuple(tuple):
    """A tuple type that can have an encoding attribute smuggled on"""


def _get_source_encoding(filename):
    """Detect, cache and return the encoding of Python source at filename"""
    try:
        return linecache.cache[filename].encoding
    except (AttributeError, KeyError):
        encoding = _detect_encoding(linecache.getlines(filename))
        if filename in linecache.cache:
            newtuple = _EncodingTuple(linecache.cache[filename])
            newtuple.encoding = encoding
            linecache.cache[filename] = newtuple
        return encoding


def _get_exception_encoding():
    """Return the encoding we expect messages from the OS to be encoded in"""
    if os.name == "nt":
        # GZ 2010-05-24: Really want the codepage number instead, the error
        # handling of standard codecs is more deterministic
        return "mbcs"
    # GZ 2010-05-23: We need this call to be after initialisation, but there's
    # no benefit in asking more than once as it's a global
    # setting that can change after the message is formatted.
    return locale.getlocale(locale.LC_MESSAGES)[1] or "ascii"


def _exception_to_text(evalue):
    """Try hard to get a sensible text value out of an exception instance"""
    try:
        return unicode(evalue)
    except KeyboardInterrupt:
        raise
    except:
        # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
        pass
    try:
        return str(evalue).decode(_get_exception_encoding(), "replace")
    except KeyboardInterrupt:
        raise
    except:
        # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
        pass
    # Okay, out of ideas, let higher level handle it
    return None
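
# Illustrative behaviour: ``unicode(evalue)`` is tried first, then a
# byte-string rendering decoded with the OS message encoding (using "replace"
# so undecodable bytes cannot raise); if both attempts fail the caller gets
# None and must provide its own fallback text.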


# GZ 2010-05-23: This function is huge and horrible and I welcome suggestions
# on the best way to break it up
_TB_HEADER = _u('Traceback (most recent call last):\n')
def _format_exc_info(eclass, evalue, tb, limit=None):
    """Format a stack trace and the exception information as unicode

    Compatibility function for Python 2 which ensures each component of a
    traceback is correctly decoded according to its origins.

    Based on traceback.format_exception and related functions.
    """
    fs_enc = sys.getfilesystemencoding()
    if tb:
        list = [_TB_HEADER]
        extracted_list = []
        for filename, lineno, name, line in traceback.extract_tb(tb, limit):
            extracted_list.append((
                filename.decode(fs_enc, "replace"),
                lineno,
                name.decode("ascii", "replace"),
                line and line.decode(
                    _get_source_encoding(filename), "replace")))
        list.extend(traceback.format_list(extracted_list))
    else:
        list = []
    if evalue is None:
        # Is a (deprecated) string exception
        list.append((eclass + "\n").decode("ascii", "replace"))
        return list
    if isinstance(evalue, SyntaxError):
        # Avoid duplicating the special formatting for SyntaxError here,
        # instead create a new instance with unicode filename and line
        # Potentially gives duff spacing, but that's a pre-existing issue
        try:
            msg, (filename, lineno, offset, line) = evalue
        except (TypeError, ValueError):
            pass # Strange exception instance, fall through to generic code
        else:
            # Errors during parsing give the line from buffer encoded as
            # latin-1 or utf-8 or the encoding of the file depending on the
            # coding and whether the patch for issue #1031213 is applied, so
            # give up on trying to decode it and just read the file again
            if line:
                bytestr = linecache.getline(filename, lineno)
                if bytestr:
                    if lineno == 1 and bytestr.startswith("\xef\xbb\xbf"):
                        bytestr = bytestr[3:]
                    line = bytestr.decode(
                        _get_source_encoding(filename), "replace")
                    del linecache.cache[filename]
                else:
                    line = line.decode("ascii", "replace")
            if filename:
                filename = filename.decode(fs_enc, "replace")
            evalue = eclass(msg, (filename, lineno, offset, line))
        list.extend(traceback.format_exception_only(eclass, evalue))
        return list
    sclass = eclass.__name__
    svalue = _exception_to_text(evalue)
    if svalue:
        list.append("%s: %s\n" % (sclass, svalue))
    elif svalue is None:
        # GZ 2010-05-24: Not a great fallback message, but keep for the moment
        list.append("%s: <unprintable %s object>\n" % (sclass, sclass))
    else:
        list.append("%s\n" % sclass)
    return list
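
# Typical use (illustrative sketch): called from an except block with the
# values from sys.exc_info(), much like traceback.format_exception:
#     try:
#         raise ValueError(u"bad \u1234")
#     except ValueError:
#         lines = _format_exc_info(*sys.exc_info())
#     # ``lines`` is a list of unicode strings ending with the exception line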