dsdb: reset schema->{classes,attributes}_to_remove_size to 0
[Samba/gebeck_regimport.git] / lib / testtools / testtools / compat.py
blob375eca2c02af50d9a70ce27dffb81f8e8833121f
1 # Copyright (c) 2008-2011 testtools developers. See LICENSE for details.
3 """Compatibility support for python 2 and 3."""
# Make classes defined in this module new-style on Python 2.
__metaclass__ = type

# Names exported by ``from testtools.compat import *``.
__all__ = [
    '_b',
    '_u',
    'advance_iterator',
    'all',
    'BytesIO',
    'classtypes',
    'isbaseexception',
    'istext',
    'str_is_unicode',
    'StringIO',
    'reraise',
    'unicode_output_stream',
    ]
21 import codecs
22 import linecache
23 import locale
24 import os
25 import re
26 import sys
27 import traceback
28 import unicodedata
30 from testtools.helpers import try_imports
# Pick the in-memory stream classes by dotted name; for both of them the
# Python 2 ``StringIO`` module is listed ahead of the ``io`` module.
# NOTE(review): presumably try_imports returns the first name that can be
# imported -- confirm against testtools.helpers.
BytesIO = try_imports(['StringIO.StringIO', 'io.BytesIO'])
StringIO = try_imports(['StringIO.StringIO', 'io.StringIO'])

# The 2.x helper module is attempted first; a SyntaxError while importing it
# selects the 3.x helper module instead.
try:
    from testtools import _compat2x as _compat
except SyntaxError:
    from testtools import _compat3x as _compat

# Re-export reraise from whichever helper module was importable.
reraise = _compat.reraise
# Shared docstring attached to whichever ``_u`` implementation is selected
# below.
__u_doc = """A function version of the 'u' prefix.

This is needed because the u prefix is not usable in Python 3 but is required
in Python 2 to get a unicode object.

To migrate code that was written as u'\u1234' in Python 2 to 2+3 change
it to be _u('\u1234'). The Python 3 interpreter will decode it
appropriately and the no-op _u for Python 3 lets it through, in Python
2 we then call unicode-escape in the _u function.
"""

if sys.version_info > (3, 0):
    import builtins

    def _u(s):
        return s

    # repr() flavour that keeps the result ascii-only.
    _r = ascii

    def _b(s):
        """A byte literal."""
        return s.encode("latin-1")

    advance_iterator = next

    # GZ 2011-08-24: Seems istext() is easy to misuse and makes for bad code.
    def istext(x):
        """Return whether ``x`` is a text string (``str`` on Python 3)."""
        return isinstance(x, str)

    def classtypes():
        """Return the tuple of types that classes can be instances of."""
        return (type,)

    str_is_unicode = True
else:
    import __builtin__ as builtins

    def _u(s):
        # The double replace mangling going on prepares the string for
        # unicode-escape - \foo is preserved, \u and \U are decoded.
        return (s.replace("\\", "\\\\").replace("\\\\u", "\\u")
            .replace("\\\\U", "\\U").decode("unicode-escape"))

    _r = repr

    def _b(s):
        """A byte literal (native ``str`` is already bytes here)."""
        return s

    def advance_iterator(it):
        """Return the next item from iterator ``it``."""
        return it.next()

    def istext(x):
        """Return whether ``x`` is a ``str`` or ``unicode`` string."""
        return isinstance(x, basestring)

    def classtypes():
        """Return the tuple of types that classes can be instances of."""
        import types
        return (type, types.ClassType)

    # Only IronPython ("cli") has a unicode native str on Python 2.
    str_is_unicode = sys.platform == "cli"

_u.__doc__ = __u_doc
if sys.version_info > (2, 5):
    # The builtins already do the right thing; rebind ``all`` so that it is a
    # module attribute and can be exported.
    all = all
    _error_repr = BaseException.__repr__

    def isbaseexception(exception):
        """Return whether exception inherits from BaseException only"""
        if isinstance(exception, Exception):
            return False
        return isinstance(exception, BaseException)
else:
    def all(iterable):
        """If contents of iterable all evaluate as boolean True"""
        for item in iterable:
            if item:
                continue
            return False
        return True

    def _error_repr(exception):
        """Format an exception instance as Python 2.5 and later do"""
        class_name = exception.__class__.__name__
        return class_name + repr(exception.args)

    def isbaseexception(exception):
        """Return whether exception would inherit from BaseException only

        This approximates the hierarchy in Python 2.5 and later, compare the
        difference between the diagrams at the bottom of the pages:
        <http://docs.python.org/release/2.4.4/lib/module-exceptions.html>
        <http://docs.python.org/release/2.5.4/lib/module-exceptions.html>
        """
        if isinstance(exception, KeyboardInterrupt):
            return True
        return isinstance(exception, SystemExit)
# GZ 2011-08-24: Using isinstance checks like this encourages bad interfaces,
#                there should be better ways to write code needing this.
if issubclass(getattr(builtins, "bytes", str), str):
    # Either there is no ``bytes`` builtin or it is just another name for
    # ``str``: never return True on Pythons that provide the name but not the
    # real type
    def _isbytes(x):
        """Always False: this interpreter has no distinct bytes type."""
        return False
else:
    def _isbytes(x):
        """Return whether ``x`` is an instance of the real bytes type."""
        return isinstance(x, bytes)
def _slow_escape(text):
    """Escape unicode ``text`` leaving printable characters unmodified

    The behaviour emulates the Python 3 implementation of repr, see
    unicode_repr in unicodeobject.c and isprintable definition.

    Because this iterates over the input a codepoint at a time, it's slow, and
    does not handle astral characters correctly on Python builds with 16 bit
    rather than 32 bit unicode type.
    """
    pieces = []
    add = pieces.append
    for char in text:
        codepoint = ord(char)
        if codepoint >= 256:
            # Outside latin-1 the unicode category decides: "C*" (other) and
            # "Z*" (separator) characters are escaped.
            # To get correct behaviour would need to pair up surrogates here
            if unicodedata.category(char)[0] in "CZ":
                add(char.encode("unicode-escape"))
            else:
                add(char)
        elif codepoint < 32 or 126 < codepoint < 161:
            # Control characters, DEL and the C1 range up to NBSP
            add(char.encode("unicode-escape"))
        elif codepoint == 92:
            # Separate due to bug in unicode-escape codec in Python 2.4
            add("\\\\")
        else:
            add(char)
    return "".join(pieces)
def text_repr(text, multiline=None):
    """Rich repr for ``text`` returning unicode, triple quoted if ``multiline``.

    :param text: The string to represent; the newline used for splitting is
        a bytes newline when ``_isbytes(text)`` is true, otherwise ``"\\n"``.
    :param multiline: Force (True) or suppress (False) the triple quoted
        form. When None, the triple quoted form is used if ``text`` contains
        a newline.
    :return: A string holding a quoted, escaped representation of ``text``.
    """
    is_py3k = sys.version_info > (3, 0)
    # and/or idiom: the bytes newline is truthy so this picks it for bytes
    nl = _isbytes(text) and bytes((0xA,)) or "\n"
    if multiline is None:
        multiline = nl in text
    if not multiline and (is_py3k or not str_is_unicode and type(text) is str):
        # Use normal repr for single line of unicode on Python 3 or bytes
        return repr(text)
    # repr of the empty slice minus its two quote characters leaves just the
    # type prefix of the literal, if there is one
    prefix = repr(text[:0])[:-2]
    if multiline:
        # To escape multiline strings, split and process each line in turn,
        # making sure that quotes are not escaped.
        if is_py3k:
            # Skip the prefix and the opening quote of each line's repr
            offset = len(prefix) + 1
            lines = []
            for l in text.split(nl):
                r = repr(l)
                # Whichever quote repr chose for this line; undo its escaping
                q = r[-1]
                lines.append(r[offset:-1].replace("\\" + q, q))
        elif not str_is_unicode and isinstance(text, str):
            lines = [l.encode("string-escape").replace("\\'", "'")
                for l in text.split("\n")]
        else:
            lines = [_slow_escape(l) for l in text.split("\n")]
        # Combine the escaped lines and append two of the closing quotes,
        # then iterate over the result to escape triple quotes correctly.
        _semi_done = "\n".join(lines) + "''"
        p = 0
        while True:
            p = _semi_done.find("'''", p)
            if p == -1:
                break
            # Insert a backslash before the run of quotes found at p
            _semi_done = "\\".join([_semi_done[:p], _semi_done[p:]])
            # Step past the inserted backslash and the quote it escapes
            p += 2
        # Opening triple quote is followed by a backslash-newline so the text
        # starts on its own line; the third closing quote is added here
        return "".join([prefix, "'''\\\n", _semi_done, "'"])
    escaped_text = _slow_escape(text)
    # Determine which quote character to use and if one gets prefixed with a
    # backslash following the same logic Python uses for repr() on strings
    quote = "'"
    if "'" in text:
        if '"' in text:
            escaped_text = escaped_text.replace("'", "\\'")
        else:
            quote = '"'
    return "".join([prefix, quote, escaped_text, quote])
def unicode_output_stream(stream):
    """Get wrapper for given stream that writes any unicode without exception

    Characters that can't be coerced to the encoding of the stream, or 'ascii'
    if valid encoding is not found, will be replaced. The original stream may
    be returned in situations where a wrapper is determined unneeded.

    The wrapper only allows unicode to be written, not non-ascii bytestrings,
    which is a good thing to ensure sanity and sanitation.

    :param stream: A file-like object to write text to.
    :return: Either ``stream`` itself or an object wrapping it that has a
        ``write`` method accepting unicode.
    """
    if sys.platform == "cli":
        # Best to never encode before writing in IronPython
        return stream
    is_py3k = sys.version_info > (3, 0)
    try:
        writer = codecs.getwriter(stream.encoding or "")
    except (AttributeError, LookupError):
        if is_py3k:
            # A Python 3 text stream without a usable encoding (for instance
            # io.StringIO, whose encoding is None) takes str directly.
            # Wrapping it in a codecs writer would push bytes into it and
            # make every write fail.
            import io
            if isinstance(stream, io.TextIOBase):
                return stream
        return codecs.getwriter("ascii")(stream, "replace")
    # Use [-1] rather than [1] so a codec whose module name has no dot is
    # treated as a non-utf codec instead of raising IndexError
    if writer.__module__.rsplit(".", 1)[-1].startswith("utf"):
        # The current stream has a unicode encoding so no error handler is needed
        if is_py3k:
            return stream
        return writer(stream)
    if is_py3k:
        # Python 3 doesn't seem to make this easy, handle a common case
        try:
            return stream.__class__(stream.buffer, stream.encoding, "replace",
                stream.newlines, stream.line_buffering)
        except AttributeError:
            pass
    return writer(stream, "replace")
# The default source encoding is actually "iso-8859-1" until Python 2.5 but
# using non-ascii causes a deprecation warning in 2.4 and it's cleaner to
# treat all versions the same way
_default_source_encoding = "ascii"

# Pattern specified in <http://www.python.org/dev/peps/pep-0263/>
# Written as a raw string so \s and \w reach the regex engine without relying
# on Python passing unrecognised string escapes through unchanged.
_cookie_search = re.compile(r"coding[:=]\s*([-\w.]+)").search

def _detect_encoding(lines):
    """Get the encoding of a Python source file from a list of lines as bytes

    This function does less than tokenize.detect_encoding added in Python 3 as
    it does not attempt to raise a SyntaxError when the interpreter would, it
    just wants the encoding of a source file Python has already compiled and
    determined is valid.

    :param lines: The lines of the source file, in order.
    :return: "utf-8" if the first line starts with the UTF-8 BOM bytes,
        otherwise the name from a coding cookie in the first two lines when
        ``codecs.lookup`` knows it, otherwise the default source encoding.
    """
    if not lines:
        return _default_source_encoding
    if lines[0].startswith("\xef\xbb\xbf"):
        # Source starting with UTF-8 BOM is either UTF-8 or a SyntaxError
        return "utf-8"
    # Only the first two lines of the source file are examined
    magic = _cookie_search("".join(lines[:2]))
    if magic is None:
        return _default_source_encoding
    encoding = magic.group(1)
    try:
        codecs.lookup(encoding)
    except LookupError:
        # Some codecs raise something other than LookupError if they don't
        # support the given error handler, but not the text ones that could
        # actually be used for Python source code
        return _default_source_encoding
    return encoding
class _EncodingTuple(tuple):
    """A tuple type that can have an encoding attribute smuggled on

    Plain tuples reject attribute assignment; this subclass defines no
    ``__slots__`` so its instances accept arbitrary attributes while still
    behaving as tuples.
    """
def _get_source_encoding(filename):
    """Detect, cache and return the encoding of Python source at filename

    The encoding is remembered as an ``encoding`` attribute on the file's
    linecache entry, so later calls for the same file skip the detection for
    as long as that entry stays in the cache.
    """
    try:
        cached_entry = linecache.cache[filename]
        return cached_entry.encoding
    except (AttributeError, KeyError):
        # No cache entry yet, or an entry without the attribute attached
        pass
    detected = _detect_encoding(linecache.getlines(filename))
    if filename in linecache.cache:
        # Swap the entry for an equal tuple that can carry the attribute
        tagged_entry = _EncodingTuple(linecache.cache[filename])
        tagged_entry.encoding = detected
        linecache.cache[filename] = tagged_entry
    return detected
def _get_exception_encoding():
    """Return the encoding we expect messages from the OS to be encoded in"""
    if os.name != "nt":
        # GZ 2010-05-23: We need this call to be after initialisation, but
        #                there's no benefit in asking more than once as it's a
        #                global setting that can change after the message is
        #                formatted.
        messages_encoding = locale.getlocale(locale.LC_MESSAGES)[1]
        return messages_encoding or "ascii"
    # GZ 2010-05-24: Really want the codepage number instead, the error
    #                handling of standard codecs is more deterministic
    return "mbcs"
def _exception_to_text(evalue):
    """Try hard to get a sensible text value out of an exception instance

    Returns the text from the first conversion that succeeds, or None when
    every conversion raised.
    """
    def _as_unicode():
        return unicode(evalue)

    def _as_decoded_str():
        return str(evalue).decode(_get_exception_encoding(), "replace")

    for convert in (_as_unicode, _as_decoded_str):
        try:
            return convert()
        except KeyboardInterrupt:
            raise
        except:
            # Apparently this is what traceback._some_str does. Sigh - RBC 20100623
            pass
    # Okay, out of ideas, let higher level handle it
    return None
# GZ 2010-05-23: This function is huge and horrible and I welcome suggestions
#                on the best way to break it up
_TB_HEADER = _u('Traceback (most recent call last):\n')
def _format_exc_info(eclass, evalue, tb, limit=None):
    """Format a stack trace and the exception information as unicode

    Compatibility function for Python 2 which ensures each component of a
    traceback is correctly decoded according to its origins.

    Based on traceback.format_exception and related functions.

    :param eclass: The exception class, or the string itself for a string
        exception (in which case ``evalue`` is None).
    :param evalue: The exception instance, or None.
    :param tb: A traceback object; when false no stack section is produced.
    :param limit: Passed through to ``traceback.extract_tb``.
    :return: A list of strings making up the formatted traceback.
    """
    fs_enc = sys.getfilesystemencoding()
    if tb:
        # NB: ``list`` shadows the builtin for the rest of this function
        list = [_TB_HEADER]
        extracted_list = []
        for filename, lineno, name, line in traceback.extract_tb(tb, limit):
            # Each field is decoded with the encoding matching where it came
            # from: filesystem for the path, ascii for the function name and
            # the source file's own encoding for the line of code
            extracted_list.append((
                filename.decode(fs_enc, "replace"),
                lineno,
                name.decode("ascii", "replace"),
                line and line.decode(
                    _get_source_encoding(filename), "replace")))
        list.extend(traceback.format_list(extracted_list))
    else:
        list = []
    if evalue is None:
        # Is a (deprecated) string exception
        list.append((eclass + "\n").decode("ascii", "replace"))
        return list
    if isinstance(evalue, SyntaxError):
        # Avoid duplicating the special formatting for SyntaxError here,
        # instead create a new instance with unicode filename and line
        # Potentially gives duff spacing, but that's a pre-existing issue
        try:
            msg, (filename, lineno, offset, line) = evalue
        except (TypeError, ValueError):
            pass # Strange exception instance, fall through to generic code
        else:
            # Errors during parsing give the line from buffer encoded as
            # latin-1 or utf-8 or the encoding of the file depending on the
            # coding and whether the patch for issue #1031213 is applied, so
            # give up on trying to decode it and just read the file again
            if line:
                bytestr = linecache.getline(filename, lineno)
                if bytestr:
                    # Drop the three UTF-8 BOM bytes from the first line
                    if lineno == 1 and bytestr.startswith("\xef\xbb\xbf"):
                        bytestr = bytestr[3:]
                    line = bytestr.decode(
                        _get_source_encoding(filename), "replace")
                    # NOTE(review): presumably evicts the entry so a file that
                    # failed to compile is not kept cached -- confirm
                    del linecache.cache[filename]
                else:
                    line = line.decode("ascii", "replace")
            if filename:
                filename = filename.decode(fs_enc, "replace")
            evalue = eclass(msg, (filename, lineno, offset, line))
            list.extend(traceback.format_exception_only(eclass, evalue))
            return list
    sclass = eclass.__name__
    svalue = _exception_to_text(evalue)
    if svalue:
        list.append("%s: %s\n" % (sclass, svalue))
    elif svalue is None:
        # GZ 2010-05-24: Not a great fallback message, but keep for the moment
        list.append("%s: <unprintable %s object>\n" % (sclass, sclass))
    else:
        # Exception text converted fine but is empty, so show the name only
        list.append("%s\n" % sclass)
    return list