1 #! /usr/bin/env python3
4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This module has been placed in the public domain.
8 Test module for `docutils.io`.
13 from io
import StringIO
, BytesIO
15 from pathlib
import Path
if __name__ == '__main__':
    # Running this file as a script: prepend the "docutils root" (the
    # parent of this file's directory) to the Python library path
    # so we import the local `docutils` package.
    _docutils_root = Path(__file__).resolve().parents[1]
    sys.path.insert(0, str(_docutils_root))
24 from docutils
import io
as du_io
# DATA_ROOT is the ``data`` directory next to this module
# (./test/data/ from the docutils root).
_TEST_DIR = os.path.abspath(os.path.dirname(__file__))
DATA_ROOT = os.path.join(_TEST_DIR, 'data')

# Normalize the preferred encoding's name: look it up in the codec
# registry and keep the name reported by the codec.
_locale_encoding = locale.getpreferredencoding(do_setlocale=False)
preferredencoding = codecs.lookup(_locale_encoding).name
34 # Stub: Buffer with 'strict' auto-conversion of input to byte string:
36 def write(self
, data
):
37 if isinstance(data
, str):
38 data
.encode('ascii', 'strict')
42 # Stub: Buffer expecting unicode string:
44 def write(self
, data
):
45 # emulate Python 3 handling of stdout, stderr
46 if isinstance(data
, bytes
):
47 raise TypeError('must be unicode, not bytes')
51 class mock_stdout(UBuf
):
class HelperTests(unittest.TestCase):
    """Tests for the helpers `check_encoding()` and `error_string()`."""

    def test_check_encoding_true(self):
        """Return `True` if lookup returns the same codec"""
        # Several spellings of the UTF-8 codec name.
        for alias in ('utf-8', 'utf_8', 'utf8', 'UTF-8'):
            self.assertEqual(True, du_io.check_encoding(mock_stdout, alias))

    def test_check_encoding_false(self):
        """Return `False` if lookup returns different codecs"""
        for other in ('ascii', 'latin-1'):
            self.assertEqual(False, du_io.check_encoding(mock_stdout, other))

    def test_check_encoding_none(self):
        """Cases where the comparison fails."""
        # stream.encoding is None:
        result = du_io.check_encoding(du_io.FileInput(), 'ascii')
        self.assertEqual(None, result)
        remaining_cases = (
            # stream.encoding does not exist:
            (BBuf, 'ascii'),
            # encoding is None or empty string:
            (mock_stdout, None),
            (mock_stdout, ''),
            # encoding name is not a known codec:
            (mock_stdout, 'UTF-9'),
        )
        for stream, encoding in remaining_cases:
            self.assertEqual(None, du_io.check_encoding(stream, encoding))

    def test_error_string(self):
        # `error_string()` is expected to return "<class name>: <message>".
        text = '\xfc'  # bytes(text) fails
        raw = b'\xc3\xbc'  # str(raw) returns repr(raw)

        plain = du_io.error_string(Exception('spam'))
        self.assertEqual('Exception: spam', plain)

        with_bytes = du_io.error_string(IndexError(raw))
        self.assertEqual('IndexError: %s' % str(raw), with_bytes)

        with_text = du_io.error_string(ImportError(text))
        self.assertEqual('ImportError: ' + text, with_text)
98 class InputTests(unittest
.TestCase
):
def test_bom_handling(self):
    # Encoded (bytes) input: only the leading ZWNBSP/BOM is dropped.
    # default input encoding will change to UTF-8 in Docutils 0.22
    text = '\ufeffdata\n\ufeff blah\n'
    stripped = 'data\n\ufeff blah\n'  # only leading ZWNBSP removed
    for codec in ('utf-16-be', 'utf-16-le', 'utf-8'):
        reader = du_io.StringInput(source=text.encode(codec))
        self.assertEqual(stripped, reader.read())
    # With `str` input all ZWNBSPs are still there.
    reader = du_io.StringInput(source=text)
    self.assertEqual(text, reader.read())
115 def test_encoding_declaration(self
):
116 input = du_io
.StringInput(source
=b
"""\
117 .. -*- coding: ascii -*-
121 data
= input.read() # noqa: F841
122 self
.assertEqual('ascii', input.successful_encoding
)
123 input = du_io
.StringInput(source
=b
"""\
125 # -*- coding: ascii -*-
128 data
= input.read() # noqa: F841
129 self
.assertEqual('ascii', input.successful_encoding
)
130 input = du_io
.StringInput(source
=b
"""\
132 # extraneous comment; prevents coding slug from being read
133 # -*- coding: ascii -*-
136 self
.assertNotEqual(input.successful_encoding
, 'ascii')
def test_decode_unicode(self):
    # With the special value "unicode" or "Unicode":
    passthrough = du_io.Input(encoding='unicode')
    # `str` instances are handed back unchanged
    decoded = passthrough.decode('ja')
    self.assertEqual('ja', decoded)
    # anything that is not a `str` instance triggers an AssertionError
    self.assertRaises(AssertionError, passthrough.decode, b'ja')
148 class OutputTests(unittest
.TestCase
):
155 """Buffer accepting binary strings (bytes)"""
157 """Buffer accepting unicode strings"""
158 self
.mock_stdout
= mock_stdout()
159 """Stub of sys.stdout under Python 3"""
161 def test_write_unicode(self
):
162 fo
= du_io
.FileOutput(destination
=self
.udrain
, encoding
='unicode',
165 self
.assertEqual(self
.udata
, self
.udrain
.getvalue())
167 def test_write_utf8(self
):
168 fo
= du_io
.FileOutput(destination
=self
.udrain
, encoding
='utf-8',
171 self
.assertEqual(self
.udata
, self
.udrain
.getvalue())
def test_FileOutput_hande_io_errors_deprection_warning(self):
    # Passing the `handle_io_errors` argument must emit a
    # DeprecationWarning whose message matches the pattern below.
    expected_message = '"handle_io_errors" is ignored'
    with self.assertWarnsRegex(DeprecationWarning, expected_message):
        du_io.FileOutput(handle_io_errors=True)
178 # With destination in binary mode, data must be binary string
179 # and is written as-is:
180 def test_write_bytes(self
):
181 fo
= du_io
.FileOutput(destination
=self
.bdrain
, encoding
='utf-8',
182 mode
='wb', autoclose
=False)
184 self
.assertEqual(self
.bdata
, self
.bdrain
.getvalue())
186 def test_write_bytes_to_stdout(self
):
187 # try writing data to `destination.buffer`, if data is
188 # instance of `bytes` and writing to `destination` fails:
189 fo
= du_io
.FileOutput(destination
=self
.mock_stdout
)
191 self
.assertEqual(self
.bdata
,
192 self
.mock_stdout
.buffer.getvalue())
194 def test_encoding_clash_resolved(self
):
195 fo
= du_io
.FileOutput(destination
=self
.mock_stdout
,
196 encoding
='latin1', autoclose
=False)
198 self
.assertEqual(self
.udata
.encode('latin1'),
199 self
.mock_stdout
.buffer.getvalue())
def test_encoding_clash_nonresolvable(self):
    # Remove the `buffer` attribute from the stdout stub, then request
    # 'latin1' output: writing the text data must raise ValueError.
    del self.mock_stdout.buffer
    fo = du_io.FileOutput(
        destination=self.mock_stdout,
        encoding='latin1',
        autoclose=False,
    )
    with self.assertRaises(ValueError):
        fo.write(self.udata)
208 class ErrorOutputTests(unittest
.TestCase
):
def test_defaults(self):
    # An ErrorOutput created without arguments has `sys.stderr`
    # as its destination.
    error_output = du_io.ErrorOutput()
    self.assertEqual(sys.stderr, error_output.destination)
214 buf
= BBuf() # buffer storing byte string
215 e
= du_io
.ErrorOutput(buf
, encoding
='ascii')
216 # write byte-string as-is
218 self
.assertEqual(b
'b\xfc', buf
.getvalue())
219 # encode unicode data with backslashescape fallback replacement:
221 self
.assertEqual(b
'b\xfc u\\xfc', buf
.getvalue())
222 # handle Exceptions with Unicode string args
223 # unicode(Exception('e\xfc')) # fails in Python < 2.6
224 e
.write(AttributeError(' e\xfc'))
225 self
.assertEqual(b
'b\xfc u\\xfc e\\xfc', buf
.getvalue())
226 # encode with `encoding` attribute
229 self
.assertEqual(b
'b\xfc u\\xfc e\\xfc u\xc3\xbc', buf
.getvalue())
232 buf
= UBuf() # buffer only accepting unicode string
233 # decode of binary strings
234 e
= du_io
.ErrorOutput(buf
, encoding
='ascii')
236 # use REPLACEMENT CHARACTER
237 self
.assertEqual(buf
.getvalue(), 'b\ufffd')
238 # write Unicode string and Exceptions with Unicode args
240 self
.assertEqual(buf
.getvalue(), 'b\ufffd u\xfc')
241 e
.write(AttributeError(' e\xfc'))
242 self
.assertEqual(buf
.getvalue(), 'b\ufffd u\xfc e\xfc')
243 # decode with `encoding` attribute
244 e
.encoding
= 'latin1'
246 self
.assertEqual(buf
.getvalue(), 'b\ufffd u\xfc e\xfc b\xfc')
249 class FileInputTests(unittest
.TestCase
):
251 # test input encoding auto-detection:
253 # Up to Docutils 0.18, auto-detection was not used under Python 3
254 # unless reading a file with Python's default encoding failed
def test_bom_utf_8(self):
    """Drop optional BOM from utf-8 encoded files.

    The text read from 'utf-8-sig.txt' must start with 'Grüße'.
    """
    sample_path = os.path.join(DATA_ROOT, 'utf-8-sig.txt')
    reader = du_io.FileInput(source_path=sample_path)
    text = reader.read()
    self.assertTrue(text.startswith('Grüße'))
def test_bom_utf_16(self):
    """Drop BOM from utf-16 encoded files, use correct encoding.

    The text read from 'utf-16-le-sig.txt' must start with 'Grüße'.
    """
    sample_path = os.path.join(DATA_ROOT, 'utf-16-le-sig.txt')
    reader = du_io.FileInput(source_path=sample_path)
    # Assert correct decoding, BOM is gone.
    text = reader.read()
    self.assertTrue(text.startswith('Grüße'))
def test_coding_slug(self):
    """Use self-declared encoding.

    The text read from 'latin2.txt' must end with 'škoda' and a newline.
    """
    sample_path = os.path.join(DATA_ROOT, 'latin2.txt')
    reader = du_io.FileInput(source_path=sample_path)
    text = reader.read()
    self.assertTrue(text.endswith('škoda\n'))
def test_fallback_utf8(self):
    """Try 'utf-8', if encoding is not specified in the source."""
    sample_path = os.path.join(DATA_ROOT, 'utf8.txt')
    reader = du_io.FileInput(source_path=sample_path)
    text = reader.read()
    self.assertEqual('Grüße\n', text)
@unittest.skipIf(preferredencoding in (None, 'ascii', 'utf-8'),
                 'locale encoding not set or UTF-8')
def test_fallback_no_utf8(self):
    # If no encoding is given and decoding with 'utf-8' fails,
    # use the locale's preferred encoding (if not None).
    # Provisional: the default will become 'utf-8'
    # (without auto-detection and fallback) in Docutils 0.22.
    source = du_io.FileInput(
        source_path=os.path.join(DATA_ROOT, 'latin1.txt'))
    # Read first: `data` is compared below, and `successful_encoding`
    # is inspected only after the read.
    # NOTE(review): presumably `successful_encoding` is set by `read()`
    # -- confirm against `docutils.io.Input`.
    data = source.read()
    # Normalize the codec name so it is comparable with the
    # normalized `preferredencoding`.
    successful_encoding = codecs.lookup(source.successful_encoding).name
    self.assertEqual(preferredencoding, successful_encoding)
    # The sample text can only be checked when the locale encoding
    # is ISO 8859-1.
    if successful_encoding == 'iso8859-1':
        self.assertEqual('Grüße\n', data)
def test_readlines(self):
    # `readlines()` returns the content of 'include.txt' as a list of
    # lines, line ending included.
    sample_path = os.path.join(DATA_ROOT, 'include.txt')
    reader = du_io.FileInput(source_path=sample_path)
    lines = reader.readlines()
    self.assertEqual(['Some include text.\n'], lines)
306 if __name__
== '__main__':