Mask trailing whitespace in test sample.
[docutils.git] / docutils / test / test_io.py
blob e8c8368a947a65ac89a7bef72e4f7421bcead44b
1 #! /usr/bin/env python3
3 # $Id$
4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This module has been placed in the public domain.
7 """
8 Test module for `docutils.io`.
9 """
11 import codecs
12 import locale
13 from io import StringIO, BytesIO
14 import os.path
15 from pathlib import Path
16 import sys
17 import unittest
if __name__ == '__main__':
    # When this file is run as a script, put the "docutils root" (two levels
    # above this file) first on the module search path, so that the local
    # `docutils` package is the one that gets imported.
    sys.path.insert(
        0,
        str(Path(__file__).resolve().parents[1]),
    )
24 from docutils import io as du_io
# DATA_ROOT is the ``data`` directory next to this file
# (``./test/data/`` from the docutils root).
DATA_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')

# Name of the locale's preferred encoding, normalized via `codecs.lookup()`
# so it can be compared with other normalized codec names.
preferredencoding = codecs.lookup(
    locale.getpreferredencoding(do_setlocale=False)
).name
# Stub: Buffer with 'strict' auto-conversion of input to byte string:
class BBuf(BytesIO):
    """Bytes buffer that also accepts ASCII-only `str` data.

    `str` input is encoded with the 'ascii' codec and the 'strict' error
    handler before it is stored; `bytes` input is stored as-is.
    """

    def write(self, data):
        """Append `data` to the buffer.

        Raises `UnicodeEncodeError` if `data` is a `str` containing
        non-ASCII characters.
        """
        if isinstance(data, str):
            # Keep the encoded result: `BytesIO.write()` rejects `str`,
            # so discarding it would make every `str` write fail.
            data = data.encode('ascii', 'strict')
        super().write(data)
# Stub: Buffer expecting unicode string:
class UBuf(StringIO):
    """Text buffer that refuses `bytes` input with a `TypeError`."""

    def write(self, data):
        """Append `data` to the buffer; raise `TypeError` for `bytes`."""
        if not isinstance(data, bytes):
            super().write(data)
            return
        # emulate Python 3 handling of stdout, stderr
        raise TypeError('must be unicode, not bytes')
class mock_stdout(UBuf):
    """Stub of a standard stream: a text buffer with an `encoding`
    attribute and a binary `buffer` attribute.
    """

    encoding = 'utf-8'

    def __init__(self):
        super().__init__()
        # Binary side of the stream; receives data written as `bytes`.
        self.buffer = BBuf()
class HelperTests(unittest.TestCase):
    """Tests for `du_io.check_encoding()` and `du_io.error_string()`."""

    def test_check_encoding_true(self):
        """Return `True` if lookup returns the same codec"""
        for alias in ('utf-8', 'utf_8', 'utf8', 'UTF-8'):
            self.assertEqual(True, du_io.check_encoding(mock_stdout, alias))

    def test_check_encoding_false(self):
        """Return `False` if lookup returns different codecs"""
        for other in ('ascii', 'latin-1'):
            self.assertEqual(False, du_io.check_encoding(mock_stdout, other))

    def test_check_encoding_none(self):
        """Cases where the comparison fails."""
        # stream.encoding is None:
        no_encoding = du_io.check_encoding(du_io.FileInput(), 'ascii')
        self.assertEqual(None, no_encoding)
        # stream.encoding does not exist:
        missing_attribute = du_io.check_encoding(BBuf, 'ascii')
        self.assertEqual(None, missing_attribute)
        # encoding is None or empty string:
        for unset in (None, ''):
            self.assertEqual(None, du_io.check_encoding(mock_stdout, unset))
        # encoding is invalid
        self.assertEqual(None, du_io.check_encoding(mock_stdout, 'UTF-9'))

    def test_error_string(self):
        us = '\xfc'  # bytes(us) fails
        bs = b'\xc3\xbc'  # str(bs) returns repr(bs)

        # (expected message, exception instance) pairs
        cases = (
            ('Exception: spam', Exception('spam')),
            ('IndexError: ' + str(bs), IndexError(bs)),
            ('ImportError: %s' % us, ImportError(us)),
        )
        for expected, error in cases:
            self.assertEqual(expected, du_io.error_string(error))
class InputTests(unittest.TestCase):
    """Tests for BOM handling, encoding declarations and the special
    "unicode" encoding in `du_io.StringInput` / `du_io.Input`.
    """

    def test_bom_handling(self):
        # Provisional:
        # default input encoding will change to UTF-8 in Docutils 0.22
        source = '\ufeffdata\n\ufeff blah\n'
        expected = 'data\n\ufeff blah\n'  # only leading ZWNBSP removed
        for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
            stream = du_io.StringInput(source=source.encode(encoding))
            self.assertEqual(expected, stream.read())
        # With `str` input all ZWNBSPs are still there.
        stream = du_io.StringInput(source=source)
        self.assertEqual(source, stream.read())

    def test_encoding_declaration(self):
        # Coding slug in the first line:
        stream = du_io.StringInput(source=b'.. -*- coding: ascii -*-\n'
                                          b'data\n'
                                          b'blah\n')
        stream.read()
        self.assertEqual('ascii', stream.successful_encoding)
        # Coding slug in the second line:
        stream = du_io.StringInput(source=b'#! python\n'
                                          b'# -*- coding: ascii -*-\n'
                                          b'print("hello world")\n')
        stream.read()
        self.assertEqual('ascii', stream.successful_encoding)
        # Coding slug in the third line:
        stream = du_io.StringInput(
            source=b'#! python\n'
                   b'# extraneous comment; '
                   b'prevents coding slug from being read\n'
                   b'# -*- coding: ascii -*-\n'
                   b'print("hello world")\n')
        # The source must actually be read; otherwise `successful_encoding`
        # is never set and the assertion below cannot fail.
        stream.read()
        self.assertNotEqual(stream.successful_encoding, 'ascii')

    def test_decode_unicode(self):
        # With the special value "unicode" or "Unicode":
        uniinput = du_io.Input(encoding='unicode')
        # keep unicode instances as-is
        self.assertEqual('ja', uniinput.decode('ja'))
        # raise AssertionError if data is not a `str` instance
        with self.assertRaises(AssertionError):
            uniinput.decode(b'ja')
class OutputTests(unittest.TestCase):
    """Tests for `du_io.FileOutput` writing to text and binary drains."""

    bdata = b'\xfc'
    udata = '\xfc'

    def setUp(self):
        # Buffer accepting binary strings (bytes)
        self.bdrain = BBuf()
        # Buffer accepting unicode strings
        self.udrain = UBuf()
        # Stub of sys.stdout under Python 3
        self.mock_stdout = mock_stdout()

    def test_write_unicode(self):
        output = du_io.FileOutput(destination=self.udrain,
                                  encoding='unicode',
                                  autoclose=False)
        output.write(self.udata)
        written = self.udrain.getvalue()
        self.assertEqual(self.udata, written)

    def test_write_utf8(self):
        output = du_io.FileOutput(destination=self.udrain,
                                  encoding='utf-8',
                                  autoclose=False)
        output.write(self.udata)
        written = self.udrain.getvalue()
        self.assertEqual(self.udata, written)

    def test_FileOutput_hande_io_errors_deprection_warning(self):
        expected_message = '"handle_io_errors" is ignored'
        with self.assertWarnsRegex(DeprecationWarning, expected_message):
            du_io.FileOutput(handle_io_errors=True)

    # With destination in binary mode, data must be binary string
    # and is written as-is:
    def test_write_bytes(self):
        output = du_io.FileOutput(destination=self.bdrain,
                                  encoding='utf-8',
                                  mode='wb',
                                  autoclose=False)
        output.write(self.bdata)
        written = self.bdrain.getvalue()
        self.assertEqual(self.bdata, written)

    def test_write_bytes_to_stdout(self):
        # try writing data to `destination.buffer`, if data is
        # instance of `bytes` and writing to `destination` fails:
        output = du_io.FileOutput(destination=self.mock_stdout)
        output.write(self.bdata)
        written = self.mock_stdout.buffer.getvalue()
        self.assertEqual(self.bdata, written)

    def test_encoding_clash_resolved(self):
        output = du_io.FileOutput(destination=self.mock_stdout,
                                  encoding='latin1',
                                  autoclose=False)
        output.write(self.udata)
        expected = self.udata.encode('latin1')
        self.assertEqual(expected, self.mock_stdout.buffer.getvalue())

    def test_encoding_clash_nonresolvable(self):
        # Without a binary `buffer` the destination has nowhere to put
        # the encoded data.
        del self.mock_stdout.buffer
        output = du_io.FileOutput(destination=self.mock_stdout,
                                  encoding='latin1',
                                  autoclose=False)
        with self.assertRaises(ValueError):
            output.write(self.udata)
class ErrorOutputTests(unittest.TestCase):
    """Tests for `du_io.ErrorOutput` with binary and text destinations."""

    def test_defaults(self):
        error_output = du_io.ErrorOutput()
        self.assertEqual(sys.stderr, error_output.destination)

    def test_bbuf(self):
        drain = BBuf()  # buffer storing byte string
        error_output = du_io.ErrorOutput(drain, encoding='ascii')

        # write byte-string as-is
        error_output.write(b'b\xfc')
        self.assertEqual(b'b\xfc', drain.getvalue())

        # encode unicode data with backslashescape fallback replacement:
        error_output.write(' u\xfc')
        self.assertEqual(b'b\xfc u\\xfc', drain.getvalue())

        # handle Exceptions with Unicode string args
        error_output.write(AttributeError(' e\xfc'))
        self.assertEqual(b'b\xfc u\\xfc e\\xfc', drain.getvalue())

        # encode with `encoding` attribute
        error_output.encoding = 'utf-8'
        error_output.write(' u\xfc')
        self.assertEqual(b'b\xfc u\\xfc e\\xfc u\xc3\xbc', drain.getvalue())

    def test_ubuf(self):
        drain = UBuf()  # buffer only accepting unicode string
        error_output = du_io.ErrorOutput(drain, encoding='ascii')

        # decode of binary strings; use REPLACEMENT CHARACTER
        error_output.write(b'b\xfc')
        self.assertEqual(drain.getvalue(), 'b\ufffd')

        # write Unicode string and Exceptions with Unicode args
        error_output.write(' u\xfc')
        self.assertEqual(drain.getvalue(), 'b\ufffd u\xfc')
        error_output.write(AttributeError(' e\xfc'))
        self.assertEqual(drain.getvalue(), 'b\ufffd u\xfc e\xfc')

        # decode with `encoding` attribute
        error_output.encoding = 'latin1'
        error_output.write(b' b\xfc')
        self.assertEqual(drain.getvalue(), 'b\ufffd u\xfc e\xfc b\xfc')
class FileInputTests(unittest.TestCase):
    """Tests for `du_io.FileInput` reading sample files below DATA_ROOT."""

    # test input encoding auto-detection:
    #
    # Up to Docutils 0.18, auto-detection was not used under Python 3
    # unless reading a file with Python's default encoding failed

    def test_bom_utf_8(self):
        """Drop optional BOM from utf-8 encoded files.
        """
        source = du_io.FileInput(
            source_path=os.path.join(DATA_ROOT, 'utf-8-sig.txt'))
        self.assertTrue(source.read().startswith('Grüße'))

    def test_bom_utf_16(self):
        """Drop BOM from utf-16 encoded files, use correct encoding.
        """
        # Assert correct decoding, BOM is gone.
        source = du_io.FileInput(
            source_path=os.path.join(DATA_ROOT, 'utf-16-le-sig.txt'))
        self.assertTrue(source.read().startswith('Grüße'))

    def test_coding_slug(self):
        """Use self-declared encoding.
        """
        source = du_io.FileInput(
            source_path=os.path.join(DATA_ROOT, 'latin2.txt'))
        self.assertTrue(source.read().endswith('škoda\n'))

    def test_fallback_utf8(self):
        """Try 'utf-8', if encoding is not specified in the source."""
        source = du_io.FileInput(
            source_path=os.path.join(DATA_ROOT, 'utf8.txt'))
        self.assertEqual('Grüße\n', source.read())

    @unittest.skipIf(preferredencoding in (None, 'ascii', 'utf-8'),
                     'locale encoding not set or UTF-8')
    def test_fallback_no_utf8(self):
        # If no encoding is given and decoding with 'utf-8' fails,
        # use the locale's preferred encoding (if not None).
        # Provisional: the default will become 'utf-8'
        # (without auto-detection and fallback) in Docutils 0.22.
        source = du_io.FileInput(
            source_path=os.path.join(DATA_ROOT, 'latin1.txt'))
        data = source.read()
        # Normalize the codec name before comparing it with the
        # (already normalized) `preferredencoding`.
        successful_encoding = codecs.lookup(source.successful_encoding).name
        self.assertEqual(preferredencoding, successful_encoding)
        # The decoded text is only checked if the locale encoding is Latin-1.
        if successful_encoding == 'iso8859-1':
            self.assertEqual('Grüße\n', data)

    def test_readlines(self):
        source = du_io.FileInput(
            source_path=os.path.join(DATA_ROOT, 'include.txt'))
        data = source.readlines()
        self.assertEqual(['Some include text.\n'], data)
# Script entry point: run all tests in this module with the unittest runner.
if __name__ == '__main__':
    unittest.main()