Formatting changes to facilitate integration of "py3" patchset.
[docutils.git] / docutils / test / test_io.py
blob 5f7f814e3d2d3921c647090d0b67c95b73f5611d
1 #! /usr/bin/env python
3 # $Id$
4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This module has been placed in the public domain.
7 """
8 Test module for io.py.
9 """
11 import unittest, sys
12 import DocutilsTestSupport # must be imported before docutils
13 from docutils import io
14 from docutils.utils.error_reporting import locale_encoding
15 from test_error_reporting import BBuf, UBuf
class mock_stdout(UBuf):
    """Stand-in for a text-mode ``sys.stdout`` that also has a ``buffer``.

    ``encoding`` is a class attribute, so it can be read from the class
    itself as well as from instances.
    """

    encoding = 'utf8'

    def __init__(self):
        # Attach the byte-oriented companion stream before the text
        # buffer base class initialises itself.
        self.buffer = BBuf()
        UBuf.__init__(self)
class HelperTests(unittest.TestCase):
    """Tests for the ``io.check_encoding`` helper."""

    def test_check_encoding_true(self):
        """Return `True` if lookup returns the same codec"""
        # Different spellings of the stream's own encoding.
        for alias in ('utf8', 'utf-8', 'UTF-8'):
            self.assertEqual(io.check_encoding(mock_stdout, alias), True)

    def test_check_encoding_false(self):
        """Return `False` if lookup returns different codecs"""
        for other in ('ascii', 'latin-1'):
            self.assertEqual(io.check_encoding(mock_stdout, other), False)

    def test_check_encoding_none(self):
        """Cases where the comparison fails."""
        undecidable = (
            (io.FileInput(), 'ascii'),  # stream.encoding is None
            (BBuf, 'ascii'),            # stream.encoding does not exist
            (mock_stdout, None),        # encoding is None
            (mock_stdout, 'UTF-9'),     # encoding is invalid
        )
        for stream, encoding in undecidable:
            self.assertEqual(io.check_encoding(stream, encoding), None)
class InputTests(unittest.TestCase):
    """Tests for decoding and encoding detection of `io` input classes."""

    def test_bom(self):
        # Byte input declared as UTF-8 with a leading and an embedded BOM.
        raw = b'\xef\xbb\xbf foo \xef\xbb\xbf bar'
        source = io.StringInput(source=raw, encoding='utf8')
        # Assert BOMs are gone.  Dropping the embedded BOM leaves the two
        # spaces that surrounded it next to each other.
        self.assertEqual(source.read(), u' foo  bar')
        # With unicode input:
        source = io.StringInput(source=u'\ufeff foo \ufeff bar')
        # Assert BOMs are still there.
        self.assertEqual(source.read(), u'\ufeff foo \ufeff bar')

    def test_coding_slug(self):
        # Coding slug on the first line.
        source = io.StringInput(source=b"""\
.. -*- coding: ascii -*-
data
blah
""")
        # `successful_encoding` is checked after the read; the decoded
        # text itself is not needed here.
        source.read()
        self.assertEqual(source.successful_encoding, 'ascii')
        # Coding slug on the second line, after a "#!" line.
        source = io.StringInput(source=b"""\
#! python
# -*- coding: ascii -*-
print("hello world")
""")
        source.read()
        self.assertEqual(source.successful_encoding, 'ascii')
        # Coding slug on the third line: must not be picked up.
        source = io.StringInput(source=b"""\
#! python
# extraneous comment; prevents coding slug from being read
# -*- coding: ascii -*-
print("hello world")
""")
        source.read()
        self.assertNotEqual(source.successful_encoding, 'ascii')

    def test_bom_detection(self):
        # The same BOM-prefixed text, encoded three ways; the reported
        # encoding must match the one used for encoding.
        text = u'\ufeffdata\nblah\n'
        for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
            source = io.StringInput(source=text.encode(encoding))
            source.read()
            self.assertEqual(source.successful_encoding, encoding)

    def test_readlines(self):
        source = io.FileInput(source_path='data/include.txt')
        lines = source.readlines()
        self.assertEqual(lines, [u'Some include text.\n'])

    def test_heuristics_utf8(self):
        # if no encoding is given, try decoding with utf8:
        source = io.FileInput(source_path='functional/input/cyrillic.txt')
        source.read()
        if sys.version_info < (3, 0):
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic
            self.assertEqual(source.successful_encoding, 'utf-8')

    def test_heuristics_no_utf8(self):
        # if no encoding is given and decoding with utf8 fails,
        # use either the locale encoding (if specified) or latin-1:
        #
        # NOTE(review): the check below matches only the exact spelling
        # "utf8"; confirm that `locale_encoding` is normalised to that
        # form, otherwise this test always returns early on Python 3.
        if sys.version_info >= (3, 0) and locale_encoding != "utf8":
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic unless decoding fails.
            return
        probed_encodings = (locale_encoding, 'latin-1')
        source = io.FileInput(source_path='data/latin1.txt')
        data = source.read()
        if source.successful_encoding not in probed_encodings:
            self.fail(
                "guessed encoding '%s' differs from probed encodings %r"
                % (source.successful_encoding, probed_encodings))
        # The decoded text is only predictable for the latin-1 fallback.
        if source.successful_encoding == 'latin-1':
            self.assertEqual(data, u'Gr\xfc\xdfe\n')

    def test_decode_unicode(self):
        # With the special value "unicode" or "Unicode":
        uniinput = io.Input(encoding='unicode')
        # keep unicode instances as-is
        self.assertEqual(uniinput.decode(u'ja'), u'ja')
        # raise AssertionError if data is not an unicode string
        self.assertRaises(AssertionError, uniinput.decode, b'ja')

    def test_deprecation_warning(self):
        # Test deprecation warning of 'U' universal newlines mode.
        # TODO remove with 3.4 support end

        # Arrange
        import warnings
        with warnings.catch_warnings(record=True) as caught:
            # Cause all warnings to always be triggered
            warnings.simplefilter("always", DeprecationWarning)

            # Act
            # Trigger a warning?
            io.FileInput(source_path='data/include.txt').close()

            # Assert
            self.assertEqual(len(caught), 0,
                             "Expected no warnings, got %s" %
                             [v.category for v in caught])
class OutputTests(unittest.TestCase):
    """Tests for writing unicode and byte data through ``io.FileOutput``."""

    bdata = b'\xfc'
    udata = u'\xfc'

    def setUp(self):
        # Buffer accepting binary strings (bytes)
        self.bdrain = BBuf()
        # Buffer accepting unicode strings
        self.udrain = UBuf()
        # Stub of sys.stdout under Python 3
        self.mock_stdout = mock_stdout()

    def test_write_unicode(self):
        out = io.FileOutput(destination=self.udrain, encoding='unicode',
                            autoclose=False)
        out.write(self.udata)
        self.assertEqual(self.udrain.getvalue(), self.udata)

    def test_write_utf8(self):
        # Python 3 writes to the unicode drain and expects the text
        # unchanged; Python 2 writes to the byte drain and expects the
        # UTF-8 encoded bytes.
        if sys.version_info >= (3, 0):
            drain = self.udrain
            expected = self.udata
        else:
            drain = self.bdrain
            expected = self.udata.encode('utf8')
        out = io.FileOutput(destination=drain, encoding='utf8',
                            autoclose=False)
        out.write(self.udata)
        self.assertEqual(drain.getvalue(), expected)

    # With destination in binary mode, data must be binary string
    # and is written as-is:
    def test_write_bytes(self):
        out = io.FileOutput(destination=self.bdrain, encoding='utf8',
                            mode='wb', autoclose=False)
        out.write(self.bdata)
        self.assertEqual(self.bdrain.getvalue(), self.bdata)

    # Test for Python 3 features:
    if sys.version_info >= (3, 0):
        def test_write_bytes_to_stdout(self):
            # try writing data to `destination.buffer`, if data is
            # instance of `bytes` and writing to `destination` fails:
            out = io.FileOutput(destination=self.mock_stdout)
            out.write(self.bdata)
            written = self.mock_stdout.buffer.getvalue()
            self.assertEqual(written, self.bdata)

        def test_encoding_clash_resolved(self):
            # The requested encoding differs from the stream's own one;
            # the encoded bytes are expected in the stream's `buffer`.
            out = io.FileOutput(destination=self.mock_stdout,
                                encoding='latin1', autoclose=False)
            out.write(self.udata)
            written = self.mock_stdout.buffer.getvalue()
            self.assertEqual(written, self.udata.encode('latin1'))

        def test_encoding_clash_nonresolvable(self):
            # Same clash, but with the `buffer` attribute removed the
            # write is expected to raise ValueError.
            del self.mock_stdout.buffer
            out = io.FileOutput(destination=self.mock_stdout,
                                encoding='latin1', autoclose=False)
            self.assertRaises(ValueError, out.write, self.udata)
# Run the tests when this module is executed as a script.
if __name__ == '__main__':
    unittest.main()