Fixup: safer implementation of binary data output under Python 3.
[docutils.git] / test / test_io.py
blob1d213ce7a1e5276668466d2c2eef0103b8980355
1 #! /usr/bin/env python
3 # $Id$
4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This module has been placed in the public domain.
7 """
8 Test module for io.py.
9 """
11 import unittest, sys
12 import DocutilsTestSupport # must be imported before docutils
13 from docutils import io
14 from docutils._compat import b, bytes
15 from docutils.error_reporting import locale_encoding
16 from test_error_reporting import BBuf, UBuf
# Python 2.3 compatibility: older `unittest.TestCase` classes only provide
# `failUnless`.  The alias has to be installed on the class itself -- a
# module-level `assertTrue` name would never be found by the
# `self.assertTrue(...)` calls in the test cases below.
if not hasattr(unittest.TestCase, "assertTrue"):
    unittest.TestCase.assertTrue = unittest.TestCase.failUnless
class mock_stdout(UBuf):
    """Stand-in for a text-mode ``sys.stdout`` as found under Python 3.

    Text written to the object itself is handled by the `UBuf` base class;
    the `buffer` attribute is a separate `BBuf` instance for binary data.
    """

    # Advertised stream encoding (compared against by `io.check_encoding`).
    encoding = 'utf8'

    def __init__(self):
        # Attach the binary drain first, then run the base initialiser
        # (same order as before; `UBuf.__init__` is not visible here).
        self.buffer = BBuf()
        UBuf.__init__(self)
class HelperTests(unittest.TestCase):
    """Tests for the `io.check_encoding` helper."""

    def test_check_encoding_true(self):
        """Return `True` if lookup returns the same codec"""
        # All spellings name the codec that `mock_stdout.encoding` names.
        for alias in ('utf8', 'utf-8', 'UTF-8'):
            self.assertEqual(io.check_encoding(mock_stdout, alias), True)

    def test_check_encoding_false(self):
        """Return `False` if lookup returns different codecs"""
        for other in ('ascii', 'latin-1'):
            self.assertEqual(io.check_encoding(mock_stdout, other), False)

    def test_check_encoding_none(self):
        """Cases where the comparison fails."""
        undecidable = (
            (io.FileInput(), 'ascii'),  # stream.encoding is None
            (BBuf, 'ascii'),            # stream.encoding does not exist
            (mock_stdout, None),        # encoding is None
            (mock_stdout, 'UTF-9'),     # encoding is invalid
        )
        for stream, encoding in undecidable:
            self.assertEqual(io.check_encoding(stream, encoding), None)
class InputTests(unittest.TestCase):
    """Tests for decoding and encoding detection of the `io` input classes.

    The file-based tests use paths relative to the test root directory.
    """

    def test_bom(self):
        # Byte-string source: the BOMs must be gone after decoding.
        encoded = b('\xef\xbb\xbf foo \xef\xbb\xbf bar')
        source_input = io.StringInput(source=encoded, encoding='utf8')
        # Dropping the inner BOM leaves the two spaces that surrounded it
        # next to each other, hence the double space in the expected value.
        self.assertEqual(source_input.read(), u' foo  bar')
        # Unicode source: nothing is decoded, the BOMs are still there.
        source_input = io.StringInput(source=u'\ufeff foo \ufeff bar')
        self.assertEqual(source_input.read(), u'\ufeff foo \ufeff bar')

    def test_coding_slug(self):
        # `read()` is called for its side effect only: it sets
        # `successful_encoding`, which is what gets checked.

        # Coding slug in the first line.
        source_input = io.StringInput(source=b("""\
.. -*- coding: ascii -*-
data
blah
"""))
        source_input.read()
        self.assertEqual(source_input.successful_encoding, 'ascii')

        # Coding slug in the second line, after a shebang line.
        source_input = io.StringInput(source=b("""\
#! python
# -*- coding: ascii -*-
print "hello world"
"""))
        source_input.read()
        self.assertEqual(source_input.successful_encoding, 'ascii')

        # Coding slug in the third line: too late to be recognised.
        source_input = io.StringInput(source=b("""\
#! python
# extraneous comment; prevents coding slug from being read
# -*- coding: ascii -*-
print "hello world"
"""))
        source_input.read()
        self.assertNotEqual(source_input.successful_encoding, 'ascii')

    def test_bom_detection(self):
        # The encoding is to be detected from the leading BOM.
        text = u'\ufeffdata\nblah\n'
        for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
            source_input = io.StringInput(source=text.encode(encoding))
            source_input.read()
            self.assertEqual(source_input.successful_encoding, encoding)

    def test_readlines(self):
        file_input = io.FileInput(source_path='data/include.txt')
        lines = file_input.readlines()
        self.assertEqual(lines, [u'Some include text.\n'])

    def test_heuristics_utf8(self):
        # if no encoding is given, try decoding with utf8:
        file_input = io.FileInput(
            source_path='functional/input/cyrillic.txt')
        file_input.read()
        if sys.version_info < (3, 0):
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic
            self.assertEqual(file_input.successful_encoding, 'utf-8')

    def test_heuristics_no_utf8(self):
        # if no encoding is given and decoding with utf8 fails,
        # use either the locale encoding (if specified) or latin1:
        file_input = io.FileInput(source_path='data/latin1.txt')
        data = file_input.read()
        self.assertTrue(file_input.successful_encoding in (locale_encoding,
                                                           'latin-1'))
        # The decoded text can only be predicted for the latin-1 fallback.
        if file_input.successful_encoding == 'latin-1':
            self.assertEqual(data, u'Gr\xfc\xdfe\n')
class OutputTests(unittest.TestCase):
    """Tests for writing text and binary data with `io.FileOutput`."""

    # The same character (u-umlaut) as a one-byte binary string and as text.
    bdata = b('\xfc')
    udata = u'\xfc'

    def setUp(self):
        # Fresh drains for every test, so that `getvalue()` only shows
        # what the current test has written.
        self.bdrain = BBuf()
        """Buffer accepting binary strings (bytes)"""
        self.udrain = UBuf()
        """Buffer accepting unicode strings"""
        self.mock_stdout = mock_stdout()
        """Stub of sys.stdout under Python 3"""

    def test_write_unicode(self):
        fo = io.FileOutput(destination=self.udrain, encoding='unicode',
                           autoclose=False)
        fo.write(self.udata)
        self.assertEqual(self.udrain.getvalue(), self.udata)

    def test_write_utf8(self):
        if sys.version_info >= (3, 0):
            # Python 3: the text drain receives the unencoded string.
            fo = io.FileOutput(destination=self.udrain, encoding='utf8',
                               autoclose=False)
            fo.write(self.udata)
            self.assertEqual(self.udrain.getvalue(), self.udata)
        else:
            # Python 2: the binary drain receives the utf8-encoded string.
            fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
                               autoclose=False)
            fo.write(self.udata)
            self.assertEqual(self.bdrain.getvalue(),
                             self.udata.encode('utf8'))

    # With destination in binary mode, data must be binary string
    # and is written as-is:
    def test_write_bytes(self):
        fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
                           mode='wb', autoclose=False)
        fo.write(self.bdata)
        self.assertEqual(self.bdrain.getvalue(), self.bdata)

    # Test for Python 3 features:
    # NOTE(review): both tests below are assumed to belong inside this
    # version guard -- confirm against the original indentation.
    if sys.version_info >= (3, 0):
        def test_write_bytes_to_stdout(self):
            # binary data is written to destination.buffer, if the
            # destination is sys.stdout
            backup = sys.stdout
            sys.stdout = self.mock_stdout
            try:
                fo = io.FileOutput(destination=sys.stdout, mode='wb',
                                   autoclose=False)
                fo.write(self.bdata)
                self.assertEqual(self.mock_stdout.buffer.getvalue(),
                                 self.bdata)
            finally:
                # Always put the real stream back: a failing write or
                # assertion must not leave the stub installed for the
                # tests that run afterwards.
                sys.stdout = backup

        def test_encoding_clash(self):
            # Raise error, if given and destination encodings differ
            # TODO: try the `write to .buffer` scheme instead?
            self.assertRaises(ValueError,
                              io.FileOutput, destination=self.mock_stdout,
                              encoding='latin1')
# Script entry point: run all test cases of this module.
if __name__ == '__main__':
    unittest.main()