4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This module has been placed in the public domain.
12 import DocutilsTestSupport
# must be imported before docutils
13 from docutils
import io
14 from docutils
._compat
import b
, bytes
15 from docutils
.error_reporting
import locale_encoding
16 from test_error_reporting
import BBuf
, UBuf
# Compatibility shim for unittest versions whose ``TestCase`` lacks
# ``assertTrue``: fall back to the ``failUnless`` method.
# The alias has to be installed on ``TestCase`` itself, otherwise
# ``self.assertTrue(...)`` inside the test cases would still raise
# AttributeError.  The module-level name is kept for backward compatibility.
if not hasattr(unittest.TestCase, "assertTrue"):
    assertTrue = unittest.TestCase.failUnless
    unittest.TestCase.assertTrue = assertTrue
22 class mock_stdout(UBuf
):
class HelperTests(unittest.TestCase):
    """Tests for `io.check_encoding()`."""

    def test_check_encoding_true(self):
        """Return `True` if lookup returns the same codec"""
        # Different spellings of the same codec name must all match.
        for spelling in ('utf8', 'utf-8', 'UTF-8'):
            matches = io.check_encoding(mock_stdout, spelling)
            self.assertEqual(matches, True)

    def test_check_encoding_false(self):
        """Return `False` if lookup returns different codecs"""
        for other_codec in ('ascii', 'latin-1'):
            matches = io.check_encoding(mock_stdout, other_codec)
            self.assertEqual(matches, False)

    def test_check_encoding_none(self):
        """Cases where the comparison fails."""
        # stream.encoding is None:
        bare_input = io.FileInput()
        self.assertEqual(io.check_encoding(bare_input, 'ascii'), None)
        # stream.encoding does not exist:
        self.assertEqual(io.check_encoding(BBuf, 'ascii'), None)
        # `None` is passed as the encoding to compare against:
        self.assertEqual(io.check_encoding(mock_stdout, None), None)
        # 'UTF-9' is passed as the encoding to compare against:
        self.assertEqual(io.check_encoding(mock_stdout, 'UTF-9'), None)
54 class InputTests(unittest
.TestCase
):
57 input = io
.StringInput(source
=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
59 # Assert BOMs are gone.
60 self
.assertEqual(input.read(), u
' foo bar')
62 input = io
.StringInput(source
=u
'\ufeff foo \ufeff bar')
63 # Assert BOMs are still there.
64 self
.assertEqual(input.read(), u
'\ufeff foo \ufeff bar')
66 def test_coding_slug(self
):
67 input = io
.StringInput(source
=b("""\
68 .. -*- coding: ascii -*-
73 self
.assertEqual(input.successful_encoding
, 'ascii')
74 input = io
.StringInput(source
=b("""\
76 # -*- coding: ascii -*-
80 self
.assertEqual(input.successful_encoding
, 'ascii')
81 input = io
.StringInput(source
=b("""\
83 # extraneous comment; prevents coding slug from being read
84 # -*- coding: ascii -*-
88 self
.assertNotEqual(input.successful_encoding
, 'ascii')
90 def test_bom_detection(self
):
91 source
= u
'\ufeffdata\nblah\n'
92 input = io
.StringInput(source
=source
.encode('utf-16-be'))
94 self
.assertEqual(input.successful_encoding
, 'utf-16-be')
95 input = io
.StringInput(source
=source
.encode('utf-16-le'))
97 self
.assertEqual(input.successful_encoding
, 'utf-16-le')
98 input = io
.StringInput(source
=source
.encode('utf-8'))
100 self
.assertEqual(input.successful_encoding
, 'utf-8')
def test_readlines(self):
    """`FileInput.readlines()` on 'data/include.txt' yields its one line."""
    include_file = io.FileInput(source_path='data/include.txt')
    lines = include_file.readlines()
    expected = [u'Some include text.\n']
    self.assertEqual(lines, expected)
107 def test_heuristics_utf8(self
):
108 # if no encoding is given, try decoding with utf8:
109 input = io
.FileInput(source_path
='functional/input/cyrillic.txt')
111 if sys
.version_info
< (3,0):
112 # in Py3k, the locale encoding is used without --input-encoding
113 # skipping the heuristic
114 self
.assertEqual(input.successful_encoding
, 'utf-8')
116 def test_heuristics_no_utf8(self
):
117 # if no encoding is given and decoding with utf8 fails,
118 # use either the locale encoding (if specified) or latin1:
119 input = io
.FileInput(source_path
='data/latin1.txt')
121 self
.assertTrue(input.successful_encoding
in (locale_encoding
,
123 if input.successful_encoding
== 'latin-1':
124 self
.assertEqual(data
, u
'Gr\xfc\xdfe\n')
127 class OutputTests(unittest
.TestCase
):
134 """Buffer accepting binary strings (bytes)"""
136 """Buffer accepting unicode strings"""
137 self
.mock_stdout
= mock_stdout()
138 """Stub of sys.stdout under Python 3"""
140 def test_write_unicode(self
):
141 fo
= io
.FileOutput(destination
=self
.udrain
, encoding
='unicode',
144 self
.assertEqual(self
.udrain
.getvalue(), self
.udata
)
146 def test_write_utf8(self
):
147 if sys
.version_info
>= (3,0):
148 fo
= io
.FileOutput(destination
=self
.udrain
, encoding
='utf8',
151 self
.assertEqual(self
.udrain
.getvalue(), self
.udata
)
153 fo
= io
.FileOutput(destination
=self
.bdrain
, encoding
='utf8',
156 self
.assertEqual(self
.bdrain
.getvalue(), self
.udata
.encode('utf8'))
158 # With destination in binary mode, data must be binary string
159 # and is written as-is:
160 def test_write_bytes(self
):
161 fo
= io
.FileOutput(destination
=self
.bdrain
, encoding
='utf8',
162 mode
='wb', autoclose
=False)
164 self
.assertEqual(self
.bdrain
.getvalue(), self
.bdata
)
166 # Test for Python 3 features:
167 if sys
.version_info
>= (3,0):
168 def test_write_bytes_to_stdout(self
):
169 # binary data is written to destination.buffer, if the
170 # destination is sys.stdout or sys.stdin
172 sys
.stdout
= self
.mock_stdout
173 fo
= io
.FileOutput(destination
=sys
.stdout
, mode
='wb',
176 self
.assertEqual(self
.mock_stdout
.buffer.getvalue(),
180 def test_encoding_clash(self
):
181 # Raise error, if given and destination encodings differ
182 # TODO: try the `write to .buffer` scheme instead?
183 self
.assertRaises(ValueError,
184 io
.FileOutput
, destination
=self
.mock_stdout
,
188 if __name__
== '__main__':