4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This module has been placed in the public domain.
12 import DocutilsTestSupport
# must be imported before docutils
13 from docutils
import io
14 from docutils
._compat
import b
, bytes
15 from docutils
.utils
.error_reporting
import locale_encoding
16 from test_error_reporting
import BBuf
, UBuf
18 class mock_stdout(UBuf
):
class HelperTests(unittest.TestCase):
    # Exercises `io.check_encoding()` with the stub streams of this module.
    # NOTE(review): `mock_stdout` is passed as a class, not an instance; the
    # expected results imply that it reports a UTF-8 encoding -- confirm
    # against its definition.

    def test_check_encoding_true(self):
        """Return `True` if lookup returns the same codec"""
        # Different spellings of the same codec name must all match.
        for spelling in ('utf8', 'utf-8', 'UTF-8'):
            matches = io.check_encoding(mock_stdout, spelling)
            self.assertEqual(matches, True)

    def test_check_encoding_false(self):
        """Return `False` if lookup returns different codecs"""
        for other_codec in ('ascii', 'latin-1'):
            matches = io.check_encoding(mock_stdout, other_codec)
            self.assertEqual(matches, False)

    def test_check_encoding_none(self):
        """Cases where the comparison fails."""
        undecidable = (
            # stream.encoding is None:
            (io.FileInput(), 'ascii'),
            # stream.encoding does not exist:
            (BBuf, 'ascii'),
            # the requested encoding is None:
            (mock_stdout, None),
            # the requested encoding is not a known codec name:
            (mock_stdout, 'UTF-9'),
        )
        for stream, encoding in undecidable:
            self.assertEqual(io.check_encoding(stream, encoding), None)
50 class InputTests(unittest
.TestCase
):
53 input = io
.StringInput(source
=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
55 # Assert BOMs are gone.
56 self
.assertEqual(input.read(), u
' foo bar')
58 input = io
.StringInput(source
=u
'\ufeff foo \ufeff bar')
59 # Assert BOMs are still there.
60 self
.assertEqual(input.read(), u
'\ufeff foo \ufeff bar')
# Coding-slug detection: three byte-string sources carrying an
# ``-*- coding: ascii -*-`` line are wrapped in `io.StringInput`.  The first
# two cases expect `successful_encoding` to be 'ascii'; the third, where an
# extra comment line precedes the slug, expects it NOT to be 'ascii'.
# NOTE(review): the triple-quoted literals (and any statements between them
# and the assertions) are cut off in this view -- restore them from the
# upstream file before relying on this test.
62 def test_coding_slug(self
):
63 input = io
.StringInput(source
=b("""\
64 .. -*- coding: ascii -*-
69 self
.assertEqual(input.successful_encoding
, 'ascii')
70 input = io
.StringInput(source
=b("""\
72 # -*- coding: ascii -*-
76 self
.assertEqual(input.successful_encoding
, 'ascii')
77 input = io
.StringInput(source
=b("""\
79 # extraneous comment; prevents coding slug from being read
80 # -*- coding: ascii -*-
84 self
.assertNotEqual(input.successful_encoding
, 'ascii')
def test_bom_detection(self):
    # A leading byte order mark must determine the input encoding when no
    # encoding is passed to `StringInput`.
    source = u'\ufeffdata\nblah\n'
    for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
        source_input = io.StringInput(source=source.encode(encoding))
        # `successful_encoding` is only set while decoding, so the data
        # has to be read before the attribute can be checked.
        source_input.read()
        self.assertEqual(source_input.successful_encoding, encoding)
def test_readlines(self):
    # `FileInput.readlines()` returns the decoded content as a list of
    # lines; the fixture is expected to hold exactly one line.
    expected_lines = [u'Some include text.\n']
    include_file = io.FileInput(source_path='data/include.txt')
    self.assertEqual(include_file.readlines(), expected_lines)
def test_heuristics_utf8(self):
    # if no encoding is given, try decoding with utf8:
    source_input = io.FileInput(
        source_path='functional/input/cyrillic.txt')
    if sys.version_info < (3,0):
        # in Py3k, the locale encoding is used without --input-encoding
        # skipping the heuristic
        #
        # The data must be decoded first: `successful_encoding` is not
        # set before the input has been read.
        source_input.read()
        self.assertEqual(source_input.successful_encoding, 'utf-8')
def test_heuristics_no_utf8(self):
    # if no encoding is given and decoding with utf8 fails,
    # use either the locale encoding (if specified) or latin1:
    source_input = io.FileInput(source_path='data/latin1.txt')
    # Reading triggers the decoding heuristics and provides the text that
    # is compared below.
    data = source_input.read()
    self.assertTrue(source_input.successful_encoding
                    in (locale_encoding, 'latin-1'))
    # The decoded text can only be pinned down for the latin-1 fallback;
    # with another locale encoding the result may differ.
    if source_input.successful_encoding == 'latin-1':
        self.assertEqual(data, u'Gr\xfc\xdfe\n')
def test_decode_unicode(self):
    # With the special value "unicode" or "Unicode":
    decode = io.Input(encoding='unicode').decode
    # keep unicode instances as-is
    text = u'ja'
    self.assertEqual(decode(text), text)
    # raise AssertionError if data is not a unicode string
    binary = b('ja')
    self.assertRaises(AssertionError, decode, binary)
131 class OutputTests(unittest
.TestCase
):
138 """Buffer accepting binary strings (bytes)"""
140 """Buffer accepting unicode strings"""
141 self
.mock_stdout
= mock_stdout()
142 """Stub of sys.stdout under Python 3"""
def test_write_unicode(self):
    # With encoding='unicode', text is expected to reach the unicode drain
    # unchanged.
    # `autoclose=False` (as in the sibling tests) keeps the drain open so
    # that its content can be inspected after writing.
    fo = io.FileOutput(destination=self.udrain, encoding='unicode',
                       autoclose=False)
    fo.write(self.udata)
    self.assertEqual(self.udrain.getvalue(), self.udata)
# Writing with encoding='utf8': under Python 3 the unicode drain is expected
# to hold the unchanged text; the second group expects the binary drain to
# hold the UTF-8 encoded text.
# NOTE(review): both `FileOutput(...)` calls are cut off after the `encoding`
# argument and no `write()` call is visible in this view -- restore the
# missing lines from the upstream file.
150 def test_write_utf8(self
):
151 if sys
.version_info
>= (3,0):
152 fo
= io
.FileOutput(destination
=self
.udrain
, encoding
='utf8',
155 self
.assertEqual(self
.udrain
.getvalue(), self
.udata
)
# NOTE(review): presumably the following group is the `else` branch
# (Python 2) of the version check above -- confirm.
157 fo
= io
.FileOutput(destination
=self
.bdrain
, encoding
='utf8',
160 self
.assertEqual(self
.bdrain
.getvalue(), self
.udata
.encode('utf8'))
162 # With destination in binary mode, data must be binary string
163 # and is written as-is:
def test_write_bytes(self):
    # Destination opened in binary mode ('wb'): the bytes are expected to
    # arrive in the binary drain exactly as passed in.
    fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
                       mode='wb', autoclose=False)
    # Without this write the drain stays empty and the comparison below
    # could never succeed.
    fo.write(self.bdata)
    self.assertEqual(self.bdrain.getvalue(), self.bdata)
170 # Test for Python 3 features:
171 if sys
.version_info
>= (3,0):
def test_write_bytes_to_stdout(self):
    # try writing data to `destination.buffer`, if data is
    # instance of `bytes` and writing to `destination` fails:
    fo = io.FileOutput(destination=self.mock_stdout)
    fo.write(self.bdata)
    # The bytes must have ended up unchanged in the stub's binary buffer.
    self.assertEqual(self.mock_stdout.buffer.getvalue(),
                     self.bdata)
def test_encoding_clash_resolved(self):
    # The requested output encoding (latin1) differs from the one expected
    # for the stdout stub; the encoded data should then be written to the
    # stub's binary `buffer` instead.
    fo = io.FileOutput(destination=self.mock_stdout,
                       encoding='latin1', autoclose=False)
    # Without this write the buffer stays empty and the comparison below
    # could never succeed.
    fo.write(self.udata)
    self.assertEqual(self.mock_stdout.buffer.getvalue(),
                     self.udata.encode('latin1'))
def test_encoding_clash_nonresolvable(self):
    # Remove the stub's `buffer` attribute, then expect `write()` to raise
    # ValueError when the output is requested as latin1.
    stdout_stub = self.mock_stdout
    del stdout_stub.buffer
    output = io.FileOutput(destination=stdout_stub,
                           encoding='latin1', autoclose=False)
    self.assertRaises(ValueError, output.write, self.udata)
194 if __name__
== '__main__':