Update policies on branches and versioning.
[docutils.git] / docutils / test / test_io.py
blob003203137d005d714cf32c80b9f20428d21eca3e
#! /usr/bin/env python

# $Id$
# Author: Lea Wiemann <LeWiemann@gmail.com>
# Copyright: This module has been placed in the public domain.

"""
Test module for io.py.
"""
11 import unittest, sys
12 import DocutilsTestSupport # must be imported before docutils
13 from docutils import io
14 from docutils._compat import b, bytes
15 from docutils.utils.error_reporting import locale_encoding
16 from test_error_reporting import BBuf, UBuf
class mock_stdout(UBuf):
    """Stub of a text-mode `sys.stdout` (as under Python 3).

    Instances are unicode buffers (`UBuf`) that additionally carry a
    binary `buffer` attribute (`BBuf`).
    """

    # Class-level attribute, so the class object itself can be handed to
    # code that only inspects `stream.encoding`.
    encoding = 'utf8'

    def __init__(self):
        # Binary side channel of the text stream.
        self.buffer = BBuf()
        UBuf.__init__(self)
class HelperTests(unittest.TestCase):
    """Tests for the helper function `io.check_encoding()`."""

    def test_check_encoding_true(self):
        """Return `True` if lookup returns the same codec"""
        self.assertEqual(io.check_encoding(mock_stdout, 'utf8'), True)
        self.assertEqual(io.check_encoding(mock_stdout, 'utf-8'), True)
        self.assertEqual(io.check_encoding(mock_stdout, 'UTF-8'), True)

    def test_check_encoding_false(self):
        """Return `False` if lookup returns different codecs"""
        self.assertEqual(io.check_encoding(mock_stdout, 'ascii'), False)
        self.assertEqual(io.check_encoding(mock_stdout, 'latin-1'), False)

    def test_check_encoding_none(self):
        """Cases where the comparison fails."""
        # stream.encoding is None:
        self.assertEqual(io.check_encoding(io.FileInput(), 'ascii'), None)
        # stream.encoding does not exist:
        self.assertEqual(io.check_encoding(BBuf, 'ascii'), None)
        # encoding is None:
        self.assertEqual(io.check_encoding(mock_stdout, None), None)
        # encoding is invalid
        self.assertEqual(io.check_encoding(mock_stdout, 'UTF-9'), None)
class InputTests(unittest.TestCase):
    """Tests for decoding and encoding detection of the `io` input classes."""

    def test_bom(self):
        """BOMs are dropped from decoded bytes but kept in unicode input."""
        bytes_input = io.StringInput(
            source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'), encoding='utf8')
        # Assert BOMs are gone.  Dropping the inner BOM leaves the two
        # spaces that surrounded it next to each other.
        self.assertEqual(bytes_input.read(), u' foo  bar')
        # With unicode input:
        uni_input = io.StringInput(source=u'\ufeff foo \ufeff bar')
        # Assert BOMs are still there.
        self.assertEqual(uni_input.read(), u'\ufeff foo \ufeff bar')

    def test_coding_slug(self):
        """A coding slug is honoured on the first two lines only."""
        # Slug on the first line:
        string_input = io.StringInput(source=b("""\
.. -*- coding: ascii -*-
data
blah
"""))
        # `successful_encoding` is only checked after the data was read.
        string_input.read()
        self.assertEqual(string_input.successful_encoding, 'ascii')
        # Slug on the second line:
        string_input = io.StringInput(source=b("""\
#! python
# -*- coding: ascii -*-
print "hello world"
"""))
        string_input.read()
        self.assertEqual(string_input.successful_encoding, 'ascii')
        # Slug on the third line must be ignored:
        string_input = io.StringInput(source=b("""\
#! python
# extraneous comment; prevents coding slug from being read
# -*- coding: ascii -*-
print "hello world"
"""))
        string_input.read()
        self.assertNotEqual(string_input.successful_encoding, 'ascii')

    def test_bom_detection(self):
        """The input encoding is derived from a leading BOM."""
        source = u'\ufeffdata\nblah\n'
        for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
            string_input = io.StringInput(source=source.encode(encoding))
            string_input.read()
            self.assertEqual(string_input.successful_encoding, encoding)

    def test_readlines(self):
        """`readlines()` returns the file content as a list of lines."""
        file_input = io.FileInput(source_path='data/include.txt')
        self.assertEqual(file_input.readlines(), [u'Some include text.\n'])

    def test_heuristics_utf8(self):
        # if no encoding is given, try decoding with utf8:
        file_input = io.FileInput(source_path='functional/input/cyrillic.txt')
        file_input.read()
        if sys.version_info < (3,0):
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic
            self.assertEqual(file_input.successful_encoding, 'utf-8')

    def test_heuristics_no_utf8(self):
        # if no encoding is given and decoding with utf8 fails,
        # use either the locale encoding (if specified) or latin-1:
        if sys.version_info >= (3,0) and locale_encoding != "utf8":
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic unless decoding fails.
            return
        probed_encodings = (locale_encoding, 'latin-1')
        file_input = io.FileInput(source_path='data/latin1.txt')
        data = file_input.read()
        if file_input.successful_encoding not in probed_encodings:
            self.fail(
                "guessed encoding '%s' differs from probed encodings %r"
                % (file_input.successful_encoding, probed_encodings))
        # The decoded text is only known for the latin-1 fallback.
        if file_input.successful_encoding == 'latin-1':
            self.assertEqual(data, u'Gr\xfc\xdfe\n')

    def test_decode_unicode(self):
        # With the special value "unicode" or "Unicode":
        uniinput = io.Input(encoding='unicode')
        # keep unicode instances as-is
        self.assertEqual(uniinput.decode(u'ja'), u'ja')
        # raise AssertionError if data is not an unicode string
        self.assertRaises(AssertionError, uniinput.decode, b('ja'))
class OutputTests(unittest.TestCase):
    """Tests for writing unicode and binary data with `io.FileOutput`."""

    # The same character (u-umlaut) as one latin-1 byte and as unicode.
    bdata = b('\xfc')
    udata = u'\xfc'

    def setUp(self):
        """Create fresh output buffers for every test."""
        # Buffer accepting binary strings (bytes)
        self.bdrain = BBuf()
        # Buffer accepting unicode strings
        self.udrain = UBuf()
        # Stub of sys.stdout under Python 3
        self.mock_stdout = mock_stdout()

    def test_write_unicode(self):
        """With encoding 'unicode', text reaches the destination as-is."""
        fo = io.FileOutput(destination=self.udrain, encoding='unicode',
                           autoclose=False)
        fo.write(self.udata)
        self.assertEqual(self.udrain.getvalue(), self.udata)

    def test_write_utf8(self):
        """Unicode data written with encoding 'utf8'.

        Under Python 3 the unicode buffer is expected to hold the
        unchanged text, under Python 2 the binary buffer is expected to
        hold the utf8-encoded text.
        """
        if sys.version_info >= (3,0):
            fo = io.FileOutput(destination=self.udrain, encoding='utf8',
                               autoclose=False)
            fo.write(self.udata)
            self.assertEqual(self.udrain.getvalue(), self.udata)
        else:
            fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
                               autoclose=False)
            fo.write(self.udata)
            self.assertEqual(self.bdrain.getvalue(),
                             self.udata.encode('utf8'))

    # With destination in binary mode, data must be binary string
    # and is written as-is:
    def test_write_bytes(self):
        fo = io.FileOutput(destination=self.bdrain, encoding='utf8',
                           mode='wb', autoclose=False)
        fo.write(self.bdata)
        self.assertEqual(self.bdrain.getvalue(), self.bdata)

    # Test for Python 3 features:
    # NOTE(review): all three tests below use the `sys.stdout` stub, so they
    # are presumably all meant to sit under this version guard -- confirm.
    if sys.version_info >= (3,0):
        def test_write_bytes_to_stdout(self):
            # try writing data to `destination.buffer`, if data is
            # instance of `bytes` and writing to `destination` fails:
            fo = io.FileOutput(destination=self.mock_stdout)
            fo.write(self.bdata)
            self.assertEqual(self.mock_stdout.buffer.getvalue(),
                             self.bdata)

        def test_encoding_clash_resolved(self):
            """Encoding differs from the stream's: bytes go to `buffer`."""
            fo = io.FileOutput(destination=self.mock_stdout,
                               encoding='latin1', autoclose=False)
            fo.write(self.udata)
            self.assertEqual(self.mock_stdout.buffer.getvalue(),
                             self.udata.encode('latin1'))

        def test_encoding_clash_nonresolvable(self):
            """Without a `buffer` attribute, the encoding clash is an error."""
            del self.mock_stdout.buffer
            fo = io.FileOutput(destination=self.mock_stdout,
                               encoding='latin1', autoclose=False)
            self.assertRaises(ValueError, fo.write, self.udata)
# Run the tests of this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()