# Author: Lea Wiemann <LeWiemann@gmail.com>
# Copyright: This module has been placed in the public domain.
import sys
import unittest

import DocutilsTestSupport              # must be imported before docutils
from docutils import io
from docutils._compat import b, bytes
from docutils.error_reporting import locale_encoding
class InputTests(unittest.TestCase):
    """Tests for decoding and encoding detection in ``docutils.io`` inputs.

    Every test builds a ``StringInput`` or ``FileInput``, reads it, and then
    checks the decoded text and/or the ``successful_encoding`` attribute.
    """

    def test_bom(self):
        """Byte-string input loses its BOMs; unicode input keeps them."""
        # NOTE(review): the ``def`` line and the argument following
        # ``source=`` were not visible in the reviewed text.  The method
        # name and ``encoding='utf8'`` are reconstructed -- confirm against
        # the file history.
        byte_input = io.StringInput(
            source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
            encoding='utf8')
        # Assert BOMs are gone.  The second BOM sat between two spaces, so
        # removing it leaves those two spaces adjacent.
        self.assertEqual(byte_input.read(), u' foo  bar')

        unicode_input = io.StringInput(source=u'\ufeff foo \ufeff bar')
        # Assert BOMs are still there.
        self.assertEqual(unicode_input.read(), u'\ufeff foo \ufeff bar')

    def test_coding_slug(self):
        """A ``coding:`` slug is honoured only near the top of the data."""
        # NOTE(review): the reviewed text showed only the slug/comment lines
        # of each literal; the remaining lines of the literals were elided.
        # The ``#! python`` first lines below are assumed -- the third case
        # needs some line ahead of the comment so that the slug falls on
        # line 3 (docutils presumably inspects only the first two lines;
        # verify against ``docutils.io``).

        # Slug inside a reST comment on the first line.
        rst_input = io.StringInput(source=b(
            '.. -*- coding: ascii -*-\n'))
        rst_input.read()  # successful_encoding is only set by decoding
        self.assertEqual(rst_input.successful_encoding, 'ascii')

        # Slug inside a ``#`` comment directly below a shebang line.
        script_input = io.StringInput(source=b(
            '#! python\n'
            '# -*- coding: ascii -*-\n'))
        script_input.read()
        self.assertEqual(script_input.successful_encoding, 'ascii')

        # Slug pushed down by an extra comment line: must not be picked up.
        late_slug_input = io.StringInput(source=b(
            '#! python\n'
            '# extraneous comment; prevents coding slug from being read\n'
            '# -*- coding: ascii -*-\n'))
        late_slug_input.read()
        self.assertNotEqual(late_slug_input.successful_encoding, 'ascii')

    def test_bom_detection(self):
        """The encoding is derived from the BOM that starts the data."""
        source = u'\ufeffdata\nblah\n'
        for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
            encoded_input = io.StringInput(source=source.encode(encoding))
            # Reading triggers decoding, which records the encoding used.
            encoded_input.read()
            self.assertEqual(encoded_input.successful_encoding, encoding)

    def test_readlines(self):
        """``FileInput.readlines()`` returns the decoded lines as a list."""
        file_input = io.FileInput(source_path='data/include.txt')
        data = file_input.readlines()
        self.assertEqual(data, [u'Some include text.\n'])

    def test_heuristics_utf8(self):
        """Without an explicit encoding, UTF-8 is tried first (Python 2)."""
        # if no encoding is given, try decoding with utf8:
        file_input = io.FileInput(
            source_path='functional/input/cyrillic.txt')
        file_input.read()
        if sys.version_info < (3, 0):
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic
            self.assertEqual(file_input.successful_encoding, 'utf-8')

    def test_heuristics_no_utf8(self):
        """Non-UTF-8 data falls back to the locale encoding or latin1."""
        # if no encoding is given and decoding with utf8 fails,
        # use either the locale encoding (if specified) or latin1:
        file_input = io.FileInput(source_path='data/latin1.txt')
        data = file_input.read()
        self.assertTrue(file_input.successful_encoding in (locale_encoding,
                                                           'latin-1'))
        # The decoded text can only be pinned down when latin1 was used;
        # with another locale encoding the result is not predictable here.
        if file_input.successful_encoding == 'latin-1':
            self.assertEqual(data, u'Gr\xfc\xdfe\n')
90 if __name__
== '__main__':