# Author: Lea Wiemann <LeWiemann@gmail.com>
# Copyright: This module has been placed in the public domain.
12 import DocutilsTestSupport
# must be imported before docutils
13 from docutils
import io
14 from docutils
._compat
import b
, bytes
17 class InputTests(unittest
.TestCase
):
20 input = io
.StringInput(source
=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
22 # Assert BOMs are gone.
23 self
.assertEqual(input.read(), u
' foo bar')
25 input = io
.StringInput(source
=u
'\ufeff foo \ufeff bar')
26 # Assert BOMs are still there.
27 self
.assertEqual(input.read(), u
'\ufeff foo \ufeff bar')
29 def test_coding_slug(self
):
30 input = io
.StringInput(source
=b("""\
31 .. -*- coding: ascii -*-
36 self
.assertEqual(input.successful_encoding
, 'ascii')
37 input = io
.StringInput(source
=b("""\
39 # -*- coding: ascii -*-
43 self
.assertEqual(input.successful_encoding
, 'ascii')
44 input = io
.StringInput(source
=b("""\
46 # extraneous comment; prevents coding slug from being read
47 # -*- coding: ascii -*-
51 self
.assertNotEqual(input.successful_encoding
, 'ascii')
def test_bom_detection(self):
    """Check that a leading BOM selects the matching Unicode encoding.

    The same BOM-prefixed text is encoded as UTF-16 (big- and
    little-endian) and as UTF-8.  After decoding, ``successful_encoding``
    must name the codec the source was encoded with.
    """
    source = u'\ufeffdata\nblah\n'
    for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
        string_input = io.StringInput(source=source.encode(encoding))
        # `successful_encoding` is filled in while the source is decoded,
        # so the input has to be read before the attribute is inspected.
        string_input.read()
        self.assertEqual(string_input.successful_encoding, encoding)
def test_readlines(self):
    """Read the include fixture as a list of lines and compare it."""
    expected_lines = [u'Some include text.\n']
    include_file = io.FileInput(source_path='data/include.txt')
    self.assertEqual(include_file.readlines(), expected_lines)
def test_heuristics_utf8(self):
    """Check the encoding picked for a file opened without an encoding.

    No encoding is passed to ``FileInput``; on Python 2 the reported
    ``successful_encoding`` must be UTF-8 for the Cyrillic fixture.
    """
    input = io.FileInput(source_path='functional/input/cyrillic.txt')
    # Decode the file first: `successful_encoding` is only filled in
    # while the source is being decoded.
    input.read()
    if sys.version_info < (3,0):
        # in Py3k, the locale encoding is used without --input-encoding
        # skipping the heuristic
        self.assertEqual(input.successful_encoding, 'utf-8')
def test_heuristics_latin1(self):
    """Check encoding and content of a Latin-1 file opened without an encoding.

    No encoding is passed to ``FileInput``; the reported
    ``successful_encoding`` must be Latin-1 and the decoded text must
    match the fixture's content.
    """
    input = io.FileInput(source_path='data/latin1.txt')
    # Reading both decodes the file (which sets `successful_encoding`)
    # and yields the text compared below.
    data = input.read()
    self.assertEqual(input.successful_encoding, 'latin-1')
    self.assertEqual(data, u'Gr\xfc\xdfe\n')
85 if __name__
== '__main__':