# Author: Lea Wiemann <LeWiemann@gmail.com>
# Copyright: This module has been placed in the public domain.
import sys
import unittest

import DocutilsTestSupport              # must be imported before docutils
from docutils import io
from docutils._compat import b, bytes
from docutils.error_reporting import locale_encoding
class InputTests(unittest.TestCase):
    """Exercise the decoding behaviour of ``io.StringInput``/``io.FileInput``.

    Covers BOM handling, coding-slug detection, BOM-based encoding
    detection, ``readlines()`` and the fallbacks applied when no encoding
    is given.

    NOTE(review): this class was restored from a damaged listing in which
    several lines were lost.  Statements marked "restored" below were
    re-created from context and should be confirmed against version
    control.
    """

    # Older unittest versions only provide ``failUnless``; alias it so the
    # tests below can use ``assertTrue`` unconditionally.
    if not hasattr(unittest.TestCase, "assertTrue"):
        assertTrue = unittest.TestCase.failUnless

    def test_bom(self):
        """BOMs are dropped from decoded bytes but kept in unicode input."""
        # NOTE(review): the method name and the ``encoding`` argument are
        # restored -- confirm.
        bytes_input = io.StringInput(
            source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
            encoding='utf8')
        # Assert BOMs are gone.  Dropping the inner BOM leaves the spaces
        # on either side of it adjacent, hence the two spaces.
        self.assertEqual(bytes_input.read(), u' foo  bar')
        # With unicode input:
        unicode_input = io.StringInput(source=u'\ufeff foo \ufeff bar')
        # Assert BOMs are still there.
        self.assertEqual(unicode_input.read(), u'\ufeff foo \ufeff bar')

    def test_coding_slug(self):
        """A coding slug near the top of the source selects the encoding."""
        # NOTE(review): the tails of the three fixtures, the ``#! python``
        # lines and the ``read()`` calls are restored -- confirm.
        rst_input = io.StringInput(source=b("""\
.. -*- coding: ascii -*-
data
blah
"""))
        rst_input.read()
        self.assertEqual(rst_input.successful_encoding, 'ascii')

        script_input = io.StringInput(source=b("""\
#! python
# -*- coding: ascii -*-
print "hello world"
"""))
        script_input.read()
        self.assertEqual(script_input.successful_encoding, 'ascii')

        # Here the extra comment pushes the slug one line further down,
        # where it must no longer be picked up.
        late_slug_input = io.StringInput(source=b("""\
#! python
# extraneous comment; prevents coding slug from being read
# -*- coding: ascii -*-
print "hello world"
"""))
        late_slug_input.read()
        self.assertNotEqual(late_slug_input.successful_encoding, 'ascii')

    def test_bom_detection(self):
        """A leading BOM identifies UTF-16 (BE/LE) and UTF-8 byte input."""
        source = u'\ufeffdata\nblah\n'
        for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
            encoded_input = io.StringInput(source=source.encode(encoding))
            # NOTE(review): restored ``read()`` call; presumably decoding
            # (and thus ``successful_encoding``) happens here -- confirm.
            encoded_input.read()
            self.assertEqual(encoded_input.successful_encoding, encoding)

    def test_readlines(self):
        """``readlines()`` returns the file content as a list of lines."""
        file_input = io.FileInput(source_path='data/include.txt')
        lines = file_input.readlines()
        self.assertEqual(lines, [u'Some include text.\n'])

    def test_heuristics_utf8(self):
        """Without an explicit encoding, UTF-8 is tried (Python 2 only)."""
        # if no encoding is given, try decoding with utf8:
        file_input = io.FileInput(source_path='functional/input/cyrillic.txt')
        # NOTE(review): restored ``read()`` call -- confirm.
        file_input.read()
        if sys.version_info < (3, 0):
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic
            self.assertEqual(file_input.successful_encoding, 'utf-8')

    def test_heuristics_no_utf8(self):
        """Non-UTF-8 data falls back to the locale encoding or latin-1."""
        # if no encoding is given and decoding with utf8 fails,
        # use either the locale encoding (if specified) or latin1:
        file_input = io.FileInput(source_path='data/latin1.txt')
        # NOTE(review): restored assignment; ``data`` is asserted on below
        # -- confirm.
        data = file_input.read()
        self.assertTrue(file_input.successful_encoding in (locale_encoding,
                                                           'latin-1'))
        # The decoded text is only predictable for the latin-1 fallback.
        if file_input.successful_encoding == 'latin-1':
            self.assertEqual(data, u'Gr\xfc\xdfe\n')
94 if __name__
== '__main__':