Tests for encoding problems
[docutils.git] / test / test_io.py
blobcb63e7ed13d0f7b4aeea31c4ade15262a7c02f72
1 #! /usr/bin/env python
3 # $Id$
4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This module has been placed in the public domain.
7 """
8 Test module for io.py.
9 """
11 import unittest, sys
12 import DocutilsTestSupport # must be imported before docutils
13 from docutils import io
14 from docutils._compat import b, bytes
class InputTests(unittest.TestCase):

    """Checks on how the `io` input classes decode their sources.

    Each test builds an `io.StringInput` or `io.FileInput`, reads it, and
    then inspects the decoded text and/or the `successful_encoding`
    attribute that the input object exposes after reading.
    """

    def test_bom(self):
        # Byte string input that carries a UTF-8 BOM at the start and a
        # second one in the middle of the text.
        stream = io.StringInput(source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'),
                                encoding='utf8')
        # Assert BOMs are gone.  Dropping the inner BOM leaves the space
        # after "foo" directly next to the space before "bar", hence the
        # two consecutive spaces in the expected value.
        self.assertEqual(stream.read(), u' foo  bar')
        # With unicode input:
        stream = io.StringInput(source=u'\ufeff foo \ufeff bar')
        # Assert BOMs are still there.
        self.assertEqual(stream.read(), u'\ufeff foo \ufeff bar')

    def test_coding_slug(self):
        # Coding slug on the very first line.
        stream = io.StringInput(source=b("""\
.. -*- coding: ascii -*-
data
blah
"""))
        # The encoding is only recorded once the source has been read.
        stream.read()
        self.assertEqual(stream.successful_encoding, 'ascii')
        # Coding slug on the second line, right after the "#!" line.
        stream = io.StringInput(source=b("""\
#! python
# -*- coding: ascii -*-
print "hello world"
"""))
        stream.read()
        self.assertEqual(stream.successful_encoding, 'ascii')
        # Coding slug pushed down to the third line: it must not be used.
        stream = io.StringInput(source=b("""\
#! python
# extraneous comment; prevents coding slug from being read
# -*- coding: ascii -*-
print "hello world"
"""))
        stream.read()
        self.assertNotEqual(stream.successful_encoding, 'ascii')

    def test_bom_detection(self):
        text = u'\ufeffdata\nblah\n'
        # The same BOM-prefixed text is encoded three ways; after reading,
        # the input is expected to report the encoding that was used.
        for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
            stream = io.StringInput(source=text.encode(encoding))
            stream.read()
            self.assertEqual(stream.successful_encoding, encoding)

    def test_readlines(self):
        # The path is relative, so this depends on the working directory
        # the test run is started from.
        stream = io.FileInput(source_path='data/include.txt')
        self.assertEqual(stream.readlines(), [u'Some include text.\n'])

    def test_heuristics_utf8(self):
        stream = io.FileInput(source_path='functional/input/cyrillic.txt')
        # Reading must succeed on every Python version; only the reported
        # encoding is version dependent.
        stream.read()
        if sys.version_info < (3,0):
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic
            self.assertEqual(stream.successful_encoding, 'utf-8')

    def test_heuristics_latin1(self):
        stream = io.FileInput(source_path='data/latin1.txt')
        decoded = stream.read()
        self.assertEqual(stream.successful_encoding, 'latin-1')
        self.assertEqual(decoded, u'Gr\xfc\xdfe\n')
if __name__ == '__main__':
    # Executed as a script: let unittest collect and run the tests
    # defined in this module.
    unittest.main()