Store source and line in the "raw" node generated by raw-derived roles.
[docutils.git] / test / test_io.py
blob6443abda42b4a232520ef9d0b4fe1245d08957ed
1 #! /usr/bin/env python
3 # $Id$
4 # Author: Lea Wiemann <LeWiemann@gmail.com>
5 # Copyright: This module has been placed in the public domain.
7 """
8 Test module for io.py.
9 """
11 import unittest, sys
12 import DocutilsTestSupport # must be imported before docutils
13 from docutils import io
14 from docutils._compat import b, bytes
15 from docutils.error_reporting import locale_encoding
class InputTests(unittest.TestCase):
    """Tests for decoding and encoding detection of ``docutils.io`` inputs."""

    def test_bom(self):
        # Byte input: every UTF-8 byte-order mark must be dropped on decoding.
        source = io.StringInput(
            source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'), encoding='utf8')
        # Assert BOMs are gone.  Dropping the inner BOM leaves the space
        # after "foo" and the space before "bar" adjacent, hence two spaces.
        self.assertEqual(source.read(), u' foo  bar')
        # With unicode input:
        source = io.StringInput(source=u'\ufeff foo \ufeff bar')
        # Assert BOMs are still there.
        self.assertEqual(source.read(), u'\ufeff foo \ufeff bar')

    def test_coding_slug(self):
        # A coding slug on the first line is picked up.
        source = io.StringInput(source=b("""\
.. -*- coding: ascii -*-
data
blah
"""))
        source.read()  # reading is what sets ``successful_encoding``
        self.assertEqual(source.successful_encoding, 'ascii')
        # A coding slug on the second line (after a shebang) is picked up.
        source = io.StringInput(source=b("""\
#! python
# -*- coding: ascii -*-
print "hello world"
"""))
        source.read()
        self.assertEqual(source.successful_encoding, 'ascii')
        # A coding slug pushed down to the third line is not picked up.
        source = io.StringInput(source=b("""\
#! python
# extraneous comment; prevents coding slug from being read
# -*- coding: ascii -*-
print "hello world"
"""))
        source.read()
        self.assertNotEqual(source.successful_encoding, 'ascii')

    def test_bom_detection(self):
        # Text starting with U+FEFF encodes to a leading BOM in each codec;
        # the reported encoding must match the codec used to encode it.
        text = u'\ufeffdata\nblah\n'
        for encoding in ('utf-16-be', 'utf-16-le', 'utf-8'):
            source = io.StringInput(source=text.encode(encoding))
            source.read()
            self.assertEqual(source.successful_encoding, encoding)

    def test_readlines(self):
        # ``readlines()`` returns the file content as a list of unicode lines.
        # The path is relative, so this depends on the working directory.
        source = io.FileInput(source_path='data/include.txt')
        lines = source.readlines()
        self.assertEqual(lines, [u'Some include text.\n'])

    def test_heuristics_utf8(self):
        # if no encoding is given, try decoding with utf8:
        source = io.FileInput(source_path='functional/input/cyrillic.txt')
        source.read()
        if sys.version_info < (3, 0):
            # in Py3k, the locale encoding is used without --input-encoding
            # skipping the heuristic
            self.assertEqual(source.successful_encoding, 'utf-8')

    def test_heuristics_no_utf8(self):
        # if no encoding is given and decoding with utf8 fails,
        # use either the locale encoding (if specified) or latin1:
        source = io.FileInput(source_path='data/latin1.txt')
        data = source.read()
        acceptable = (locale_encoding, 'latin-1')
        self.assertTrue(source.successful_encoding in acceptable)
        # The decoded text is only predictable for the latin-1 fallback.
        if source.successful_encoding == 'latin-1':
            self.assertEqual(data, u'Gr\xfc\xdfe\n')
# Run the whole test module when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()