From 414851391c4a95a9e9cb5b787553fc242cee7fb3 Mon Sep 17 00:00:00 2001 From: milde Date: Tue, 8 Nov 2011 17:15:46 +0000 Subject: [PATCH] Tests for encoding problems git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7217 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- test/data/latin1.txt | 1 + test/test_io.py | 35 +++++++++++++++++++++++++++-------- 2 files changed, 28 insertions(+), 8 deletions(-) create mode 100644 test/data/latin1.txt diff --git a/test/data/latin1.txt b/test/data/latin1.txt new file mode 100644 index 000000000..a9af3baf0 --- /dev/null +++ b/test/data/latin1.txt @@ -0,0 +1 @@ +Grüße diff --git a/test/test_io.py b/test/test_io.py index 5cf44f439..cb63e7ed1 100755 --- a/test/test_io.py +++ b/test/test_io.py @@ -20,11 +20,11 @@ class InputTests(unittest.TestCase): input = io.StringInput(source=b('\xef\xbb\xbf foo \xef\xbb\xbf bar'), encoding='utf8') # Assert BOMs are gone. - self.assertEquals(input.read(), u' foo bar') + self.assertEqual(input.read(), u' foo bar') # With unicode input: input = io.StringInput(source=u'\ufeff foo \ufeff bar') # Assert BOMs are still there. - self.assertEquals(input.read(), u'\ufeff foo \ufeff bar') + self.assertEqual(input.read(), u'\ufeff foo \ufeff bar') def test_coding_slug(self): input = io.StringInput(source=b("""\ @@ -33,14 +33,14 @@ data blah """)) data = input.read() - self.assertEquals(input.successful_encoding, 'ascii') + self.assertEqual(input.successful_encoding, 'ascii') input = io.StringInput(source=b("""\ #! python # -*- coding: ascii -*- print "hello world" """)) data = input.read() - self.assertEquals(input.successful_encoding, 'ascii') + self.assertEqual(input.successful_encoding, 'ascii') input = io.StringInput(source=b("""\ #! python # extraneous comment; prevents coding slug from being read @@ -48,19 +48,38 @@ print "hello world" print "hello world" """)) data = input.read() - self.assertNotEquals(input.successful_encoding, 'ascii') + self.assertNotEqual(input.successful_encoding, 'ascii') def test_bom_detection(self): source = u'\ufeffdata\nblah\n' input = io.StringInput(source=source.encode('utf-16-be')) data = input.read() - self.assertEquals(input.successful_encoding, 'utf-16-be') + self.assertEqual(input.successful_encoding, 'utf-16-be') input = io.StringInput(source=source.encode('utf-16-le')) data = input.read() - self.assertEquals(input.successful_encoding, 'utf-16-le') + self.assertEqual(input.successful_encoding, 'utf-16-le') input = io.StringInput(source=source.encode('utf-8')) data = input.read() - self.assertEquals(input.successful_encoding, 'utf-8') + self.assertEqual(input.successful_encoding, 'utf-8') + + def test_readlines(self): + input = io.FileInput(source_path='data/include.txt') + data = input.readlines() + self.assertEqual(data, [u'Some include text.\n']) + + def test_heuristics_utf8(self): + input = io.FileInput(source_path='functional/input/cyrillic.txt') + data = input.read() + if sys.version_info < (3,0): + # in Py3k, the locale encoding is used without --input-encoding + # skipping the heuristic + self.assertEqual(input.successful_encoding, 'utf-8') + + def test_heuristics_latin1(self): + input = io.FileInput(source_path='data/latin1.txt') + data = input.read() + self.assertEqual(input.successful_encoding, 'latin-1') + self.assertEqual(data, u'Gr\xfc\xdfe\n') if __name__ == '__main__': -- 2.11.4.GIT