Drop 2.4 and 2.5 compatibility code, part 2.
[docutils.git] / docutils / test / test_parsers / test_rst / test_east_asian_text.py
blob 94857227cf812e0585caca5bca8ae44ab1d175a7
1 #! /usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # $Id$
5 # Author: David Goodger <goodger@python.org>
6 # Copyright: This module has been placed in the public domain.
8 """
9 Tests for East Asian text with double-width characters.
10 """
12 from __init__ import DocutilsTestSupport
14 import unicodedata
# Width-lookup function from ``unicodedata``; falls back to None when the
# running interpreter's ``unicodedata`` module does not provide it.
east_asian_width = getattr(unicodedata, 'east_asian_width', None)
def suite():
    """Return a parser test suite populated from the module-level ``totest``.

    A fresh ``DocutilsTestSupport.ParserTestSuite`` is created, its tests are
    generated from the ``totest`` table, and the suite is handed back to the
    caller.
    """
    parser_suite = DocutilsTestSupport.ParserTestSuite()
    parser_suite.generateTests(totest)
    return parser_suite
# Test table handed to ``generateTests()`` by ``suite()``: it maps a test-group
# name to a list of cases.  Each case is a two-item list -- the input text,
# then the expected ``<document ...>`` tree dump.  The expected string of the
# "ambiguous-width" case is a byte string whose ``\uXXXX`` escapes are turned
# into characters by ``.decode('raw_unicode_escape')``.
# NOTE(review): the ``ur"""`` literal and ``bytes.decode`` on a literal are
# Python 2 constructs -- confirm the supported interpreter range before porting.
# NOTE(review): the final pair (empty input / empty expected) looks like an
# unused template entry, and the list's closing bracket is not visible in this
# view -- confirm against the complete file.
25 totest = {}
27 totest['double-width'] = [
28 [u"""\
29 タイトル1
30 =========
32 タイトル2
33 ========
34 """,
35 u"""\
36 <document source="test data">
37 <section ids="id1" names="タイトル1">
38 <title>
39 タイトル1
40 <section ids="id2" names="タイトル2">
41 <title>
42 タイトル2
43 <system_message level="2" line="5" source="test data" type="WARNING">
44 <paragraph>
45 Title underline too short.
46 <literal_block xml:space="preserve">
47 タイトル2
48 ========
49 """],
50 [ur"""
51 +-----------------------+
52 | * ヒョウ:ダイ1ギョウ |
53 | * ダイ2ギョウ |
54 +-----------------------+
55 | \* ダイ1ギョウ |
56 | * ダイ2ギョウ |
57 +-----------------------+
58 """,
59 u"""\
60 <document source="test data">
61 <table>
62 <tgroup cols="1">
63 <colspec colwidth="23">
64 <tbody>
65 <row>
66 <entry>
67 <bullet_list bullet="*">
68 <list_item>
69 <paragraph>
70 ヒョウ:ダイ1ギョウ
71 <list_item>
72 <paragraph>
73 ダイ2ギョウ
74 <row>
75 <entry>
76 <paragraph>
77 * ダイ1ギョウ
78 * ダイ2ギョウ
79 """],
80 [u"""\
81 Complex spanning pattern (no edge knows all rows/cols):
83 +--------+---------------------+
84 | 北西・ | 北・北東セル |
85 | 西セル +--------------+------+
86 | | 真ん中のセル | 東・ |
87 +--------+--------------+ 南東 |
88 | 南西・南セル | セル |
89 +-----------------------+------+
90 """,
91 u"""\
92 <document source="test data">
93 <paragraph>
94 Complex spanning pattern (no edge knows all rows/cols):
95 <table>
96 <tgroup cols="3">
97 <colspec colwidth="8">
98 <colspec colwidth="14">
99 <colspec colwidth="6">
100 <tbody>
101 <row>
102 <entry morerows="1">
103 <paragraph>
104 北西・
105 西セル
106 <entry morecols="1">
107 <paragraph>
108 北・北東セル
109 <row>
110 <entry>
111 <paragraph>
112 真ん中のセル
113 <entry morerows="1">
114 <paragraph>
115 東・
116 南東
117 セル
118 <row>
119 <entry morecols="1">
120 <paragraph>
121 南西・南セル
122 """],
123 [u"""\
124 ========= =========
125 ダイ1ラン ダイ2ラン
126 ========= =========
128 ======== =========
129 ダイ1ラン ダイ2ラン
130 ======== =========
131 """,
132 u"""\
133 <document source="test data">
134 <table>
135 <tgroup cols="2">
136 <colspec colwidth="9">
137 <colspec colwidth="9">
138 <tbody>
139 <row>
140 <entry>
141 <paragraph>
142 ダイ1ラン
143 <entry>
144 <paragraph>
145 ダイ2ラン
146 <system_message level="3" line="6" source="test data" type="ERROR">
147 <paragraph>
148 Malformed table.
149 Text in column margin in table line 2.
150 <literal_block xml:space="preserve">
151 ======== =========
152 ダイ1ラン ダイ2ラン
153 ======== =========
154 """],
155 [u"""\
156 Some ambiguous-width characters:
158 = ===================================
159 © copyright sign
160 ® registered sign
161 « left pointing guillemet
162 » right pointing guillemet
163 – en-dash
164 — em-dash
165 ‘ single turned comma quotation mark
166 ’ single comma quotation mark
167 ‚ low single comma quotation mark
168 “ double turned comma quotation mark
169 ” double comma quotation mark
170 „ low double comma quotation mark
171 † dagger
172 ‡ double dagger
173 … ellipsis
174 ™ trade mark sign
175 ⇔ left-right double arrow
176 = ===================================
177 """,
178 b"""\
179 <document source="test data">
180 <paragraph>
181 Some ambiguous-width characters:
182 <table>
183 <tgroup cols="2">
184 <colspec colwidth="1">
185 <colspec colwidth="35">
186 <tbody>
187 <row>
188 <entry>
189 <paragraph>
190 \xa9
191 <entry>
192 <paragraph>
193 copyright sign
194 <row>
195 <entry>
196 <paragraph>
197 \xae
198 <entry>
199 <paragraph>
200 registered sign
201 <row>
202 <entry>
203 <paragraph>
204 \xab
205 <entry>
206 <paragraph>
207 left pointing guillemet
208 <row>
209 <entry>
210 <paragraph>
211 \xbb
212 <entry>
213 <paragraph>
214 right pointing guillemet
215 <row>
216 <entry>
217 <paragraph>
218 \\u2013
219 <entry>
220 <paragraph>
221 en-dash
222 <row>
223 <entry>
224 <paragraph>
225 \\u2014
226 <entry>
227 <paragraph>
228 em-dash
229 <row>
230 <entry>
231 <paragraph>
232 \\u2018
233 <entry>
234 <paragraph>
235 single turned comma quotation mark
236 <row>
237 <entry>
238 <paragraph>
239 \\u2019
240 <entry>
241 <paragraph>
242 single comma quotation mark
243 <row>
244 <entry>
245 <paragraph>
246 \\u201a
247 <entry>
248 <paragraph>
249 low single comma quotation mark
250 <row>
251 <entry>
252 <paragraph>
253 \\u201c
254 <entry>
255 <paragraph>
256 double turned comma quotation mark
257 <row>
258 <entry>
259 <paragraph>
260 \\u201d
261 <entry>
262 <paragraph>
263 double comma quotation mark
264 <row>
265 <entry>
266 <paragraph>
267 \\u201e
268 <entry>
269 <paragraph>
270 low double comma quotation mark
271 <row>
272 <entry>
273 <paragraph>
274 \\u2020
275 <entry>
276 <paragraph>
277 dagger
278 <row>
279 <entry>
280 <paragraph>
281 \\u2021
282 <entry>
283 <paragraph>
284 double dagger
285 <row>
286 <entry>
287 <paragraph>
288 \\u2026
289 <entry>
290 <paragraph>
291 ellipsis
292 <row>
293 <entry>
294 <paragraph>
295 \\u2122
296 <entry>
297 <paragraph>
298 trade mark sign
299 <row>
300 <entry>
301 <paragraph>
302 \\u21d4
303 <entry>
304 <paragraph>
305 left-right double arrow
306 """.decode('raw_unicode_escape')],
309 [u"""\
310 """,
311 u"""\
312 """],
if __name__ == '__main__':
    # Executed directly as a script: hand control to unittest and name
    # 'suite' as the default test to load from this module.
    import unittest
    unittest.main(defaultTest='suite')