Fix tests.
[docutils.git] / test / test_parsers / test_rst / test_east_asian_text.py
blob dca1c46c6ff7093741fbc3a8acef5e827bc3337c
1 #! /usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # $Id$
5 # Author: David Goodger <goodger@python.org>
6 # Copyright: This module has been placed in the public domain.
8 """
9 Tests for East Asian text with double-width characters.
10 """
12 from __init__ import DocutilsTestSupport
14 import unicodedata
# Look up the width-classification helper once; fall back to None when the
# running interpreter's unicodedata module does not provide it.
east_asian_width = getattr(unicodedata, 'east_asian_width', None)
20 from docutils._compat import b
def suite():
    """Return a parser test suite populated from the module's ``totest`` table."""
    parser_suite = DocutilsTestSupport.ParserTestSuite()
    # generateTests() is handed the whole table of named test groups.
    parser_suite.generateTests(totest)
    return parser_suite
# Table of parser test cases: maps a group name to a list of two-item
# [input text, expected result] entries.  suite() passes this table to
# ParserTestSuite.generateTests().
27 totest = {}
# Every case in this extract is registered under the 'double-width' group.
# NOTE(review): the closing bracket of this list is not visible in this
# extract -- confirm against the full file.
29 totest['double-width'] = [
# Section titles: the first title's 9-character underline is accepted, while
# the 8-character underline under the second title yields a level-2
# "Title underline too short." warning in the expected result.
30 [u"""\
31 タイトル1
32 =========
34 タイトル2
35 ========
36 """,
37 u"""\
38 <document source="test data">
39 <section ids="id1" names="タイトル1">
40 <title>
41 タイトル1
42 <section ids="id2" names="タイトル2">
43 <title>
44 タイトル2
45 <system_message level="2" line="5" source="test data" type="WARNING">
46 <paragraph>
47 Title underline too short.
48 <literal_block xml:space="preserve">
49 タイトル2
50 ========
51 """],
# Grid table with two cells: the first holds a bullet list, the second starts
# with a backslash-escaped "*" (the input is a raw string, so the backslash
# is kept) and is expected to come out as a plain paragraph.
52 [ur"""
53 +-----------------------+
54 | * ヒョウ:ダイ1ギョウ |
55 | * ダイ2ギョウ |
56 +-----------------------+
57 | \* ダイ1ギョウ |
58 | * ダイ2ギョウ |
59 +-----------------------+
60 """,
61 u"""\
62 <document source="test data">
63 <table>
64 <tgroup cols="1">
65 <colspec colwidth="23">
66 <tbody>
67 <row>
68 <entry>
69 <bullet_list bullet="*">
70 <list_item>
71 <paragraph>
72 ヒョウ:ダイ1ギョウ
73 <list_item>
74 <paragraph>
75 ダイ2ギョウ
76 <row>
77 <entry>
78 <paragraph>
79 * ダイ1ギョウ
80 * ダイ2ギョウ
81 """],
# Grid table whose cells span rows and columns; the expected result records
# the spans as morerows/morecols attributes with column widths 8, 14 and 6.
82 [u"""\
83 Complex spanning pattern (no edge knows all rows/cols):
85 +--------+---------------------+
86 | 北西・ | 北・北東セル |
87 | 西セル +--------------+------+
88 | | 真ん中のセル | 東・ |
89 +--------+--------------+ 南東 |
90 | 南西・南セル | セル |
91 +-----------------------+------+
92 """,
93 u"""\
94 <document source="test data">
95 <paragraph>
96 Complex spanning pattern (no edge knows all rows/cols):
97 <table>
98 <tgroup cols="3">
99 <colspec colwidth="8">
100 <colspec colwidth="14">
101 <colspec colwidth="6">
102 <tbody>
103 <row>
104 <entry morerows="1">
105 <paragraph>
106 北西・
107 西セル
108 <entry morecols="1">
109 <paragraph>
110 北・北東セル
111 <row>
112 <entry>
113 <paragraph>
114 真ん中のセル
115 <entry morerows="1">
116 <paragraph>
117 東・
118 南東
119 セル
120 <row>
121 <entry morecols="1">
122 <paragraph>
123 南西・南セル
124 """],
# Two simple tables: the one with 9-character column borders is expected to
# parse, while the one whose first border has only 8 characters is expected
# to produce a level-3 "Malformed table." error.
125 [u"""\
126 ========= =========
127 ダイ1ラン ダイ2ラン
128 ========= =========
130 ======== =========
131 ダイ1ラン ダイ2ラン
132 ======== =========
133 """,
134 u"""\
135 <document source="test data">
136 <table>
137 <tgroup cols="2">
138 <colspec colwidth="9">
139 <colspec colwidth="9">
140 <tbody>
141 <row>
142 <entry>
143 <paragraph>
144 ダイ1ラン
145 <entry>
146 <paragraph>
147 ダイ2ラン
148 <system_message level="3" line="6" source="test data" type="ERROR">
149 <paragraph>
150 Malformed table.
151 Text in column margin in table line 2.
152 <literal_block xml:space="preserve">
153 ======== =========
154 ダイ1ラン ダイ2ラン
155 ======== =========
156 """],
# Simple table whose first column is one "=" wide and holds a single
# ambiguous-width character per row.  The expected result is written with
# \x.. and \\u.... escapes, wrapped in b() and decoded with
# 'raw_unicode_escape' (b comes from docutils._compat; presumably it yields a
# byte string -- confirm).
157 [u"""\
158 Some ambiguous-width characters:
160 = ===================================
161 © copyright sign
162 ® registered sign
163 « left pointing guillemet
164 » right pointing guillemet
165 – en-dash
166 — em-dash
167 ‘ single turned comma quotation mark
168 ’ single comma quotation mark
169 ‚ low single comma quotation mark
170 “ double turned comma quotation mark
171 ” double comma quotation mark
172 „ low double comma quotation mark
173 † dagger
174 ‡ double dagger
175 … ellipsis
176 ™ trade mark sign
177 ⇔ left-right double arrow
178 = ===================================
179 """,
180 b("""\
181 <document source="test data">
182 <paragraph>
183 Some ambiguous-width characters:
184 <table>
185 <tgroup cols="2">
186 <colspec colwidth="1">
187 <colspec colwidth="35">
188 <tbody>
189 <row>
190 <entry>
191 <paragraph>
192 \xa9
193 <entry>
194 <paragraph>
195 copyright sign
196 <row>
197 <entry>
198 <paragraph>
199 \xae
200 <entry>
201 <paragraph>
202 registered sign
203 <row>
204 <entry>
205 <paragraph>
206 \xab
207 <entry>
208 <paragraph>
209 left pointing guillemet
210 <row>
211 <entry>
212 <paragraph>
213 \xbb
214 <entry>
215 <paragraph>
216 right pointing guillemet
217 <row>
218 <entry>
219 <paragraph>
220 \\u2013
221 <entry>
222 <paragraph>
223 en-dash
224 <row>
225 <entry>
226 <paragraph>
227 \\u2014
228 <entry>
229 <paragraph>
230 em-dash
231 <row>
232 <entry>
233 <paragraph>
234 \\u2018
235 <entry>
236 <paragraph>
237 single turned comma quotation mark
238 <row>
239 <entry>
240 <paragraph>
241 \\u2019
242 <entry>
243 <paragraph>
244 single comma quotation mark
245 <row>
246 <entry>
247 <paragraph>
248 \\u201a
249 <entry>
250 <paragraph>
251 low single comma quotation mark
252 <row>
253 <entry>
254 <paragraph>
255 \\u201c
256 <entry>
257 <paragraph>
258 double turned comma quotation mark
259 <row>
260 <entry>
261 <paragraph>
262 \\u201d
263 <entry>
264 <paragraph>
265 double comma quotation mark
266 <row>
267 <entry>
268 <paragraph>
269 \\u201e
270 <entry>
271 <paragraph>
272 low double comma quotation mark
273 <row>
274 <entry>
275 <paragraph>
276 \\u2020
277 <entry>
278 <paragraph>
279 dagger
280 <row>
281 <entry>
282 <paragraph>
283 \\u2021
284 <entry>
285 <paragraph>
286 double dagger
287 <row>
288 <entry>
289 <paragraph>
290 \\u2026
291 <entry>
292 <paragraph>
293 ellipsis
294 <row>
295 <entry>
296 <paragraph>
297 \\u2122
298 <entry>
299 <paragraph>
300 trade mark sign
301 <row>
302 <entry>
303 <paragraph>
304 \\u21d4
305 <entry>
306 <paragraph>
307 left-right double arrow
308 """).decode('raw_unicode_escape')],
# Entry with empty input and empty expected result.
# NOTE(review): looks like a blank template for adding new cases -- confirm
# whether it is meant to be an active test.
311 [u"""\
312 """,
313 u"""\
314 """],
if __name__ == '__main__':
    # Run as a script: let unittest drive the tests built by suite().
    from unittest import main as run_tests
    run_tests(defaultTest='suite')