tests to inline literal open and close
[docutils.git] / docutils / test / test_parsers / test_rst / test_inline_markup.py
blobe3297f339e709cd844d333f37a91fe605e838bee
1 #! /usr/bin/env python
2 # -*- coding: utf-8 -*-
4 # $Id$
5 # Author: David Goodger <goodger@python.org>
6 # Copyright: This module has been placed in the public domain.
8 """
9 Tests for inline markup in docutils/parsers/rst/states.py.
10 Interpreted text tests are in a separate module, test_interpreted.py.
11 """
13 from __init__ import DocutilsTestSupport
def suite():
    """Build and return the test suite for the inline-markup cases.

    Creates a ``DocutilsTestSupport.ParserTestSuite``, hands it the
    module-level ``totest`` table through ``generateTests`` and returns
    the populated suite object.
    """
    inline_markup_suite = DocutilsTestSupport.ParserTestSuite()
    inline_markup_suite.generateTests(totest)
    return inline_markup_suite
20 totest = {}
22 totest['emphasis'] = [
23 ["""\
24 *emphasis*
25 """,
26 """\
27 <document source="test data">
28 <paragraph>
29 <emphasis>
30 emphasis
31 """],
32 [u"""\
33 l'*emphasis* with the *emphasis*' apostrophe.
34 l\u2019*emphasis* with the *emphasis*\u2019 apostrophe.
35 """,
36 u"""\
37 <document source="test data">
38 <paragraph>
39 l\'
40 <emphasis>
41 emphasis
42 with the \n\
43 <emphasis>
44 emphasis
45 \' apostrophe.
46 l\u2019
47 <emphasis>
48 emphasis
49 with the \n\
50 <emphasis>
51 emphasis
52 \u2019 apostrophe.
53 """],
54 ["""\
55 *emphasized sentence
56 across lines*
57 """,
58 """\
59 <document source="test data">
60 <paragraph>
61 <emphasis>
62 emphasized sentence
63 across lines
64 """],
65 ["""\
66 *emphasis without closing asterisk
67 """,
68 """\
69 <document source="test data">
70 <paragraph>
71 <problematic ids="id2" refid="id1">
73 emphasis without closing asterisk
74 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
75 <paragraph>
76 Inline emphasis start-string without end-string.
77 """],
78 [r"""some punctuation is allowed around inline markup, e.g.
79 /*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters),
80 (*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs)
81 *emphasis*., *emphasis*,, *emphasis*!, and *emphasis*\ (closing delimiters),
83 but not
84 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs),
85 (*), [*], '*' or '"*"' ("quoted" start-string),
86 x*2* or 2*x* (alphanumeric char before),
87 \*args or * (escaped, whitespace behind start-string),
88 or *the\* *stars\* *inside* (escaped, whitespace before end-string).
90 However, '*args' will trigger a warning and may be problematic.
92 what about *this**?
93 """,
94 """\
95 <document source="test data">
96 <paragraph>
97 some punctuation is allowed around inline markup, e.g.
99 <emphasis>
100 emphasis
101 /, -
102 <emphasis>
103 emphasis
104 -, and :
105 <emphasis>
106 emphasis
107 : (delimiters),
109 <emphasis>
110 emphasis
111 ), [
112 <emphasis>
113 emphasis
114 ], <
115 <emphasis>
116 emphasis
117 >, {
118 <emphasis>
119 emphasis
120 } (open/close pairs)
121 <emphasis>
122 emphasis
123 ., \n\
124 <emphasis>
125 emphasis
126 ,, \n\
127 <emphasis>
128 emphasis
129 !, and \n\
130 <emphasis>
131 emphasis
132 (closing delimiters),
133 <paragraph>
134 but not
135 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs),
136 (*), [*], '*' or '"*"' ("quoted" start-string),
137 x*2* or 2*x* (alphanumeric char before),
138 *args or * (escaped, whitespace behind start-string),
139 or \n\
140 <emphasis>
141 the* *stars* *inside
142 (escaped, whitespace before end-string).
143 <paragraph>
144 However, '
145 <problematic ids="id2" refid="id1">
147 args' will trigger a warning and may be problematic.
148 <system_message backrefs="id2" ids="id1" level="2" line="13" source="test data" type="WARNING">
149 <paragraph>
150 Inline emphasis start-string without end-string.
151 <paragraph>
152 what about \n\
153 <emphasis>
154 this*
156 """],
157 [u"""\
158 Quotes around inline markup:
160 '*emphasis*' "*emphasis*" Straight,
161 ‘*emphasis*’ “*emphasis*” English, ...,
162 « *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* ›
163 « *emphasis* » ‹ *emphasis* › French,
164 „*emphasis*“ ‚*emphasis*‘ »*emphasis*« ›*emphasis*‹ German, Czech, ...,
165 „*emphasis*” «*emphasis*» Romanian,
166 “*emphasis*„ ‘*emphasis*‚ Greek,
167 「*emphasis*」 『*emphasis*』traditional Chinese,
168 ”*emphasis*” ’*emphasis*’ »*emphasis*» ›*emphasis*› Swedish, Finnish,
169 „*emphasis*” ‚*emphasis*’ Polish,
170 „*emphasis*” »*emphasis*« ’*emphasis*’ Hungarian,
171 """,
172 u"""\
173 <document source="test data">
174 <paragraph>
175 Quotes around inline markup:
176 <paragraph>
178 <emphasis>
179 emphasis
180 \' "
181 <emphasis>
182 emphasis
183 " Straight,
184 \u2018
185 <emphasis>
186 emphasis
187 \u2019 \u201c
188 <emphasis>
189 emphasis
190 \u201d English, ...,
191 \xab\u202f
192 <emphasis>
193 emphasis
194 \u202f\xbb \u2039\u202f
195 <emphasis>
196 emphasis
197 \u202f\u203a \xab\xa0
198 <emphasis>
199 emphasis
200 \xa0\xbb \u2039\xa0
201 <emphasis>
202 emphasis
203 \xa0\u203a
204 \xab\u2005
205 <emphasis>
206 emphasis
207 \u2005\xbb \u2039\u2005
208 <emphasis>
209 emphasis
210 \u2005\u203a French,
211 \u201e
212 <emphasis>
213 emphasis
214 \u201c \u201a
215 <emphasis>
216 emphasis
217 \u2018 \xbb
218 <emphasis>
219 emphasis
220 \xab \u203a
221 <emphasis>
222 emphasis
223 \u2039 German, Czech, ...,
224 \u201e
225 <emphasis>
226 emphasis
227 \u201d \xab
228 <emphasis>
229 emphasis
230 \xbb Romanian,
231 \u201c
232 <emphasis>
233 emphasis
234 \u201e \u2018
235 <emphasis>
236 emphasis
237 \u201a Greek,
238 \u300c
239 <emphasis>
240 emphasis
241 \u300d \u300e
242 <emphasis>
243 emphasis
244 \u300ftraditional Chinese,
245 \u201d
246 <emphasis>
247 emphasis
248 \u201d \u2019
249 <emphasis>
250 emphasis
251 \u2019 \xbb
252 <emphasis>
253 emphasis
254 \xbb \u203a
255 <emphasis>
256 emphasis
257 \u203a Swedish, Finnish,
258 \u201e
259 <emphasis>
260 emphasis
261 \u201d \u201a
262 <emphasis>
263 emphasis
264 \u2019 Polish,
265 \u201e
266 <emphasis>
267 emphasis
268 \u201d \xbb
269 <emphasis>
270 emphasis
271 \xab \u2019
272 <emphasis>
273 emphasis
274 \u2019 Hungarian,
275 """],
276 [r"""
277 Emphasized asterisk: *\**
279 Emphasized double asterisk: *\***
280 """,
281 """\
282 <document source="test data">
283 <paragraph>
284 Emphasized asterisk: \n\
285 <emphasis>
287 <paragraph>
288 Emphasized double asterisk: \n\
289 <emphasis>
291 """],
294 totest['strong'] = [
295 ["""\
296 **strong**
297 """,
298 """\
299 <document source="test data">
300 <paragraph>
301 <strong>
302 strong
303 """],
304 [u"""\
305 l'**strong** and l\u2019**strong** with apostrophe
306 """,
307 u"""\
308 <document source="test data">
309 <paragraph>
311 <strong>
312 strong
313 and l\u2019
314 <strong>
315 strong
316 with apostrophe
317 """],
318 [u"""\
319 quoted '**strong**', quoted "**strong**",
320 quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d,
321 quoted \xab**strong**\xbb
322 """,
323 u"""\
324 <document source="test data">
325 <paragraph>
326 quoted '
327 <strong>
328 strong
329 ', quoted "
330 <strong>
331 strong
333 quoted \u2018
334 <strong>
335 strong
336 \u2019, quoted \u201c
337 <strong>
338 strong
339 \u201d,
340 quoted \xab
341 <strong>
342 strong
343 \xbb
344 """],
345 [r"""
346 (**strong**) but not (**) or '(** ' or x**2 or \**kwargs or **
348 (however, '**kwargs' will trigger a warning and may be problematic)
349 """,
350 """\
351 <document source="test data">
352 <paragraph>
354 <strong>
355 strong
356 ) but not (**) or '(** ' or x**2 or **kwargs or **
357 <paragraph>
358 (however, '
359 <problematic ids="id2" refid="id1">
361 kwargs' will trigger a warning and may be problematic)
362 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
363 <paragraph>
364 Inline strong start-string without end-string.
365 """],
366 ["""\
367 Strong asterisk: *****
369 Strong double asterisk: ******
370 """,
371 """\
372 <document source="test data">
373 <paragraph>
374 Strong asterisk: \n\
375 <strong>
377 <paragraph>
378 Strong double asterisk: \n\
379 <strong>
381 """],
382 ["""\
383 **strong without closing asterisks
384 """,
385 """\
386 <document source="test data">
387 <paragraph>
388 <problematic ids="id2" refid="id1">
390 strong without closing asterisks
391 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
392 <paragraph>
393 Inline strong start-string without end-string.
394 """],
397 totest['literal'] = [
398 ["""\
399 ``literal``
400 """,
401 """\
402 <document source="test data">
403 <paragraph>
404 <literal>
405 literal
406 """],
407 [r"""
408 ``\literal``
409 """,
410 """\
411 <document source="test data">
412 <paragraph>
413 <literal>
414 \\literal
415 """],
416 [r"""
417 ``lite\ral``
418 """,
419 """\
420 <document source="test data">
421 <paragraph>
422 <literal>
423 lite\\ral
424 """],
425 [r"""
426 ``literal\``
427 """,
428 """\
429 <document source="test data">
430 <paragraph>
431 <literal>
432 literal\\
433 """],
434 [u"""\
435 l'``literal`` and l\u2019``literal`` with apostrophe
436 """,
437 u"""\
438 <document source="test data">
439 <paragraph>
441 <literal>
442 literal
443 and l\u2019
444 <literal>
445 literal
446 with apostrophe
447 """],
448 [u"""\
449 quoted '``literal``', quoted "``literal``",
450 quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
451 quoted \xab``literal``\xbb
452 """,
453 u"""\
454 <document source="test data">
455 <paragraph>
456 quoted '
457 <literal>
458 literal
459 ', quoted "
460 <literal>
461 literal
463 quoted \u2018
464 <literal>
465 literal
466 \u2019, quoted \u201c
467 <literal>
468 literal
469 \u201d,
470 quoted \xab
471 <literal>
472 literal
473 \xbb
474 """],
475 [u"""\
476 ``'literal'`` with quotes, ``"literal"`` with quotes,
477 ``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
478 ``\xabliteral\xbb`` with quotes
479 """,
480 u"""\
481 <document source="test data">
482 <paragraph>
483 <literal>
484 'literal'
485 with quotes, \n\
486 <literal>
487 "literal"
488 with quotes,
489 <literal>
490 \u2018literal\u2019
491 with quotes, \n\
492 <literal>
493 \u201cliteral\u201d
494 with quotes,
495 <literal>
496 \xabliteral\xbb
497 with quotes
498 """],
499 [r"""
500 ``literal ``TeX quotes'' & \backslash`` but not "``" or ``
502 (however, ``standalone TeX quotes'' will trigger a warning
503 and may be problematic)
504 """,
505 """\
506 <document source="test data">
507 <paragraph>
508 <literal>
509 literal ``TeX quotes'' & \\backslash
510 but not "``" or ``
511 <paragraph>
512 (however, \n\
513 <problematic ids="id2" refid="id1">
515 standalone TeX quotes'' will trigger a warning
516 and may be problematic)
517 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
518 <paragraph>
519 Inline literal start-string without end-string.
520 """],
521 ["""\
522 Find the ```interpreted text``` in this paragraph!
523 """,
524 """\
525 <document source="test data">
526 <paragraph>
527 Find the \n\
528 <literal>
529 `interpreted text`
530 in this paragraph!
531 """],
532 ["""\
533 ``literal without closing backquotes
534 """,
535 """\
536 <document source="test data">
537 <paragraph>
538 <problematic ids="id2" refid="id1">
540 literal without closing backquotes
541 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
542 <paragraph>
543 Inline literal start-string without end-string.
544 """],
545 [r"""
546 Python ``list``\s use square bracket syntax.
547 """,
548 """\
549 <document source="test data">
550 <paragraph>
551 Python \n\
552 <literal>
553 list
554 s use square bracket syntax.
555 """],
556 [r"""
557 Blank after opening `` not allowed.
558 """,
559 """\
560 <document source="test data">
561 <paragraph>
562 Blank after opening `` not allowed.
563 """],
564 [r"""
565 no blank ``after closing``continues`` literal.
566 """,
567 """\
568 <document source="test data">
569 <paragraph>
570 no blank \n\
571 <literal>
572 after closing``continues
573 literal.
574 """],
575 [r"""
576 dot ``after closing``. is possible.
577 """,
578 """\
579 <document source="test data">
580 <paragraph>
581 dot \n\
582 <literal>
583 after closing
584 . is possible.
585 """],
588 totest['references'] = [
589 ["""\
590 ref_
591 """,
592 """\
593 <document source="test data">
594 <paragraph>
595 <reference name="ref" refname="ref">
597 """],
598 [u"""\
599 l'ref_ and l\u2019ref_ with apostrophe
600 """,
601 u"""\
602 <document source="test data">
603 <paragraph>
605 <reference name="ref" refname="ref">
607 and l\u2019
608 <reference name="ref" refname="ref">
610 with apostrophe
611 """],
612 [u"""\
613 quoted 'ref_', quoted "ref_",
614 quoted \u2018ref_\u2019, quoted \u201cref_\u201d,
615 quoted \xabref_\xbb,
616 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
617 \u201cref ref\u201d_, or \xabref ref\xbb_
618 """,
619 u"""\
620 <document source="test data">
621 <paragraph>
622 quoted '
623 <reference name="ref" refname="ref">
625 ', quoted "
626 <reference name="ref" refname="ref">
629 quoted \u2018
630 <reference name="ref" refname="ref">
632 \u2019, quoted \u201c
633 <reference name="ref" refname="ref">
635 \u201d,
636 quoted \xab
637 <reference name="ref" refname="ref">
639 \xbb,
640 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
641 \u201cref ref\u201d_, or \xabref ref\xbb_
642 """],
643 ["""\
644 ref__
645 """,
646 """\
647 <document source="test data">
648 <paragraph>
649 <reference anonymous="1" name="ref">
651 """],
652 [u"""\
653 l'ref__ and l\u2019ref__ with apostrophe
654 """,
655 u"""\
656 <document source="test data">
657 <paragraph>
659 <reference anonymous="1" name="ref">
661 and l\u2019
662 <reference anonymous="1" name="ref">
664 with apostrophe
665 """],
666 [u"""\
667 quoted 'ref__', quoted "ref__",
668 quoted \u2018ref__\u2019, quoted \u201cref__\u201d,
669 quoted \xabref__\xbb,
670 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
671 \u201cref ref\u201d__, or \xabref ref\xbb__
672 """,
673 u"""\
674 <document source="test data">
675 <paragraph>
676 quoted '
677 <reference anonymous="1" name="ref">
679 ', quoted "
680 <reference anonymous="1" name="ref">
683 quoted \u2018
684 <reference anonymous="1" name="ref">
686 \u2019, quoted \u201c
687 <reference anonymous="1" name="ref">
689 \u201d,
690 quoted \xab
691 <reference anonymous="1" name="ref">
693 \xbb,
694 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
695 \u201cref ref\u201d__, or \xabref ref\xbb__
696 """],
697 ["""\
698 ref_, r_, r_e-f_, -ref_, and anonymousref__,
699 but not _ref_ or __attr__ or object.__attr__
700 """,
701 """\
702 <document source="test data">
703 <paragraph>
704 <reference name="ref" refname="ref">
706 , \n\
707 <reference name="r" refname="r">
709 , \n\
710 <reference name="r_e-f" refname="r_e-f">
711 r_e-f
713 <reference name="ref" refname="ref">
715 , and \n\
716 <reference anonymous="1" name="anonymousref">
717 anonymousref
719 but not _ref_ or __attr__ or object.__attr__
720 """],
723 totest['phrase_references'] = [
724 ["""\
725 `phrase reference`_
726 """,
727 """\
728 <document source="test data">
729 <paragraph>
730 <reference name="phrase reference" refname="phrase reference">
731 phrase reference
732 """],
733 [u"""\
734 l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe
735 """,
736 u"""\
737 <document source="test data">
738 <paragraph>
740 <reference name="phrase reference" refname="phrase reference">
741 phrase reference
742 and l\u2019
743 <reference name="phrase reference" refname="phrase reference">
744 phrase reference
745 with apostrophe
746 """],
747 [u"""\
748 quoted '`phrase reference`_', quoted "`phrase reference`_",
749 quoted \u2018`phrase reference`_\u2019,
750 quoted \u201c`phrase reference`_\u201d,
751 quoted \xab`phrase reference`_\xbb
752 """,
753 u"""\
754 <document source="test data">
755 <paragraph>
756 quoted '
757 <reference name="phrase reference" refname="phrase reference">
758 phrase reference
759 ', quoted "
760 <reference name="phrase reference" refname="phrase reference">
761 phrase reference
763 quoted \u2018
764 <reference name="phrase reference" refname="phrase reference">
765 phrase reference
766 \u2019,
767 quoted \u201c
768 <reference name="phrase reference" refname="phrase reference">
769 phrase reference
770 \u201d,
771 quoted \xab
772 <reference name="phrase reference" refname="phrase reference">
773 phrase reference
774 \xbb
775 """],
776 [u"""\
777 `'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes,
778 `\u2018phrase reference\u2019`_ with quotes,
779 `\u201cphrase reference\u201d`_ with quotes,
780 `\xabphrase reference\xbb`_ with quotes
781 """,
782 u"""\
783 <document source="test data">
784 <paragraph>
785 <reference name="'phrase reference'" refname="'phrase reference'">
786 'phrase reference'
787 with quotes, \n\
788 <reference name=""phrase reference"" refname=""phrase reference"">
789 "phrase reference"
790 with quotes,
791 <reference name="\u2018phrase reference\u2019" refname="\u2018phrase reference\u2019">
792 \u2018phrase reference\u2019
793 with quotes,
794 <reference name="\u201cphrase reference\u201d" refname="\u201cphrase reference\u201d">
795 \u201cphrase reference\u201d
796 with quotes,
797 <reference name="\xabphrase reference\xbb" refname="\xabphrase reference\xbb">
798 \xabphrase reference\xbb
799 with quotes
800 """],
801 ["""\
802 `anonymous reference`__
803 """,
804 """\
805 <document source="test data">
806 <paragraph>
807 <reference anonymous="1" name="anonymous reference">
808 anonymous reference
809 """],
810 [u"""\
811 l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe
812 """,
813 u"""\
814 <document source="test data">
815 <paragraph>
817 <reference anonymous="1" name="anonymous reference">
818 anonymous reference
819 and l\u2019
820 <reference anonymous="1" name="anonymous reference">
821 anonymous reference
822 with apostrophe
823 """],
824 [u"""\
825 quoted '`anonymous reference`__', quoted "`anonymous reference`__",
826 quoted \u2018`anonymous reference`__\u2019,
827 quoted \u201c`anonymous reference`__\u201d,
828 quoted \xab`anonymous reference`__\xbb
829 """,
830 u"""\
831 <document source="test data">
832 <paragraph>
833 quoted '
834 <reference anonymous="1" name="anonymous reference">
835 anonymous reference
836 ', quoted "
837 <reference anonymous="1" name="anonymous reference">
838 anonymous reference
840 quoted \u2018
841 <reference anonymous="1" name="anonymous reference">
842 anonymous reference
843 \u2019,
844 quoted \u201c
845 <reference anonymous="1" name="anonymous reference">
846 anonymous reference
847 \u201d,
848 quoted \xab
849 <reference anonymous="1" name="anonymous reference">
850 anonymous reference
851 \xbb
852 """],
853 [u"""\
854 `'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes,
855 `\u2018anonymous reference\u2019`__ with quotes,
856 `\u201canonymous reference\u201d`__ with quotes,
857 `\xabanonymous reference\xbb`__ with quotes
858 """,
859 u"""\
860 <document source="test data">
861 <paragraph>
862 <reference anonymous="1" name="'anonymous reference'">
863 'anonymous reference'
864 with quotes, \n\
865 <reference anonymous="1" name=""anonymous reference"">
866 "anonymous reference"
867 with quotes,
868 <reference anonymous="1" name="\u2018anonymous reference\u2019">
869 \u2018anonymous reference\u2019
870 with quotes,
871 <reference anonymous="1" name="\u201canonymous reference\u201d">
872 \u201canonymous reference\u201d
873 with quotes,
874 <reference anonymous="1" name="\xabanonymous reference\xbb">
875 \xabanonymous reference\xbb
876 with quotes
877 """],
878 ["""\
879 `phrase reference
880 across lines`_
881 """,
882 """\
883 <document source="test data">
884 <paragraph>
885 <reference name="phrase reference across lines" refname="phrase reference across lines">
886 phrase reference
887 across lines
888 """],
889 ["""\
890 `phrase\`_ reference`_
891 """,
892 """\
893 <document source="test data">
894 <paragraph>
895 <reference name="phrase`_ reference" refname="phrase`_ reference">
896 phrase`_ reference
897 """],
898 ["""\
899 Invalid phrase reference:
901 :role:`phrase reference`_
902 """,
903 """\
904 <document source="test data">
905 <paragraph>
906 Invalid phrase reference:
907 <paragraph>
908 <problematic ids="id2" refid="id1">
909 :role:`phrase reference`_
910 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
911 <paragraph>
912 Mismatch: both interpreted text role prefix and reference suffix.
913 """],
914 ["""\
915 Invalid phrase reference:
917 `phrase reference`:role:_
918 """,
919 """\
920 <document source="test data">
921 <paragraph>
922 Invalid phrase reference:
923 <paragraph>
924 <problematic ids="id2" refid="id1">
925 `phrase reference`:role:_
926 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
927 <paragraph>
928 Mismatch: both interpreted text role suffix and reference suffix.
929 """],
930 ["""\
931 `phrase reference_ without closing backquote
932 """,
933 """\
934 <document source="test data">
935 <paragraph>
936 <problematic ids="id2" refid="id1">
938 phrase \n\
939 <reference name="reference" refname="reference">
940 reference
941 without closing backquote
942 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
943 <paragraph>
944 Inline interpreted text or phrase reference start-string without end-string.
945 """],
946 ["""\
947 `anonymous phrase reference__ without closing backquote
948 """,
949 """\
950 <document source="test data">
951 <paragraph>
952 <problematic ids="id2" refid="id1">
954 anonymous phrase \n\
955 <reference anonymous="1" name="reference">
956 reference
957 without closing backquote
958 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
959 <paragraph>
960 Inline interpreted text or phrase reference start-string without end-string.
961 """],
964 totest['embedded_URIs'] = [
965 ["""\
966 `phrase reference <http://example.com>`_
967 """,
968 """\
969 <document source="test data">
970 <paragraph>
971 <reference name="phrase reference" refuri="http://example.com">
972 phrase reference
973 <target ids="phrase-reference" names="phrase\ reference" refuri="http://example.com">
974 """],
975 ["""\
976 `anonymous reference <http://example.com>`__
977 """,
978 """\
979 <document source="test data">
980 <paragraph>
981 <reference name="anonymous reference" refuri="http://example.com">
982 anonymous reference
983 """],
984 ["""\
985 `embedded URI on next line
986 <http://example.com>`__
987 """,
988 """\
989 <document source="test data">
990 <paragraph>
991 <reference name="embedded URI on next line" refuri="http://example.com">
992 embedded URI on next line
993 """],
994 ["""\
995 `embedded URI across lines <http://example.com/
996 long/path>`__
997 """,
998 """\
999 <document source="test data">
1000 <paragraph>
1001 <reference name="embedded URI across lines" refuri="http://example.com/long/path">
1002 embedded URI across lines
1003 """],
1004 ["""\
1005 `embedded URI with whitespace <http://example.com/
1006 long/path /and /whitespace>`__
1007 """,
1008 """\
1009 <document source="test data">
1010 <paragraph>
1011 <reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
1012 embedded URI with whitespace
1013 """],
1014 [r"""
1015 `embedded URI with escaped whitespace <http://example.com/a\
1016 long/path\ and/some\ escaped\ whitespace>`__
1018 `<omitted\ reference\ text\ with\ escaped\ whitespace>`__
1019 """,
1020 """\
1021 <document source="test data">
1022 <paragraph>
1023 <reference name="embedded URI with escaped whitespace" refuri="http://example.com/a long/path and/some escaped whitespace">
1024 embedded URI with escaped whitespace
1025 <paragraph>
1026 <reference name="omitted reference text with escaped whitespace" refuri="omitted reference text with escaped whitespace">
1027 omitted reference text with escaped whitespace
1028 """],
1029 ["""\
1030 `embedded email address <jdoe@example.com>`__
1032 `embedded email address broken across lines <jdoe
1033 @example.com>`__
1034 """,
1035 """\
1036 <document source="test data">
1037 <paragraph>
1038 <reference name="embedded email address" refuri="mailto:jdoe@example.com">
1039 embedded email address
1040 <paragraph>
1041 <reference name="embedded email address broken across lines" refuri="mailto:jdoe@example.com">
1042 embedded email address broken across lines
1043 """],
1044 [r"""
1045 `embedded URI with too much whitespace < http://example.com/
1046 long/path /and /whitespace >`__
1048 `embedded URI with too much whitespace at end <http://example.com/
1049 long/path /and /whitespace >`__
1051 `embedded URI with no preceding whitespace<http://example.com>`__
1053 `escaped URI \<http://example.com>`__
1055 See `HTML Anchors: \<a>`_.
1056 """,
1057 """\
1058 <document source="test data">
1059 <paragraph>
1060 <reference anonymous="1" name="embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace >">
1061 embedded URI with too much whitespace < http://example.com/
1062 long/path /and /whitespace >
1063 <paragraph>
1064 <reference anonymous="1" name="embedded URI with too much whitespace at end <http://example.com/ long/path /and /whitespace >">
1065 embedded URI with too much whitespace at end <http://example.com/
1066 long/path /and /whitespace >
1067 <paragraph>
1068 <reference anonymous="1" name="embedded URI with no preceding whitespace<http://example.com>">
1069 embedded URI with no preceding whitespace<http://example.com>
1070 <paragraph>
1071 <reference anonymous="1" name="escaped URI <http://example.com>">
1072 escaped URI <http://example.com>
1073 <paragraph>
1074 See \n\
1075 <reference name="HTML Anchors: <a>" refname="html anchors: <a>">
1076 HTML Anchors: <a>
1078 """],
1079 ["""\
1080 Relative URIs' reference text can be omitted:
1082 `<reference>`_
1084 `<anonymous>`__
1085 """,
1086 """\
1087 <document source="test data">
1088 <paragraph>
1089 Relative URIs' reference text can be omitted:
1090 <paragraph>
1091 <reference name="reference" refuri="reference">
1092 reference
1093 <target ids="reference" names="reference" refuri="reference">
1094 <paragraph>
1095 <reference name="anonymous" refuri="anonymous">
1096 anonymous
1097 """],
1098 [r"""
1099 Escape trailing low-line char in URIs:
1101 `<reference\_>`_
1103 `<anonymous\_>`__
1104 """,
1105 """\
1106 <document source="test data">
1107 <paragraph>
1108 Escape trailing low-line char in URIs:
1109 <paragraph>
1110 <reference name="reference_" refuri="reference_">
1111 reference_
1112 <target ids="reference" names="reference_" refuri="reference_">
1113 <paragraph>
1114 <reference name="anonymous_" refuri="anonymous_">
1115 anonymous_
1116 """],
1117 ["""\
1118 Escape other char in URIs:
1120 `<reference\:1>`_
1122 `<anonymous\\call>`__
1123 """,
1124 """\
1125 <document source="test data">
1126 <paragraph>
1127 Escape other char in URIs:
1128 <paragraph>
1129 <reference name="reference:1" refuri="reference:1">
1130 reference:1
1131 <target ids="reference-1" names="reference:1" refuri="reference:1">
1132 <paragraph>
1133 <reference name="anonymouscall" refuri="anonymouscall">
1134 anonymouscall
1135 """],
# Cases for phrase references with an embedded alias (`text <alias_>`_).
# Each entry is a two-string list; the second string always starts with
# <document source="test data">.  Presumably these are [reST input,
# expected document tree] pairs consumed by suite() via generateTests()
# -- confirm against DocutilsTestSupport.
1138 totest['embedded_aliases'] = [
1139 ["""\
1140 `phrase reference <alias_>`_
1141 """,
1142 """\
1143 <document source="test data">
1144 <paragraph>
1145 <reference name="phrase reference" refname="alias">
1146 phrase reference
1147 <target names="phrase\ reference" refname="alias">
1148 """],
1149 ["""\
1150 `anonymous reference <alias_>`__
1151 """,
1152 """\
1153 <document source="test data">
1154 <paragraph>
1155 <reference name="anonymous reference" refname="alias">
1156 anonymous reference
1157 """],
1158 ["""\
1159 `embedded alias on next line
1160 <alias_>`__
1161 """,
1162 """\
1163 <document source="test data">
1164 <paragraph>
1165 <reference name="embedded alias on next line" refname="alias">
1166 embedded alias on next line
1167 """],
1168 ["""\
1169 `embedded alias across lines <alias
1170 phrase_>`__
1171 """,
1172 """\
1173 <document source="test data">
1174 <paragraph>
1175 <reference name="embedded alias across lines" refname="alias phrase">
1176 embedded alias across lines
1177 """],
1178 ["""\
1179 `embedded alias with whitespace <alias
1180 long phrase_>`__
1181 """,
1182 """\
1183 <document source="test data">
1184 <paragraph>
1185 <reference name="embedded alias with whitespace" refname="alias long phrase">
1186 embedded alias with whitespace
1187 """],
1188 ["""\
1189 `<embedded alias with whitespace_>`__
1190 """,
1191 """\
1192 <document source="test data">
1193 <paragraph>
1194 <reference name="embedded alias with whitespace" refname="embedded alias with whitespace">
1195 embedded alias with whitespace
1196 """],
1197 [r"""
1198 `no embedded alias (whitespace inside bracket) < alias_ >`__
1200 `no embedded alias (no preceding whitespace)<alias_>`__
1201 """,
1202 """\
1203 <document source="test data">
1204 <paragraph>
1205 <reference anonymous="1" name="no embedded alias (whitespace inside bracket) < alias_ >">
1206 no embedded alias (whitespace inside bracket) < alias_ >
1207 <paragraph>
1208 <reference anonymous="1" name="no embedded alias (no preceding whitespace)<alias_>">
1209 no embedded alias (no preceding whitespace)<alias_>
1210 """],
1211 [r"""
1212 `anonymous reference <alias\ with\\ escaped \:characters_>`__
1213 """,
1214 """\
1215 <document source="test data">
1216 <paragraph>
1217 <reference name="anonymous reference" refname="aliaswith\ escaped :characters">
1218 anonymous reference
1219 """],
# NOTE(review): the entry below repeats the preceding entry verbatim (same
# input string, same expected output).  It looks like an accidental
# duplicate -- confirm before removing or replacing it with a distinct case.
1220 [r"""
1221 `anonymous reference <alias\ with\\ escaped \:characters_>`__
1222 """,
1223 """\
1224 <document source="test data">
1225 <paragraph>
1226 <reference name="anonymous reference" refname="aliaswith\ escaped :characters">
1227 anonymous reference
1228 """],
1231 totest['inline_targets'] = [
1232 ["""\
1233 _`target`
1235 Here is _`another target` in some text. And _`yet
1236 another target`, spanning lines.
1238 _`Here is a TaRgeT` with case and spacial difficulties.
1239 """,
1240 """\
1241 <document source="test data">
1242 <paragraph>
1243 <target ids="target" names="target">
1244 target
1245 <paragraph>
1246 Here is \n\
1247 <target ids="another-target" names="another\ target">
1248 another target
1249 in some text. And \n\
1250 <target ids="yet-another-target" names="yet\ another\ target">
1252 another target
1253 , spanning lines.
1254 <paragraph>
1255 <target ids="here-is-a-target" names="here\ is\ a\ target">
1256 Here is a TaRgeT
1257 with case and spacial difficulties.
1258 """],
1259 [u"""\
1260 l'_`target1` and l\u2019_`target2` with apostrophe
1261 """,
1262 u"""\
1263 <document source="test data">
1264 <paragraph>
1266 <target ids="target1" names="target1">
1267 target1
1268 and l\u2019
1269 <target ids="target2" names="target2">
1270 target2
1271 with apostrophe
1272 """],
1273 [u"""\
1274 quoted '_`target1`', quoted "_`target2`",
1275 quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d,
1276 quoted \xab_`target5`\xbb
1277 """,
1278 u"""\
1279 <document source="test data">
1280 <paragraph>
1281 quoted '
1282 <target ids="target1" names="target1">
1283 target1
1284 ', quoted "
1285 <target ids="target2" names="target2">
1286 target2
1288 quoted \u2018
1289 <target ids="target3" names="target3">
1290 target3
1291 \u2019, quoted \u201c
1292 <target ids="target4" names="target4">
1293 target4
1294 \u201d,
1295 quoted \xab
1296 <target ids="target5" names="target5">
1297 target5
1298 \xbb
1299 """],
1300 [u"""\
1301 _`'target1'` with quotes, _`"target2"` with quotes,
1302 _`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes,
1303 _`\xabtarget5\xbb` with quotes
1304 """,
1305 u"""\
1306 <document source="test data">
1307 <paragraph>
1308 <target ids="target1" names="'target1'">
1309 'target1'
1310 with quotes, \n\
1311 <target ids="target2" names=""target2"">
1312 "target2"
1313 with quotes,
1314 <target ids="target3" names="\u2018target3\u2019">
1315 \u2018target3\u2019
1316 with quotes, \n\
1317 <target ids="target4" names="\u201ctarget4\u201d">
1318 \u201ctarget4\u201d
1319 with quotes,
1320 <target ids="target5" names="\xabtarget5\xbb">
1321 \xabtarget5\xbb
1322 with quotes
1323 """],
1324 ["""\
1325 But this isn't a _target; targets require backquotes.
1327 And _`this`_ is just plain confusing.
1328 """,
1329 """\
1330 <document source="test data">
1331 <paragraph>
1332 But this isn't a _target; targets require backquotes.
1333 <paragraph>
1334 And \n\
1335 <problematic ids="id2" refid="id1">
1337 this`_ is just plain confusing.
1338 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
1339 <paragraph>
1340 Inline target start-string without end-string.
1341 """],
1342 ["""\
1343 _`inline target without closing backquote
1344 """,
1345 """\
1346 <document source="test data">
1347 <paragraph>
1348 <problematic ids="id2" refid="id1">
1350 inline target without closing backquote
1351 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1352 <paragraph>
1353 Inline target start-string without end-string.
1354 """],
# Footnote reference cases, registered under the 'footnote_reference' key.
# Each entry is a two-item list: the reStructuredText input, followed by the
# expected document tree written as indented pseudo-XML.
1357 totest['footnote_reference'] = [
# Manually numbered reference: expects a footnote_reference with refname="1".
1358 ["""\
1359 [1]_
1360 """,
1361 """\
1362 <document source="test data">
1363 <paragraph>
1364 <footnote_reference ids="id1" refname="1">
1366 """],
# Bare "#" form: expects auto="1" and no refname attribute.
1367 ["""\
1368 [#]_
1369 """,
1370 """\
1371 <document source="test data">
1372 <paragraph>
1373 <footnote_reference auto="1" ids="id1">
1374 """],
# "#label" form: expects auto="1" together with refname="label".
1375 ["""\
1376 [#label]_
1377 """,
1378 """\
1379 <document source="test data">
1380 <paragraph>
1381 <footnote_reference auto="1" ids="id1" refname="label">
1382 """],
# Asterisk form: expects auto="*".
1383 ["""\
1384 [*]_
1385 """,
1386 """\
1387 <document source="test data">
1388 <paragraph>
1389 <footnote_reference auto="*" ids="id1">
1390 """],
# References written back-to-back with no separator: the expected output is
# the unchanged paragraph text, i.e. no footnote_reference nodes are created.
1391 ["""\
1392 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1393 """,
1394 """\
1395 <document source="test data">
1396 <paragraph>
1397 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1398 """],
# Citation reference cases, registered under the 'citation_reference' key.
# Each entry is a two-item list: the reStructuredText input, followed by the
# expected document tree written as indented pseudo-XML.
1401 totest['citation_reference'] = [
# Simplest form: expects a citation_reference with refname="citation".
1402 ["""\
1403 [citation]_
1404 """,
1405 """\
1406 <document source="test data">
1407 <paragraph>
1408 <citation_reference ids="id1" refname="citation">
1409 citation
1410 """],
# Labels containing a hyphen, a period, or uppercase letters and digits are
# expected to be recognized; the refname for "CIT1" is lowercased ("cit1")
# while the node text keeps its case.  "[CIT 1]_" (label with a space) is
# expected to stay plain text.
1411 ["""\
1412 [citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_
1413 """,
1414 """\
1415 <document source="test data">
1416 <paragraph>
1417 <citation_reference ids="id1" refname="citation">
1418 citation
1419 and \n\
1420 <citation_reference ids="id2" refname="cit-ation">
1421 cit-ation
1422 and \n\
1423 <citation_reference ids="id3" refname="cit.ation">
1424 cit.ation
1425 and \n\
1426 <citation_reference ids="id4" refname="cit1">
1427 CIT1
1428 but not [CIT 1]_
1429 """],
# References written back-to-back with no separator: the expected output is
# the unchanged paragraph text, i.e. no citation_reference nodes are created.
1430 ["""\
1431 Adjacent citation refs are not possible: [citation]_[CIT1]_
1432 """,
1433 """\
1434 <document source="test data">
1435 <paragraph>
1436 Adjacent citation refs are not possible: [citation]_[CIT1]_
1437 """],
# Substitution reference cases, registered under the
# 'substitution_references' key.  Each entry is a two-item list: the
# reStructuredText input, followed by the expected document tree written as
# indented pseudo-XML.
1440 totest['substitution_references'] = [
# Simplest form: expects a substitution_reference with refname="subref".
1441 ["""\
1442 |subref|
1443 """,
1444 """\
1445 <document source="test data">
1446 <paragraph>
1447 <substitution_reference refname="subref">
1448 subref
1449 """],
# Trailing "_" / "__": the substitution_reference is expected to be wrapped
# in a reference node, with refname="subref" for "_" and anonymous="1"
# for "__".
1450 ["""\
1451 |subref|_ and |subref|__
1452 """,
1453 """\
1454 <document source="test data">
1455 <paragraph>
1456 <reference refname="subref">
1457 <substitution_reference refname="subref">
1458 subref
1459 and \n\
1460 <reference anonymous="1">
1461 <substitution_reference refname="subref">
1462 subref
1463 """],
# Substitution text containing a space: the space is kept in the refname.
1464 ["""\
1465 |substitution reference|
1466 """,
1467 """\
1468 <document source="test data">
1469 <paragraph>
1470 <substitution_reference refname="substitution reference">
1471 substitution reference
1472 """],
# Substitution text spanning two lines: the expected refname joins the words
# with a single space, while the node text keeps the line break.
1473 ["""\
1474 |substitution
1475 reference|
1476 """,
1477 """\
1478 <document source="test data">
1479 <paragraph>
1480 <substitution_reference refname="substitution reference">
1481 substitution
1482 reference
1483 """],
# Missing closing "|": expects a problematic node plus a level-2 WARNING
# system_message about the start-string lacking an end-string.
1484 ["""\
1485 |substitution reference without closing verbar
1486 """,
1487 """\
1488 <document source="test data">
1489 <paragraph>
1490 <problematic ids="id2" refid="id1">
1492 substitution reference without closing verbar
1493 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1494 <paragraph>
1495 Inline substitution_reference start-string without end-string.
1496 """],
# Isolated runs of vertical bars: the expected output is the unchanged
# paragraph text (no markup and no warning).
1497 ["""\
1498 first | then || and finally |||
1499 """,
1500 """\
1501 <document source="test data">
1502 <paragraph>
1503 first | then || and finally |||
1504 """],
# Standalone hyperlink cases, registered under the 'standalone_hyperlink'
# key.  Each entry is a two-item list: the reStructuredText input, followed by
# the expected document tree written as indented pseudo-XML.
1507 totest['standalone_hyperlink'] = [
# A collection of bare URIs and email addresses, one per paragraph.  Per the
# expected output: each becomes a reference node whose refuri equals the URI;
# URLs wrapped in [], () or <> are recognized; a bare email address gets a
# "mailto:" refuri; a "]", "." or "?" that directly follows the URI in
# running text is left outside the reference.
1508 ["""\
1509 http://www.standalone.hyperlink.com
1511 http:/one-slash-only.absolute.path
1513 [http://example.com]
1515 (http://example.com)
1517 <http://example.com>
1519 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1521 http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text)
1523 http://[3ffe:2a00:100:7031::1]/
1525 mailto:someone@somewhere.com
1527 news:comp.lang.python
1529 An email address in a sentence: someone@somewhere.com.
1531 ftp://ends.with.a.period.
1533 (a.question.mark@end?)
1534 """,
1535 """\
1536 <document source="test data">
1537 <paragraph>
1538 <reference refuri="http://www.standalone.hyperlink.com">
1539 http://www.standalone.hyperlink.com
1540 <paragraph>
1541 <reference refuri="http:/one-slash-only.absolute.path">
1542 http:/one-slash-only.absolute.path
1543 <paragraph>
1545 <reference refuri="http://example.com">
1546 http://example.com
1548 <paragraph>
1550 <reference refuri="http://example.com">
1551 http://example.com
1553 <paragraph>
1555 <reference refuri="http://example.com">
1556 http://example.com
1558 <paragraph>
1559 <reference refuri="http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html">
1560 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1561 <paragraph>
1562 <reference refuri="http://[3ffe:2a00:100:7031::1">
1563 http://[3ffe:2a00:100:7031::1
1564 ] (the final "]" is ambiguous in text)
1565 <paragraph>
1566 <reference refuri="http://[3ffe:2a00:100:7031::1]/">
1567 http://[3ffe:2a00:100:7031::1]/
1568 <paragraph>
1569 <reference refuri="mailto:someone@somewhere.com">
1570 mailto:someone@somewhere.com
1571 <paragraph>
1572 <reference refuri="news:comp.lang.python">
1573 news:comp.lang.python
1574 <paragraph>
1575 An email address in a sentence: \n\
1576 <reference refuri="mailto:someone@somewhere.com">
1577 someone@somewhere.com
1579 <paragraph>
1580 <reference refuri="ftp://ends.with.a.period">
1581 ftp://ends.with.a.period
1583 <paragraph>
1585 <reference refuri="mailto:a.question.mark@end">
1586 a.question.mark@end
1588 """],
# Raw-string input so the backslashes reach the parser: URLs containing
# backslash-escaped asterisks are expected to be recognized, with the
# backslashes absent from both refuri and link text.
1589 [r"""
1590 Valid URLs with escaped markup characters:
1592 http://example.com/\*content\*/whatever
1594 http://example.com/\*content*/whatever
1595 """,
1596 """\
1597 <document source="test data">
1598 <paragraph>
1599 Valid URLs with escaped markup characters:
1600 <paragraph>
1601 <reference refuri="http://example.com/*content*/whatever">
1602 http://example.com/*content*/whatever
1603 <paragraph>
1604 <reference refuri="http://example.com/*content*/whatever">
1605 http://example.com/*content*/whatever
1606 """],
# Inside angle brackets the trailing "." is expected to remain part of the
# refuri (contrast with the bare "ftp://ends.with.a.period." case above).
1607 ["""\
1608 Valid URLs may end with punctuation inside "<>":
1610 <http://example.org/ends-with-dot.>
1611 """,
1612 """\
1613 <document source="test data">
1614 <paragraph>
1615 Valid URLs may end with punctuation inside "<>":
1616 <paragraph>
1618 <reference refuri="http://example.org/ends-with-dot.">
1619 http://example.org/ends-with-dot.
1621 """],
# A URL ending in "++": both plus signs are expected to stay in the refuri.
1622 ["""\
1623 Valid URLs with interesting endings:
1625 http://example.org/ends-with-pluses++
1626 """,
1627 """\
1628 <document source="test data">
1629 <paragraph>
1630 Valid URLs with interesting endings:
1631 <paragraph>
1632 <reference refuri="http://example.org/ends-with-pluses++">
1633 http://example.org/ends-with-pluses++
1634 """],
# "word:word" pairs whose prefix is not a recognized scheme: the expected
# output is the unchanged paragraph text.
1635 ["""\
1636 None of these are standalone hyperlinks (their "schemes"
1637 are not recognized): signal:noise, a:b.
1638 """,
1639 """\
1640 <document source="test data">
1641 <paragraph>
1642 None of these are standalone hyperlinks (their "schemes"
1643 are not recognized): signal:noise, a:b.
1644 """],
# An email address with a backslash before "@": the expected output is plain
# text with the backslash removed and no reference node.
1645 ["""\
1646 Escaped email addresses are not recognized: test\@example.org
1647 """,
1648 """\
1649 <document source="test data">
1650 <paragraph>
1651 Escaped email addresses are not recognized: test@example.org
1652 """],
# Cases for the inline markup recognition rules (which characters may
# surround or sit inside start-/end-strings), registered under the
# 'markup recognition rules' key.  Each entry is a two-item list: the
# reStructuredText input, followed by the expected document tree written as
# indented pseudo-XML.
1655 totest['markup recognition rules'] = [
# Double underscores around a word: the expected output is unchanged text.
1656 ["""\
1657 __This__ should be left alone.
1658 """,
1659 """\
1660 <document source="test data">
1661 <paragraph>
1662 __This__ should be left alone.
1663 """],
# Raw-string input so the backslashes reach the parser: backslash-escaped
# whitespace separates single-character markup.  The expected output contains
# emphasis, strong, literal and title_reference nodes, and "new\<newline>lines"
# is joined into "newlines".
1664 [r"""
1665 Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
1666 with backslash-escaped whitespace, including new\
1667 lines.
1668 """,
1669 """\
1670 <document source="test data">
1671 <paragraph>
1672 Character-level m
1673 <emphasis>
1675 <strong>
1677 <literal>
1679 <title_reference>
1682 with backslash-escaped whitespace, including newlines.
1683 """],
# Emphasis delimited by non-ASCII dashes/hyphens (U+2010..U+2014), inverted
# punctuation (U+00BF, U+00A1) and no-break space (U+00A0): every "*...*"
# span is expected to become an emphasis node.
1684 [u"""\
1685 text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*.
1686 \u00bf*punctuation*? \u00a1*examples*!\u00a0*no-break-space*\u00a0.
1687 """,
1688 u"""\
1689 <document source="test data">
1690 <paragraph>
1691 text-
1692 <emphasis>
1693 separated
1694 \u2010
1695 <emphasis>
1697 \u2011
1698 <emphasis>
1699 various
1700 \u2012
1701 <emphasis>
1702 dashes
1703 \u2013
1704 <emphasis>
1706 \u2014
1707 <emphasis>
1708 hyphens
1710 \xbf
1711 <emphasis>
1712 punctuation
1713 ? \xa1
1714 <emphasis>
1715 examples
1716 !\xa0
1717 <emphasis>
1718 no-break-space
1719 \u00a0.
1720 """],
1721 # Whitespace characters:
1722 # \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, fails in Python 2.6
# Emphasis surrounded by a newline, an ASCII space, or one of the listed
# Unicode whitespace characters: each span is expected to become an emphasis
# node.  In the expected output the U+2028 LINE SEPARATOR span ends up in a
# paragraph of its own.
1723 [u"""\
1724 text separated by
1725 *newline*
1726 or *space* or one of
1727 \xa0*NO-BREAK SPACE*\xa0,
1728 \u1680*OGHAM SPACE MARK*\u1680,
1729 \u2000*EN QUAD*\u2000,
1730 \u2001*EM QUAD*\u2001,
1731 \u2002*EN SPACE*\u2002,
1732 \u2003*EM SPACE*\u2003,
1733 \u2004*THREE-PER-EM SPACE*\u2004,
1734 \u2005*FOUR-PER-EM SPACE*\u2005,
1735 \u2006*SIX-PER-EM SPACE*\u2006,
1736 \u2007*FIGURE SPACE*\u2007,
1737 \u2008*PUNCTUATION SPACE*\u2008,
1738 \u2009*THIN SPACE*\u2009,
1739 \u200a*HAIR SPACE*\u200a,
1740 \u202f*NARROW NO-BREAK SPACE*\u202f,
1741 \u205f*MEDIUM MATHEMATICAL SPACE*\u205f,
1742 \u3000*IDEOGRAPHIC SPACE*\u3000,
1743 \u2028*LINE SEPARATOR*\u2028
1744 """,
1745 u"""\
1746 <document source="test data">
1747 <paragraph>
1748 text separated by
1749 <emphasis>
1750 newline
1752 or \n\
1753 <emphasis>
1754 space
1755 or one of
1756 \xa0
1757 <emphasis>
1758 NO-BREAK SPACE
1759 \xa0,
1760 \u1680
1761 <emphasis>
1762 OGHAM SPACE MARK
1763 \u1680,
1764 \u2000
1765 <emphasis>
1766 EN QUAD
1767 \u2000,
1768 \u2001
1769 <emphasis>
1770 EM QUAD
1771 \u2001,
1772 \u2002
1773 <emphasis>
1774 EN SPACE
1775 \u2002,
1776 \u2003
1777 <emphasis>
1778 EM SPACE
1779 \u2003,
1780 \u2004
1781 <emphasis>
1782 THREE-PER-EM SPACE
1783 \u2004,
1784 \u2005
1785 <emphasis>
1786 FOUR-PER-EM SPACE
1787 \u2005,
1788 \u2006
1789 <emphasis>
1790 SIX-PER-EM SPACE
1791 \u2006,
1792 \u2007
1793 <emphasis>
1794 FIGURE SPACE
1795 \u2007,
1796 \u2008
1797 <emphasis>
1798 PUNCTUATION SPACE
1799 \u2008,
1800 \u2009
1801 <emphasis>
1802 THIN SPACE
1803 \u2009,
1804 \u200a
1805 <emphasis>
1806 HAIR SPACE
1807 \u200a,
1808 \u202f
1809 <emphasis>
1810 NARROW NO-BREAK SPACE
1811 \u202f,
1812 \u205f
1813 <emphasis>
1814 MEDIUM MATHEMATICAL SPACE
1815 \u205f,
1816 \u3000
1817 <emphasis>
1818 IDEOGRAPHIC SPACE
1819 \u3000,
1820 <paragraph>
1821 <emphasis>
1822 LINE SEPARATOR
1823 """],
# The same check for strong (**), literal (``) and single-backquote markup
# (expected as title_reference) using U+00A0, U+2000 and U+202F as the
# surrounding whitespace.
1824 [u"""\
1825 inline markup separated by non-ASCII whitespace
1826 \xa0**NO-BREAK SPACE**\xa0, \xa0``NO-BREAK SPACE``\xa0, \xa0`NO-BREAK SPACE`\xa0,
1827 \u2000**EN QUAD**\u2000, \u2000``EN QUAD``\u2000, \u2000`EN QUAD`\u2000,
1828 \u202f**NARROW NBSP**\u202f, \u202f``NARROW NBSP``\u202f, \u202f`NARROW NBSP`\u202f,
1829 """,
1830 u"""\
1831 <document source="test data">
1832 <paragraph>
1833 inline markup separated by non-ASCII whitespace
1834 \xa0
1835 <strong>
1836 NO-BREAK SPACE
1837 \xa0, \xa0
1838 <literal>
1839 NO-BREAK SPACE
1840 \xa0, \xa0
1841 <title_reference>
1842 NO-BREAK SPACE
1843 \xa0,
1844 \u2000
1845 <strong>
1846 EN QUAD
1847 \u2000, \u2000
1848 <literal>
1849 EN QUAD
1850 \u2000, \u2000
1851 <title_reference>
1852 EN QUAD
1853 \u2000,
1854 \u202f
1855 <strong>
1856 NARROW NBSP
1857 \u202f, \u202f
1858 <literal>
1859 NARROW NBSP
1860 \u202f, \u202f
1861 <title_reference>
1862 NARROW NBSP
1863 \u202f,
1864 """],
# Negative counterpart: whitespace sits directly inside the asterisks, so the
# expected output is plain text with the asterisks kept literally and no
# emphasis nodes.
1865 [u"""\
1866 no inline markup due to whitespace inside and behind: *
1867 newline
1869 * space * or one of
1870 *\xa0NO-BREAK SPACE\xa0*
1871 *\u1680OGHAM SPACE MARK\u1680*
1872 *\u2000EN QUAD\u2000*
1873 *\u2001EM QUAD\u2001*
1874 *\u2002EN SPACE\u2002*
1875 *\u2003EM SPACE\u2003*
1876 *\u2004THREE-PER-EM SPACE\u2004*
1877 *\u2005FOUR-PER-EM SPACE\u2005*
1878 *\u2006SIX-PER-EM SPACE\u2006*
1879 *\u2007FIGURE SPACE\u2007*
1880 *\u2008PUNCTUATION SPACE\u2008*
1881 *\u2009THIN SPACE\u2009*
1882 *\u200aHAIR SPACE\u200a*
1883 *\u202fNARROW NO-BREAK SPACE\u202f*
1884 *\u205fMEDIUM MATHEMATICAL SPACE\u205f*
1885 *\u3000IDEOGRAPHIC SPACE\u3000*
1886 *\u2028LINE SEPARATOR\u2028*
1887 """,
1888 u"""\
1889 <document source="test data">
1890 <paragraph>
1891 no inline markup due to whitespace inside and behind: *
1892 newline
1894 * space * or one of
1895 *\xa0NO-BREAK SPACE\xa0*
1896 *\u1680OGHAM SPACE MARK\u1680*
1897 *\u2000EN QUAD\u2000*
1898 *\u2001EM QUAD\u2001*
1899 *\u2002EN SPACE\u2002*
1900 *\u2003EM SPACE\u2003*
1901 *\u2004THREE-PER-EM SPACE\u2004*
1902 *\u2005FOUR-PER-EM SPACE\u2005*
1903 *\u2006SIX-PER-EM SPACE\u2006*
1904 *\u2007FIGURE SPACE\u2007*
1905 *\u2008PUNCTUATION SPACE\u2008*
1906 *\u2009THIN SPACE\u2009*
1907 *\u200aHAIR SPACE\u200a*
1908 *\u202fNARROW NO-BREAK SPACE\u202f*
1909 *\u205fMEDIUM MATHEMATICAL SPACE\u205f*
1910 *\u3000IDEOGRAPHIC SPACE\u3000*
1912 LINE SEPARATOR
1913 *"""],
# Negative counterpart for strong, literal and single-backquote markup:
# U+00A0, U+2000 or U+202F directly inside the delimiters, so the expected
# output is the unchanged text.
1914 [u"""\
1915 no inline markup because of non-ASCII whitespace following /preceding the markup
1916 **\xa0NO-BREAK SPACE\xa0** ``\xa0NO-BREAK SPACE\xa0`` `\xa0NO-BREAK SPACE\xa0`
1917 **\u2000EN QUAD\u2000** ``\u2000EN QUAD\u2000`` `\u2000EN QUAD\u2000`
1918 **\u202fNARROW NBSP\u202f** ``\u202fNARROW NBSP\u202f`` `\u202fNARROW NBSP\u202f`
1919 """,
1920 u"""\
1921 <document source="test data">
1922 <paragraph>
1923 no inline markup because of non-ASCII whitespace following /preceding the markup
1924 **\xa0NO-BREAK SPACE\xa0** ``\xa0NO-BREAK SPACE\xa0`` `\xa0NO-BREAK SPACE\xa0`
1925 **\u2000EN QUAD\u2000** ``\u2000EN QUAD\u2000`` `\u2000EN QUAD\u2000`
1926 **\u202fNARROW NBSP\u202f** ``\u202fNARROW NBSP\u202f`` `\u202fNARROW NBSP\u202f`\
1927 """],
1928 # « * » ‹ * › « * » ‹ * › French,
# A lone "*" enclosed in a matching pair of quotes or brackets (ASCII and
# several international quoting styles): the expected output leaves all of
# these as plain text.  The final paragraph pairs an opener with a
# non-matching closer, and there the text between the two asterisks is
# expected to become an emphasis node.
1929 [u"""\
1930 "Quoted" markup start-string (matched openers & closers) -> no markup:
1932 '*' "*" (*) <*> [*] {*}
1933 ⁅*⁆
1935 Some international quoting styles:
1936 ‘*’ “*” English, ...,
1937 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1938 „*” «*» Romanian,
1939 “*„ ‘*‚ Greek,
1940 「*」 『*』traditional Chinese,
1941 ”*” ’*’ »*» ›*› Swedish, Finnish,
1942 „*” ‚*’ Polish,
1943 „*” »*« ’*’ Hungarian,
1945 But this is „*’ emphasized »*‹.
1946 """,
1947 u"""\
1948 <document source="test data">
1949 <paragraph>
1950 "Quoted" markup start-string (matched openers & closers) -> no markup:
1951 <paragraph>
1952 '*' "*" (*) <*> [*] {*}
1953 ⁅*⁆
1954 <paragraph>
1955 Some international quoting styles:
1956 ‘*’ “*” English, ...,
1957 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1958 „*” «*» Romanian,
1959 “*„ ‘*‚ Greek,
1960 「*」 『*』traditional Chinese,
1961 ”*” ’*’ »*» ›*› Swedish, Finnish,
1962 „*” ‚*’ Polish,
1963 „*” »*« ’*’ Hungarian,
1964 <paragraph>
1965 But this is „
1966 <emphasis>
1967 ’ emphasized »
1968 ‹.
1969 """],
# Script entry point: when this module is executed directly, hand control to
# unittest and have it run the tests built by the module-level suite()
# function (selected by name through defaultTest).
1973 if __name__ == '__main__':
1974 import unittest
1975 unittest.main(defaultTest='suite')