Fix [ 3402314 ] non-ASCII whitespace and punctuation around inline markup.
[docutils.git] / test / test_parsers / test_rst / test_inline_markup.py
blob b758800cf0dbed1425d3eea84b743d32bf761350
1 #! /usr/bin/env python
2 # -*- coding: utf8 -*-
4 # $Id$
5 # Author: David Goodger <goodger@python.org>
6 # Copyright: This module has been placed in the public domain.
8 """
9 Tests for inline markup in docutils/parsers/rst/states.py.
10 Interpreted text tests are in a separate module, test_interpreted.py.
11 """
13 from __init__ import DocutilsTestSupport
def suite():
    """Return a parser test suite generated from the ``totest`` table."""
    parser_suite = DocutilsTestSupport.ParserTestSuite()
    parser_suite.generateTests(totest)
    return parser_suite
20 totest = {}
# Emphasis (*text*) cases.  Each entry is a two-item list: the input text
# followed by the expected output.  The cases cover apostrophes and quotes
# around the markup, emphasis spanning lines, a start-string without an
# end-string (WARNING system message), punctuation around the markup, and
# escaped asterisks.
22 totest['emphasis'] = [
23 ["""\
24 *emphasis*
25 """,
26 """\
27 <document source="test data">
28 <paragraph>
29 <emphasis>
30 emphasis
31 """],
32 [u"""\
33 l'*emphasis* with the *emphasis*' apostrophe.
34 l\u2019*emphasis* with the *emphasis*\u2019 apostrophe.
35 """,
36 u"""\
37 <document source="test data">
38 <paragraph>
39 l\'
40 <emphasis>
41 emphasis
42 with the \n\
43 <emphasis>
44 emphasis
45 \' apostrophe.
46 l\u2019
47 <emphasis>
48 emphasis
49 with the \n\
50 <emphasis>
51 emphasis
52 \u2019 apostrophe.
53 """],
54 ["""\
55 *emphasized sentence
56 across lines*
57 """,
58 """\
59 <document source="test data">
60 <paragraph>
61 <emphasis>
62 emphasized sentence
63 across lines
64 """],
65 ["""\
66 *emphasis without closing asterisk
67 """,
68 """\
69 <document source="test data">
70 <paragraph>
71 <problematic ids="id2" refid="id1">
73 emphasis without closing asterisk
74 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
75 <paragraph>
76 Inline emphasis start-string without end-string.
77 """],
78 [r"""some punctuation is allowed around inline markup, e.g.
79 /*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters),
80 (*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs)
82 but not
83 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs)
84 (*), [*], '*' or '"*"' ("quoted" start-string),
85 x*2* or 2*x* (alphanumeric char before),
86 \*args or * (escaped, whitespace behind start-string)
87 or *the\* *stars\* *inside* (escaped, whitespace before end-string).
89 However, '*args' will trigger a warning and may be problematic.
91 what about *this**?
92 """,
93 """\
94 <document source="test data">
95 <paragraph>
96 some punctuation is allowed around inline markup, e.g.
98 <emphasis>
99 emphasis
100 /, -
101 <emphasis>
102 emphasis
103 -, and :
104 <emphasis>
105 emphasis
106 : (delimiters),
108 <emphasis>
109 emphasis
110 ), [
111 <emphasis>
112 emphasis
113 ], <
114 <emphasis>
115 emphasis
116 >, {
117 <emphasis>
118 emphasis
119 } (open/close pairs)
120 <paragraph>
121 but not
122 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs)
123 (*), [*], '*' or '"*"' ("quoted" start-string),
124 x*2* or 2*x* (alphanumeric char before),
125 *args or * (escaped, whitespace behind start-string)
126 or \n\
127 <emphasis>
128 the* *stars* *inside
129 (escaped, whitespace before end-string).
130 <paragraph>
131 However, '
132 <problematic ids="id2" refid="id1">
134 args' will trigger a warning and may be problematic.
135 <system_message backrefs="id2" ids="id1" level="2" line="12" source="test data" type="WARNING">
136 <paragraph>
137 Inline emphasis start-string without end-string.
138 <paragraph>
139 what about \n\
140 <emphasis>
141 this*
143 """],
144 [u"""\
145 Quotes around inline markup:
147 '*emphasis*' "*emphasis*" Straight,
148 ‘*emphasis*’ “*emphasis*” English, ...,
149 « *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* ›
150 « *emphasis* » ‹ *emphasis* › French,
151 „*emphasis*“ ‚*emphasis*‘ »*emphasis*« ›*emphasis*‹ German, Czech, ...,
152 „*emphasis*” «*emphasis*» Romanian,
153 “*emphasis*„ ‘*emphasis*‚ Greek,
154 「*emphasis*」 『*emphasis*』traditional Chinese,
155 ”*emphasis*” ’*emphasis*’ »*emphasis*» ›*emphasis*› Swedish, Finnish,
156 „*emphasis*” ‚*emphasis*’ Polish,
157 „*emphasis*” »*emphasis*« ’*emphasis*’ Hungarian,
158 """,
159 u"""\
160 <document source="test data">
161 <paragraph>
162 Quotes around inline markup:
163 <paragraph>
165 <emphasis>
166 emphasis
167 \' "
168 <emphasis>
169 emphasis
170 " Straight,
171 \u2018
172 <emphasis>
173 emphasis
174 \u2019 \u201c
175 <emphasis>
176 emphasis
177 \u201d English, ...,
178 \xab\u202f
179 <emphasis>
180 emphasis
181 \u202f\xbb \u2039\u202f
182 <emphasis>
183 emphasis
184 \u202f\u203a \xab\xa0
185 <emphasis>
186 emphasis
187 \xa0\xbb \u2039\xa0
188 <emphasis>
189 emphasis
190 \xa0\u203a
191 \xab\u2005
192 <emphasis>
193 emphasis
194 \u2005\xbb \u2039\u2005
195 <emphasis>
196 emphasis
197 \u2005\u203a French,
198 \u201e
199 <emphasis>
200 emphasis
201 \u201c \u201a
202 <emphasis>
203 emphasis
204 \u2018 \xbb
205 <emphasis>
206 emphasis
207 \xab \u203a
208 <emphasis>
209 emphasis
210 \u2039 German, Czech, ...,
211 \u201e
212 <emphasis>
213 emphasis
214 \u201d \xab
215 <emphasis>
216 emphasis
217 \xbb Romanian,
218 \u201c
219 <emphasis>
220 emphasis
221 \u201e \u2018
222 <emphasis>
223 emphasis
224 \u201a Greek,
225 \u300c
226 <emphasis>
227 emphasis
228 \u300d \u300e
229 <emphasis>
230 emphasis
231 \u300ftraditional Chinese,
232 \u201d
233 <emphasis>
234 emphasis
235 \u201d \u2019
236 <emphasis>
237 emphasis
238 \u2019 \xbb
239 <emphasis>
240 emphasis
241 \xbb \u203a
242 <emphasis>
243 emphasis
244 \u203a Swedish, Finnish,
245 \u201e
246 <emphasis>
247 emphasis
248 \u201d \u201a
249 <emphasis>
250 emphasis
251 \u2019 Polish,
252 \u201e
253 <emphasis>
254 emphasis
255 \u201d \xbb
256 <emphasis>
257 emphasis
258 \xab \u2019
259 <emphasis>
260 emphasis
261 \u2019 Hungarian,
262 """],
263 [r"""
264 Emphasized asterisk: *\**
266 Emphasized double asterisk: *\***
267 """,
268 """\
269 <document source="test data">
270 <paragraph>
271 Emphasized asterisk: \n\
272 <emphasis>
274 <paragraph>
275 Emphasized double asterisk: \n\
276 <emphasis>
278 """],
# Strong (**text**) cases.  Each entry is a two-item list: the input text
# followed by the expected output.  The cases cover apostrophes and quotes
# around the markup, contexts where "**" is not a start-string, strong
# asterisks, and a start-string without an end-string (WARNING).
281 totest['strong'] = [
282 ["""\
283 **strong**
284 """,
285 """\
286 <document source="test data">
287 <paragraph>
288 <strong>
289 strong
290 """],
291 [u"""\
292 l'**strong** and l\u2019**strong** with apostrophe
293 """,
294 u"""\
295 <document source="test data">
296 <paragraph>
298 <strong>
299 strong
300 and l\u2019
301 <strong>
302 strong
303 with apostrophe
304 """],
305 [u"""\
306 quoted '**strong**', quoted "**strong**",
307 quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d,
308 quoted \xab**strong**\xbb
309 """,
310 u"""\
311 <document source="test data">
312 <paragraph>
313 quoted '
314 <strong>
315 strong
316 ', quoted "
317 <strong>
318 strong
320 quoted \u2018
321 <strong>
322 strong
323 \u2019, quoted \u201c
324 <strong>
325 strong
326 \u201d,
327 quoted \xab
328 <strong>
329 strong
330 \xbb
331 """],
332 [r"""
333 (**strong**) but not (**) or '(** ' or x**2 or \**kwargs or **
335 (however, '**kwargs' will trigger a warning and may be problematic)
336 """,
337 """\
338 <document source="test data">
339 <paragraph>
341 <strong>
342 strong
343 ) but not (**) or '(** ' or x**2 or **kwargs or **
344 <paragraph>
345 (however, '
346 <problematic ids="id2" refid="id1">
348 kwargs' will trigger a warning and may be problematic)
349 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
350 <paragraph>
351 Inline strong start-string without end-string.
352 """],
353 ["""\
354 Strong asterisk: *****
356 Strong double asterisk: ******
357 """,
358 """\
359 <document source="test data">
360 <paragraph>
361 Strong asterisk: \n\
362 <strong>
364 <paragraph>
365 Strong double asterisk: \n\
366 <strong>
368 """],
369 ["""\
370 **strong without closing asterisks
371 """,
372 """\
373 <document source="test data">
374 <paragraph>
375 <problematic ids="id2" refid="id1">
377 strong without closing asterisks
378 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
379 <paragraph>
380 Inline strong start-string without end-string.
381 """],
# Inline literal (``text``) cases.  Each entry is a two-item list: the input
# text followed by the expected output.  The cases cover backslashes inside
# literals, apostrophes and quotes around and inside the markup, TeX-style
# quotes, triple backquotes, a start-string without an end-string (WARNING),
# and an escaped character directly after a literal.
384 totest['literal'] = [
385 ["""\
386 ``literal``
387 """,
388 """\
389 <document source="test data">
390 <paragraph>
391 <literal>
392 literal
393 """],
394 [r"""
395 ``\literal``
396 """,
397 """\
398 <document source="test data">
399 <paragraph>
400 <literal>
401 \\literal
402 """],
403 [r"""
404 ``lite\ral``
405 """,
406 """\
407 <document source="test data">
408 <paragraph>
409 <literal>
410 lite\\ral
411 """],
412 [r"""
413 ``literal\``
414 """,
415 """\
416 <document source="test data">
417 <paragraph>
418 <literal>
419 literal\\
420 """],
421 [u"""\
422 l'``literal`` and l\u2019``literal`` with apostrophe
423 """,
424 u"""\
425 <document source="test data">
426 <paragraph>
428 <literal>
429 literal
430 and l\u2019
431 <literal>
432 literal
433 with apostrophe
434 """],
435 [u"""\
436 quoted '``literal``', quoted "``literal``",
437 quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
438 quoted \xab``literal``\xbb
439 """,
440 u"""\
441 <document source="test data">
442 <paragraph>
443 quoted '
444 <literal>
445 literal
446 ', quoted "
447 <literal>
448 literal
450 quoted \u2018
451 <literal>
452 literal
453 \u2019, quoted \u201c
454 <literal>
455 literal
456 \u201d,
457 quoted \xab
458 <literal>
459 literal
460 \xbb
461 """],
462 [u"""\
463 ``'literal'`` with quotes, ``"literal"`` with quotes,
464 ``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
465 ``\xabliteral\xbb`` with quotes
466 """,
467 u"""\
468 <document source="test data">
469 <paragraph>
470 <literal>
471 'literal'
472 with quotes, \n\
473 <literal>
474 "literal"
475 with quotes,
476 <literal>
477 \u2018literal\u2019
478 with quotes, \n\
479 <literal>
480 \u201cliteral\u201d
481 with quotes,
482 <literal>
483 \xabliteral\xbb
484 with quotes
485 """],
486 [r"""
487 ``literal ``TeX quotes'' & \backslash`` but not "``" or ``
489 (however, ``standalone TeX quotes'' will trigger a warning
490 and may be problematic)
491 """,
492 """\
493 <document source="test data">
494 <paragraph>
495 <literal>
496 literal ``TeX quotes'' & \\backslash
497 but not "``" or ``
498 <paragraph>
499 (however, \n\
500 <problematic ids="id2" refid="id1">
502 standalone TeX quotes'' will trigger a warning
503 and may be problematic)
504 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
505 <paragraph>
506 Inline literal start-string without end-string.
507 """],
508 ["""\
509 Find the ```interpreted text``` in this paragraph!
510 """,
511 """\
512 <document source="test data">
513 <paragraph>
514 Find the \n\
515 <literal>
516 `interpreted text`
517 in this paragraph!
518 """],
519 ["""\
520 ``literal without closing backquotes
521 """,
522 """\
523 <document source="test data">
524 <paragraph>
525 <problematic ids="id2" refid="id1">
527 literal without closing backquotes
528 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
529 <paragraph>
530 Inline literal start-string without end-string.
531 """],
532 [r"""
533 Python ``list``\s use square bracket syntax.
534 """,
535 """\
536 <document source="test data">
537 <paragraph>
538 Python \n\
539 <literal>
540 list
541 s use square bracket syntax.
542 """],
# Simple reference cases: named (ref_) and anonymous (ref__).  Each entry is a
# two-item list: the input text followed by the expected output.  The cases
# cover apostrophes and quotes around a reference, quoted multi-word text that
# is not turned into a reference, and names such as _ref_ or __attr__ that
# stay plain text.
545 totest['references'] = [
546 ["""\
547 ref_
548 """,
549 """\
550 <document source="test data">
551 <paragraph>
552 <reference name="ref" refname="ref">
554 """],
555 [u"""\
556 l'ref_ and l\u2019ref_ with apostrophe
557 """,
558 u"""\
559 <document source="test data">
560 <paragraph>
562 <reference name="ref" refname="ref">
564 and l\u2019
565 <reference name="ref" refname="ref">
567 with apostrophe
568 """],
569 [u"""\
570 quoted 'ref_', quoted "ref_",
571 quoted \u2018ref_\u2019, quoted \u201cref_\u201d,
572 quoted \xabref_\xbb,
573 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
574 \u201cref ref\u201d_, or \xabref ref\xbb_
575 """,
576 u"""\
577 <document source="test data">
578 <paragraph>
579 quoted '
580 <reference name="ref" refname="ref">
582 ', quoted "
583 <reference name="ref" refname="ref">
586 quoted \u2018
587 <reference name="ref" refname="ref">
589 \u2019, quoted \u201c
590 <reference name="ref" refname="ref">
592 \u201d,
593 quoted \xab
594 <reference name="ref" refname="ref">
596 \xbb,
597 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
598 \u201cref ref\u201d_, or \xabref ref\xbb_
599 """],
600 ["""\
601 ref__
602 """,
603 """\
604 <document source="test data">
605 <paragraph>
606 <reference anonymous="1" name="ref">
608 """],
609 [u"""\
610 l'ref__ and l\u2019ref__ with apostrophe
611 """,
612 u"""\
613 <document source="test data">
614 <paragraph>
616 <reference anonymous="1" name="ref">
618 and l\u2019
619 <reference anonymous="1" name="ref">
621 with apostrophe
622 """],
623 [u"""\
624 quoted 'ref__', quoted "ref__",
625 quoted \u2018ref__\u2019, quoted \u201cref__\u201d,
626 quoted \xabref__\xbb,
627 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
628 \u201cref ref\u201d__, or \xabref ref\xbb__
629 """,
630 u"""\
631 <document source="test data">
632 <paragraph>
633 quoted '
634 <reference anonymous="1" name="ref">
636 ', quoted "
637 <reference anonymous="1" name="ref">
640 quoted \u2018
641 <reference anonymous="1" name="ref">
643 \u2019, quoted \u201c
644 <reference anonymous="1" name="ref">
646 \u201d,
647 quoted \xab
648 <reference anonymous="1" name="ref">
650 \xbb,
651 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
652 \u201cref ref\u201d__, or \xabref ref\xbb__
653 """],
654 ["""\
655 ref_, r_, r_e-f_, -ref_, and anonymousref__,
656 but not _ref_ or __attr__ or object.__attr__
657 """,
658 """\
659 <document source="test data">
660 <paragraph>
661 <reference name="ref" refname="ref">
663 , \n\
664 <reference name="r" refname="r">
666 , \n\
667 <reference name="r_e-f" refname="r_e-f">
668 r_e-f
670 <reference name="ref" refname="ref">
672 , and \n\
673 <reference anonymous="1" name="anonymousref">
674 anonymousref
676 but not _ref_ or __attr__ or object.__attr__
677 """],
# Phrase reference cases: named (`text`_) and anonymous (`text`__).  Each
# entry is a two-item list: the input text followed by the expected output.
# The cases cover apostrophes and quotes around and inside the phrase, phrases
# spanning lines, an escaped backquote, role prefix/suffix mismatches
# (WARNING), and a missing closing backquote (WARNING).
680 totest['phrase_references'] = [
681 ["""\
682 `phrase reference`_
683 """,
684 """\
685 <document source="test data">
686 <paragraph>
687 <reference name="phrase reference" refname="phrase reference">
688 phrase reference
689 """],
690 [u"""\
691 l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe
692 """,
693 u"""\
694 <document source="test data">
695 <paragraph>
697 <reference name="phrase reference" refname="phrase reference">
698 phrase reference
699 and l\u2019
700 <reference name="phrase reference" refname="phrase reference">
701 phrase reference
702 with apostrophe
703 """],
704 [u"""\
705 quoted '`phrase reference`_', quoted "`phrase reference`_",
706 quoted \u2018`phrase reference`_\u2019,
707 quoted \u201c`phrase reference`_\u201d,
708 quoted \xab`phrase reference`_\xbb
709 """,
710 u"""\
711 <document source="test data">
712 <paragraph>
713 quoted '
714 <reference name="phrase reference" refname="phrase reference">
715 phrase reference
716 ', quoted "
717 <reference name="phrase reference" refname="phrase reference">
718 phrase reference
720 quoted \u2018
721 <reference name="phrase reference" refname="phrase reference">
722 phrase reference
723 \u2019,
724 quoted \u201c
725 <reference name="phrase reference" refname="phrase reference">
726 phrase reference
727 \u201d,
728 quoted \xab
729 <reference name="phrase reference" refname="phrase reference">
730 phrase reference
731 \xbb
732 """],
733 [u"""\
734 `'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes,
735 `\u2018phrase reference\u2019`_ with quotes,
736 `\u201cphrase reference\u201d`_ with quotes,
737 `\xabphrase reference\xbb`_ with quotes
738 """,
739 u"""\
740 <document source="test data">
741 <paragraph>
742 <reference name="'phrase reference'" refname="'phrase reference'">
743 'phrase reference'
744 with quotes, \n\
745 <reference name=""phrase reference"" refname=""phrase reference"">
746 "phrase reference"
747 with quotes,
748 <reference name="\u2018phrase reference\u2019" refname="\u2018phrase reference\u2019">
749 \u2018phrase reference\u2019
750 with quotes,
751 <reference name="\u201cphrase reference\u201d" refname="\u201cphrase reference\u201d">
752 \u201cphrase reference\u201d
753 with quotes,
754 <reference name="\xabphrase reference\xbb" refname="\xabphrase reference\xbb">
755 \xabphrase reference\xbb
756 with quotes
757 """],
758 ["""\
759 `anonymous reference`__
760 """,
761 """\
762 <document source="test data">
763 <paragraph>
764 <reference anonymous="1" name="anonymous reference">
765 anonymous reference
766 """],
767 [u"""\
768 l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe
769 """,
770 u"""\
771 <document source="test data">
772 <paragraph>
774 <reference anonymous="1" name="anonymous reference">
775 anonymous reference
776 and l\u2019
777 <reference anonymous="1" name="anonymous reference">
778 anonymous reference
779 with apostrophe
780 """],
781 [u"""\
782 quoted '`anonymous reference`__', quoted "`anonymous reference`__",
783 quoted \u2018`anonymous reference`__\u2019,
784 quoted \u201c`anonymous reference`__\u201d,
785 quoted \xab`anonymous reference`__\xbb
786 """,
787 u"""\
788 <document source="test data">
789 <paragraph>
790 quoted '
791 <reference anonymous="1" name="anonymous reference">
792 anonymous reference
793 ', quoted "
794 <reference anonymous="1" name="anonymous reference">
795 anonymous reference
797 quoted \u2018
798 <reference anonymous="1" name="anonymous reference">
799 anonymous reference
800 \u2019,
801 quoted \u201c
802 <reference anonymous="1" name="anonymous reference">
803 anonymous reference
804 \u201d,
805 quoted \xab
806 <reference anonymous="1" name="anonymous reference">
807 anonymous reference
808 \xbb
809 """],
810 [u"""\
811 `'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes,
812 `\u2018anonymous reference\u2019`__ with quotes,
813 `\u201canonymous reference\u201d`__ with quotes,
814 `\xabanonymous reference\xbb`__ with quotes
815 """,
816 u"""\
817 <document source="test data">
818 <paragraph>
819 <reference anonymous="1" name="'anonymous reference'">
820 'anonymous reference'
821 with quotes, \n\
822 <reference anonymous="1" name=""anonymous reference"">
823 "anonymous reference"
824 with quotes,
825 <reference anonymous="1" name="\u2018anonymous reference\u2019">
826 \u2018anonymous reference\u2019
827 with quotes,
828 <reference anonymous="1" name="\u201canonymous reference\u201d">
829 \u201canonymous reference\u201d
830 with quotes,
831 <reference anonymous="1" name="\xabanonymous reference\xbb">
832 \xabanonymous reference\xbb
833 with quotes
834 """],
835 ["""\
836 `phrase reference
837 across lines`_
838 """,
839 """\
840 <document source="test data">
841 <paragraph>
842 <reference name="phrase reference across lines" refname="phrase reference across lines">
843 phrase reference
844 across lines
845 """],
846 ["""\
847 `phrase\`_ reference`_
848 """,
849 """\
850 <document source="test data">
851 <paragraph>
852 <reference name="phrase`_ reference" refname="phrase`_ reference">
853 phrase`_ reference
854 """],
855 ["""\
856 Invalid phrase reference:
858 :role:`phrase reference`_
859 """,
860 """\
861 <document source="test data">
862 <paragraph>
863 Invalid phrase reference:
864 <paragraph>
865 <problematic ids="id2" refid="id1">
866 :role:`phrase reference`_
867 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
868 <paragraph>
869 Mismatch: both interpreted text role prefix and reference suffix.
870 """],
871 ["""\
872 Invalid phrase reference:
874 `phrase reference`:role:_
875 """,
876 """\
877 <document source="test data">
878 <paragraph>
879 Invalid phrase reference:
880 <paragraph>
881 <problematic ids="id2" refid="id1">
882 `phrase reference`:role:_
883 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
884 <paragraph>
885 Mismatch: both interpreted text role suffix and reference suffix.
886 """],
887 ["""\
888 `phrase reference_ without closing backquote
889 """,
890 """\
891 <document source="test data">
892 <paragraph>
893 <problematic ids="id2" refid="id1">
895 phrase \n\
896 <reference name="reference" refname="reference">
897 reference
898 without closing backquote
899 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
900 <paragraph>
901 Inline interpreted text or phrase reference start-string without end-string.
902 """],
903 ["""\
904 `anonymous phrase reference__ without closing backquote
905 """,
906 """\
907 <document source="test data">
908 <paragraph>
909 <problematic ids="id2" refid="id1">
911 anonymous phrase \n\
912 <reference anonymous="1" name="reference">
913 reference
914 without closing backquote
915 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
916 <paragraph>
917 Inline interpreted text or phrase reference start-string without end-string.
918 """],
# Embedded URI cases (`text <uri>`_ and `text <uri>`__).  Each entry is a
# two-item list: the input text followed by the expected output.  The cases
# cover URIs on the next line, URIs broken across lines or containing
# whitespace, email addresses, inputs whose angle-bracket part stays in the
# reference text, and relative URIs with the reference text omitted.
921 totest['embedded_URIs'] = [
922 ["""\
923 `phrase reference <http://example.com>`_
924 """,
925 """\
926 <document source="test data">
927 <paragraph>
928 <reference name="phrase reference" refuri="http://example.com">
929 phrase reference
930 <target ids="phrase-reference" names="phrase\ reference" refuri="http://example.com">
931 """],
932 ["""\
933 `anonymous reference <http://example.com>`__
934 """,
935 """\
936 <document source="test data">
937 <paragraph>
938 <reference name="anonymous reference" refuri="http://example.com">
939 anonymous reference
940 """],
941 ["""\
942 `embedded URI on next line
943 <http://example.com>`__
944 """,
945 """\
946 <document source="test data">
947 <paragraph>
948 <reference name="embedded URI on next line" refuri="http://example.com">
949 embedded URI on next line
950 """],
951 ["""\
952 `embedded URI across lines <http://example.com/
953 long/path>`__
954 """,
955 """\
956 <document source="test data">
957 <paragraph>
958 <reference name="embedded URI across lines" refuri="http://example.com/long/path">
959 embedded URI across lines
960 """],
961 ["""\
962 `embedded URI with whitespace <http://example.com/
963 long/path /and /whitespace>`__
964 """,
965 """\
966 <document source="test data">
967 <paragraph>
968 <reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
969 embedded URI with whitespace
970 """],
971 ["""\
972 `embedded email address <jdoe@example.com>`__
974 `embedded email address broken across lines <jdoe
975 @example.com>`__
976 """,
977 """\
978 <document source="test data">
979 <paragraph>
980 <reference name="embedded email address" refuri="mailto:jdoe@example.com">
981 embedded email address
982 <paragraph>
983 <reference name="embedded email address broken across lines" refuri="mailto:jdoe@example.com">
984 embedded email address broken across lines
985 """],
986 [r"""
987 `embedded URI with too much whitespace < http://example.com/
988 long/path /and /whitespace >`__
990 `embedded URI with too much whitespace at end <http://example.com/
991 long/path /and /whitespace >`__
993 `embedded URI with no preceding whitespace<http://example.com>`__
995 `escaped URI \<http://example.com>`__
997 See `HTML Anchors: \<a>`_.
998 """,
999 """\
1000 <document source="test data">
1001 <paragraph>
1002 <reference anonymous="1" name="embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace >">
1003 embedded URI with too much whitespace < http://example.com/
1004 long/path /and /whitespace >
1005 <paragraph>
1006 <reference anonymous="1" name="embedded URI with too much whitespace at end <http://example.com/ long/path /and /whitespace >">
1007 embedded URI with too much whitespace at end <http://example.com/
1008 long/path /and /whitespace >
1009 <paragraph>
1010 <reference anonymous="1" name="embedded URI with no preceding whitespace<http://example.com>">
1011 embedded URI with no preceding whitespace<http://example.com>
1012 <paragraph>
1013 <reference anonymous="1" name="escaped URI <http://example.com>">
1014 escaped URI <http://example.com>
1015 <paragraph>
1016 See \n\
1017 <reference name="HTML Anchors: <a>" refname="html anchors: <a>">
1018 HTML Anchors: <a>
1020 """],
1021 ["""\
1022 Relative URIs' reference text can be omitted:
1024 `<reference>`_
1026 `<anonymous>`__
1027 """,
1028 """\
1029 <document source="test data">
1030 <paragraph>
1031 Relative URIs' reference text can be omitted:
1032 <paragraph>
1033 <reference name="reference" refuri="reference">
1034 reference
1035 <target ids="reference" names="reference" refuri="reference">
1036 <paragraph>
1037 <reference name="anonymous" refuri="anonymous">
1038 anonymous
1039 """],
# Inline target (_`text`) cases.  Each entry is a two-item list: the input
# text followed by the expected output.  The cases cover targets within text
# and spanning lines, mixed-case names, apostrophes and quotes around and
# inside the target, "_target" without backquotes (left as plain text), and a
# start-string without an end-string (WARNING).
1042 totest['inline_targets'] = [
1043 ["""\
1044 _`target`
1046 Here is _`another target` in some text. And _`yet
1047 another target`, spanning lines.
1049 _`Here is a TaRgeT` with case and spacial difficulties.
1050 """,
1051 """\
1052 <document source="test data">
1053 <paragraph>
1054 <target ids="target" names="target">
1055 target
1056 <paragraph>
1057 Here is \n\
1058 <target ids="another-target" names="another\ target">
1059 another target
1060 in some text. And \n\
1061 <target ids="yet-another-target" names="yet\ another\ target">
1063 another target
1064 , spanning lines.
1065 <paragraph>
1066 <target ids="here-is-a-target" names="here\ is\ a\ target">
1067 Here is a TaRgeT
1068 with case and spacial difficulties.
1069 """],
1070 [u"""\
1071 l'_`target1` and l\u2019_`target2` with apostrophe
1072 """,
1073 u"""\
1074 <document source="test data">
1075 <paragraph>
1077 <target ids="target1" names="target1">
1078 target1
1079 and l\u2019
1080 <target ids="target2" names="target2">
1081 target2
1082 with apostrophe
1083 """],
1084 [u"""\
1085 quoted '_`target1`', quoted "_`target2`",
1086 quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d,
1087 quoted \xab_`target5`\xbb
1088 """,
1089 u"""\
1090 <document source="test data">
1091 <paragraph>
1092 quoted '
1093 <target ids="target1" names="target1">
1094 target1
1095 ', quoted "
1096 <target ids="target2" names="target2">
1097 target2
1099 quoted \u2018
1100 <target ids="target3" names="target3">
1101 target3
1102 \u2019, quoted \u201c
1103 <target ids="target4" names="target4">
1104 target4
1105 \u201d,
1106 quoted \xab
1107 <target ids="target5" names="target5">
1108 target5
1109 \xbb
1110 """],
1111 [u"""\
1112 _`'target1'` with quotes, _`"target2"` with quotes,
1113 _`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes,
1114 _`\xabtarget5\xbb` with quotes
1115 """,
1116 u"""\
1117 <document source="test data">
1118 <paragraph>
1119 <target ids="target1" names="'target1'">
1120 'target1'
1121 with quotes, \n\
1122 <target ids="target2" names=""target2"">
1123 "target2"
1124 with quotes,
1125 <target ids="target3" names="\u2018target3\u2019">
1126 \u2018target3\u2019
1127 with quotes, \n\
1128 <target ids="target4" names="\u201ctarget4\u201d">
1129 \u201ctarget4\u201d
1130 with quotes,
1131 <target ids="target5" names="\xabtarget5\xbb">
1132 \xabtarget5\xbb
1133 with quotes
1134 """],
1135 ["""\
1136 But this isn't a _target; targets require backquotes.
1138 And _`this`_ is just plain confusing.
1139 """,
1140 """\
1141 <document source="test data">
1142 <paragraph>
1143 But this isn't a _target; targets require backquotes.
1144 <paragraph>
1145 And \n\
1146 <problematic ids="id2" refid="id1">
1148 this`_ is just plain confusing.
1149 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
1150 <paragraph>
1151 Inline target start-string without end-string.
1152 """],
1153 ["""\
1154 _`inline target without closing backquote
1155 """,
1156 """\
1157 <document source="test data">
1158 <paragraph>
1159 <problematic ids="id2" refid="id1">
1161 inline target without closing backquote
1162 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1163 <paragraph>
1164 Inline target start-string without end-string.
1165 """],
# Footnote reference cases: numbered ([1]_), auto-numbered ([#]_), labeled
# ([#label]_) and auto-symbol ([*]_).  Each entry is a two-item list: the
# input text followed by the expected output.  The last case shows adjacent
# footnote references being left as plain text.
1168 totest['footnote_reference'] = [
1169 ["""\
1170 [1]_
1171 """,
1172 """\
1173 <document source="test data">
1174 <paragraph>
1175 <footnote_reference ids="id1" refname="1">
1177 """],
1178 ["""\
1179 [#]_
1180 """,
1181 """\
1182 <document source="test data">
1183 <paragraph>
1184 <footnote_reference auto="1" ids="id1">
1185 """],
1186 ["""\
1187 [#label]_
1188 """,
1189 """\
1190 <document source="test data">
1191 <paragraph>
1192 <footnote_reference auto="1" ids="id1" refname="label">
1193 """],
1194 ["""\
1195 [*]_
1196 """,
1197 """\
1198 <document source="test data">
1199 <paragraph>
1200 <footnote_reference auto="*" ids="id1">
1201 """],
1202 ["""\
1203 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1204 """,
1205 """\
1206 <document source="test data">
1207 <paragraph>
1208 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1209 """],
# Citation reference cases ([label]_).  Each entry is a two-item list: the
# input text followed by the expected output.  The cases cover labels
# containing hyphens, periods and digits, a label containing a space (left as
# plain text), and adjacent citation references (left as plain text).
1212 totest['citation_reference'] = [
1213 ["""\
1214 [citation]_
1215 """,
1216 """\
1217 <document source="test data">
1218 <paragraph>
1219 <citation_reference ids="id1" refname="citation">
1220 citation
1221 """],
1222 ["""\
1223 [citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_
1224 """,
1225 """\
1226 <document source="test data">
1227 <paragraph>
1228 <citation_reference ids="id1" refname="citation">
1229 citation
1230 and \n\
1231 <citation_reference ids="id2" refname="cit-ation">
1232 cit-ation
1233 and \n\
1234 <citation_reference ids="id3" refname="cit.ation">
1235 cit.ation
1236 and \n\
1237 <citation_reference ids="id4" refname="cit1">
1238 CIT1
1239 but not [CIT 1]_
1240 """],
1241 ["""\
1242 Adjacent citation refs are not possible: [citation]_[CIT1]_
1243 """,
1244 """\
1245 <document source="test data">
1246 <paragraph>
1247 Adjacent citation refs are not possible: [citation]_[CIT1]_
1248 """],
# Substitution reference cases (|text|).  Each entry is a two-item list: the
# input text followed by the expected output.  The cases cover a reference
# suffix (|subref|_ and |subref|__), multi-word names, names spanning lines,
# a start-string without an end-string (WARNING), and bare vertical bars that
# stay plain text.
1251 totest['substitution_references'] = [
1252 ["""\
1253 |subref|
1254 """,
1255 """\
1256 <document source="test data">
1257 <paragraph>
1258 <substitution_reference refname="subref">
1259 subref
1260 """],
1261 ["""\
1262 |subref|_ and |subref|__
1263 """,
1264 """\
1265 <document source="test data">
1266 <paragraph>
1267 <reference refname="subref">
1268 <substitution_reference refname="subref">
1269 subref
1270 and \n\
1271 <reference anonymous="1">
1272 <substitution_reference refname="subref">
1273 subref
1274 """],
1275 ["""\
1276 |substitution reference|
1277 """,
1278 """\
1279 <document source="test data">
1280 <paragraph>
1281 <substitution_reference refname="substitution reference">
1282 substitution reference
1283 """],
1284 ["""\
1285 |substitution
1286 reference|
1287 """,
1288 """\
1289 <document source="test data">
1290 <paragraph>
1291 <substitution_reference refname="substitution reference">
1292 substitution
1293 reference
1294 """],
1295 ["""\
1296 |substitution reference without closing verbar
1297 """,
1298 """\
1299 <document source="test data">
1300 <paragraph>
1301 <problematic ids="id2" refid="id1">
1303 substitution reference without closing verbar
1304 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1305 <paragraph>
1306 Inline substitution_reference start-string without end-string.
1307 """],
1308 ["""\
1309 first | then || and finally |||
1310 """,
1311 """\
1312 <document source="test data">
1313 <paragraph>
1314 first | then || and finally |||
1315 """],
# Test cases for standalone hyperlinks: URIs and email addresses that appear
# in running text without any explicit markup.
# Each entry is a two-item list: reStructuredText input, followed by the
# expected dump of the parsed document tree.
1318 totest['standalone_hyperlink'] = [
# Recognized URI forms.  Per the expected output below:
# - enclosing brackets and trailing sentence punctuation (".", "?") stay
#   outside the refuri;
# - a bare email address gets a "mailto:" refuri;
# - a final "]" directly after an IPv6 host is excluded from the refuri,
#   unless more of the URI (here "/") follows it.
1319 ["""\
1320 http://www.standalone.hyperlink.com
1322 http:/one-slash-only.absolute.path
1324 [http://example.com]
1326 (http://example.com)
1328 <http://example.com>
1330 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1332 http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text)
1334 http://[3ffe:2a00:100:7031::1]/
1336 mailto:someone@somewhere.com
1338 news:comp.lang.python
1340 An email address in a sentence: someone@somewhere.com.
1342 ftp://ends.with.a.period.
1344 (a.question.mark@end?)
1345 """,
1346 """\
1347 <document source="test data">
1348 <paragraph>
1349 <reference refuri="http://www.standalone.hyperlink.com">
1350 http://www.standalone.hyperlink.com
1351 <paragraph>
1352 <reference refuri="http:/one-slash-only.absolute.path">
1353 http:/one-slash-only.absolute.path
1354 <paragraph>
1356 <reference refuri="http://example.com">
1357 http://example.com
1359 <paragraph>
1361 <reference refuri="http://example.com">
1362 http://example.com
1364 <paragraph>
1366 <reference refuri="http://example.com">
1367 http://example.com
1369 <paragraph>
1370 <reference refuri="http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html">
1371 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1372 <paragraph>
1373 <reference refuri="http://[3ffe:2a00:100:7031::1">
1374 http://[3ffe:2a00:100:7031::1
1375 ] (the final "]" is ambiguous in text)
1376 <paragraph>
1377 <reference refuri="http://[3ffe:2a00:100:7031::1]/">
1378 http://[3ffe:2a00:100:7031::1]/
1379 <paragraph>
1380 <reference refuri="mailto:someone@somewhere.com">
1381 mailto:someone@somewhere.com
1382 <paragraph>
1383 <reference refuri="news:comp.lang.python">
1384 news:comp.lang.python
1385 <paragraph>
1386 An email address in a sentence: \n\
1387 <reference refuri="mailto:someone@somewhere.com">
1388 someone@somewhere.com
1390 <paragraph>
1391 <reference refuri="ftp://ends.with.a.period">
1392 ftp://ends.with.a.period
1394 <paragraph>
1396 <reference refuri="mailto:a.question.mark@end">
1397 a.question.mark@end
1399 """],
# Raw-string input: backslash-escaped asterisks inside a URL do not break
# the hyperlink, and the backslashes are absent from the expected refuri.
1400 [r"""
1401 Valid URLs with escaped markup characters:
1403 http://example.com/\*content\*/whatever
1405 http://example.com/\*content*/whatever
1406 """,
1407 """\
1408 <document source="test data">
1409 <paragraph>
1410 Valid URLs with escaped markup characters:
1411 <paragraph>
1412 <reference refuri="http://example.com/*content*/whatever">
1413 http://example.com/*content*/whatever
1414 <paragraph>
1415 <reference refuri="http://example.com/*content*/whatever">
1416 http://example.com/*content*/whatever
1417 """],
# Inside angle brackets a trailing "." is kept as part of the refuri.
1418 ["""\
1419 Valid URLs may end with punctuation inside "<>":
1421 <http://example.org/ends-with-dot.>
1422 """,
1423 """\
1424 <document source="test data">
1425 <paragraph>
1426 Valid URLs may end with punctuation inside "<>":
1427 <paragraph>
1429 <reference refuri="http://example.org/ends-with-dot.">
1430 http://example.org/ends-with-dot.
1432 """],
# Trailing "+" characters are kept as part of the refuri.
1433 ["""\
1434 Valid URLs with interesting endings:
1436 http://example.org/ends-with-pluses++
1437 """,
1438 """\
1439 <document source="test data">
1440 <paragraph>
1441 Valid URLs with interesting endings:
1442 <paragraph>
1443 <reference refuri="http://example.org/ends-with-pluses++">
1444 http://example.org/ends-with-pluses++
1445 """],
# "scheme:text" with an unrecognized scheme stays plain text.
1446 ["""\
1447 None of these are standalone hyperlinks (their "schemes"
1448 are not recognized): signal:noise, a:b.
1449 """,
1450 """\
1451 <document source="test data">
1452 <paragraph>
1453 None of these are standalone hyperlinks (their "schemes"
1454 are not recognized): signal:noise, a:b.
1455 """],
# A backslash before "@" suppresses email recognition; the expected text
# no longer contains the backslash.
1456 ["""\
1457 Escaped email addresses are not recognized: test\@example.org
1458 """,
1459 """\
1460 <document source="test data">
1461 <paragraph>
1462 Escaped email addresses are not recognized: test@example.org
1463 """],
# Test cases for the inline markup recognition rules: which characters may
# surround start-strings and end-strings for them to count as markup.
# Each entry is a two-item list: reStructuredText input, followed by the
# expected dump of the parsed document tree.
1466 totest['markup recognition rules'] = [
# Double underscores wrapped around a word are not markup.
1467 ["""\
1468 __This__ should be left alone.
1469 """,
1470 """\
1471 <document source="test data">
1472 <paragraph>
1473 __This__ should be left alone.
1474 """],
# Raw-string input: inline markup joined to neighbouring characters by
# backslash-escaped whitespace.  The escaped line break after "new" yields
# "newlines" in the expected text.
1475 [r"""
1476 Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
1477 with backslash-escaped whitespace, including new\
1478 lines.
1479 """,
1480 """\
1481 <document source="test data">
1482 <paragraph>
1483 Character-level m
1484 <emphasis>
1486 <strong>
1488 <literal>
1490 <title_reference>
1493 with backslash-escaped whitespace, including newlines.
1494 """],
# Non-ASCII delimiters: hyphens and dashes U+2010..U+2014, inverted
# punctuation (U+00BF, U+00A1) and NO-BREAK SPACE (U+00A0) next to the
# start/end-strings still give emphasis.  In the last item the no-break
# spaces just inside the asterisks end up inside the <emphasis> text.
1495 [u"""\
1496 text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*.
1497 \u00bf*punctuation*? \u00a1*examples*!\u00a0*\u00a0no-break-space\u00a0*.
1498 """,
1499 u"""\
1500 <document source="test data">
1501 <paragraph>
1502 text-
1503 <emphasis>
1504 separated
1505 \u2010
1506 <emphasis>
1508 \u2011
1509 <emphasis>
1510 various
1511 \u2012
1512 <emphasis>
1513 dashes
1514 \u2013
1515 <emphasis>
1517 \u2014
1518 <emphasis>
1519 hyphens
1521 \xbf
1522 <emphasis>
1523 punctuation
1524 ? \xa1
1525 <emphasis>
1526 examples
1527 !\xa0
1528 <emphasis>
1529 \u00a0no-break-space\u00a0
1531 """],
1532 # Whitespace characters:
1533 # \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, fails in Python 2.4
# Each Unicode space character listed below is expected to delimit emphasis
# just like an ASCII space or newline.  U+2028 LINE SEPARATOR additionally
# starts a new <paragraph> in the expected output.
1534 [u"""\
1535 text separated by
1536 *newline*
1537 or *space* or one of
1538 \xa0*NO-BREAK SPACE*\xa0,
1539 \u1680*OGHAM SPACE MARK*\u1680,
1540 \u2000*EN QUAD*\u2000,
1541 \u2001*EM QUAD*\u2001,
1542 \u2002*EN SPACE*\u2002,
1543 \u2003*EM SPACE*\u2003,
1544 \u2004*THREE-PER-EM SPACE*\u2004,
1545 \u2005*FOUR-PER-EM SPACE*\u2005,
1546 \u2006*SIX-PER-EM SPACE*\u2006,
1547 \u2007*FIGURE SPACE*\u2007,
1548 \u2008*PUNCTUATION SPACE*\u2008,
1549 \u2009*THIN SPACE*\u2009,
1550 \u200a*HAIR SPACE*\u200a,
1551 \u202f*NARROW NO-BREAK SPACE*\u202f,
1552 \u205f*MEDIUM MATHEMATICAL SPACE*\u205f,
1553 \u3000*IDEOGRAPHIC SPACE*\u3000,
1554 \u2028*LINE SEPARATOR*\u2028
1555 """,
1556 u"""\
1557 <document source="test data">
1558 <paragraph>
1559 text separated by
1560 <emphasis>
1561 newline
1563 or \n\
1564 <emphasis>
1565 space
1566 or one of
1567 \xa0
1568 <emphasis>
1569 NO-BREAK SPACE
1570 \xa0,
1571 \u1680
1572 <emphasis>
1573 OGHAM SPACE MARK
1574 \u1680,
1575 \u2000
1576 <emphasis>
1577 EN QUAD
1578 \u2000,
1579 \u2001
1580 <emphasis>
1581 EM QUAD
1582 \u2001,
1583 \u2002
1584 <emphasis>
1585 EN SPACE
1586 \u2002,
1587 \u2003
1588 <emphasis>
1589 EM SPACE
1590 \u2003,
1591 \u2004
1592 <emphasis>
1593 THREE-PER-EM SPACE
1594 \u2004,
1595 \u2005
1596 <emphasis>
1597 FOUR-PER-EM SPACE
1598 \u2005,
1599 \u2006
1600 <emphasis>
1601 SIX-PER-EM SPACE
1602 \u2006,
1603 \u2007
1604 <emphasis>
1605 FIGURE SPACE
1606 \u2007,
1607 \u2008
1608 <emphasis>
1609 PUNCTUATION SPACE
1610 \u2008,
1611 \u2009
1612 <emphasis>
1613 THIN SPACE
1614 \u2009,
1615 \u200a
1616 <emphasis>
1617 HAIR SPACE
1618 \u200a,
1619 \u202f
1620 <emphasis>
1621 NARROW NO-BREAK SPACE
1622 \u202f,
1623 \u205f
1624 <emphasis>
1625 MEDIUM MATHEMATICAL SPACE
1626 \u205f,
1627 \u3000
1628 <emphasis>
1629 IDEOGRAPHIC SPACE
1630 \u3000,
1631 <paragraph>
1632 <emphasis>
1633 LINE SEPARATOR
1634 """],
1635 # « * » ‹ * › « * » ‹ * › « * » ‹ * › French,
# NOTE(review): the space-padded French style in the comment above does not
# occur in the test input below; presumably it is listed for reference only.
#
# A start-string enclosed by a matching opener/closer pair (ASCII quotes and
# brackets, or one of the international quote pairs) is not markup.  In the
# final sentence the asterisks sit between characters that are not such a
# pair, so the text between them is expected to become emphasis.
1636 [u"""\
1637 "Quoted" markup start-string (matched openers & closers) -> no markup:
1639 '*' "*" (*) <*> [*] {*}
1640 ⁅*⁆
1642 Some international quoting styles:
1643 ‘*’ “*” English, ...,
1644 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1645 „*” «*» Romanian,
1646 “*„ ‘*‚ Greek,
1647 「*」 『*』traditional Chinese,
1648 ”*” ’*’ »*» ›*› Swedish, Finnish,
1649 „*” ‚*’ Polish,
1650 „*” »*« ’*’ Hungarian,
1652 But this is „*’ emphasized »*‹.
1653 """,
1654 u"""\
1655 <document source="test data">
1656 <paragraph>
1657 "Quoted" markup start-string (matched openers & closers) -> no markup:
1658 <paragraph>
1659 '*' "*" (*) <*> [*] {*}
1660 ⁅*⁆
1661 <paragraph>
1662 Some international quoting styles:
1663 ‘*’ “*” English, ...,
1664 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1665 „*” «*» Romanian,
1666 “*„ ‘*‚ Greek,
1667 「*」 『*』traditional Chinese,
1668 ”*” ’*’ »*» ›*› Swedish, Finnish,
1669 „*” ‚*’ Polish,
1670 „*” »*« ’*’ Hungarian,
1671 <paragraph>
1672 But this is „
1673 <emphasis>
1674 ’ emphasized »
1675 ‹.
1676 """],
if __name__ == '__main__':
    # Script entry point: let unittest's command-line runner execute the
    # test named by ``defaultTest`` (this module's ``suite``) when no test
    # names are given on the command line.
    from unittest import main as run_tests
    run_tests(defaultTest='suite')