More tests for inline markup recognition (no change to the rules).
[docutils.git] / test / test_parsers / test_rst / test_inline_markup.py
blob c123a177243cd2ef417e18521f4db44e919aa895
1 #! /usr/bin/env python
2 # -*- coding: utf8 -*-
4 # $Id$
5 # Author: David Goodger <goodger@python.org>
6 # Copyright: This module has been placed in the public domain.
8 """
9 Tests for inline markup in docutils/parsers/rst/states.py.
10 Interpreted text tests are in a separate module, test_interpreted.py.
11 """
13 from __init__ import DocutilsTestSupport
def suite():
    """Return a parser test suite generated from the ``totest`` table.

    A fresh ``DocutilsTestSupport.ParserTestSuite`` is created, the
    module-level ``totest`` dictionary is handed to its ``generateTests``
    method, and the populated suite object is returned to the caller.
    """
    parser_suite = DocutilsTestSupport.ParserTestSuite()
    parser_suite.generateTests(totest)
    return parser_suite
20 totest = {}
22 totest['emphasis'] = [
23 ["""\
24 *emphasis*
25 """,
26 """\
27 <document source="test data">
28 <paragraph>
29 <emphasis>
30 emphasis
31 """],
32 [u"""\
33 l'*emphasis* with the *emphasis*' apostrophe.
34 l\u2019*emphasis* with the *emphasis*\u2019 apostrophe.
35 """,
36 u"""\
37 <document source="test data">
38 <paragraph>
39 l\'
40 <emphasis>
41 emphasis
42 with the \n\
43 <emphasis>
44 emphasis
45 \' apostrophe.
46 l\u2019
47 <emphasis>
48 emphasis
49 with the \n\
50 <emphasis>
51 emphasis
52 \u2019 apostrophe.
53 """],
54 ["""\
55 *emphasized sentence
56 across lines*
57 """,
58 """\
59 <document source="test data">
60 <paragraph>
61 <emphasis>
62 emphasized sentence
63 across lines
64 """],
65 ["""\
66 *emphasis without closing asterisk
67 """,
68 """\
69 <document source="test data">
70 <paragraph>
71 <problematic ids="id2" refid="id1">
73 emphasis without closing asterisk
74 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
75 <paragraph>
76 Inline emphasis start-string without end-string.
77 """],
78 [r"""some punctuation is allowed around inline markup, e.g.
79 /*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters),
80 (*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs)
81 *emphasis*., *emphasis*,, *emphasis*!, and *emphasis*\ (closing delimiters),
83 but not
84 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs),
85 (*), [*], '*' or '"*"' ("quoted" start-string),
86 x*2* or 2*x* (alphanumeric char before),
87 \*args or * (escaped, whitespace behind start-string),
88 or *the\* *stars\* *inside* (escaped, whitespace before end-string).
90 However, '*args' will trigger a warning and may be problematic.
92 what about *this**?
93 """,
94 """\
95 <document source="test data">
96 <paragraph>
97 some punctuation is allowed around inline markup, e.g.
99 <emphasis>
100 emphasis
101 /, -
102 <emphasis>
103 emphasis
104 -, and :
105 <emphasis>
106 emphasis
107 : (delimiters),
109 <emphasis>
110 emphasis
111 ), [
112 <emphasis>
113 emphasis
114 ], <
115 <emphasis>
116 emphasis
117 >, {
118 <emphasis>
119 emphasis
120 } (open/close pairs)
121 <emphasis>
122 emphasis
123 ., \n\
124 <emphasis>
125 emphasis
126 ,, \n\
127 <emphasis>
128 emphasis
129 !, and \n\
130 <emphasis>
131 emphasis
132 (closing delimiters),
133 <paragraph>
134 but not
135 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs),
136 (*), [*], '*' or '"*"' ("quoted" start-string),
137 x*2* or 2*x* (alphanumeric char before),
138 *args or * (escaped, whitespace behind start-string),
139 or \n\
140 <emphasis>
141 the* *stars* *inside
142 (escaped, whitespace before end-string).
143 <paragraph>
144 However, '
145 <problematic ids="id2" refid="id1">
147 args' will trigger a warning and may be problematic.
148 <system_message backrefs="id2" ids="id1" level="2" line="13" source="test data" type="WARNING">
149 <paragraph>
150 Inline emphasis start-string without end-string.
151 <paragraph>
152 what about \n\
153 <emphasis>
154 this*
156 """],
157 [u"""\
158 Quotes around inline markup:
160 '*emphasis*' "*emphasis*" Straight,
161 ‘*emphasis*’ “*emphasis*” English, ...,
162 « *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* ›
163 « *emphasis* » ‹ *emphasis* › French,
164 „*emphasis*“ ‚*emphasis*‘ »*emphasis*« ›*emphasis*‹ German, Czech, ...,
165 „*emphasis*” «*emphasis*» Romanian,
166 “*emphasis*„ ‘*emphasis*‚ Greek,
167 「*emphasis*」 『*emphasis*』traditional Chinese,
168 ”*emphasis*” ’*emphasis*’ »*emphasis*» ›*emphasis*› Swedish, Finnish,
169 „*emphasis*” ‚*emphasis*’ Polish,
170 „*emphasis*” »*emphasis*« ’*emphasis*’ Hungarian,
171 """,
172 u"""\
173 <document source="test data">
174 <paragraph>
175 Quotes around inline markup:
176 <paragraph>
178 <emphasis>
179 emphasis
180 \' "
181 <emphasis>
182 emphasis
183 " Straight,
184 \u2018
185 <emphasis>
186 emphasis
187 \u2019 \u201c
188 <emphasis>
189 emphasis
190 \u201d English, ...,
191 \xab\u202f
192 <emphasis>
193 emphasis
194 \u202f\xbb \u2039\u202f
195 <emphasis>
196 emphasis
197 \u202f\u203a \xab\xa0
198 <emphasis>
199 emphasis
200 \xa0\xbb \u2039\xa0
201 <emphasis>
202 emphasis
203 \xa0\u203a
204 \xab\u2005
205 <emphasis>
206 emphasis
207 \u2005\xbb \u2039\u2005
208 <emphasis>
209 emphasis
210 \u2005\u203a French,
211 \u201e
212 <emphasis>
213 emphasis
214 \u201c \u201a
215 <emphasis>
216 emphasis
217 \u2018 \xbb
218 <emphasis>
219 emphasis
220 \xab \u203a
221 <emphasis>
222 emphasis
223 \u2039 German, Czech, ...,
224 \u201e
225 <emphasis>
226 emphasis
227 \u201d \xab
228 <emphasis>
229 emphasis
230 \xbb Romanian,
231 \u201c
232 <emphasis>
233 emphasis
234 \u201e \u2018
235 <emphasis>
236 emphasis
237 \u201a Greek,
238 \u300c
239 <emphasis>
240 emphasis
241 \u300d \u300e
242 <emphasis>
243 emphasis
244 \u300ftraditional Chinese,
245 \u201d
246 <emphasis>
247 emphasis
248 \u201d \u2019
249 <emphasis>
250 emphasis
251 \u2019 \xbb
252 <emphasis>
253 emphasis
254 \xbb \u203a
255 <emphasis>
256 emphasis
257 \u203a Swedish, Finnish,
258 \u201e
259 <emphasis>
260 emphasis
261 \u201d \u201a
262 <emphasis>
263 emphasis
264 \u2019 Polish,
265 \u201e
266 <emphasis>
267 emphasis
268 \u201d \xbb
269 <emphasis>
270 emphasis
271 \xab \u2019
272 <emphasis>
273 emphasis
274 \u2019 Hungarian,
275 """],
276 [r"""
277 Emphasized asterisk: *\**
279 Emphasized double asterisk: *\***
280 """,
281 """\
282 <document source="test data">
283 <paragraph>
284 Emphasized asterisk: \n\
285 <emphasis>
287 <paragraph>
288 Emphasized double asterisk: \n\
289 <emphasis>
291 """],
294 totest['strong'] = [
295 ["""\
296 **strong**
297 """,
298 """\
299 <document source="test data">
300 <paragraph>
301 <strong>
302 strong
303 """],
304 [u"""\
305 l'**strong** and l\u2019**strong** with apostrophe
306 """,
307 u"""\
308 <document source="test data">
309 <paragraph>
311 <strong>
312 strong
313 and l\u2019
314 <strong>
315 strong
316 with apostrophe
317 """],
318 [u"""\
319 quoted '**strong**', quoted "**strong**",
320 quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d,
321 quoted \xab**strong**\xbb
322 """,
323 u"""\
324 <document source="test data">
325 <paragraph>
326 quoted '
327 <strong>
328 strong
329 ', quoted "
330 <strong>
331 strong
333 quoted \u2018
334 <strong>
335 strong
336 \u2019, quoted \u201c
337 <strong>
338 strong
339 \u201d,
340 quoted \xab
341 <strong>
342 strong
343 \xbb
344 """],
345 [r"""
346 (**strong**) but not (**) or '(** ' or x**2 or \**kwargs or **
348 (however, '**kwargs' will trigger a warning and may be problematic)
349 """,
350 """\
351 <document source="test data">
352 <paragraph>
354 <strong>
355 strong
356 ) but not (**) or '(** ' or x**2 or **kwargs or **
357 <paragraph>
358 (however, '
359 <problematic ids="id2" refid="id1">
361 kwargs' will trigger a warning and may be problematic)
362 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
363 <paragraph>
364 Inline strong start-string without end-string.
365 """],
366 ["""\
367 Strong asterisk: *****
369 Strong double asterisk: ******
370 """,
371 """\
372 <document source="test data">
373 <paragraph>
374 Strong asterisk: \n\
375 <strong>
377 <paragraph>
378 Strong double asterisk: \n\
379 <strong>
381 """],
382 ["""\
383 **strong without closing asterisks
384 """,
385 """\
386 <document source="test data">
387 <paragraph>
388 <problematic ids="id2" refid="id1">
390 strong without closing asterisks
391 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
392 <paragraph>
393 Inline strong start-string without end-string.
394 """],
397 totest['literal'] = [
398 ["""\
399 ``literal``
400 """,
401 """\
402 <document source="test data">
403 <paragraph>
404 <literal>
405 literal
406 """],
407 [r"""
408 ``\literal``
409 """,
410 """\
411 <document source="test data">
412 <paragraph>
413 <literal>
414 \\literal
415 """],
416 [r"""
417 ``lite\ral``
418 """,
419 """\
420 <document source="test data">
421 <paragraph>
422 <literal>
423 lite\\ral
424 """],
425 [r"""
426 ``literal\``
427 """,
428 """\
429 <document source="test data">
430 <paragraph>
431 <literal>
432 literal\\
433 """],
434 [u"""\
435 l'``literal`` and l\u2019``literal`` with apostrophe
436 """,
437 u"""\
438 <document source="test data">
439 <paragraph>
441 <literal>
442 literal
443 and l\u2019
444 <literal>
445 literal
446 with apostrophe
447 """],
448 [u"""\
449 quoted '``literal``', quoted "``literal``",
450 quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
451 quoted \xab``literal``\xbb
452 """,
453 u"""\
454 <document source="test data">
455 <paragraph>
456 quoted '
457 <literal>
458 literal
459 ', quoted "
460 <literal>
461 literal
463 quoted \u2018
464 <literal>
465 literal
466 \u2019, quoted \u201c
467 <literal>
468 literal
469 \u201d,
470 quoted \xab
471 <literal>
472 literal
473 \xbb
474 """],
475 [u"""\
476 ``'literal'`` with quotes, ``"literal"`` with quotes,
477 ``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
478 ``\xabliteral\xbb`` with quotes
479 """,
480 u"""\
481 <document source="test data">
482 <paragraph>
483 <literal>
484 'literal'
485 with quotes, \n\
486 <literal>
487 "literal"
488 with quotes,
489 <literal>
490 \u2018literal\u2019
491 with quotes, \n\
492 <literal>
493 \u201cliteral\u201d
494 with quotes,
495 <literal>
496 \xabliteral\xbb
497 with quotes
498 """],
499 [r"""
500 ``literal ``TeX quotes'' & \backslash`` but not "``" or ``
502 (however, ``standalone TeX quotes'' will trigger a warning
503 and may be problematic)
504 """,
505 """\
506 <document source="test data">
507 <paragraph>
508 <literal>
509 literal ``TeX quotes'' & \\backslash
510 but not "``" or ``
511 <paragraph>
512 (however, \n\
513 <problematic ids="id2" refid="id1">
515 standalone TeX quotes'' will trigger a warning
516 and may be problematic)
517 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
518 <paragraph>
519 Inline literal start-string without end-string.
520 """],
521 ["""\
522 Find the ```interpreted text``` in this paragraph!
523 """,
524 """\
525 <document source="test data">
526 <paragraph>
527 Find the \n\
528 <literal>
529 `interpreted text`
530 in this paragraph!
531 """],
532 ["""\
533 ``literal without closing backquotes
534 """,
535 """\
536 <document source="test data">
537 <paragraph>
538 <problematic ids="id2" refid="id1">
540 literal without closing backquotes
541 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
542 <paragraph>
543 Inline literal start-string without end-string.
544 """],
545 [r"""
546 Python ``list``\s use square bracket syntax.
547 """,
548 """\
549 <document source="test data">
550 <paragraph>
551 Python \n\
552 <literal>
553 list
554 s use square bracket syntax.
555 """],
558 totest['references'] = [
559 ["""\
560 ref_
561 """,
562 """\
563 <document source="test data">
564 <paragraph>
565 <reference name="ref" refname="ref">
567 """],
568 [u"""\
569 l'ref_ and l\u2019ref_ with apostrophe
570 """,
571 u"""\
572 <document source="test data">
573 <paragraph>
575 <reference name="ref" refname="ref">
577 and l\u2019
578 <reference name="ref" refname="ref">
580 with apostrophe
581 """],
582 [u"""\
583 quoted 'ref_', quoted "ref_",
584 quoted \u2018ref_\u2019, quoted \u201cref_\u201d,
585 quoted \xabref_\xbb,
586 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
587 \u201cref ref\u201d_, or \xabref ref\xbb_
588 """,
589 u"""\
590 <document source="test data">
591 <paragraph>
592 quoted '
593 <reference name="ref" refname="ref">
595 ', quoted "
596 <reference name="ref" refname="ref">
599 quoted \u2018
600 <reference name="ref" refname="ref">
602 \u2019, quoted \u201c
603 <reference name="ref" refname="ref">
605 \u201d,
606 quoted \xab
607 <reference name="ref" refname="ref">
609 \xbb,
610 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
611 \u201cref ref\u201d_, or \xabref ref\xbb_
612 """],
613 ["""\
614 ref__
615 """,
616 """\
617 <document source="test data">
618 <paragraph>
619 <reference anonymous="1" name="ref">
621 """],
622 [u"""\
623 l'ref__ and l\u2019ref__ with apostrophe
624 """,
625 u"""\
626 <document source="test data">
627 <paragraph>
629 <reference anonymous="1" name="ref">
631 and l\u2019
632 <reference anonymous="1" name="ref">
634 with apostrophe
635 """],
636 [u"""\
637 quoted 'ref__', quoted "ref__",
638 quoted \u2018ref__\u2019, quoted \u201cref__\u201d,
639 quoted \xabref__\xbb,
640 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
641 \u201cref ref\u201d__, or \xabref ref\xbb__
642 """,
643 u"""\
644 <document source="test data">
645 <paragraph>
646 quoted '
647 <reference anonymous="1" name="ref">
649 ', quoted "
650 <reference anonymous="1" name="ref">
653 quoted \u2018
654 <reference anonymous="1" name="ref">
656 \u2019, quoted \u201c
657 <reference anonymous="1" name="ref">
659 \u201d,
660 quoted \xab
661 <reference anonymous="1" name="ref">
663 \xbb,
664 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
665 \u201cref ref\u201d__, or \xabref ref\xbb__
666 """],
667 ["""\
668 ref_, r_, r_e-f_, -ref_, and anonymousref__,
669 but not _ref_ or __attr__ or object.__attr__
670 """,
671 """\
672 <document source="test data">
673 <paragraph>
674 <reference name="ref" refname="ref">
676 , \n\
677 <reference name="r" refname="r">
679 , \n\
680 <reference name="r_e-f" refname="r_e-f">
681 r_e-f
683 <reference name="ref" refname="ref">
685 , and \n\
686 <reference anonymous="1" name="anonymousref">
687 anonymousref
689 but not _ref_ or __attr__ or object.__attr__
690 """],
693 totest['phrase_references'] = [
694 ["""\
695 `phrase reference`_
696 """,
697 """\
698 <document source="test data">
699 <paragraph>
700 <reference name="phrase reference" refname="phrase reference">
701 phrase reference
702 """],
703 [u"""\
704 l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe
705 """,
706 u"""\
707 <document source="test data">
708 <paragraph>
710 <reference name="phrase reference" refname="phrase reference">
711 phrase reference
712 and l\u2019
713 <reference name="phrase reference" refname="phrase reference">
714 phrase reference
715 with apostrophe
716 """],
717 [u"""\
718 quoted '`phrase reference`_', quoted "`phrase reference`_",
719 quoted \u2018`phrase reference`_\u2019,
720 quoted \u201c`phrase reference`_\u201d,
721 quoted \xab`phrase reference`_\xbb
722 """,
723 u"""\
724 <document source="test data">
725 <paragraph>
726 quoted '
727 <reference name="phrase reference" refname="phrase reference">
728 phrase reference
729 ', quoted "
730 <reference name="phrase reference" refname="phrase reference">
731 phrase reference
733 quoted \u2018
734 <reference name="phrase reference" refname="phrase reference">
735 phrase reference
736 \u2019,
737 quoted \u201c
738 <reference name="phrase reference" refname="phrase reference">
739 phrase reference
740 \u201d,
741 quoted \xab
742 <reference name="phrase reference" refname="phrase reference">
743 phrase reference
744 \xbb
745 """],
746 [u"""\
747 `'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes,
748 `\u2018phrase reference\u2019`_ with quotes,
749 `\u201cphrase reference\u201d`_ with quotes,
750 `\xabphrase reference\xbb`_ with quotes
751 """,
752 u"""\
753 <document source="test data">
754 <paragraph>
755 <reference name="'phrase reference'" refname="'phrase reference'">
756 'phrase reference'
757 with quotes, \n\
758 <reference name=""phrase reference"" refname=""phrase reference"">
759 "phrase reference"
760 with quotes,
761 <reference name="\u2018phrase reference\u2019" refname="\u2018phrase reference\u2019">
762 \u2018phrase reference\u2019
763 with quotes,
764 <reference name="\u201cphrase reference\u201d" refname="\u201cphrase reference\u201d">
765 \u201cphrase reference\u201d
766 with quotes,
767 <reference name="\xabphrase reference\xbb" refname="\xabphrase reference\xbb">
768 \xabphrase reference\xbb
769 with quotes
770 """],
771 ["""\
772 `anonymous reference`__
773 """,
774 """\
775 <document source="test data">
776 <paragraph>
777 <reference anonymous="1" name="anonymous reference">
778 anonymous reference
779 """],
780 [u"""\
781 l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe
782 """,
783 u"""\
784 <document source="test data">
785 <paragraph>
787 <reference anonymous="1" name="anonymous reference">
788 anonymous reference
789 and l\u2019
790 <reference anonymous="1" name="anonymous reference">
791 anonymous reference
792 with apostrophe
793 """],
794 [u"""\
795 quoted '`anonymous reference`__', quoted "`anonymous reference`__",
796 quoted \u2018`anonymous reference`__\u2019,
797 quoted \u201c`anonymous reference`__\u201d,
798 quoted \xab`anonymous reference`__\xbb
799 """,
800 u"""\
801 <document source="test data">
802 <paragraph>
803 quoted '
804 <reference anonymous="1" name="anonymous reference">
805 anonymous reference
806 ', quoted "
807 <reference anonymous="1" name="anonymous reference">
808 anonymous reference
810 quoted \u2018
811 <reference anonymous="1" name="anonymous reference">
812 anonymous reference
813 \u2019,
814 quoted \u201c
815 <reference anonymous="1" name="anonymous reference">
816 anonymous reference
817 \u201d,
818 quoted \xab
819 <reference anonymous="1" name="anonymous reference">
820 anonymous reference
821 \xbb
822 """],
823 [u"""\
824 `'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes,
825 `\u2018anonymous reference\u2019`__ with quotes,
826 `\u201canonymous reference\u201d`__ with quotes,
827 `\xabanonymous reference\xbb`__ with quotes
828 """,
829 u"""\
830 <document source="test data">
831 <paragraph>
832 <reference anonymous="1" name="'anonymous reference'">
833 'anonymous reference'
834 with quotes, \n\
835 <reference anonymous="1" name=""anonymous reference"">
836 "anonymous reference"
837 with quotes,
838 <reference anonymous="1" name="\u2018anonymous reference\u2019">
839 \u2018anonymous reference\u2019
840 with quotes,
841 <reference anonymous="1" name="\u201canonymous reference\u201d">
842 \u201canonymous reference\u201d
843 with quotes,
844 <reference anonymous="1" name="\xabanonymous reference\xbb">
845 \xabanonymous reference\xbb
846 with quotes
847 """],
848 ["""\
849 `phrase reference
850 across lines`_
851 """,
852 """\
853 <document source="test data">
854 <paragraph>
855 <reference name="phrase reference across lines" refname="phrase reference across lines">
856 phrase reference
857 across lines
858 """],
859 ["""\
860 `phrase\`_ reference`_
861 """,
862 """\
863 <document source="test data">
864 <paragraph>
865 <reference name="phrase`_ reference" refname="phrase`_ reference">
866 phrase`_ reference
867 """],
868 ["""\
869 Invalid phrase reference:
871 :role:`phrase reference`_
872 """,
873 """\
874 <document source="test data">
875 <paragraph>
876 Invalid phrase reference:
877 <paragraph>
878 <problematic ids="id2" refid="id1">
879 :role:`phrase reference`_
880 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
881 <paragraph>
882 Mismatch: both interpreted text role prefix and reference suffix.
883 """],
884 ["""\
885 Invalid phrase reference:
887 `phrase reference`:role:_
888 """,
889 """\
890 <document source="test data">
891 <paragraph>
892 Invalid phrase reference:
893 <paragraph>
894 <problematic ids="id2" refid="id1">
895 `phrase reference`:role:_
896 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
897 <paragraph>
898 Mismatch: both interpreted text role suffix and reference suffix.
899 """],
900 ["""\
901 `phrase reference_ without closing backquote
902 """,
903 """\
904 <document source="test data">
905 <paragraph>
906 <problematic ids="id2" refid="id1">
908 phrase \n\
909 <reference name="reference" refname="reference">
910 reference
911 without closing backquote
912 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
913 <paragraph>
914 Inline interpreted text or phrase reference start-string without end-string.
915 """],
916 ["""\
917 `anonymous phrase reference__ without closing backquote
918 """,
919 """\
920 <document source="test data">
921 <paragraph>
922 <problematic ids="id2" refid="id1">
924 anonymous phrase \n\
925 <reference anonymous="1" name="reference">
926 reference
927 without closing backquote
928 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
929 <paragraph>
930 Inline interpreted text or phrase reference start-string without end-string.
931 """],
934 totest['embedded_URIs'] = [
935 ["""\
936 `phrase reference <http://example.com>`_
937 """,
938 """\
939 <document source="test data">
940 <paragraph>
941 <reference name="phrase reference" refuri="http://example.com">
942 phrase reference
943 <target ids="phrase-reference" names="phrase\ reference" refuri="http://example.com">
944 """],
945 ["""\
946 `anonymous reference <http://example.com>`__
947 """,
948 """\
949 <document source="test data">
950 <paragraph>
951 <reference name="anonymous reference" refuri="http://example.com">
952 anonymous reference
953 """],
954 ["""\
955 `embedded URI on next line
956 <http://example.com>`__
957 """,
958 """\
959 <document source="test data">
960 <paragraph>
961 <reference name="embedded URI on next line" refuri="http://example.com">
962 embedded URI on next line
963 """],
964 ["""\
965 `embedded URI across lines <http://example.com/
966 long/path>`__
967 """,
968 """\
969 <document source="test data">
970 <paragraph>
971 <reference name="embedded URI across lines" refuri="http://example.com/long/path">
972 embedded URI across lines
973 """],
974 ["""\
975 `embedded URI with whitespace <http://example.com/
976 long/path /and /whitespace>`__
977 """,
978 """\
979 <document source="test data">
980 <paragraph>
981 <reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
982 embedded URI with whitespace
983 """],
984 ["""\
985 `embedded email address <jdoe@example.com>`__
987 `embedded email address broken across lines <jdoe
988 @example.com>`__
989 """,
990 """\
991 <document source="test data">
992 <paragraph>
993 <reference name="embedded email address" refuri="mailto:jdoe@example.com">
994 embedded email address
995 <paragraph>
996 <reference name="embedded email address broken across lines" refuri="mailto:jdoe@example.com">
997 embedded email address broken across lines
998 """],
999 [r"""
1000 `embedded URI with too much whitespace < http://example.com/
1001 long/path /and /whitespace >`__
1003 `embedded URI with too much whitespace at end <http://example.com/
1004 long/path /and /whitespace >`__
1006 `embedded URI with no preceding whitespace<http://example.com>`__
1008 `escaped URI \<http://example.com>`__
1010 See `HTML Anchors: \<a>`_.
1011 """,
1012 """\
1013 <document source="test data">
1014 <paragraph>
1015 <reference anonymous="1" name="embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace >">
1016 embedded URI with too much whitespace < http://example.com/
1017 long/path /and /whitespace >
1018 <paragraph>
1019 <reference anonymous="1" name="embedded URI with too much whitespace at end <http://example.com/ long/path /and /whitespace >">
1020 embedded URI with too much whitespace at end <http://example.com/
1021 long/path /and /whitespace >
1022 <paragraph>
1023 <reference anonymous="1" name="embedded URI with no preceding whitespace<http://example.com>">
1024 embedded URI with no preceding whitespace<http://example.com>
1025 <paragraph>
1026 <reference anonymous="1" name="escaped URI <http://example.com>">
1027 escaped URI <http://example.com>
1028 <paragraph>
1029 See \n\
1030 <reference name="HTML Anchors: <a>" refname="html anchors: <a>">
1031 HTML Anchors: <a>
1033 """],
1034 ["""\
1035 Relative URIs' reference text can be omitted:
1037 `<reference>`_
1039 `<anonymous>`__
1040 """,
1041 """\
1042 <document source="test data">
1043 <paragraph>
1044 Relative URIs' reference text can be omitted:
1045 <paragraph>
1046 <reference name="reference" refuri="reference">
1047 reference
1048 <target ids="reference" names="reference" refuri="reference">
1049 <paragraph>
1050 <reference name="anonymous" refuri="anonymous">
1051 anonymous
1052 """],
1053 ["""\
1054 Escape trailing low-line char in URIs:
1056 `<reference\_>`_
1058 `<anonymous\_>`__
1059 """,
1060 """\
1061 <document source="test data">
1062 <paragraph>
1063 Escape trailing low-line char in URIs:
1064 <paragraph>
1065 <reference name="reference_" refuri="reference_">
1066 reference_
1067 <target ids="reference" names="reference_" refuri="reference_">
1068 <paragraph>
1069 <reference name="anonymous_" refuri="anonymous_">
1070 anonymous_
1071 """],
1074 totest['embedded_aliases'] = [
1075 ["""\
1076 `phrase reference <alias_>`_
1077 """,
1078 """\
1079 <document source="test data">
1080 <paragraph>
1081 <reference name="phrase reference" refname="alias">
1082 phrase reference
1083 <target names="phrase\ reference" refname="alias">
1084 """],
1085 ["""\
1086 `anonymous reference <alias_>`__
1087 """,
1088 """\
1089 <document source="test data">
1090 <paragraph>
1091 <reference name="anonymous reference" refname="alias">
1092 anonymous reference
1093 """],
1094 ["""\
1095 `embedded alias on next line
1096 <alias_>`__
1097 """,
1098 """\
1099 <document source="test data">
1100 <paragraph>
1101 <reference name="embedded alias on next line" refname="alias">
1102 embedded alias on next line
1103 """],
1104 ["""\
1105 `embedded alias across lines <alias
1106 phrase_>`__
1107 """,
1108 """\
1109 <document source="test data">
1110 <paragraph>
1111 <reference name="embedded alias across lines" refname="alias phrase">
1112 embedded alias across lines
1113 """],
1114 ["""\
1115 `embedded alias with whitespace <alias
1116 long phrase_>`__
1117 """,
1118 """\
1119 <document source="test data">
1120 <paragraph>
1121 <reference name="embedded alias with whitespace" refname="alias long phrase">
1122 embedded alias with whitespace
1123 """],
1124 [r"""
1125 `embedded alias with too much whitespace < alias_ >`__
1127 `embedded alias with no preceding whitespace<alias_>`__
1128 """,
1129 """\
1130 <document source="test data">
1131 <paragraph>
1132 <reference anonymous="1" name="embedded alias with too much whitespace < alias_ >">
1133 embedded alias with too much whitespace < alias_ >
1134 <paragraph>
1135 <reference anonymous="1" name="embedded alias with no preceding whitespace<alias_>">
1136 embedded alias with no preceding whitespace<alias_>
1137 """],
1140 totest['inline_targets'] = [
1141 ["""\
1142 _`target`
1144 Here is _`another target` in some text. And _`yet
1145 another target`, spanning lines.
1147 _`Here is a TaRgeT` with case and spacial difficulties.
1148 """,
1149 """\
1150 <document source="test data">
1151 <paragraph>
1152 <target ids="target" names="target">
1153 target
1154 <paragraph>
1155 Here is \n\
1156 <target ids="another-target" names="another\ target">
1157 another target
1158 in some text. And \n\
1159 <target ids="yet-another-target" names="yet\ another\ target">
1161 another target
1162 , spanning lines.
1163 <paragraph>
1164 <target ids="here-is-a-target" names="here\ is\ a\ target">
1165 Here is a TaRgeT
1166 with case and spacial difficulties.
1167 """],
1168 [u"""\
1169 l'_`target1` and l\u2019_`target2` with apostrophe
1170 """,
1171 u"""\
1172 <document source="test data">
1173 <paragraph>
1175 <target ids="target1" names="target1">
1176 target1
1177 and l\u2019
1178 <target ids="target2" names="target2">
1179 target2
1180 with apostrophe
1181 """],
1182 [u"""\
1183 quoted '_`target1`', quoted "_`target2`",
1184 quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d,
1185 quoted \xab_`target5`\xbb
1186 """,
1187 u"""\
1188 <document source="test data">
1189 <paragraph>
1190 quoted '
1191 <target ids="target1" names="target1">
1192 target1
1193 ', quoted "
1194 <target ids="target2" names="target2">
1195 target2
1197 quoted \u2018
1198 <target ids="target3" names="target3">
1199 target3
1200 \u2019, quoted \u201c
1201 <target ids="target4" names="target4">
1202 target4
1203 \u201d,
1204 quoted \xab
1205 <target ids="target5" names="target5">
1206 target5
1207 \xbb
1208 """],
1209 [u"""\
1210 _`'target1'` with quotes, _`"target2"` with quotes,
1211 _`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes,
1212 _`\xabtarget5\xbb` with quotes
1213 """,
1214 u"""\
1215 <document source="test data">
1216 <paragraph>
1217 <target ids="target1" names="'target1'">
1218 'target1'
1219 with quotes, \n\
1220 <target ids="target2" names=""target2"">
1221 "target2"
1222 with quotes,
1223 <target ids="target3" names="\u2018target3\u2019">
1224 \u2018target3\u2019
1225 with quotes, \n\
1226 <target ids="target4" names="\u201ctarget4\u201d">
1227 \u201ctarget4\u201d
1228 with quotes,
1229 <target ids="target5" names="\xabtarget5\xbb">
1230 \xabtarget5\xbb
1231 with quotes
1232 """],
1233 ["""\
1234 But this isn't a _target; targets require backquotes.
1236 And _`this`_ is just plain confusing.
1237 """,
1238 """\
1239 <document source="test data">
1240 <paragraph>
1241 But this isn't a _target; targets require backquotes.
1242 <paragraph>
1243 And \n\
1244 <problematic ids="id2" refid="id1">
1246 this`_ is just plain confusing.
1247 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
1248 <paragraph>
1249 Inline target start-string without end-string.
1250 """],
1251 ["""\
1252 _`inline target without closing backquote
1253 """,
1254 """\
1255 <document source="test data">
1256 <paragraph>
1257 <problematic ids="id2" refid="id1">
1259 inline target without closing backquote
1260 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1261 <paragraph>
1262 Inline target start-string without end-string.
1263 """],
1266 totest['footnote_reference'] = [
1267 ["""\
1268 [1]_
1269 """,
1270 """\
1271 <document source="test data">
1272 <paragraph>
1273 <footnote_reference ids="id1" refname="1">
1275 """],
1276 ["""\
1277 [#]_
1278 """,
1279 """\
1280 <document source="test data">
1281 <paragraph>
1282 <footnote_reference auto="1" ids="id1">
1283 """],
1284 ["""\
1285 [#label]_
1286 """,
1287 """\
1288 <document source="test data">
1289 <paragraph>
1290 <footnote_reference auto="1" ids="id1" refname="label">
1291 """],
1292 ["""\
1293 [*]_
1294 """,
1295 """\
1296 <document source="test data">
1297 <paragraph>
1298 <footnote_reference auto="*" ids="id1">
1299 """],
1300 ["""\
1301 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1302 """,
1303 """\
1304 <document source="test data">
1305 <paragraph>
1306 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1307 """],
# Fixtures for citation references.  Each entry is an [input, expected]
# pair: reStructuredText source text, then the expected document-tree dump.
1310 totest['citation_reference'] = [
# A single "[citation]_" becomes a <citation_reference> with a matching
# refname.
1311 ["""\
1312 [citation]_
1313 """,
1314 """\
1315 <document source="test data">
1316 <paragraph>
1317 <citation_reference ids="id1" refname="citation">
1318 citation
1319 """],
# Labels containing "-", "." or digits are recognised; the refname for
# "CIT1" is expected in lower case ("cit1") while the text keeps its case.
# A label containing a space ("[CIT 1]_") is expected to stay plain text.
1320 ["""\
1321 [citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_
1322 """,
1323 """\
1324 <document source="test data">
1325 <paragraph>
1326 <citation_reference ids="id1" refname="citation">
1327 citation
1328 and \n\
1329 <citation_reference ids="id2" refname="cit-ation">
1330 cit-ation
1331 and \n\
1332 <citation_reference ids="id3" refname="cit.ation">
1333 cit.ation
1334 and \n\
1335 <citation_reference ids="id4" refname="cit1">
1336 CIT1
1337 but not [CIT 1]_
1338 """],
# References written back to back are expected to stay plain paragraph text.
1339 ["""\
1340 Adjacent citation refs are not possible: [citation]_[CIT1]_
1341 """,
1342 """\
1343 <document source="test data">
1344 <paragraph>
1345 Adjacent citation refs are not possible: [citation]_[CIT1]_
1346 """],
# Fixtures for substitution references ("|name|").  Each entry is an
# [input, expected] pair: reStructuredText source text, then the expected
# document-tree dump.
# NOTE(review): the embedded line numbers skip in places (e.g. 1399 -> 1401),
# so some very short expected-output lines appear to be absent from this
# copy -- compare with the repository version.
1349 totest['substitution_references'] = [
# Plain substitution reference.
1350 ["""\
1351 |subref|
1352 """,
1353 """\
1354 <document source="test data">
1355 <paragraph>
1356 <substitution_reference refname="subref">
1357 subref
1358 """],
# A trailing "_" or "__" adds a <reference> (with refname="subref", or with
# anonymous="1" for the double underscore) alongside the substitution
# reference.
1359 ["""\
1360 |subref|_ and |subref|__
1361 """,
1362 """\
1363 <document source="test data">
1364 <paragraph>
1365 <reference refname="subref">
1366 <substitution_reference refname="subref">
1367 subref
1368 and \n\
1369 <reference anonymous="1">
1370 <substitution_reference refname="subref">
1371 subref
1372 """],
# The name may contain a space.
1373 ["""\
1374 |substitution reference|
1375 """,
1376 """\
1377 <document source="test data">
1378 <paragraph>
1379 <substitution_reference refname="substitution reference">
1380 substitution reference
1381 """],
# The name may span a line break: the expected refname uses a single space
# while the node text keeps the two lines.
1382 ["""\
1383 |substitution
1384 reference|
1385 """,
1386 """\
1387 <document source="test data">
1388 <paragraph>
1389 <substitution_reference refname="substitution reference">
1390 substitution
1391 reference
1392 """],
# Missing closing "|": expected result is a <problematic> node plus a
# level-2 WARNING system_message.
1393 ["""\
1394 |substitution reference without closing verbar
1395 """,
1396 """\
1397 <document source="test data">
1398 <paragraph>
1399 <problematic ids="id2" refid="id1">
1401 substitution reference without closing verbar
1402 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1403 <paragraph>
1404 Inline substitution_reference start-string without end-string.
1405 """],
# Isolated runs of vertical bars are expected to stay plain text.
1406 ["""\
1407 first | then || and finally |||
1408 """,
1409 """\
1410 <document source="test data">
1411 <paragraph>
1412 first | then || and finally |||
1413 """],
# Fixtures for standalone hyperlinks (bare URIs and e-mail addresses).  Each
# entry is an [input, expected] pair: reStructuredText source text, then the
# expected document-tree dump.
# NOTE(review): the embedded line numbers skip in many places (blank input
# lines such as 1419, and expected-output lines such as 1453, 1456, 1487),
# so blank lines and very short text nodes appear to be absent from this
# copy -- compare with the repository version.
1416 totest['standalone_hyperlink'] = [
# A range of URI forms, one per paragraph.  In the expected output the
# refuri excludes surrounding brackets and a trailing "." or "?", and a bare
# e-mail address gets a "mailto:" refuri.
1417 ["""\
1418 http://www.standalone.hyperlink.com
1420 http:/one-slash-only.absolute.path
1422 [http://example.com]
1424 (http://example.com)
1426 <http://example.com>
1428 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1430 http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text)
1432 http://[3ffe:2a00:100:7031::1]/
1434 mailto:someone@somewhere.com
1436 news:comp.lang.python
1438 An email address in a sentence: someone@somewhere.com.
1440 ftp://ends.with.a.period.
1442 (a.question.mark@end?)
1443 """,
1444 """\
1445 <document source="test data">
1446 <paragraph>
1447 <reference refuri="http://www.standalone.hyperlink.com">
1448 http://www.standalone.hyperlink.com
1449 <paragraph>
1450 <reference refuri="http:/one-slash-only.absolute.path">
1451 http:/one-slash-only.absolute.path
1452 <paragraph>
1454 <reference refuri="http://example.com">
1455 http://example.com
1457 <paragraph>
1459 <reference refuri="http://example.com">
1460 http://example.com
1462 <paragraph>
1464 <reference refuri="http://example.com">
1465 http://example.com
1467 <paragraph>
1468 <reference refuri="http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html">
1469 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1470 <paragraph>
1471 <reference refuri="http://[3ffe:2a00:100:7031::1">
1472 http://[3ffe:2a00:100:7031::1
1473 ] (the final "]" is ambiguous in text)
1474 <paragraph>
1475 <reference refuri="http://[3ffe:2a00:100:7031::1]/">
1476 http://[3ffe:2a00:100:7031::1]/
1477 <paragraph>
1478 <reference refuri="mailto:someone@somewhere.com">
1479 mailto:someone@somewhere.com
1480 <paragraph>
1481 <reference refuri="news:comp.lang.python">
1482 news:comp.lang.python
1483 <paragraph>
1484 An email address in a sentence: \n\
1485 <reference refuri="mailto:someone@somewhere.com">
1486 someone@somewhere.com
1488 <paragraph>
1489 <reference refuri="ftp://ends.with.a.period">
1490 ftp://ends.with.a.period
1492 <paragraph>
1494 <reference refuri="mailto:a.question.mark@end">
1495 a.question.mark@end
1497 """],
# Raw-string input: backslash-escaped "*" inside a URL.  The expected refuri
# and text contain the asterisks without the backslashes.
1498 [r"""
1499 Valid URLs with escaped markup characters:
1501 http://example.com/\*content\*/whatever
1503 http://example.com/\*content*/whatever
1504 """,
1505 """\
1506 <document source="test data">
1507 <paragraph>
1508 Valid URLs with escaped markup characters:
1509 <paragraph>
1510 <reference refuri="http://example.com/*content*/whatever">
1511 http://example.com/*content*/whatever
1512 <paragraph>
1513 <reference refuri="http://example.com/*content*/whatever">
1514 http://example.com/*content*/whatever
1515 """],
# Inside "<...>" a trailing "." is expected to remain part of the URI.
1516 ["""\
1517 Valid URLs may end with punctuation inside "<>":
1519 <http://example.org/ends-with-dot.>
1520 """,
1521 """\
1522 <document source="test data">
1523 <paragraph>
1524 Valid URLs may end with punctuation inside "<>":
1525 <paragraph>
1527 <reference refuri="http://example.org/ends-with-dot.">
1528 http://example.org/ends-with-dot.
1530 """],
# A URI ending in "++" is expected to keep both plus signs.
1531 ["""\
1532 Valid URLs with interesting endings:
1534 http://example.org/ends-with-pluses++
1535 """,
1536 """\
1537 <document source="test data">
1538 <paragraph>
1539 Valid URLs with interesting endings:
1540 <paragraph>
1541 <reference refuri="http://example.org/ends-with-pluses++">
1542 http://example.org/ends-with-pluses++
1543 """],
# "word:word" with an unrecognised scheme is expected to stay plain text.
1544 ["""\
1545 None of these are standalone hyperlinks (their "schemes"
1546 are not recognized): signal:noise, a:b.
1547 """,
1548 """\
1549 <document source="test data">
1550 <paragraph>
1551 None of these are standalone hyperlinks (their "schemes"
1552 are not recognized): signal:noise, a:b.
1553 """],
# "\@" in the input: the expected output is plain text containing "@", with
# no <reference> node.
1554 ["""\
1555 Escaped email addresses are not recognized: test\@example.org
1556 """,
1557 """\
1558 <document source="test data">
1559 <paragraph>
1560 Escaped email addresses are not recognized: test@example.org
1561 """],
# Fixtures for the inline-markup recognition rules, i.e. which characters
# may surround a start-string or end-string.  Each entry is an
# [input, expected] pair: reStructuredText source text, then the expected
# document-tree dump.
# NOTE(review): the embedded line numbers skip in places (e.g. 1582 -> 1584,
# 1604 -> 1606, 1659 -> 1661), so very short expected-output lines appear to
# be absent from this copy -- compare with the repository version.
1564 totest['markup recognition rules'] = [
# Double underscores around a word are expected to stay plain text.
1565 ["""\
1566 __This__ should be left alone.
1567 """,
1568 """\
1569 <document source="test data">
1570 <paragraph>
1571 __This__ should be left alone.
1572 """],
# Raw-string input: backslash-escaped whitespace between single-character
# markup, and a backslash-escaped newline; the expected text joins "new" and
# "lines" into "newlines".
1573 [r"""
1574 Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
1575 with backslash-escaped whitespace, including new\
1576 lines.
1577 """,
1578 """\
1579 <document source="test data">
1580 <paragraph>
1581 Character-level m
1582 <emphasis>
1584 <strong>
1586 <literal>
1588 <title_reference>
1591 with backslash-escaped whitespace, including newlines.
1592 """],
# Unicode literals: emphasis delimited by dash/hyphen characters
# (\u2010-\u2014), inverted punctuation (\u00bf, \u00a1) and no-break space
# (\u00a0).
1593 [u"""\
1594 text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*.
1595 \u00bf*punctuation*? \u00a1*examples*!\u00a0*\u00a0no-break-space\u00a0*.
1596 """,
1597 u"""\
1598 <document source="test data">
1599 <paragraph>
1600 text-
1601 <emphasis>
1602 separated
1603 \u2010
1604 <emphasis>
1606 \u2011
1607 <emphasis>
1608 various
1609 \u2012
1610 <emphasis>
1611 dashes
1612 \u2013
1613 <emphasis>
1615 \u2014
1616 <emphasis>
1617 hyphens
1619 \xbf
1620 <emphasis>
1621 punctuation
1622 ? \xa1
1623 <emphasis>
1624 examples
1625 !\xa0
1626 <emphasis>
1627 \u00a0no-break-space\u00a0
1629 """],
# The next fixture puts emphasis between pairs of Unicode space characters.
# The MONGOLIAN VOWEL SEPARATOR case named in the original comment below is
# not part of the fixture; that comment gives "fails in Python 2.4" as the
# reason.
1630 # Whitespace characters:
1631 # \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, fails in Python 2.4
1632 [u"""\
1633 text separated by
1634 *newline*
1635 or *space* or one of
1636 \xa0*NO-BREAK SPACE*\xa0,
1637 \u1680*OGHAM SPACE MARK*\u1680,
1638 \u2000*EN QUAD*\u2000,
1639 \u2001*EM QUAD*\u2001,
1640 \u2002*EN SPACE*\u2002,
1641 \u2003*EM SPACE*\u2003,
1642 \u2004*THREE-PER-EM SPACE*\u2004,
1643 \u2005*FOUR-PER-EM SPACE*\u2005,
1644 \u2006*SIX-PER-EM SPACE*\u2006,
1645 \u2007*FIGURE SPACE*\u2007,
1646 \u2008*PUNCTUATION SPACE*\u2008,
1647 \u2009*THIN SPACE*\u2009,
1648 \u200a*HAIR SPACE*\u200a,
1649 \u202f*NARROW NO-BREAK SPACE*\u202f,
1650 \u205f*MEDIUM MATHEMATICAL SPACE*\u205f,
1651 \u3000*IDEOGRAPHIC SPACE*\u3000,
1652 \u2028*LINE SEPARATOR*\u2028
1653 """,
1654 u"""\
1655 <document source="test data">
1656 <paragraph>
1657 text separated by
1658 <emphasis>
1659 newline
1661 or \n\
1662 <emphasis>
1663 space
1664 or one of
1665 \xa0
1666 <emphasis>
1667 NO-BREAK SPACE
1668 \xa0,
1669 \u1680
1670 <emphasis>
1671 OGHAM SPACE MARK
1672 \u1680,
1673 \u2000
1674 <emphasis>
1675 EN QUAD
1676 \u2000,
1677 \u2001
1678 <emphasis>
1679 EM QUAD
1680 \u2001,
1681 \u2002
1682 <emphasis>
1683 EN SPACE
1684 \u2002,
1685 \u2003
1686 <emphasis>
1687 EM SPACE
1688 \u2003,
1689 \u2004
1690 <emphasis>
1691 THREE-PER-EM SPACE
1692 \u2004,
1693 \u2005
1694 <emphasis>
1695 FOUR-PER-EM SPACE
1696 \u2005,
1697 \u2006
1698 <emphasis>
1699 SIX-PER-EM SPACE
1700 \u2006,
1701 \u2007
1702 <emphasis>
1703 FIGURE SPACE
1704 \u2007,
1705 \u2008
1706 <emphasis>
1707 PUNCTUATION SPACE
1708 \u2008,
1709 \u2009
1710 <emphasis>
1711 THIN SPACE
1712 \u2009,
1713 \u200a
1714 <emphasis>
1715 HAIR SPACE
1716 \u200a,
1717 \u202f
1718 <emphasis>
1719 NARROW NO-BREAK SPACE
1720 \u202f,
1721 \u205f
1722 <emphasis>
1723 MEDIUM MATHEMATICAL SPACE
1724 \u205f,
1725 \u3000
1726 <emphasis>
1727 IDEOGRAPHIC SPACE
1728 \u3000,
1729 <paragraph>
1730 <emphasis>
1731 LINE SEPARATOR
1732 """],
# The next fixture covers a "*" enclosed by a matching opening/closing
# quote or bracket pair, which is expected to stay plain text, and closes
# with mismatched pairs that are expected to yield emphasis.
# NOTE(review): the French spaced-quote variants in the original comment
# below are not part of the fixture input -- presumably deliberately;
# confirm against the repository history.
1733 # « * » ‹ * › « * » ‹ * › « * » ‹ * › French,
1734 [u"""\
1735 "Quoted" markup start-string (matched openers & closers) -> no markup:
1737 '*' "*" (*) <*> [*] {*}
1738 ⁅*⁆
1740 Some international quoting styles:
1741 ‘*’ “*” English, ...,
1742 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1743 „*” «*» Romanian,
1744 “*„ ‘*‚ Greek,
1745 「*」 『*』traditional Chinese,
1746 ”*” ’*’ »*» ›*› Swedish, Finnish,
1747 „*” ‚*’ Polish,
1748 „*” »*« ’*’ Hungarian,
1750 But this is „*’ emphasized »*‹.
1751 """,
1752 u"""\
1753 <document source="test data">
1754 <paragraph>
1755 "Quoted" markup start-string (matched openers & closers) -> no markup:
1756 <paragraph>
1757 '*' "*" (*) <*> [*] {*}
1758 ⁅*⁆
1759 <paragraph>
1760 Some international quoting styles:
1761 ‘*’ “*” English, ...,
1762 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1763 „*” «*» Romanian,
1764 “*„ ‘*‚ Greek,
1765 「*」 『*』traditional Chinese,
1766 ”*” ’*’ »*» ›*› Swedish, Finnish,
1767 „*” ‚*’ Polish,
1768 „*” »*« ’*’ Hungarian,
1769 <paragraph>
1770 But this is „
1771 <emphasis>
1772 ’ emphasized »
1773 ‹.
1774 """],
if __name__ == '__main__':
    # Run as a script: pass the name of the module-level ``suite`` factory
    # to the unittest command-line runner as its default test.
    from unittest import main
    main(defaultTest='suite')