safe check for startswith "."
[docutils.git] / test / test_parsers / test_rst / test_inline_markup.py
blob134fa7444ccb748d5571ee9cacd89c6a3e6a1efc
1 #! /usr/bin/env python
2 # -*- coding: utf8 -*-
4 # $Id$
5 # Author: David Goodger <goodger@python.org>
6 # Copyright: This module has been placed in the public domain.
8 """
9 Tests for inline markup in docutils/parsers/rst/states.py.
10 Interpreted text tests are in a separate module, test_interpreted.py.
11 """
13 from __init__ import DocutilsTestSupport
def suite():
    """Build and return the parser test suite for this module.

    A ``DocutilsTestSupport.ParserTestSuite`` is created, asked to
    generate its tests from the module-level ``totest`` dictionary,
    and then handed back to the caller.
    """
    parser_suite = DocutilsTestSupport.ParserTestSuite()
    parser_suite.generateTests(totest)
    return parser_suite
# Registry of test cases passed to ``generateTests()`` by ``suite()``.
# Each key is a test-group name (e.g. 'emphasis'); each value is a list of
# two-item lists: [input text, expected document-tree output string].
totest = {}
22 totest['emphasis'] = [
23 ["""\
24 *emphasis*
25 """,
26 """\
27 <document source="test data">
28 <paragraph>
29 <emphasis>
30 emphasis
31 """],
32 [u"""\
33 l'*emphasis* with the *emphasis*' apostrophe.
34 l\u2019*emphasis* with the *emphasis*\u2019 apostrophe.
35 """,
36 u"""\
37 <document source="test data">
38 <paragraph>
39 l\'
40 <emphasis>
41 emphasis
42 with the \n\
43 <emphasis>
44 emphasis
45 \' apostrophe.
46 l\u2019
47 <emphasis>
48 emphasis
49 with the \n\
50 <emphasis>
51 emphasis
52 \u2019 apostrophe.
53 """],
54 ["""\
55 *emphasized sentence
56 across lines*
57 """,
58 """\
59 <document source="test data">
60 <paragraph>
61 <emphasis>
62 emphasized sentence
63 across lines
64 """],
65 ["""\
66 *emphasis without closing asterisk
67 """,
68 """\
69 <document source="test data">
70 <paragraph>
71 <problematic ids="id2" refid="id1">
73 emphasis without closing asterisk
74 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
75 <paragraph>
76 Inline emphasis start-string without end-string.
77 """],
78 [r"""some punctuation is allowed around inline markup, e.g.
79 /*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters),
80 (*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs)
82 but not
83 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs)
84 (*), [*], '*' or '"*"' ("quoted" start-string),
85 x*2* or 2*x* (alphanumeric char before),
86 \*args or * (escaped, whitespace behind start-string)
87 or *the\* *stars\* *inside* (escaped, whitespace before end-string).
89 However, '*args' will trigger a warning and may be problematic.
91 what about *this**?
92 """,
93 """\
94 <document source="test data">
95 <paragraph>
96 some punctuation is allowed around inline markup, e.g.
98 <emphasis>
99 emphasis
100 /, -
101 <emphasis>
102 emphasis
103 -, and :
104 <emphasis>
105 emphasis
106 : (delimiters),
108 <emphasis>
109 emphasis
110 ), [
111 <emphasis>
112 emphasis
113 ], <
114 <emphasis>
115 emphasis
116 >, {
117 <emphasis>
118 emphasis
119 } (open/close pairs)
120 <paragraph>
121 but not
122 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs)
123 (*), [*], '*' or '"*"' ("quoted" start-string),
124 x*2* or 2*x* (alphanumeric char before),
125 *args or * (escaped, whitespace behind start-string)
126 or \n\
127 <emphasis>
128 the* *stars* *inside
129 (escaped, whitespace before end-string).
130 <paragraph>
131 However, '
132 <problematic ids="id2" refid="id1">
134 args' will trigger a warning and may be problematic.
135 <system_message backrefs="id2" ids="id1" level="2" line="12" source="test data" type="WARNING">
136 <paragraph>
137 Inline emphasis start-string without end-string.
138 <paragraph>
139 what about \n\
140 <emphasis>
141 this*
143 """],
144 [u"""\
145 Quotes around inline markup:
147 '*emphasis*' "*emphasis*" Straight,
148 ‘*emphasis*’ “*emphasis*” English, ...,
149 « *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* ›
150 « *emphasis* » ‹ *emphasis* › French,
151 „*emphasis*“ ‚*emphasis*‘ »*emphasis*« ›*emphasis*‹ German, Czech, ...,
152 „*emphasis*” «*emphasis*» Romanian,
153 “*emphasis*„ ‘*emphasis*‚ Greek,
154 「*emphasis*」 『*emphasis*』traditional Chinese,
155 ”*emphasis*” ’*emphasis*’ »*emphasis*» ›*emphasis*› Swedish, Finnish,
156 „*emphasis*” ‚*emphasis*’ Polish,
157 „*emphasis*” »*emphasis*« ’*emphasis*’ Hungarian,
158 """,
159 u"""\
160 <document source="test data">
161 <paragraph>
162 Quotes around inline markup:
163 <paragraph>
165 <emphasis>
166 emphasis
167 \' "
168 <emphasis>
169 emphasis
170 " Straight,
171 \u2018
172 <emphasis>
173 emphasis
174 \u2019 \u201c
175 <emphasis>
176 emphasis
177 \u201d English, ...,
178 \xab\u202f
179 <emphasis>
180 emphasis
181 \u202f\xbb \u2039\u202f
182 <emphasis>
183 emphasis
184 \u202f\u203a \xab\xa0
185 <emphasis>
186 emphasis
187 \xa0\xbb \u2039\xa0
188 <emphasis>
189 emphasis
190 \xa0\u203a
191 \xab\u2005
192 <emphasis>
193 emphasis
194 \u2005\xbb \u2039\u2005
195 <emphasis>
196 emphasis
197 \u2005\u203a French,
198 \u201e
199 <emphasis>
200 emphasis
201 \u201c \u201a
202 <emphasis>
203 emphasis
204 \u2018 \xbb
205 <emphasis>
206 emphasis
207 \xab \u203a
208 <emphasis>
209 emphasis
210 \u2039 German, Czech, ...,
211 \u201e
212 <emphasis>
213 emphasis
214 \u201d \xab
215 <emphasis>
216 emphasis
217 \xbb Romanian,
218 \u201c
219 <emphasis>
220 emphasis
221 \u201e \u2018
222 <emphasis>
223 emphasis
224 \u201a Greek,
225 \u300c
226 <emphasis>
227 emphasis
228 \u300d \u300e
229 <emphasis>
230 emphasis
231 \u300ftraditional Chinese,
232 \u201d
233 <emphasis>
234 emphasis
235 \u201d \u2019
236 <emphasis>
237 emphasis
238 \u2019 \xbb
239 <emphasis>
240 emphasis
241 \xbb \u203a
242 <emphasis>
243 emphasis
244 \u203a Swedish, Finnish,
245 \u201e
246 <emphasis>
247 emphasis
248 \u201d \u201a
249 <emphasis>
250 emphasis
251 \u2019 Polish,
252 \u201e
253 <emphasis>
254 emphasis
255 \u201d \xbb
256 <emphasis>
257 emphasis
258 \xab \u2019
259 <emphasis>
260 emphasis
261 \u2019 Hungarian,
262 """],
263 [r"""
264 Emphasized asterisk: *\**
266 Emphasized double asterisk: *\***
267 """,
268 """\
269 <document source="test data">
270 <paragraph>
271 Emphasized asterisk: \n\
272 <emphasis>
274 <paragraph>
275 Emphasized double asterisk: \n\
276 <emphasis>
278 """],
281 totest['strong'] = [
282 ["""\
283 **strong**
284 """,
285 """\
286 <document source="test data">
287 <paragraph>
288 <strong>
289 strong
290 """],
291 [u"""\
292 l'**strong** and l\u2019**strong** with apostrophe
293 """,
294 u"""\
295 <document source="test data">
296 <paragraph>
298 <strong>
299 strong
300 and l\u2019
301 <strong>
302 strong
303 with apostrophe
304 """],
305 [u"""\
306 quoted '**strong**', quoted "**strong**",
307 quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d,
308 quoted \xab**strong**\xbb
309 """,
310 u"""\
311 <document source="test data">
312 <paragraph>
313 quoted '
314 <strong>
315 strong
316 ', quoted "
317 <strong>
318 strong
320 quoted \u2018
321 <strong>
322 strong
323 \u2019, quoted \u201c
324 <strong>
325 strong
326 \u201d,
327 quoted \xab
328 <strong>
329 strong
330 \xbb
331 """],
332 [r"""
333 (**strong**) but not (**) or '(** ' or x**2 or \**kwargs or **
335 (however, '**kwargs' will trigger a warning and may be problematic)
336 """,
337 """\
338 <document source="test data">
339 <paragraph>
341 <strong>
342 strong
343 ) but not (**) or '(** ' or x**2 or **kwargs or **
344 <paragraph>
345 (however, '
346 <problematic ids="id2" refid="id1">
348 kwargs' will trigger a warning and may be problematic)
349 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
350 <paragraph>
351 Inline strong start-string without end-string.
352 """],
353 ["""\
354 Strong asterisk: *****
356 Strong double asterisk: ******
357 """,
358 """\
359 <document source="test data">
360 <paragraph>
361 Strong asterisk: \n\
362 <strong>
364 <paragraph>
365 Strong double asterisk: \n\
366 <strong>
368 """],
369 ["""\
370 **strong without closing asterisks
371 """,
372 """\
373 <document source="test data">
374 <paragraph>
375 <problematic ids="id2" refid="id1">
377 strong without closing asterisks
378 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
379 <paragraph>
380 Inline strong start-string without end-string.
381 """],
384 totest['literal'] = [
385 ["""\
386 ``literal``
387 """,
388 """\
389 <document source="test data">
390 <paragraph>
391 <literal>
392 literal
393 """],
394 [r"""
395 ``\literal``
396 """,
397 """\
398 <document source="test data">
399 <paragraph>
400 <literal>
401 \\literal
402 """],
403 [r"""
404 ``lite\ral``
405 """,
406 """\
407 <document source="test data">
408 <paragraph>
409 <literal>
410 lite\\ral
411 """],
412 [r"""
413 ``literal\``
414 """,
415 """\
416 <document source="test data">
417 <paragraph>
418 <literal>
419 literal\\
420 """],
421 [u"""\
422 l'``literal`` and l\u2019``literal`` with apostrophe
423 """,
424 u"""\
425 <document source="test data">
426 <paragraph>
428 <literal>
429 literal
430 and l\u2019
431 <literal>
432 literal
433 with apostrophe
434 """],
435 [u"""\
436 quoted '``literal``', quoted "``literal``",
437 quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
438 quoted \xab``literal``\xbb
439 """,
440 u"""\
441 <document source="test data">
442 <paragraph>
443 quoted '
444 <literal>
445 literal
446 ', quoted "
447 <literal>
448 literal
450 quoted \u2018
451 <literal>
452 literal
453 \u2019, quoted \u201c
454 <literal>
455 literal
456 \u201d,
457 quoted \xab
458 <literal>
459 literal
460 \xbb
461 """],
462 [u"""\
463 ``'literal'`` with quotes, ``"literal"`` with quotes,
464 ``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
465 ``\xabliteral\xbb`` with quotes
466 """,
467 u"""\
468 <document source="test data">
469 <paragraph>
470 <literal>
471 'literal'
472 with quotes, \n\
473 <literal>
474 "literal"
475 with quotes,
476 <literal>
477 \u2018literal\u2019
478 with quotes, \n\
479 <literal>
480 \u201cliteral\u201d
481 with quotes,
482 <literal>
483 \xabliteral\xbb
484 with quotes
485 """],
486 [r"""
487 ``literal ``TeX quotes'' & \backslash`` but not "``" or ``
489 (however, ``standalone TeX quotes'' will trigger a warning
490 and may be problematic)
491 """,
492 """\
493 <document source="test data">
494 <paragraph>
495 <literal>
496 literal ``TeX quotes'' & \\backslash
497 but not "``" or ``
498 <paragraph>
499 (however, \n\
500 <problematic ids="id2" refid="id1">
502 standalone TeX quotes'' will trigger a warning
503 and may be problematic)
504 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
505 <paragraph>
506 Inline literal start-string without end-string.
507 """],
508 ["""\
509 Find the ```interpreted text``` in this paragraph!
510 """,
511 """\
512 <document source="test data">
513 <paragraph>
514 Find the \n\
515 <literal>
516 `interpreted text`
517 in this paragraph!
518 """],
519 ["""\
520 ``literal without closing backquotes
521 """,
522 """\
523 <document source="test data">
524 <paragraph>
525 <problematic ids="id2" refid="id1">
527 literal without closing backquotes
528 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
529 <paragraph>
530 Inline literal start-string without end-string.
531 """],
532 [r"""
533 Python ``list``\s use square bracket syntax.
534 """,
535 """\
536 <document source="test data">
537 <paragraph>
538 Python \n\
539 <literal>
540 list
541 s use square bracket syntax.
542 """],
545 totest['references'] = [
546 ["""\
547 ref_
548 """,
549 """\
550 <document source="test data">
551 <paragraph>
552 <reference name="ref" refname="ref">
554 """],
555 [u"""\
556 l'ref_ and l\u2019ref_ with apostrophe
557 """,
558 u"""\
559 <document source="test data">
560 <paragraph>
562 <reference name="ref" refname="ref">
564 and l\u2019
565 <reference name="ref" refname="ref">
567 with apostrophe
568 """],
569 [u"""\
570 quoted 'ref_', quoted "ref_",
571 quoted \u2018ref_\u2019, quoted \u201cref_\u201d,
572 quoted \xabref_\xbb,
573 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
574 \u201cref ref\u201d_, or \xabref ref\xbb_
575 """,
576 u"""\
577 <document source="test data">
578 <paragraph>
579 quoted '
580 <reference name="ref" refname="ref">
582 ', quoted "
583 <reference name="ref" refname="ref">
586 quoted \u2018
587 <reference name="ref" refname="ref">
589 \u2019, quoted \u201c
590 <reference name="ref" refname="ref">
592 \u201d,
593 quoted \xab
594 <reference name="ref" refname="ref">
596 \xbb,
597 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
598 \u201cref ref\u201d_, or \xabref ref\xbb_
599 """],
600 ["""\
601 ref__
602 """,
603 """\
604 <document source="test data">
605 <paragraph>
606 <reference anonymous="1" name="ref">
608 """],
609 [u"""\
610 l'ref__ and l\u2019ref__ with apostrophe
611 """,
612 u"""\
613 <document source="test data">
614 <paragraph>
616 <reference anonymous="1" name="ref">
618 and l\u2019
619 <reference anonymous="1" name="ref">
621 with apostrophe
622 """],
623 [u"""\
624 quoted 'ref__', quoted "ref__",
625 quoted \u2018ref__\u2019, quoted \u201cref__\u201d,
626 quoted \xabref__\xbb,
627 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
628 \u201cref ref\u201d__, or \xabref ref\xbb__
629 """,
630 u"""\
631 <document source="test data">
632 <paragraph>
633 quoted '
634 <reference anonymous="1" name="ref">
636 ', quoted "
637 <reference anonymous="1" name="ref">
640 quoted \u2018
641 <reference anonymous="1" name="ref">
643 \u2019, quoted \u201c
644 <reference anonymous="1" name="ref">
646 \u201d,
647 quoted \xab
648 <reference anonymous="1" name="ref">
650 \xbb,
651 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
652 \u201cref ref\u201d__, or \xabref ref\xbb__
653 """],
654 ["""\
655 ref_, r_, r_e-f_, -ref_, and anonymousref__,
656 but not _ref_ or __attr__ or object.__attr__
657 """,
658 """\
659 <document source="test data">
660 <paragraph>
661 <reference name="ref" refname="ref">
663 , \n\
664 <reference name="r" refname="r">
666 , \n\
667 <reference name="r_e-f" refname="r_e-f">
668 r_e-f
670 <reference name="ref" refname="ref">
672 , and \n\
673 <reference anonymous="1" name="anonymousref">
674 anonymousref
676 but not _ref_ or __attr__ or object.__attr__
677 """],
680 totest['phrase_references'] = [
681 ["""\
682 `phrase reference`_
683 """,
684 """\
685 <document source="test data">
686 <paragraph>
687 <reference name="phrase reference" refname="phrase reference">
688 phrase reference
689 """],
690 [u"""\
691 l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe
692 """,
693 u"""\
694 <document source="test data">
695 <paragraph>
697 <reference name="phrase reference" refname="phrase reference">
698 phrase reference
699 and l\u2019
700 <reference name="phrase reference" refname="phrase reference">
701 phrase reference
702 with apostrophe
703 """],
704 [u"""\
705 quoted '`phrase reference`_', quoted "`phrase reference`_",
706 quoted \u2018`phrase reference`_\u2019,
707 quoted \u201c`phrase reference`_\u201d,
708 quoted \xab`phrase reference`_\xbb
709 """,
710 u"""\
711 <document source="test data">
712 <paragraph>
713 quoted '
714 <reference name="phrase reference" refname="phrase reference">
715 phrase reference
716 ', quoted "
717 <reference name="phrase reference" refname="phrase reference">
718 phrase reference
720 quoted \u2018
721 <reference name="phrase reference" refname="phrase reference">
722 phrase reference
723 \u2019,
724 quoted \u201c
725 <reference name="phrase reference" refname="phrase reference">
726 phrase reference
727 \u201d,
728 quoted \xab
729 <reference name="phrase reference" refname="phrase reference">
730 phrase reference
731 \xbb
732 """],
733 [u"""\
734 `'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes,
735 `\u2018phrase reference\u2019`_ with quotes,
736 `\u201cphrase reference\u201d`_ with quotes,
737 `\xabphrase reference\xbb`_ with quotes
738 """,
739 u"""\
740 <document source="test data">
741 <paragraph>
742 <reference name="'phrase reference'" refname="'phrase reference'">
743 'phrase reference'
744 with quotes, \n\
745 <reference name=""phrase reference"" refname=""phrase reference"">
746 "phrase reference"
747 with quotes,
748 <reference name="\u2018phrase reference\u2019" refname="\u2018phrase reference\u2019">
749 \u2018phrase reference\u2019
750 with quotes,
751 <reference name="\u201cphrase reference\u201d" refname="\u201cphrase reference\u201d">
752 \u201cphrase reference\u201d
753 with quotes,
754 <reference name="\xabphrase reference\xbb" refname="\xabphrase reference\xbb">
755 \xabphrase reference\xbb
756 with quotes
757 """],
758 ["""\
759 `anonymous reference`__
760 """,
761 """\
762 <document source="test data">
763 <paragraph>
764 <reference anonymous="1" name="anonymous reference">
765 anonymous reference
766 """],
767 [u"""\
768 l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe
769 """,
770 u"""\
771 <document source="test data">
772 <paragraph>
774 <reference anonymous="1" name="anonymous reference">
775 anonymous reference
776 and l\u2019
777 <reference anonymous="1" name="anonymous reference">
778 anonymous reference
779 with apostrophe
780 """],
781 [u"""\
782 quoted '`anonymous reference`__', quoted "`anonymous reference`__",
783 quoted \u2018`anonymous reference`__\u2019,
784 quoted \u201c`anonymous reference`__\u201d,
785 quoted \xab`anonymous reference`__\xbb
786 """,
787 u"""\
788 <document source="test data">
789 <paragraph>
790 quoted '
791 <reference anonymous="1" name="anonymous reference">
792 anonymous reference
793 ', quoted "
794 <reference anonymous="1" name="anonymous reference">
795 anonymous reference
797 quoted \u2018
798 <reference anonymous="1" name="anonymous reference">
799 anonymous reference
800 \u2019,
801 quoted \u201c
802 <reference anonymous="1" name="anonymous reference">
803 anonymous reference
804 \u201d,
805 quoted \xab
806 <reference anonymous="1" name="anonymous reference">
807 anonymous reference
808 \xbb
809 """],
810 [u"""\
811 `'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes,
812 `\u2018anonymous reference\u2019`__ with quotes,
813 `\u201canonymous reference\u201d`__ with quotes,
814 `\xabanonymous reference\xbb`__ with quotes
815 """,
816 u"""\
817 <document source="test data">
818 <paragraph>
819 <reference anonymous="1" name="'anonymous reference'">
820 'anonymous reference'
821 with quotes, \n\
822 <reference anonymous="1" name=""anonymous reference"">
823 "anonymous reference"
824 with quotes,
825 <reference anonymous="1" name="\u2018anonymous reference\u2019">
826 \u2018anonymous reference\u2019
827 with quotes,
828 <reference anonymous="1" name="\u201canonymous reference\u201d">
829 \u201canonymous reference\u201d
830 with quotes,
831 <reference anonymous="1" name="\xabanonymous reference\xbb">
832 \xabanonymous reference\xbb
833 with quotes
834 """],
835 ["""\
836 `phrase reference
837 across lines`_
838 """,
839 """\
840 <document source="test data">
841 <paragraph>
842 <reference name="phrase reference across lines" refname="phrase reference across lines">
843 phrase reference
844 across lines
845 """],
846 ["""\
847 `phrase\`_ reference`_
848 """,
849 """\
850 <document source="test data">
851 <paragraph>
852 <reference name="phrase`_ reference" refname="phrase`_ reference">
853 phrase`_ reference
854 """],
855 ["""\
856 Invalid phrase reference:
858 :role:`phrase reference`_
859 """,
860 """\
861 <document source="test data">
862 <paragraph>
863 Invalid phrase reference:
864 <paragraph>
865 <problematic ids="id2" refid="id1">
866 :role:`phrase reference`_
867 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
868 <paragraph>
869 Mismatch: both interpreted text role prefix and reference suffix.
870 """],
871 ["""\
872 Invalid phrase reference:
874 `phrase reference`:role:_
875 """,
876 """\
877 <document source="test data">
878 <paragraph>
879 Invalid phrase reference:
880 <paragraph>
881 <problematic ids="id2" refid="id1">
882 `phrase reference`:role:_
883 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
884 <paragraph>
885 Mismatch: both interpreted text role suffix and reference suffix.
886 """],
887 ["""\
888 `phrase reference_ without closing backquote
889 """,
890 """\
891 <document source="test data">
892 <paragraph>
893 <problematic ids="id2" refid="id1">
895 phrase \n\
896 <reference name="reference" refname="reference">
897 reference
898 without closing backquote
899 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
900 <paragraph>
901 Inline interpreted text or phrase reference start-string without end-string.
902 """],
903 ["""\
904 `anonymous phrase reference__ without closing backquote
905 """,
906 """\
907 <document source="test data">
908 <paragraph>
909 <problematic ids="id2" refid="id1">
911 anonymous phrase \n\
912 <reference anonymous="1" name="reference">
913 reference
914 without closing backquote
915 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
916 <paragraph>
917 Inline interpreted text or phrase reference start-string without end-string.
918 """],
921 totest['embedded_URIs'] = [
922 ["""\
923 `phrase reference <http://example.com>`_
924 """,
925 """\
926 <document source="test data">
927 <paragraph>
928 <reference name="phrase reference" refuri="http://example.com">
929 phrase reference
930 <target ids="phrase-reference" names="phrase\ reference" refuri="http://example.com">
931 """],
932 ["""\
933 `anonymous reference <http://example.com>`__
934 """,
935 """\
936 <document source="test data">
937 <paragraph>
938 <reference name="anonymous reference" refuri="http://example.com">
939 anonymous reference
940 """],
941 ["""\
942 `embedded URI on next line
943 <http://example.com>`__
944 """,
945 """\
946 <document source="test data">
947 <paragraph>
948 <reference name="embedded URI on next line" refuri="http://example.com">
949 embedded URI on next line
950 """],
951 ["""\
952 `embedded URI across lines <http://example.com/
953 long/path>`__
954 """,
955 """\
956 <document source="test data">
957 <paragraph>
958 <reference name="embedded URI across lines" refuri="http://example.com/long/path">
959 embedded URI across lines
960 """],
961 ["""\
962 `embedded URI with whitespace <http://example.com/
963 long/path /and /whitespace>`__
964 """,
965 """\
966 <document source="test data">
967 <paragraph>
968 <reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
969 embedded URI with whitespace
970 """],
971 ["""\
972 `embedded email address <jdoe@example.com>`__
974 `embedded email address broken across lines <jdoe
975 @example.com>`__
976 """,
977 """\
978 <document source="test data">
979 <paragraph>
980 <reference name="embedded email address" refuri="mailto:jdoe@example.com">
981 embedded email address
982 <paragraph>
983 <reference name="embedded email address broken across lines" refuri="mailto:jdoe@example.com">
984 embedded email address broken across lines
985 """],
986 [r"""
987 `embedded URI with too much whitespace < http://example.com/
988 long/path /and /whitespace >`__
990 `embedded URI with too much whitespace at end <http://example.com/
991 long/path /and /whitespace >`__
993 `embedded URI with no preceding whitespace<http://example.com>`__
995 `escaped URI \<http://example.com>`__
997 See `HTML Anchors: \<a>`_.
998 """,
999 """\
1000 <document source="test data">
1001 <paragraph>
1002 <reference anonymous="1" name="embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace >">
1003 embedded URI with too much whitespace < http://example.com/
1004 long/path /and /whitespace >
1005 <paragraph>
1006 <reference anonymous="1" name="embedded URI with too much whitespace at end <http://example.com/ long/path /and /whitespace >">
1007 embedded URI with too much whitespace at end <http://example.com/
1008 long/path /and /whitespace >
1009 <paragraph>
1010 <reference anonymous="1" name="embedded URI with no preceding whitespace<http://example.com>">
1011 embedded URI with no preceding whitespace<http://example.com>
1012 <paragraph>
1013 <reference anonymous="1" name="escaped URI <http://example.com>">
1014 escaped URI <http://example.com>
1015 <paragraph>
1016 See \n\
1017 <reference name="HTML Anchors: <a>" refname="html anchors: <a>">
1018 HTML Anchors: <a>
1020 """],
1021 ["""\
1022 Relative URIs' reference text can be omitted:
1024 `<reference>`_
1026 `<anonymous>`__
1027 """,
1028 """\
1029 <document source="test data">
1030 <paragraph>
1031 Relative URIs' reference text can be omitted:
1032 <paragraph>
1033 <reference name="reference" refuri="reference">
1034 reference
1035 <target ids="reference" names="reference" refuri="reference">
1036 <paragraph>
1037 <reference name="anonymous" refuri="anonymous">
1038 anonymous
1039 """],
1040 ["""\
1041 Escape trailing low-line char in URIs:
1043 `<reference\_>`_
1045 `<anonymous\_>`__
1046 """,
1047 """\
1048 <document source="test data">
1049 <paragraph>
1050 Escape trailing low-line char in URIs:
1051 <paragraph>
1052 <reference name="reference_" refuri="reference_">
1053 reference_
1054 <target ids="reference" names="reference_" refuri="reference_">
1055 <paragraph>
1056 <reference name="anonymous_" refuri="anonymous_">
1057 anonymous_
1058 """],
1061 totest['embedded_aliases'] = [
1062 ["""\
1063 `phrase reference <alias_>`_
1064 """,
1065 """\
1066 <document source="test data">
1067 <paragraph>
1068 <reference name="phrase reference" refname="alias">
1069 phrase reference
1070 <target names="phrase\ reference" refname="alias">
1071 """],
1072 ["""\
1073 `anonymous reference <alias_>`__
1074 """,
1075 """\
1076 <document source="test data">
1077 <paragraph>
1078 <reference name="anonymous reference" refname="alias">
1079 anonymous reference
1080 """],
1081 ["""\
1082 `embedded alias on next line
1083 <alias_>`__
1084 """,
1085 """\
1086 <document source="test data">
1087 <paragraph>
1088 <reference name="embedded alias on next line" refname="alias">
1089 embedded alias on next line
1090 """],
1091 ["""\
1092 `embedded alias across lines <alias
1093 phrase_>`__
1094 """,
1095 """\
1096 <document source="test data">
1097 <paragraph>
1098 <reference name="embedded alias across lines" refname="alias phrase">
1099 embedded alias across lines
1100 """],
1101 ["""\
1102 `embedded alias with whitespace <alias
1103 long phrase_>`__
1104 """,
1105 """\
1106 <document source="test data">
1107 <paragraph>
1108 <reference name="embedded alias with whitespace" refname="alias long phrase">
1109 embedded alias with whitespace
1110 """],
1111 [r"""
1112 `embedded alias with too much whitespace < alias_ >`__
1114 `embedded alias with no preceding whitespace<alias_>`__
1115 """,
1116 """\
1117 <document source="test data">
1118 <paragraph>
1119 <reference anonymous="1" name="embedded alias with too much whitespace < alias_ >">
1120 embedded alias with too much whitespace < alias_ >
1121 <paragraph>
1122 <reference anonymous="1" name="embedded alias with no preceding whitespace<alias_>">
1123 embedded alias with no preceding whitespace<alias_>
1124 """],
1127 totest['inline_targets'] = [
1128 ["""\
1129 _`target`
1131 Here is _`another target` in some text. And _`yet
1132 another target`, spanning lines.
1134 _`Here is a TaRgeT` with case and spacial difficulties.
1135 """,
1136 """\
1137 <document source="test data">
1138 <paragraph>
1139 <target ids="target" names="target">
1140 target
1141 <paragraph>
1142 Here is \n\
1143 <target ids="another-target" names="another\ target">
1144 another target
1145 in some text. And \n\
1146 <target ids="yet-another-target" names="yet\ another\ target">
1148 another target
1149 , spanning lines.
1150 <paragraph>
1151 <target ids="here-is-a-target" names="here\ is\ a\ target">
1152 Here is a TaRgeT
1153 with case and spacial difficulties.
1154 """],
1155 [u"""\
1156 l'_`target1` and l\u2019_`target2` with apostrophe
1157 """,
1158 u"""\
1159 <document source="test data">
1160 <paragraph>
1162 <target ids="target1" names="target1">
1163 target1
1164 and l\u2019
1165 <target ids="target2" names="target2">
1166 target2
1167 with apostrophe
1168 """],
1169 [u"""\
1170 quoted '_`target1`', quoted "_`target2`",
1171 quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d,
1172 quoted \xab_`target5`\xbb
1173 """,
1174 u"""\
1175 <document source="test data">
1176 <paragraph>
1177 quoted '
1178 <target ids="target1" names="target1">
1179 target1
1180 ', quoted "
1181 <target ids="target2" names="target2">
1182 target2
1184 quoted \u2018
1185 <target ids="target3" names="target3">
1186 target3
1187 \u2019, quoted \u201c
1188 <target ids="target4" names="target4">
1189 target4
1190 \u201d,
1191 quoted \xab
1192 <target ids="target5" names="target5">
1193 target5
1194 \xbb
1195 """],
1196 [u"""\
1197 _`'target1'` with quotes, _`"target2"` with quotes,
1198 _`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes,
1199 _`\xabtarget5\xbb` with quotes
1200 """,
1201 u"""\
1202 <document source="test data">
1203 <paragraph>
1204 <target ids="target1" names="'target1'">
1205 'target1'
1206 with quotes, \n\
1207 <target ids="target2" names=""target2"">
1208 "target2"
1209 with quotes,
1210 <target ids="target3" names="\u2018target3\u2019">
1211 \u2018target3\u2019
1212 with quotes, \n\
1213 <target ids="target4" names="\u201ctarget4\u201d">
1214 \u201ctarget4\u201d
1215 with quotes,
1216 <target ids="target5" names="\xabtarget5\xbb">
1217 \xabtarget5\xbb
1218 with quotes
1219 """],
1220 ["""\
1221 But this isn't a _target; targets require backquotes.
1223 And _`this`_ is just plain confusing.
1224 """,
1225 """\
1226 <document source="test data">
1227 <paragraph>
1228 But this isn't a _target; targets require backquotes.
1229 <paragraph>
1230 And \n\
1231 <problematic ids="id2" refid="id1">
1233 this`_ is just plain confusing.
1234 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
1235 <paragraph>
1236 Inline target start-string without end-string.
1237 """],
1238 ["""\
1239 _`inline target without closing backquote
1240 """,
1241 """\
1242 <document source="test data">
1243 <paragraph>
1244 <problematic ids="id2" refid="id1">
1246 inline target without closing backquote
1247 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1248 <paragraph>
1249 Inline target start-string without end-string.
1250 """],
1253 totest['footnote_reference'] = [
1254 ["""\
1255 [1]_
1256 """,
1257 """\
1258 <document source="test data">
1259 <paragraph>
1260 <footnote_reference ids="id1" refname="1">
1262 """],
1263 ["""\
1264 [#]_
1265 """,
1266 """\
1267 <document source="test data">
1268 <paragraph>
1269 <footnote_reference auto="1" ids="id1">
1270 """],
1271 ["""\
1272 [#label]_
1273 """,
1274 """\
1275 <document source="test data">
1276 <paragraph>
1277 <footnote_reference auto="1" ids="id1" refname="label">
1278 """],
1279 ["""\
1280 [*]_
1281 """,
1282 """\
1283 <document source="test data">
1284 <paragraph>
1285 <footnote_reference auto="*" ids="id1">
1286 """],
1287 ["""\
1288 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1289 """,
1290 """\
1291 <document source="test data">
1292 <paragraph>
1293 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1294 """],
# Citation reference cases ([input, expected pseudo-XML] pairs).
1297 totest['citation_reference'] = [
1298 ["""\
1299 [citation]_
1300 """,
1301 """\
1302 <document source="test data">
1303 <paragraph>
1304 <citation_reference ids="id1" refname="citation">
1305 citation
1306 """],
# Labels containing "-" or "." are accepted; the expected ``refname`` for
# ``[CIT1]_`` is lower-cased ("cit1") while the node text keeps its case.
# A label containing a space (``[CIT 1]_``) is expected to stay plain text.
1307 ["""\
1308 [citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_
1309 """,
1310 """\
1311 <document source="test data">
1312 <paragraph>
1313 <citation_reference ids="id1" refname="citation">
1314 citation
1315 and \n\
1316 <citation_reference ids="id2" refname="cit-ation">
1317 cit-ation
1318 and \n\
1319 <citation_reference ids="id3" refname="cit.ation">
1320 cit.ation
1321 and \n\
1322 <citation_reference ids="id4" refname="cit1">
1323 CIT1
1324 but not [CIT 1]_
1325 """],
# Negative case: two citation references with nothing between them are
# expected to come out as unchanged paragraph text.
1326 ["""\
1327 Adjacent citation refs are not possible: [citation]_[CIT1]_
1328 """,
1329 """\
1330 <document source="test data">
1331 <paragraph>
1332 Adjacent citation refs are not possible: [citation]_[CIT1]_
1333 """],
# Substitution reference cases ([input, expected pseudo-XML] pairs).
1336 totest['substitution_references'] = [
1337 ["""\
1338 |subref|
1339 """,
1340 """\
1341 <document source="test data">
1342 <paragraph>
1343 <substitution_reference refname="subref">
1344 subref
1345 """],
# A trailing "_" or "__" wraps the substitution reference in a <reference>
# node: named (``refname="subref"``) or anonymous (``anonymous="1"``).
1346 ["""\
1347 |subref|_ and |subref|__
1348 """,
1349 """\
1350 <document source="test data">
1351 <paragraph>
1352 <reference refname="subref">
1353 <substitution_reference refname="subref">
1354 subref
1355 and \n\
1356 <reference anonymous="1">
1357 <substitution_reference refname="subref">
1358 subref
1359 """],
1360 ["""\
1361 |substitution reference|
1362 """,
1363 """\
1364 <document source="test data">
1365 <paragraph>
1366 <substitution_reference refname="substitution reference">
1367 substitution reference
1368 """],
# The reference text spans two input lines; the expected ``refname`` has a
# single space where the line break was.
1369 ["""\
1370 |substitution
1371 reference|
1372 """,
1373 """\
1374 <document source="test data">
1375 <paragraph>
1376 <substitution_reference refname="substitution reference">
1377 substitution
1378 reference
1379 """],
# Unterminated start-string: expected result is a <problematic> node plus
# a level-2 WARNING system_message.
1380 ["""\
1381 |substitution reference without closing verbar
1382 """,
1383 """\
1384 <document source="test data">
1385 <paragraph>
1386 <problematic ids="id2" refid="id1">
1388 substitution reference without closing verbar
1389 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1390 <paragraph>
1391 Inline substitution_reference start-string without end-string.
1392 """],
# Free-standing vertical bars are expected to pass through as plain text,
# with no markup and no system_message.
1393 ["""\
1394 first | then || and finally |||
1395 """,
1396 """\
1397 <document source="test data">
1398 <paragraph>
1399 first | then || and finally |||
1400 """],
# Standalone hyperlink cases ([input, expected pseudo-XML] pairs).
#
# The first case puts one candidate per paragraph.  Per the expected tree:
# bare e-mail addresses get a ``mailto:`` refuri; a trailing "." or "?" is
# left out of the refuri; for the bare IPv6 URL ending in "]" the final
# bracket is left out of the refuri as well.
1403 totest['standalone_hyperlink'] = [
1404 ["""\
1405 http://www.standalone.hyperlink.com
1407 http:/one-slash-only.absolute.path
1409 [http://example.com]
1411 (http://example.com)
1413 <http://example.com>
1415 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1417 http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text)
1419 http://[3ffe:2a00:100:7031::1]/
1421 mailto:someone@somewhere.com
1423 news:comp.lang.python
1425 An email address in a sentence: someone@somewhere.com.
1427 ftp://ends.with.a.period.
1429 (a.question.mark@end?)
1430 """,
1431 """\
1432 <document source="test data">
1433 <paragraph>
1434 <reference refuri="http://www.standalone.hyperlink.com">
1435 http://www.standalone.hyperlink.com
1436 <paragraph>
1437 <reference refuri="http:/one-slash-only.absolute.path">
1438 http:/one-slash-only.absolute.path
1439 <paragraph>
1441 <reference refuri="http://example.com">
1442 http://example.com
1444 <paragraph>
1446 <reference refuri="http://example.com">
1447 http://example.com
1449 <paragraph>
1451 <reference refuri="http://example.com">
1452 http://example.com
1454 <paragraph>
1455 <reference refuri="http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html">
1456 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1457 <paragraph>
1458 <reference refuri="http://[3ffe:2a00:100:7031::1">
1459 http://[3ffe:2a00:100:7031::1
1460 ] (the final "]" is ambiguous in text)
1461 <paragraph>
1462 <reference refuri="http://[3ffe:2a00:100:7031::1]/">
1463 http://[3ffe:2a00:100:7031::1]/
1464 <paragraph>
1465 <reference refuri="mailto:someone@somewhere.com">
1466 mailto:someone@somewhere.com
1467 <paragraph>
1468 <reference refuri="news:comp.lang.python">
1469 news:comp.lang.python
1470 <paragraph>
1471 An email address in a sentence: \n\
1472 <reference refuri="mailto:someone@somewhere.com">
1473 someone@somewhere.com
1475 <paragraph>
1476 <reference refuri="ftp://ends.with.a.period">
1477 ftp://ends.with.a.period
1479 <paragraph>
1481 <reference refuri="mailto:a.question.mark@end">
1482 a.question.mark@end
1484 """],
# Raw-string input so the backslashes reach the parser.  Both URLs are
# expected to resolve to the same refuri with the backslashes removed.
1485 [r"""
1486 Valid URLs with escaped markup characters:
1488 http://example.com/\*content\*/whatever
1490 http://example.com/\*content*/whatever
1491 """,
1492 """\
1493 <document source="test data">
1494 <paragraph>
1495 Valid URLs with escaped markup characters:
1496 <paragraph>
1497 <reference refuri="http://example.com/*content*/whatever">
1498 http://example.com/*content*/whatever
1499 <paragraph>
1500 <reference refuri="http://example.com/*content*/whatever">
1501 http://example.com/*content*/whatever
1502 """],
# Inside angle brackets the trailing "." is expected to be kept as part of
# the refuri (contrast with the bare "ftp://ends.with.a.period." above).
1503 ["""\
1504 Valid URLs may end with punctuation inside "<>":
1506 <http://example.org/ends-with-dot.>
1507 """,
1508 """\
1509 <document source="test data">
1510 <paragraph>
1511 Valid URLs may end with punctuation inside "<>":
1512 <paragraph>
1514 <reference refuri="http://example.org/ends-with-dot.">
1515 http://example.org/ends-with-dot.
1517 """],
1518 ["""\
1519 Valid URLs with interesting endings:
1521 http://example.org/ends-with-pluses++
1522 """,
1523 """\
1524 <document source="test data">
1525 <paragraph>
1526 Valid URLs with interesting endings:
1527 <paragraph>
1528 <reference refuri="http://example.org/ends-with-pluses++">
1529 http://example.org/ends-with-pluses++
1530 """],
# Negative cases: "scheme:rest" text with an unrecognized scheme, and an
# e-mail address whose "@" is backslash-escaped, are expected to stay text.
1531 ["""\
1532 None of these are standalone hyperlinks (their "schemes"
1533 are not recognized): signal:noise, a:b.
1534 """,
1535 """\
1536 <document source="test data">
1537 <paragraph>
1538 None of these are standalone hyperlinks (their "schemes"
1539 are not recognized): signal:noise, a:b.
1540 """],
1541 ["""\
1542 Escaped email addresses are not recognized: test\@example.org
1543 """,
1544 """\
1545 <document source="test data">
1546 <paragraph>
1547 Escaped email addresses are not recognized: test@example.org
1548 """],
# Cases for the inline markup recognition rules, i.e. which characters may
# surround a start-string/end-string for it to count as markup.  Each entry
# is an [input, expected pseudo-XML] pair.
1551 totest['markup recognition rules'] = [
1552 ["""\
1553 __This__ should be left alone.
1554 """,
1555 """\
1556 <document source="test data">
1557 <paragraph>
1558 __This__ should be left alone.
1559 """],
# Raw-string input: "\ " (backslash + whitespace) separates markup from
# adjacent characters; the expected text shows the escaped whitespace
# removed, including the escaped line break ("new\" + "lines." -> "newlines.").
1560 [r"""
1561 Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
1562 with backslash-escaped whitespace, including new\
1563 lines.
1564 """,
1565 """\
1566 <document source="test data">
1567 <paragraph>
1568 Character-level m
1569 <emphasis>
1571 <strong>
1573 <literal>
1575 <title_reference>
1578 with backslash-escaped whitespace, including newlines.
1579 """],
# Unicode dashes/hyphens (U+2010..U+2014), inverted punctuation (U+00BF,
# U+00A1) and NO-BREAK SPACE (U+00A0) adjacent to the emphasis markers.
1580 [u"""\
1581 text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*.
1582 \u00bf*punctuation*? \u00a1*examples*!\u00a0*\u00a0no-break-space\u00a0*.
1583 """,
1584 u"""\
1585 <document source="test data">
1586 <paragraph>
1587 text-
1588 <emphasis>
1589 separated
1590 \u2010
1591 <emphasis>
1593 \u2011
1594 <emphasis>
1595 various
1596 \u2012
1597 <emphasis>
1598 dashes
1599 \u2013
1600 <emphasis>
1602 \u2014
1603 <emphasis>
1604 hyphens
1606 \xbf
1607 <emphasis>
1608 punctuation
1609 ? \xa1
1610 <emphasis>
1611 examples
1612 !\xa0
1613 <emphasis>
1614 \u00a0no-break-space\u00a0
1616 """],
1617 # Whitespace characters:
1618 # \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, fails in Python 2.4
# Each listed Unicode space character is placed directly before and after
# an emphasis span.  In the expected tree the text following U+2028 (LINE
# SEPARATOR) appears under a new <paragraph>.
1619 [u"""\
1620 text separated by
1621 *newline*
1622 or *space* or one of
1623 \xa0*NO-BREAK SPACE*\xa0,
1624 \u1680*OGHAM SPACE MARK*\u1680,
1625 \u2000*EN QUAD*\u2000,
1626 \u2001*EM QUAD*\u2001,
1627 \u2002*EN SPACE*\u2002,
1628 \u2003*EM SPACE*\u2003,
1629 \u2004*THREE-PER-EM SPACE*\u2004,
1630 \u2005*FOUR-PER-EM SPACE*\u2005,
1631 \u2006*SIX-PER-EM SPACE*\u2006,
1632 \u2007*FIGURE SPACE*\u2007,
1633 \u2008*PUNCTUATION SPACE*\u2008,
1634 \u2009*THIN SPACE*\u2009,
1635 \u200a*HAIR SPACE*\u200a,
1636 \u202f*NARROW NO-BREAK SPACE*\u202f,
1637 \u205f*MEDIUM MATHEMATICAL SPACE*\u205f,
1638 \u3000*IDEOGRAPHIC SPACE*\u3000,
1639 \u2028*LINE SEPARATOR*\u2028
1640 """,
1641 u"""\
1642 <document source="test data">
1643 <paragraph>
1644 text separated by
1645 <emphasis>
1646 newline
1648 or \n\
1649 <emphasis>
1650 space
1651 or one of
1652 \xa0
1653 <emphasis>
1654 NO-BREAK SPACE
1655 \xa0,
1656 \u1680
1657 <emphasis>
1658 OGHAM SPACE MARK
1659 \u1680,
1660 \u2000
1661 <emphasis>
1662 EN QUAD
1663 \u2000,
1664 \u2001
1665 <emphasis>
1666 EM QUAD
1667 \u2001,
1668 \u2002
1669 <emphasis>
1670 EN SPACE
1671 \u2002,
1672 \u2003
1673 <emphasis>
1674 EM SPACE
1675 \u2003,
1676 \u2004
1677 <emphasis>
1678 THREE-PER-EM SPACE
1679 \u2004,
1680 \u2005
1681 <emphasis>
1682 FOUR-PER-EM SPACE
1683 \u2005,
1684 \u2006
1685 <emphasis>
1686 SIX-PER-EM SPACE
1687 \u2006,
1688 \u2007
1689 <emphasis>
1690 FIGURE SPACE
1691 \u2007,
1692 \u2008
1693 <emphasis>
1694 PUNCTUATION SPACE
1695 \u2008,
1696 \u2009
1697 <emphasis>
1698 THIN SPACE
1699 \u2009,
1700 \u200a
1701 <emphasis>
1702 HAIR SPACE
1703 \u200a,
1704 \u202f
1705 <emphasis>
1706 NARROW NO-BREAK SPACE
1707 \u202f,
1708 \u205f
1709 <emphasis>
1710 MEDIUM MATHEMATICAL SPACE
1711 \u205f,
1712 \u3000
1713 <emphasis>
1714 IDEOGRAPHIC SPACE
1715 \u3000,
1716 <paragraph>
1717 <emphasis>
1718 LINE SEPARATOR
1719 """],
1720 # « * » ‹ * › « * » ‹ * › « * » ‹ * › French,
# A "*" enclosed by a matching opener/closer quote pair is expected to stay
# literal text (first three paragraphs).  The final paragraph uses
# mismatched pairs and is expected to produce an <emphasis> node.
# The quote characters are literal non-ASCII text; the file declares utf8.
1721 [u"""\
1722 "Quoted" markup start-string (matched openers & closers) -> no markup:
1724 '*' "*" (*) <*> [*] {*}
1725 ⁅*⁆
1727 Some international quoting styles:
1728 ‘*’ “*” English, ...,
1729 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1730 „*” «*» Romanian,
1731 “*„ ‘*‚ Greek,
1732 「*」 『*』traditional Chinese,
1733 ”*” ’*’ »*» ›*› Swedish, Finnish,
1734 „*” ‚*’ Polish,
1735 „*” »*« ’*’ Hungarian,
1737 But this is „*’ emphasized »*‹.
1738 """,
1739 u"""\
1740 <document source="test data">
1741 <paragraph>
1742 "Quoted" markup start-string (matched openers & closers) -> no markup:
1743 <paragraph>
1744 '*' "*" (*) <*> [*] {*}
1745 ⁅*⁆
1746 <paragraph>
1747 Some international quoting styles:
1748 ‘*’ “*” English, ...,
1749 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1750 „*” «*» Romanian,
1751 “*„ ‘*‚ Greek,
1752 「*」 『*』traditional Chinese,
1753 ”*” ’*’ »*» ›*› Swedish, Finnish,
1754 „*” ‚*’ Polish,
1755 „*” »*« ’*’ Hungarian,
1756 <paragraph>
1757 But this is „
1758 <emphasis>
1759 ’ emphasized »
1760 ‹.
1761 """],
# Script entry point: when this file is executed directly, hand control to
# unittest.main(), naming the module-level ``suite`` callable (which builds
# the tests from ``totest``) as the default test to run.
1765 if __name__ == '__main__':
1766 import unittest
1767 unittest.main(defaultTest='suite')