Also allow non-ASCII whitespace characters around inline markup.
[docutils.git] / test / test_parsers / test_rst / test_inline_markup.py
blobdd8353958bd57e93a734777bc3964cbf24acd345
1 #! /usr/bin/env python
3 # $Id$
4 # Author: David Goodger <goodger@python.org>
5 # Copyright: This module has been placed in the public domain.
7 """
8 Tests for inline markup in docutils/parsers/rst/states.py.
9 Interpreted text tests are in a separate module, test_interpreted.py.
10 """
12 from __init__ import DocutilsTestSupport
def suite():
    """Build and return the test suite for the inline-markup cases.

    Creates a ``DocutilsTestSupport.ParserTestSuite``, hands it the
    module-level ``totest`` table through ``generateTests`` and returns
    the populated suite object.
    """
    test_suite = DocutilsTestSupport.ParserTestSuite()
    test_suite.generateTests(totest)
    return test_suite
19 totest = {}
21 totest['emphasis'] = [
22 ["""\
23 *emphasis*
24 """,
25 """\
26 <document source="test data">
27 <paragraph>
28 <emphasis>
29 emphasis
30 """],
31 [u"""\
32 l'*emphasis* and l\u2019*emphasis* with apostrophe
33 """,
34 u"""\
35 <document source="test data">
36 <paragraph>
38 <emphasis>
39 emphasis
40 and l\u2019
41 <emphasis>
42 emphasis
43 with apostrophe
44 """],
45 ["""\
46 *emphasized sentence
47 across lines*
48 """,
49 """\
50 <document source="test data">
51 <paragraph>
52 <emphasis>
53 emphasized sentence
54 across lines
55 """],
56 ["""\
57 *emphasis without closing asterisk
58 """,
59 """\
60 <document source="test data">
61 <paragraph>
62 <problematic ids="id2" refid="id1">
64 emphasis without closing asterisk
65 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
66 <paragraph>
67 Inline emphasis start-string without end-string.
68 """],
69 [r"""
70 '*emphasis*' and 1/*emphasis*/2 and 3-*emphasis*-4 and 5:*emphasis*:6
71 but not '*' or '"*"' or x*2* or 2*x* or \*args or *
72 or *the\* *stars\\\* *inside*
74 (however, '*args' will trigger a warning and may be problematic)
76 what about *this**?
77 """,
78 """\
79 <document source="test data">
80 <paragraph>
82 <emphasis>
83 emphasis
84 ' and 1/
85 <emphasis>
86 emphasis
87 /2 and 3-
88 <emphasis>
89 emphasis
90 -4 and 5:
91 <emphasis>
92 emphasis
94 but not '*' or '"*"' or x*2* or 2*x* or *args or *
95 or \n\
96 <emphasis>
97 the* *stars\* *inside
98 <paragraph>
99 (however, '
100 <problematic ids="id2" refid="id1">
102 args' will trigger a warning and may be problematic)
103 <system_message backrefs="id2" ids="id1" level="2" line="6" source="test data" type="WARNING">
104 <paragraph>
105 Inline emphasis start-string without end-string.
106 <paragraph>
107 what about \n\
108 <emphasis>
109 this*
111 """],
112 [u"""\
113 quoted '*emphasis*', quoted "*emphasis*",
114 quoted \u2018*emphasis*\u2019, quoted \u201c*emphasis*\u201d,
115 quoted \xab*emphasis*\xbb
116 """,
117 u"""\
118 <document source="test data">
119 <paragraph>
120 quoted '
121 <emphasis>
122 emphasis
123 ', quoted "
124 <emphasis>
125 emphasis
127 quoted \u2018
128 <emphasis>
129 emphasis
130 \u2019, quoted \u201c
131 <emphasis>
132 emphasis
133 \u201d,
134 quoted \xab
135 <emphasis>
136 emphasis
137 \xbb
138 """],
139 [r"""
140 Emphasized asterisk: *\**
142 Emphasized double asterisk: *\***
143 """,
144 """\
145 <document source="test data">
146 <paragraph>
147 Emphasized asterisk: \n\
148 <emphasis>
150 <paragraph>
151 Emphasized double asterisk: \n\
152 <emphasis>
154 """],
157 totest['strong'] = [
158 ["""\
159 **strong**
160 """,
161 """\
162 <document source="test data">
163 <paragraph>
164 <strong>
165 strong
166 """],
167 [u"""\
168 l'**strong** and l\u2019**strong** with apostrophe
169 """,
170 u"""\
171 <document source="test data">
172 <paragraph>
174 <strong>
175 strong
176 and l\u2019
177 <strong>
178 strong
179 with apostrophe
180 """],
181 [u"""\
182 quoted '**strong**', quoted "**strong**",
183 quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d,
184 quoted \xab**strong**\xbb
185 """,
186 u"""\
187 <document source="test data">
188 <paragraph>
189 quoted '
190 <strong>
191 strong
192 ', quoted "
193 <strong>
194 strong
196 quoted \u2018
197 <strong>
198 strong
199 \u2019, quoted \u201c
200 <strong>
201 strong
202 \u201d,
203 quoted \xab
204 <strong>
205 strong
206 \xbb
207 """],
208 [r"""
209 (**strong**) but not (**) or '(** ' or x**2 or \**kwargs or **
211 (however, '**kwargs' will trigger a warning and may be problematic)
212 """,
213 """\
214 <document source="test data">
215 <paragraph>
217 <strong>
218 strong
219 ) but not (**) or '(** ' or x**2 or **kwargs or **
220 <paragraph>
221 (however, '
222 <problematic ids="id2" refid="id1">
224 kwargs' will trigger a warning and may be problematic)
225 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
226 <paragraph>
227 Inline strong start-string without end-string.
228 """],
229 ["""\
230 Strong asterisk: *****
232 Strong double asterisk: ******
233 """,
234 """\
235 <document source="test data">
236 <paragraph>
237 Strong asterisk: \n\
238 <strong>
240 <paragraph>
241 Strong double asterisk: \n\
242 <strong>
244 """],
245 ["""\
246 **strong without closing asterisks
247 """,
248 """\
249 <document source="test data">
250 <paragraph>
251 <problematic ids="id2" refid="id1">
253 strong without closing asterisks
254 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
255 <paragraph>
256 Inline strong start-string without end-string.
257 """],
260 totest['literal'] = [
261 ["""\
262 ``literal``
263 """,
264 """\
265 <document source="test data">
266 <paragraph>
267 <literal>
268 literal
269 """],
270 [r"""
271 ``\literal``
272 """,
273 """\
274 <document source="test data">
275 <paragraph>
276 <literal>
277 \\literal
278 """],
279 [r"""
280 ``lite\ral``
281 """,
282 """\
283 <document source="test data">
284 <paragraph>
285 <literal>
286 lite\\ral
287 """],
288 [r"""
289 ``literal\``
290 """,
291 """\
292 <document source="test data">
293 <paragraph>
294 <literal>
295 literal\\
296 """],
297 [u"""\
298 l'``literal`` and l\u2019``literal`` with apostrophe
299 """,
300 u"""\
301 <document source="test data">
302 <paragraph>
304 <literal>
305 literal
306 and l\u2019
307 <literal>
308 literal
309 with apostrophe
310 """],
311 [u"""\
312 quoted '``literal``', quoted "``literal``",
313 quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
314 quoted \xab``literal``\xbb
315 """,
316 u"""\
317 <document source="test data">
318 <paragraph>
319 quoted '
320 <literal>
321 literal
322 ', quoted "
323 <literal>
324 literal
326 quoted \u2018
327 <literal>
328 literal
329 \u2019, quoted \u201c
330 <literal>
331 literal
332 \u201d,
333 quoted \xab
334 <literal>
335 literal
336 \xbb
337 """],
338 [u"""\
339 ``'literal'`` with quotes, ``"literal"`` with quotes,
340 ``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
341 ``\xabliteral\xbb`` with quotes
342 """,
343 u"""\
344 <document source="test data">
345 <paragraph>
346 <literal>
347 'literal'
348 with quotes,
349 <literal>
350 "literal"
351 with quotes,
352 <literal>
353 \u2018literal\u2019
354 with quotes,
355 <literal>
356 \u201cliteral\u201d
357 with quotes,
358 <literal>
359 \xabliteral\xbb
360 with quotes
361 """],
362 [r"""
363 ``literal ``TeX quotes'' & \backslash`` but not "``" or ``
365 (however, ``standalone TeX quotes'' will trigger a warning
366 and may be problematic)
367 """,
368 """\
369 <document source="test data">
370 <paragraph>
371 <literal>
372 literal ``TeX quotes'' & \\backslash
373 but not "``" or ``
374 <paragraph>
375 (however, \n\
376 <problematic ids="id2" refid="id1">
378 standalone TeX quotes'' will trigger a warning
379 and may be problematic)
380 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
381 <paragraph>
382 Inline literal start-string without end-string.
383 """],
384 ["""\
385 Find the ```interpreted text``` in this paragraph!
386 """,
387 """\
388 <document source="test data">
389 <paragraph>
390 Find the \n\
391 <literal>
392 `interpreted text`
393 in this paragraph!
394 """],
395 ["""\
396 ``literal without closing backquotes
397 """,
398 """\
399 <document source="test data">
400 <paragraph>
401 <problematic ids="id2" refid="id1">
403 literal without closing backquotes
404 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
405 <paragraph>
406 Inline literal start-string without end-string.
407 """],
408 [r"""
409 Python ``list``\s use square bracket syntax.
410 """,
411 """\
412 <document source="test data">
413 <paragraph>
414 Python \n\
415 <literal>
416 list
417 s use square bracket syntax.
418 """],
421 totest['references'] = [
422 ["""\
423 ref_
424 """,
425 """\
426 <document source="test data">
427 <paragraph>
428 <reference name="ref" refname="ref">
430 """],
431 [u"""\
432 l'ref_ and l\u2019ref_ with apostrophe
433 """,
434 u"""\
435 <document source="test data">
436 <paragraph>
438 <reference name="ref" refname="ref">
440 and l\u2019
441 <reference name="ref" refname="ref">
443 with apostrophe
444 """],
445 [u"""\
446 quoted 'ref_', quoted "ref_",
447 quoted \u2018ref_\u2019, quoted \u201cref_\u201d,
448 quoted \xabref_\xbb,
449 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
450 \u201cref ref\u201d_, or \xabref ref\xbb_
451 """,
452 u"""\
453 <document source="test data">
454 <paragraph>
455 quoted '
456 <reference name="ref" refname="ref">
458 ', quoted "
459 <reference name="ref" refname="ref">
462 quoted \u2018
463 <reference name="ref" refname="ref">
465 \u2019, quoted \u201c
466 <reference name="ref" refname="ref">
468 \u201d,
469 quoted \xab
470 <reference name="ref" refname="ref">
472 \xbb,
473 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
474 \u201cref ref\u201d_, or \xabref ref\xbb_
475 """],
476 ["""\
477 ref__
478 """,
479 """\
480 <document source="test data">
481 <paragraph>
482 <reference anonymous="1" name="ref">
484 """],
485 [u"""\
486 l'ref__ and l\u2019ref__ with apostrophe
487 """,
488 u"""\
489 <document source="test data">
490 <paragraph>
492 <reference anonymous="1" name="ref">
494 and l\u2019
495 <reference anonymous="1" name="ref">
497 with apostrophe
498 """],
499 [u"""\
500 quoted 'ref__', quoted "ref__",
501 quoted \u2018ref__\u2019, quoted \u201cref__\u201d,
502 quoted \xabref__\xbb,
503 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
504 \u201cref ref\u201d__, or \xabref ref\xbb__
505 """,
506 u"""\
507 <document source="test data">
508 <paragraph>
509 quoted '
510 <reference anonymous="1" name="ref">
512 ', quoted "
513 <reference anonymous="1" name="ref">
516 quoted \u2018
517 <reference anonymous="1" name="ref">
519 \u2019, quoted \u201c
520 <reference anonymous="1" name="ref">
522 \u201d,
523 quoted \xab
524 <reference anonymous="1" name="ref">
526 \xbb,
527 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
528 \u201cref ref\u201d__, or \xabref ref\xbb__
529 """],
530 ["""\
531 ref_, r_, r_e-f_, -ref_, and anonymousref__,
532 but not _ref_ or __attr__ or object.__attr__
533 """,
534 """\
535 <document source="test data">
536 <paragraph>
537 <reference name="ref" refname="ref">
539 , \n\
540 <reference name="r" refname="r">
542 , \n\
543 <reference name="r_e-f" refname="r_e-f">
544 r_e-f
546 <reference name="ref" refname="ref">
548 , and \n\
549 <reference anonymous="1" name="anonymousref">
550 anonymousref
552 but not _ref_ or __attr__ or object.__attr__
553 """],
556 totest['phrase_references'] = [
557 ["""\
558 `phrase reference`_
559 """,
560 """\
561 <document source="test data">
562 <paragraph>
563 <reference name="phrase reference" refname="phrase reference">
564 phrase reference
565 """],
566 [u"""\
567 l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe
568 """,
569 u"""\
570 <document source="test data">
571 <paragraph>
573 <reference name="phrase reference" refname="phrase reference">
574 phrase reference
575 and l\u2019
576 <reference name="phrase reference" refname="phrase reference">
577 phrase reference
578 with apostrophe
579 """],
580 [u"""\
581 quoted '`phrase reference`_', quoted "`phrase reference`_",
582 quoted \u2018`phrase reference`_\u2019,
583 quoted \u201c`phrase reference`_\u201d,
584 quoted \xab`phrase reference`_\xbb
585 """,
586 u"""\
587 <document source="test data">
588 <paragraph>
589 quoted '
590 <reference name="phrase reference" refname="phrase reference">
591 phrase reference
592 ', quoted "
593 <reference name="phrase reference" refname="phrase reference">
594 phrase reference
596 quoted \u2018
597 <reference name="phrase reference" refname="phrase reference">
598 phrase reference
599 \u2019,
600 quoted \u201c
601 <reference name="phrase reference" refname="phrase reference">
602 phrase reference
603 \u201d,
604 quoted \xab
605 <reference name="phrase reference" refname="phrase reference">
606 phrase reference
607 \xbb
608 """],
609 [u"""\
610 `'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes,
611 `\u2018phrase reference\u2019`_ with quotes,
612 `\u201cphrase reference\u201d`_ with quotes,
613 `\xabphrase reference\xbb`_ with quotes
614 """,
615 u"""\
616 <document source="test data">
617 <paragraph>
618 <reference name="'phrase reference'" refname="'phrase reference'">
619 'phrase reference'
620 with quotes,
621 <reference name=""phrase reference"" refname=""phrase reference"">
622 "phrase reference"
623 with quotes,
624 <reference name="\u2018phrase reference\u2019" refname="\u2018phrase reference\u2019">
625 \u2018phrase reference\u2019
626 with quotes,
627 <reference name="\u201cphrase reference\u201d" refname="\u201cphrase reference\u201d">
628 \u201cphrase reference\u201d
629 with quotes,
630 <reference name="\xabphrase reference\xbb" refname="\xabphrase reference\xbb">
631 \xabphrase reference\xbb
632 with quotes
633 """],
634 ["""\
635 `anonymous reference`__
636 """,
637 """\
638 <document source="test data">
639 <paragraph>
640 <reference anonymous="1" name="anonymous reference">
641 anonymous reference
642 """],
643 [u"""\
644 l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe
645 """,
646 u"""\
647 <document source="test data">
648 <paragraph>
650 <reference anonymous="1" name="anonymous reference">
651 anonymous reference
652 and l\u2019
653 <reference anonymous="1" name="anonymous reference">
654 anonymous reference
655 with apostrophe
656 """],
657 [u"""\
658 quoted '`anonymous reference`__', quoted "`anonymous reference`__",
659 quoted \u2018`anonymous reference`__\u2019,
660 quoted \u201c`anonymous reference`__\u201d,
661 quoted \xab`anonymous reference`__\xbb
662 """,
663 u"""\
664 <document source="test data">
665 <paragraph>
666 quoted '
667 <reference anonymous="1" name="anonymous reference">
668 anonymous reference
669 ', quoted "
670 <reference anonymous="1" name="anonymous reference">
671 anonymous reference
673 quoted \u2018
674 <reference anonymous="1" name="anonymous reference">
675 anonymous reference
676 \u2019,
677 quoted \u201c
678 <reference anonymous="1" name="anonymous reference">
679 anonymous reference
680 \u201d,
681 quoted \xab
682 <reference anonymous="1" name="anonymous reference">
683 anonymous reference
684 \xbb
685 """],
686 [u"""\
687 `'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes,
688 `\u2018anonymous reference\u2019`__ with quotes,
689 `\u201canonymous reference\u201d`__ with quotes,
690 `\xabanonymous reference\xbb`__ with quotes
691 """,
692 u"""\
693 <document source="test data">
694 <paragraph>
695 <reference anonymous="1" name="'anonymous reference'">
696 'anonymous reference'
697 with quotes,
698 <reference anonymous="1" name=""anonymous reference"">
699 "anonymous reference"
700 with quotes,
701 <reference anonymous="1" name="\u2018anonymous reference\u2019">
702 \u2018anonymous reference\u2019
703 with quotes,
704 <reference anonymous="1" name="\u201canonymous reference\u201d">
705 \u201canonymous reference\u201d
706 with quotes,
707 <reference anonymous="1" name="\xabanonymous reference\xbb">
708 \xabanonymous reference\xbb
709 with quotes
710 """],
711 ["""\
712 `phrase reference
713 across lines`_
714 """,
715 """\
716 <document source="test data">
717 <paragraph>
718 <reference name="phrase reference across lines" refname="phrase reference across lines">
719 phrase reference
720 across lines
721 """],
722 ["""\
723 `phrase\`_ reference`_
724 """,
725 """\
726 <document source="test data">
727 <paragraph>
728 <reference name="phrase`_ reference" refname="phrase`_ reference">
729 phrase`_ reference
730 """],
731 ["""\
732 Invalid phrase reference:
734 :role:`phrase reference`_
735 """,
736 """\
737 <document source="test data">
738 <paragraph>
739 Invalid phrase reference:
740 <paragraph>
741 <problematic ids="id2" refid="id1">
742 :role:`phrase reference`_
743 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
744 <paragraph>
745 Mismatch: both interpreted text role prefix and reference suffix.
746 """],
747 ["""\
748 Invalid phrase reference:
750 `phrase reference`:role:_
751 """,
752 """\
753 <document source="test data">
754 <paragraph>
755 Invalid phrase reference:
756 <paragraph>
757 <problematic ids="id2" refid="id1">
758 `phrase reference`:role:_
759 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
760 <paragraph>
761 Mismatch: both interpreted text role suffix and reference suffix.
762 """],
763 ["""\
764 `phrase reference_ without closing backquote
765 """,
766 """\
767 <document source="test data">
768 <paragraph>
769 <problematic ids="id2" refid="id1">
771 phrase \n\
772 <reference name="reference" refname="reference">
773 reference
774 without closing backquote
775 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
776 <paragraph>
777 Inline interpreted text or phrase reference start-string without end-string.
778 """],
779 ["""\
780 `anonymous phrase reference__ without closing backquote
781 """,
782 """\
783 <document source="test data">
784 <paragraph>
785 <problematic ids="id2" refid="id1">
787 anonymous phrase \n\
788 <reference anonymous="1" name="reference">
789 reference
790 without closing backquote
791 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
792 <paragraph>
793 Inline interpreted text or phrase reference start-string without end-string.
794 """],
797 totest['embedded_URIs'] = [
798 ["""\
799 `phrase reference <http://example.com>`_
800 """,
801 """\
802 <document source="test data">
803 <paragraph>
804 <reference name="phrase reference" refuri="http://example.com">
805 phrase reference
806 <target ids="phrase-reference" names="phrase\ reference" refuri="http://example.com">
807 """],
808 ["""\
809 `anonymous reference <http://example.com>`__
810 """,
811 """\
812 <document source="test data">
813 <paragraph>
814 <reference name="anonymous reference" refuri="http://example.com">
815 anonymous reference
816 """],
817 ["""\
818 `embedded URI on next line
819 <http://example.com>`__
820 """,
821 """\
822 <document source="test data">
823 <paragraph>
824 <reference name="embedded URI on next line" refuri="http://example.com">
825 embedded URI on next line
826 """],
827 ["""\
828 `embedded URI across lines <http://example.com/
829 long/path>`__
830 """,
831 """\
832 <document source="test data">
833 <paragraph>
834 <reference name="embedded URI across lines" refuri="http://example.com/long/path">
835 embedded URI across lines
836 """],
837 ["""\
838 `embedded URI with whitespace <http://example.com/
839 long/path /and /whitespace>`__
840 """,
841 """\
842 <document source="test data">
843 <paragraph>
844 <reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
845 embedded URI with whitespace
846 """],
847 ["""\
848 `embedded email address <jdoe@example.com>`__
850 `embedded email address broken across lines <jdoe
851 @example.com>`__
852 """,
853 """\
854 <document source="test data">
855 <paragraph>
856 <reference name="embedded email address" refuri="mailto:jdoe@example.com">
857 embedded email address
858 <paragraph>
859 <reference name="embedded email address broken across lines" refuri="mailto:jdoe@example.com">
860 embedded email address broken across lines
861 """],
862 [r"""
863 `embedded URI with too much whitespace < http://example.com/
864 long/path /and /whitespace >`__
866 `embedded URI with too much whitespace at end <http://example.com/
867 long/path /and /whitespace >`__
869 `embedded URI with no preceding whitespace<http://example.com>`__
871 `escaped URI \<http://example.com>`__
873 See `HTML Anchors: \<a>`_.
874 """,
875 """\
876 <document source="test data">
877 <paragraph>
878 <reference anonymous="1" name="embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace >">
879 embedded URI with too much whitespace < http://example.com/
880 long/path /and /whitespace >
881 <paragraph>
882 <reference anonymous="1" name="embedded URI with too much whitespace at end <http://example.com/ long/path /and /whitespace >">
883 embedded URI with too much whitespace at end <http://example.com/
884 long/path /and /whitespace >
885 <paragraph>
886 <reference anonymous="1" name="embedded URI with no preceding whitespace<http://example.com>">
887 embedded URI with no preceding whitespace<http://example.com>
888 <paragraph>
889 <reference anonymous="1" name="escaped URI <http://example.com>">
890 escaped URI <http://example.com>
891 <paragraph>
892 See \n\
893 <reference name="HTML Anchors: <a>" refname="html anchors: <a>">
894 HTML Anchors: <a>
896 """],
897 ["""\
898 Relative URIs' reference text can be omitted:
900 `<reference>`_
902 `<anonymous>`__
903 """,
904 """\
905 <document source="test data">
906 <paragraph>
907 Relative URIs' reference text can be omitted:
908 <paragraph>
909 <reference name="reference" refuri="reference">
910 reference
911 <target ids="reference" names="reference" refuri="reference">
912 <paragraph>
913 <reference name="anonymous" refuri="anonymous">
914 anonymous
915 """],
918 totest['inline_targets'] = [
919 ["""\
920 _`target`
922 Here is _`another target` in some text. And _`yet
923 another target`, spanning lines.
925 _`Here is a TaRgeT` with case and spacial difficulties.
926 """,
927 """\
928 <document source="test data">
929 <paragraph>
930 <target ids="target" names="target">
931 target
932 <paragraph>
933 Here is \n\
934 <target ids="another-target" names="another\ target">
935 another target
936 in some text. And \n\
937 <target ids="yet-another-target" names="yet\ another\ target">
939 another target
940 , spanning lines.
941 <paragraph>
942 <target ids="here-is-a-target" names="here\ is\ a\ target">
943 Here is a TaRgeT
944 with case and spacial difficulties.
945 """],
946 [u"""\
947 l'_`target1` and l\u2019_`target2` with apostrophe
948 """,
949 u"""\
950 <document source="test data">
951 <paragraph>
953 <target ids="target1" names="target1">
954 target1
955 and l\u2019
956 <target ids="target2" names="target2">
957 target2
958 with apostrophe
959 """],
960 [u"""\
961 quoted '_`target1`', quoted "_`target2`",
962 quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d,
963 quoted \xab_`target5`\xbb
964 """,
965 u"""\
966 <document source="test data">
967 <paragraph>
968 quoted '
969 <target ids="target1" names="target1">
970 target1
971 ', quoted "
972 <target ids="target2" names="target2">
973 target2
975 quoted \u2018
976 <target ids="target3" names="target3">
977 target3
978 \u2019, quoted \u201c
979 <target ids="target4" names="target4">
980 target4
981 \u201d,
982 quoted \xab
983 <target ids="target5" names="target5">
984 target5
985 \xbb
986 """],
987 [u"""\
988 _`'target1'` with quotes, _`"target2"` with quotes,
989 _`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes,
990 _`\xabtarget5\xbb` with quotes
991 """,
992 u"""\
993 <document source="test data">
994 <paragraph>
995 <target ids="target1" names="'target1'">
996 'target1'
997 with quotes,
998 <target ids="target2" names=""target2"">
999 "target2"
1000 with quotes,
1001 <target ids="target3" names="\u2018target3\u2019">
1002 \u2018target3\u2019
1003 with quotes,
1004 <target ids="target4" names="\u201ctarget4\u201d">
1005 \u201ctarget4\u201d
1006 with quotes,
1007 <target ids="target5" names="\xabtarget5\xbb">
1008 \xabtarget5\xbb
1009 with quotes
1010 """],
1011 ["""\
1012 But this isn't a _target; targets require backquotes.
1014 And _`this`_ is just plain confusing.
1015 """,
1016 """\
1017 <document source="test data">
1018 <paragraph>
1019 But this isn't a _target; targets require backquotes.
1020 <paragraph>
1021 And \n\
1022 <problematic ids="id2" refid="id1">
1024 this`_ is just plain confusing.
1025 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
1026 <paragraph>
1027 Inline target start-string without end-string.
1028 """],
1029 ["""\
1030 _`inline target without closing backquote
1031 """,
1032 """\
1033 <document source="test data">
1034 <paragraph>
1035 <problematic ids="id2" refid="id1">
1037 inline target without closing backquote
1038 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1039 <paragraph>
1040 Inline target start-string without end-string.
1041 """],
1044 totest['footnote_reference'] = [
1045 ["""\
1046 [1]_
1047 """,
1048 """\
1049 <document source="test data">
1050 <paragraph>
1051 <footnote_reference ids="id1" refname="1">
1053 """],
1054 ["""\
1055 [#]_
1056 """,
1057 """\
1058 <document source="test data">
1059 <paragraph>
1060 <footnote_reference auto="1" ids="id1">
1061 """],
1062 ["""\
1063 [#label]_
1064 """,
1065 """\
1066 <document source="test data">
1067 <paragraph>
1068 <footnote_reference auto="1" ids="id1" refname="label">
1069 """],
1070 ["""\
1071 [*]_
1072 """,
1073 """\
1074 <document source="test data">
1075 <paragraph>
1076 <footnote_reference auto="*" ids="id1">
1077 """],
1078 ["""\
1079 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1080 """,
1081 """\
1082 <document source="test data">
1083 <paragraph>
1084 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1085 """],
1088 totest['citation_reference'] = [
1089 ["""\
1090 [citation]_
1091 """,
1092 """\
1093 <document source="test data">
1094 <paragraph>
1095 <citation_reference ids="id1" refname="citation">
1096 citation
1097 """],
1098 ["""\
1099 [citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_
1100 """,
1101 """\
1102 <document source="test data">
1103 <paragraph>
1104 <citation_reference ids="id1" refname="citation">
1105 citation
1106 and \n\
1107 <citation_reference ids="id2" refname="cit-ation">
1108 cit-ation
1109 and \n\
1110 <citation_reference ids="id3" refname="cit.ation">
1111 cit.ation
1112 and \n\
1113 <citation_reference ids="id4" refname="cit1">
1114 CIT1
1115 but not [CIT 1]_
1116 """],
1117 ["""\
1118 Adjacent citation refs are not possible: [citation]_[CIT1]_
1119 """,
1120 """\
1121 <document source="test data">
1122 <paragraph>
1123 Adjacent citation refs are not possible: [citation]_[CIT1]_
1124 """],
1127 totest['substitution_references'] = [
1128 ["""\
1129 |subref|
1130 """,
1131 """\
1132 <document source="test data">
1133 <paragraph>
1134 <substitution_reference refname="subref">
1135 subref
1136 """],
1137 ["""\
1138 |subref|_ and |subref|__
1139 """,
1140 """\
1141 <document source="test data">
1142 <paragraph>
1143 <reference refname="subref">
1144 <substitution_reference refname="subref">
1145 subref
1146 and \n\
1147 <reference anonymous="1">
1148 <substitution_reference refname="subref">
1149 subref
1150 """],
1151 ["""\
1152 |substitution reference|
1153 """,
1154 """\
1155 <document source="test data">
1156 <paragraph>
1157 <substitution_reference refname="substitution reference">
1158 substitution reference
1159 """],
1160 ["""\
1161 |substitution
1162 reference|
1163 """,
1164 """\
1165 <document source="test data">
1166 <paragraph>
1167 <substitution_reference refname="substitution reference">
1168 substitution
1169 reference
1170 """],
1171 ["""\
1172 |substitution reference without closing verbar
1173 """,
1174 """\
1175 <document source="test data">
1176 <paragraph>
1177 <problematic ids="id2" refid="id1">
1179 substitution reference without closing verbar
1180 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1181 <paragraph>
1182 Inline substitution_reference start-string without end-string.
1183 """],
1184 ["""\
1185 first | then || and finally |||
1186 """,
1187 """\
1188 <document source="test data">
1189 <paragraph>
1190 first | then || and finally |||
1191 """],
1194 totest['standalone_hyperlink'] = [
1195 ["""\
1196 http://www.standalone.hyperlink.com
1198 http:/one-slash-only.absolute.path
1200 [http://example.com]
1202 (http://example.com)
1204 <http://example.com>
1206 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1208 http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text)
1210 http://[3ffe:2a00:100:7031::1]/
1212 mailto:someone@somewhere.com
1214 news:comp.lang.python
1216 An email address in a sentence: someone@somewhere.com.
1218 ftp://ends.with.a.period.
1220 (a.question.mark@end?)
1221 """,
1222 """\
1223 <document source="test data">
1224 <paragraph>
1225 <reference refuri="http://www.standalone.hyperlink.com">
1226 http://www.standalone.hyperlink.com
1227 <paragraph>
1228 <reference refuri="http:/one-slash-only.absolute.path">
1229 http:/one-slash-only.absolute.path
1230 <paragraph>
1232 <reference refuri="http://example.com">
1233 http://example.com
1235 <paragraph>
1237 <reference refuri="http://example.com">
1238 http://example.com
1240 <paragraph>
1242 <reference refuri="http://example.com">
1243 http://example.com
1245 <paragraph>
1246 <reference refuri="http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html">
1247 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1248 <paragraph>
1249 <reference refuri="http://[3ffe:2a00:100:7031::1">
1250 http://[3ffe:2a00:100:7031::1
1251 ] (the final "]" is ambiguous in text)
1252 <paragraph>
1253 <reference refuri="http://[3ffe:2a00:100:7031::1]/">
1254 http://[3ffe:2a00:100:7031::1]/
1255 <paragraph>
1256 <reference refuri="mailto:someone@somewhere.com">
1257 mailto:someone@somewhere.com
1258 <paragraph>
1259 <reference refuri="news:comp.lang.python">
1260 news:comp.lang.python
1261 <paragraph>
1262 An email address in a sentence: \n\
1263 <reference refuri="mailto:someone@somewhere.com">
1264 someone@somewhere.com
1266 <paragraph>
1267 <reference refuri="ftp://ends.with.a.period">
1268 ftp://ends.with.a.period
1270 <paragraph>
1272 <reference refuri="mailto:a.question.mark@end">
1273 a.question.mark@end
1275 """],
1276 [r"""
1277 Valid URLs with escaped markup characters:
1279 http://example.com/\*content\*/whatever
1281 http://example.com/\*content*/whatever
1282 """,
1283 """\
1284 <document source="test data">
1285 <paragraph>
1286 Valid URLs with escaped markup characters:
1287 <paragraph>
1288 <reference refuri="http://example.com/*content*/whatever">
1289 http://example.com/*content*/whatever
1290 <paragraph>
1291 <reference refuri="http://example.com/*content*/whatever">
1292 http://example.com/*content*/whatever
1293 """],
1294 ["""\
1295 Valid URLs may end with punctuation inside "<>":
1297 <http://example.org/ends-with-dot.>
1298 """,
1299 """\
1300 <document source="test data">
1301 <paragraph>
1302 Valid URLs may end with punctuation inside "<>":
1303 <paragraph>
1305 <reference refuri="http://example.org/ends-with-dot.">
1306 http://example.org/ends-with-dot.
1308 """],
1309 ["""\
1310 Valid URLs with interesting endings:
1312 http://example.org/ends-with-pluses++
1313 """,
1314 """\
1315 <document source="test data">
1316 <paragraph>
1317 Valid URLs with interesting endings:
1318 <paragraph>
1319 <reference refuri="http://example.org/ends-with-pluses++">
1320 http://example.org/ends-with-pluses++
1321 """],
1322 ["""\
1323 None of these are standalone hyperlinks (their "schemes"
1324 are not recognized): signal:noise, a:b.
1325 """,
1326 """\
1327 <document source="test data">
1328 <paragraph>
1329 None of these are standalone hyperlinks (their "schemes"
1330 are not recognized): signal:noise, a:b.
1331 """],
1332 ["""\
1333 Escaped email addresses are not recognized: test\@example.org
1334 """,
1335 """\
1336 <document source="test data">
1337 <paragraph>
1338 Escaped email addresses are not recognized: test@example.org
1339 """],
# Test cases for the inline-markup recognition rules.  Each entry is a
# two-item list: the reStructuredText input followed by the expected
# document-tree dump that the parser output is compared against.
1342 totest['markup recognition rules'] = [
# Double underscores wrapped around a word are not treated as markup;
# the text is expected to come through unchanged.
1343 ["""\
1344 __This__ should be left alone.
1345 """,
1346 """\
1347 <document source="test data">
1348 <paragraph>
1349 __This__ should be left alone.
1350 """],
# Backslash-escaped whitespace (including an escaped newline) is removed
# from the output, which lets markup sit directly next to other text.
1351 [r"""
1352 Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
1353 with backslash-escaped whitespace, including new\
1354 lines.
1355 """,
1356 """\
1357 <document source="test data">
1358 <paragraph>
1359 Character-level m
1360 <emphasis>
1362 <strong>
1364 <literal>
1366 <title_reference>
1369 with backslash-escaped whitespace, including newlines.
1370 """],
# Emphasis delimited by Unicode dashes/hyphens, inverted question and
# exclamation marks, and no-break spaces.  In the last case the no-break
# spaces just inside the asterisks are expected to stay in the emphasis text.
1371 [u"""\
1372 text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*.
1373 \u00bf*punctuation*? \u00a1*examples*!\u00a0*\u00a0no-break-space\u00a0*.
1374 """,
1375 u"""\
1376 <document source="test data">
1377 <paragraph>
1378 text-
1379 <emphasis>
1380 separated
1381 \u2010
1382 <emphasis>
1384 \u2011
1385 <emphasis>
1386 various
1387 \u2012
1388 <emphasis>
1389 dashes
1390 \u2013
1391 <emphasis>
1393 \u2014
1394 <emphasis>
1395 hyphens
1397 \xbf
1398 <emphasis>
1399 punctuation
1400 ? \xa1
1401 <emphasis>
1402 examples
1403 !\xa0
1404 <emphasis>
1405 \u00a0no-break-space\u00a0
1407 """],
1408 # Whitespace characters:
1409 # \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, fails in Python 2.4
# Each whitespace character below surrounds an emphasized phrase naming it.
# The final LINE SEPARATOR case is expected to start a new paragraph
# instead of staying inline.
1410 [u"""\
1411 text separated by
1412 *newline*
1413 or *space* or one of
1414 \xa0*NO-BREAK SPACE*\xa0,
1415 \u1680*OGHAM SPACE MARK*\u1680,
1416 \u2000*EN QUAD*\u2000,
1417 \u2001*EM QUAD*\u2001,
1418 \u2002*EN SPACE*\u2002,
1419 \u2003*EM SPACE*\u2003,
1420 \u2004*THREE-PER-EM SPACE*\u2004,
1421 \u2005*FOUR-PER-EM SPACE*\u2005,
1422 \u2006*SIX-PER-EM SPACE*\u2006,
1423 \u2007*FIGURE SPACE*\u2007,
1424 \u2008*PUNCTUATION SPACE*\u2008,
1425 \u2009*THIN SPACE*\u2009,
1426 \u200a*HAIR SPACE*\u200a,
1427 \u202f*NARROW NO-BREAK SPACE*\u202f,
1428 \u205f*MEDIUM MATHEMATICAL SPACE*\u205f,
1429 \u3000*IDEOGRAPHIC SPACE*\u3000,
1430 \u2028*LINE SEPARATOR*\u2028
1431 """,
1432 u"""\
1433 <document source="test data">
1434 <paragraph>
1435 text separated by
1436 <emphasis>
1437 newline
1439 or \n\
1440 <emphasis>
1441 space
1442 or one of
1443 \xa0
1444 <emphasis>
1445 NO-BREAK SPACE
1446 \xa0,
1447 \u1680
1448 <emphasis>
1449 OGHAM SPACE MARK
1450 \u1680,
1451 \u2000
1452 <emphasis>
1453 EN QUAD
1454 \u2000,
1455 \u2001
1456 <emphasis>
1457 EM QUAD
1458 \u2001,
1459 \u2002
1460 <emphasis>
1461 EN SPACE
1462 \u2002,
1463 \u2003
1464 <emphasis>
1465 EM SPACE
1466 \u2003,
1467 \u2004
1468 <emphasis>
1469 THREE-PER-EM SPACE
1470 \u2004,
1471 \u2005
1472 <emphasis>
1473 FOUR-PER-EM SPACE
1474 \u2005,
1475 \u2006
1476 <emphasis>
1477 SIX-PER-EM SPACE
1478 \u2006,
1479 \u2007
1480 <emphasis>
1481 FIGURE SPACE
1482 \u2007,
1483 \u2008
1484 <emphasis>
1485 PUNCTUATION SPACE
1486 \u2008,
1487 \u2009
1488 <emphasis>
1489 THIN SPACE
1490 \u2009,
1491 \u200a
1492 <emphasis>
1493 HAIR SPACE
1494 \u200a,
1495 \u202f
1496 <emphasis>
1497 NARROW NO-BREAK SPACE
1498 \u202f,
1499 \u205f
1500 <emphasis>
1501 MEDIUM MATHEMATICAL SPACE
1502 \u205f,
1503 \u3000
1504 <emphasis>
1505 IDEOGRAPHIC SPACE
1506 \u3000,
1507 <paragraph>
1508 <emphasis>
1509 LINE SEPARATOR
1510 """],
# A lone asterisk enclosed by a matching opener/closer pair (quotes,
# guillemets, inverted punctuation) must stay plain text, whereas an
# apostrophe directly before the asterisk still allows emphasis.
1511 [u"""\
1512 None of these should be markup (matched openers & closers):
1514 \u2018*\u2019 \u201c*\u201d \xab*\xbb \u00bf*? \u00a1*!
1516 But this should:
1518 l\u2019*exception*.
1519 """,
1520 u"""\
1521 <document source="test data">
1522 <paragraph>
1523 None of these should be markup (matched openers & closers):
1524 <paragraph>
1525 \u2018*\u2019 \u201c*\u201d \xab*\xbb \xbf*? \xa1*!
1526 <paragraph>
1527 But this should:
1528 <paragraph>
1529 l\u2019
1530 <emphasis>
1531 exception
1533 """],
if __name__ == '__main__':
    # Run as a script: hand control to unittest's command-line runner and
    # name the module-level ``suite`` as the default test to execute.
    from unittest import main as run_tests
    run_tests(defaultTest='suite')