5 # Author: David Goodger <goodger@python.org>
6 # Copyright: This module has been placed in the public domain.
9 Tests for inline markup in docutils/parsers/rst/states.py.
10 Interpreted text tests are in a separate module, test_interpreted.py.
13 from __init__
import DocutilsTestSupport
16 s
= DocutilsTestSupport
.ParserTestSuite()
17 s
.generateTests(totest
)
22 totest
['emphasis'] = [
27 <document source="test data">
33 l'*emphasis* with the *emphasis*' apostrophe.
34 l\u2019*emphasis* with the *emphasis*\u2019 apostrophe.
37 <document source="test data">
59 <document source="test data">
66 *emphasis without closing asterisk
69 <document source="test data">
71 <problematic ids="id2" refid="id1">
73 emphasis without closing asterisk
74 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
76 Inline emphasis start-string without end-string.
78 [r
"""some punctuation is allowed around inline markup, e.g.
79 /*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters),
80 (*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs)
83 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs)
84 (*), [*], '*' or '"*"' ("quoted" start-string),
85 x*2* or 2*x* (alphanumeric char before),
86 \*args or * (escaped, whitespace behind start-string)
87 or *the\* *stars\* *inside* (escaped, whitespace before end-string).
89 However, '*args' will trigger a warning and may be problematic.
94 <document source="test data">
96 some punctuation is allowed around inline markup, e.g.
122 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs)
123 (*), [*], '*' or '"*"' ("quoted" start-string),
124 x*2* or 2*x* (alphanumeric char before),
125 *args or * (escaped, whitespace behind start-string)
129 (escaped, whitespace before end-string).
132 <problematic ids="id2" refid="id1">
134 args' will trigger a warning and may be problematic.
135 <system_message backrefs="id2" ids="id1" level="2" line="12" source="test data" type="WARNING">
137 Inline emphasis start-string without end-string.
145 Quotes around inline markup:
147 '*emphasis*' "*emphasis*" Straight,
148 ‘*emphasis*’ “*emphasis*” English, ...,
149 « *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* ›
150 « *emphasis* » ‹ *emphasis* › French,
151 „*emphasis*“ ‚*emphasis*‘ »*emphasis*« ›*emphasis*‹ German, Czech, ...,
152 „*emphasis*” «*emphasis*» Romanian,
153 “*emphasis*„ ‘*emphasis*‚ Greek,
154 「*emphasis*」 『*emphasis*』traditional Chinese,
155 ”*emphasis*” ’*emphasis*’ »*emphasis*» ›*emphasis*› Swedish, Finnish,
156 „*emphasis*” ‚*emphasis*’ Polish,
157 „*emphasis*” »*emphasis*« ’*emphasis*’ Hungarian,
160 <document source="test data">
162 Quotes around inline markup:
181 \u202f\xbb \u2039\u202f
184 \u202f\u203a \xab\xa0
194 \u2005\xbb \u2039\u2005
210 \u2039 German, Czech, ...,
231 \u300ftraditional Chinese,
244 \u203a Swedish, Finnish,
264 Emphasized asterisk: *\**
266 Emphasized double asterisk: *\***
269 <document source="test data">
271 Emphasized asterisk: \n\
275 Emphasized double asterisk: \n\
286 <document source="test data">
292 l'**strong** and l\u2019**strong** with apostrophe
295 <document source="test data">
306 quoted '**strong**', quoted "**strong**",
307 quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d,
308 quoted \xab**strong**\xbb
311 <document source="test data">
323 \u2019, quoted \u201c
333 (**strong**) but not (**) or '(** ' or x**2 or \**kwargs or **
335 (however, '**kwargs' will trigger a warning and may be problematic)
338 <document source="test data">
343 ) but not (**) or '(** ' or x**2 or **kwargs or **
346 <problematic ids="id2" refid="id1">
348 kwargs' will trigger a warning and may be problematic)
349 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
351 Inline strong start-string without end-string.
354 Strong asterisk: *****
356 Strong double asterisk: ******
359 <document source="test data">
365 Strong double asterisk: \n\
370 **strong without closing asterisks
373 <document source="test data">
375 <problematic ids="id2" refid="id1">
377 strong without closing asterisks
378 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
380 Inline strong start-string without end-string.
384 totest
['literal'] = [
389 <document source="test data">
398 <document source="test data">
407 <document source="test data">
416 <document source="test data">
422 l'``literal`` and l\u2019``literal`` with apostrophe
425 <document source="test data">
436 quoted '``literal``', quoted "``literal``",
437 quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
438 quoted \xab``literal``\xbb
441 <document source="test data">
453 \u2019, quoted \u201c
463 ``'literal'`` with quotes, ``"literal"`` with quotes,
464 ``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
465 ``\xabliteral\xbb`` with quotes
468 <document source="test data">
487 ``literal ``TeX quotes'' & \backslash`` but not "``" or ``
489 (however, ``standalone TeX quotes'' will trigger a warning
490 and may be problematic)
493 <document source="test data">
496 literal ``TeX quotes'' & \\backslash
500 <problematic ids="id2" refid="id1">
502 standalone TeX quotes'' will trigger a warning
503 and may be problematic)
504 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
506 Inline literal start-string without end-string.
509 Find the ```interpreted text``` in this paragraph!
512 <document source="test data">
520 ``literal without closing backquotes
523 <document source="test data">
525 <problematic ids="id2" refid="id1">
527 literal without closing backquotes
528 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
530 Inline literal start-string without end-string.
533 Python ``list``\s use square bracket syntax.
536 <document source="test data">
541 s use square bracket syntax.
545 totest
['references'] = [
550 <document source="test data">
552 <reference name="ref" refname="ref">
556 l'ref_ and l\u2019ref_ with apostrophe
559 <document source="test data">
562 <reference name="ref" refname="ref">
565 <reference name="ref" refname="ref">
570 quoted 'ref_', quoted "ref_",
571 quoted \u2018ref_\u2019, quoted \u201cref_\u201d,
573 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
574 \u201cref ref\u201d_, or \xabref ref\xbb_
577 <document source="test data">
580 <reference name="ref" refname="ref">
583 <reference name="ref" refname="ref">
587 <reference name="ref" refname="ref">
589 \u2019, quoted \u201c
590 <reference name="ref" refname="ref">
594 <reference name="ref" refname="ref">
597 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
598 \u201cref ref\u201d_, or \xabref ref\xbb_
604 <document source="test data">
606 <reference anonymous="1" name="ref">
610 l'ref__ and l\u2019ref__ with apostrophe
613 <document source="test data">
616 <reference anonymous="1" name="ref">
619 <reference anonymous="1" name="ref">
624 quoted 'ref__', quoted "ref__",
625 quoted \u2018ref__\u2019, quoted \u201cref__\u201d,
626 quoted \xabref__\xbb,
627 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
628 \u201cref ref\u201d__, or \xabref ref\xbb__
631 <document source="test data">
634 <reference anonymous="1" name="ref">
637 <reference anonymous="1" name="ref">
641 <reference anonymous="1" name="ref">
643 \u2019, quoted \u201c
644 <reference anonymous="1" name="ref">
648 <reference anonymous="1" name="ref">
651 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
652 \u201cref ref\u201d__, or \xabref ref\xbb__
655 ref_, r_, r_e-f_, -ref_, and anonymousref__,
656 but not _ref_ or __attr__ or object.__attr__
659 <document source="test data">
661 <reference name="ref" refname="ref">
664 <reference name="r" refname="r">
667 <reference name="r_e-f" refname="r_e-f">
670 <reference name="ref" refname="ref">
673 <reference anonymous="1" name="anonymousref">
676 but not _ref_ or __attr__ or object.__attr__
680 totest
['phrase_references'] = [
685 <document source="test data">
687 <reference name="phrase reference" refname="phrase reference">
691 l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe
694 <document source="test data">
697 <reference name="phrase reference" refname="phrase reference">
700 <reference name="phrase reference" refname="phrase reference">
705 quoted '`phrase reference`_', quoted "`phrase reference`_",
706 quoted \u2018`phrase reference`_\u2019,
707 quoted \u201c`phrase reference`_\u201d,
708 quoted \xab`phrase reference`_\xbb
711 <document source="test data">
714 <reference name="phrase reference" refname="phrase reference">
717 <reference name="phrase reference" refname="phrase reference">
721 <reference name="phrase reference" refname="phrase reference">
725 <reference name="phrase reference" refname="phrase reference">
729 <reference name="phrase reference" refname="phrase reference">
734 `'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes,
735 `\u2018phrase reference\u2019`_ with quotes,
736 `\u201cphrase reference\u201d`_ with quotes,
737 `\xabphrase reference\xbb`_ with quotes
740 <document source="test data">
742 <reference name="'phrase reference'" refname="'phrase reference'">
745 <reference name=""phrase reference"" refname=""phrase reference"">
748 <reference name="\u2018phrase reference\u2019" refname="\u2018phrase reference\u2019">
749 \u2018phrase reference\u2019
751 <reference name="\u201cphrase reference\u201d" refname="\u201cphrase reference\u201d">
752 \u201cphrase reference\u201d
754 <reference name="\xabphrase reference\xbb" refname="\xabphrase reference\xbb">
755 \xabphrase reference\xbb
759 `anonymous reference`__
762 <document source="test data">
764 <reference anonymous="1" name="anonymous reference">
768 l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe
771 <document source="test data">
774 <reference anonymous="1" name="anonymous reference">
777 <reference anonymous="1" name="anonymous reference">
782 quoted '`anonymous reference`__', quoted "`anonymous reference`__",
783 quoted \u2018`anonymous reference`__\u2019,
784 quoted \u201c`anonymous reference`__\u201d,
785 quoted \xab`anonymous reference`__\xbb
788 <document source="test data">
791 <reference anonymous="1" name="anonymous reference">
794 <reference anonymous="1" name="anonymous reference">
798 <reference anonymous="1" name="anonymous reference">
802 <reference anonymous="1" name="anonymous reference">
806 <reference anonymous="1" name="anonymous reference">
811 `'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes,
812 `\u2018anonymous reference\u2019`__ with quotes,
813 `\u201canonymous reference\u201d`__ with quotes,
814 `\xabanonymous reference\xbb`__ with quotes
817 <document source="test data">
819 <reference anonymous="1" name="'anonymous reference'">
820 'anonymous reference'
822 <reference anonymous="1" name=""anonymous reference"">
823 "anonymous reference"
825 <reference anonymous="1" name="\u2018anonymous reference\u2019">
826 \u2018anonymous reference\u2019
828 <reference anonymous="1" name="\u201canonymous reference\u201d">
829 \u201canonymous reference\u201d
831 <reference anonymous="1" name="\xabanonymous reference\xbb">
832 \xabanonymous reference\xbb
840 <document source="test data">
842 <reference name="phrase reference across lines" refname="phrase reference across lines">
847 `phrase\`_ reference`_
850 <document source="test data">
852 <reference name="phrase`_ reference" refname="phrase`_ reference">
856 Invalid phrase reference:
858 :role:`phrase reference`_
861 <document source="test data">
863 Invalid phrase reference:
865 <problematic ids="id2" refid="id1">
866 :role:`phrase reference`_
867 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
869 Mismatch: both interpreted text role prefix and reference suffix.
872 Invalid phrase reference:
874 `phrase reference`:role:_
877 <document source="test data">
879 Invalid phrase reference:
881 <problematic ids="id2" refid="id1">
882 `phrase reference`:role:_
883 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
885 Mismatch: both interpreted text role suffix and reference suffix.
888 `phrase reference_ without closing backquote
891 <document source="test data">
893 <problematic ids="id2" refid="id1">
896 <reference name="reference" refname="reference">
898 without closing backquote
899 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
901 Inline interpreted text or phrase reference start-string without end-string.
904 `anonymous phrase reference__ without closing backquote
907 <document source="test data">
909 <problematic ids="id2" refid="id1">
912 <reference anonymous="1" name="reference">
914 without closing backquote
915 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
917 Inline interpreted text or phrase reference start-string without end-string.
921 totest
['embedded_URIs'] = [
923 `phrase reference <http://example.com>`_
926 <document source="test data">
928 <reference name="phrase reference" refuri="http://example.com">
930 <target ids="phrase-reference" names="phrase\ reference" refuri="http://example.com">
933 `anonymous reference <http://example.com>`__
936 <document source="test data">
938 <reference name="anonymous reference" refuri="http://example.com">
942 `embedded URI on next line
943 <http://example.com>`__
946 <document source="test data">
948 <reference name="embedded URI on next line" refuri="http://example.com">
949 embedded URI on next line
952 `embedded URI across lines <http://example.com/
956 <document source="test data">
958 <reference name="embedded URI across lines" refuri="http://example.com/long/path">
959 embedded URI across lines
962 `embedded URI with whitespace <http://example.com/
963 long/path /and /whitespace>`__
966 <document source="test data">
968 <reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
969 embedded URI with whitespace
972 `embedded email address <jdoe@example.com>`__
974 `embedded email address broken across lines <jdoe
978 <document source="test data">
980 <reference name="embedded email address" refuri="mailto:jdoe@example.com">
981 embedded email address
983 <reference name="embedded email address broken across lines" refuri="mailto:jdoe@example.com">
984 embedded email address broken across lines
987 `embedded URI with too much whitespace < http://example.com/
988 long/path /and /whitespace >`__
990 `embedded URI with too much whitespace at end <http://example.com/
991 long/path /and /whitespace >`__
993 `embedded URI with no preceding whitespace<http://example.com>`__
995 `escaped URI \<http://example.com>`__
997 See `HTML Anchors: \<a>`_.
1000 <document source="test data">
1002 <reference anonymous="1" name="embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace >">
1003 embedded URI with too much whitespace < http://example.com/
1004 long/path /and /whitespace >
1006 <reference anonymous="1" name="embedded URI with too much whitespace at end <http://example.com/ long/path /and /whitespace >">
1007 embedded URI with too much whitespace at end <http://example.com/
1008 long/path /and /whitespace >
1010 <reference anonymous="1" name="embedded URI with no preceding whitespace<http://example.com>">
1011 embedded URI with no preceding whitespace<http://example.com>
1013 <reference anonymous="1" name="escaped URI <http://example.com>">
1014 escaped URI <http://example.com>
1017 <reference name="HTML Anchors: <a>" refname="html anchors: <a>">
1022 Relative URIs' reference text can be omitted:
1029 <document source="test data">
1031 Relative URIs' reference text can be omitted:
1033 <reference name="reference" refuri="reference">
1035 <target ids="reference" names="reference" refuri="reference">
1037 <reference name="anonymous" refuri="anonymous">
1041 Escape trailing low-line char in URIs:
1048 <document source="test data">
1050 Escape trailing low-line char in URIs:
1052 <reference name="reference_" refuri="reference_">
1054 <target ids="reference" names="reference_" refuri="reference_">
1056 <reference name="anonymous_" refuri="anonymous_">
1061 totest
['embedded_aliases'] = [
1063 `phrase reference <alias_>`_
1066 <document source="test data">
1068 <reference name="phrase reference" refname="alias">
1070 <target names="phrase\ reference" refname="alias">
1073 `anonymous reference <alias_>`__
1076 <document source="test data">
1078 <reference name="anonymous reference" refname="alias">
1082 `embedded alias on next line
1086 <document source="test data">
1088 <reference name="embedded alias on next line" refname="alias">
1089 embedded alias on next line
1092 `embedded alias across lines <alias
1096 <document source="test data">
1098 <reference name="embedded alias across lines" refname="alias phrase">
1099 embedded alias across lines
1102 `embedded alias with whitespace <alias
1106 <document source="test data">
1108 <reference name="embedded alias with whitespace" refname="alias long phrase">
1109 embedded alias with whitespace
1112 `embedded alias with too much whitespace < alias_ >`__
1114 `embedded alias with no preceding whitespace<alias_>`__
1117 <document source="test data">
1119 <reference anonymous="1" name="embedded alias with too much whitespace < alias_ >">
1120 embedded alias with too much whitespace < alias_ >
1122 <reference anonymous="1" name="embedded alias with no preceding whitespace<alias_>">
1123 embedded alias with no preceding whitespace<alias_>
1127 totest
['inline_targets'] = [
1131 Here is _`another target` in some text. And _`yet
1132 another target`, spanning lines.
1134 _`Here is a TaRgeT` with case and spacial difficulties.
1137 <document source="test data">
1139 <target ids="target" names="target">
1143 <target ids="another-target" names="another\ target">
1145 in some text. And \n\
1146 <target ids="yet-another-target" names="yet\ another\ target">
1151 <target ids="here-is-a-target" names="here\ is\ a\ target">
1153 with case and spacial difficulties.
1156 l'_`target1` and l\u2019_`target2` with apostrophe
1159 <document source="test data">
1162 <target ids="target1" names="target1">
1165 <target ids="target2" names="target2">
1170 quoted '_`target1`', quoted "_`target2`",
1171 quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d,
1172 quoted \xab_`target5`\xbb
1175 <document source="test data">
1178 <target ids="target1" names="target1">
1181 <target ids="target2" names="target2">
1185 <target ids="target3" names="target3">
1187 \u2019, quoted \u201c
1188 <target ids="target4" names="target4">
1192 <target ids="target5" names="target5">
1197 _`'target1'` with quotes, _`"target2"` with quotes,
1198 _`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes,
1199 _`\xabtarget5\xbb` with quotes
1202 <document source="test data">
1204 <target ids="target1" names="'target1'">
1207 <target ids="target2" names=""target2"">
1210 <target ids="target3" names="\u2018target3\u2019">
1213 <target ids="target4" names="\u201ctarget4\u201d">
1216 <target ids="target5" names="\xabtarget5\xbb">
1221 But this isn't a _target; targets require backquotes.
1223 And _`this`_ is just plain confusing.
1226 <document source="test data">
1228 But this isn't a _target; targets require backquotes.
1231 <problematic ids="id2" refid="id1">
1233 this`_ is just plain confusing.
1234 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
1236 Inline target start-string without end-string.
1239 _`inline target without closing backquote
1242 <document source="test data">
1244 <problematic ids="id2" refid="id1">
1246 inline target without closing backquote
1247 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1249 Inline target start-string without end-string.
1253 totest
['footnote_reference'] = [
1258 <document source="test data">
1260 <footnote_reference ids="id1" refname="1">
1267 <document source="test data">
1269 <footnote_reference auto="1" ids="id1">
1275 <document source="test data">
1277 <footnote_reference auto="1" ids="id1" refname="label">
1283 <document source="test data">
1285 <footnote_reference auto="*" ids="id1">
1288 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1291 <document source="test data">
1293 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1297 totest
['citation_reference'] = [
1302 <document source="test data">
1304 <citation_reference ids="id1" refname="citation">
1308 [citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_
1311 <document source="test data">
1313 <citation_reference ids="id1" refname="citation">
1316 <citation_reference ids="id2" refname="cit-ation">
1319 <citation_reference ids="id3" refname="cit.ation">
1322 <citation_reference ids="id4" refname="cit1">
1327 Adjacent citation refs are not possible: [citation]_[CIT1]_
1330 <document source="test data">
1332 Adjacent citation refs are not possible: [citation]_[CIT1]_
1336 totest
['substitution_references'] = [
1341 <document source="test data">
1343 <substitution_reference refname="subref">
1347 |subref|_ and |subref|__
1350 <document source="test data">
1352 <reference refname="subref">
1353 <substitution_reference refname="subref">
1356 <reference anonymous="1">
1357 <substitution_reference refname="subref">
1361 |substitution reference|
1364 <document source="test data">
1366 <substitution_reference refname="substitution reference">
1367 substitution reference
1374 <document source="test data">
1376 <substitution_reference refname="substitution reference">
1381 |substitution reference without closing verbar
1384 <document source="test data">
1386 <problematic ids="id2" refid="id1">
1388 substitution reference without closing verbar
1389 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1391 Inline substitution_reference start-string without end-string.
1394 first | then || and finally |||
1397 <document source="test data">
1399 first | then || and finally |||
1403 totest
['standalone_hyperlink'] = [
1405 http://www.standalone.hyperlink.com
1407 http:/one-slash-only.absolute.path
1409 [http://example.com]
1411 (http://example.com)
1413 <http://example.com>
1415 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1417 http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text)
1419 http://[3ffe:2a00:100:7031::1]/
1421 mailto:someone@somewhere.com
1423 news:comp.lang.python
1425 An email address in a sentence: someone@somewhere.com.
1427 ftp://ends.with.a.period.
1429 (a.question.mark@end?)
1432 <document source="test data">
1434 <reference refuri="http://www.standalone.hyperlink.com">
1435 http://www.standalone.hyperlink.com
1437 <reference refuri="http:/one-slash-only.absolute.path">
1438 http:/one-slash-only.absolute.path
1441 <reference refuri="http://example.com">
1446 <reference refuri="http://example.com">
1451 <reference refuri="http://example.com">
1455 <reference refuri="http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html">
1456 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1458 <reference refuri="http://[3ffe:2a00:100:7031::1">
1459 http://[3ffe:2a00:100:7031::1
1460 ] (the final "]" is ambiguous in text)
1462 <reference refuri="http://[3ffe:2a00:100:7031::1]/">
1463 http://[3ffe:2a00:100:7031::1]/
1465 <reference refuri="mailto:someone@somewhere.com">
1466 mailto:someone@somewhere.com
1468 <reference refuri="news:comp.lang.python">
1469 news:comp.lang.python
1471 An email address in a sentence: \n\
1472 <reference refuri="mailto:someone@somewhere.com">
1473 someone@somewhere.com
1476 <reference refuri="ftp://ends.with.a.period">
1477 ftp://ends.with.a.period
1481 <reference refuri="mailto:a.question.mark@end">
1486 Valid URLs with escaped markup characters:
1488 http://example.com/\*content\*/whatever
1490 http://example.com/\*content*/whatever
1493 <document source="test data">
1495 Valid URLs with escaped markup characters:
1497 <reference refuri="http://example.com/*content*/whatever">
1498 http://example.com/*content*/whatever
1500 <reference refuri="http://example.com/*content*/whatever">
1501 http://example.com/*content*/whatever
1504 Valid URLs may end with punctuation inside "<>":
1506 <http://example.org/ends-with-dot.>
1509 <document source="test data">
1511 Valid URLs may end with punctuation inside "<>":
1514 <reference refuri="http://example.org/ends-with-dot.">
1515 http://example.org/ends-with-dot.
1519 Valid URLs with interesting endings:
1521 http://example.org/ends-with-pluses++
1524 <document source="test data">
1526 Valid URLs with interesting endings:
1528 <reference refuri="http://example.org/ends-with-pluses++">
1529 http://example.org/ends-with-pluses++
1532 None of these are standalone hyperlinks (their "schemes"
1533 are not recognized): signal:noise, a:b.
1536 <document source="test data">
1538 None of these are standalone hyperlinks (their "schemes"
1539 are not recognized): signal:noise, a:b.
1542 Escaped email addresses are not recognized: test\@example.org
1545 <document source="test data">
1547 Escaped email addresses are not recognized: test@example.org
1551 totest
['markup recognition rules'] = [
1553 __This__ should be left alone.
1556 <document source="test data">
1558 __This__ should be left alone.
1561 Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
1562 with backslash-escaped whitespace, including new\
1566 <document source="test data">
1578 with backslash-escaped whitespace, including newlines.
1581 text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*.
1582 \u00bf*punctuation*? \u00a1*examples*!\u00a0*\u00a0no-break-space\u00a0*.
1585 <document source="test data">
1614 \u00a0no-break-space\u00a0
1617 # Whitespace characters:
1618 # \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, fails in Python 2.4
1622 or *space* or one of
1623 \xa0*NO-BREAK SPACE*\xa0,
1624 \u1680*OGHAM SPACE MARK*\u1680,
1625 \u2000*EN QUAD*\u2000,
1626 \u2001*EM QUAD*\u2001,
1627 \u2002*EN SPACE*\u2002,
1628 \u2003*EM SPACE*\u2003,
1629 \u2004*THREE-PER-EM SPACE*\u2004,
1630 \u2005*FOUR-PER-EM SPACE*\u2005,
1631 \u2006*SIX-PER-EM SPACE*\u2006,
1632 \u2007*FIGURE SPACE*\u2007,
1633 \u2008*PUNCTUATION SPACE*\u2008,
1634 \u2009*THIN SPACE*\u2009,
1635 \u200a*HAIR SPACE*\u200a,
1636 \u202f*NARROW NO-BREAK SPACE*\u202f,
1637 \u205f*MEDIUM MATHEMATICAL SPACE*\u205f,
1638 \u3000*IDEOGRAPHIC SPACE*\u3000,
1639 \u2028*LINE SEPARATOR*\u2028
1642 <document source="test data">
1706 NARROW NO-BREAK SPACE
1710 MEDIUM MATHEMATICAL SPACE
1720 # « * » ‹ * › « * » ‹ * › « * » ‹ * › French,
1722 "Quoted" markup start-string (matched openers & closers) -> no markup:
1724 '*' "*" (*) <*> [*] {*}
1727 Some international quoting styles:
1728 ‘*’ “*” English, ...,
1729 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1732 「*」 『*』traditional Chinese,
1733 ”*” ’*’ »*» ›*› Swedish, Finnish,
1735 „*” »*« ’*’ Hungarian,
1737 But this is „*’ emphasized »*‹.
1740 <document source="test data">
1742 "Quoted" markup start-string (matched openers & closers) -> no markup:
1744 '*' "*" (*) <*> [*] {*}
1747 Some international quoting styles:
1748 ‘*’ “*” English, ...,
1749 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1752 「*」 『*』traditional Chinese,
1753 ”*” ’*’ »*» ›*› Swedish, Finnish,
1755 „*” »*« ’*’ Hungarian,
1765 if __name__
== '__main__':
1767 unittest
.main(defaultTest
='suite')