2 # -*- coding: utf-8 -*-
5 # Author: David Goodger <goodger@python.org>
6 # Copyright: This module has been placed in the public domain.
9 Tests for inline markup in docutils/parsers/rst/states.py.
10 Interpreted text tests are in a separate module, test_interpreted.py.
13 from __init__
import DocutilsTestSupport
16 s
= DocutilsTestSupport
.ParserTestSuite()
17 s
.generateTests(totest
)
22 totest
['emphasis'] = [
27 <document source="test data">
33 l'*emphasis* with the *emphasis*' apostrophe.
34 l\u2019*emphasis* with the *emphasis*\u2019 apostrophe.
37 <document source="test data">
59 <document source="test data">
66 *emphasis without closing asterisk
69 <document source="test data">
71 <problematic ids="id2" refid="id1">
73 emphasis without closing asterisk
74 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
76 Inline emphasis start-string without end-string.
78 [r
"""some punctuation is allowed around inline markup, e.g.
79 /*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters),
80 (*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs)
81 *emphasis*., *emphasis*,, *emphasis*!, and *emphasis*\ (closing delimiters),
84 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs),
85 (*), [*], '*' or '"*"' ("quoted" start-string),
86 x*2* or 2*x* (alphanumeric char before),
87 \*args or * (escaped, whitespace behind start-string),
88 or *the\* *stars\* *inside* (escaped, whitespace before end-string).
90 However, '*args' will trigger a warning and may be problematic.
95 <document source="test data">
97 some punctuation is allowed around inline markup, e.g.
132 (closing delimiters),
135 )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs),
136 (*), [*], '*' or '"*"' ("quoted" start-string),
137 x*2* or 2*x* (alphanumeric char before),
138 *args or * (escaped, whitespace behind start-string),
142 (escaped, whitespace before end-string).
145 <problematic ids="id2" refid="id1">
147 args' will trigger a warning and may be problematic.
148 <system_message backrefs="id2" ids="id1" level="2" line="13" source="test data" type="WARNING">
150 Inline emphasis start-string without end-string.
158 Quotes around inline markup:
160 '*emphasis*' "*emphasis*" Straight,
161 ‘*emphasis*’ “*emphasis*” English, ...,
162 « *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* ›
163 « *emphasis* » ‹ *emphasis* › French,
164 „*emphasis*“ ‚*emphasis*‘ »*emphasis*« ›*emphasis*‹ German, Czech, ...,
165 „*emphasis*” «*emphasis*» Romanian,
166 “*emphasis*„ ‘*emphasis*‚ Greek,
167 「*emphasis*」 『*emphasis*』traditional Chinese,
168 ”*emphasis*” ’*emphasis*’ »*emphasis*» ›*emphasis*› Swedish, Finnish,
169 „*emphasis*” ‚*emphasis*’ Polish,
170 „*emphasis*” »*emphasis*« ’*emphasis*’ Hungarian,
173 <document source="test data">
175 Quotes around inline markup:
194 \u202f\xbb \u2039\u202f
197 \u202f\u203a \xab\xa0
207 \u2005\xbb \u2039\u2005
223 \u2039 German, Czech, ...,
244 \u300ftraditional Chinese,
257 \u203a Swedish, Finnish,
277 Emphasized asterisk: *\**
279 Emphasized double asterisk: *\***
282 <document source="test data">
284 Emphasized asterisk: \n\
288 Emphasized double asterisk: \n\
299 <document source="test data">
305 l'**strong** and l\u2019**strong** with apostrophe
308 <document source="test data">
319 quoted '**strong**', quoted "**strong**",
320 quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d,
321 quoted \xab**strong**\xbb
324 <document source="test data">
336 \u2019, quoted \u201c
346 (**strong**) but not (**) or '(** ' or x**2 or \**kwargs or **
348 (however, '**kwargs' will trigger a warning and may be problematic)
351 <document source="test data">
356 ) but not (**) or '(** ' or x**2 or **kwargs or **
359 <problematic ids="id2" refid="id1">
361 kwargs' will trigger a warning and may be problematic)
362 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
364 Inline strong start-string without end-string.
367 Strong asterisk: *****
369 Strong double asterisk: ******
372 <document source="test data">
378 Strong double asterisk: \n\
383 **strong without closing asterisks
386 <document source="test data">
388 <problematic ids="id2" refid="id1">
390 strong without closing asterisks
391 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
393 Inline strong start-string without end-string.
397 totest
['literal'] = [
402 <document source="test data">
411 <document source="test data">
420 <document source="test data">
429 <document source="test data">
435 l'``literal`` and l\u2019``literal`` with apostrophe
438 <document source="test data">
449 quoted '``literal``', quoted "``literal``",
450 quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
451 quoted \xab``literal``\xbb
454 <document source="test data">
466 \u2019, quoted \u201c
476 ``'literal'`` with quotes, ``"literal"`` with quotes,
477 ``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
478 ``\xabliteral\xbb`` with quotes
481 <document source="test data">
500 ``literal ``TeX quotes'' & \backslash`` but not "``" or ``
502 (however, ``standalone TeX quotes'' will trigger a warning
503 and may be problematic)
506 <document source="test data">
509 literal ``TeX quotes'' & \\backslash
513 <problematic ids="id2" refid="id1">
515 standalone TeX quotes'' will trigger a warning
516 and may be problematic)
517 <system_message backrefs="id2" ids="id1" level="2" line="4" source="test data" type="WARNING">
519 Inline literal start-string without end-string.
522 Find the ```interpreted text``` in this paragraph!
525 <document source="test data">
533 ``literal without closing backquotes
536 <document source="test data">
538 <problematic ids="id2" refid="id1">
540 literal without closing backquotes
541 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
543 Inline literal start-string without end-string.
546 Python ``list``\s use square bracket syntax.
549 <document source="test data">
554 s use square bracket syntax.
557 Blank after opening `` not allowed.
560 <document source="test data">
562 Blank after opening `` not allowed.
565 no blank ``after closing``continues`` literal.
568 <document source="test data">
572 after closing``continues
576 dot ``after closing``. is possible.
579 <document source="test data">
588 totest
['references'] = [
593 <document source="test data">
595 <reference name="ref" refname="ref">
599 l'ref_ and l\u2019ref_ with apostrophe
602 <document source="test data">
605 <reference name="ref" refname="ref">
608 <reference name="ref" refname="ref">
613 quoted 'ref_', quoted "ref_",
614 quoted \u2018ref_\u2019, quoted \u201cref_\u201d,
616 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
617 \u201cref ref\u201d_, or \xabref ref\xbb_
620 <document source="test data">
623 <reference name="ref" refname="ref">
626 <reference name="ref" refname="ref">
630 <reference name="ref" refname="ref">
632 \u2019, quoted \u201c
633 <reference name="ref" refname="ref">
637 <reference name="ref" refname="ref">
640 but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
641 \u201cref ref\u201d_, or \xabref ref\xbb_
647 <document source="test data">
649 <reference anonymous="1" name="ref">
653 l'ref__ and l\u2019ref__ with apostrophe
656 <document source="test data">
659 <reference anonymous="1" name="ref">
662 <reference anonymous="1" name="ref">
667 quoted 'ref__', quoted "ref__",
668 quoted \u2018ref__\u2019, quoted \u201cref__\u201d,
669 quoted \xabref__\xbb,
670 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
671 \u201cref ref\u201d__, or \xabref ref\xbb__
674 <document source="test data">
677 <reference anonymous="1" name="ref">
680 <reference anonymous="1" name="ref">
684 <reference anonymous="1" name="ref">
686 \u2019, quoted \u201c
687 <reference anonymous="1" name="ref">
691 <reference anonymous="1" name="ref">
694 but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
695 \u201cref ref\u201d__, or \xabref ref\xbb__
698 ref_, r_, r_e-f_, -ref_, and anonymousref__,
699 but not _ref_ or __attr__ or object.__attr__
702 <document source="test data">
704 <reference name="ref" refname="ref">
707 <reference name="r" refname="r">
710 <reference name="r_e-f" refname="r_e-f">
713 <reference name="ref" refname="ref">
716 <reference anonymous="1" name="anonymousref">
719 but not _ref_ or __attr__ or object.__attr__
723 totest
['phrase_references'] = [
728 <document source="test data">
730 <reference name="phrase reference" refname="phrase reference">
734 l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe
737 <document source="test data">
740 <reference name="phrase reference" refname="phrase reference">
743 <reference name="phrase reference" refname="phrase reference">
748 quoted '`phrase reference`_', quoted "`phrase reference`_",
749 quoted \u2018`phrase reference`_\u2019,
750 quoted \u201c`phrase reference`_\u201d,
751 quoted \xab`phrase reference`_\xbb
754 <document source="test data">
757 <reference name="phrase reference" refname="phrase reference">
760 <reference name="phrase reference" refname="phrase reference">
764 <reference name="phrase reference" refname="phrase reference">
768 <reference name="phrase reference" refname="phrase reference">
772 <reference name="phrase reference" refname="phrase reference">
777 `'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes,
778 `\u2018phrase reference\u2019`_ with quotes,
779 `\u201cphrase reference\u201d`_ with quotes,
780 `\xabphrase reference\xbb`_ with quotes
783 <document source="test data">
785 <reference name="'phrase reference'" refname="'phrase reference'">
788 <reference name=""phrase reference"" refname=""phrase reference"">
791 <reference name="\u2018phrase reference\u2019" refname="\u2018phrase reference\u2019">
792 \u2018phrase reference\u2019
794 <reference name="\u201cphrase reference\u201d" refname="\u201cphrase reference\u201d">
795 \u201cphrase reference\u201d
797 <reference name="\xabphrase reference\xbb" refname="\xabphrase reference\xbb">
798 \xabphrase reference\xbb
802 `anonymous reference`__
805 <document source="test data">
807 <reference anonymous="1" name="anonymous reference">
811 l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe
814 <document source="test data">
817 <reference anonymous="1" name="anonymous reference">
820 <reference anonymous="1" name="anonymous reference">
825 quoted '`anonymous reference`__', quoted "`anonymous reference`__",
826 quoted \u2018`anonymous reference`__\u2019,
827 quoted \u201c`anonymous reference`__\u201d,
828 quoted \xab`anonymous reference`__\xbb
831 <document source="test data">
834 <reference anonymous="1" name="anonymous reference">
837 <reference anonymous="1" name="anonymous reference">
841 <reference anonymous="1" name="anonymous reference">
845 <reference anonymous="1" name="anonymous reference">
849 <reference anonymous="1" name="anonymous reference">
854 `'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes,
855 `\u2018anonymous reference\u2019`__ with quotes,
856 `\u201canonymous reference\u201d`__ with quotes,
857 `\xabanonymous reference\xbb`__ with quotes
860 <document source="test data">
862 <reference anonymous="1" name="'anonymous reference'">
863 'anonymous reference'
865 <reference anonymous="1" name=""anonymous reference"">
866 "anonymous reference"
868 <reference anonymous="1" name="\u2018anonymous reference\u2019">
869 \u2018anonymous reference\u2019
871 <reference anonymous="1" name="\u201canonymous reference\u201d">
872 \u201canonymous reference\u201d
874 <reference anonymous="1" name="\xabanonymous reference\xbb">
875 \xabanonymous reference\xbb
883 <document source="test data">
885 <reference name="phrase reference across lines" refname="phrase reference across lines">
890 `phrase\`_ reference`_
893 <document source="test data">
895 <reference name="phrase`_ reference" refname="phrase`_ reference">
899 Invalid phrase reference:
901 :role:`phrase reference`_
904 <document source="test data">
906 Invalid phrase reference:
908 <problematic ids="id2" refid="id1">
909 :role:`phrase reference`_
910 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
912 Mismatch: both interpreted text role prefix and reference suffix.
915 Invalid phrase reference:
917 `phrase reference`:role:_
920 <document source="test data">
922 Invalid phrase reference:
924 <problematic ids="id2" refid="id1">
925 `phrase reference`:role:_
926 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
928 Mismatch: both interpreted text role suffix and reference suffix.
931 `phrase reference_ without closing backquote
934 <document source="test data">
936 <problematic ids="id2" refid="id1">
939 <reference name="reference" refname="reference">
941 without closing backquote
942 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
944 Inline interpreted text or phrase reference start-string without end-string.
947 `anonymous phrase reference__ without closing backquote
950 <document source="test data">
952 <problematic ids="id2" refid="id1">
955 <reference anonymous="1" name="reference">
957 without closing backquote
958 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
960 Inline interpreted text or phrase reference start-string without end-string.
964 totest
['embedded_URIs'] = [
966 `phrase reference <http://example.com>`_
969 <document source="test data">
971 <reference name="phrase reference" refuri="http://example.com">
973 <target ids="phrase-reference" names="phrase\ reference" refuri="http://example.com">
976 `anonymous reference <http://example.com>`__
979 <document source="test data">
981 <reference name="anonymous reference" refuri="http://example.com">
985 `embedded URI on next line
986 <http://example.com>`__
989 <document source="test data">
991 <reference name="embedded URI on next line" refuri="http://example.com">
992 embedded URI on next line
995 `embedded URI across lines <http://example.com/
999 <document source="test data">
1001 <reference name="embedded URI across lines" refuri="http://example.com/long/path">
1002 embedded URI across lines
1005 `embedded URI with whitespace <http://example.com/
1006 long/path /and /whitespace>`__
1009 <document source="test data">
1011 <reference name="embedded URI with whitespace" refuri="http://example.com/long/path/and/whitespace">
1012 embedded URI with whitespace
1015 `embedded URI with escaped whitespace <http://example.com/a\
1016 long/path\ and/some\ escaped\ whitespace>`__
1018 `<omitted\ reference\ text\ with\ escaped\ whitespace>`__
1021 <document source="test data">
1023 <reference name="embedded URI with escaped whitespace" refuri="http://example.com/a long/path and/some escaped whitespace">
1024 embedded URI with escaped whitespace
1026 <reference name="omitted reference text with escaped whitespace" refuri="omitted reference text with escaped whitespace">
1027 omitted reference text with escaped whitespace
1030 `embedded email address <jdoe@example.com>`__
1032 `embedded email address broken across lines <jdoe
1036 <document source="test data">
1038 <reference name="embedded email address" refuri="mailto:jdoe@example.com">
1039 embedded email address
1041 <reference name="embedded email address broken across lines" refuri="mailto:jdoe@example.com">
1042 embedded email address broken across lines
1045 `embedded URI with too much whitespace < http://example.com/
1046 long/path /and /whitespace >`__
1048 `embedded URI with too much whitespace at end <http://example.com/
1049 long/path /and /whitespace >`__
1051 `embedded URI with no preceding whitespace<http://example.com>`__
1053 `escaped URI \<http://example.com>`__
1055 See `HTML Anchors: \<a>`_.
1058 <document source="test data">
1060 <reference anonymous="1" name="embedded URI with too much whitespace < http://example.com/ long/path /and /whitespace >">
1061 embedded URI with too much whitespace < http://example.com/
1062 long/path /and /whitespace >
1064 <reference anonymous="1" name="embedded URI with too much whitespace at end <http://example.com/ long/path /and /whitespace >">
1065 embedded URI with too much whitespace at end <http://example.com/
1066 long/path /and /whitespace >
1068 <reference anonymous="1" name="embedded URI with no preceding whitespace<http://example.com>">
1069 embedded URI with no preceding whitespace<http://example.com>
1071 <reference anonymous="1" name="escaped URI <http://example.com>">
1072 escaped URI <http://example.com>
1075 <reference name="HTML Anchors: <a>" refname="html anchors: <a>">
1080 Relative URIs' reference text can be omitted:
1087 <document source="test data">
1089 Relative URIs' reference text can be omitted:
1091 <reference name="reference" refuri="reference">
1093 <target ids="reference" names="reference" refuri="reference">
1095 <reference name="anonymous" refuri="anonymous">
1099 Escape trailing low-line char in URIs:
1106 <document source="test data">
1108 Escape trailing low-line char in URIs:
1110 <reference name="reference_" refuri="reference_">
1112 <target ids="reference" names="reference_" refuri="reference_">
1114 <reference name="anonymous_" refuri="anonymous_">
1118 Escape other char in URIs:
1122 `<anonymous\\call>`__
1125 <document source="test data">
1127 Escape other char in URIs:
1129 <reference name="reference:1" refuri="reference:1">
1131 <target ids="reference-1" names="reference:1" refuri="reference:1">
1133 <reference name="anonymouscall" refuri="anonymouscall">
1138 totest
['embedded_aliases'] = [
1140 `phrase reference <alias_>`_
1143 <document source="test data">
1145 <reference name="phrase reference" refname="alias">
1147 <target names="phrase\ reference" refname="alias">
1150 `anonymous reference <alias_>`__
1153 <document source="test data">
1155 <reference name="anonymous reference" refname="alias">
1159 `embedded alias on next line
1163 <document source="test data">
1165 <reference name="embedded alias on next line" refname="alias">
1166 embedded alias on next line
1169 `embedded alias across lines <alias
1173 <document source="test data">
1175 <reference name="embedded alias across lines" refname="alias phrase">
1176 embedded alias across lines
1179 `embedded alias with whitespace <alias
1183 <document source="test data">
1185 <reference name="embedded alias with whitespace" refname="alias long phrase">
1186 embedded alias with whitespace
1189 `<embedded alias with whitespace_>`__
1192 <document source="test data">
1194 <reference name="embedded alias with whitespace" refname="embedded alias with whitespace">
1195 embedded alias with whitespace
1198 `no embedded alias (whitespace inside bracket) < alias_ >`__
1200 `no embedded alias (no preceding whitespace)<alias_>`__
1203 <document source="test data">
1205 <reference anonymous="1" name="no embedded alias (whitespace inside bracket) < alias_ >">
1206 no embedded alias (whitespace inside bracket) < alias_ >
1208 <reference anonymous="1" name="no embedded alias (no preceding whitespace)<alias_>">
1209 no embedded alias (no preceding whitespace)<alias_>
1212 `anonymous reference <alias\ with\\ escaped \:characters_>`__
1215 <document source="test data">
1217 <reference name="anonymous reference" refname="aliaswith\ escaped :characters">
1221 `anonymous reference <alias\ with\\ escaped \:characters_>`__
1224 <document source="test data">
1226 <reference name="anonymous reference" refname="aliaswith\ escaped :characters">
1231 totest
['inline_targets'] = [
1235 Here is _`another target` in some text. And _`yet
1236 another target`, spanning lines.
1238 _`Here is a TaRgeT` with case and spacial difficulties.
1241 <document source="test data">
1243 <target ids="target" names="target">
1247 <target ids="another-target" names="another\ target">
1249 in some text. And \n\
1250 <target ids="yet-another-target" names="yet\ another\ target">
1255 <target ids="here-is-a-target" names="here\ is\ a\ target">
1257 with case and spacial difficulties.
1260 l'_`target1` and l\u2019_`target2` with apostrophe
1263 <document source="test data">
1266 <target ids="target1" names="target1">
1269 <target ids="target2" names="target2">
1274 quoted '_`target1`', quoted "_`target2`",
1275 quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d,
1276 quoted \xab_`target5`\xbb
1279 <document source="test data">
1282 <target ids="target1" names="target1">
1285 <target ids="target2" names="target2">
1289 <target ids="target3" names="target3">
1291 \u2019, quoted \u201c
1292 <target ids="target4" names="target4">
1296 <target ids="target5" names="target5">
1301 _`'target1'` with quotes, _`"target2"` with quotes,
1302 _`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes,
1303 _`\xabtarget5\xbb` with quotes
1306 <document source="test data">
1308 <target ids="target1" names="'target1'">
1311 <target ids="target2" names=""target2"">
1314 <target ids="target3" names="\u2018target3\u2019">
1317 <target ids="target4" names="\u201ctarget4\u201d">
1320 <target ids="target5" names="\xabtarget5\xbb">
1325 But this isn't a _target; targets require backquotes.
1327 And _`this`_ is just plain confusing.
1330 <document source="test data">
1332 But this isn't a _target; targets require backquotes.
1335 <problematic ids="id2" refid="id1">
1337 this`_ is just plain confusing.
1338 <system_message backrefs="id2" ids="id1" level="2" line="3" source="test data" type="WARNING">
1340 Inline target start-string without end-string.
1343 _`inline target without closing backquote
1346 <document source="test data">
1348 <problematic ids="id2" refid="id1">
1350 inline target without closing backquote
1351 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1353 Inline target start-string without end-string.
1357 totest
['footnote_reference'] = [
1362 <document source="test data">
1364 <footnote_reference ids="id1" refname="1">
1371 <document source="test data">
1373 <footnote_reference auto="1" ids="id1">
1379 <document source="test data">
1381 <footnote_reference auto="1" ids="id1" refname="label">
1387 <document source="test data">
1389 <footnote_reference auto="*" ids="id1">
1392 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1395 <document source="test data">
1397 Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
1401 totest
['citation_reference'] = [
1406 <document source="test data">
1408 <citation_reference ids="id1" refname="citation">
1412 [citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_
1415 <document source="test data">
1417 <citation_reference ids="id1" refname="citation">
1420 <citation_reference ids="id2" refname="cit-ation">
1423 <citation_reference ids="id3" refname="cit.ation">
1426 <citation_reference ids="id4" refname="cit1">
1431 Adjacent citation refs are not possible: [citation]_[CIT1]_
1434 <document source="test data">
1436 Adjacent citation refs are not possible: [citation]_[CIT1]_
1440 totest
['substitution_references'] = [
1445 <document source="test data">
1447 <substitution_reference refname="subref">
1451 |subref|_ and |subref|__
1454 <document source="test data">
1456 <reference refname="subref">
1457 <substitution_reference refname="subref">
1460 <reference anonymous="1">
1461 <substitution_reference refname="subref">
1465 |substitution reference|
1468 <document source="test data">
1470 <substitution_reference refname="substitution reference">
1471 substitution reference
1478 <document source="test data">
1480 <substitution_reference refname="substitution reference">
1485 |substitution reference without closing verbar
1488 <document source="test data">
1490 <problematic ids="id2" refid="id1">
1492 substitution reference without closing verbar
1493 <system_message backrefs="id2" ids="id1" level="2" line="1" source="test data" type="WARNING">
1495 Inline substitution_reference start-string without end-string.
1498 first | then || and finally |||
1501 <document source="test data">
1503 first | then || and finally |||
1507 totest
['standalone_hyperlink'] = [
1509 http://www.standalone.hyperlink.com
1511 http:/one-slash-only.absolute.path
1513 [http://example.com]
1515 (http://example.com)
1517 <http://example.com>
1519 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1521 http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text)
1523 http://[3ffe:2a00:100:7031::1]/
1525 mailto:someone@somewhere.com
1527 news:comp.lang.python
1529 An email address in a sentence: someone@somewhere.com.
1531 ftp://ends.with.a.period.
1533 (a.question.mark@end?)
1536 <document source="test data">
1538 <reference refuri="http://www.standalone.hyperlink.com">
1539 http://www.standalone.hyperlink.com
1541 <reference refuri="http:/one-slash-only.absolute.path">
1542 http:/one-slash-only.absolute.path
1545 <reference refuri="http://example.com">
1550 <reference refuri="http://example.com">
1555 <reference refuri="http://example.com">
1559 <reference refuri="http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html">
1560 http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
1562 <reference refuri="http://[3ffe:2a00:100:7031::1">
1563 http://[3ffe:2a00:100:7031::1
1564 ] (the final "]" is ambiguous in text)
1566 <reference refuri="http://[3ffe:2a00:100:7031::1]/">
1567 http://[3ffe:2a00:100:7031::1]/
1569 <reference refuri="mailto:someone@somewhere.com">
1570 mailto:someone@somewhere.com
1572 <reference refuri="news:comp.lang.python">
1573 news:comp.lang.python
1575 An email address in a sentence: \n\
1576 <reference refuri="mailto:someone@somewhere.com">
1577 someone@somewhere.com
1580 <reference refuri="ftp://ends.with.a.period">
1581 ftp://ends.with.a.period
1585 <reference refuri="mailto:a.question.mark@end">
1590 Valid URLs with escaped markup characters:
1592 http://example.com/\*content\*/whatever
1594 http://example.com/\*content*/whatever
1597 <document source="test data">
1599 Valid URLs with escaped markup characters:
1601 <reference refuri="http://example.com/*content*/whatever">
1602 http://example.com/*content*/whatever
1604 <reference refuri="http://example.com/*content*/whatever">
1605 http://example.com/*content*/whatever
1608 Valid URLs may end with punctuation inside "<>":
1610 <http://example.org/ends-with-dot.>
1613 <document source="test data">
1615 Valid URLs may end with punctuation inside "<>":
1618 <reference refuri="http://example.org/ends-with-dot.">
1619 http://example.org/ends-with-dot.
1623 Valid URLs with interesting endings:
1625 http://example.org/ends-with-pluses++
1628 <document source="test data">
1630 Valid URLs with interesting endings:
1632 <reference refuri="http://example.org/ends-with-pluses++">
1633 http://example.org/ends-with-pluses++
1636 None of these are standalone hyperlinks (their "schemes"
1637 are not recognized): signal:noise, a:b.
1640 <document source="test data">
1642 None of these are standalone hyperlinks (their "schemes"
1643 are not recognized): signal:noise, a:b.
1646 Escaped email addresses are not recognized: test\@example.org
1649 <document source="test data">
1651 Escaped email addresses are not recognized: test@example.org
1655 totest
['markup recognition rules'] = [
1657 __This__ should be left alone.
1660 <document source="test data">
1662 __This__ should be left alone.
1665 Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
1666 with backslash-escaped whitespace, including new\
1670 <document source="test data">
1682 with backslash-escaped whitespace, including newlines.
1685 text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*.
1686 \u00bf*punctuation*? \u00a1*examples*!\u00a0*no-break-space*\u00a0.
1689 <document source="test data">
1721 # Whitespace characters:
1722 # \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, fails in Python 2.6
1726 or *space* or one of
1727 \xa0*NO-BREAK SPACE*\xa0,
1728 \u1680*OGHAM SPACE MARK*\u1680,
1729 \u2000*EN QUAD*\u2000,
1730 \u2001*EM QUAD*\u2001,
1731 \u2002*EN SPACE*\u2002,
1732 \u2003*EM SPACE*\u2003,
1733 \u2004*THREE-PER-EM SPACE*\u2004,
1734 \u2005*FOUR-PER-EM SPACE*\u2005,
1735 \u2006*SIX-PER-EM SPACE*\u2006,
1736 \u2007*FIGURE SPACE*\u2007,
1737 \u2008*PUNCTUATION SPACE*\u2008,
1738 \u2009*THIN SPACE*\u2009,
1739 \u200a*HAIR SPACE*\u200a,
1740 \u202f*NARROW NO-BREAK SPACE*\u202f,
1741 \u205f*MEDIUM MATHEMATICAL SPACE*\u205f,
1742 \u3000*IDEOGRAPHIC SPACE*\u3000,
1743 \u2028*LINE SEPARATOR*\u2028
1746 <document source="test data">
1810 NARROW NO-BREAK SPACE
1814 MEDIUM MATHEMATICAL SPACE
1825 inline markup separated by non-ASCII whitespace
1826 \xa0**NO-BREAK SPACE**\xa0, \xa0``NO-BREAK SPACE``\xa0, \xa0`NO-BREAK SPACE`\xa0,
1827 \u2000**EN QUAD**\u2000, \u2000``EN QUAD``\u2000, \u2000`EN QUAD`\u2000,
1828 \u202f**NARROW NBSP**\u202f, \u202f``NARROW NBSP``\u202f, \u202f`NARROW NBSP`\u202f,
1831 <document source="test data">
1833 inline markup separated by non-ASCII whitespace
1866 no inline markup due to whitespace inside and behind: *
1870 *\xa0NO-BREAK SPACE\xa0*
1871 *\u1680OGHAM SPACE MARK\u1680*
1872 *\u2000EN QUAD\u2000*
1873 *\u2001EM QUAD\u2001*
1874 *\u2002EN SPACE\u2002*
1875 *\u2003EM SPACE\u2003*
1876 *\u2004THREE-PER-EM SPACE\u2004*
1877 *\u2005FOUR-PER-EM SPACE\u2005*
1878 *\u2006SIX-PER-EM SPACE\u2006*
1879 *\u2007FIGURE SPACE\u2007*
1880 *\u2008PUNCTUATION SPACE\u2008*
1881 *\u2009THIN SPACE\u2009*
1882 *\u200aHAIR SPACE\u200a*
1883 *\u202fNARROW NO-BREAK SPACE\u202f*
1884 *\u205fMEDIUM MATHEMATICAL SPACE\u205f*
1885 *\u3000IDEOGRAPHIC SPACE\u3000*
1886 *\u2028LINE SEPARATOR\u2028*
1889 <document source="test data">
1891 no inline markup due to whitespace inside and behind: *
1895 *\xa0NO-BREAK SPACE\xa0*
1896 *\u1680OGHAM SPACE MARK\u1680*
1897 *\u2000EN QUAD\u2000*
1898 *\u2001EM QUAD\u2001*
1899 *\u2002EN SPACE\u2002*
1900 *\u2003EM SPACE\u2003*
1901 *\u2004THREE-PER-EM SPACE\u2004*
1902 *\u2005FOUR-PER-EM SPACE\u2005*
1903 *\u2006SIX-PER-EM SPACE\u2006*
1904 *\u2007FIGURE SPACE\u2007*
1905 *\u2008PUNCTUATION SPACE\u2008*
1906 *\u2009THIN SPACE\u2009*
1907 *\u200aHAIR SPACE\u200a*
1908 *\u202fNARROW NO-BREAK SPACE\u202f*
1909 *\u205fMEDIUM MATHEMATICAL SPACE\u205f*
1910 *\u3000IDEOGRAPHIC SPACE\u3000*
1915 no inline markup because of non-ASCII whitespace following /preceding the markup
1916 **\xa0NO-BREAK SPACE\xa0** ``\xa0NO-BREAK SPACE\xa0`` `\xa0NO-BREAK SPACE\xa0`
1917 **\u2000EN QUAD\u2000** ``\u2000EN QUAD\u2000`` `\u2000EN QUAD\u2000`
1918 **\u202fNARROW NBSP\u202f** ``\u202fNARROW NBSP\u202f`` `\u202fNARROW NBSP\u202f`
1921 <document source="test data">
1923 no inline markup because of non-ASCII whitespace following /preceding the markup
1924 **\xa0NO-BREAK SPACE\xa0** ``\xa0NO-BREAK SPACE\xa0`` `\xa0NO-BREAK SPACE\xa0`
1925 **\u2000EN QUAD\u2000** ``\u2000EN QUAD\u2000`` `\u2000EN QUAD\u2000`
1926 **\u202fNARROW NBSP\u202f** ``\u202fNARROW NBSP\u202f`` `\u202fNARROW NBSP\u202f`\
1928 # « * » ‹ * › « * » ‹ * › « * » ‹ * › French,
1930 "Quoted" markup start-string (matched openers & closers) -> no markup:
1932 '*' "*" (*) <*> [*] {*}
1935 Some international quoting styles:
1936 ‘*’ “*” English, ...,
1937 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1940 「*」 『*』traditional Chinese,
1941 ”*” ’*’ »*» ›*› Swedish, Finnish,
1943 „*” »*« ’*’ Hungarian,
1945 But this is „*’ emphasized »*‹.
1948 <document source="test data">
1950 "Quoted" markup start-string (matched openers & closers) -> no markup:
1952 '*' "*" (*) <*> [*] {*}
1955 Some international quoting styles:
1956 ‘*’ “*” English, ...,
1957 „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
1960 「*」 『*』traditional Chinese,
1961 ”*” ’*’ »*» ›*› Swedish, Finnish,
1963 „*” »*« ’*’ Hungarian,
1973 if __name__
== '__main__':
1975 unittest
.main(defaultTest
='suite')