3 # :Copyright: © 2020 Günter Milde.
4 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
6 # Copying and distribution of this file, with or without modification,
7 # are permitted in any medium without royalty provided the copyright
8 # notice and this notice are preserved.
9 # This file is offered as-is, without any warranty.
11 # .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
13 # Revision: $Revision$
"""
A parser for CommonMark MarkDown text using `recommonmark`__.

__ https://pypi.org/project/recommonmark/
"""
21 import docutils
.parsers
22 from docutils
import nodes
, Component
# "recommonmark" is an optional 3rd-party dependency: try to import it and
# fall back to a stub parser that only reports the missing module.
try:
    from recommonmark.parser import CommonMarkParser
    from recommonmark.transform import AutoStructify
except ImportError:
    # Placeholder that lets later code test whether the import succeeded.
    CommonMarkParser = None

    class Parser(docutils.parsers.Parser):
        """Fallback parser used when "recommonmark" cannot be imported."""

        def parse(self, inputstring, document):
            """Report the missing dependency instead of parsing.

            `inputstring` is ignored; a warning system message created by
            ``document.reporter`` is appended to `document`.
            """
            error = document.reporter.warning(
                'Missing dependency: MarkDown input is processed by a 3rd '
                'party parser but Python did not find the required module '
                '"recommonmark" (https://pypi.org/project/recommonmark/).')
            document.append(error)
# Define the real parser only when "recommonmark" could be imported:
# subclassing the ``None`` placeholder would raise a TypeError.
if CommonMarkParser:
    class Parser(CommonMarkParser):
        """MarkDown parser based on recommonmark."""
        # TODO: settings for AutoStructify
        # settings_spec = docutils.parsers.Parser.settings_spec + (
        # see https://recommonmark.readthedocs.io/en/latest/#autostructify

        # NOTE(review): the tail of this tuple was lost in the source at
        # hand; 'markdown' and 'md' are restored from upstream docutils --
        # confirm against the release this file belongs to.
        supported = ('recommonmark', 'commonmark',
                     'markdown', 'md')
        config_section = 'recommonmark parser'
        config_section_dependencies = ('parsers',)

        # def get_transforms(self):
        #     return Component.get_transforms(self) + [AutoStructify]

        def parse(self, inputstring, document):
            """Use the upstream parser and clean up afterwards.

            `inputstring` is the MarkDown source text, `document` the
            document tree that is filled by the upstream parser and then
            post-processed in place.  Problems are reported as system
            messages appended to `document`; nothing is returned.
            """
            # check for exorbitantly long lines
            limit = document.settings.line_length_limit
            for lineno, line in enumerate(inputstring.split('\n'), 1):
                if len(line) > limit:
                    error = document.reporter.error(
                        'Line %d exceeds the line-length-limit.' % lineno)
                    document.append(error)
                    # Do not hand over-long input to the parser at all.
                    return

            # pass to upstream parser
            try:
                CommonMarkParser.parse(self, inputstring, document)
            except Exception as err:
                # Report the upstream failure as a system message; the
                # post-processing below still runs on whatever was built.
                error = document.reporter.error(
                    'Parsing with "recommonmark" '
                    'returned the error:\n%s' % err)
                document.append(error)

            # Post-Processing
            # ---------------

            # merge adjoining Text nodes:
            for node in document.traverse(nodes.TextElement):
                children = node.children
                index = 0
                while index + 1 < len(children):
                    if (isinstance(children[index], nodes.Text)
                            and isinstance(children[index + 1], nodes.Text)):
                        # Fuse the pair and stay at `index`: the merged node
                        # may have to absorb the next sibling as well.
                        children[index] = nodes.Text(
                            children[index] + children.pop(index + 1))
                        children[index].parent = node
                    else:
                        index += 1

            # add "code" class argument to inline literal (code spans)
            for node in document.traverse(
                    lambda n: isinstance(n, (nodes.literal,
                                             nodes.literal_block))):
                node['classes'].append('code')

            # move "language" argument to classes
            for node in document.traverse(nodes.literal_block):
                if 'language' in node.attributes:
                    node['classes'].append(node['language'])
                    del node['language']

            # remove empty target nodes
            # (iterate over a snapshot: nodes are removed inside the loop)
            for node in list(document.traverse(nodes.target)):
                # remove empty name
                node['names'] = [v for v in node['names'] if v]
                if node.children or [v for v in node.attributes.values()
                                     if v]:
                    # target has content or a non-empty attribute: keep it
                    continue
                node.parent.remove(node)

            # replace raw nodes if raw is not allowed
            if not document.settings.raw_enabled:
                # (snapshot again: nodes are replaced inside the loop)
                for node in list(document.traverse(nodes.raw)):
                    warning = document.reporter.warning(
                        'Raw content disabled.')
                    node.parent.replace(node, warning)

            # fix section nodes
            for node in document.traverse(nodes.section):
                # remove spurious IDs (first may be from duplicate name)
                if len(node['ids']) > 1:
                    node['ids'].pop()
                # fix section levels
                section_level = self.get_section_level(node)
                if node['level'] != section_level:
                    warning = document.reporter.warning(
                        'Title level inconsistent. Changing from %d to %d.'
                        % (node['level'], section_level),
                        nodes.literal_block('', node[0].astext()))
                    node.insert(1, warning)
                # remove non-standard attribute "level"
                del node['level']  # TODO: store the original md level somewhere
127 def get_section_level(self
, node
):
131 if isinstance(node
, nodes
.document
):
133 if isinstance(node
, nodes
.section
):