Clean up parser code.
[docutils.git] / docutils / docutils / parsers / recommonmark_wrapper.py
blob0a7fef5e105248c60a1b3d439df2ad78da4227f3
1 #!/usr/bin/env python
2 # -*- coding: utf8 -*-
3 # :Copyright: © 2020 Günter Milde.
4 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
6 # Copying and distribution of this file, with or without modification,
7 # are permitted in any medium without royalty provided the copyright
8 # notice and this notice are preserved.
9 # This file is offered as-is, without any warranty.
11 # .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
13 # Revision: $Revision$
14 # Date: $Date$
15 """
16 A parser for CommonMark MarkDown text using `recommonmark`__.
18 __ https://pypi.org/project/recommonmark/
19 """
21 import docutils.parsers
22 from docutils import nodes, Component
# "recommonmark" is an optional third-party package.  When it cannot be
# imported, `CommonMarkParser` is set to None and a stand-in `Parser` is
# defined whose only job is to report the missing dependency.
try:
    from recommonmark.parser import CommonMarkParser
    from recommonmark.transform import AutoStructify
except ImportError as err:
    CommonMarkParser = None

    class Parser(docutils.parsers.Parser):
        """Stand-in parser used when "recommonmark" cannot be imported."""

        def parse(self, inputstring, document):
            """Append a "missing dependency" warning to `document`.

            `inputstring` is not looked at: nothing is parsed.
            """
            message = (
                'Missing dependency: MarkDown input is processed by a 3rd '
                'party parser but Python did not find the required module '
                '"recommonmark" (https://pypi.org/project/recommonmark/).')
            document.append(document.reporter.warning(message))
if CommonMarkParser:
    class Parser(CommonMarkParser):
        """MarkDown parser based on recommonmark.

        Delegates the actual parsing to the upstream ``CommonMarkParser``
        and post-processes the resulting document tree afterwards
        (see `parse()`).
        """
        # TODO: settings for AutoStructify
        # settings_spec = docutils.parsers.Parser.settings_spec + (
        # see https://recommonmark.readthedocs.io/en/latest/#autostructify

        supported = ('recommonmark', 'commonmark',
                     'markdown', 'md')
        config_section = 'recommonmark parser'
        config_section_dependencies = ('parsers',)

        # def get_transforms(self):
        #     return Component.get_transforms(self) + [AutoStructify]

        def parse(self, inputstring, document):
            """Use the upstream parser and clean up afterwards.

            `inputstring` is the text to parse, `document` the document
            tree that is filled and post-processed in place.
            Returns None.

            If a line of `inputstring` is longer than
            ``document.settings.line_length_limit``, an error message is
            appended to `document` and nothing is parsed.
            An exception raised by the upstream parser is reported as an
            error message appended to `document`; the post-processing
            steps below are applied in either case.
            """
            # check for exorbitantly long lines
            limit = document.settings.line_length_limit
            for lineno, line in enumerate(inputstring.split('\n'), 1):
                if len(line) > limit:
                    error = document.reporter.error(
                        'Line %d exceeds the line-length-limit.' % lineno)
                    document.append(error)
                    return

            # pass to upstream parser
            try:
                CommonMarkParser.parse(self, inputstring, document)
            except Exception as err:
                error = document.reporter.error('Parsing with "recommonmark" '
                                                'returned the error:\n%s'
                                                % err)
                document.append(error)

            # Post-Processing
            # ---------------

            # merge adjoining Text nodes:
            for node in document.traverse(nodes.TextElement):
                children = node.children
                i = 0
                while i + 1 < len(children):
                    if (isinstance(children[i], nodes.Text)
                            and isinstance(children[i + 1], nodes.Text)):
                        # Fold the right neighbour into the current node and
                        # stay at index `i`: the next sibling may be a Text
                        # node as well.
                        merged = nodes.Text(children[i] + children.pop(i + 1))
                        merged.parent = node
                        children[i] = merged
                    else:
                        i += 1

            # add "code" class argument to literal elements
            # (inline code spans as well as literal blocks)
            for node in document.traverse(
                    lambda n: isinstance(n, (nodes.literal,
                                             nodes.literal_block))):
                node['classes'].append('code')
            # move "language" argument to classes
            for node in document.traverse(nodes.literal_block):
                if 'language' in node.attributes:
                    node['classes'].append(node['language'])
                    del node['language']

            # remove empty target nodes
            for node in document.traverse(nodes.target):
                # remove empty name
                node['names'] = [v for v in node['names'] if v]
                # a target is "empty" if it has neither children nor any
                # attribute with a true value
                if node.children or any(node.attributes.values()):
                    continue
                node.parent.remove(node)

            # replace raw nodes if raw is not allowed
            if not document.settings.raw_enabled:
                for node in document.traverse(nodes.raw):
                    warning = document.reporter.warning(
                        'Raw content disabled.')
                    node.parent.replace(node, warning)

            # fix section nodes
            for node in document.traverse(nodes.section):
                # remove spurious IDs (first may be from duplicate name)
                if len(node['ids']) > 1:
                    node['ids'].pop()
                # fix section levels
                section_level = self.get_section_level(node)
                if node['level'] != section_level:
                    warning = document.reporter.warning(
                        'Title level inconsistent. Changing from %d to %d.'
                        % (node['level'], section_level),
                        nodes.literal_block('', node[0].astext()))
                    node.insert(1, warning)
                # remove non-standard attribute "level"
                del node['level']  # TODO: store the original md level somewhere

        def get_section_level(self, node):
            """Return the nesting level of `node` in the document tree.

            The level is 1 plus the number of `nodes.section` ancestors
            found between `node` and the enclosing `nodes.document`.
            If `node` is not attached to a document, the ancestors up to
            the detached root are counted.
            """
            level = 1
            ancestor = node.parent
            # Stop at the document root -- or at the top of a detached
            # subtree, where `parent` is None.
            while (ancestor is not None
                   and not isinstance(ancestor, nodes.document)):
                if isinstance(ancestor, nodes.section):
                    level += 1
                ancestor = ancestor.parent
            return level