db2html: remove text handling for some tags
[gtk-doc.git] / tools / db2md.py
blob7a6c23fa60b7c67de5e363f2434892bb8e8db30e
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2017 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Migrate from inline docbook markup to markdown.
24 The tool converts markup in comments for the given source file(s). If --dry-run
25 is given it would only report that docbook tags were found with exit code 1.
26 To convert iteratively one would make a copy of the docs/xml dir, run the
27 migration tool for some sources, rebuild the docs and compare the new xml.
28 If it looks the same (or similar enough), submit the changes and repeat for more
29 files.
31 Examples:
32 python3 tools/db2md.py --dry-run tests/*/src/*.{c,h} | sed -e 's/^ *//' | sort | uniq -c | sort -g
33 """
35 import argparse
36 import logging
37 import os
38 import re
39 import sys
40 import xml.etree.ElementTree as ET
def print_xml(node, depth=0):
    """Print the element structure below node, one tag per line.

    For every child element the tag name and the attribute dict are printed,
    prefixed by one space per nesting level. Children are visited recursively
    in document order. Element text and tail are intentionally not printed.

    Args:
      node: an iterable of elements, e.g. an xml.etree.ElementTree.Element
      depth: current nesting level, used for the indentation prefix
    """
    prefix = ' ' * depth
    for element in node:
        description = '<%s %s>' % (element.tag, element.attrib)
        print(prefix, description)
        print_xml(element, depth=depth + 1)
def _has_docbook_tag(line):
    """Return True if line contains '<...>' whose first word is a valid XML name."""
    for m in re.finditer(r'<([^>]*)>', line):
        tag_name = m.group(1).split(' ')[0]
        # check if it is a valid xml element name
        if re.search(r'^/?[a-z_:][a-z0-9_:.-]*/?$', tag_name, re.I):
            return True
    return False


def convert_block(dry_run, filename, lines, beg, end):
    """Scan the content lines of one comment block for docbook tags.

    The comment decoration (everything up to and including the first '* ' of
    line `beg`) is stripped from every line. Lines inside |[ ... ]| code
    blocks are not scanned for tags. If a tag is found, the stripped content
    is wrapped in a fake root element and parsed as XML.

    Args:
      dry_run: if true, print the location and the element tree that was found
      filename: name of the file, only used for messages
      lines: all lines of the file
      beg: index of the first line to scan; also defines the indentation
      end: index one past the last line to scan (exclusive)

    Returns: 1 if docbook tags were found and the content parsed as XML with
      at least one element, 0 otherwise
    """
    logging.debug("%s: scan block %d..%d", filename, beg, end)

    # get indentation
    indent = lines[beg].find('* ')
    if indent == -1:
        logging.warning("%s:%d: missing '*' in comment?", filename, beg)
        return 0
    indent += 2

    found_docbook = 0
    end_skip = None
    parts = []
    for ix in range(beg, end):
        line = lines[ix]
        # NOTE(review): lines are joined without a separator (as before), so
        # words at line breaks run together in the parsed text - confirm that
        # this is acceptable once convert_tags() starts using the text.
        parts.append(line[indent:])

        if not re.search(r'^\s*\*', line):
            logging.warning("%s:%d: missing '*' in comment?", filename, ix)
            continue

        line = line[indent:]

        # skip |[ ... ]| blocks
        if end_skip:
            if re.search(end_skip, line):
                logging.debug("%s:%d: skip code block end", filename, ix)
                end_skip = None
            continue

        start = re.search(r'\|\[', line)
        if start:
            logging.debug("%s:%d: skip code block start", filename, ix)
            if re.search(r'\]\|', line[start.end():]):
                # The block is closed on the same line. Do not enter skip
                # mode, otherwise all following lines up to some unrelated
                # ']|' would be hidden from the scan.
                logging.debug("%s:%d: skip code block end", filename, ix)
            else:
                end_skip = r'\]\|'
            continue

        # TODO: skip `...` blocks
        # check for historic non markdown compatible chars
        if re.search(r'\s\*\w+[\s.]', line):
            logging.warning("%s:%d: leading '*' needs escaping: '%s'", filename, ix, line)
        if re.search(r'\s_\w+[\s.]', line):
            logging.warning("%s:%d: leading '_' needs escaping: '%s'", filename, ix, line)

        # look for docbook
        if _has_docbook_tag(line):
            found_docbook = 1

    if not found_docbook:
        return 0

    # add a fake root
    # TODO: protect |[ ... ]| sections, use CDATA?
    content = '<gtkdoc>' + ''.join(parts) + '</gtkdoc>'
    try:
        root = ET.fromstring(content)
    except ET.ParseError as err:
        # Not well-formed, e.g. a '<word>' that is not markup at all.
        logging.debug("%s:%d: content is not well-formed xml: %s", filename, beg, err)
        return 0

    # Explicit length test: the truth value of an Element is deprecated.
    if len(root) == 0:
        return 0

    if dry_run:
        # ix is the index of the last scanned line of the block
        print('%s:%d:' % (filename, ix))
        print_xml(root)
    # TODO: convert_tags() when not in dry_run mode

    return found_docbook
def convert_file(dry_run, filename):
    """Scan a single file for docbook tags in its '/** ... */' comments.

    The file is read as UTF-8 and split into lines. For each multi-line
    comment that starts with '/**' and ends with a line starting with '*/',
    the lines between the two markers are handed to convert_block().

    Args:
      dry_run: passed on to convert_block()
      filename: path of the file to scan

    Returns: 0 if no docbook was found
    """
    with open(filename, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')

    logging.debug("%s: read file with %d lines", filename, len(lines))

    found_docbook = 0
    beg = -1  # index of the '/**' line while inside a comment, else -1
    for ix, line in enumerate(lines):
        if beg == -1:
            if re.search(r'^\s*/\*.*\*/', line):
                # comment that opens and closes on one line, nothing to scan
                continue
            if re.search(r'^\s*/\*\*(\s|$)', line):
                logging.debug("%s:%d: comment start", filename, ix)
                beg = ix
            continue

        if re.search(r'^\s*\*+/', line):
            logging.debug("%s:%d: comment end", filename, ix)
            # convert_block() scans range(first, end), i.e. end is exclusive.
            # Pass the index of the closing line itself, so that the last
            # content line of the comment is included in the scan.
            first = beg + 1
            if first < ix:
                found_docbook |= convert_block(dry_run, filename, lines, first, ix)
            beg = -1

    return found_docbook
def main(dry_run, files):
    """Scan all given files for docbook tags in comments.

    If not in dry_run mode the tags are meant to be rewritten as markdown.
    Every file is processed, in the given order, by convert_file().

    Args:
      dry_run: passed on to convert_file()
      files: the names of the files to scan

    Returns: 0 if no docbook was found
    """
    status = 0
    for path in files:
        result = convert_file(dry_run, path)
        status |= result
    return status
if __name__ == '__main__':
    # Command line: db2md.py [--dry-run] SOURCE...
    arg_parser = argparse.ArgumentParser(
        description='db2md - convert docbook in comment to markdown')
    arg_parser.add_argument('--dry-run', default=False, action='store_true',
                            help='Only print files with docbook comments.')
    arg_parser.add_argument('sources', nargs='*')
    args = arg_parser.parse_args()
    if not args.sources:
        sys.exit('Too few arguments')

    # Logging is only set up when GTKDOC_TRACE is present in the environment.
    # An empty value selects INFO, otherwise the value names the level.
    trace = os.environ.get('GTKDOC_TRACE')
    if trace is not None:
        level_name = trace if trace != '' else 'INFO'
        logging.basicConfig(
            stream=sys.stdout,
            level=logging.getLevelName(level_name.upper()),
            format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')

    # The exit status is 0 if no docbook was found.
    sys.exit(main(args.dry_run, args.sources))