2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2017 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Migrate from inline docbook markup to markdown.
24 The tool converts markup in comments for the given source file(s). If --dry-run
25 is given it would only report that docbook tags were found with exit code 1.
To convert iteratively one would make a copy of the docs/xml dir, run the
27 migration tool for some sources, rebuild the docs and compare the new xml.
28 If it looks the same (or similar enough), submit the changes and repeat for more
32 python3 tools/db2md.py --dry-run tests/*/src/*.{c,h} | sed -e 's/^ *//' | sort | uniq -c | sort -g
40 import xml
.etree
.ElementTree
as ET
def print_xml(node, depth=0):
    """Print the element tree below `node`, one line per element.

    Every descendant is printed as ``<tag attrib>``, indented according to
    its nesting level. `node` itself is not printed, only the elements it
    contains (recursively).

    Args:
        node: an iterable of elements exposing `tag` and `attrib`, e.g. an
            `xml.etree.ElementTree.Element`.
        depth: nesting level used to indent the direct children of `node`.
    """
    # print(' ' * depth, node.text)
    for child in node:
        print(' ' * depth, '<%s %s>' % (child.tag, child.attrib))
        print_xml(child, depth + 1)
    # print(' ' * depth, node.tail)
def convert_block(dry_run, filename, lines, beg, end):
    """Scan one documentation comment block for docbook markup.

    The lines `lines[beg:end]` are checked one by one. Lines inside
    ``|[ ... ]|`` code blocks are skipped. For the remaining lines a warning
    is logged for words with a leading '*' or '_' (they would need escaping
    in markdown) and the line is searched for XML-like tags. If a tag is
    found, the whole block is additionally parsed as XML.

    Args:
        dry_run: not referenced by the scanning code in this block; the
            actual conversion is still a TODO.
        filename: name of the scanned file, used in log and report messages.
        lines: the lines of the file, without trailing newlines.
        beg: index of the first comment line to scan.
        end: index one past the last comment line to scan.

    Returns:
        1 if an XML-like tag was found in the block, 0 otherwise.
    """
    logging.debug("%s: scan block %d..%d", filename, beg, end)

    # Determine the indentation from the first line of the block.
    indent = lines[beg].find('* ')
    if indent == -1:
        logging.warning("%s:%d: missing '*' in comment?", filename, beg)
        return 0

    # NOTE(review): skipping the '* ' marker itself is an assumption; it keeps
    # the comment decoration out of the text that is parsed as XML below.
    text_start = indent + 2

    found_docbook = 0
    end_skip = None
    parts = []
    for ix in range(beg, end):
        # scan for docbook tags
        line = lines[ix]
        parts.append(line[text_start:])

        if not re.search(r'^\s*\*', line):
            logging.warning("%s:%d: missing '*' in comment?", filename, ix)
            continue

        # skip |[ ... ]| and <![CDATA[ ... ]]> blocks
        if end_skip:
            if re.search(end_skip, line):
                logging.debug("%s:%d: skip code block end", filename, ix)
                end_skip = None
            continue
        if re.search(r'\|\[', line):
            logging.debug("%s:%d: skip code block start", filename, ix)
            end_skip = r'\]\|'
            continue
        # if re.search(r'<!\[CDATA\[', line):
        #     logging.debug("%s:%d: skip code block start", filename, ix)

        # TODO: skip `...` blocks
        # check for historic non markdown compatible chars
        if re.search(r'\s\*\w+[\s.]', line):
            logging.warning("%s:%d: leading '*' needs escaping: '%s'", filename, ix, line)
        # if re.search(r'\s\w+\*[\s.]', line):
        #     logging.warning("%s:%d: trailing '*' needs escaping: '%s'", filename, ix, line)
        if re.search(r'\s_\w+[\s.]', line):
            logging.warning("%s:%d: leading '_' needs escaping: '%s'", filename, ix, line)
        # if re.search(r'\s\w+_[\s.]', line):
        #     logging.warning("%s:%d: trailing '_' needs escaping: '%s'", filename, ix, line)

        # look for docbook tags; one valid tag is enough to flag the block
        for m in re.finditer(r'<([^>]*)>', line):
            tag = m.group(1)
            tag_name = tag.split(' ')[0]
            # check if it is a valid xml element name
            if not re.search(r'^/?[a-z_:][a-z0-9_:.-]*/?$', tag_name, re.I):
                continue
            # To get tag statistics one could report each tag here:
            # # python3 tools/db2md.py --dry-run tests/*/src/*.{c,h} | \
            # # cut -d':' -f3- | sort | uniq -c | sort -g
            # print('%s:%d:<%s>' % (filename, ix, tag_name.replace('/', '')))
            found_docbook = 1
            break

    if found_docbook:
        # Join with newlines: the lines carry no line terminator, so plain
        # concatenation would fuse the last word of one line with the first
        # word of the next and break tags that wrap across lines.
        content = '<gtkdoc>' + '\n'.join(parts) + '</gtkdoc>'
        # TODO: protect |[ ... ]| sections, use CDATA?s
        try:
            ET.fromstring(content)
        except ET.ParseError as err:
            # Comments are not necessarily well-formed XML (stray '<' or '&',
            # undefined entities). Report it instead of failing silently.
            logging.warning("%s:%d: can't parse comment block as xml: %s",
                            filename, beg, err)
        else:
            # NOTE(review): the statements around this report are not visible
            # in the reviewed text; presumably the parsed tree is dumped after
            # it - confirm against the original file.
            print('%s:%d:' % (filename, ix))
            # TODO: convert_tags()

    return found_docbook
def convert_file(dry_run, filename):
    """Scan a single file for docbook markup in documentation comments.

    The file is read as UTF-8 and searched for multi-line comments that start
    with '/**'. Each such comment is handed to convert_block(). Comments that
    start and end on the same line are ignored.

    Args:
        dry_run: passed through to convert_block().
        filename: path of the source file to scan.

    Returns: 0 if no docbook was found, otherwise the OR-ed results of
    convert_block() for all comment blocks.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        lines = f.read().split('\n')

    logging.debug("%s: read file with %d lines", filename, len(lines))

    found_docbook = 0
    beg = -1  # index of the line that opened the current comment, -1 if none
    for ix, line in enumerate(lines):
        # logging.debug("%s:%d: %d: %s", filename, ix, beg, line)
        if beg == -1:
            if re.search(r'^\s*/\*.*\*/', line):
                # comment opens and closes on the same line, nothing to scan
                continue
            if re.search(r'^\s*/\*\*(\s|$)', line):
                logging.debug("%s:%d: comment start", filename, ix)
                beg = ix
            continue

        if not re.search(r'^\s*\*+/', line):
            continue

        logging.debug("%s:%d: comment end", filename, ix)
        # NOTE(review): assumes convert_block() expects the first line after
        # the opening '/**' as `beg` and the closing line as exclusive `end` -
        # confirm against the original file.
        first = beg + 1
        # Skip empty comments; there is no content line to scan.
        if first < ix:
            found_docbook |= convert_block(dry_run, filename, lines, first, ix)
        beg = -1

    return found_docbook
def main(dry_run, files):
    """Scan for docbook tags in comments. If not in dry_run mode rewrite them as
    markdown. Report the files that contain(ed) docbook tags.

    Args:
        dry_run: passed through to convert_file() for every file.
        files: iterable of source file names to scan.

    Returns: 0 if no docbook was found, otherwise the OR-ed results of
    convert_file() for all files.
    """
    found_docbook = 0
    # Every file is scanned, even after docbook was already found in an
    # earlier one, so no short-circuiting here.
    for f in files:
        found_docbook |= convert_file(dry_run, f)
    return found_docbook
if __name__ == '__main__':
    # Command line entry point: scan the given sources and use the result of
    # main() as the exit code.
    parser = argparse.ArgumentParser(
        description='db2md - convert docbook in comment to markdown')
    parser.add_argument('--dry-run', default=False, action='store_true',
                        help='Only print files with docbook comments.')
    parser.add_argument('sources', nargs='*')
    options = parser.parse_args()
    if not options.sources:
        sys.exit('Too few arguments')

    # Logging is only configured when GTKDOC_TRACE is set. os.environ.get()
    # returns None for an unset variable, which must not reach .upper().
    log_level = os.environ.get('GTKDOC_TRACE')
    if log_level is not None:
        # NOTE(review): an empty value is not a valid level name; falling back
        # to INFO is an assumption - confirm the intended default.
        level_name = (log_level or 'INFO').upper()
        logging.basicConfig(stream=sys.stdout,
                            level=logging.getLevelName(level_name),
                            format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')

    sys.exit(main(options.dry_run, options.sources))