tools/db2md.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2017  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Migrate from inline docbook markup to markdown.
  23
  24 The tool converts markup in comments for the given source file(s). If --dry-run
  25 is given it would only report that docbook tags were found with exit code 1.
  26 To convert iteratively one would make a copy of the docs/xml dir, run the
  27 migration tool for some sources, rebuild the docs and compare the new xml.
  28 If it looks the same (or similar enough), submit the changes and repeat for more
  29 files.
  30
  31 Examples:
  32 python3 tools/db2md.py --dry-run tests/*/src/*.{c,h} | sed -e 's/^ *//' | sort | uniq -c | sort -g
  33 """
  34
  35 import argparse
  36 import logging
  37 import os
  38 import re
  39 import sys
  40 import xml.etree.ElementTree as ET
  41
  42
  43 def print_xml(node, depth=0):
  44     # if node.text:
  45     #     print('  ' * depth, node.text)
  46     for child in node:
  47         print('  ' * depth, '<%s %s>' % (child.tag, child.attrib))
  48         print_xml(child, depth + 1)
  49     # if node.tail:
  50     #     print('  ' * depth, node.tail)
  51
  52
  53 def convert_block(dry_run, filename, lines, beg, end):
  54     logging.debug("%s: scan block %d..%d", filename, beg, end)
  55
  56     # get indentation
  57     line = lines[beg]
  58     indent = line.find('* ')
  59     if indent == -1:
  60         logging.warning("%s:%d: missing '*' in comment?", filename, beg)
  61         return 0
  62
  63     indent += 2
  64
  65     found_docbook = 0
  66     end_skip = None
  67     content = ''
  68     for ix in range(beg, end):
  69         # scan for docbook tags
  70         line = lines[ix]
  71         content += line[indent:]
  72
  73         if not re.search(r'^\s*\*', line):
  74             logging.warning("%s:%d: missing '*' in comment?", filename, ix)
  75             continue
  76
  77         line = line[indent:]
  78
  79         # skip |[ ... ]| and <![CDATA[ ...  ]]> blocks
  80         if end_skip:
  81             if re.search(end_skip, line):
  82                 logging.debug("%s:%d: skip code block end", filename, ix)
  83                 end_skip = None
  84             continue
  85         else:
  86             if re.search(r'\|\[', line):
  87                 logging.debug("%s:%d: skip code block start", filename, ix)
  88                 end_skip = r'\]\|'
  89                 continue
  90             # if re.search(r'<!\[CDATA\[', line):
  91             #     logging.debug("%s:%d: skip code block start", filename, ix)
  92             #     end_skip = r'\]\]>'
  93             #     continue
  94
  95         # TODO: skip `...` blocks
  96         # check for historic non markdown compatible chars
  97         if re.search(r'\s\*\w+[\s.]', line):
  98             logging.warning("%s:%d: leading '*' needs escaping: '%s'", filename, ix, line)
  99         # if re.search(r'\s\w+\*[\s.]', line):
 100         #     logging.warning("%s:%d: trailing '*' needs escaping: '%s'", filename, ix, line)
 101         if re.search(r'\s_\w+[\s.]', line):
 102             logging.warning("%s:%d: leading '_' needs escaping: '%s'", filename, ix, line)
 103         # if re.search(r'\s\w+_[\s.]', line):
 104         #     logging.warning("%s:%d: trailing '_' needs escaping: '%s'", filename, ix, line)
 105
 106         # look for docbook
 107         for m in re.finditer(r'<([^>]*)>', line):
 108             tag = m.group(1)
 109             tag_name = tag.split(' ')[0]
 110             # check if it is a valid xml element name
 111             if not re.search(r'^/?[a-z_:][a-z0-9_:.-]*/?$', tag_name, re.I):
 112                 continue
 113
 114             found_docbook = 1
 115             break
 116             # if dry_run:
 117             #     # python3 tools/db2md.py --dry-run tests/*/src/*.{c,h} | \
 118             #     #   cut -d':' -f3- | sort | uniq -c | sort -g
 119             #     print('%s:%d:<%s>' % (filename, ix, tag_name.replace('/', '')))
 120
 121     if found_docbook:
 122         # add a fake root
 123         content = '<gtkdoc>' + content + '</gtkdoc>'
 124         # TODO: protect |[ ... ]| sections, use CDATA?s
 125         try:
 126             root = ET.fromstring(content)
 127         except ET.ParseError:
 128             return 0
 129
 130         if not root:
 131             return 0
 132
 133         if dry_run:
 134             print('%s:%d:' % (filename, ix))
 135             print_xml(root)
 136         else:
 137             # TODO: convert_tags()
 138             pass
 139
 140     return found_docbook
 141
 142
 143 def convert_file(dry_run, filename):
 144     """Scan scan a single file.
 145
 146     Returns: 0 if no doocbook was found
 147     """
 148
 149     found_docbook = 0
 150     lines = None
 151     with open(filename, 'r', encoding='utf-8') as f:
 152         lines = f.read().split('\n')
 153
 154     logging.debug("%s: read file with %d lines", filename, len(lines))
 155
 156     beg = end = -1
 157     for ix in range(len(lines)):
 158         line = lines[ix]
 159         # logging.debug("%s:%d: %d,%d: %s", filename, ix, beg, end, line)
 160         if beg == -1 and end == -1:
 161             if re.search(r'^\s*/\*.*\*/', line):
 162                 pass
 163             elif re.search(r'^\s*/\*\*(\s|$)', line):
 164                 logging.debug("%s:%d: comment start", filename, ix)
 165                 beg = ix
 166         elif beg > -1 and end == -1:
 167             if re.search(r'^\s*\*+/', line):
 168                 logging.debug("%s:%d: comment end", filename, ix)
 169                 end = ix
 170
 171         if beg > -1 and end > -1:
 172             beg += 1
 173             end -= 1
 174             if beg < end:
 175                 found_docbook = found_docbook | convert_block(dry_run, filename, lines, beg, end)
 176             beg = end = -1
 177
 178     return found_docbook
 179
 180
 181 def main(dry_run, files):
 182     """Scan for docbook tags in comments. If not in dry_run mode rewrite them as
 183     markdown. Report the files that contain(ed) docbook tags.
 184
 185     Returns: 0 if no doocbook was found
 186     """
 187
 188     found_docbook = 0
 189     for f in files:
 190         found_docbook = found_docbook | convert_file(dry_run, f)
 191     return found_docbook
 192
 193
 194 if __name__ == '__main__':
 195     parser = argparse.ArgumentParser(
 196         description='db2md - convert docbook in comment to markdown')
 197     parser.add_argument('--dry-run', default=False, action='store_true',
 198                         help='Only print files with docbook comments.')
 199     parser.add_argument('sources', nargs='*')
 200     options = parser.parse_args()
 201     if len(options.sources) == 0:
 202         sys.exit('Too few arguments')
 203
 204     log_level = os.environ.get('GTKDOC_TRACE')
 205     if log_level == '':
 206         log_level = 'INFO'
 207     if log_level:
 208         logging.basicConfig(stream=sys.stdout,
 209                             level=logging.getLevelName(log_level.upper()),
 210                             format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
 211
 212     sys.exit(main(options.dry_run, options.sources))