tools/db2html.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2017  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Prototype for builtin docbook processing
  23
The tool loads the main xml document (<module>-docs.xml) and chunks it
like the xsl-stylesheets would do. To do that, it resolves all the xml-includes.
  26
  27 TODO: convert the docbook-xml to html
  28 - try macros for the navigation
  29
  30 Requirements:
  31 sudo pip3 install anytree jinja2 lxml
  32
  33 Examples:
  34 python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
  35 ll tests/gobject/docs/db2html
  36 python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
  37 ll tests/bugs/docs/db2html
  38 """
  39
  40 import argparse
  41 import errno
  42 import logging
  43 import os
  44 import sys
  45
  46 from anytree import Node
  47 from jinja2 import Template
  48 from lxml import etree
  49
  50 # http://www.sagehill.net/docbookxsl/Chunking.html
  51 CHUNK_TAGS = [
  52     'appendix',
  53     'article',
  54     'bibliography',  # in article or book
  55     'book',
  56     'chapter',
  57     'colophon',
  58     'glossary',      # in article or book
  59     'index',         # in article or book
  60     'part',
  61     'preface',
  62     'refentry',
  63     'reference',
  64     'sect1',         # except first
  65     'section',       # if equivalent to sect1
  66     'set',
  67     'setindex',
  68 ]
  69
  70 # TODO: look up the abbrevs and hierarchy for other tags
  71 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
  72 CHUNK_NAMING = {
  73     'book': {
  74         'prefix': 'bk',
  75         'count': 0,
  76         'parent': None,
  77     },
  78     'chapter': {
  79         'prefix': 'ch',
  80         'count': 0,
  81         'parent': 'book'
  82     },
  83     'index': {
  84         'prefix': 'ix',
  85         'count': 0,
  86         'parent': 'book'
  87     },
  88     'sect1': {
  89         'prefix': 's',
  90         'count': 0,
  91         'parent': 'chapter',
  92     },
  93     'section': {
  94         'prefix': 's',
  95         'count': 0,
  96         'parent': 'chapter',
  97     },
  98 }
  99
 100 DOCTYPE = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'
 101
 102 BOOK_TEMPLATE = DOCTYPE + """
 103 <html>
 104 <head>
 105 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 106 <title>{{ xpath('./bookinfo/title/text()') }}</title>
 107 </head>
 108 <body>
 109 </body>
 110 </html>
 111 """
 112
 113 TEMPLATES = {
 114     'book': Template(BOOK_TEMPLATE),
 115 }
 116
 117
 118 def gen_chunk_name(node):
 119     if 'id' in node.attrib:
 120         return node.attrib['id']
 121
 122     tag = node.tag
 123     if tag not in CHUNK_NAMING:
 124         CHUNK_NAMING[tag] = {
 125             'prefix': node.tag[:2],
 126             'count': 0
 127         }
 128         logging.warning('Add CHUNK_NAMING for "%s"', tag)
 129
 130     naming = CHUNK_NAMING[tag]
 131     naming['count'] += 1
 132     name = ('%s%02d' % (naming['prefix'], naming['count']))
 133     # handle parents to make names of nested tags unique
 134     # TODO: we only need to prepend the parent if there are > 1 of them in the
 135     #       xml
 136     # while naming['parent']:
 137     #     parent = naming['parent']
 138     #     if parent not in CHUNK_NAMING:
 139     #         break;
 140     #     naming = CHUNK_NAMING[parent]
 141     #     name = ('%s%02d' % (naming['prefix'], naming['count'])) + name
 142     return name
 143
 144
 145 def chunk(out_dir, xml_node, parent=None):
 146     """Chunk the tree.
 147
 148     The first time, we're called with parent=None and in that case we return
 149     the new_node as the root of the tree
 150     """
 151     # print('<%s %s>' % (xml_node.tag, xml_node.attrib))
 152     if xml_node.tag in CHUNK_TAGS:
 153         base = gen_chunk_name(xml_node) + '.html'
 154         out_filename = os.path.join(out_dir, base)
 155         # print('*** %s ***' % (out_filename))
 156         # TODO: do we need to remove the xml-node from the parent?
 157         #       we generate toc from the files tree
 158         # from copy import deepcopy
 159         # ..., xml=deepcopy(xml_node), ...
 160         # xml_node.getparent().remove(xml_node)
 161         parent = Node(xml_node.tag, parent=parent, xml=xml_node, filename=out_filename)
 162     for child in xml_node:
 163         chunk(out_dir, child, parent)
 164
 165     return parent
 166
 167
 168 def convert(node):
 169     """Convert the docbook chunks to html files."""
 170
 171     logging.info('Writing: %s', node.filename)
 172     with open(node.filename, 'wt') as html:
 173         if node.name in TEMPLATES:
 174             # TODO: ideally precomiple common xpath exprs once:
 175             #   func = etree.XPath("//b")
 176             #   func(xml_node)[0]
 177             def lxml_xpath(expr):
 178                 return node.xml.xpath(expr, smart_strings=False)[0]
 179
 180             template = TEMPLATES[node.name]
 181             template.globals['xpath'] = lxml_xpath
 182             # TODO: extract from xml
 183             params = {
 184             }
 185             html.write(template.render(**params))
 186         else:
 187             logging.warning('Add template for "%s"', node.name)
 188
 189     for child in node.children:
 190         convert(child)
 191
 192
 193 def main(index_file):
 194     tree = etree.parse(index_file)
 195     tree.xinclude()
 196
 197     dir_name = os.path.dirname(index_file)
 198
 199     # for testing: dump to output file
 200     # out_file = os.path.join(dir_name, 'db2html.xml')
 201     # tree.write(out_file)
 202
 203     # TODO: rename to 'html' later on
 204     out_dir = os.path.join(dir_name, 'db2html')
 205     try:
 206         os.mkdir(out_dir)
 207     except OSError as e:
 208         if e.errno != errno.EEXIST:
 209             raise
 210
 211     # We need two passes:
 212     # 1) recursively walk the tree and chunk it into a python tree so that we
 213     #   can generate navigation and link tags
 214     files = chunk(out_dir, tree.getroot())
 215     # 2) walk the tree and output files
 216     # TODO: iterate with the anytree iterator and use multiprocessing
 217     convert(files)
 218
 219
 220 if __name__ == '__main__':
 221     parser = argparse.ArgumentParser(
 222         description='db2html - chunk docbook')
 223     parser.add_argument('sources', nargs='*')
 224     options = parser.parse_args()
 225     if len(options.sources) != 1:
 226         sys.exit('Expect one source file argument.')
 227
 228     log_level = os.environ.get('GTKDOC_TRACE')
 229     if log_level == '':
 230         log_level = 'INFO'
 231     if log_level:
 232         logging.basicConfig(stream=sys.stdout,
 233                             level=logging.getLevelName(log_level.upper()),
 234                             format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
 235
 236     sys.exit(main(options.sources[0]))