tools/db2html.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2017  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Prototype for builtin docbook processing
  23
The tool loads the main xml document (<module>-docs.xml) and chunks it
like the xsl-stylesheets would do. To do that, it resolves all the xml-includes.
  26
  27 TODO: convert the docbook-xml to html
  28 - more templates
  29 - toc
  30
  31 Requirements:
  32 sudo pip3 install anytree jinja2 lxml
  33
  34 Examples:
  35 python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
  36 ll tests/gobject/docs/db2html
  37
  38 python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
  39 ll tests/bugs/docs/db2html
  40 cp tests/bugs/docs/html/*.{css,png} tests/bugs/docs/db2html/
  41 xdg-open tests/bugs/docs/db2html/index.html
  42 meld tests/bugs/docs/{html,db2html}
  43
  44 Benchmarking:
  45 (cd tests/bugs/docs/; rm html-build.stamp; time make html-build.stamp)
  46 """
  47
  48 import argparse
  49 import errno
  50 import logging
  51 import os
  52 import sys
  53
  54 from anytree import Node, PreOrderIter
  55 from jinja2 import Environment, FileSystemLoader
  56 from lxml import etree
  57
  58
  59 # http://www.sagehill.net/docbookxsl/Chunking.html
  60 CHUNK_TAGS = [
  61     'appendix',
  62     'article',
  63     'bibliography',  # in article or book
  64     'book',
  65     'chapter',
  66     'colophon',
  67     'glossary',      # in article or book
  68     'index',         # in article or book
  69     'part',
  70     'preface',
  71     'refentry',
  72     'reference',
  73     'sect1',         # except first
  74     'section',       # if equivalent to sect1
  75     'set',
  76     'setindex',
  77 ]
  78
  79
  80 class ChunkParams(object):
  81     def __init__(self, prefix, parent=None):
  82         self.prefix = prefix
  83         self.parent = None
  84         self.count = 0
  85
  86
  87 # TODO: look up the abbrevs and hierarchy for other tags
  88 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
  89 CHUNK_PARAMS = {
  90     'book': ChunkParams('bk'),
  91     'chapter': ChunkParams('ch', 'book'),
  92     'index': ChunkParams('ix', 'book'),
  93     'sect1': ChunkParams('s', 'chapter'),
  94     'section': ChunkParams('s', 'chapter'),
  95 }
  96
  97 TITLE_XPATH = {
  98     'book': etree.XPath('//bookinfo/title/text()'),
  99     'chapter': etree.XPath('//chapter/title/text()'),
 100     'index': etree.XPath('//index/title/text()'),
 101     'refentry': etree.XPath('//refentry/refmeta/refentrytitle/text()'),
 102 }
 103
 104 # Jinja2 templates
 105 TOOL_PATH = os.path.dirname(os.path.abspath(__file__))
 106 TEMPLATE_ENV = Environment(
 107     # loader=PackageLoader('gtkdoc', 'templates'),
 108     # autoescape=select_autoescape(['html', 'xml'])
 109     loader=FileSystemLoader(os.path.join(TOOL_PATH, 'templates')),
 110     autoescape=False,
 111     trim_blocks=True,
 112 )
 113
 114 TEMPLATES = {
 115     'book': TEMPLATE_ENV.get_template('book.html'),
 116     'refentry': TEMPLATE_ENV.get_template('refentry.html'),
 117 }
 118
 119
 120 def gen_chunk_name(node):
 121     if 'id' in node.attrib:
 122         return node.attrib['id']
 123
 124     tag = node.tag
 125     if tag not in CHUNK_PARAMS:
 126         CHUNK_PARAMS[tag] = ChunkParams(node.tag[:2])
 127         logging.warning('Add CHUNK_PARAMS for "%s"', tag)
 128
 129     naming = CHUNK_PARAMS[tag]
 130     naming.count += 1
 131     name = ('%s%02d' % (naming.prefix, naming.count))
 132     # handle parents to make names of nested tags unique
 133     # TODO: we only need to prepend the parent if there are > 1 of them in the
 134     #       xml
 135     # while naming.parent:
 136     #     parent = naming.parent
 137     #     if parent not in CHUNK_PARAMS:
 138     #         break;
 139     #     naming = CHUNK_PARAMS[parent]
 140     #     name = ('%s%02d' % (naming.prefix, naming.count)) + name
 141     return name
 142
 143
 144 def get_chunk_title(node):
 145     tag = node.tag
 146     if tag not in TITLE_XPATH:
 147         logging.warning('Add TITLE_XPATH for "%s"', tag)
 148         return ''
 149
 150     xpath = TITLE_XPATH[tag]
 151     return xpath(node, smart_strings=False)[0]
 152
 153
 154 def chunk(xml_node, parent=None):
 155     """Chunk the tree.
 156
 157     The first time, we're called with parent=None and in that case we return
 158     the new_node as the root of the tree
 159     """
 160     # print('<%s %s>' % (xml_node.tag, xml_node.attrib))
 161     if xml_node.tag in CHUNK_TAGS:
 162         # TODO: do we need to remove the xml-node from the parent?
 163         #       we generate toc from the files tree
 164         # from copy import deepcopy
 165         # ..., xml=deepcopy(xml_node), ...
 166         # xml_node.getparent().remove(xml_node)
 167         parent = Node(xml_node.tag, parent=parent, xml=xml_node,
 168                       filename=gen_chunk_name(xml_node) + '.html',
 169                       title=get_chunk_title(xml_node))
 170     for child in xml_node:
 171         chunk(child, parent)
 172
 173     return parent
 174
 175
 176 def convert(out_dir, files, node):
 177     """Convert the docbook chunks to a html file."""
 178
 179     logging.info('Writing: %s', node.filename)
 180     with open(os.path.join(out_dir, node.filename), 'wt') as html:
 181         if node.name in TEMPLATES:
 182             # TODO: ideally precomiple common xpath exprs once:
 183             #   func = etree.XPath('//b')
 184             #   func(xml_node)[0]
 185             def lxml_xpath(expr):
 186                 return node.xml.xpath(expr, smart_strings=False)[0]
 187
 188             template = TEMPLATES[node.name]
 189             template.globals['xpath'] = lxml_xpath
 190             params = {
 191                 'title': node.title,
 192                 'nav_home': node.root,
 193             }
 194             # nav params: up, prev, next
 195             if node.parent:
 196                 params['nav_up'] = node.parent
 197             ix = files.index(node)
 198             if ix > 0:
 199                 params['nav_prev'] = files[ix - 1]
 200             if ix < len(files) - 1:
 201                 params['nav_next'] = files[ix + 1]
 202
 203             html.write(template.render(**params))
 204         else:
 205             logging.warning('Add template for "%s"', node.name)
 206
 207
 208 def main(index_file):
 209     tree = etree.parse(index_file)
 210     tree.xinclude()
 211
 212     dir_name = os.path.dirname(index_file)
 213
 214     # for testing: dump to output file
 215     # out_file = os.path.join(dir_name, 'db2html.xml')
 216     # tree.write(out_file)
 217
 218     # TODO: rename to 'html' later on
 219     out_dir = os.path.join(dir_name, 'db2html')
 220     try:
 221         os.mkdir(out_dir)
 222     except OSError as e:
 223         if e.errno != errno.EEXIST:
 224             raise
 225
 226     # We need two passes:
 227     # 1) recursively walk the tree and chunk it into a python tree so that we
 228     #   can generate navigation and link tags
 229     files = chunk(tree.getroot())
 230     # 2) iterate the tree and output files
 231     # TODO: use multiprocessing
 232     files = list(PreOrderIter(files))
 233     for node in files:
 234         convert(out_dir, files, node)
 235
 236
 237 if __name__ == '__main__':
 238     parser = argparse.ArgumentParser(
 239         description='db2html - chunk docbook')
 240     parser.add_argument('sources', nargs='*')
 241     options = parser.parse_args()
 242     if len(options.sources) != 1:
 243         sys.exit('Expect one source file argument.')
 244
 245     log_level = os.environ.get('GTKDOC_TRACE')
 246     if log_level == '':
 247         log_level = 'INFO'
 248     if log_level:
 249         logging.basicConfig(stream=sys.stdout,
 250                             level=logging.getLevelName(log_level.upper()),
 251                             format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
 252
 253     sys.exit(main(options.sources[0]))