db2html: make navigation work
[gtk-doc.git] / tools / db2html.py
blob552d00dcfeb71ec8a3c25c516fa57d550b0078f6
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2017 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Prototype for builtin docbook processing
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
27 TODO: convert the docbook-xml to html
28 - more templates
29 - toc
31 Requirements:
32 sudo pip3 install anytree jinja2 lxml
34 Examples:
35 python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
36 ll tests/gobject/docs/db2html
38 python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
39 ll tests/bugs/docs/db2html
40 cp tests/bugs/docs/html/*.{css,png} tests/bugs/docs/db2html/
41 xdg-open tests/bugs/docs/db2html/index.html
42 meld tests/bugs/docs/{html,db2html}
44 Benchmarking:
45 (cd tests/bugs/docs/; rm html-build.stamp; time make html-build.stamp)
46 """
48 import argparse
49 import errno
50 import logging
51 import os
52 import sys
54 from anytree import Node, PreOrderIter
55 from jinja2 import Environment, FileSystemLoader
56 from lxml import etree
# DocBook elements that start a new output chunk, see
# http://www.sagehill.net/docbookxsl/Chunking.html
#
# Notes from the chunking rules:
# - 'bibliography', 'glossary' and 'index' chunk when in an article or book
# - 'sect1' chunks except for the first one
# - 'section' chunks if it is equivalent to a sect1
CHUNK_TAGS = [
    'appendix', 'article', 'bibliography', 'book',
    'chapter', 'colophon', 'glossary', 'index',
    'part', 'preface', 'refentry', 'reference',
    'sect1', 'section', 'set', 'setindex',
]
class ChunkParams(object):
    """Naming parameters used to generate file names for one chunk tag.

    Attributes:
      prefix: short string prepended to the running number in a generated
              chunk name.
      parent: tag name of the enclosing chunk type, or None for top-level
              tags.
      count:  number of chunk names generated so far for this tag.
    """

    def __init__(self, prefix, parent=None):
        self.prefix = prefix
        # Store the parent that was passed in; it used to be discarded by
        # always assigning None here.
        self.parent = parent
        self.count = 0
# Per-tag naming parameters for generated chunk file names.
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
CHUNK_PARAMS = {
    'book': ChunkParams(prefix='bk'),
    'chapter': ChunkParams(prefix='ch', parent='book'),
    'index': ChunkParams(prefix='ix', parent='book'),
    'sect1': ChunkParams(prefix='s', parent='chapter'),
    'section': ChunkParams(prefix='s', parent='chapter'),
}
# Precompiled XPath expressions that extract the title text of a chunk.
#
# The expressions are relative ('./...') to the chunk element they are
# evaluated on. A leading '//' would search from the document root and hence
# return the first matching title of the whole document for every chunk.
# smart_strings=False is an option of the XPath constructor; it makes the
# results plain strings.
TITLE_XPATH = {
    'book': etree.XPath('./bookinfo/title/text()', smart_strings=False),
    'chapter': etree.XPath('./title/text()', smart_strings=False),
    'index': etree.XPath('./title/text()', smart_strings=False),
    'refentry': etree.XPath('./refmeta/refentrytitle/text()',
                            smart_strings=False),
}
# Jinja2 templates, loaded from the 'templates' directory next to this tool.
# Alternative setup kept for reference:
#   loader=PackageLoader('gtkdoc', 'templates'),
#   autoescape=select_autoescape(['html', 'xml'])
TOOL_PATH = os.path.dirname(os.path.abspath(__file__))
TEMPLATE_ENV = Environment(
    loader=FileSystemLoader(os.path.join(TOOL_PATH, 'templates')),
    autoescape=False,
    trim_blocks=True,
)

# One template per chunk tag; the template file is named '<tag>.html'.
TEMPLATES = {
    tag: TEMPLATE_ENV.get_template(tag + '.html')
    for tag in ('book', 'refentry')
}
def gen_chunk_name(node):
    """Return the chunk name (file name without extension) for a node.

    If the node has an 'id' attribute, that id is the name. Otherwise the
    name is built from the prefix registered in CHUNK_PARAMS for the node's
    tag and a running two-digit counter. Unknown tags get registered on the
    fly using the first two characters of the tag as the prefix.
    """
    explicit_id = node.attrib.get('id')
    if explicit_id is not None:
        return explicit_id

    tag = node.tag
    naming = CHUNK_PARAMS.get(tag)
    if naming is None:
        naming = ChunkParams(tag[:2])
        CHUNK_PARAMS[tag] = naming
        logging.warning('Add CHUNK_PARAMS for "%s"', tag)

    naming.count += 1
    # TODO: handle parents to make names of nested tags unique; we only need
    # to prepend the parent if there are > 1 of them in the xml, e.g.:
    #   while naming.parent:
    #       parent = naming.parent
    #       if parent not in CHUNK_PARAMS:
    #           break
    #       naming = CHUNK_PARAMS[parent]
    #       name = ('%s%02d' % (naming.prefix, naming.count)) + name
    return '{}{:02d}'.format(naming.prefix, naming.count)
def get_chunk_title(node):
    """Return the title text of a chunk node.

    Looks up the precompiled XPath for the node's tag in TITLE_XPATH and
    returns the first match as a plain string. Returns '' (and logs a
    warning) if there is no XPath registered for the tag or if the
    expression does not match anything.
    """
    tag = node.tag
    if tag not in TITLE_XPATH:
        logging.warning('Add TITLE_XPATH for "%s"', tag)
        return ''

    # Keyword arguments given when calling a compiled XPath are XPath
    # variables, so 'smart_strings' is not passed here; the result is
    # converted to a plain str instead.
    matches = TITLE_XPATH[tag](node)
    if not matches:
        # Don't fail the whole run with an IndexError for an untitled chunk.
        logging.warning('No title found for "%s"', tag)
        return ''
    return str(matches[0])
def chunk(xml_node, parent=None):
    """Chunk the tree.

    Recursively walks the xml tree and creates a tree node for each element
    whose tag is listed in CHUNK_TAGS. Each tree node carries the xml
    element, the output file name and the title.

    The first time, we're called with parent=None and in that case we return
    the new node as the root of the tree.
    """
    # TODO: do we need to remove the xml-node from the parent?
    #       we generate toc from the files tree
    if xml_node.tag in CHUNK_TAGS:
        chunk_file = gen_chunk_name(xml_node) + '.html'
        chunk_title = get_chunk_title(xml_node)
        # The new node becomes the parent for chunks found further down.
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=chunk_file, title=chunk_title)

    for xml_child in xml_node:
        chunk(xml_child, parent)

    return parent
def convert(out_dir, files, node):
    """Convert the docbook chunks to a html file.

    Args:
      out_dir: directory the html file is written to
      files: list of all chunk nodes in output order, used to determine the
             previous and next chunk for navigation
      node: the chunk node to convert; its 'filename' is the output file name

    The output file is created even if there is no template for the node's
    tag; in that case it stays empty and a warning is logged.
    """

    logging.info('Writing: %s', node.filename)
    # Use an explicit encoding so that writing non-ascii content does not
    # depend on the locale the tool is run in.
    with open(os.path.join(out_dir, node.filename), 'wt',
              encoding='utf-8') as html:
        if node.name in TEMPLATES:
            # TODO: ideally precompile common xpath exprs once:
            # func = etree.XPath('//b')
            # func(xml_node)[0]
            def lxml_xpath(expr):
                return node.xml.xpath(expr, smart_strings=False)[0]

            template = TEMPLATES[node.name]
            # Make xpath lookups on this chunk's xml available to the template.
            template.globals['xpath'] = lxml_xpath
            params = {
                'title': node.title,
                'nav_home': node.root,
            }
            # nav params: up, prev, next
            if node.parent:
                params['nav_up'] = node.parent
            ix = files.index(node)
            if ix > 0:
                params['nav_prev'] = files[ix - 1]
            if ix < len(files) - 1:
                params['nav_next'] = files[ix + 1]

            html.write(template.render(**params))
        else:
            logging.warning('Add template for "%s"', node.name)
def main(index_file):
    """Chunk the given docbook document and write the html files.

    The output goes into a 'db2html' directory next to index_file.
    """
    doc = etree.parse(index_file)
    doc.xinclude()

    # for testing: dump to output file
    # doc.write(os.path.join(os.path.dirname(index_file), 'db2html.xml'))

    # TODO: rename to 'html' later on
    out_dir = os.path.join(os.path.dirname(index_file), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # Re-using an existing output directory is fine.
        pass

    # We need two passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags
    chunk_root = chunk(doc.getroot())
    # 2) iterate the tree and output files
    # TODO: use multiprocessing
    files = list(PreOrderIter(chunk_root))
    for chunk_node in files:
        convert(out_dir, files, chunk_node)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='db2html - chunk docbook')
    parser.add_argument('sources', nargs='*')
    options = parser.parse_args()
    if len(options.sources) != 1:
        sys.exit('Expect one source file argument.')

    # Logging is only set up if GTKDOC_TRACE is set; an empty value selects
    # the INFO level.
    log_level = os.environ.get('GTKDOC_TRACE')
    if log_level is not None:
        log_level = log_level or 'INFO'
        logging.basicConfig(
            stream=sys.stdout,
            level=logging.getLevelName(log_level.upper()),
            format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')

    sys.exit(main(options.sources[0]))