db2html: add basic recursive block converter
[gtk-doc.git] / tools / db2html.py
blobab485d20a5f4872227ed6c6090d6fd190005a8f1
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2017 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Prototype for builtin docbook processing
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
27 TODO: convert the docbook-xml to html
28 - more templates
29 - refentry/index nav headers
30 - for refsect, we need a 'long-title' that also contains refpurpose
31 - figure how to deal with all the possible docbook
32 - how can we report 'unhandled' data
33 - we need a generic transform for everything in a para (and others like
34 releaseinfo)
35 - this will walk the tree and replace nodes to convert from docbook to html
36 - we can start with 1:1, but most likely each transform will be a function
37 that mangles the sub tree and recurses for certain children (kind of what
38 xslt does)
40 OPTIONAL:
41 - minify html: https://pypi.python.org/pypi/htmlmin/
43 Requirements:
44 sudo pip3 install anytree jinja2 lxml
46 Examples:
47 python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
48 ll tests/gobject/docs/db2html
50 python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
51 ll tests/bugs/docs/db2html
52 cp tests/bugs/docs/html/*.{css,png} tests/bugs/docs/db2html/
53 xdg-open tests/bugs/docs/db2html/index.html
54 meld tests/bugs/docs/{html,db2html}
56 Benchmarking:
57 (cd tests/bugs/docs/; rm html-build.stamp; time make html-build.stamp)
58 """
import argparse
import errno
import html
import logging
import os
import sys

from anytree import Node, PreOrderIter
from jinja2 import Environment, FileSystemLoader
from lxml import etree
# Tags of the docbook elements that start a new chunk (output file).
# List taken from http://www.sagehill.net/docbookxsl/Chunking.html
# The trailing remarks are qualifiers from that reference; the list itself is
# only used for plain membership tests.
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',  # in article or book
    'index',  # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',  # except first
    'section',  # if equivalent to sect1
    'set',
    'setindex',
]
class ChunkParams(object):
    """Naming parameters for the chunks generated from one docbook tag.

    Attributes:
      prefix: string that generated chunk names for this tag start with
      parent: tag name of the enclosing chunk type, or None if not given
      count: running counter used to number the generated names
    """

    def __init__(self, prefix, parent=None):
        self.prefix = prefix
        # Store the value passed by the caller; it used to be discarded and
        # the attribute was always None.
        self.parent = parent
        self.count = 0
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# Each row is (tag, file name prefix, tag of the enclosing chunk type).
CHUNK_PARAMS = {
    tag: ChunkParams(prefix, parent)
    for tag, prefix, parent in (
        ('book', 'bk', None),
        ('chapter', 'ch', 'book'),
        ('index', 'ix', 'book'),
        ('sect1', 's', 'chapter'),
        ('section', 's', 'chapter'),
    )
}
# Compiled XPath expressions, keyed by chunk tag, that select the title text
# relative to the chunk's own element.
TITLE_XPATH = {
    tag: etree.XPath(expr)
    for tag, expr in (
        ('book', './bookinfo/title/text()'),
        ('chapter', './title/text()'),
        ('index', './title/text()'),
        ('refentry', './refmeta/refentrytitle/text()'),
    )
}
# Jinja2 templates
# They are loaded from the 'templates' directory next to this file.
TOOL_PATH = os.path.dirname(os.path.abspath(__file__))
TEMPLATE_ENV = Environment(
    loader=FileSystemLoader(os.path.join(TOOL_PATH, 'templates')),
    trim_blocks=True,
    autoescape=False,
    # Disabled alternatives, kept for reference:
    # loader=PackageLoader('gtkdoc', 'templates'),
    # autoescape=select_autoescape(['html', 'xml'])
)
# Page templates keyed by chunk tag; the template file is '<tag>.html'.
TEMPLATES = {
    tag: TEMPLATE_ENV.get_template(tag + '.html')
    for tag in ('book', 'index', 'refentry')
}
def gen_chunk_name(node):
    """Return the base file name (without extension) for a chunk element.

    An 'id' attribute on the element is used verbatim. Otherwise the name is
    the tag's prefix from CHUNK_PARAMS followed by a running number (at least
    two digits). Tags that have no CHUNK_PARAMS entry get one registered on
    the fly, using the first two characters of the tag as prefix, and a
    warning is logged.
    """
    chunk_id = node.attrib.get('id')
    if chunk_id is not None:
        return chunk_id

    tag = node.tag
    params = CHUNK_PARAMS.get(tag)
    if params is None:
        params = CHUNK_PARAMS[tag] = ChunkParams(tag[:2])
        logging.warning('Add CHUNK_PARAMS for "%s"', tag)

    params.count += 1
    # TODO: handle parents to make names of nested tags unique, i.e. walk the
    # params.parent chain through CHUNK_PARAMS and prepend each parent's
    # '<prefix><count>'. We only need to prepend the parent if there are > 1
    # of them in the xml.
    return '%s%02d' % (params.prefix, params.count)
def get_chunk_title(node):
    """Return the title text of a chunk element.

    Args:
      node: the xml element that starts the chunk

    Returns:
      The first result of the TITLE_XPATH expression registered for the
      element's tag. An empty string is returned, and a warning logged, when
      no expression is registered for the tag or when it selects nothing.
    """
    tag = node.tag
    xpath = TITLE_XPATH.get(tag)
    if xpath is None:
        logging.warning('Add TITLE_XPATH for "%s"', tag)
        return ''

    # NOTE(review): lxml documents smart_strings as an option of the XPath
    # constructor; passed at call time it is presumably treated as an XPath
    # variable and has no effect - confirm and move it if so.
    matches = xpath(node, smart_strings=False)
    if not matches:
        # E.g. a chapter without a <title> child. Indexing the empty result
        # would raise IndexError and abort the whole run.
        logging.warning('No title found for "%s"', tag)
        return ''
    return matches[0]
def chunk(xml_node, parent=None):
    """Chunk the tree.

    Recursively walks xml_node and builds a tree of anytree Nodes, one for
    each element whose tag is listed in CHUNK_TAGS. Every Node is named after
    the tag and carries the xml element, the output filename and the title.

    Returns the Node created for xml_node, or the unchanged parent if
    xml_node does not start a chunk. The first time, we're called with
    parent=None and in that case the returned node is the root of the tree.
    """
    tag = xml_node.tag
    node = parent
    if tag in CHUNK_TAGS:
        # TODO: do we need to remove the xml-node from the parent?
        # We generate the toc from the files tree. Options would be a
        # deepcopy of xml_node followed by removing it from its parent, or
        # etree.ElementTree(xml_node).getroot().
        filename = gen_chunk_name(xml_node) + '.html'
        title = get_chunk_title(xml_node)
        node = Node(tag, parent=parent, xml=xml_node, filename=filename,
                    title=title)

    # Children of a chunk hang below the new node, all others below the
    # node we were given.
    for child in xml_node:
        chunk(child, node)

    return node
def convert__inner(xml):
    """Convert all child elements of xml and return the concatenated html.

    Each child is dispatched on its tag through the convert_tags table. Tags
    without a registered converter are handed to convert__unknown; a plain
    convert_tags.get(tag) would return None for them and the call would fail
    with a TypeError.

    Args:
      xml: the element whose children are converted

    Returns:
      The html for all children in document order, '' if there are none.
    """
    return ''.join(
        convert_tags.get(child.tag, convert__unknown)(child) for child in xml)
def convert__unknown(xml):
    """Emit a placeholder html comment for a tag that has no converter.

    A warning naming the tag is logged so that the missing converter can be
    added. The element's own content is not converted. The text that follows
    the element (xml.tail) is kept, html-escaped, so that prose around an
    unsupported inline element is not lost.

    Args:
      xml: the element that could not be converted

    Returns:
      '<!-- TAG-->' plus a newline, followed by the escaped tail text if any.
    """
    logging.warning('Add tag converter for "%s"', xml.tag)
    result = '<!-- ' + xml.tag + '-->\n'
    if xml.tail:
        result += html.escape(xml.tail, quote=False)
    return result
def convert_para(xml):
    """Convert a para-like element to a html <p> block.

    The leading text, the converted child elements and the text following
    the element (xml.tail) are emitted in document order. For tags other than
    'para' the tag name is set as the css class of the <p>.

    Text and tail are html-escaped: they are the parsed character data of
    the document and would otherwise inject raw '<' and '&' into the page.

    Args:
      xml: the element to convert

    Returns:
      The html string for the element including its tail text.
    """
    if xml.tag == 'para':
        parts = ['<p>']
    else:
        parts = ['<p class="%s">' % xml.tag]
    if xml.text:
        parts.append(html.escape(xml.text, quote=False))
    parts.append(convert__inner(xml))
    parts.append('\n</p>')
    if xml.tail:
        parts.append(html.escape(xml.tail, quote=False))
    return ''.join(parts)
def convert_ulink(xml):
    """Convert a docbook <ulink> element to a html <a> element.

    The link target is read from the 'url' attribute and the element text is
    the link label. For backward compatibility the text is used as target
    when there is no 'url' attribute, and the target is used as label when
    the element has no text (instead of printing 'None').

    Target and label are html-escaped. The text following the element
    (xml.tail) is appended so that it is not lost when the link is embedded
    in a paragraph.

    Args:
      xml: the ulink element

    Returns:
      The html string for the link including its tail text.
    """
    text = xml.text or ''
    url = xml.attrib.get('url', text)
    label = text or url
    result = '<a class="%s" href="%s">%s</a>' % (
        xml.tag, html.escape(url, quote=True), html.escape(label, quote=False))
    if xml.tail:
        result += html.escape(xml.tail, quote=False)
    return result
# Dispatch table used by convert__inner: docbook tag -> converter function.
convert_tags = dict(
    para=convert_para,
    ulink=convert_ulink,
)
def convert(out_dir, files, node):
    """Convert the docbook chunks to a html file.

    Renders the template registered in TEMPLATES for the chunk's tag and
    writes the result to out_dir/node.filename. If no template is registered
    a warning is logged; the output file is still created, but left empty.

    Args:
      out_dir: directory the html file is written to
      files: list of all chunk nodes in output order, used to determine the
        previous and next page
      node: the chunk node to write (name, filename, xml, title, parent and
        root are read)
    """
    logging.info('Writing: %s', node.filename)
    # Write UTF-8 explicitly instead of the locale's default encoding, which
    # can fail on non-ascii content (e.g. in a C locale).
    # NOTE(review): assumes the templates do not declare another charset.
    with open(os.path.join(out_dir, node.filename), 'wt',
              encoding='utf-8') as out:
        template = TEMPLATES.get(node.name)
        if template is None:
            logging.warning('Add template for "%s"', node.name)
            return

        # TODO: ideally precompile common xpath exprs once:
        #   func = etree.XPath('//b')
        #   func(xml_node)[0]
        # Helpers like lxml_xpath_str0(xml, expr) are not needed, the
        # templates can call the lxml api on 'xml' directly.
        template.globals['convert_para'] = convert_para
        params = {
            'xml': node.xml,
            'title': node.title,
            'nav_home': node.root,
        }
        if 'id' in node.xml.attrib:
            params['id'] = node.xml.attrib['id']
        else:
            # TODO: generate?
            logging.warning('No top-level "id" for "%s"', node.xml.tag)

        # nav params: up, prev, next
        if node.parent:
            params['nav_up'] = node.parent
        ix = files.index(node)
        if ix > 0:
            params['nav_prev'] = files[ix - 1]
        if ix < len(files) - 1:
            params['nav_next'] = files[ix + 1]

        # page specific vars
        # TODO: extract into functions?
        if node.name == 'book':
            params['toc'] = node.root
        # TODO: for 'refsect': toc params from xml,
        # all refsect1 + refsect1/title/text() from xml

        out.write(template.render(**params))
def main(index_file):
    """Chunk a docbook document and write one html file per chunk.

    Args:
      index_file: path of the main xml document. The html files are written
        to a 'db2html' directory next to it, which is created if needed.

    Returns:
      None on success, 1 if the root element of the document is not one of
      the CHUNK_TAGS. The caller passes the value to sys.exit().
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    dir_name = os.path.dirname(index_file)

    # for testing: dump to output file
    # out_file = os.path.join(dir_name, 'db2html.xml')
    # tree.write(out_file)

    # TODO: rename to 'html' later on
    out_dir = os.path.join(dir_name, 'db2html')
    os.makedirs(out_dir, exist_ok=True)

    # We need two passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags
    root_xml = tree.getroot()
    root = chunk(root_xml)
    if root is None:
        # chunk() only returns a node if the root element starts a chunk;
        # iterating over None below would fail with an obscure error.
        logging.error('Root element "%s" of "%s" is not a chunk tag',
                      root_xml.tag, index_file)
        return 1

    # 2) iterate the tree and output files
    # TODO: use multiprocessing
    files = list(PreOrderIter(root))
    for node in files:
        convert(out_dir, files, node)
if __name__ == '__main__':
    arg_parser = argparse.ArgumentParser(
        description='db2html - chunk docbook')
    arg_parser.add_argument('sources', nargs='*')
    options = arg_parser.parse_args()
    if len(options.sources) != 1:
        sys.exit('Expect one source file argument.')

    # Logging is only configured when GTKDOC_TRACE is set in the environment;
    # an empty value selects the INFO level.
    trace = os.environ.get('GTKDOC_TRACE')
    if trace is not None:
        level_name = trace.upper() or 'INFO'
        logging.basicConfig(
            stream=sys.stdout,
            level=logging.getLevelName(level_name),
            format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')

    sys.exit(main(options.sources[0]))