db2html: more planning
[gtk-doc.git] / tools / db2html.py
blobf7f498d8177015db5fcb85316ffb42ccfc05c309
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2017 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Prototype for builtin docbook processing
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
27 TODO: convert the docbook-xml to html
28 - try macros for the navigation
30 Requirements:
31 sudo pip3 install anytree jinja2 lxml
33 Examples:
34 python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
35 ll tests/gobject/docs/db2html
36 python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
37 ll tests/bugs/docs/db2html
38 """
40 import argparse
41 import errno
42 import logging
43 import os
44 import sys
46 from anytree import Node
47 from jinja2 import Template
48 from lxml import etree
# Docbook element names for which chunk() creates a separate output file.
# Taken from http://www.sagehill.net/docbookxsl/Chunking.html
# NOTE: the conditions noted on some entries below are not evaluated by the
# code yet, membership in this list is the only criterion.
CHUNK_TAGS = [
    'appendix',
    'article',
    # in article or book
    'bibliography',
    'book',
    'chapter',
    'colophon',
    # in article or book
    'glossary',
    # in article or book
    'index',
    'part',
    'preface',
    'refentry',
    'reference',
    # except first
    'sect1',
    # if equivalent to sect1
    'section',
    'set',
    'setindex',
]
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
#
# Naming state per chunk tag:
#   prefix - abbreviation the generated file name starts with
#   count  - running counter, incremented for every generated name
#   parent - tag name of the enclosing chunk type (None for the top level)
CHUNK_NAMING = {
    'book': dict(prefix='bk', count=0, parent=None),
    'chapter': dict(prefix='ch', count=0, parent='book'),
    'index': dict(prefix='ix', count=0, parent='book'),
    'sect1': dict(prefix='s', count=0, parent='chapter'),
    'section': dict(prefix='s', count=0, parent='chapter'),
}
DOCTYPE = '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">'

# Jinja2 source for a 'book' chunk. The template expects a callable named
# 'xpath' in its globals that evaluates an XPath expression relative to the
# xml element of the chunk. The result is passed through the 'e' (escape)
# filter because Template() does not autoescape and the title text may contain
# characters such as '&' or '<' that are not valid verbatim in HTML.
BOOK_TEMPLATE = DOCTYPE + """
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>{{ xpath('./bookinfo/title/text()') | e }}</title>
</head>
<body>
</body>
</html>
"""

# Compiled templates, keyed by the docbook tag name of the chunk.
TEMPLATES = {
    'book': Template(BOOK_TEMPLATE),
}
def gen_chunk_name(node):
    """Return the base name (without extension) for the file of a chunk.

    If the xml element has an 'id' attribute, that is used as is. Otherwise a
    name is built from the tag's prefix in CHUNK_NAMING and a running,
    two-digit counter (e.g. 'ch01'). The counter lives in CHUNK_NAMING, so
    every call without an 'id' advances the global state for that tag.

    Args:
        node: the xml element; only its 'tag' and 'attrib' are read.

    Returns:
        The chunk name as a string.
    """
    if 'id' in node.attrib:
        return node.attrib['id']

    tag = node.tag
    if tag not in CHUNK_NAMING:
        # Unknown tag: register a fallback entry with the same keys as the
        # predefined entries, so that code reading 'parent' keeps working.
        CHUNK_NAMING[tag] = {
            'prefix': tag[:2],
            'count': 0,
            'parent': None,
        }
        logging.warning('Add CHUNK_NAMING for "%s"', tag)

    naming = CHUNK_NAMING[tag]
    naming['count'] += 1
    name = '%s%02d' % (naming['prefix'], naming['count'])
    # handle parents to make names of nested tags unique
    # TODO: we only need to prepend the parent if there are > 1 of them in the
    #       xml
    # while naming['parent']:
    #     parent = naming['parent']
    #     if parent not in CHUNK_NAMING:
    #         break
    #     naming = CHUNK_NAMING[parent]
    #     name = ('%s%02d' % (naming['prefix'], naming['count'])) + name
    return name
def chunk(out_dir, xml_node, parent=None):
    """Build the tree of output files for the xml tree below xml_node.

    Every element whose tag is listed in CHUNK_TAGS gets an anytree Node that
    is attached to the closest enclosing chunk and carries the xml element
    ('xml') and the path of the html file to write ('filename'). Elements that
    are not chunks create no Node, but their children are still visited.

    For the initial call parent is None; the Node created for xml_node is then
    the root of the files tree. If xml_node itself is not a chunk, the parent
    that was passed in is returned unchanged.
    """
    # print('<%s %s>' % (xml_node.tag, xml_node.attrib))
    if xml_node.tag in CHUNK_TAGS:
        html_name = gen_chunk_name(xml_node) + '.html'
        # print('*** %s ***' % (os.path.join(out_dir, html_name)))
        # TODO: do we need to remove the xml-node from the parent?
        #       we generate toc from the files tree
        # from copy import deepcopy
        # ..., xml=deepcopy(xml_node), ...
        # xml_node.getparent().remove(xml_node)
        parent = Node(
            xml_node.tag,
            parent=parent,
            xml=xml_node,
            filename=os.path.join(out_dir, html_name))

    # Descend regardless of whether this element started a new chunk.
    for sub_node in xml_node:
        chunk(out_dir, sub_node, parent)

    return parent
def convert(node):
    """Convert the docbook chunks to html files.

    Writes the html file for node and then recurses into node.children. If
    there is no entry in TEMPLATES for the chunk's tag, a warning is logged
    and the (then empty) file is still created.

    Args:
        node: a node of the files tree; 'filename', 'name', 'xml' and
            'children' are read.
    """
    logging.info('Writing: %s', node.filename)
    # The templates declare charset=UTF-8, so write UTF-8 regardless of the
    # locale's preferred encoding.
    with open(node.filename, 'wt', encoding='utf-8') as html:
        if node.name in TEMPLATES:
            # TODO: ideally precompile common xpath exprs once:
            # func = etree.XPath("//b")
            # func(xml_node)[0]
            def lxml_xpath(expr):
                result = node.xml.xpath(expr, smart_strings=False)
                if isinstance(result, list):
                    # Node-set result: use the first match, or an empty
                    # string if the expression matched nothing.
                    return result[0] if result else ''
                # Scalar result (string, number, boolean): pass it through.
                return result

            template = TEMPLATES[node.name]
            template.globals['xpath'] = lxml_xpath
            # TODO: extract from xml
            params = {}
            html.write(template.render(**params))
        else:
            logging.warning('Add template for "%s"', node.name)

    for child in node.children:
        convert(child)
def main(index_file):
    """Chunk the docbook document index_file and write the html files.

    The document is parsed and its xml-includes are resolved. The output goes
    into a 'db2html' directory next to index_file, which is created if needed.

    Args:
        index_file: path of the main docbook xml document.

    Returns:
        0 on success, 1 if the root element of the document is not a chunk
        tag and therefore no files tree could be built.
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    dir_name = os.path.dirname(index_file)

    # for testing: dump to output file
    # out_file = os.path.join(dir_name, 'db2html.xml')
    # tree.write(out_file)

    # TODO: rename to 'html' later on
    out_dir = os.path.join(dir_name, 'db2html')
    os.makedirs(out_dir, exist_ok=True)

    # We need two passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags
    files = chunk(out_dir, tree.getroot())
    if files is None:
        # chunk() only returns a node if the root element itself is a chunk.
        logging.error('Root element <%s> of "%s" is not a chunk tag',
                      tree.getroot().tag, index_file)
        return 1
    # 2) walk the tree and output files
    # TODO: iterate with the anytree iterator and use multiprocessing
    convert(files)
    return 0
if __name__ == '__main__':
    # Command line: exactly one docbook source file is expected.
    arg_parser = argparse.ArgumentParser(description='db2html - chunk docbook')
    arg_parser.add_argument('sources', nargs='*')
    args = arg_parser.parse_args()
    if len(args.sources) != 1:
        sys.exit('Expect one source file argument.')

    # Logging is only configured if GTKDOC_TRACE is set in the environment;
    # an empty value selects the INFO level.
    trace = os.environ.get('GTKDOC_TRACE')
    if trace is not None:
        level_name = (trace or 'INFO').upper()
        logging.basicConfig(
            stream=sys.stdout,
            level=logging.getLevelName(level_name),
            format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')

    sys.exit(main(args.sources[0]))