2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2017 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Prototype for builtin docbook processing
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
27 TODO: convert the docbook-xml to html
29 - refentry/index nav headers
30 - for refsect, we need a 'long-title' that also contains refpurpose
31 - figure out how to deal with all the possible docbook
32 - how can we report 'unhandled' data
33 - we need a generic transform for everything in a para (and others like
35 - this will walk the tree and replace nodes to convert from docbook to html
36 - we can start with 1:1, but most likely each transform will be a function
37 that mangles the sub tree and recurses for certain children (kind of what
41 - minify html: https://pypi.python.org/pypi/htmlmin/
44 sudo pip3 install anytree jinja2 lxml
47 python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
48 ll tests/gobject/docs/db2html
50 python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
51 ll tests/bugs/docs/db2html
52 cp tests/bugs/docs/html/*.{css,png} tests/bugs/docs/db2html/
53 xdg-open tests/bugs/docs/db2html/index.html
54 meld tests/bugs/docs/{html,db2html}
57 (cd tests/bugs/docs/; rm html-build.stamp; time make html-build.stamp)
66 from anytree
import Node
, PreOrderIter
67 from jinja2
import Environment
, FileSystemLoader
68 from lxml
import etree
71 # http://www.sagehill.net/docbookxsl/Chunking.html
75 'bibliography', # in article or book
79 'glossary', # in article or book
80 'index', # in article or book
85 'sect1', # except first
86 'section', # if equivalent to sect1
92 class ChunkParams(object):
93 def __init__(self
, prefix
, parent
=None):
99 # TODO: look up the abbrevs and hierarchy for other tags
100 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
102 'book': ChunkParams('bk'),
103 'chapter': ChunkParams('ch', 'book'),
104 'index': ChunkParams('ix', 'book'),
105 'sect1': ChunkParams('s', 'chapter'),
106 'section': ChunkParams('s', 'chapter'),
110 'book': etree
.XPath('./bookinfo/title/text()'),
111 'chapter': etree
.XPath('./title/text()'),
112 'index': etree
.XPath('./title/text()'),
113 'refentry': etree
.XPath('./refmeta/refentrytitle/text()'),
117 TOOL_PATH
= os
.path
.dirname(os
.path
.abspath(__file__
))
118 TEMPLATE_ENV
= Environment(
119 # loader=PackageLoader('gtkdoc', 'templates'),
120 # autoescape=select_autoescape(['html', 'xml'])
121 loader
=FileSystemLoader(os
.path
.join(TOOL_PATH
, 'templates')),
127 'book': TEMPLATE_ENV
.get_template('book.html'),
128 'index': TEMPLATE_ENV
.get_template('index.html'),
129 'refentry': TEMPLATE_ENV
.get_template('refentry.html'),
133 def gen_chunk_name(node
):
134 if 'id' in node
.attrib
:
135 return node
.attrib
['id']
138 if tag
not in CHUNK_PARAMS
:
139 CHUNK_PARAMS
[tag
] = ChunkParams(node
.tag
[:2])
140 logging
.warning('Add CHUNK_PARAMS for "%s"', tag
)
142 naming
= CHUNK_PARAMS
[tag
]
144 name
= ('%s%02d' % (naming
.prefix
, naming
.count
))
145 # handle parents to make names of nested tags unique
146 # TODO: we only need to prepend the parent if there are > 1 of them in the
148 # while naming.parent:
149 # parent = naming.parent
150 # if parent not in CHUNK_PARAMS:
152 # naming = CHUNK_PARAMS[parent]
153 # name = ('%s%02d' % (naming.prefix, naming.count)) + name
157 def get_chunk_title(node
):
159 if tag
not in TITLE_XPATH
:
160 logging
.warning('Add TITLE_XPATH for "%s"', tag
)
163 xpath
= TITLE_XPATH
[tag
]
164 return xpath(node
, smart_strings
=False)[0]
167 def chunk(xml_node
, parent
=None):
170 The first time, we're called with parent=None and in that case we return
171 the new_node as the root of the tree
173 # print('<%s %s>' % (xml_node.tag, xml_node.attrib))
174 if xml_node
.tag
in CHUNK_TAGS
:
175 # TODO: do we need to remove the xml-node from the parent?
176 # we generate toc from the files tree
177 # from copy import deepcopy
178 # sub_tree = deepcopy(xml_node)
179 # xml_node.getparent().remove(xml_node)
181 # sub_tree = etree.ElementTree(xml_node).getroot()
182 parent
= Node(xml_node
.tag
, parent
=parent
, xml
=xml_node
,
183 filename
=gen_chunk_name(xml_node
) + '.html',
184 title
=get_chunk_title(xml_node
))
185 for child
in xml_node
:
191 def convert__inner(xml
):
194 result
+= convert_tags
.get(child
.tag
)(child
)
def convert__unknown(xml):
    """Emit a placeholder for a tag that has no converter yet.

    Logs a warning that names the tag, then returns an HTML comment
    containing the tag name followed by a newline.
    """
    tag_name = xml.tag
    logging.warning('Add tag converter for "%s"', tag_name)
    return ''.join(('<!-- ', tag_name, '-->\n'))
203 def convert_para(xml
):
205 if xml
.tag
!= 'para':
206 result
= '<p class="%s">' % xml
.tag
209 result
+= convert__inner(xml
)
216 def convert_ulink(xml
):
218 result
= '<a class="%s" href="%s">%s</a>' % (xml
.tag
, url
, url
)
223 'para': convert_para
,
224 'ulink': convert_ulink
,
228 def convert(out_dir
, files
, node
):
229 """Convert the docbook chunks to a html file."""
231 logging
.info('Writing: %s', node
.filename
)
232 with
open(os
.path
.join(out_dir
, node
.filename
), 'wt') as html
:
233 if node
.name
in TEMPLATES
:
234 # TODO: ideally precompile common xpath exprs once:
235 # func = etree.XPath('//b')
237 # unused, we can call api :)
238 # def lxml_xpath_str0(xml, expr):
239 # return xml.xpath(expr, smart_strings=False)[0]
241 # def lxml_xpath(xml, expr):
242 # return xml.xpath(expr)
244 template
= TEMPLATES
[node
.name
]
245 template
.globals['convert_para'] = convert_para
249 'nav_home': node
.root
,
251 if 'id' in node
.xml
.attrib
:
252 params
['id'] = node
.xml
.attrib
['id']
255 logging
.warning('No top-level "id" for "%s"', node
.xml
.tag
)
256 # nav params: up, prev, next
258 params
['nav_up'] = node
.parent
259 ix
= files
.index(node
)
261 params
['nav_prev'] = files
[ix
- 1]
262 if ix
< len(files
) - 1:
263 params
['nav_next'] = files
[ix
+ 1]
266 # TODO: extract into functions?
267 if node
.name
== 'book':
268 params
['toc'] = node
.root
269 elif node
.name
== 'refsect':
270 # TODO: toc params from xml
271 # all refsect1 + refsect1/title/text() from xml
274 html
.write(template
.render(**params
))
276 logging
.warning('Add template for "%s"', node
.name
)
279 def main(index_file
):
280 tree
= etree
.parse(index_file
)
283 dir_name
= os
.path
.dirname(index_file
)
285 # for testing: dump to output file
286 # out_file = os.path.join(dir_name, 'db2html.xml')
287 # tree.write(out_file)
289 # TODO: rename to 'html' later on
290 out_dir
= os
.path
.join(dir_name
, 'db2html')
294 if e
.errno
!= errno
.EEXIST
:
297 # We need two passes:
298 # 1) recursively walk the tree and chunk it into a python tree so that we
299 # can generate navigation and link tags
300 files
= chunk(tree
.getroot())
301 # 2) iterate the tree and output files
302 # TODO: use multiprocessing
303 files
= list(PreOrderIter(files
))
305 convert(out_dir
, files
, node
)
308 if __name__
== '__main__':
309 parser
= argparse
.ArgumentParser(
310 description
='db2html - chunk docbook')
311 parser
.add_argument('sources', nargs
='*')
312 options
= parser
.parse_args()
313 if len(options
.sources
) != 1:
314 sys
.exit('Expect one source file argument.')
316 log_level
= os
.environ
.get('GTKDOC_TRACE')
320 logging
.basicConfig(stream
=sys
.stdout
,
321 level
=logging
.getLevelName(log_level
.upper()),
322 format
='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
324 sys
.exit(main(options
.sources
[0]))