2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2017 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Prototype for builtin docbook processing
The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
27 TODO: convert the docbook-xml to html
28 - more templates or maybe don't use jinja2 at all
29 - refentry/index nav headers
30 - check each docbook tag if it can contain #PCDATA, if not don't check for
- integrate syntax-highlighting from fixxref
33 - maybe handle the combination <informalexample><programlisting> directly
34 - switch to http://pygments.org/docs/quickstart/?
35 - integrate MakeXRef from fixxref
36 - first create devhelp2 output
39 - minify html: https://pypi.python.org/pypi/htmlmin/
42 sudo pip3 install anytree jinja2 lxml
45 python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
46 ll tests/gobject/docs/db2html
48 python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
49 ll tests/bugs/docs/db2html
50 cp tests/bugs/docs/html/*.{css,png} tests/bugs/docs/db2html/
51 xdg-open tests/bugs/docs/db2html/index.html
52 meld tests/bugs/docs/{html,db2html}
55 (cd tests/bugs/docs/; rm html-build.stamp; time make html-build.stamp)
64 from anytree
import Node
, PreOrderIter
65 from jinja2
import Environment
, FileSystemLoader
66 from lxml
import etree
68 # TODO(ensonic): requires gtk-doc to be installed, rewrite later
69 sys
.path
.append('/usr/share/gtk-doc/python')
70 from gtkdoc
.fixxref
import NoLinks
73 # http://www.sagehill.net/docbookxsl/Chunking.html
77 'bibliography', # in article or book
81 'glossary', # in article or book
82 'index', # in article or book
87 'sect1', # except first
88 'section', # if equivalent to sect1
94 class ChunkParams(object):
95 def __init__(self
, prefix
, parent
=None):
101 # TODO: look up the abbrevs and hierarchy for other tags
102 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
104 'book': ChunkParams('bk'),
105 'chapter': ChunkParams('ch', 'book'),
106 'index': ChunkParams('ix', 'book'),
107 'sect1': ChunkParams('s', 'chapter'),
108 'section': ChunkParams('s', 'chapter'),
112 'book': etree
.XPath('./bookinfo/title/text()'),
113 'chapter': etree
.XPath('./title/text()'),
114 'index': etree
.XPath('./title/text()'),
115 'refentry': etree
.XPath('./refmeta/refentrytitle/text()'),
119 TOOL_PATH
= os
.path
.dirname(os
.path
.abspath(__file__
))
120 TEMPLATE_ENV
= Environment(
121 # loader=PackageLoader('gtkdoc', 'templates'),
122 # autoescape=select_autoescape(['html', 'xml'])
123 loader
=FileSystemLoader(os
.path
.join(TOOL_PATH
, 'templates')),
124 # extensions=['jinja2.ext.do'],
131 'book': TEMPLATE_ENV
.get_template('book.html'),
132 'index': TEMPLATE_ENV
.get_template('index.html'),
133 'refentry': TEMPLATE_ENV
.get_template('refentry.html'),
137 def gen_chunk_name(node
):
138 if 'id' in node
.attrib
:
139 return node
.attrib
['id']
142 if tag
not in CHUNK_PARAMS
:
143 CHUNK_PARAMS
[tag
] = ChunkParams(node
.tag
[:2])
144 logging
.warning('Add CHUNK_PARAMS for "%s"', tag
)
146 naming
= CHUNK_PARAMS
[tag
]
148 name
= ('%s%02d' % (naming
.prefix
, naming
.count
))
149 # handle parents to make names of nested tags unique
150 # TODO: we only need to prepend the parent if there are > 1 of them in the
152 # while naming.parent:
153 # parent = naming.parent
154 # if parent not in CHUNK_PARAMS:
156 # naming = CHUNK_PARAMS[parent]
157 # name = ('%s%02d' % (naming.prefix, naming.count)) + name
161 def get_chunk_title(node
):
163 if tag
not in TITLE_XPATH
:
164 logging
.warning('Add TITLE_XPATH for "%s"', tag
)
167 xpath
= TITLE_XPATH
[tag
]
168 return xpath(node
, smart_strings
=False)[0]
171 def chunk(xml_node
, parent
=None):
174 The first time, we're called with parent=None and in that case we return
175 the new_node as the root of the tree
177 # print('<%s %s>' % (xml_node.tag, xml_node.attrib))
178 if xml_node
.tag
in CHUNK_TAGS
:
179 # TODO: do we need to remove the xml-node from the parent?
180 # we generate toc from the files tree
181 # from copy import deepcopy
182 # sub_tree = deepcopy(xml_node)
183 # xml_node.getparent().remove(xml_node)
185 # sub_tree = etree.ElementTree(xml_node).getroot()
186 parent
= Node(xml_node
.tag
, parent
=parent
, xml
=xml_node
,
187 filename
=gen_chunk_name(xml_node
) + '.html',
188 title
=get_chunk_title(xml_node
))
189 for child
in xml_node
:
def escape_entities(text):
    """Escape the characters that are special in HTML/XML character data.

    Replaces ``&``, ``<`` and ``>`` with their entity references so that the
    text can be embedded verbatim in HTML (e.g. inside ``<pre>``).

    Args:
        text: the string to escape.

    Returns:
        The escaped string.
    """
    # '&' has to be handled first, otherwise the ampersands introduced by the
    # '<' and '>' replacements would be escaped a second time.
    return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
def convert_inner(xml, result):
    """Convert all child elements of `xml` and add their output to `result`.

    Every child is dispatched by tag name through `convert_tags`; tags that
    have no registered converter are handled by `convert__unknown`.

    Args:
        xml: the parent element whose children are converted.
        result: list of HTML string fragments; extended in place.
    """
    for child in xml:
        result.extend(convert_tags.get(child.tag, convert__unknown)(child))
206 def convert_ignore(xml
):
def convert__unknown(xml):
    """Fallback converter for tags that have no dedicated converter.

    Wraps the converted children in a pair of HTML comments naming the tag, so
    that unhandled elements stay visible in the generated output.

    Args:
        xml: the element to convert.

    Returns:
        A list of HTML string fragments.
    """
    # Warn only once per tag; `missing_tags` (defined elsewhere in this
    # module) records the tags that have already been reported.
    if xml.tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', xml.tag)
        missing_tags[xml.tag] = True
    result = ['<!-- ' + xml.tag + '-->\n']
    convert_inner(xml, result)
    result.append('<!-- /' + xml.tag + '-->\n')
    # Callers extend/join the returned fragments, so the list must be returned.
    return result
224 def convert_refsect(xml
, h_tag
, inner_func
=convert_inner
):
225 result
= ['<div class="%s">\n' % xml
.tag
]
226 title
= xml
.find('title')
227 if title
is not None:
228 if 'id' in xml
.attrib
:
229 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
230 result
.append('<%s>%s</%s>' % (h_tag
, title
.text
, h_tag
))
233 result
.append(xml
.text
)
234 inner_func(xml
, result
)
235 result
.append('</div>')
237 result
.append(xml
.tail
)
244 def convert_colspec(xml
):
248 result
.append(' class="%s"' % a
['colname'])
250 result
.append(' width="%s"' % a
['colwidth'])
252 # is in tgroup and there can be no 'text'
def convert_div(xml):
    """Render `xml` as a ``<div>`` whose CSS class is the element's tag name.

    Args:
        xml: the element to convert.

    Returns:
        A list of HTML string fragments: opening tag, leading text, converted
        children, closing tag and tail text.
    """
    result = ['<div class="%s">\n' % xml.tag]
    # .text and .tail are None when absent; None must not end up in the
    # fragment list because the fragments are later concatenated.
    if xml.text:
        result.append(xml.text)
    convert_inner(xml, result)
    result.append('</div>')
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_em_class(xml):
    """Render `xml` as ``<em class="TAG"><code>...</code></em>``.

    Args:
        xml: the element to convert; its tag name becomes the CSS class.

    Returns:
        A list of HTML string fragments.
    """
    result = ['<em class="%s"><code>' % xml.tag]
    # .text and .tail are None when absent; skip them so that only strings are
    # collected.
    if xml.text:
        result.append(xml.text)
    convert_inner(xml, result)
    result.append('</code></em>')
    if xml.tail:
        result.append(xml.tail)
    return result
278 def convert_entry(xml
):
280 if 'role' in xml
.attrib
:
281 result
.append(' class="%s">' % xml
.attrib
['role'])
285 result
.append(xml
.text
)
286 convert_inner(xml
, result
)
287 result
.append('</td>')
289 result
.append(xml
.tail
)
293 def convert_informaltable(xml
):
294 result
= ['<div class="informaltable"><table class="informaltable"']
296 if 'pgwide' in a
and a
['pgwide'] == '1':
297 result
.append(' width="100%"')
298 if 'frame' in a
and a
['frame'] == 'none':
299 result
.append(' border="0"')
301 convert_inner(xml
, result
)
302 result
.append('</table></div>')
304 result
.append(xml
.tail
)
def convert_itemizedlist(xml):
    """Render an itemizedlist as a ``<ul>`` wrapped in a ``<div>``.

    Args:
        xml: the itemizedlist element.

    Returns:
        A list of HTML string fragments.
    """
    result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(xml, result)
    result.append('</ul></div>')
    # .tail is None when nothing follows the element; only append real text.
    if xml.tail:
        result.append(xml.tail)
    return result
317 def convert_link(xml
):
318 # TODO: inline more fixxref functionality
319 # TODO: need to build an 'id' map and resolve against internal links too
320 linkend
= xml
.attrib
['linkend']
321 if linkend
in NoLinks
:
325 result
= ['<!-- GTKDOCLINK HREF="%s" -->' % linkend
]
327 result
.append(xml
.text
)
328 convert_inner(xml
, result
)
330 result
.append('<!-- /GTKDOCLINK -->')
332 result
.append(xml
.tail
)
def convert_listitem(xml):
    """Render a listitem as ``<li class="listitem">``.

    Args:
        xml: the listitem element.

    Returns:
        A list of HTML string fragments.
    """
    result = ['<li class="listitem">']
    convert_inner(xml, result)
    result.append('</li>')
    # is in itemizedlist and there can be no 'text'
    return result
def convert_literal(xml):
    """Render `xml` as ``<code>`` with the element's tag name as CSS class.

    Args:
        xml: the element to convert.

    Returns:
        A list of HTML string fragments.
    """
    result = ['<code class="%s">' % xml.tag]
    # .text and .tail are None when absent; skip them so that only strings are
    # collected.
    if xml.text:
        result.append(xml.text)
    convert_inner(xml, result)
    result.append('</code>')
    if xml.tail:
        result.append(xml.tail)
    return result
355 def convert_para(xml
):
357 if xml
.tag
!= 'para':
358 result
= ['<p class="%s">' % xml
.tag
]
360 result
.append(xml
.text
)
361 convert_inner(xml
, result
)
362 result
.append('</p>')
364 result
.append(xml
.tail
)
368 def convert_phrase(xml
):
370 if 'role' in xml
.attrib
:
371 result
.append(' class="%s">' % xml
.attrib
['role'])
375 result
.append(xml
.text
)
376 convert_inner(xml
, result
)
377 result
.append('</span>')
379 result
.append(xml
.tail
)
def convert_programlisting(xml):
    """Render a programlisting as ``<pre class="programlisting">``.

    The element's leading text is passed through `escape_entities` before it
    is emitted.

    Args:
        xml: the programlisting element.

    Returns:
        A list of HTML string fragments.
    """
    result = ['<pre class="programlisting">']
    # .text is None for an empty listing; escape_entities() needs a string.
    if xml.text:
        result.append(escape_entities(xml.text))
    convert_inner(xml, result)
    result.append('</pre>')
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_refsect1(xml):
    """Render a refsect1 with an ``<h2>`` heading.

    Delegates to `convert_refsect`, but supplies its own child converter that
    inserts an ``<hr>`` between directly adjacent refsect2 children.

    Args:
        xml: the refsect1 element.

    Returns:
        Whatever `convert_refsect` returns for this element.
    """
    # Add a divider between two consecutive refsect2
    def convert_inner(xml, result):
        # Track the previous sibling so that the divider is only emitted
        # between two refsect2 elements, never before the first one.
        prev = None
        for child in xml:
            if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
                result.append('<hr>\n')
            result.extend(convert_tags.get(child.tag, convert__unknown)(child))
            prev = child

    return convert_refsect(xml, 'h2', convert_inner)
def convert_refsect2(xml):
    """Render a refsect2; its title is emitted as an ``<h3>`` heading."""
    heading = 'h3'
    return convert_refsect(xml, heading)
def convert_refsect3(xml):
    """Render a refsect3; its title is emitted as an ``<h4>`` heading."""
    heading = 'h4'
    return convert_refsect(xml, heading)
414 def convert_row(xml
):
416 convert_inner(xml
, result
)
417 result
.append('</tr>\n')
def convert_span(xml):
    """Render `xml` as ``<span>`` with the element's tag name as CSS class.

    Args:
        xml: the element to convert.

    Returns:
        A list of HTML string fragments.
    """
    result = ['<span class="%s">' % xml.tag]
    # .text and .tail are None when absent; skip them so that only strings are
    # collected.
    if xml.text:
        result.append(xml.text)
    convert_inner(xml, result)
    result.append('</span>')
    if xml.tail:
        result.append(xml.tail)
    return result
432 def convert_tbody(xml
):
434 convert_inner(xml
, result
)
435 result
.append('</tbody>')
436 # is in tgroup and there can be no 'text'
440 def convert_tgroup(xml
):
441 # tgroup does not expand to anything, but the nested colspecs need to
442 # be put into a colgroup
443 cols
= xml
.findall('colspec')
446 result
.append('<colgroup>\n')
448 result
.extend(convert_colspec(col
))
450 result
.append('</colgroup>\n')
451 convert_inner(xml
, result
)
452 # is in informaltable and there can be no 'text'
def convert_ulink(xml):
    """Render a ulink as an HTML anchor.

    Args:
        xml: the ulink element; its 'url' attribute becomes the href.

    Returns:
        A list of HTML string fragments: the anchor and, if present, the tail
        text following the element.

    Raises:
        KeyError: if the element has no 'url' attribute.
    """
    url = xml.attrib['url']
    # An empty ulink has text None; show the URL instead of the string "None".
    label = xml.text if xml.text else url
    result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, label)]
    # .tail is None when nothing follows the element; only append real text.
    if xml.tail:
        result.append(xml.tail)
    return result
464 'colspec': convert_colspec
,
465 'entry': convert_entry
,
466 'function': convert_span
,
467 'indexterm': convert_ignore
,
468 'informalexample': convert_div
,
469 'informaltable': convert_informaltable
,
470 'itemizedlist': convert_itemizedlist
,
471 'link': convert_link
,
472 'listitem': convert_listitem
,
473 'literal': convert_literal
,
474 'para': convert_para
,
475 'parameter': convert_em_class
,
476 'phrase': convert_phrase
,
477 'programlisting': convert_programlisting
,
478 'releaseinfo': convert_para
,
479 'refsect1': convert_refsect1
,
480 'refsect2': convert_refsect2
,
481 'refsect3': convert_refsect3
,
482 'returnvalue': convert_span
,
484 'structfield': convert_em_class
,
485 'tbody': convert_tbody
,
486 'tgroup': convert_tgroup
,
487 'type': convert_span
,
488 'ulink': convert_ulink
,
489 'warning': convert_div
,
493 def convert(out_dir
, files
, node
):
494 """Convert the docbook chunks to a html file."""
496 def jinja_convert(xml
):
497 return ''.join(convert_tags
.get(xml
.tag
, convert__unknown
)(xml
))
499 logging
.info('Writing: %s', node
.filename
)
500 with
open(os
.path
.join(out_dir
, node
.filename
), 'wt') as html
:
501 if node
.name
in TEMPLATES
:
502 # TODO: ideally precompile common xpath exprs once:
503 # func = etree.XPath('//b')
505 # unused, we can call api :)
506 # def lxml_xpath_str0(xml, expr):
507 # return xml.xpath(expr, smart_strings=False)[0]
509 # def lxml_xpath(xml, expr):
510 # return xml.xpath(expr)
512 template
= TEMPLATES
[node
.name
]
513 template
.globals['convert_block'] = jinja_convert
517 'nav_home': node
.root
,
519 if 'id' in node
.xml
.attrib
:
520 params
['id'] = node
.xml
.attrib
['id']
523 logging
.warning('No top-level "id" for "%s"', node
.xml
.tag
)
524 # nav params: up, prev, next
526 params
['nav_up'] = node
.parent
527 ix
= files
.index(node
)
529 params
['nav_prev'] = files
[ix
- 1]
530 if ix
< len(files
) - 1:
531 params
['nav_next'] = files
[ix
+ 1]
533 # TODO: call a top-level python converter instead
534 # generate_{book,chapter,index,refentry}(files, node)
536 # We need to rewrite all other converters to take
537 # (xml, files, node) or (xml, params)
538 # where params is sort of like what we have above
540 html
.write(template
.render(**params
))
542 logging
.warning('Add template for "%s"', node
.name
)
545 def main(index_file
):
546 tree
= etree
.parse(index_file
)
549 dir_name
= os
.path
.dirname(index_file
)
551 # for testing: dump to output file
552 # out_file = os.path.join(dir_name, 'db2html.xml')
553 # tree.write(out_file)
555 # TODO: rename to 'html' later on
556 out_dir
= os
.path
.join(dir_name
, 'db2html')
560 if e
.errno
!= errno
.EEXIST
:
563 # We need multiple passes:
564 # 1) recursively walk the tree and chunk it into a python tree so that we
565 # can generate navigation and link tags.
566 # also collect all 'id' attributes on the way and build map of
    #    id:rel-link (in fixxref it is Links[])
568 files
= chunk(tree
.getroot())
569 # 2) iterate the tree and output files
570 # TODO: use multiprocessing
571 files
= list(PreOrderIter(files
))
573 convert(out_dir
, files
, node
)
574 # 3) create a devhelp2.xsl
575 # - toc under 'chapter'
576 # - keywords under 'functions' from all refsect2 and refsect3
579 if __name__
== '__main__':
580 parser
= argparse
.ArgumentParser(
581 description
='db2html - chunk docbook')
582 parser
.add_argument('sources', nargs
='*')
583 options
= parser
.parse_args()
584 if len(options
.sources
) != 1:
585 sys
.exit('Expect one source file argument.')
587 log_level
= os
.environ
.get('GTKDOC_TRACE')
591 logging
.basicConfig(stream
=sys
.stdout
,
592 level
=logging
.getLevelName(log_level
.upper()),
593 format
='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
595 sys
.exit(main(options
.sources
[0]))