mkhtml2: avoid double dict lookups
[gtk-doc.git] / gtkdoc / mkhtml2.py
blob62561291464dc5c08805638b7b56d1f719231ae8
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - tag converters:
39 - 'section'/'simplesect' - the first we convert as a chunk, the nested ones we
40 need to convert as 'sect{2,3,4,...}, we can track depth in 'ctx'
41 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
42 - inside 'glossentry' we're only handling 'glossterm' and 'glossdef'
43 - convert_{figure,table} need counters.
44 - check each docbook tag if it can contain #PCDATA, if not don't check for
45 xml.text/xml.tail and add a comment (# no PCDATA allowed here)
46 - consider some perf-warnings flag
47 - see 'No "id" attribute on'
48 - find a better way to print context for warnings
49 - we use 'xml.sourceline', but this all does not help a lot due to xi:include
50 - consolidate title handling:
51 - always use the titles-dict
52 - there only store what we have (xml, tag, ...)
53 - when chunking generate 'id's and add entries to titles-dict
54 - add accessors for title and raw_title that lazily get them
56 DIFFERENCES:
57 - titles
58 - we add the chunk label to the title in toc, on the page and in nav tooltips
59 - docbook xsl only sometimes adds the label to the titles and when it does it
60 adds name chunk type too (e.g. 'Part I.' instead of 'I.')
61 - navigation
62 - we always add an up-link except on the first page
63 - footer
64 - we're now omitting the footer
65 - tocs
66 - we always add 'Table of Contents' before a toc
67 - docbook does that for some pages, it is configurable
69 OPTIONAL:
70 - minify html: https://pypi.python.org/pypi/htmlmin/
72 Requirements:
73 sudo pip3 install anytree lxml pygments
75 Example invocation:
76 cd tests/bugs/docs/
77 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
78 xdg-open db2html/index.html
79 meld html db2html
81 Benchmarking:
82 cd tests/bugs/docs/;
83 rm html-build.stamp; time make html-build.stamp
84 """
86 import argparse
87 import errno
88 import logging
89 import os
90 import shutil
91 import sys
93 from anytree import Node, PreOrderIter
94 from copy import deepcopy
95 from glob import glob
96 from lxml import etree
97 from pygments import highlight
98 from pygments.lexers import CLexer
99 from pygments.formatters import HtmlFormatter
100 from timeit import default_timer as timer
102 from . import config, fixxref
# pygments setup
# Lazily constructed lexer cache, keyed by lower-case language name. Only the
# C lexer is created up front; convert_programlisting() adds further entries.
LEXERS = {
    'c': CLexer(),
}
# Shared formatter instance used for all highlighted listings.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
class ChunkParams(object):
    """Chunking settings for one docbook tag plus a running name counter.

    Attributes:
      prefix: string prepended to the sequence number of generated names
      parent: tag name of the expected parent chunk (or None)
      min_idx: smallest child index from which the tag gets its own file
      idx: next sequence number used when a chunk name must be generated
    """

    def __init__(self, prefix, parent=None, min_idx=0):
        self.prefix, self.parent, self.min_idx = prefix, parent, min_idx
        # generated names start counting at 1
        self.idx = 1
# min_idx value that no child index can reach: such tags never get a file of
# their own and are rendered as anchors inside the parent chunk.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
# Maps a tag name to a (title xpath, subtitle xpath or None) pair; the '_'
# entry is the fallback for every tag that has no entry of its own.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# selects every element that carries an 'id' attribute
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# next footnote number, counted per document (see convert_footnote())
footnote_idx = 1

# nested dict with subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
titles = {}

# files to copy
assets = set()
def encode_entities(text):
    """Return text with '&', '<' and '>' replaced by their HTML entities."""
    escaped = text
    # '&' is handled first so the ampersands of the entities added afterwards
    # are not escaped a second time
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
        escaped = escaped.replace(char, entity)
    return escaped
def raw_text(xml):
    """Serialize xml with the "text" output method and strip the result."""
    serialized = etree.tostring(xml, method="text", encoding=str)
    return serialized.strip()
def gen_chunk_name(node, chunk_params):
    """Generate a chunk file name

    The 'id' attribute of the node is used when present. Otherwise the name is
    built from the prefix in chunk_params and its sequence number, which is
    incremented for the next caller.
    """
    explicit_id = node.attrib.get('id')
    if explicit_id is not None:
        return explicit_id

    seq = chunk_params.idx
    chunk_params.idx = seq + 1
    name = '%s%02d' % (chunk_params.prefix, seq)

    # handle parents to make names of nested tags like in docbook
    # - we only need to prepend the parent if there are > 1 of them in the
    #   xml. None, the parents we have are not sufficient, e.g. 'index' can
    #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
    #   when we chunk explicitly and on each level maintain the 'idx'
    # while chunk_params.parent:
    #     parent = chunk_params.parent
    #     if parent not in CHUNK_PARAMS:
    #         break;
    #     chunk_params = CHUNK_PARAMS[parent]
    #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name

    logging.info('Gen chunk name: "%s"', name)
    return name
def get_chunk_titles(module, node):
    """Collect title and subtitle data for a chunk node.

    The xpaths are looked up in TITLE_XPATHS by the node's tag, falling back
    to the '_' entry.

    Returns a dict with the keys 'title', 'raw_title', 'title_tag',
    'subtitle' and 'subtitle_tag'. Every key is always present; values stay
    None when the corresponding element was not found.
    """
    tag = node.tag
    (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])

    ctx = {
        'module': module,
        'files': [],
    }
    result = {
        'title': None,
        # always provide the key so the result has the same shape with and
        # without a title
        'raw_title': None,
        'title_tag': None,
        'subtitle': None,
        'subtitle_tag': None
    }
    res = title(node)
    if res:
        # handle chunk label for tocs
        label = node.attrib.get('label')
        if label:
            label += '. '
        else:
            label = ''

        xml = res[0]
        # TODO: consider to eval 'title'/'raw_title' lazily
        result['title'] = label + ''.join(convert_title(ctx, xml))
        result['raw_title'] = encode_entities(raw_text(xml))
        if xml.tag != 'title':
            result['title_tag'] = xml.tag
        else:
            result['title_tag'] = tag

    if subtitle is not None:
        res = subtitle(node)
        if res:
            xml = res[0]
            result['subtitle'] = ''.join(convert_title(ctx, xml))
            result['subtitle_tag'] = xml.tag
    return result
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    Called with parent=None for the document root; the node created for it is
    returned as the root of the chunk tree. Every tag listed in CHUNK_PARAMS
    becomes a tree node: with a file name of its own when idx reaches the
    tag's min_idx, otherwise as an anchor inside the parent's file. Children
    of chunk nodes are processed recursively.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # detach a copy and remove the xml-node from the parent
                detached = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = detached
            filename, anchor = chunk_name + '.html', None
        else:
            # stays inside the parent's file, reachable through an anchor
            filename, anchor = parent.filename, '#' + chunk_name

        parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                      idx=idx, filename=filename, anchor=anchor,
                      **title_args)

        depth += 1
        idx = 0
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            # only chunk tags advance the child index
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
def add_id_links_and_titles(files, links):
    """Register link targets and titles for all elements with an 'id'.

    For each node in files, every element below node.xml that has an 'id' is
    added to links: the id equal to the chunk name maps to the file itself,
    all others to 'file#id'. If a title element is found for the element, an
    entry with the keys 'title', 'xml' and 'tag' is stored in the module-level
    titles dict.
    """
    for node in files:
        filename = node.filename
        # file name without the trailing 5 characters ('.html')
        chunk_name = filename[:-5]
        for elem in ID_XPATH(node.xml):
            elem_id = elem.attrib['id']
            if elem_id == chunk_name:
                links[elem_id] = filename
            else:
                links[elem_id] = filename + '#' + elem_id

            title_xpath = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
            found = title_xpath(elem)
            if not found:
                continue
            title_xml = found[0]
            # TODO: consider to eval 'title' lazily
            titles[elem_id] = {
                'title': encode_entities(raw_text(title_xml)),
                'xml': title_xml,
                'tag': elem.tag,
            }
def build_glossary(files):
    """Fill the module-level glossary dict from all 'glossary' chunks.

    For every glossentry that has both a glossterm and a glossdef child, the
    raw text of the term is mapped to the raw text of the definition. Entries
    missing either child are reported with a warning and skipped.
    """
    for node in files:
        if node.xml.tag != 'glossary':
            continue
        for term in GLOSSENTRY_XPATH(node.xml):
            # TODO: there can be all kind of things in a glossary. This only
            # supports what we commonly use, glossterm is mandatory
            key_node = term.find('glossterm')
            val_node = term.find('glossdef')
            if key_node is not None and val_node is not None:
                glossary[raw_text(key_node)] = raw_text(val_node)
                continue

            debug = []
            if key_node is None:
                debug.append('missing key')
            if val_node is None:
                debug.append('missing val')
            # a broken entry may lack the 'id' too; reporting it must not
            # raise a KeyError
            logging.warning('Broken glossentry "%s": %s',
                            term.attrib.get('id', '<no id>'), ','.join(debug))
342 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert each child of xml and append the produced pieces to result.

    The converter is looked up in convert_tags by the child's tag;
    convert__unknown is used for tags without an entry.
    """
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only the converted children."""
    pieces = []
    convert_inner(ctx, xml, pieces)
    return pieces
def convert_skip(ctx, xml):
    """Produce no output for the tag and do not look at its children."""
    return list()
def append_idref(attrib, result):
    """Append a named anchor to result if attrib contains an 'id'."""
    anchor_id = attrib.get('id')
    if anchor_id is None:
        return
    result.append('<a name="%s"></a>' % anchor_id)
def append_text(ctx, text, result):
    """Append the entity-encoded text to result.

    Empty or missing text is ignored. Whitespace-only text is ignored as well
    unless the 'no-strip' key is present in ctx.
    """
    if not text:
        return
    if 'no-strip' not in ctx and not text.strip():
        return
    result.append(encode_entities(text))
# tags we already warned about, so each missing converter is reported once
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for nodes without an entry in convert_tags.

    Sub-chunks produce no output here. XML comments are passed through as
    HTML comments. Other special nodes whose tag is not a string (processing
    instructions, entities) are dropped. Unknown elements are reported once
    per tag and rendered as their converted children wrapped in HTML comments
    naming the tag.
    """
    tag = xml.tag
    # don't recurse on subchunks
    if tag in CHUNK_PARAMS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']
    if not isinstance(tag, str):
        # lxml uses a factory function as the tag of processing instructions
        # and entities; concatenating it below would raise a TypeError
        return []

    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    result = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, result)
    result.append('<!-- /' + tag + '-->\n')
    return result
def convert_mediaobject_children(ctx, xml, result):
    """Append an <img> for the imageobject/imagedata found below xml.

    The text of textobject/phrase, if any, becomes the alt attribute. A
    non-empty fileref is recorded in the module-level assets set. Nothing is
    appended when there is no imageobject/imagedata.
    """
    # look for textobject/phrase
    textobject = xml.find('textobject')
    phrase = None if textobject is None else textobject.findtext('phrase')
    alt_text = ' alt="%s"' % phrase if phrase else ''

    # look for imageobject/imagedata
    imageobject = xml.find('imageobject')
    if imageobject is None:
        return
    imagedata = imageobject.find('imagedata')
    if imagedata is None:
        return

    # TODO(ensonic): warn on missing fileref attr?
    fileref = imagedata.attrib.get('fileref', '')
    if fileref:
        assets.add(fileref)
    result.append('<img src="%s"%s>' % (fileref, alt_text))
def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a section-like tag into a <div> with a heading.

    h_tag is the HTML heading element used for the title, inner_func converts
    the children (convert_inner by default). The id anchor and the heading
    are only emitted when the section has a title child.
    """
    html = ['<div class="%s">\n' % xml.tag]
    title_tag = xml.find('title')
    if title_tag is not None:
        append_idref(xml.attrib, html)
        heading = ''.join(convert_title(ctx, title_tag))
        html.append('<%s>%s</%s>' % (h_tag, heading, h_tag))
    append_text(ctx, xml.text, html)
    inner_func(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def xml_get_title(ctx, xml):
    """Return the converted title child of xml as a string.

    Logs a warning and returns '' when xml has no title child.
    """
    title_tag = xml.find('title')
    if title_tag is None:
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return ''.join(convert_title(ctx, title_tag))
435 # docbook tags
def convert_abstract(ctx, xml):
    """Convert 'abstract' into a div with a fixed "Abstract" title."""
    html = ['<div class="abstract">\n<p class="title"><b>Abstract</b></p>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_acronym(ctx, xml):
    """Convert 'acronym', using the glossary entry for its text as tooltip.

    The element text is the glossary key; a missing glossary entry yields an
    empty title attribute. The tail is kept as is (whitespace included) but
    entity-encoded like all other generated text.
    """
    # an empty element has text None, which must not be rendered as "None"
    key = xml.text or ''
    title = glossary.get(key, '')
    # TODO: print a sensible warning if missing
    result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (
        # the title ends up inside a double-quoted attribute
        encode_entities(title).replace('"', '&quot;'), encode_entities(key))]
    if xml.tail:
        result.append(encode_entities(xml.tail))
    return result
def convert_anchor(ctx, xml):
    """Convert 'anchor' into a named HTML anchor using its 'id' attribute."""
    anchor_id = xml.attrib['id']
    return ['<a name="%s"></a>' % anchor_id]
def convert_bookinfo(ctx, xml):
    """Convert 'bookinfo' into the title page div, closed by a rule."""
    result = ['<div class="titlepage">']
    convert_inner(ctx, xml, result)
    result.append('<hr>\n</div>')
    if xml.tail:
        # keep the tail (whitespace included), but never emit it unescaped
        result.append(encode_entities(xml.tail))
    return result
def convert_blockquote(ctx, xml):
    """Convert 'blockquote' into a div wrapping an HTML blockquote."""
    html = ['<div class="blockquote">\n<blockquote class="blockquote">']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</blockquote>\n</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_code(ctx, xml):
    """Convert the tag into <code> with the tag name as css class."""
    html = ['<code class="%s">' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code>')
    append_text(ctx, xml.tail, html)
    return html
def convert_colspec(ctx, xml):
    """Convert 'colspec' into <col>.

    'colname' becomes the class attribute and 'colwidth' the width attribute;
    each is only emitted when present.
    """
    attrs = xml.attrib
    html = ['<col']
    for name, template in (('colname', ' class="%s"'),
                           ('colwidth', ' width="%s"')):
        value = attrs.get(name)
        if value is not None:
            html.append(template % value)
    html.append('>\n')
    # is in tgroup and there can be no 'text'
    return html
def convert_command(ctx, xml):
    """Convert 'command' into <strong class="userinput"><code>."""
    html = ['<strong class="userinput"><code>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></strong>')
    append_text(ctx, xml.tail, html)
    return html
def convert_corpauthor(ctx, xml):
    """Convert 'corpauthor' into a div holding an <h3 class="corpauthor">."""
    html = ['<div><h3 class="corpauthor">\n']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</h3></div>\n')
    append_text(ctx, xml.tail, html)
    return html
def convert_div(ctx, xml):
    """Convert the tag into a <div> with the tag name as css class."""
    html = ['<div class="%s">\n' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_emphasis(ctx, xml):
    """Convert 'emphasis'.

    With a 'role' attribute the content goes into a span using the role as
    css class, otherwise into <span class="emphasis"><em>.
    """
    role = xml.attrib.get('role')
    if role is None:
        html, closing = ['<span class="emphasis"><em>'], '</em></span>'
    else:
        html, closing = ['<span class="%s">' % role], '</span>'
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append(closing)
    append_text(ctx, xml.tail, html)
    return html
def convert_em(ctx, xml):
    """Convert the tag into <em> with the tag name as css class."""
    html = ['<em class="%s">' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</em>')
    append_text(ctx, xml.tail, html)
    return html
def convert_em_code(ctx, xml):
    """Convert the tag into <em><code>, preceded inside by its id anchor."""
    html = ['<em class="%s"><code>' % xml.tag]
    append_idref(xml.attrib, html)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></em>')
    append_text(ctx, xml.tail, html)
    return html
def convert_entry(ctx, xml):
    """Convert a table 'entry' into the cell type stored in ctx.

    The HTML element name is read from ctx['table.entry']. 'role' becomes the
    css class and 'morerows' is turned into a rowspan of morerows + 1.
    """
    cell = ctx['table.entry']
    attrs = xml.attrib
    html = ['<' + cell]
    role = attrs.get('role')
    if role is not None:
        html.append(' class="%s"' % role)
    morerows = attrs.get('morerows')
    if morerows is not None:
        html.append(' rowspan="%s"' % (1 + int(morerows)))
    html.append('>')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</' + cell + '>')
    append_text(ctx, xml.tail, html)
    return html
def convert_figure(ctx, xml):
    """Convert 'figure' into a figure div with an optional bold title."""
    html = ['<div class="figure">\n']
    append_idref(xml.attrib, html)
    title_tag = xml.find('title')
    if title_tag is not None:
        # TODO(ensonic): Add a 'Figure X. ' prefix, needs a figure counter
        caption = ''.join(convert_title(ctx, title_tag))
        html.append('<p><b>%s</b></p>' % caption)
    html.append('<div class="figure-contents">')
    # TODO(ensonic): title can become alt on inner 'graphic' element
    convert_inner(ctx, xml, html)
    html.append('</div></div><br class="figure-break"/>')
    append_text(ctx, xml.tail, html)
    return html
def convert_footnote(ctx, xml):
    """Convert 'footnote' into a reference link and queue the footnote body.

    The footnote body is appended to the list stored under ctx['footnotes'];
    the returned pieces only contain the superscript link to it. Footnotes
    are numbered with the module-level footnote_idx counter.
    """
    # footnotes idx is not per page, but per doc
    global footnote_idx
    footnotes = ctx.get('footnotes', [])
    idx = footnote_idx
    footnote_idx += 1

    # need a pair of ids for each footnote (docbook generates different ids)
    this_id = 'footnote-%d' % idx
    that_id = 'ftn.' + this_id

    inner = ['<div id="%s" class="footnote">' % that_id]
    inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
        this_id, idx))
    # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
    # get double nested paras :/.
    # convert_inner(ctx, xml, inner)
    para = xml.find('para')
    if para is None:
        para = xml.find('simpara')
    if para is not None:
        # the text is None for an empty para or one starting with a child
        # element; it also has to be entity-encoded like all other text
        inner.append(encode_entities(para.text or ''))
    else:
        logging.warning('%s: Unhandled footnote content: %s', xml.sourceline, raw_text(xml))
    inner.append('</p></div>')
    footnotes.append(inner)
    ctx['footnotes'] = footnotes
    return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
        that_id, this_id, idx)]
def convert_formalpara(ctx, xml):
    """Convert 'formalpara' into a paragraph starting with its bold title.

    A missing title is reported by xml_get_title() and rendered empty; a
    missing para child simply contributes no content.
    """
    result = ['<p><b>%s</b>' % xml_get_title(ctx, xml)]
    para_tag = xml.find('para')
    if para_tag is not None:
        append_text(ctx, para_tag.text, result)
        convert_inner(ctx, para_tag, result)
        append_text(ctx, para_tag.tail, result)
    result.append('</p>')
    append_text(ctx, xml.tail, result)
    return result
def convert_glossdef(ctx, xml):
    """Convert 'glossdef' into <dd class="glossdef"> around its children."""
    html = ['<dd class="glossdef">']
    convert_inner(ctx, xml, html)
    html.append('</dd>\n')
    return html
def convert_glossdiv(ctx, xml):
    """Convert 'glossdiv' into an anchored heading followed by its children.

    The title child is removed from xml so it is not converted a second time
    with the other children. Without a title no heading is emitted.
    """
    result = []
    title_tag = xml.find('title')
    if title_tag is not None:
        title = title_tag.text
        xml.remove(title_tag)
        result.append(
            '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
        )
    convert_inner(ctx, xml, result)
    return result
def convert_glossentry(ctx, xml):
    """Convert 'glossentry' by converting its children, without a wrapper."""
    pieces = []
    convert_inner(ctx, xml, pieces)
    return pieces
def convert_glossterm(ctx, xml):
    """Convert 'glossterm' into a <dt> with a named anchor.

    The anchor name is the 'id' of a nested anchor child if there is one,
    otherwise 'glossterm-' followed by the term text. The term text is the
    tail of the anchor child followed by the element's own text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        glossid, text = '', ''
    else:
        glossid = anchor.attrib.get('id', '')
        text = anchor.tail or ''
    text += xml.text or ''
    if not glossid:
        glossid = 'glossterm-' + text
    return [
        '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
            glossid, text)
    ]
def convert_graphic(ctx, xml):
    """Convert 'graphic' into an <img> inside a div.

    A non-empty 'fileref' is recorded in the module-level assets set.
    """
    # TODO(ensonic): warn on missing fileref attr?
    image_path = xml.attrib.get('fileref', '')
    if image_path:
        assets.add(image_path)
    return ['<div><img src="%s"></div>' % image_path]
def convert_indexdiv(ctx, xml):
    """Convert 'indexdiv' into an anchored heading followed by its children.

    The title child is removed from xml so it is not converted a second time
    with the other children. Without a title no heading is emitted.
    """
    result = []
    title_tag = xml.find('title')
    if title_tag is not None:
        title = title_tag.text
        xml.remove(title_tag)
        result.append(
            '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
        )
    convert_inner(ctx, xml, result)
    return result
def convert_informaltable(ctx, xml):
    """Convert 'informaltable' into a div wrapping an HTML table.

    pgwide="1" adds width="100%" and frame="none" adds border="0".
    """
    result = ['<div class="informaltable"><table class="informaltable"']
    if xml.attrib.get('pgwide') == '1':
        result.append(' width="100%"')
    if xml.attrib.get('frame') == 'none':
        result.append(' border="0"')
    result.append('>\n')
    convert_inner(ctx, xml, result)
    result.append('</table></div>')
    if xml.tail:
        # keep the tail (whitespace included), but never emit it unescaped
        result.append(encode_entities(xml.tail))
    return result
def convert_inlinegraphic(ctx, xml):
    """Convert 'inlinegraphic' into a bare <img>.

    A non-empty 'fileref' is recorded in the module-level assets set.
    """
    # TODO(ensonic): warn on missing fileref attr?
    image_path = xml.attrib.get('fileref', '')
    if image_path:
        assets.add(image_path)
    return ['<img src="%s">' % image_path]
def convert_inlinemediaobject(ctx, xml):
    """Convert 'inlinemediaobject' into a span holding the image."""
    html = ['<span class="inlinemediaobject">']
    # no PCDATA allowed here
    convert_mediaobject_children(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_itemizedlist(ctx, xml):
    """Convert 'itemizedlist' into a div wrapping a <ul> with disc bullets."""
    result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, result)
    result.append('</ul></div>')
    if xml.tail:
        # keep the tail (whitespace included), but never emit it unescaped
        result.append(encode_entities(xml.tail))
    return result
def convert_link(ctx, xml):
    """Convert 'link' into an <a> pointing at the resolved 'linkend'.

    The target is resolved with fixxref.GetXRef(). If it resolves, the link
    gets a title attribute from the titles dict (when known) and an href made
    relative with fixxref.MakeRelativeXRef(). Otherwise the bad reference is
    reported and only the link text is emitted. A link without a 'linkend'
    is rendered as its plain content.
    """
    # a missing attribute is handled by the else branch below instead of
    # raising a KeyError
    linkend = xml.attrib.get('linkend')
    result = []
    if linkend:
        link_text = []
        append_text(ctx, xml.text, link_text)
        convert_inner(ctx, xml, link_text)
        text = ''.join(link_text)

        (tid, href) = fixxref.GetXRef(linkend)
        if href:
            title_attr = ''
            title = titles.get(tid)
            if title:
                title_attr = ' title="%s"' % title['title']

            href = fixxref.MakeRelativeXRef(ctx['module'], href)
            result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
        else:
            # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
            fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
            result = [text]
    else:
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
    append_text(ctx, xml.tail, result)
    return result
def convert_listitem(ctx, xml):
    """Convert 'listitem' into <li class="listitem"> around its children."""
    html = ['<li class="listitem">']
    convert_inner(ctx, xml, html)
    html.append('</li>')
    # no PCDATA allowed here, is in itemizedlist
    return html
def convert_literallayout(ctx, xml):
    """Convert 'literallayout' into a div with a paragraph led by <br>."""
    html = ['<div class="literallayout"><p><br>\n']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_mediaobject(ctx, xml):
    """Convert 'mediaobject' into a div holding the image."""
    html = ['<div class="mediaobject">\n']
    # no PCDATA allowed here
    convert_mediaobject_children(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_orderedlist(ctx, xml):
    """Convert 'orderedlist' into a div wrapping an <ol type="1">."""
    html = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, html)
    html.append('</ol></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_para(ctx, xml):
    """Convert 'para' into <p>, using 'role' as css class when present.

    The id anchor is placed inside the paragraph, before its content.
    """
    role = xml.attrib.get('role')
    opening = '<p>' if role is None else '<p class="%s">' % role
    html = [opening]
    append_idref(xml.attrib, html)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_para_like(ctx, xml):
    """Convert the tag into <p> with the tag name as css class.

    The id anchor is placed in front of the paragraph.
    """
    html = []
    append_idref(xml.attrib, html)
    html.append('<p class="%s">' % xml.tag)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_phrase(ctx, xml):
    """Convert 'phrase' into <span>, using 'role' as css class if present."""
    role = xml.attrib.get('role')
    html = ['<span', '>' if role is None else ' class="%s">' % role]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_primaryie(ctx, xml):
    """Convert 'primaryie' into a <dt> followed by an empty <dd>."""
    html = ['<dt>\n']
    convert_inner(ctx, xml, html)
    html.append('\n</dt>\n<dd></dd>\n')
    return html
def convert_pre(ctx, xml):
    """Convert the tag into <pre> with the tag name as css class.

    While the content is converted, 'no-strip' is set in ctx so that
    append_text() keeps whitespace-only text. The flag is removed again only
    by the outermost element that set it, so nested pre-like elements neither
    drop the flag early nor fail when removing it.
    """
    nested = 'no-strip' in ctx
    # Since we're inside <pre> don't skip newlines
    ctx['no-strip'] = True
    try:
        result = ['<pre class="%s">' % xml.tag]
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    finally:
        if not nested:
            del ctx['no-strip']
    append_text(ctx, xml.tail, result)
    return result
def convert_programlisting(ctx, xml):
    """Convert 'programlisting' into a <pre>, highlighted for examples.

    Listings with role="example" are highlighted with pygments and rendered
    as a two-column table (line numbers, code). The language comes from the
    'language' attribute, falling back to ctx['src-lang']; lexers are cached
    in LEXERS. If pygments has no lexer for the language, a warning is logged
    and the text is emitted as a plain, entity-encoded <pre>. All other
    listings are converted like regular content inside a <pre>.
    """
    # imported here as the module only imports CLexer from pygments.lexers
    from pygments.lexers import get_lexer_by_name
    from pygments.util import ClassNotFound

    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', ctx['src-lang']).lower()
            if lang not in LEXERS:
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    # remember the failure, so we don't search again
                    LEXERS[lang] = None
            lexer = LEXERS[lang]
            if lexer:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
                result.append(
                    '<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">\n'
                    '<tbody>\n'
                    '<tr>\n'
                    '<td class="listing_lines" align="right"><pre>%s</pre></td>\n'
                    '<td class="listing_code"><pre class="programlisting">%s</pre></td>\n'
                    '</tr>\n'
                    '</tbody>\n'
                    '</table>\n' % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                # not processed by pygments, so we have to escape it ourselves
                result.append(encode_entities(xml.text))
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    append_text(ctx, xml.tail, result)
    return result
def convert_quote(ctx, xml):
    """Convert 'quote' into nested spans with literal double quotes."""
    html = ['<span class="quote">"<span class="quote">']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>"</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_refsect1(ctx, xml):
    """Convert 'refsect1' into a section with an <h2> heading.

    The children are converted like convert_inner() does, with an extra <hr>
    in front of every refsect2 that directly follows another refsect2.
    """
    # Add a divider between two consecutive refsect2
    def convert_children_with_dividers(ctx, xml, result):
        previous = None
        for child in xml:
            follows_refsect2 = previous is not None and previous.tag == child.tag
            if child.tag == 'refsect2' and follows_refsect2:
                result.append('<hr>\n')
            converter = convert_tags.get(child.tag, convert__unknown)
            result.extend(converter(ctx, child))
            previous = child

    return convert_sect(ctx, xml, 'h2', convert_children_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert 'refsect2' into a section with an <h3> heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_refsect3(ctx, xml):
    """Convert 'refsect3' into a section with an <h4> heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_row(ctx, xml):
    """Convert a table 'row' into <tr> around its converted children."""
    html = ['<tr>\n']
    convert_inner(ctx, xml, html)
    html.append('</tr>\n')
    return html
def convert_sbr(ctx, xml):
    """Convert 'sbr' into an HTML line break."""
    line_break = '<br>'
    return [line_break]
def convert_sect1_tag(ctx, xml):
    """Convert 'sect1' into a section with an <h2> heading."""
    return convert_sect(ctx, xml, h_tag='h2')
def convert_sect2(ctx, xml):
    """Convert 'sect2' into a section with an <h3> heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_sect3(ctx, xml):
    """Convert 'sect3' into a section with an <h4> heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_simpara(ctx, xml):
    """Convert 'simpara' into a plain <p>."""
    html = ['<p>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_span(ctx, xml):
    """Convert the tag into <span> with the tag name as css class."""
    html = ['<span class="%s">' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_table(ctx, xml):
    """Convert 'table' into a div with an optional title and an HTML table.

    The table's summary attribute is the whitespace-normalized title text
    (a table titled "g_object_new" renders exactly as before); tables
    without a title get no summary attribute.
    """
    result = ['<div class="table">']
    append_idref(xml.attrib, result)
    summary_attr = ''
    title_tag = xml.find('title')
    if title_tag is not None:
        result.append('<p class="title"><b>')
        # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
        result.extend(convert_title(ctx, title_tag))
        result.append('</b></p>')
        summary = ' '.join(raw_text(title_tag).split())
        # the summary ends up inside a double-quoted attribute
        summary_attr = ' summary="%s"' % encode_entities(summary).replace('"', '&quot;')
    result.append('<div class="table-contents"><table class="table"%s border="1">' % summary_attr)

    convert_inner(ctx, xml, result)

    result.append('</table></div></div>')
    append_text(ctx, xml.tail, result)
    return result
def convert_tag(ctx, xml):
    """Convert 'tag' into <code>; a 'class' attribute selects the css class.

    Only the element's own text is emitted, children are not converted.
    """
    classval = xml.attrib.get('class')
    opening = '<code>' if classval is None else '<code class="sgmltag-%s">' % classval
    html = [opening]
    append_text(ctx, xml.text, html)
    html.append('</code>')
    append_text(ctx, xml.tail, html)
    return html
def convert_tbody(ctx, xml):
    """Convert 'tbody'; entries below it are rendered as <td>."""
    # read by convert_entry() to pick the cell element
    ctx['table.entry'] = 'td'
    html = ['<tbody>']
    convert_inner(ctx, xml, html)
    html.append('</tbody>')
    # is in tgroup and there can be no 'text'
    return html
def convert_tgroup(ctx, xml):
    """Convert 'tgroup': colspecs go into a <colgroup>, the rest follows.

    The colspec children are removed from xml after conversion, so they are
    not converted again with the remaining children.
    """
    # tgroup does not expand to anything, but the nested colspecs need to
    # be put into a colgroup
    html = []
    colspecs = xml.findall('colspec')
    if colspecs:
        html.append('<colgroup>\n')
        for colspec in colspecs:
            html.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    # is in informaltable and there can be no 'text'
    return html
def convert_thead(ctx, xml):
    """Convert 'thead'; entries below it are rendered as <th>."""
    # read by convert_entry() to pick the cell element
    ctx['table.entry'] = 'th'
    html = ['<thead>']
    convert_inner(ctx, xml, html)
    html.append('</thead>')
    # is in tgroup and there can be no 'text'
    return html
def convert_title(ctx, xml):
    """Convert a title element into its text and converted children.

    No wrapping markup is added; the caller decides how to present it.
    """
    # This is always explicitly called from some context
    pieces = []
    append_text(ctx, xml.text, pieces)
    convert_inner(ctx, xml, pieces)
    append_text(ctx, xml.tail, pieces)
    return pieces
def convert_ulink(ctx, xml):
    """Convert 'ulink' into an <a> pointing at its 'url' attribute.

    The link text is the element text, or the url itself if there is none.
    """
    url = xml.attrib['url']
    label = xml.text if xml.text else url
    html = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, label)]
    append_text(ctx, xml.tail, html)
    return html
def convert_userinput(ctx, xml):
    """Convert 'userinput' into <span class="command"><strong>."""
    html = ['<span class="command"><strong>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</strong></span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_variablelist(ctx, xml):
    """Convert 'variablelist' into a div wrapping a two-column table."""
    html = [
        '<div class="variablelist"><table border="0" class="variablelist">\n'
        '<colgroup>\n'
        '<col align="left" valign="top">\n'
        '<col>\n'
        '</colgroup>\n'
        '<tbody>'
    ]
    convert_inner(ctx, xml, html)
    html.append('</tbody>\n</table></div>')
    return html
def convert_varlistentry(ctx, xml):
    """Convert 'varlistentry' into a table row.

    The first cell holds the 'term' (converted as a span), the second one the
    converted children of the 'listitem'.
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # close the row (this used to emit a second opening tag)
    result.append('</tr>')
    return result
def convert_xref(ctx, xml):
    """Convert 'xref' into a link labelled with the target's type and title.

    The 'linkend' is resolved with fixxref.GetXRef() and the title data is
    looked up in the titles dict. If a KeyError is raised while building the
    link, a warning is logged and only the tail text is emitted.
    """
    result = []
    linkend = xml.attrib['linkend']
    (tid, href) = fixxref.GetXRef(linkend)
    try:
        title = titles[tid]
        # all sectN need to become 'section'
        tag = title['tag']
        if tag in ('sect1', 'sect2', 'sect3', 'sect4', 'sect5'):
            tag = 'section'
        label = ''.join(convert_title(ctx, title['xml']))
        result = [
            '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
            (href, title['title'], tag, label)
        ]
    except KeyError:
        logging.warning('invalid linkend "%s"', tid)

    append_text(ctx, xml.tail, result)
    return result
1116 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: docbook tag name -> converter function. Every converter
# is called as converter(ctx, xml) and returns a list of html fragments.
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'classname': convert_code,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'figure': convert_figure,
    'filename': convert_code,
    'firstterm': convert_em,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'graphic': convert_graphic,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_inlinemediaobject,
    'interfacename': convert_code,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_mediaobject,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_code,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_code,
    'returnvalue': convert_span,
    'row': convert_row,
    'sbr': convert_sbr,
    'screen': convert_pre,
    'section': convert_sect2,  # FIXME: need tracking of nesting
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'simplesect': convert_sect2,  # FIXME: need tracking of nesting
    'structfield': convert_em_code,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tag': convert_tag,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # titles are converted explicitly by their parents (see convert_title)
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1206 # conversion helpers
# Page prologue shared by all chunks. Takes two '%' arguments: the page
# title and the pre-rendered <link rel=...> tags for the <head>.
HTML_HEADER = (
    '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n'
    '<html>\n'
    '<head>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n'
    '<title>%s</title>\n'
    '%s<link rel="stylesheet" href="style.css" type="text/css">\n'
    '</head>\n'
    '<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">\n'
)
def generate_head_links(ctx):
    """Build the <link rel=...> navigation tags for the html <head>.

    Args:
      ctx: conversion context; 'nav_home' is required, 'nav_up', 'nav_prev'
           and 'nav_next' are optional. Each value needs 'filename' and
           'raw_title' attributes.

    Returns:
      a single string with one <link> tag per line
    """
    home = ctx['nav_home']
    links = ['<link rel="home" href="%s" title="%s">\n' % (home.filename, home.raw_title)]

    optional_links = (
        ('nav_up', '<link rel="up" href="%s" title="%s">\n'),
        ('nav_prev', '<link rel="prev" href="%s" title="%s">\n'),
        ('nav_next', '<link rel="next" href="%s" title="%s">\n'),
    )
    for key, template in optional_links:
        target = ctx.get(key)
        if target is not None:
            links.append(template % (target.filename, target.raw_title))

    return ''.join(links)
def generate_nav_links(ctx):
    """Build the table cells with the home/up/prev/next navigation icons.

    A missing 'nav_up', 'nav_prev' or 'nav_next' entry yields the matching
    '-insensitive' image without a link.

    Args:
      ctx: conversion context; 'nav_home' is required, the other nav
           entries are optional. Each value needs a 'filename' attribute.

    Returns:
      a single string with four <td> cells
    """
    home = ctx['nav_home']
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % home.filename
    ]

    # (ctx key, cell with link, cell without link)
    optional_cells = (
        ('nav_up',
         '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>',
         '<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_prev',
         '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>',
         '<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_next',
         '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>',
         '<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>'),
    )
    for key, linked, insensitive in optional_cells:
        target = ctx.get(key)
        if target is None:
            cells.append(insensitive)
        else:
            cells.append(linked % target.filename)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Recursively build the table-of-contents entries below a node.

    Args:
      ctx: conversion context dict (only passed on to the recursion)
      node: tree node whose 'children' are listed

    Returns:
      list of html string fragments (<dt> entries, nested levels wrapped
      in <dd><dl>...</dl></dd>); empty if the node has no children
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        link = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, link, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries.append('<dd><dl>')
            entries.extend(generate_toc(ctx, child))
            entries.append('</dl></dd>')
    return entries
def generate_basic_nav(ctx):
    """Build the navigation bar for pages without in-page shortcuts.

    Args:
      ctx: conversion context dict, passed to generate_nav_links()

    Returns:
      a string holding the navigation table; the shortcuts cell stays empty
      and the cells from generate_nav_links() follow it
    """
    # literal percent signs are doubled since the string is a '%' template;
    # the single '%s' takes the navigation cells
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts"></td>
%s
</tr>
</table>
""" % generate_nav_links(ctx)
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the navigation bar with one shortcut per division.

    Used for pages that are split into titled divisions (the glossary and
    index converters call it with their 'glossdiv'/'indexdiv' elements).

    Args:
      ctx: conversion context dict
      divs: iterable of xml elements; xml_get_title() supplies the label
      prefix: string put in front of the title to form the anchor name
      span_id: suffix for the id of the span wrapping the shortcuts

    Returns:
      a string holding the navigation table
    """
    ix_nav = []
    for s in divs:
        title = xml_get_title(ctx, s)
        ix_nav.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title))

    # three placeholders: span id, the joined shortcuts, the navigation cells
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts">
<span id="nav_%s">
%s
</span>
</td>
%s
</tr>
</table>
""" % (span_id, '\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation bar of a refentry page to 'result'.

    Besides the 'Top' shortcut, one shortcut is added for each refsect1
    that has an 'id' attribute containing a '.', unless its 'role'
    attribute ends in '_proto'.

    Args:
      ctx: conversion context dict
      refsect1s: list of refsect1 xml elements
      result: list of html fragments, extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for s in refsect1s:
        # don't list TOC sections (role="xxx_proto")
        if s.attrib.get('role', '').endswith("_proto"):
            continue
        # skip section without 'id' attrs
        ref_id = s.attrib.get('id')
        if ref_id is None:
            continue

        # skip foreign sections
        if '.' not in ref_id:
            continue

        title = xml_get_title(ctx, s)
        # the part after the first '.' names the span; '-' becomes '_'
        span_id = ref_id.split('.')[1].replace('-', '_')

        # non-breaking spaces are spelled as character references so that
        # they stay visible in the source
        result.append("""
<span id="nav_%s">
&#160;&#160;<span class="dim">|</span>&#160;
<a href="#%s" class="shortcut">%s</a>
</span>
""" % (span_id, ref_id, title))
    # the '%s' takes the navigation cells
    result.append("""
</td>
%s
</tr>
</table>
""" % generate_nav_links(ctx))
def generate_footer(ctx):
    """Build the footnotes block of a page.

    Args:
      ctx: conversion context dict; 'footnotes' (optional) is a list of
           fragment lists, one per footnote

    Returns:
      list of html string fragments; empty when ctx has no 'footnotes'
    """
    footnotes = ctx.get('footnotes')
    if footnotes is None:
        return []

    header = """<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""
    body = [fragment for note in footnotes for fragment in note]
    return [header] + body + ['</div>\n']
def get_id_path(node):
    """ Generate the 'id'.
    We need to walk up the xml-tree and check the positions for each sibling.
    When reaching the top of the tree we collect remaining index entries from
    the chunked-tree.

    Args:
      node: chunk tree node with 'xml', 'idx' and 'parent' attributes

    Returns:
      list of 1-based positions as strings, outermost level first
    """
    # collected innermost-first and reversed at the end, which avoids the
    # repeated insert-at-front
    ix = []
    xml = node.xml
    parent = xml.getparent()
    while parent is not None:
        # ask the parent for the position directly instead of copying the
        # child list with the deprecated getchildren()
        ix.append(str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    while node is not None:
        ix.append(str(node.idx + 1))
        node = node.parent

    ix.reverse()
    return ix
def get_id(node):
    """Return the anchor id for a chunk node.

    The 'id' attribute of the node's xml element is used when it is set
    and non-empty. Otherwise an id of the form 'id-1.2.3' is generated
    from the position path returned by get_id_path().

    Args:
      node: chunk tree node with an 'xml' attribute

    Returns:
      the id string
    """
    xml = node.xml
    node_id = xml.attrib.get('id')
    if not node_id:
        # TODO: this is moot if nothing links to it, we could also consider
        # to omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     xml.sourceline, xml.tag)
        return 'id-' + '.'.join(get_id_path(node))
    return node_id
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk into a page with title, table of contents and body.

    Shared implementation for the chunk types that only differ in the css
    class of the wrapping div and the heading tag of the title.

    Args:
      ctx: conversion context dict; ctx['node'] is the chunk to convert
      div_class: css class for the <div> wrapping the page body
      title_tag: html tag name used for the title (e.g. 'h1')

    Returns:
      list of html string fragments for the complete page
    """
    node = ctx['node']
    page = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    if node.title:
        page.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (
            title_tag, get_id(node), node.title, title_tag))

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        page.append("""<p><b>Table of Contents</b></p>
<div class="toc">
<dl class="toc">
""")
        page.extend(toc_entries)
        page.append("""</dl>
</div>
""")
    convert_inner(ctx, node.xml, page)
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
1437 # docbook chunks
def convert_book(ctx):
    """Convert the 'book' chunk into the start page.

    The page consists of the title bar, the converted 'bookinfo' (when the
    book has one), the table of contents for the whole tree and the
    footnotes.

    Args:
      ctx: conversion context dict; ctx['node'] is the book node

    Returns:
      list of html string fragments for the complete page
    """
    node = ctx['node']
    result = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
<tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    ]
    # a book without <bookinfo> simply gets no info block instead of
    # failing with an IndexError
    bookinfo = node.xml.find('bookinfo')
    if bookinfo is not None:
        result.extend(convert_bookinfo(ctx, bookinfo))
    result.append("""<div class="toc">
<dl class="toc">
""")
    result.extend(generate_toc(ctx, node.root))
    result.append("""</dl>
</div>
""")
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: div class 'chapter', title in <h2>."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_glossary(ctx):
    """Convert a 'glossary' chunk into a page.

    The navigation bar gets one shortcut per 'glossdiv' child; the body is
    made of the converted glossdivs.

    Args:
      ctx: conversion context dict; ctx['node'] is the glossary node

    Returns:
      list of html string fragments for the complete page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'),
        # the heading level follows the depth of the node in the tree
        """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth),
    ]
    for glossdiv in glossdivs:
        page.extend(convert_glossdiv(ctx, glossdiv))
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
def convert_index(ctx):
    """Convert an 'index' chunk into a page.

    The navigation bar gets one shortcut per nested 'indexdiv'; the body
    is made of the converted nested indexdivs. An index without any
    'indexdiv' child yields a page with only the title.

    Args:
      ctx: conversion context dict; ctx['node'] is the index node

    Returns:
      list of html string fragments for the complete page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv; an empty index has no outer
    # indexdiv and find() returns None then
    outer = node.xml.find('indexdiv')
    indexdivs = outer.findall('indexdiv') if outer is not None else []

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        # the heading level follows the depth of the node in the tree
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_part(ctx):
    """Convert a 'part' chunk: div class 'part', title in <h1>."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
def convert_preface(ctx):
    """Convert a 'preface' chunk into a page (title and body, no toc).

    Args:
      ctx: conversion context dict; ctx['node'] is the preface node

    Returns:
      list of html string fragments for the complete page
    """
    node = ctx['node']
    page = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">',
    ]
    if node.title:
        page.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title))
    convert_inner(ctx, node.xml, page)
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
def convert_reference(ctx):
    """Convert a 'reference' chunk: div class 'reference', title in <h1>."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
def convert_refentry(ctx):
    """Convert a 'refentry' chunk into a page.

    The page has a navigation bar with shortcuts to the refsect1 sections,
    a header block with title, subtitle and an optional gallery image
    (taken from refmeta/refmiscinfo/inlinegraphic) and the converted
    refsect1 sections.

    Args:
      ctx: conversion context dict; ctx['node'] is the refentry node

    Returns:
      list of html string fragments for the complete page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # look for the gallery image, stop at the first missing level
    gallery = ''
    refmeta = node.xml.find('refmeta')
    refmiscinfo = refmeta.find('refmiscinfo') if refmeta is not None else None
    inlinegraphic = refmiscinfo.find('inlinegraphic') if refmiscinfo is not None else None
    if inlinegraphic is not None:
        gallery = ''.join(convert_inlinegraphic(ctx, inlinegraphic))

    page = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, page)
    page.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s %s</p>
</td>
<td class="gallery_image" valign="top" align="right">%s</td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for section in refsect1s:
        page.extend(convert_refsect1(ctx, section))
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
def convert_section(ctx):
    """Convert a 'section' chunk: div class 'section', title in <h2>."""
    return convert_chunk_with_toc(ctx, div_class='section', title_tag='h2')
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: div class 'sect1', title in <h2>."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1590 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: chunk (node) name -> page converter. Every converter is
# called as converter(ctx) and returns the html fragments of a whole page.
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'section': convert_section,
    'sect1': convert_sect1,
}
def generate_nav_nodes(files, node):
    """Determine the navigation targets (home, up, prev, next) for a node.

    Args:
      files: list of nodes; prev/next are the neighbours of 'node' in it
      node: the current node, must be contained in 'files'

    Returns:
      dict with 'nav_home' and, where applicable, 'nav_up', 'nav_prev'
      and 'nav_next'

    Raises:
      ValueError: if 'node' is not in 'files'
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    if position + 1 < len(files):
        nav['nav_next'] = files[position + 1]
    return nav
def convert(out_dir, module, files, node, src_lang):
    """Convert the docbook chunks to a html file.

    The output file is opened before the converter lookup, so it is also
    created (empty) when no converter is registered for the node name; a
    warning is logged in that case.

    Args:
      out_dir: already created output dir
      module: stored in the context as 'module'
      files: list of nodes in the tree in pre-order
      node: current tree node
      src_lang: stored in the context as 'src-lang'
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
            'src-lang': src_lang,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add chunk converter for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Recursively build the <sub> entries of the devhelp2 chapter list.

    Args:
      node: tree node whose 'children' are listed

    Returns:
      list of xml string fragments; nodes without children become
      self-closing <sub/> tags
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.raw_title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.raw_title, child.filename))
        entries.extend(create_devhelp2_toc(child))
        entries.append('</sub>\n')
    return entries
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into xml attributes.

    The condition is a '|' separated list of 'key:value' items (since,
    deprecated, ...). Double quotes are replaced by '&quot;'. Items
    without a ':' get an empty value.

    Args:
      node: xml element (only its 'attrib' mapping is read)

    Returns:
      a string starting with a space, e.g. ' since="2.0" deprecated=""',
      or '' when the node has no 'condition' attribute
    """
    condition = node.attrib.get('condition')
    if condition is None:
        return ''

    keywords = []
    for item in condition.replace('"', '&quot;').split('|'):
        # only the first ':' separates key and value; without a ':' the
        # value is empty (deprecated can have no description)
        key, _, value = item.partition(':')
        keywords.append('{}="{}"'.format(key, value))
    return ' ' + ' '.join(keywords)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 <keyword> line for a refsect2 element.

    Args:
      node: refsect2 xml element with 'id' and 'role' attributes; the id
            must be present in the module level 'titles' table
      base_link: link prefix, the node id gets appended

    Returns:
      the <keyword .../> line as a string
    """
    node_id = node.attrib['id']
    keyword_type = node.attrib['role']
    name = titles[node_id]['title']
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, base_link + node_id, conditions)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 <keyword> line for an entry inside a refsect3.

    Args:
      node: xml element with a 'role' attribute (and optional 'condition')
      base_link: link prefix, 'name' gets appended
      title: value for the keyword's 'name' attribute
      name: anchor name used for the link

    Returns:
      the <keyword .../> line as a string
    """
    keyword_type = node.attrib['role']
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, base_link + name, conditions)
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file.

    The file lists the chapters (from the chunk tree) and one keyword per
    refsect2 that has a 'role', plus keywords for the enum and struct
    members found below them.

    Args:
      out_dir: already created output dir
      module: module name, used for the file name and the book name
      xml: root element of the document
      files: list of chunk nodes; files[0].root is the top of the tree
    """
    # book title and online url are optional; xpath() returns a (possibly
    # empty) list, never None
    title = ''
    online_url = ''
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        title_texts = bookinfo.xpath('./title/text()')
        if title_texts:
            title = title_texts[0]
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_url = online_urls[0]
    # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append(""" </chapters>
<functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum members: the keyword name is the text of the details node
            for refsect3_node in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(
                    refsect3_node, base_link, details_node.text, name))
            # struct members: the id serves as both keyword name and anchor
            for refsect3_node in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(
                    refsect3_node, base_link, name, name))

    result.append(""" </functions>
</book>
""")
    # only touch the file once the content has been built completely
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
def get_dirs(uninstalled):
    """Determine the gtk-doc data dir and the style dir.

    Args:
      uninstalled: if true, look next to the running script (or in
                   $ABS_TOP_SRCDIR) instead of the configured data dir

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled, src_lang, paths):
    """Run the complete docbook to html conversion.

    The work is done in numbered steps; the wall-clock time of each step
    is logged (at warning level).

    Args:
      module: module name
      index_file: path of the main xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the data files
      src_lang: passed on to the chunk conversion
      paths: list of extra directories searched for assets
    """
    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    started = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - started)

    # 2) copy datafiles
    started = timer()
    # TODO: handle additional images
    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_source = os.path.join(styledir, 'style.css')
    for datafile in glob(os.path.join(styledir, '*.png')) + [css_source]:
        shutil.copy(datafile, out_dir)
    # ... and append the style defs of the formatter to the copied css
    css_copy = os.path.join(out_dir, 'style.css')
    with open(css_copy, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - started)

    # 3) load xref targets
    started = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us
    # the globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - started)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    # can generate navigation and link tags.
    started = timer()
    chunk_tree = chunk(tree.getroot(), module)
    # only nodes without an anchor are kept as output files
    files = [f for f in PreOrderIter(chunk_tree) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - started)

    # 5) extract tables:
    started = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - started)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    started = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - started)

    # 7) iterate the tree and output files
    started = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node, src_lang)
    logging.warning("7: %7.3lf: create html", timer() - started)

    # 8) copy assets over
    started = timer()
    search_dirs = set(paths + [os.getcwd()])
    for asset in assets:
        logging.info('trying %s in %s', asset, str(search_dirs))
        copied = False
        # every directory is tried, also after a successful copy
        for directory in search_dirs:
            try:
                shutil.copy(os.path.join(directory, asset), out_dir)
                copied = True
            except FileNotFoundError:
                pass
        if not copied:
            logging.warning('file %s not found in path (did you add --path?)', asset)
    logging.warning("8: %7.3lf: copy assets", timer() - started)
def run(options):
    """Entry point: create the output dir and run main(), then exit.

    Args:
      options: parsed command line options; 'args' holds the module name
               and the main document, 'uninstalled', 'src_lang' and
               'path' are passed on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # reuse an already existing output dir; other errors propagate
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled,
                  options.src_lang, options.path))