mkhtml2: tolerate wonky glossterms
[gtk-doc.git] / gtkdoc / mkhtml2.py
blobac10e7acd0fd61ce31bf0952a643e9f28f185cd7
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - tag converters:
39 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
40 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
41 attr on the <img> tag of the 'imageobject'
42 - check each docbook tag if it can contain #PCDATA, if not don't check for
43 xml.text
44 - consider some perf-warnings flag
45 - see 'No "id" attribute on'
46 - find a better way to print context for warnings
47 - we use 'xml.sourceline', but this all does not help a lot due to xi:include
48 - copy images
49 - do we need to find them on the respective tags and search them in the path
50 setup by '--path'
51 - commandline options
52 - mkhtml:
53 --path 'Extra source directories' - used to find images
54 - fixxref:
56 DIFFERENCES:
57 - titles
58 - we add the chunk label to the title in toc, on the page and in nav tooltips
59 - docbook xsl only sometimes adds the label to the titles and when it does it
60 adds name chunk type too (e.g. 'Part I.' instead of 'I.')
61 - navigation
62 - we always add an up-link except on the first page
63 - footer
64 - we're now omitting the footer
65 - tocs
66 - we always add 'Table of Contents' before a toc
67 - docbook does that for some pages, it is configurable
69 OPTIONAL:
70 - minify html: https://pypi.python.org/pypi/htmlmin/
72 Requirements:
73 sudo pip3 install anytree lxml pygments
75 Example invocation:
76 cd tests/bugs/docs/
77 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
78 xdg-open db2html/index.html
79 meld html db2html
81 Benchmarking:
82 cd tests/bugs/docs/;
83 rm html-build.stamp; time make html-build.stamp
84 """
86 import argparse
87 import errno
88 import logging
89 import os
90 import shutil
91 import sys
93 from anytree import Node, PreOrderIter
94 from copy import deepcopy
95 from glob import glob
96 from lxml import etree
97 from pygments import highlight
98 from pygments.lexers import CLexer
99 from pygments.formatters import HtmlFormatter
100 from timeit import default_timer as timer
102 from . import config, fixxref
# pygments setup
# Cache of lexer instances keyed by language name. Only the C lexer is created
# up front; convert_programlisting() adds further languages on first use.
LEXERS = dict(c=CLexer())
# convert_programlisting() wraps the highlighted code in its own <pre>.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
class ChunkParams(object):
    """Chunking parameters for one docbook tag.

    Attributes:
      prefix: file name prefix used by gen_chunk_name() when a node has no 'id'
      parent: tag name of the parent chunk type, or None
      min_idx: chunk() only starts a new file if the child index is >= this
      idx: sequence number for the next generated chunk name, starts at 1
    """

    def __init__(self, prefix, parent=None, min_idx=0):
        self.idx = 1
        self.min_idx = min_idx
        self.parent = parent
        self.prefix = prefix
# Used as ChunkParams.min_idx: chunk() compares the child index with '>=', and
# no index reaches infinity, so such tags never start a file of their own.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}

# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
# Maps a tag to a (title xpath, subtitle xpath or None) pair; '_' is the
# fallback used for every tag without an entry of its own.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose'),
    ),
}
# selects every element that carries an 'id' attribute
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = dict()

# running footnote number, shared by all pages (see convert_footnote())
footnote_idx = 1

# nested dict, keyed by element id, with subkeys:
#   title: textual title
#   tag: chunk tag
#   xml: title xml node
titles = dict()
def gen_chunk_name(node, chunk_params):
    """Return the chunk file name (without extension) for a node.

    A node with an 'id' attribute is named after that id. Otherwise the name
    is built from the prefix in chunk_params and its running sequence number,
    which is incremented as a side effect.
    """
    attrs = node.attrib
    if 'id' in attrs:
        return attrs['id']

    seq = chunk_params.idx
    chunk_params.idx = seq + 1
    generated = '%s%02d' % (chunk_params.prefix, seq)

    # TODO: docbook prepends the names of the parent chunks for nested tags.
    # We would only need that if there are > 1 parents in the xml, and the
    # static 'parent' in CHUNK_PARAMS is not sufficient for it (e.g. 'index'
    # can be in 'book' or 'part' or ...). Maybe track the chunk parents while
    # chunking and maintain the 'idx' on each level.

    logging.info('Gen chunk name: "%s"', generated)
    return generated
def get_chunk_titles(module, node):
    """Collect title and subtitle of a chunk node.

    Returns a dict with the keys 'title', 'title_tag', 'subtitle' and
    'subtitle_tag'; entries stay None if the respective xml node is missing.
    """
    chunk_tag = node.tag
    title_xpath, subtitle_xpath = TITLE_XPATHS.get(chunk_tag, TITLE_XPATHS['_'])

    # minimal conversion context for convert_title()
    ctx = {'module': module, 'files': []}
    info = dict.fromkeys(('title', 'title_tag', 'subtitle', 'subtitle_tag'))

    matches = title_xpath(node)
    if matches:
        # handle chunk label for tocs
        label = node.attrib.get('label')
        prefix = label + '. ' if label else ''

        title_xml = matches[0]
        info['title'] = prefix + ''.join(convert_title(ctx, title_xml))
        # a plain <title> is reported under the tag of the chunk itself
        info['title_tag'] = chunk_tag if title_xml.tag == 'title' else title_xml.tag

    if subtitle_xpath:
        matches = subtitle_xpath(node)
        if matches:
            subtitle_xml = matches[0]
            info['subtitle'] = ''.join(convert_title(ctx, subtitle_xml))
            info['subtitle_tag'] = subtitle_xml.tag
    return info
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    Walks the xml recursively and creates a tree Node for every tag listed in
    CHUNK_PARAMS. On the initial call parent is None and the returned node is
    the root of the tree. Each tree node gets a filename (and an anchor, if it
    shares the file of its parent) before its children are processed.
    """
    tag = xml_node.tag
    params = CHUNK_PARAMS.get(tag)
    if params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # continue with a detached copy and remove the xml-node from
                # the parent
                detached = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = detached
            filename = chunk_name + '.html'
            anchor = None
        else:
            # stays on the page of the parent, addressed through an anchor
            filename = parent.filename
            anchor = '#' + chunk_name

        parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                      idx=idx,
                      filename=filename, anchor=anchor,
                      **title_args)

        depth += 1
        idx = 0

    for child in xml_node:
        chunk(child, module, depth, idx, parent)
        # idx only counts the children that are chunk tags themselves
        if child.tag in CHUNK_PARAMS:
            idx += 1

    return parent
def add_id_links_and_titles(files, links):
    """Register link targets and titles for all ids in the given chunks.

    For every element with an 'id' attribute an entry is added to links
    (id -> href). If the element has a title, it is recorded in the global
    'titles' dict as well.
    """
    for node in files:
        filename = node.filename
        # filename without the 5 trailing characters (the '.html' extension)
        own_id = filename[:-5]
        for elem in ID_XPATH(node.xml):
            elem_id = elem.attrib['id']
            # the id of the chunk itself links to the page, others to anchors
            if elem_id == own_id:
                links[elem_id] = filename
            else:
                links[elem_id] = filename + '#' + elem_id

            title_xpath = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
            found = title_xpath(elem)
            if not found:
                continue
            title_xml = found[0]
            # TODO: consider to eval those lazily
            titles[elem_id] = {
                'title': etree.tostring(title_xml, method="text", encoding=str).strip(),
                'xml': title_xml,
                'tag': elem.tag,
            }
def build_glossary(files):
    """Fill the global 'glossary' dict from all 'glossary' chunks.

    Each glossentry with both a glossterm and a glossdef is stored as
    term text -> definition text. Incomplete entries are reported with a
    warning and skipped.
    """
    for node in files:
        if node.xml.tag != 'glossary':
            continue
        for term in GLOSSENTRY_XPATH(node.xml):
            # TODO: there can be all kind of things in a glossary. This only supports
            # what we commonly use, glossterm is mandatory
            key_node = term.find('glossterm')
            val_node = term.find('glossdef')
            if key_node is not None and val_node is not None:
                key = etree.tostring(key_node, method="text", encoding=str).strip()
                val = etree.tostring(val_node, method="text", encoding=str).strip()
                glossary[key] = val
                # logging.debug('glosentry: %s:%s', key, val)
            else:
                debug = []
                if key_node is None:
                    debug.append('missing key')
                if val_node is None:
                    debug.append('missing val')
                # One placeholder per argument, otherwise the logging module
                # reports a formatting error instead of the message. The 'id'
                # is looked up with a fallback, as a broken entry may lack it.
                logging.warning('Unexpected glossentry %s: %s',
                                term.attrib.get('id', '(no id)'), ','.join(debug))
330 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert all children of xml and add the html fragments to result.

    Tags without an entry in convert_tags are handled by convert__unknown().
    """
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only the converted children."""
    fragments = []
    convert_inner(ctx, xml, fragments)
    return fragments
def convert_skip(ctx, xml):
    """Drop the tag and everything below it."""
    return list()
def append_idref(attrib, result):
    """Append a named anchor to result if attrib contains an 'id'."""
    if 'id' not in attrib:
        return
    result.append('<a name="%s"></a>' % attrib['id'])
def append_text(ctx, text, result):
    """Append text, escaped for html, to result.

    Empty text is skipped. Whitespace-only text is skipped too, unless the
    'no-strip' key is set in ctx (e.g. while inside a <pre>).
    """
    if text and ('no-strip' in ctx or text.strip()):
        # The xml parser hands us decoded text, so a literal '&' has to be
        # escaped again. It must be replaced first, otherwise the '&' of the
        # '&lt;'/'&gt;' entities produced below would get escaped as well.
        result.append(
            text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
# tags we already warned about in convert__unknown()
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for tags without an entry in convert_tags.

    Sub-chunks produce nothing, xml comments are passed through as html
    comments. Any other tag is reported once and its converted children are
    enclosed in a pair of html comments naming the tag.
    """
    # don't recurse on subchunks
    if xml.tag in CHUNK_PARAMS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']

    tag = xml.tag
    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    fragments = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, fragments)
    fragments.append('<!-- /' + tag + '-->\n')
    return fragments
def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a section-like tag into a div with a heading.

    h_tag is the html heading tag used for the title (e.g. 'h2'), inner_func
    converts the children and defaults to convert_inner().
    """
    html = ['<div class="%s">\n' % xml.tag]
    title_xml = xml.find('title')
    if title_xml is not None:
        # the anchor is only emitted together with a title
        append_idref(xml.attrib, html)
        heading = ''.join(convert_title(ctx, title_xml))
        html.append('<%s>%s</%s>' % (h_tag, heading, h_tag))
    append_text(ctx, xml.text, html)
    inner_func(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def xml_get_title(ctx, xml):
    """Return the converted title of xml, or '' (with a warning) if missing."""
    title_xml = xml.find('title')
    if title_xml is None:
        logging.warning('%s: Expected title tag under "%s %s"',
                        xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return ''.join(convert_title(ctx, title_xml))
402 # docbook tags
def convert_abstract(ctx, xml):
    """Convert <abstract> into a div with a fixed 'Abstract' heading."""
    html = ['<div class="abstract">\n<p class="title"><b>Abstract</b></p>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_acronym(ctx, xml):
    """Convert <acronym>, using the glossary entry (if any) as tooltip."""
    acronym = xml.text
    tooltip = glossary.get(acronym, '')
    # TODO: print a sensible warning if missing
    html = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (
        tooltip, acronym)]
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_anchor(ctx, xml):
    """Convert <anchor id="..."> into a named html anchor."""
    anchor_id = xml.attrib['id']
    return ['<a name="%s"></a>' % anchor_id]
def convert_bookinfo(ctx, xml):
    """Convert <bookinfo> into the title page div, closed by a rule."""
    html = ['<div class="titlepage">']
    convert_inner(ctx, xml, html)
    html.append('<hr>\n</div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_blockquote(ctx, xml):
    """Convert <blockquote> into a html blockquote inside a div."""
    html = ['<div class="blockquote">\n<blockquote class="blockquote">']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</blockquote>\n</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_code(ctx, xml):
    """Convert an inline tag into <code> with the docbook tag as class."""
    html = ['<code class="%s">' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code>')
    append_text(ctx, xml.tail, html)
    return html
def convert_colspec(ctx, xml):
    """Convert <colspec> into a <col>, mapping colname/colwidth if given."""
    attrs = xml.attrib
    html = ['<col']
    for name, fmt in (('colname', ' class="%s"'), ('colwidth', ' width="%s"')):
        if name in attrs:
            html.append(fmt % attrs[name])
    html.append('>\n')
    # is in tgroup and there can be no 'text'
    return html
def convert_command(ctx, xml):
    """Convert <command> into bold code with the 'userinput' class."""
    html = ['<strong class="userinput"><code>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></strong>')
    append_text(ctx, xml.tail, html)
    return html
def convert_corpauthor(ctx, xml):
    """Convert <corpauthor> into a h3 heading inside a div."""
    html = ['<div><h3 class="corpauthor">\n']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</h3></div>\n')
    append_text(ctx, xml.tail, html)
    return html
def convert_div(ctx, xml):
    """Convert a block tag into a div with the docbook tag as class."""
    html = ['<div class="%s">\n' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_emphasis(ctx, xml):
    """Convert <emphasis>; a 'role' attribute replaces <em> by a span class."""
    attrs = xml.attrib
    if 'role' in attrs:
        opening, closing = '<span class="%s">' % attrs['role'], '</span>'
    else:
        opening, closing = '<span class="emphasis"><em>', '</em></span>'
    html = [opening]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append(closing)
    append_text(ctx, xml.tail, html)
    return html
def convert_em_class(ctx, xml):
    """Convert an inline tag into <em><code>, with an anchor if it has an id."""
    html = ['<em class="%s"><code>' % xml.tag]
    append_idref(xml.attrib, html)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></em>')
    append_text(ctx, xml.tail, html)
    return html
def convert_entry(ctx, xml):
    """Convert a table <entry> into a cell.

    The cell tag is read from ctx['table.entry'], which convert_thead() and
    convert_tbody() set to 'th' and 'td' respectively.
    """
    cell_tag = ctx['table.entry']
    attrs = xml.attrib
    html = ['<' + cell_tag]
    if 'role' in attrs:
        html.append(' class="%s"' % attrs['role'])
    if 'morerows' in attrs:
        # docbook counts the additional rows, html the total
        rowspan = 1 + int(attrs['morerows'])
        html.append(' rowspan="%s"' % rowspan)
    html.append('>')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</' + cell_tag + '>')
    append_text(ctx, xml.tail, html)
    return html
def convert_footnote(ctx, xml):
    """Convert <footnote> into a reference mark and queue the footnote body.

    The html for the footnote body is appended to the ctx['footnotes'] list,
    the returned fragments only contain the link to it.
    """
    footnotes = ctx.get('footnotes', [])
    # footnotes idx is not per page, but per doc
    global footnote_idx
    idx = footnote_idx
    footnote_idx += 1

    # need a pair of ids for each footnote (docbook generates different ids)
    this_id = 'footnote-%d' % idx
    that_id = 'ftn.' + this_id

    inner = ['<div id="%s" class="footnote">' % that_id]
    inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
        this_id, idx))
    # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
    # get double nested paras :/.
    # convert_inner(ctx, xml, inner)
    para = xml.find('para')
    if para is None:
        para = xml.find('simpara')
    if para is not None:
        # text is None for an empty para or one that starts with a child tag;
        # keep the fragment list free of non-string entries
        inner.append(para.text or '')
    else:
        logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
                        etree.tostring(xml, method="text", encoding=str).strip())
    inner.append('</p></div>')
    footnotes.append(inner)
    ctx['footnotes'] = footnotes
    return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
        that_id, this_id, idx)]
def convert_formalpara(ctx, xml):
    """Convert <formalpara> into one paragraph that starts with a bold title.

    A missing <title> or <para> child is reported with a warning and left
    out, instead of failing the whole conversion.
    """
    title_tag = xml.find('title')
    if title_tag is not None:
        result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
    else:
        logging.warning('%s: Expected title tag under "%s"', xml.sourceline, xml.tag)
        result = ['<p>']
    para_tag = xml.find('para')
    if para_tag is not None:
        append_text(ctx, para_tag.text, result)
        convert_inner(ctx, para_tag, result)
        append_text(ctx, para_tag.tail, result)
    else:
        logging.warning('%s: Expected para tag under "%s"', xml.sourceline, xml.tag)
    result.append('</p>')
    append_text(ctx, xml.tail, result)
    return result
def convert_glossdef(ctx, xml):
    """Convert <glossdef> into the <dd> of a definition list."""
    html = ['<dd class="glossdef">']
    convert_inner(ctx, xml, html)
    html.append('</dd>\n')
    return html
def convert_glossdiv(ctx, xml):
    """Convert <glossdiv> into an anchored heading followed by its entries.

    The <title> child is removed from xml as a side effect.
    """
    title_xml = xml.find('title')
    heading = title_xml.text
    xml.remove(title_xml)
    html = ['<a name="gls%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, html)
    return html
def convert_glossentry(ctx, xml):
    """Convert <glossentry>: only the children produce output."""
    fragments = []
    convert_inner(ctx, xml, fragments)
    return fragments
def convert_glossterm(ctx, xml):
    """Convert <glossterm> into the <dt> of a definition list.

    The anchor name is taken from a nested <anchor id="...">. Without one
    (or without an id on it), the name is 'glossterm-' plus the term text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        glossid = ''
        label = ''
    else:
        glossid = anchor.attrib.get('id', '')
        # the term text may follow the anchor ...
        label = anchor.tail or ''
    # ... and/or precede it
    label += xml.text or ''
    if glossid == '':
        glossid = 'glossterm-' + label
    return [
        '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
            glossid, label)
    ]
def convert_imageobject(ctx, xml):
    """Convert <imageobject> into an <img>, if it contains an <imagedata>."""
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    src = imagedata.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_indexdiv(ctx, xml):
    """Convert <indexdiv> into an anchored heading followed by its entries.

    The <title> child is removed from xml as a side effect.
    """
    title_xml = xml.find('title')
    heading = title_xml.text
    xml.remove(title_xml)
    html = ['<a name="idx%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, html)
    return html
def convert_informaltable(ctx, xml):
    """Convert <informaltable>, honoring pgwide="1" and frame="none"."""
    attrs = xml.attrib
    html = ['<div class="informaltable"><table class="informaltable"']
    if attrs.get('pgwide') == '1':
        html.append(' width="100%"')
    if attrs.get('frame') == 'none':
        html.append(' border="0"')
    html.append('>\n')
    convert_inner(ctx, xml, html)
    html.append('</table></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_inlinegraphic(ctx, xml):
    """Convert <inlinegraphic fileref="..."> into an <img>."""
    # TODO(ensonic): warn on missing fileref attr?
    src = xml.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_itemizedlist(ctx, xml):
    """Convert <itemizedlist> into a bulleted <ul> inside a div."""
    html = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, html)
    html.append('</ul></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_link(ctx, xml):
    """Convert <link linkend="..."> into a html link.

    If the linkend can be resolved, the link text is wrapped into an <a> and
    the title of the target (if known) is used as tooltip. An unresolvable
    linkend is reported and only the text is emitted. Without a linkend the
    content is converted as is.
    """
    # use get(): a missing attribute is handled by the else branch below
    linkend = xml.attrib.get('linkend')
    result = []
    if linkend:
        link_text = []
        append_text(ctx, xml.text, link_text)
        convert_inner(ctx, xml, link_text)
        text = ''.join(link_text)

        (tid, href) = fixxref.GetXRef(linkend)
        if href:
            title_attr = ''
            title = titles.get(tid)
            if title:
                title_attr = ' title="%s"' % title['title']

            href = fixxref.MakeRelativeXRef(ctx['module'], href)
            result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
        else:
            # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
            fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
            result = [text]
    else:
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
    append_text(ctx, xml.tail, result)
    return result
def convert_listitem(ctx, xml):
    """Convert <listitem> into a <li>."""
    html = ['<li class="listitem">']
    convert_inner(ctx, xml, html)
    html.append('</li>')
    # is in itemizedlist and there can be no 'text'
    return html
def convert_literallayout(ctx, xml):
    """Convert <literallayout> into a paragraph inside a div."""
    html = ['<div class="literallayout"><p><br>\n']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_orderedlist(ctx, xml):
    """Convert <orderedlist> into a numbered <ol> inside a div."""
    html = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, html)
    html.append('</ol></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_para(ctx, xml):
    """Convert <para> into <p>, mapping 'role' to class and 'id' to an anchor."""
    attrs = xml.attrib
    if 'role' in attrs:
        html = ['<p class="%s">' % attrs['role']]
    else:
        html = ['<p>']
    # here the anchor goes inside the paragraph
    append_idref(attrs, html)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_para_like(ctx, xml):
    """Convert a tag into <p> with the docbook tag as class.

    An anchor for the 'id' attribute (if any) is put in front of the <p>.
    """
    html = []
    append_idref(xml.attrib, html)
    html.append('<p class="%s">' % xml.tag)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_phrase(ctx, xml):
    """Convert <phrase> into a span, mapping 'role' to class."""
    attrs = xml.attrib
    html = ['<span']
    # second fragment closes the opening tag, with or without a class
    html.append(' class="%s">' % attrs['role'] if 'role' in attrs else '>')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_primaryie(ctx, xml):
    """Convert <primaryie> into a <dt> followed by an empty <dd>."""
    html = ['<dt>\n']
    convert_inner(ctx, xml, html)
    html.append('\n</dt>\n<dd></dd>\n')
    return html
def convert_pre(ctx, xml):
    """Convert a tag into <pre> with the docbook tag as class.

    While the content is converted, the 'no-strip' key is set in ctx so that
    append_text() keeps whitespace-only text.
    """
    # Since we're inside <pre> don't skip newlines
    ctx['no-strip'] = True
    html = ['<pre class="%s">' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</pre>')
    # the tail is outside of the <pre> again
    ctx.pop('no-strip')
    append_text(ctx, xml.tail, html)
    return html
def convert_programlisting(ctx, xml):
    """Convert <programlisting> into a <pre>.

    Listings with role="example" are syntax highlighted with pygments and get
    line numbers. The language is taken from the 'language' attribute and
    defaults to ctx['src-lang']. If pygments has no lexer for the language, a
    warning is logged and the plain text is emitted. All other listings are
    converted like regular content.
    """
    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', ctx['src-lang']).lower()
            if lang not in LEXERS:
                # Imported here, as the module level only imports CLexer.
                from pygments.lexers import get_lexer_by_name
                from pygments.util import ClassNotFound
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    # remember the miss, so that we don't search again
                    LEXERS[lang] = None
            lexer = LEXERS.get(lang, None)
            if lexer:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                # The text is raw source code, escape it for html. We don't
                # use append_text(), as that might drop whitespace.
                result.append(xml.text.replace('&', '&amp;')
                              .replace('<', '&lt;').replace('>', '&gt;'))
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    append_text(ctx, xml.tail, result)
    return result
def convert_quote(ctx, xml):
    """Convert <quote> into a span enclosed in double quotes."""
    html = ['<span class="quote">"<span class="quote">']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>"</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_refsect1(ctx, xml):
    """Convert <refsect1> into a section with a h2 heading."""

    # Add a divider between two consecutive refsect2
    def inner_with_dividers(ctx, xml, result):
        prev_tag = None
        for child in xml:
            tag = child.tag
            if tag == 'refsect2' and prev_tag == 'refsect2':
                result.append('<hr>\n')
            result.extend(convert_tags.get(tag, convert__unknown)(ctx, child))
            prev_tag = tag

    return convert_sect(ctx, xml, 'h2', inner_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert <refsect2> into a section with a h3 heading."""
    heading_tag = 'h3'
    return convert_sect(ctx, xml, heading_tag)
def convert_refsect3(ctx, xml):
    """Convert <refsect3> into a section with a h4 heading."""
    heading_tag = 'h4'
    return convert_sect(ctx, xml, heading_tag)
def convert_row(ctx, xml):
    """Convert a table <row> into a <tr>."""
    html = ['<tr>\n']
    convert_inner(ctx, xml, html)
    html.append('</tr>\n')
    return html
def convert_sect1_tag(ctx, xml):
    """Convert <sect1> into a section with a h2 heading."""
    heading_tag = 'h2'
    return convert_sect(ctx, xml, heading_tag)
def convert_sect2(ctx, xml):
    """Convert <sect2> into a section with a h3 heading."""
    heading_tag = 'h3'
    return convert_sect(ctx, xml, heading_tag)
def convert_sect3(ctx, xml):
    """Convert <sect3> into a section with a h4 heading."""
    heading_tag = 'h4'
    return convert_sect(ctx, xml, heading_tag)
def convert_simpara(ctx, xml):
    """Convert <simpara> into a plain <p>."""
    html = ['<p>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_span(ctx, xml):
    """Convert an inline tag into a span with the docbook tag as class."""
    html = ['<span class="%s">' % xml.tag]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_table(ctx, xml):
    """Convert <table> into a html table with an optional bold title."""
    html = ['<div class="table">']
    append_idref(xml.attrib, html)
    title_xml = xml.find('title')
    if title_xml is not None:
        html.append('<p class="title"><b>')
        # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
        html.extend(convert_title(ctx, title_xml))
        html.append('</b></p>')
    # NOTE(review): the summary is the same fixed string for every table,
    # it probably should be derived from the table - confirm
    html.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')

    convert_inner(ctx, xml, html)

    html.append('</table></div></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_tbody(ctx, xml):
    """Convert <tbody>; the entries below it become 'td' cells."""
    # read by convert_entry()
    ctx['table.entry'] = 'td'
    html = ['<tbody>']
    convert_inner(ctx, xml, html)
    html.append('</tbody>')
    # is in tgroup and there can be no 'text'
    return html
def convert_tgroup(ctx, xml):
    """Convert <tgroup>.

    tgroup does not expand to anything, but the nested colspecs need to be
    put into a colgroup. They are removed from xml once converted, so that
    they are not converted again with the remaining children.
    """
    html = []
    colspecs = xml.findall('colspec')
    if colspecs:
        html.append('<colgroup>\n')
        for colspec in colspecs:
            html.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    # is in informaltable and there can be no 'text'
    return html
def convert_thead(ctx, xml):
    """Convert <thead>; the entries below it become 'th' cells."""
    # read by convert_entry()
    ctx['table.entry'] = 'th'
    html = ['<thead>']
    convert_inner(ctx, xml, html)
    html.append('</thead>')
    # is in tgroup and there can be no 'text'
    return html
def convert_title(ctx, xml):
    """Convert the content of a title tag, without any markup for the tag.

    This is always explicitly called from some context; in convert_tags
    'title' is mapped to convert_skip().
    """
    fragments = []
    append_text(ctx, xml.text, fragments)
    convert_inner(ctx, xml, fragments)
    append_text(ctx, xml.tail, fragments)
    return fragments
def convert_ulink(ctx, xml):
    """Convert <ulink url="..."> into a link; the url doubles as text if empty."""
    url = xml.attrib['url']
    label = xml.text if xml.text else url
    html = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, label)]
    append_text(ctx, xml.tail, html)
    return html
def convert_userinput(ctx, xml):
    """Convert <userinput> into a bold span with the 'command' class."""
    html = ['<span class="command"><strong>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</strong></span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_variablelist(ctx, xml):
    """Convert <variablelist> into a two column table."""
    html = [
        '<div class="variablelist"><table border="0" class="variablelist">\n'
        '<colgroup>\n'
        '<col align="left" valign="top">\n'
        '<col>\n'
        '</colgroup>\n'
        '<tbody>'
    ]
    convert_inner(ctx, xml, html)
    html.append('</tbody>\n</table></div>')
    return html
def convert_varlistentry(ctx, xml):
    """Convert <varlistentry> into a table row.

    The <term> goes into the first cell and the content of the <listitem>
    into the second one.
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # close the row; this used to emit a second opening tag
    result.append('</tr>')
    return result
def convert_xref(ctx, xml):
    """Convert <xref linkend="..."> into a link labeled with the target title.

    If no title is known for the target, a warning is logged and only the
    tail text is emitted.
    """
    result = []
    linkend = xml.attrib['linkend']
    (tid, href) = fixxref.GetXRef(linkend)
    title = titles.get(tid)
    if title is None:
        # can't build the link text without the title of the target
        logging.warning('invalid linkend "%s"', linkend)
    else:
        # all sectN need to become 'section'
        tag = title['tag']
        tag = {
            'sect1': 'section',
            'sect2': 'section',
            'sect3': 'section',
            'sect4': 'section',
            'sect5': 'section',
        }.get(tag, tag)
        result.append(
            '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
            (href, title['title'], tag, ''.join(convert_title(ctx, title['xml'])))
        )

    append_text(ctx, xml.tail, result)
    return result
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: docbook tag name -> converter function. Tags not listed here
# are handled by convert__unknown() (see convert_inner()).
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1099 # conversion helpers
# Opening part of a generated html page, used by the chunk converters in this
# file. It takes two '%s' arguments: the text for <title> and the already
# rendered <link> tags (see generate_head_links()) for the <head> section.
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
def generate_head_links(ctx):
    """Render the <link rel="..."> tags that go into the html <head>.

    The 'home' link is always produced from ctx['nav_home']. The 'up', 'prev'
    and 'next' links are only produced when the matching 'nav_*' key is
    present in ctx. Returns all tags joined into one string.
    """
    template = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [template % ('home', home.filename, home.title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            links.append(template % (rel, target.filename, target.title))
    return ''.join(links)
def generate_nav_links(ctx):
    """Render the table cells with the home/up/prev/next navigation icons.

    The home cell is always a link. For each of up/prev/next an active link
    is produced when the 'nav_*' key is present in ctx, otherwise a cell with
    the greyed-out ("insensitive") icon. Returns the cells as one string.
    """
    active = ('<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" '
              'height="16" border="0" alt="%s"></a></td>')
    inactive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    # (ctx key, accesskey, icon base name, alt text)
    directions = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )
    for key, accesskey, icon, alt in directions:
        if key in ctx:
            cells.append(active % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(inactive % icon)
    return ''.join(cells)
def generate_toc(ctx, node):
    """Build the nested table-of-contents entries for the children of node.

    Returns a list of html fragments: one <dt> per child (with the optional
    subtitle) and, for children that have children themselves, a nested
    <dd><dl> block produced by recursion. ctx is only passed through.
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries.append('<dd><dl>')
            entries += generate_toc(ctx, child)
            entries.append('</dl></dd>')
    return entries
def generate_basic_nav(ctx):
    """Render the top navigation table with an empty shortcuts cell.

    The navigation icon cells come from generate_nav_links(ctx).
    """
    nav_cells = generate_nav_links(ctx)
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts"></td>
%s
</tr>
</table>
""" % nav_cells
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Render the top navigation table with one shortcut per division.

    For every element in divs a link to '#<prefix><title>' is produced, where
    the title is looked up with xml_get_title(). The shortcuts are separated
    by a dimmed '|' and wrapped in a span with the id 'nav_<span_id>'.
    """
    titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = '\n<span class="dim">|</span>\n'.join(
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles)
    nav_cells = generate_nav_links(ctx)

    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts">
<span id="nav_%s">
%s
</span>
</td>
%s
</tr>
</table>
""" % (span_id, shortcuts, nav_cells)
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to result.

    Besides the 'Top' shortcut, one shortcut is added for each refsect1 that
    - has no role ending in "_proto" (those are the TOC sections),
    - has an 'id' attribute, and
    - has a '.' in that id (others are treated as foreign sections).
    Nothing is returned; result is extended in place.
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for section in refsect1s:
        attrs = section.attrib
        # an absent id behaves like an id without '.': the section is skipped
        ref_id = attrs.get('id', '')
        if attrs.get('role', '').endswith("_proto") or '.' not in ref_id:
            continue

        title = xml_get_title(ctx, section)
        span_id = ref_id.split('.')[1].replace('-', '_')

        # NOTE(review): the padding around the '|' separator is assumed to be
        # non-breaking spaces (U+00A0) - confirm against the original file.
        result.append("""
<span id="nav_%s">
\u00a0\u00a0<span class="dim">|</span>\u00a0
<a href="#%s" class="shortcut">%s</a>
</span>
""" % (span_id, ref_id, title))

    result.append("""
</td>
%s
</tr>
</table>
""" % generate_nav_links(ctx))
def generate_footer(ctx):
    """Return the html fragments for the footnotes collected in ctx.

    When ctx has no 'footnotes' key an empty list is returned. Otherwise the
    fragments of every footnote are wrapped into a <div class="footnotes">.
    """
    if 'footnotes' not in ctx:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    for footnote in ctx['footnotes']:
        footer.extend(footnote)
    footer.append('</div>\n')
    return footer
def get_id_path(node):
    """Return the position path used to generate an 'id' for node.

    First the xml tree is walked upwards from node.xml, recording the 1-based
    position of each element among its siblings. Then the chunk tree is
    walked upwards from node, recording each chunk's 1-based 'idx'. The
    result is a list of strings ordered from the outermost entry inwards.
    """
    steps = []  # collected innermost first, reversed before returning

    element = node.xml
    owner = element.getparent()
    while owner is not None:
        position = owner.getchildren().index(element)
        steps.append(str(position + 1))
        element, owner = owner, owner.getparent()

    chunk = node
    while chunk is not None:
        steps.append(str(chunk.idx + 1))
        chunk = chunk.parent

    steps.reverse()
    return steps
def get_id(node):
    """Return the anchor id for node.

    A non-empty 'id' attribute on node.xml is used as is. Otherwise an id of
    the form 'id-<a>.<b>...' is generated from get_id_path() and the fact is
    logged at info level.
    """
    element = node.xml
    explicit_id = element.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     element.sourceline, element.tag)
        # logging.warning('%s: id indexes: %s', node.filename, str(ix))
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Render a chunk page that starts with a table of contents.

    Args:
      ctx: conversion context, ctx['node'] is the chunk to render
      div_class: css class for the <div> wrapping the page content
      title_tag: html tag name (e.g. 'h1') used for the page title

    Returns the page as a list of html fragments.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    if node.title:
        anchor = get_id(node)
        result.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (title_tag, anchor, node.title, title_tag))

    toc = generate_toc(ctx, node)
    if toc:
        # TODO: not all docbook page types use this extra heading
        result.append("""<p><b>Table of Contents</b></p>
<div class="toc">
<dl class="toc">
""")
        result += toc
        result.append("""</dl>
</div>
""")
    # the converters append the page body to result in place
    convert_inner(ctx, node.xml, result)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
1321 # docbook chunks
def convert_book(ctx):
    """Render the book start page (title, bookinfo and full table of contents).

    ctx['node'] is the book chunk. A book without a <bookinfo> child is
    tolerated: the bookinfo part is left out and a warning is logged instead
    of failing with an IndexError.

    Returns the page as a list of html fragments.
    """
    node = ctx['node']
    result = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
<tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    ]
    # find() returns the first <bookinfo> child or None if there is none
    bookinfo = node.xml.find('bookinfo')
    if bookinfo is not None:
        result.extend(convert_bookinfo(ctx, bookinfo))
    else:
        logging.warning('No "bookinfo" found in "book", skipping it')
    result.append("""<div class="toc">
<dl class="toc">
""")
    result.extend(generate_toc(ctx, node.root))
    result.append("""</dl>
</div>
""")
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_chapter(ctx):
    """Render a 'chapter' chunk: a toc page with an <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_glossary(ctx):
    """Render a 'glossary' chunk.

    The page gets an alphabetical shortcut bar built from the <glossdiv>
    children of the chunk, a title whose heading level is node.depth, and
    the converted content of every glossdiv.

    Returns the page as a list of html fragments.
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    header = HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    alpha_nav = generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary')
    titlepage = """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)

    result = [header, alpha_nav, titlepage]
    for glossdiv in glossdivs:
        result += convert_glossdiv(ctx, glossdiv)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
def convert_index(ctx):
    """Render an 'index' chunk.

    The page gets an alphabetical shortcut bar built from the <indexdiv>
    elements nested in the first top-level <indexdiv>, a title whose heading
    level is node.depth, and the converted content of each nested indexdiv.
    An index without any <indexdiv> is rendered as an empty index instead of
    failing with an AttributeError.

    Returns the page as a list of html fragments.
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv; find() gives None for an empty index
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv') if outer_div is not None else []

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_part(ctx):
    """Render a 'part' chunk: a toc page with an <h1> title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
def convert_preface(ctx):
    """Render a 'preface' chunk: header, basic navigation, title and content.

    Returns the page as a list of html fragments.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">',
    ]
    if node.title:
        anchor = get_id(node)
        result.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (anchor, node.title))
    # the converters append the page body to result in place
    convert_inner(ctx, node.xml, result)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
def convert_reference(ctx):
    """Render a 'reference' chunk: a toc page with an <h1> title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
def convert_refentry(ctx):
    """Render a 'refentry' chunk (one API reference page).

    The page consists of the header, the refentry navigation with shortcuts
    to the refsect1 sections, the name block (title, subtitle and an
    optional gallery image) and the converted refsect1 sections.

    Returns the page as a list of html fragments.
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # The gallery image lives at refmeta/refmiscinfo/inlinegraphic; at every
    # level only the first matching child is looked at.
    graphic = node.xml
    for tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        graphic = graphic.find(tag)
        if graphic is None:
            break
    if graphic is None:
        gallery = ''
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, graphic))

    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s %s</p>
</td>
<td class="gallery_image" valign="top" align="right">%s</td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for section in refsect1s:
        result += convert_refsect1(ctx, section)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
def convert_sect1(ctx):
    """Render a 'sect1' chunk: a toc page with an <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table from the name of a chunk node to the function that renders
# the html page for it. convert() looks up node.name here and logs a warning
# for names that are missing.
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
def generate_nav_nodes(files, node):
    """Return the navigation targets for node as a dict.

    'nav_home' is always the root of the tree. 'nav_up' is added when node
    has a parent; 'nav_prev' and 'nav_next' are the neighbours of node in
    files and are added when they exist.
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent
    position = files.index(node)
    last = len(files) - 1
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    if position < last:
        nav['nav_next'] = files[position + 1]
    return nav
def convert(out_dir, module, files, node, src_lang):
    """Write the html file for one docbook chunk.

    Args:
      out_dir: already created output dir
      module: stored in the conversion context as 'module'
      files: list of nodes in the tree in pre-order
      node: current tree node
      src_lang: stored in the conversion context as 'src-lang'

    The output file is opened before the converter lookup, so a chunk type
    without a converter leaves an empty file and a warning in the log.
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
            'src-lang': src_lang,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Return the <sub> entries of the devhelp2 'chapters' section.

    One entry is produced per child of node. Children that have children
    themselves become an opening/closing <sub> pair around their recursively
    generated entries, all others a self-closing <sub/>.
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into devhelp2 xml attributes.

    The condition holds '|' separated entries of the form 'name:value' or
    just 'name' (since, deprecated, ...). Each entry becomes name="value";
    double quotes in the condition are replaced by '&quot;'. The result has
    a leading space, or is '' when node has no 'condition' attribute.
    """
    if 'condition' not in node.attrib:
        return ''

    escaped = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for entry in escaped.split('|'):
        # entries without ':' (deprecated can have no description) get an
        # empty value
        name, _, value = entry.partition(':')
        keywords.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(keywords)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Return the devhelp2 <keyword> line for a refsect2 element.

    The keyword type is the 'role' attribute, the name is the section title
    and the link is base_link followed by the 'id' attribute. Attributes
    derived from the 'condition' attribute are appended.
    """
    attrs = node.attrib
    keyword_type = attrs['role']
    name = xml_get_title({}, node)
    link = base_link + attrs['id']
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, create_devhelp2_condition_attribs(node))
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Return the devhelp2 <keyword> line for an enum or struct member row.

    The keyword type is the 'role' attribute of node, the name is title and
    the link is base_link followed by name. Attributes derived from the
    'condition' attribute of node are appended.
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, create_devhelp2_condition_attribs(node))
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    Args:
      out_dir: already created output dir
      module: module name, used for the file name and the book 'name'
      xml: root element of the loaded document
      files: list of chunk nodes; files[0].root is used for the toc

    The book title and the online url are taken from /book/bookinfo. Both
    fall back to '' when bookinfo, its title or the 'online-location' ulink
    are missing.
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_url = ''
        # xpath() returns a (possibly empty) list, never None, so test for
        # emptiness before indexing
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append(""" </chapters>
<functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            for refsect2_node in refsect2(node.xml):
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: named by the text of the details node
                for refsect3_node in refsect3_enum(refsect2_node):
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(
                        refsect3_node, base_link, details_node.text, name))
                # struct members: the id serves as both title and link name
                for refsect3_node in refsect3_struct(refsect2_node):
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(
                        refsect3_node, base_link, name, name))

        result.append(""" </functions>
</book>
""")
        idx.writelines(result)
def get_dirs(uninstalled):
    """Return the tuple (gtkdocdir, styledir) with the data directories.

    For an installed gtk-doc both are '<config.datadir>/gtk-doc/data'. When
    running uninstalled, gtkdocdir is the directory of the running script,
    or $ABS_TOP_SRCDIR if 'gtk-doc.xsl' is found only there, and styledir is
    its 'style' subdirectory.
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    marker = '/gtk-doc.xsl'
    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    if not os.path.exists(gtkdocdir + marker):
        # try 'srcdir' (set from makefiles) too
        if os.path.exists(os.environ.get("ABS_TOP_SRCDIR", '') + marker):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled, src_lang):
    """Convert the docbook document index_file to html files in out_dir.

    Args:
      module: module name, passed on to the chunker and the output writers
      index_file: path of the main docbook xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the data directories
      src_lang: passed on to convert() for each chunk

    The work is done in 7 numbered steps; the time spent in each step is
    logged at warning level. There is no explicit return value.
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of HTML_FORMATTER to the copied css
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    # can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # keep only the nodes without an anchor; generate_toc() links anchored
    # nodes as filename + anchor
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    # p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    # p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node, src_lang)
    logging.warning("7: %7.3lf: create html", timer() - _t)
def run(options):
    """Entry point: create the output dir, run main() and exit.

    options.args holds the module name and the path of the main document.
    The html is written to a 'db2html' directory next to the document; an
    already existing directory is reused. The process exits with the result
    of main().
    """
    logging.info('options: %s', str(options.__dict__))
    module = options.args[0]
    document = options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    # outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # EEXIST: the directory is left over from a previous run
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled, options.src_lang))