c10e-html: strip more stuff
[gtk-doc.git] / gtkdoc / mkhtml2.py
blob717b640b8f036312b492177fa724b7138237469d
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - tag converters:
39 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
40 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
41 attr on the <img> tag of the 'imageobject'
42 - check each docbook tag if it can contain #PCDATA, if not don't check for
43 xml.text
44 - consider some perf-warnings flag
45 - see 'No "id" attribute on'
46 - find a better way to print context for warnings
47 - we use 'xml.sourceline', but this all does not help a lot due to xi:include
49 DIFFERENCES:
50 - titles
51 - we add the chunk label to both title in toc and title on the page
52 - docbook xsl only sometimes adds the label to the titles and when it does it
53 adds name chunk type too (e.g. 'Part I.' instead of 'I.')
54 - navigation
55 - we always add an up-link except on the first page
57 OPTIONAL:
58 - minify html: https://pypi.python.org/pypi/htmlmin/
60 Requirements:
61 sudo pip3 install anytree lxml pygments
63 Example invocation:
64 cd tests/bugs/docs/
65 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
66 xdg-open db2html/index.html
67 meld html db2html
69 Benchmarking:
70 cd tests/bugs/docs/;
71 rm html-build.stamp; time make html-build.stamp
72 """
74 import argparse
75 import errno
76 import logging
77 import os
78 import shutil
79 import sys
81 from anytree import Node, PreOrderIter
82 from copy import deepcopy
83 from glob import glob
84 from lxml import etree
85 from pygments import highlight
86 from pygments.lexers import CLexer
87 from pygments.formatters import HtmlFormatter
88 from timeit import default_timer as timer
90 from . import config, fixxref
# pygments setup
# lazily constructed lexer cache: maps a lower-cased language name to a
# pygments lexer instance. Only the C lexer is created up front; other
# languages are added on demand by convert_programlisting().
LEXERS = {
    'c': CLexer()
}
# nowrap=True: convert_programlisting() puts the highlighted code into its own
# <pre class="programlisting"> element.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
class ChunkParams:
    """Chunking settings for one docbook tag.

    Attributes:
      prefix: file name prefix for chunks that have no 'id' attribute
      parent: tag name of the expected parent chunk (or None)
      min_idx: smallest child index at which the tag gets its own file
      idx: running sequence number for generated chunk names, starts at 1
    """

    def __init__(self, prefix, parent=None, min_idx=0):
        self.idx = 1
        self.min_idx = min_idx
        self.parent = parent
        self.prefix = prefix
# Used as 'min_idx' for tags that must never get a file of their own: chunk()
# only starts a new file when 'idx >= min_idx', which is never true for inf.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# Maps a docbook tag name to its ChunkParams(prefix, parent, min_idx).
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    # min_idx=1: the first chunk-tag child stays on the page of its parent
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
# Maps a tag name to a pair of compiled xpaths (title, subtitle-or-None) that
# locate the title nodes below an element of that tag. '_' is the fallback
# entry used for all tags that are not listed explicitly.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every element that carries an 'id' attribute.
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# next footnote number; counted per document (see convert_footnote())
footnote_idx = 1

# nested dict with subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
titles = {}
def gen_chunk_name(node, chunk_params):
    """Return the chunk (file base) name for a node.

    A node with an 'id' attribute is named after that id. Otherwise the name
    is built from the prefix in chunk_params plus a two-digit sequence number,
    and the sequence number in chunk_params is advanced.
    """
    attrs = node.attrib
    if 'id' not in attrs:
        seq = chunk_params.idx
        chunk_params.idx = seq + 1
        name = '%s%02d' % (chunk_params.prefix, seq)

        # TODO: docbook prepends the names of the parent chunks for nested
        # tags. We'd only need that if there is more than one parent in the
        # xml, and the 'parent' in CHUNK_PARAMS is not sufficient for it (e.g.
        # 'index' can be in 'book' or 'part'). Maybe track the chunk parents
        # while chunking and maintain the 'idx' on each level.

        logging.info('Gen chunk name: "%s"', name)
        return name
    return attrs['id']
def get_chunk_titles(module, node):
    """Collect title and subtitle of a chunk node.

    Returns a dict with the keys 'title', 'title_tag', 'subtitle' and
    'subtitle_tag'. Entries stay None if the node has no such title.
    """
    tag = node.tag
    title_xpath, subtitle_xpath = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
    ctx = {'module': module, 'files': []}
    info = dict.fromkeys(('title', 'title_tag', 'subtitle', 'subtitle_tag'))

    matches = title_xpath(node)
    if matches:
        title_node = matches[0]
        # handle chunk label for tocs
        label = node.attrib.get('label')
        prefix = label + '. ' if label else ''
        info['title'] = prefix + ''.join(convert_title(ctx, title_node))
        # a plain 'title' is reported under the tag of the chunk itself
        info['title_tag'] = tag if title_node.tag == 'title' else title_node.tag

    if subtitle_xpath:
        matches = subtitle_xpath(node)
        if matches:
            subtitle_node = matches[0]
            info['subtitle'] = ''.join(convert_title(ctx, subtitle_node))
            info['subtitle_tag'] = subtitle_node.tag
    return info
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.

    Args:
      xml_node: the xml element to inspect
      module: the module name, passed on to get_chunk_titles()
      depth: nesting level, stored on the created tree node
      idx: position of xml_node among the chunk-tag children of its parent
      parent: the tree node of the enclosing chunk, None for the root

    Returns:
      the tree node created for xml_node, or the unchanged 'parent' if the tag
      of xml_node is not listed in CHUNK_PARAMS
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent
                # (the chunk continues to live as a detached deep copy)
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # Not a file of its own: stays in the file of the parent chunk and
            # is addressed through an anchor. This branch dereferences
            # 'parent', so it relies on a parent node being present.
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        # NOTE(review): the nesting of the recursion below inside
        # 'if chunk_params:' was reconstructed - confirm against the original.
        depth += 1
        idx = 0
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            # only chunk tags advance the index that is compared to min_idx
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
def add_id_links_and_titles(files, links):
    """Register link targets and titles for all elements that have an 'id'.

    For every node in files, each id found in its xml is added to links. An id
    that equals the chunk name points to the file itself, all others to an
    anchor inside the file. If the element has a title, it is recorded in the
    global 'titles' dict.
    """
    default_xpaths = TITLE_XPATHS['_']
    for node in files:
        filename = node.filename
        # strip the 5 characters of '.html'
        chunk_name = filename[:-5]
        for elem in ID_XPATH(node.xml):
            elem_id = elem.attrib['id']
            links[elem_id] = filename if elem_id == chunk_name else filename + '#' + elem_id

            title_xpath = TITLE_XPATHS.get(elem.tag, default_xpaths)[0]
            found = title_xpath(elem)
            if not found:
                continue
            title_node = found[0]
            # TODO: consider to eval those lazily
            titles[elem_id] = {
                'title': etree.tostring(title_node, method="text", encoding=str).strip(),
                'xml': title_node,
                'tag': elem.tag,
            }
def build_glossary(files):
    """Fill the global 'glossary' dict from all 'glossary' chunks in files."""
    def as_text(elem):
        return etree.tostring(elem, method="text", encoding=str).strip()

    for node in files:
        if node.xml.tag == 'glossary':
            for entry in GLOSSENTRY_XPATH(node.xml):
                # TODO: there can be all kind of things in a glossary. This only
                # supports what we commonly use
                key = as_text(entry.find('glossterm'))
                value = as_text(entry.find('glossdef'))
                glossary[key] = value
                # logging.debug('glosentry: %s:%s', key, value)
308 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert all children of xml and append their html to result."""
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only the html of its children."""
    children_html = []
    convert_inner(ctx, xml, children_html)
    return children_html
def convert_skip(ctx, xml):
    """Drop the tag together with everything inside it."""
    return list()
def append_text(text, result):
    """Append text to result as html-escaped character data.

    Args:
      text: the text to add; may be None
      result: the list of html fragments to append to

    Nothing is appended if text is None, empty or consists of whitespace only.
    """
    if text and text.strip():
        # '&' has to be replaced first, otherwise the '&' of the entities
        # generated for '<' and '>' would get escaped again.
        result.append(
            text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
# tags we already warned about in convert__unknown()
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for nodes that have no entry in convert_tags.

    Sub-chunks yield nothing, xml comments are copied, and any other tag is
    wrapped into a pair of html comments around its converted children.
    """
    tag = xml.tag
    # don't recurse on subchunks
    if tag in CHUNK_PARAMS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']

    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    html = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, html)
    html.append('<!-- /' + tag + '-->\n')
    return html
def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a section-like tag into a <div> with an optional heading.

    Args:
      ctx: the conversion context
      xml: the section element; its tag name becomes the css class
      h_tag: the html heading tag to use for the title (e.g. 'h2')
      inner_func: function(ctx, xml, result) that converts the children

    Returns:
      list of html fragments
    """
    result = ['<div class="%s">\n' % xml.tag]
    title = xml.find('title')
    if title is not None:
        if 'id' in xml.attrib:
            result.append('<a name="%s"></a>' % xml.attrib['id'])
        # Use convert_title() instead of the raw title.text: this keeps inline
        # markup and escaping, and a title that starts with a child tag (text
        # is None) no longer shows up as the string 'None'.
        result.append('<%s>%s</%s>' % (
            h_tag, ''.join(convert_title(ctx, title)), h_tag))
    append_text(xml.text, result)
    inner_func(ctx, xml, result)
    result.append('</div>')
    append_text(xml.tail, result)
    return result
def xml_get_title(ctx, xml):
    """Return the converted 'title' child of xml, or '' (with a warning)."""
    title_node = xml.find('title')
    if title_node is None:
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return ''.join(convert_title(ctx, title_node))
374 # docbook tags
def convert_abstract(ctx, xml):
    """Convert 'abstract' into a div with a fixed 'Abstract' heading."""
    html = ['<div class="abstract">\n<p class="title"><b>Abstract</b></p>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</div>']
    append_text(xml.tail, html)
    return html
def convert_acronym(ctx, xml):
    """Convert 'acronym'; the glossary entry (if any) becomes the tooltip."""
    acronym = xml.text
    # TODO: print a sensible warning if missing
    tooltip = glossary.get(acronym, '')
    html = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (tooltip, acronym)]
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_anchor(ctx, xml):
    """Convert 'anchor' into a named html anchor."""
    anchor_id = xml.attrib['id']
    return ['<a name="%s"></a>' % anchor_id]
def convert_bookinfo(ctx, xml):
    """Convert 'bookinfo' into the title page div, closed by a ruler."""
    html = ['<div class="titlepage">']
    convert_inner(ctx, xml, html)
    html.append('<hr>\n</div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_blockquote(ctx, xml):
    """Convert 'blockquote' into a div-wrapped html blockquote."""
    html = ['<div class="blockquote">\n<blockquote class="blockquote">']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</blockquote>\n</div>']
    append_text(xml.tail, html)
    return html
def convert_code(ctx, xml):
    """Convert an inline tag into <code>, with the tag name as css class."""
    html = ['<code class="%s">' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</code>']
    append_text(xml.tail, html)
    return html
def convert_colspec(ctx, xml):
    """Convert 'colspec' into a <col> with optional class and width."""
    attrs = xml.attrib
    pieces = ['<col']
    for name, html_attr in (('colname', 'class'), ('colwidth', 'width')):
        if name in attrs:
            pieces.append(' %s="%s"' % (html_attr, attrs[name]))
    pieces.append('>\n')
    # is in tgroup and there can be no 'text'
    return pieces
def convert_command(ctx, xml):
    """Convert 'command' into strong, code-styled text."""
    html = ['<strong class="userinput"><code>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</code></strong>']
    append_text(xml.tail, html)
    return html
def convert_corpauthor(ctx, xml):
    """Convert 'corpauthor' into a h3 heading inside a div."""
    html = ['<div><h3 class="corpauthor">\n']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</h3></div>\n']
    append_text(xml.tail, html)
    return html
def convert_div(ctx, xml):
    """Convert a block tag into a <div>, with the tag name as css class."""
    html = ['<div class="%s">\n' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</div>']
    append_text(xml.tail, html)
    return html
def convert_emphasis(ctx, xml):
    """Convert 'emphasis' into a span-wrapped <em>."""
    html = ['<span class="emphasis"><em>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</em></span>']
    append_text(xml.tail, html)
    return html
def convert_em_class(ctx, xml):
    """Convert an inline tag into <em><code>, tag name as css class."""
    html = ['<em class="%s"><code>' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</code></em>']
    append_text(xml.tail, html)
    return html
def convert_entry(ctx, xml):
    """Convert a table 'entry' into the cell type stored in the context.

    The html tag is read from ctx['table.entry'], which convert_thead() and
    convert_tbody() set to 'th' and 'td' respectively.
    """
    cell = ctx['table.entry']
    attrs = xml.attrib
    html = ['<' + cell]
    if 'role' in attrs:
        html.append(' class="%s"' % attrs['role'])
    if 'morerows' in attrs:
        # docbook counts the additional rows, html the total number of rows
        html.append(' rowspan="%s"' % (int(attrs['morerows']) + 1))
    html.append('>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</' + cell + '>')
    append_text(xml.tail, html)
    return html
def convert_footnote(ctx, xml):
    """Convert 'footnote' into a reference link and queue the footnote body.

    The body is appended to ctx['footnotes'] (a list of html fragment lists)
    and only the superscript reference is returned for the place where the
    footnote occurs.

    Args:
      ctx: the conversion context; 'footnotes' is created if missing
      xml: the footnote element

    Returns:
      list with the html of the footnote reference
    """
    footnotes = ctx.get('footnotes', [])
    # footnotes idx is not per page, but per doc
    global footnote_idx
    idx = footnote_idx
    footnote_idx += 1

    # need a pair of ids for each footnote (docbook generates different ids)
    this_id = 'footnote-%d' % idx
    that_id = 'ftn.' + this_id

    inner = ['<div id="%s" class="footnote">' % that_id]
    inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
        this_id, idx))
    # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
    # get double nested paras :/.
    # convert_inner(ctx, xml, inner)
    para = xml.find('para')
    if para is None:
        para = xml.find('simpara')
    if para is not None:
        # para.text is None if the para starts with a child tag. Going through
        # append_text() avoids a None in the fragment list and escapes the
        # text like everywhere else.
        append_text(para.text, inner)
    else:
        logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
                        etree.tostring(xml, method="text", encoding=str).strip())
    inner.append('</p></div>')
    footnotes.append(inner)
    ctx['footnotes'] = footnotes
    return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
        that_id, this_id, idx)]
def convert_formalpara(ctx, xml):
    """Convert 'formalpara' into a paragraph that starts with a bold title.

    Args:
      ctx: the conversion context
      xml: the formalpara element, expected to contain 'title' and 'para'

    Returns:
      list of html fragments; a missing 'title' or 'para' child is left out
      instead of raising an AttributeError
    """
    result = ['<p>']
    title_tag = xml.find('title')
    if title_tag is not None:
        # convert_title() keeps inline markup and never yields 'None'
        result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
    para_tag = xml.find('para')
    if para_tag is not None:
        append_text(para_tag.text, result)
        convert_inner(ctx, para_tag, result)
        append_text(para_tag.tail, result)
    result.append('</p>')
    append_text(xml.tail, result)
    return result
def convert_glossdef(ctx, xml):
    """Convert 'glossdef' into a definition-list description."""
    html = ['<dd class="glossdef">']
    convert_inner(ctx, xml, html)
    html += ['</dd>\n']
    return html
def convert_glossdiv(ctx, xml):
    """Convert 'glossdiv' into an anchored heading followed by its entries."""
    title_node = xml.find('title')
    heading = title_node.text
    # the title is rendered here, take it out before converting the children
    xml.remove(title_node)
    html = ['<a name="gls%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, html)
    return html
def convert_glossentry(ctx, xml):
    """Convert 'glossentry': no markup of its own, only the children."""
    html = []
    convert_inner(ctx, xml, html)
    return html
def convert_glossterm(ctx, xml):
    """Convert 'glossterm' into an anchored definition-list term.

    The anchor name is the id of a nested 'anchor' tag if there is one,
    otherwise 'glossterm-' followed by the term text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        glossid, text = '', ''
    else:
        glossid = anchor.attrib.get('id', '')
        text = anchor.tail or ''
    text += xml.text or ''
    if not glossid:
        glossid = 'glossterm-' + text
    html = '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
        glossid, text)
    return [html]
def convert_imageobject(ctx, xml):
    """Convert 'imageobject' into an <img> for its 'imagedata' child."""
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    src = imagedata.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_indexdiv(ctx, xml):
    """Convert 'indexdiv' into an anchored heading followed by its entries."""
    title_node = xml.find('title')
    heading = title_node.text
    # the title is rendered here, take it out before converting the children
    xml.remove(title_node)
    html = ['<a name="idx%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, html)
    return html
def convert_informaltable(ctx, xml):
    """Convert 'informaltable' into a div-wrapped html table.

    pgwide="1" maps to width="100%" and frame="none" to border="0".
    """
    attrs = xml.attrib
    html = ['<div class="informaltable"><table class="informaltable"']
    if attrs.get('pgwide') == '1':
        html.append(' width="100%"')
    if attrs.get('frame') == 'none':
        html.append(' border="0"')
    html.append('>\n')
    convert_inner(ctx, xml, html)
    html.append('</table></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_inlinegraphic(ctx, xml):
    """Convert 'inlinegraphic' into an <img> using its 'fileref'."""
    # TODO(ensonic): warn on missing fileref attr?
    src = xml.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_itemizedlist(ctx, xml):
    """Convert 'itemizedlist' into a div-wrapped bullet list."""
    html = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, html)
    html.append('</ul></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_link(ctx, xml):
    """Convert 'link' into a hyperlink to the target named by 'linkend'.

    Targets listed in fixxref.NoLinks (or empty ones) yield the plain link
    content. A target that can't be resolved is reported through
    fixxref.ReportBadXRef() and also yields the plain content.
    """
    target = xml.attrib['linkend']
    if target in fixxref.NoLinks or not target:
        html = []
        append_text(xml.text, html)
        convert_inner(ctx, xml, html)
    else:
        content = []
        append_text(xml.text, content)
        convert_inner(ctx, xml, content)
        text = ''.join(content)

        tid, href = fixxref.GetXRef(target)
        if not href:
            # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
            fixxref.ReportBadXRef(ctx['node'].filename, 0, target, text)
            html = [text]
        else:
            title = titles.get(tid)
            title_attr = ' title="%s"' % title['title'] if title else ''
            href = fixxref.MakeRelativeXRef(ctx['module'], href)
            html = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
    append_text(xml.tail, html)
    return html
def convert_listitem(ctx, xml):
    """Convert 'listitem' into an html list item."""
    html = ['<li class="listitem">']
    convert_inner(ctx, xml, html)
    html += ['</li>']
    # is in itemizedlist and there can be no 'text'
    return html
def convert_literallayout(ctx, xml):
    """Convert 'literallayout' into a paragraph inside a div."""
    html = ['<div class="literallayout"><p><br>\n']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</p></div>']
    append_text(xml.tail, html)
    return html
def convert_orderedlist(ctx, xml):
    """Convert 'orderedlist' into a div-wrapped numbered list."""
    html = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, html)
    html += ['</ol></div>']
    append_text(xml.tail, html)
    return html
def convert_para(ctx, xml):
    """Convert 'para' into <p>; 'id' adds an anchor, 'role' a css class."""
    attrs = xml.attrib
    html = []
    if 'id' in attrs:
        html.append('<a name="%s"></a>' % attrs['id'])
    html.append('<p class="%s">' % attrs['role'] if 'role' in attrs else '<p>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(xml.tail, html)
    return html
def convert_para_like(ctx, xml):
    """Convert a tag into <p> with the tag name as css class."""
    attrs = xml.attrib
    html = ['<a name="%s"></a>' % attrs['id']] if 'id' in attrs else []
    html.append('<p class="%s">' % xml.tag)
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(xml.tail, html)
    return html
def convert_phrase(ctx, xml):
    """Convert 'phrase' into a <span>; 'role' becomes the css class."""
    attrs = xml.attrib
    html = ['<span']
    html.append(' class="%s">' % attrs['role'] if 'role' in attrs else '>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(xml.tail, html)
    return html
def convert_primaryie(ctx, xml):
    """Convert 'primaryie' into a <dt> followed by an empty <dd>."""
    html = ['<dt>\n']
    convert_inner(ctx, xml, html)
    html += ['\n</dt>\n<dd></dd>\n']
    return html
def convert_pre(ctx, xml):
    """Convert a verbatim tag into <pre>, with the tag name as css class."""
    html = ['<pre class="%s">\n' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</pre>']
    append_text(xml.tail, html)
    return html
def convert_programlisting(ctx, xml):
    """Convert 'programlisting' into a <pre>, highlighted for examples.

    A listing with role="example" is syntax-highlighted with pygments (using
    the 'language' attribute, default 'c') and rendered as a table with a line
    number column. Other listings become a plain <pre>.

    Args:
      ctx: the conversion context
      xml: the programlisting element

    Returns:
      list of html fragments
    """
    # Imported locally: the module only imports CLexer from pygments.lexers,
    # so the lookup for any other language used to fail with a NameError.
    from pygments.lexers import get_lexer_by_name
    from pygments.util import ClassNotFound

    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', 'c').lower()
            if lang not in LEXERS:
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    # Cache the miss, so that we take the fallback below
                    # instead of aborting the whole conversion.
                    LEXERS[lang] = None
            lexer = LEXERS[lang]
            if lexer is not None:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                # not highlighted, so we have to escape the code ourselves
                append_text(xml.text, result)
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        append_text(xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    append_text(xml.tail, result)
    return result
def convert_quote(ctx, xml):
    """Convert 'quote' into nested spans with literal double quotes."""
    html = ['<span class="quote">"<span class="quote">']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</span>"</span>']
    append_text(xml.tail, html)
    return html
def convert_refsect1(ctx, xml):
    """Convert 'refsect1' into a section with a h2 heading.

    A horizontal ruler is added between two consecutive 'refsect2' children.
    """
    def inner_with_dividers(ctx, xml, result):
        previous_tag = None
        for child in xml:
            if child.tag == 'refsect2' and previous_tag == child.tag:
                result.append('<hr>\n')
            converter = convert_tags.get(child.tag, convert__unknown)
            result.extend(converter(ctx, child))
            previous_tag = child.tag

    return convert_sect(ctx, xml, 'h2', inner_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert 'refsect2' into a section with a h3 heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_refsect3(ctx, xml):
    """Convert 'refsect3' into a section with a h4 heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_row(ctx, xml):
    """Convert a table 'row' into <tr>."""
    html = ['<tr>\n']
    convert_inner(ctx, xml, html)
    html += ['</tr>\n']
    return html
def convert_sect1_tag(ctx, xml):
    """Convert 'sect1' into a section with a h2 heading."""
    return convert_sect(ctx, xml, h_tag='h2')
def convert_sect2(ctx, xml):
    """Convert 'sect2' into a section with a h3 heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_sect3(ctx, xml):
    """Convert 'sect3' into a section with a h4 heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_simpara(ctx, xml):
    """Convert 'simpara' into a plain <p>."""
    html = ['<p>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</p>']
    append_text(xml.tail, html)
    return html
def convert_span(ctx, xml):
    """Convert an inline tag into <span>, with the tag name as css class."""
    html = ['<span class="%s">' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</span>']
    append_text(xml.tail, html)
    return html
def convert_table(ctx, xml):
    """Convert 'table' into a titled, div-wrapped html table."""
    html = ['<div class="table">']
    if 'id' in xml.attrib:
        html.append('<a name="%s"></a>' % xml.attrib['id'])
    title_node = xml.find('title')
    if title_node is not None:
        # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
        html.append('<p class="title"><b>')
        html += convert_title(ctx, title_node)
        html.append('</b></p>')
    # NOTE(review): the 'summary' attribute is a fixed string
    html.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')

    convert_inner(ctx, xml, html)

    html.append('</table></div></div>')
    append_text(xml.tail, html)
    return html
def convert_tbody(ctx, xml):
    """Convert 'tbody'; entries inside are rendered as 'td' cells."""
    # read by convert_entry()
    ctx['table.entry'] = 'td'
    html = ['<tbody>']
    convert_inner(ctx, xml, html)
    html += ['</tbody>']
    # is in tgroup and there can be no 'text'
    return html
def convert_tgroup(ctx, xml):
    """Convert 'tgroup': collect the colspecs into a <colgroup>.

    tgroup itself does not expand to anything. The 'colspec' children are
    removed from xml once they are converted.
    """
    html = []
    colspecs = xml.findall('colspec')
    if colspecs:
        html.append('<colgroup>\n')
        for spec in colspecs:
            html += convert_colspec(ctx, spec)
            xml.remove(spec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    # is in informaltable and there can be no 'text'
    return html
def convert_thead(ctx, xml):
    """Convert 'thead'; entries inside are rendered as 'th' cells."""
    # read by convert_entry()
    ctx['table.entry'] = 'th'
    html = ['<thead>']
    convert_inner(ctx, xml, html)
    html += ['</thead>']
    # is in tgroup and there can be no 'text'
    return html
def convert_title(ctx, xml):
    """Convert the content of a title node, without any wrapping markup.

    This is always explicitly called from some context ('title' itself is
    mapped to convert_skip in convert_tags).
    """
    fragments = []
    append_text(xml.text, fragments)
    convert_inner(ctx, xml, fragments)
    append_text(xml.tail, fragments)
    return fragments
def convert_ulink(ctx, xml):
    """Convert 'ulink' into a hyperlink; the url is the fallback link text."""
    url = xml.attrib['url']
    label = xml.text or url
    html = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, label)]
    append_text(xml.tail, html)
    return html
def convert_userinput(ctx, xml):
    """Convert 'userinput' into a span-wrapped <strong>."""
    html = ['<span class="command"><strong>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html += ['</strong></span>']
    append_text(xml.tail, html)
    return html
def convert_variablelist(ctx, xml):
    """Convert 'variablelist' into a two column table."""
    header = """<div class="variablelist"><table border="0" class="variablelist">
<colgroup>
<col align="left" valign="top">
<col>
</colgroup>
<tbody>"""
    footer = """</tbody>
</table></div>"""
    html = [header]
    convert_inner(ctx, xml, html)
    html.append(footer)
    return html
def convert_varlistentry(ctx, xml):
    """Convert 'varlistentry' into a table row of a variablelist.

    The 'term' goes into the first cell, the content of the 'listitem' into
    the second one.

    Args:
      ctx: the conversion context
      xml: the varlistentry element

    Returns:
      list of html fragments
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # close the row (this used to emit a second opening '<tr>')
    result.append('</tr>')
    return result
def convert_xref(ctx, xml):
    """Convert 'xref' into a link labelled with the title of the target.

    Args:
      ctx: the conversion context
      xml: the xref element; 'linkend' names the target id

    Returns:
      list of html fragments. If no title is known for the target, a warning
      is logged and only the tail text of the xref is returned.
    """
    linkend = xml.attrib['linkend']
    (tid, href) = fixxref.GetXRef(linkend)
    title = titles.get(tid)
    result = []
    if title is None:
        # Without the guard title['tag'] raises a TypeError and aborts the
        # whole conversion for a single dangling reference.
        logging.warning('%s: No title found for xref linkend="%s"', xml.sourceline, linkend)
    else:
        # all sectN need to become 'section'
        tag = title['tag']
        tag = {
            'sect1': 'section',
            'sect2': 'section',
            'sect3': 'section',
            'sect4': 'section',
            'sect5': 'section',
        }.get(tag, tag)
        result.append(
            '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
            (href, title['title'], tag, ''.join(convert_title(ctx, title['xml']))))

    append_text(xml.tail, result)
    return result
# TODO(ensonic): turn into class with converters as functions and ctx as self
#
# Dispatch table: docbook tag name -> converter function(ctx, xml) that returns
# a list of html fragments. Tags that are not listed here are handled by
# convert__unknown() (see convert_inner()).
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # titles are rendered explicitly by the converter of the parent tag
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
# conversion helpers

# Start of every html page, up to and including the opening <body> tag.
# Takes two %-substitutions: the page title and a block of extra <head> markup
# that is placed in front of the stylesheet link (presumably the output of
# generate_head_links() - confirm at the call site).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
def generate_head_links(ctx):
    """Return the <link rel=...> tags for the navigation targets in ctx.

    'nav_home' is required; 'nav_up', 'nav_prev' and 'nav_next' are optional.
    """
    template = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [template % ('home', home.filename, home.title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            links.append(template % (rel, target.filename, target.title))
    return ''.join(links)
def generate_nav_links(ctx):
    """Return the table cells with the home/up/prev/next navigation icons.

    A target that is missing from ctx is shown as an insensitive icon without
    a link. 'nav_home' is required.
    """
    active = '<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" height="16" border="0" alt="%s"></a></td>'
    inactive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    for key, accesskey, icon, alt in (
            ('nav_up', 'u', 'up', 'Up'),
            ('nav_prev', 'p', 'left', 'Prev'),
            ('nav_next', 'n', 'right', 'Next')):
        if key in ctx:
            cells.append(active % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(inactive % icon)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Build the nested <dt>/<dd> entries listing the children of node.

    Each child becomes a <dt> linking to its file (plus anchor, if set) and,
    when it has a subtitle, a trailing subtitle span. Children that have
    children of their own get a nested <dd><dl> list built recursively.

    Returns:
      A list of html fragments (empty when node has no children).
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries += ['<dd><dl>', *generate_toc(ctx, child), '</dl></dd>']
    return entries
def generate_basic_nav(ctx):
    """Render the top navigation table for pages without section shortcuts.

    The shortcuts cell is left empty; the icon cells come from
    generate_nav_links(ctx).

    Returns:
      The navigation table as a single html string.
    """
    # The row needs a '%s' slot for the nav link cells; without it the
    # %-formatting below fails with "not all arguments converted".
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts"></td>
%s
</tr>
</table>
""" % generate_nav_links(ctx)
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Render the top navigation table with one shortcut per division.

    Args:
      ctx: conversion context, passed on to xml_get_title() and
        generate_nav_links().
      divs: xml nodes (e.g. glossdiv/indexdiv); each one's title becomes a
        shortcut linking to '#<prefix><title>'.
      prefix: string prepended to the title to form the link target.
      span_id: suffix for the 'nav_' id of the span wrapping the shortcuts.

    Returns:
      The navigation table as a single html string.
    """
    ix_nav = []
    for s in divs:
        title = xml_get_title(ctx, s)
        ix_nav.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title))

    # Three arguments are formatted in: the span id, the '|'-separated
    # shortcuts and the nav link cells, so the template needs three slots.
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts">
<span id="nav_%s">
%s
</span>
</td>
%s
</tr>
</table>
""" % (span_id, '\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to result.

    Besides the fixed 'Top' shortcut, one shortcut is added per refsect1 that
    is not a '*_proto' section, has an 'id' attribute and whose id contains a
    '.'. The part of the id after the first '.' (with '-' replaced by '_')
    names the wrapping span.

    Args:
      ctx: conversion context, passed on to xml_get_title() and
        generate_nav_links().
      refsect1s: the refsect1 xml nodes of the refentry.
      result: list of html fragments, extended in place.
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for s in refsect1s:
        # don't list TOC sections (role="xxx_proto")
        if s.attrib.get('role', '').endswith("_proto"):
            continue
        # skip section without 'id' attrs
        if 'id' not in s.attrib:
            continue

        ref_id = s.attrib['id']
        # skip foreign sections
        if '.' not in ref_id:
            continue

        title = xml_get_title(ctx, s)
        span_id = ref_id.split('.')[1].replace('-', '_')

        # NOTE(review): the padding around the '|' separator may have been
        # non-breaking spaces (&#160;) originally - confirm against upstream.
        result.append("""
<span id="nav_%s">
   <span class="dim">|</span> 
<a href="#%s" class="shortcut">%s</a>
</span>
""" % (span_id, ref_id, title))
    # The closing rows need a '%s' slot for the nav link cells; without it
    # the %-formatting fails with "not all arguments converted".
    result.append("""
</td>
%s
</tr>
</table>
""" % generate_nav_links(ctx))
def generate_footer(ctx):
    """Render the footnotes block that closes a page.

    Returns:
      An empty list when ctx has no 'footnotes' key. Otherwise the fragments
      of a 'footnotes' div containing all collected footnote fragments.
    """
    if 'footnotes' not in ctx:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    for footnote in ctx['footnotes']:
        footer += footnote
    footer.append('</div>\n')
    return footer
def get_id_path(node):
    """Generate the index path used to build a synthetic 'id'.

    We need to walk up the xml-tree and check the positions for each sibling.
    When reaching the top of the tree we collect remaining index entries from
    the chunked-tree.

    Args:
      node: chunk-tree node; reads node.xml, node.idx and node.parent.

    Returns:
      A list of 1-based positions as strings, outermost first.
    """
    # Collect innermost-first and reverse once at the end instead of
    # repeatedly inserting at the front of the list.
    ix = []
    xml = node.xml
    parent = xml.getparent()
    while parent is not None:
        # index() replaces the deprecated getchildren().index()
        ix.append(str(parent.index(xml) + 1))
        xml, parent = parent, parent.getparent()
    while node is not None:
        ix.append(str(node.idx + 1))
        node = node.parent

    ix.reverse()
    return ix
def get_id(node):
    """Return the 'id' for the chunk, generating one if the xml has none.

    A non-empty 'id' attribute on node.xml is returned as is. Otherwise the
    missing id is logged and 'id-' plus the dotted result of get_id_path()
    is returned.
    """
    xml = node.xml
    explicit_id = xml.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     xml.sourceline, xml.tag)
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Render a chunk page made of a title, a table of contents and content.

    Args:
      ctx: conversion context; the chunk is taken from ctx['node'].
      div_class: class of the <div> wrapping the page body.
      title_tag: html tag name used for the title heading (e.g. 'h2').

    Returns:
      The page as a list of html fragments.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    if node.title:
        heading = """
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>"""
        page.append(heading % (title_tag, get_id(node), node.title, title_tag))

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        page.append("""<p><b>Table of Contents</b></p>
<div class="toc">
<dl class="toc">
""")
        page += toc_entries
        page.append("""</dl>
</div>
""")
    # convert_inner() appends the converted child content to the list
    convert_inner(ctx, node.xml, page)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1287 # docbook chunks
def convert_book(ctx):
    """Render the book start page: title banner, bookinfo and full TOC.

    Args:
      ctx: conversion context; the book chunk is taken from ctx['node'].

    Returns:
      The page as a list of html fragments.
    """
    node = ctx['node']
    result = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
<tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    ]
    # A book without <bookinfo> simply gets no info block (indexing the
    # findall() result would raise IndexError for it).
    bookinfo = node.xml.find('bookinfo')
    if bookinfo is not None:
        result.extend(convert_bookinfo(ctx, bookinfo))
    result.append("""<div class="toc">
<dl class="toc">
""")
    result.extend(generate_toc(ctx, node.root))
    result.append("""</dl>
</div>
""")
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_chapter(ctx):
    """Render a 'chapter' chunk: 'chapter' div with an <h2> title and a TOC."""
    return convert_chunk_with_toc(ctx, 'chapter', 'h2')
def convert_glossary(ctx):
    """Render the glossary page with one shortcut and section per glossdiv.

    Args:
      ctx: conversion context; the glossary chunk is taken from ctx['node'].

    Returns:
      The page as a list of html fragments.
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    # the heading level follows the depth of the chunk in the tree
    heading = """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>"""
    page = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'),
        heading % (node.depth, get_id(node), node.title, node.depth),
    ]
    for glossdiv in glossdivs:
        page += convert_glossdiv(ctx, glossdiv)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
def convert_index(ctx):
    """Render the index page with one shortcut and section per indexdiv.

    Args:
      ctx: conversion context; the index chunk is taken from ctx['node'].

    Returns:
      The page as a list of html fragments.
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv. An index without the outer indexdiv
    # is rendered without entries instead of failing on None.findall().
    outer = node.xml.find('indexdiv')
    indexdivs = outer.findall('indexdiv') if outer is not None else []

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_part(ctx):
    """Render a 'part' chunk: 'part' div with an <h1> title and a TOC."""
    return convert_chunk_with_toc(ctx, 'part', 'h1')
def convert_preface(ctx):
    """Render a preface page: optional <h2> title followed by the content.

    Args:
      ctx: conversion context; the preface chunk is taken from ctx['node'].

    Returns:
      The page as a list of html fragments.
    """
    node = ctx['node']
    page = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">',
    ]
    if node.title:
        heading = """
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>"""
        page.append(heading % (get_id(node), node.title))
    # convert_inner() appends the converted child content to the list
    convert_inner(ctx, node.xml, page)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
def convert_reference(ctx):
    """Render a 'reference' chunk: 'reference' div with an <h1> title and a TOC."""
    return convert_chunk_with_toc(ctx, 'reference', 'h1')
def convert_refentry(ctx):
    """Render a refentry page: navigation, title block and all refsect1s.

    If the entry has refmeta/refmiscinfo/inlinegraphic, that graphic is
    shown as the gallery image next to the title.

    Args:
      ctx: conversion context; the refentry chunk is taken from ctx['node'].

    Returns:
      The page as a list of html fragments.
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Descend along the first refmeta > refmiscinfo > inlinegraphic, the
    # gallery stays empty as soon as one level is missing.
    gallery = ''
    graphic = node.xml
    for tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        graphic = graphic.find(tag)
        if graphic is None:
            break
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, graphic))

    page = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    # appends the navigation table to the list
    generate_refentry_nav(ctx, refsect1s, page)
    title_block = """
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s %s</p>
</td>
<td class="gallery_image" valign="top" align="right">%s</td>
</tr></table>
</div>
"""
    page.append(title_block % (
        node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        page += convert_refsect1(ctx, refsect1)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
def convert_sect1(ctx):
    """Render a 'sect1' chunk: 'sect1' div with an <h2> title and a TOC."""
    return convert_chunk_with_toc(ctx, 'sect1', 'h2')
# Dispatch table mapping a chunk name to the function that renders the page
# for that chunk. Each function takes the conversion ctx and returns the
# page as a list of html fragments.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
def generate_nav_nodes(files, node):
    """Collect the navigation targets (home, up, prev, next) for node.

    Args:
      files: list of all chunk nodes; prev/next are the list neighbours of
        node.
      node: the chunk node the navigation is built for.

    Returns:
      A dict that always has 'nav_home' and, where such a target exists,
      'nav_up', 'nav_prev' and 'nav_next'.
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    has_prev = position > 0
    has_next = position + 1 < len(files)
    if has_prev:
        nav['nav_prev'] = files[position - 1]
    if has_next:
        nav['nav_next'] = files[position + 1]
    return nav
def convert(out_dir, module, files, node):
    """Convert the docbook chunks to a html file.

    The output file is opened before the converter lookup, so a chunk type
    without a converter still yields an (empty) file plus a warning.

    Args:
      out_dir: already created output dir
      module: module name, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {'module': module, 'files': files, 'node': node}
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Build the nested <sub> entries of the devhelp2 chapter list.

    Children without children of their own become self-closing <sub/> tags,
    all others wrap the recursively generated entries of their children.

    Returns:
      A list of xml fragments (empty when node has no children).
    """
    lines = []
    for child in node.children:
        if not child.children:
            lines.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        lines.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        lines += create_devhelp2_toc(child)
        lines.append('</sub>\n')
    return lines
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into xml attributes.

    The condition holds '|' separated 'key:value' items (since, deprecated,
    ...); an item without ':' gets an empty value, as deprecated can have no
    description. Double quotes are replaced by '&quot;' first.

    Returns:
      The attributes with a leading space, or '' if node has no condition.
    """
    if 'condition' not in node.attrib:
        return ''

    escaped = node.attrib['condition'].replace('"', '&quot;')
    # partition() splits on the first ':' only and leaves the value empty
    # when there is none
    pairs = (item.partition(':') for item in escaped.split('|'))
    return ' ' + ' '.join('{}="{}"'.format(key, value) for key, _, value in pairs)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Render the devhelp2 <keyword> line for a refsect2 node.

    The keyword type is the node's 'role', the name its title and the link
    is base_link followed by the node's 'id'.
    """
    kind = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (kind, name, link, conditions)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Render the devhelp2 <keyword> line for an enum or struct member.

    The keyword type is the node's 'role', the name is the given title and
    the link is base_link followed by name.
    """
    kind = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (kind, title, link, conditions)
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    The file lists the chapter tree and one keyword per refsect2 with a
    'role', plus keywords for the enum and struct members below them.

    Args:
      out_dir: directory the index file is written to.
      module: module name, used for the file name and the book name.
      xml: root of the document; queried with xpath() for the bookinfo.
      files: list of chunk nodes; each node's xml is scanned for keywords.
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_url = ''
        # xpath() returns a list (possibly empty), never None, so test for
        # emptiness and keep the defaults for anything that is missing.
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if urls:
                online_url = urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append(""" </chapters>
 <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members are named by the text of their <para>
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members use their id as both name and link target
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append(""" </functions>
</book>
""")
        idx.writelines(result)
def get_dirs(uninstalled):
    """Locate the gtk-doc data directory and the style directory.

    Args:
      uninstalled: if true, look next to the running script (falling back to
        $ABS_TOP_SRCDIR when 'gtk-doc.xsl' is not found there); otherwise use
        the installed data dir from config.datadir.

    Returns:
      A tuple (gtkdocdir, styledir).
    """
    if uninstalled:
        # this does not work from builddir!=srcdir
        # An empty dirname means the current dir; appending '/...' to it
        # would wrongly point at the filesystem root.
        gtkdocdir = os.path.dirname(sys.argv[0]) or os.curdir
        if not os.path.exists(os.path.join(gtkdocdir, 'gtk-doc.xsl')):
            # try 'srcdir' (set from makefiles) too, but only if it is set
            srcdir = os.environ.get('ABS_TOP_SRCDIR')
            if srcdir and os.path.exists(os.path.join(srcdir, 'gtk-doc.xsl')):
                gtkdocdir = srcdir
        styledir = os.path.join(gtkdocdir, 'style')
    else:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        styledir = gtkdocdir
    return (gtkdocdir, styledir)
def main(module, index_file, out_dir, uninstalled):
    """Run the whole conversion from the main xml document to html output.

    The duration of each of the seven steps is logged with logging.warning.

    Args:
      module: module name, passed on to the chunking, the devhelp2 index
        and the html conversion.
      index_file: main xml document; parsed with xincludes resolved.
      out_dir: output directory receiving images, stylesheet, the devhelp2
        index and the html files.
      uninstalled: passed to get_dirs() to select where data files are
        looked up.
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of HTML_FORMATTER to the copied
    # stylesheet
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us
    # the globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    # can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # keep only the nodes without an anchor, in pre-order
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node)
    logging.warning("7: %7.3lf: create html", timer() - _t)
def run(options):
    """Entry point: set up the output dir, run main() and exit.

    Args:
      options: parsed options; options.args holds the module name and the
        main document, options.uninstalled is passed on to main().
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine, other errors propagate
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))