Update French translation
[gtk-doc.git] / gtkdoc / mkhtml2.py
blobfef48768715d6a7703fff409c88804c7b9e241fa
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - tag converters:
39 - 'section'/'simplesect' - the first we convert as a chunk, the nested ones we
40 need to convert as 'sect{2,3,4,...}, we can track depth in 'ctx'
41 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
42 - inside 'glossentry' we're only handling 'glossterm' and 'glossdef'
43 - convert_{figure,table} need counters.
44 - check each docbook tag if it can contain #PCDATA, if not don't check for
45 xml.text/xml.tail and add a comment (# no PCDATA allowed here)
46 - find a better way to print context for warnings
47 - we use 'xml.sourceline', but this all does not help a lot due to xi:include
48 - consolidate title handling:
49 - always use the titles-dict
50 - convert_title(): uses titles.get(tid)['title']
51 - convert_xref(): uses titles[tid]['tag'], ['title'] and ['xml']
52 - create_devhelp2_refsect2_keyword(): uses titles[tid]['title']
53 - there only store what we have (xml, tag, ...)
54 - when chunking generate 'id's and add entries to titles-dict
55 - add accessors for title and raw_title that lazily get them
56 - see if any of the other ~10 places that call convert_title() could use this
57 cache
58 - performance
59 - consider some perf-warnings flag
60 - see 'No "id" attribute on'
61 - xinclude processing in libxml2 is slow
62 - if we disable it, we get '{http://www.w3.org/2003/XInclude}include' tags
63 and we could try handling them ourself, in some cases those are subtrees
64 that we extract for chunking anyway
66 DIFFERENCES:
67 - titles
68 - we add the chunk label to the title in toc, on the page and in nav tooltips
69 - docbook xsl only sometimes adds the label to the titles and when it does it
70 adds name chunk type too (e.g. 'Part I.' instead of 'I.')
71 - navigation
72 - we always add an up-link except on the first page
73 - footer
74 - we're now omitting the footer
75 - tocs
76 - we always add 'Table of Contents' before a toc
77 - docbook does that for some pages, it is configurable
79 OPTIONAL:
80 - minify html: https://pypi.python.org/pypi/htmlmin/
82 Requirements:
83 sudo pip3 install anytree lxml pygments
85 Example invocation:
86 cd tests/bugs/docs/
87 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
88 xdg-open db2html/index.html
89 meld html db2html
91 Benchmarking:
92 cd tests/bugs/docs/;
93 rm html-build.stamp; time make html-build.stamp
94 """
96 import argparse
97 import errno
98 import logging
99 import os
100 import shutil
101 import sys
103 from anytree import Node, PreOrderIter
104 from copy import deepcopy
105 from glob import glob
106 from lxml import etree
107 from pygments import highlight
108 from pygments.lexers import CLexer
109 from pygments.formatters import HtmlFormatter
110 from timeit import default_timer as timer
112 from . import config, fixxref
# pygments setup
# Lexer cache keyed by lower-case language name. Only the C lexer is created
# up front; further entries are added when a listing asks for them.
LEXERS = dict(c=CLexer())
# Shared formatter; nowrap=True is passed so the caller supplies the <pre>.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
class ChunkParams:
    """Chunking parameters for one docbook tag.

    Attributes:
      prefix: file name prefix used when a chunk name has to be generated.
      parent: tag name of the parent chunk type (or None).
      min_idx: minimum child index a node needs to become its own chunk.
      idx: running sequence number for generated chunk names, starts at 1.
    """

    def __init__(self, prefix, parent=None, min_idx=0):
        self.prefix, self.parent, self.min_idx = prefix, parent, min_idx
        self.idx = 1
# Used as a min_idx that no child index can reach.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list also has a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', parent='book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', parent='book'),
    'glossary': ChunkParams('go', parent='book'),
    'index': ChunkParams('ix', parent='book'),
    'part': ChunkParams('pt', parent='book'),
    'preface': ChunkParams('pr', parent='book'),
    'refentry': ChunkParams('re', parent='book'),
    'reference': ChunkParams('rn', parent='book'),
    'sect1': ChunkParams('s', parent='chapter', min_idx=1),
    'section': ChunkParams('s', parent='chapter', min_idx=1),
    'sect2': ChunkParams('s', parent='sect1', min_idx=DONT_CHUNK),
    'sect3': ChunkParams('s', parent='sect2', min_idx=DONT_CHUNK),
    'sect4': ChunkParams('s', parent='sect3', min_idx=DONT_CHUNK),
    'sect5': ChunkParams('s', parent='sect4', min_idx=DONT_CHUNK),
}
157 # TAGS we don't support:
158 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
# Per tag: (title xpath, subtitle xpath or None). '_' is the fallback entry.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose'),
    ),
}

# selects every element that carries an 'id' attribute
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text
glossary = {}

# document-wide footnote counter
footnote_idx = 1

# nested dict with subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
titles = {}

# files to copy
assets = set()
def encode_entities(text):
    """Return text with '&', '<' and '>' replaced by their html entities."""
    # '&' has to go first, otherwise the entities added below get re-encoded.
    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;')):
        text = text.replace(raw, entity)
    return text
def raw_text(xml):
    """Return the text content of the xml node serialized as str, stripped."""
    serialized = etree.tostring(xml, method="text", encoding=str)
    return serialized.strip()
def gen_chunk_name(node, chunk_params):
    """Generate a chunk file name

    The node's 'id' attribute is used when present. Otherwise the name is
    built from the prefix in chunk_params and its sequence number, which is
    incremented afterwards.
    """
    explicit_id = node.attrib.get('id')
    if explicit_id is None:
        generated = '%s%02d' % (chunk_params.prefix, chunk_params.idx)
        chunk_params.idx += 1

        # TODO: docbook prepends the names of parent chunks for nested tags.
        # The 'parent' in chunk_params is not sufficient for that, e.g.
        # 'index' can be in 'book' or 'part' or ... Maybe we can track the
        # chunk parents when we chunk and maintain the 'idx' on each level.

        logging.info('Gen chunk name: "%s"', generated)
        return generated
    return explicit_id
def get_chunk_titles(module, node):
    """Collect title and subtitle information for a chunk node.

    Returns a dict with the keys 'title', 'raw_title', 'title_tag',
    'subtitle' and 'subtitle_tag'. Every key is always present; values stay
    None when the node has no matching title/subtitle element.
    """
    tag = node.tag
    (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])

    ctx = {
        'module': module,
        'files': [],
    }
    result = {
        'title': None,
        # Always provide the key so that consumers get the same set of
        # attributes for chunks with and without a title.
        'raw_title': None,
        'title_tag': None,
        'subtitle': None,
        'subtitle_tag': None,
    }
    res = title(node)
    if res:
        # handle chunk label for tocs
        label = node.attrib.get('label')
        label = label + '. ' if label else ''

        xml = res[0]
        # TODO: consider to eval 'title'/'raw_title' lazily
        result['title'] = label + ''.join(convert_title(ctx, xml))
        result['raw_title'] = encode_entities(raw_text(xml))
        # A plain 'title' element is reported under the chunk's own tag.
        result['title_tag'] = xml.tag if xml.tag != 'title' else tag

    if subtitle:
        res = subtitle(node)
        if res:
            xml = res[0]
            result['subtitle'] = ''.join(convert_title(ctx, xml))
            result['subtitle_tag'] = xml.tag
    return result
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    Called with parent=None for the document root; the node created for it
    is returned as the root of the chunk tree. Nodes whose tag is listed in
    CHUNK_PARAMS get a tree node: either with a file of their own, or (when
    idx is below the tag's min_idx) as an anchor inside the parent's file.
    All children are processed recursively.
    """
    tag = xml_node.tag
    params = CHUNK_PARAMS.get(tag)
    if params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx < params.min_idx:
            # stays on the parent's page
            filename = parent.filename
            anchor = '#' + chunk_name
        else:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # continue with a copy and remove the xml-node from its parent
                detached = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = detached
            filename = chunk_name + '.html'
            anchor = None

        parent = Node(tag, parent=parent, xml=xml_node, depth=depth, idx=idx,
                      filename=filename, anchor=anchor, **title_args)

        depth += 1
        idx = 0

    for child in xml_node:
        chunk(child, module, depth, idx, parent)
        # only chunkable children advance the index
        if child.tag in CHUNK_PARAMS:
            idx += 1

    return parent
def add_id_links_and_titles(files, links):
    """Register all ids of the given chunk nodes in links and titles.

    For every element with an 'id' attribute, links maps the id to the chunk
    file (plus '#id' unless the id is the chunk name itself). Elements with a
    title additionally get an entry in the module-level titles dict.
    """
    for node in files:
        filename = node.filename
        # drop the 5 trailing characters of the filename ('.html')
        own_id = filename[:-5]
        for elem in ID_XPATH(node.xml):
            idval = elem.attrib['id']
            links[idval] = filename if idval == own_id else filename + '#' + idval

            title_xpath = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
            matches = title_xpath(elem)
            if not matches:
                continue
            title_node = matches[0]
            # TODO: consider to eval 'title' lazily
            titles[idval] = {
                'title': encode_entities(raw_text(title_node)),
                'xml': title_node,
                'tag': elem.tag,
            }
def build_glossary(files):
    """Fill the module-level glossary dict from all 'glossary' chunks.

    Each glossentry with both a glossterm and a glossdef adds a term -> text
    mapping. Incomplete entries are reported with a warning and skipped.
    """
    for node in files:
        if node.xml.tag != 'glossary':
            continue
        for term in GLOSSENTRY_XPATH(node.xml):
            # TODO: there can be all kind of things in a glossary. This only supports
            # what we commonly use, glossterm is mandatory
            key_node = term.find('glossterm')
            val_node = term.find('glossdef')
            if key_node is not None and val_node is not None:
                glossary[raw_text(key_node)] = raw_text(val_node)
                continue

            debug = []
            if key_node is None:
                debug.append('missing key')
            if val_node is None:
                debug.append('missing val')
            # A broken entry may have no 'id' either; don't let the warning
            # itself fail with a KeyError.
            ident = term.attrib.get('id') or 'line %s' % term.sourceline
            logging.warning('Broken glossentry "%s": %s',
                            ident, ','.join(debug))
352 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert each child of xml and extend result with the html fragments.

    Tags without an entry in convert_tags are handled by convert__unknown.
    """
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only its converted children."""
    children_html = []
    convert_inner(ctx, xml, children_html)
    return children_html
def convert_skip(ctx, xml):
    """Drop the tag and everything below it: always returns an empty list."""
    return list()
def append_idref(attrib, result):
    """Append a named anchor to result if attrib contains an 'id'."""
    if attrib.get('id') is None:
        return
    result.append('<a name="%s"></a>' % attrib.get('id'))
def append_text(ctx, text, result):
    """Append the entity-encoded text to result.

    Empty/None text is skipped. Whitespace-only text is skipped as well,
    unless ctx contains the 'no-strip' key.
    """
    if not text:
        return
    if 'no-strip' not in ctx and not text.strip():
        return
    result.append(encode_entities(text))
# tags for which the missing-converter warning was already printed
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for tags without an entry in convert_tags.

    Sub-chunks produce nothing, xml comments are copied as html comments.
    Any other tag is logged once and its converted children are wrapped in
    html comments naming the tag.
    """
    unknown = xml.tag
    # don't recurse on subchunks
    if unknown in CHUNK_PARAMS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']

    # warn only once
    if unknown not in missing_tags:
        logging.warning('Add tag converter for "%s"', unknown)
        missing_tags[unknown] = True
    html = ['<!-- ' + unknown + '-->\n']
    convert_inner(ctx, xml, html)
    html.append('<!-- /' + unknown + '-->\n')
    return html
def convert_mediaobject_children(ctx, xml, result):
    """Append an <img> for the imageobject/imagedata below xml, if any.

    The text of textobject/phrase becomes the alt attribute. A non-empty
    fileref is also recorded in the module-level assets set.
    """
    # look for textobject/phrase
    alt_attr = ''
    textobject = xml.find('textobject')
    phrase = textobject.findtext('phrase') if textobject is not None else None
    if phrase:
        alt_attr = ' alt="%s"' % phrase

    # look for imageobject/imagedata
    imageobject = xml.find('imageobject')
    if imageobject is None:
        return
    imagedata = imageobject.find('imagedata')
    if imagedata is None:
        return
    # TODO(ensonic): warn on missing fileref attr?
    fileref = imagedata.attrib.get('fileref', '')
    if fileref:
        assets.add(fileref)
    result.append('<img src="%s"%s>' % (fileref, alt_attr))
def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a section-like tag into a <div> with a heading.

    h_tag is the html heading tag used for the title. inner_func converts
    the children and has the same signature as convert_inner.
    """
    html = ['<div class="%s">\n' % xml.tag]
    title_node = xml.find('title')
    if title_node is not None:
        append_idref(xml.attrib, html)
        heading = ''.join(convert_title(ctx, title_node))
        html.append('<%s>%s</%s>' % (h_tag, heading, h_tag))
    append_text(ctx, xml.text, html)
    inner_func(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def xml_get_title(ctx, xml):
    """Return the converted title of xml, or '' (with a warning) if missing."""
    title_node = xml.find('title')
    if title_node is None:
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return ''.join(convert_title(ctx, title_node))
445 # docbook tags
def convert_abstract(ctx, xml):
    """Convert 'abstract' into a titled <div class="abstract">."""
    html = ['<div class="abstract">\n<p class="title"><b>Abstract</b></p>']
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_acronym(ctx, xml):
    """Convert 'acronym', using the glossary definition as tooltip.

    The acronym text is looked up in the module-level glossary; a missing
    entry results in an empty title attribute.
    """
    # An empty <acronym/> has text None; don't render that as 'None'.
    key = xml.text or ''
    title = glossary.get(key, '')
    # TODO: print a sensible warning if missing
    # Both values are plain text and need encoding before they go into the
    # markup; the title additionally lives inside a quoted attribute.
    result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (
        encode_entities(title).replace('"', '&quot;'), encode_entities(key))]
    # Same tail handling (incl. entity encoding) as the other inline tags.
    append_text(ctx, xml.tail, result)
    return result
def convert_anchor(ctx, xml):
    """Convert 'anchor' into a named html anchor; the 'id' attr is required."""
    target = xml.attrib['id']
    return ['<a name="%s"></a>' % target]
def convert_bookinfo(ctx, xml):
    """Convert 'bookinfo' into the titlepage <div>, followed by a rule."""
    result = ['<div class="titlepage">']
    convert_inner(ctx, xml, result)
    result.append("""<hr>
</div>""")
    # Go through append_text like the other converters, so that the tail gets
    # entity-encoded and whitespace-only tails are handled uniformly.
    append_text(ctx, xml.tail, result)
    return result
def convert_blockquote(ctx, xml):
    """Convert 'blockquote' into a <blockquote> wrapped in a <div>."""
    html = []
    html.append('<div class="blockquote">\n<blockquote class="blockquote">')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</blockquote>\n</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_code(ctx, xml):
    """Convert an inline tag into <code> with the tag name as css class."""
    html = []
    html.append('<code class="%s">' % xml.tag)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code>')
    append_text(ctx, xml.tail, html)
    return html
def convert_colspec(ctx, xml):
    """Convert 'colspec' into <col>.

    The 'colname' attribute becomes the css class and 'colwidth' the width.
    """
    html = ['<col']
    for source_attr, template in (('colname', ' class="%s"'),
                                  ('colwidth', ' width="%s"')):
        value = xml.attrib.get(source_attr)
        if value is not None:
            html.append(template % value)
    html.append('>\n')
    # is in tgroup and there can be no 'text'
    return html
def convert_command(ctx, xml):
    """Convert 'command' into <strong class="userinput"><code>."""
    html = []
    html.append('<strong class="userinput"><code>')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></strong>')
    append_text(ctx, xml.tail, html)
    return html
def convert_corpauthor(ctx, xml):
    """Convert 'corpauthor' into an <h3 class="corpauthor"> inside a <div>."""
    html = []
    html.append('<div><h3 class="corpauthor">\n')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</h3></div>\n')
    append_text(ctx, xml.tail, html)
    return html
def convert_div(ctx, xml):
    """Convert a block tag into <div> with the tag name as css class."""
    html = []
    html.append('<div class="%s">\n' % xml.tag)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_emphasis(ctx, xml):
    """Convert 'emphasis'.

    With a 'role' attribute the content goes into a <span> using the role as
    css class, otherwise into <span class="emphasis"><em>.
    """
    role = xml.attrib.get('role')
    if role is None:
        opening, closing = '<span class="emphasis"><em>', '</em></span>'
    else:
        opening, closing = '<span class="%s">' % role, '</span>'
    html = [opening]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append(closing)
    append_text(ctx, xml.tail, html)
    return html
def convert_em(ctx, xml):
    """Convert an inline tag into <em> with the tag name as css class."""
    html = []
    html.append('<em class="%s">' % xml.tag)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</em>')
    append_text(ctx, xml.tail, html)
    return html
def convert_em_code(ctx, xml):
    """Convert an inline tag into <em><code>, with an anchor if it has an id."""
    html = []
    html.append('<em class="%s"><code>' % xml.tag)
    append_idref(xml.attrib, html)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></em>')
    append_text(ctx, xml.tail, html)
    return html
def convert_entry(ctx, xml):
    """Convert a table 'entry' into a <th> or <td> cell.

    The cell type is taken from ctx['table.entry'], which the thead/tbody
    converters set. 'role' becomes the css class and 'morerows' the rowspan.
    """
    # Entries that are not below thead/tbody have no cell type in ctx;
    # treat them as regular data cells instead of failing with a KeyError.
    entry_type = ctx.get('table.entry', 'td')
    result = ['<' + entry_type]
    role = xml.attrib.get('role')
    if role is not None:
        result.append(' class="%s"' % role)
    morerows = xml.attrib.get('morerows')
    if morerows is not None:
        # docbook counts the additional rows, html the total number of rows
        result.append(' rowspan="%s"' % (1 + int(morerows)))
    result.append('>')
    append_text(ctx, xml.text, result)
    convert_inner(ctx, xml, result)
    result.append('</' + entry_type + '>')
    append_text(ctx, xml.tail, result)
    return result
def convert_figure(ctx, xml):
    """Convert 'figure' into a <div class="figure"> with an optional title."""
    html = ['<div class="figure">\n']
    append_idref(xml.attrib, html)
    title_node = xml.find('title')
    if title_node is not None:
        # TODO(ensonic): Add a 'Figure X. ' prefix, needs a figure counter
        caption = ''.join(convert_title(ctx, title_node))
        html.append('<p><b>%s</b></p>' % caption)
    html.append('<div class="figure-contents">')
    # TODO(ensonic): title can become alt on inner 'graphic' element
    convert_inner(ctx, xml, html)
    html.append('</div></div><br class="figure-break"/>')
    append_text(ctx, xml.tail, html)
    return html
def convert_footnote(ctx, xml):
    """Convert 'footnote' into a reference mark and queue the note body.

    The html for the note itself is appended to ctx['footnotes']; only the
    superscript link to it is returned. The numbering uses the module-level
    footnote_idx counter.
    """
    footnotes = ctx.get('footnotes', [])
    # footnotes idx is not per page, but per doc
    global footnote_idx
    idx = footnote_idx
    footnote_idx += 1

    # need a pair of ids for each footnote (docbook generates different ids)
    this_id = 'footnote-%d' % idx
    that_id = 'ftn.' + this_id

    inner = ['<div id="%s" class="footnote">' % that_id]
    inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
        this_id, idx))
    # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
    # get double nested paras :/.
    # convert_inner(ctx, xml, inner)
    para = xml.find('para')
    if para is None:
        para = xml.find('simpara')
    if para is not None:
        # para.text is None when the para is empty or starts with a child
        # tag. append_text skips that (a None entry would break joining the
        # fragments later) and also entity-encodes the text.
        append_text(ctx, para.text, inner)
    else:
        logging.warning('%s: Unhandled footnote content: %s', xml.sourceline, raw_text(xml))
    inner.append('</p></div>')
    footnotes.append(inner)
    ctx['footnotes'] = footnotes
    return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
        that_id, this_id, idx)]
def convert_formalpara(ctx, xml):
    """Convert 'formalpara' into a <p> that starts with the bold title.

    A missing 'title' or 'para' child is tolerated: the respective part is
    left out and a warning is logged.
    """
    title_tag = xml.find('title')
    if title_tag is not None:
        result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
    else:
        logging.warning('%s: Expected title tag under "%s"', xml.sourceline, xml.tag)
        result = ['<p>']
    para_tag = xml.find('para')
    if para_tag is not None:
        append_text(ctx, para_tag.text, result)
        convert_inner(ctx, para_tag, result)
        append_text(ctx, para_tag.tail, result)
    else:
        logging.warning('%s: Expected para tag under "%s"', xml.sourceline, xml.tag)
    result.append('</p>')
    append_text(ctx, xml.tail, result)
    return result
def convert_glossdef(ctx, xml):
    """Convert 'glossdef' into <dd class="glossdef">."""
    html = []
    html.append('<dd class="glossdef">')
    convert_inner(ctx, xml, html)
    html.append('</dd>\n')
    return html
def convert_glossdiv(ctx, xml):
    """Convert 'glossdiv' into an anchored <h3> followed by its entries.

    The title element is removed from xml so that it is not converted a
    second time with the children. Without a title no heading is emitted.
    """
    result = []
    title_tag = xml.find('title')
    if title_tag is not None:
        # text is None for an empty title; the title is plain text and needs
        # entity encoding before it is put into the markup
        title = encode_entities(title_tag.text or '')
        xml.remove(title_tag)
        result.append(
            '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title))
    else:
        logging.warning('%s: Expected title tag under "%s"', xml.sourceline, xml.tag)
    convert_inner(ctx, xml, result)
    return result
def convert_glossentry(ctx, xml):
    """Convert 'glossentry': no markup of its own, only the children."""
    children_html = []
    convert_inner(ctx, xml, children_html)
    return children_html
def convert_glossterm(ctx, xml):
    """Convert 'glossterm' into an anchored <dt>.

    The anchor name is the id of a nested 'anchor' tag, if there is one with
    a non-empty id, and 'glossterm-' + text otherwise.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        glossid, anchor_tail = '', ''
    else:
        glossid, anchor_tail = anchor.attrib.get('id', ''), anchor.tail or ''
    # the text after the nested anchor comes first, then the tag's own text
    text = anchor_tail + (xml.text or '')
    if glossid == '':
        glossid = 'glossterm-' + text
    return [
        '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
            glossid, text)
    ]
def convert_graphic(ctx, xml):
    """Convert 'graphic' into an <img> inside a <div>.

    A non-empty 'fileref' is recorded in the module-level assets set.
    """
    # TODO(ensonic): warn on missing fileref attr?
    image = xml.attrib.get('fileref', '')
    if image:
        assets.add(image)
    return ['<div><img src="%s"></div>' % image]
def convert_indexdiv(ctx, xml):
    """Convert 'indexdiv' into an anchored <h3> followed by its entries.

    The title element is removed from xml so that it is not converted a
    second time with the children. Without a title no heading is emitted.
    """
    result = []
    title_tag = xml.find('title')
    if title_tag is not None:
        # text is None for an empty title; the title is plain text and needs
        # entity encoding before it is put into the markup
        title = encode_entities(title_tag.text or '')
        xml.remove(title_tag)
        result.append(
            '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title))
    convert_inner(ctx, xml, result)
    return result
def convert_informaltable(ctx, xml):
    """Convert 'informaltable' into a <table> wrapped in a <div>.

    pgwide="1" makes the table full width, frame="none" removes the border.
    """
    result = ['<div class="informaltable"><table class="informaltable"']
    if xml.attrib.get('pgwide') == '1':
        result.append(' width="100%"')
    if xml.attrib.get('frame') == 'none':
        result.append(' border="0"')
    result.append('>\n')
    convert_inner(ctx, xml, result)
    result.append('</table></div>')
    # Go through append_text like the other converters, so that the tail gets
    # entity-encoded and whitespace-only tails are handled uniformly.
    append_text(ctx, xml.tail, result)
    return result
def convert_inlinegraphic(ctx, xml):
    """Convert 'inlinegraphic' into a bare <img>.

    A non-empty 'fileref' is recorded in the module-level assets set.
    """
    # TODO(ensonic): warn on missing fileref attr?
    image = xml.attrib.get('fileref', '')
    if image:
        assets.add(image)
    return ['<img src="%s">' % image]
def convert_inlinemediaobject(ctx, xml):
    """Convert 'inlinemediaobject' into a <span> holding the image."""
    html = []
    html.append('<span class="inlinemediaobject">')
    # no PCDATA allowed here
    convert_mediaobject_children(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_itemizedlist(ctx, xml):
    """Convert 'itemizedlist' into a <ul> wrapped in a <div>."""
    result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, result)
    result.append('</ul></div>')
    # Same tail handling as convert_orderedlist: entity-encode the text and
    # skip whitespace-only tails.
    append_text(ctx, xml.tail, result)
    return result
def convert_link(ctx, xml):
    """Convert 'link' into an html link to the target of 'linkend'.

    The target is resolved through fixxref. If it cannot be resolved, the
    bad reference is reported and only the link text is emitted. Without a
    'linkend' the content is emitted unlinked.
    """
    # Use get(): a missing attribute should end up in the unlinked branch
    # below instead of raising a KeyError.
    linkend = xml.attrib.get('linkend')
    result = []
    if linkend:
        link_text = []
        append_text(ctx, xml.text, link_text)
        convert_inner(ctx, xml, link_text)
        text = ''.join(link_text)

        (tid, href) = fixxref.GetXRef(linkend)
        if href:
            title_attr = ''
            title = titles.get(tid)
            if title:
                title_attr = ' title="%s"' % title['title']

            href = fixxref.MakeRelativeXRef(ctx['module'], href)
            result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
        else:
            # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
            fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
            result = [text]
    else:
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
    append_text(ctx, xml.tail, result)
    return result
def convert_listitem(ctx, xml):
    """Convert 'listitem' into <li class="listitem">."""
    html = []
    html.append('<li class="listitem">')
    convert_inner(ctx, xml, html)
    html.append('</li>')
    # no PCDATA allowed here, is in itemizedlist
    return html
def convert_literallayout(ctx, xml):
    """Convert 'literallayout' into a <p> inside <div class="literallayout">."""
    html = []
    html.append('<div class="literallayout"><p><br>\n')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_mediaobject(ctx, xml):
    """Convert 'mediaobject' into a <div> holding the image."""
    html = []
    html.append('<div class="mediaobject">\n')
    # no PCDATA allowed here
    convert_mediaobject_children(ctx, xml, html)
    html.append('</div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_orderedlist(ctx, xml):
    """Convert 'orderedlist' into an <ol> wrapped in a <div>."""
    html = []
    html.append('<div class="orderedlist"><ol class="orderedlist" type="1">')
    convert_inner(ctx, xml, html)
    html.append('</ol></div>')
    append_text(ctx, xml.tail, html)
    return html
def convert_para(ctx, xml):
    """Convert 'para' into <p>; a 'role' attribute becomes the css class."""
    role = xml.attrib.get('role')
    html = ['<p>' if role is None else ('<p class="%s">' % role)]
    append_idref(xml.attrib, html)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_para_like(ctx, xml):
    """Convert a block tag into <p> with the tag name as css class.

    Unlike convert_para, the id anchor is placed in front of the <p>.
    """
    html = []
    append_idref(xml.attrib, html)
    html.append('<p class="%s">' % xml.tag)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_phrase(ctx, xml):
    """Convert 'phrase' into <span>; a 'role' attribute becomes the css class."""
    role = xml.attrib.get('role')
    # the opening tag is emitted in two fragments
    html = ['<span', '>' if role is None else (' class="%s">' % role)]
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_primaryie(ctx, xml):
    """Convert 'primaryie' into a <dt> followed by an empty <dd>."""
    html = []
    html.append('<dt>\n')
    convert_inner(ctx, xml, html)
    html.append('\n</dt>\n<dd></dd>\n')
    return html
def convert_pre(ctx, xml):
    """Convert a preformatted tag into <pre> with the tag name as css class.

    While the content is converted, ctx contains 'no-strip' so that
    whitespace-only text is kept.
    """
    # Since we're inside <pre> don't skip newlines. Remember whether an outer
    # converter already set the flag: in that case it has to stay set when we
    # are done, and must not be deleted a second time.
    nested = 'no-strip' in ctx
    ctx['no-strip'] = True
    try:
        result = ['<pre class="%s">' % xml.tag]
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    finally:
        # also reset the flag if a converter raised
        if not nested:
            del ctx['no-strip']
    append_text(ctx, xml.tail, result)
    return result
def convert_programlisting(ctx, xml):
    """Convert 'programlisting' into <pre class="programlisting">.

    Listings with role="example" are syntax highlighted with pygments and
    get a column with line numbers. The language is taken from the 'language'
    attribute and falls back to ctx['src-lang'] (or 'c' if that is missing).
    If pygments has no lexer for the language, the text is emitted as is.
    """
    # The module only imports CLexer; pull in what the lookup needs here.
    from pygments.lexers import get_lexer_by_name
    from pygments.util import ClassNotFound

    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = (xml.attrib.get('language') or ctx.get('src-lang', 'c')).lower()
            if lang not in LEXERS:
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    # unknown languages are reported by raising; cache the
                    # miss so that we don't search again for each listing
                    LEXERS[lang] = None
            lexer = LEXERS.get(lang, None)
            if lexer:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                # plain text from the xml, needs encoding for html
                result.append(encode_entities(xml.text))
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        append_text(ctx, xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    append_text(ctx, xml.tail, result)
    return result
def convert_quote(ctx, xml):
    """Convert 'quote' into nested <span class="quote"> with quote marks."""
    html = []
    html.append('<span class="quote">"<span class="quote">')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>"</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_refsect1(ctx, xml):
    """Convert 'refsect1' into a section with an <h2> heading.

    A <hr> is inserted between two directly consecutive 'refsect2' children.
    """
    def children_with_dividers(ctx, xml, result):
        # Add a divider between two consecutive refsect2
        previous_tag = None
        for child in xml:
            if child.tag == 'refsect2' and previous_tag == 'refsect2':
                result.append('<hr>\n')
            converter = convert_tags.get(child.tag, convert__unknown)
            result.extend(converter(ctx, child))
            previous_tag = child.tag

    return convert_sect(ctx, xml, 'h2', children_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert 'refsect2' into a section with an <h3> heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_refsect3(ctx, xml):
    """Convert 'refsect3' into a section with an <h4> heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_row(ctx, xml):
    """Convert a table 'row' into <tr>."""
    html = []
    html.append('<tr>\n')
    convert_inner(ctx, xml, html)
    html.append('</tr>\n')
    return html
def convert_sbr(ctx, xml):
    """Convert 'sbr' into a <br>; neither ctx nor xml are looked at."""
    line_break = '<br>'
    return [line_break]
def convert_sect1_tag(ctx, xml):
    """Convert a first-level section into a section with an <h2> heading."""
    return convert_sect(ctx, xml, h_tag='h2')
def convert_sect2(ctx, xml):
    """Convert 'sect2' into a section with an <h3> heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_sect3(ctx, xml):
    """Convert 'sect3' into a section with an <h4> heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_simpara(ctx, xml):
    """Convert 'simpara' into a plain <p>."""
    html = []
    html.append('<p>')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(ctx, xml.tail, html)
    return html
def convert_span(ctx, xml):
    """Convert an inline tag into <span> with the tag name as css class."""
    html = []
    html.append('<span class="%s">' % xml.tag)
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_table(ctx, xml):
    """Convert 'table' into a titled <table> wrapped in <div class="table">.

    The plain text of the title, if any, is used for the summary attribute.
    """
    result = ['<div class="table">']
    append_idref(xml.attrib, result)
    # The summary used to be a fixed string that was only right for a single
    # table; derive it from the title instead.
    summary = ''
    title_tag = xml.find('title')
    if title_tag is not None:
        result.append('<p class="title"><b>')
        # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
        result.extend(convert_title(ctx, title_tag))
        result.append('</b></p>')
        summary = encode_entities(raw_text(title_tag)).replace('"', '&quot;')
    result.append(
        '<div class="table-contents"><table class="table" summary="%s" border="1">' % summary)

    convert_inner(ctx, xml, result)

    result.append('</table></div></div>')
    append_text(ctx, xml.tail, result)
    return result
def convert_tag(ctx, xml):
    """Convert 'tag' into <code>.

    A 'class' attribute yields the css class 'sgmltag-<class>'. Only the text
    is emitted, child tags are not converted.
    """
    kind = xml.attrib.get('class')
    opening = '<code>' if kind is None else ('<code class="sgmltag-%s">' % kind)
    html = [opening]
    append_text(ctx, xml.text, html)
    html.append('</code>')
    append_text(ctx, xml.tail, html)
    return html
def convert_tbody(ctx, xml):
    """Convert 'tbody'; entries below it are rendered as <td>."""
    ctx['table.entry'] = 'td'
    html = ['<tbody>']
    convert_inner(ctx, xml, html)
    html.append('</tbody>')
    # is in tgroup and there can be no 'text'
    return html
def convert_tgroup(ctx, xml):
    """Convert 'tgroup': emits a <colgroup> for the colspecs, then the rest.

    The colspec children are removed from xml once they are converted, so
    they are not converted again with the remaining children.
    """
    # tgroup does not expand to anything, but the nested colspecs need to
    # be put into a colgroup
    html = []
    colspecs = xml.findall('colspec')
    if colspecs:
        html.append('<colgroup>\n')
        for colspec in colspecs:
            html.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    # is in informaltable and there can be no 'text'
    return html
def convert_thead(ctx, xml):
    """Convert 'thead'; entries below it are rendered as <th>."""
    ctx['table.entry'] = 'th'
    html = ['<thead>']
    convert_inner(ctx, xml, html)
    html.append('</thead>')
    # is in tgroup and there can be no 'text'
    return html
def convert_title(ctx, xml):
    """Convert the content of a title tag without adding markup for the tag.

    Returns the html fragments for text, children and tail.
    """
    # This is always explicitly called from some context
    fragments = []
    append_text(ctx, xml.text, fragments)
    convert_inner(ctx, xml, fragments)
    append_text(ctx, xml.tail, fragments)
    return fragments
def convert_ulink(ctx, xml):
    """Convert 'ulink' into an html link to the 'url' attribute.

    The link label is the tag's text, or the url itself if there is no text.
    """
    url = xml.attrib['url']
    label = xml.text if xml.text else url
    html = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, label)]
    append_text(ctx, xml.tail, html)
    return html
def convert_userinput(ctx, xml):
    """Convert 'userinput' into <span class="command"><strong>."""
    html = []
    html.append('<span class="command"><strong>')
    append_text(ctx, xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</strong></span>')
    append_text(ctx, xml.tail, html)
    return html
def convert_variablelist(ctx, xml):
    """Convert 'variablelist' into a two column table wrapped in a <div>."""
    table_start = """<div class="variablelist"><table border="0" class="variablelist">
<colgroup>
<col align="left" valign="top">
<col>
</colgroup>
<tbody>"""
    table_end = """</tbody>
</table></div>"""
    html = [table_start]
    convert_inner(ctx, xml, html)
    html.append(table_end)
    return html
def convert_varlistentry(ctx, xml):
    """Convert 'varlistentry' into a table row.

    The first cell holds the 'term', the second the content of the
    'listitem'. A missing child leaves its cell empty.
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    if term is not None:
        result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    if listitem is not None:
        convert_inner(ctx, listitem, result)
    result.append('</td>')

    # close the row (this used to emit a second opening tag)
    result.append('</tr>')
    return result
def convert_xref(ctx, xml):
    """Convert an 'xref' element into a link that quotes the target title.

    The 'linkend' attribute is resolved through fixxref.GetXRef() and the
    target is looked up in the module-level 'titles' map. When a lookup
    raises KeyError a warning is logged and no link is emitted. The element
    tail is appended in either case.

    Returns:
      list of HTML string fragments
    """
    result = []
    tid, href = fixxref.GetXRef(xml.attrib['linkend'])
    try:
        entry = titles[tid]
        tag = entry['tag']
        # all sectN are referred to as 'section'
        if tag in {'sect1', 'sect2', 'sect3', 'sect4', 'sect5'}:
            tag = 'section'
        tooltip = entry['title']
        label = ''.join(convert_title(ctx, entry['xml']))
        result = [
            '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
            (href, tooltip, tag, label)
        ]
    except KeyError:
        logging.warning('invalid linkend "%s"', tid)

    append_text(ctx, xml.tail, result)
    return result
1126 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: docbook tag name -> converter function for that tag.
# Several tags share one converter (e.g. convert_code, convert_span,
# convert_div).
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'classname': convert_code,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'figure': convert_figure,
    'filename': convert_code,
    'firstterm': convert_em,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'graphic': convert_graphic,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_inlinemediaobject,
    'interfacename': convert_code,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_mediaobject,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_code,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_code,
    'returnvalue': convert_span,
    'row': convert_row,
    'sbr': convert_sbr,
    'screen': convert_pre,
    'section': convert_sect2,  # FIXME: need tracking of nesting
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'simplesect': convert_sect2,  # FIXME: need tracking of nesting
    'structfield': convert_em_code,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tag': convert_tag,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1216 # conversion helpers
# Page preamble shared by all chunk converters. It is a %-template with two
# placeholders: the page title and the pre-rendered <link> tags (see
# generate_head_links) that are placed in front of the stylesheet link.
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
def generate_head_links(ctx):
    """Build the <link rel=...> tags for the page head.

    The 'home' link is always emitted from ctx['nav_home']. The 'up',
    'prev' and 'next' links are only emitted when the matching 'nav_*'
    entry is present in ctx and not None.

    Returns:
      the concatenated tags as one string
    """
    home = ctx['nav_home']
    links = ['<link rel="home" href="%s" title="%s">\n' % (home.filename, home.raw_title)]

    optional_links = (
        ('nav_up', '<link rel="up" href="%s" title="%s">\n'),
        ('nav_prev', '<link rel="prev" href="%s" title="%s">\n'),
        ('nav_next', '<link rel="next" href="%s" title="%s">\n'),
    )
    for key, template in optional_links:
        target = ctx.get(key)
        if target is not None:
            links.append(template % (target.filename, target.raw_title))

    return ''.join(links)
def generate_nav_links(ctx):
    """Build the table cells with the home/up/prev/next navigation icons.

    The home cell is always a link. For 'up', 'prev' and 'next' a linked
    icon is emitted when the matching 'nav_*' entry is present in ctx and
    not None; otherwise the unlinked '-insensitive' icon is emitted.

    Returns:
      the concatenated <td> cells as one string
    """
    home = ctx['nav_home']
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % home.filename
    ]

    # (ctx key, cell when the target exists, cell when it does not)
    buttons = (
        ('nav_up',
         '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>',
         '<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_prev',
         '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>',
         '<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_next',
         '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>',
         '<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>'),
    )
    for key, linked, insensitive in buttons:
        target = ctx.get(key)
        cells.append(insensitive if target is None else linked % target.filename)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Build the table-of-contents entries for the children of 'node'.

    Each child yields a <dt> entry linking to its file (plus its anchor if
    it has one), optionally followed by the subtitle. Children that have
    children of their own get a nested <dd><dl> list, built recursively.

    Returns:
      list of HTML string fragments (empty when the node has no children)
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if not child.children:
            continue
        entries.append('<dd><dl>')
        entries.extend(generate_toc(ctx, child))
        entries.append('</dl></dd>')
    return entries
def generate_basic_nav(ctx):
    """Build the top navigation table for pages without shortcut links.

    The shortcuts cell is left empty; the cells produced by
    generate_nav_links() are placed after it in the same row.

    Returns:
      the navigation table as one string
    """
    # The template needs exactly one '%s' for the nav-link cells; without it
    # the %-formatting below raises TypeError.
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts"></td>
%s
</tr>
</table>
""" % generate_nav_links(ctx)
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the top navigation table with one shortcut per division.

    Args:
      ctx: conversion context
      divs: the division elements; each one's title (via xml_get_title) is
            used both as link label and, after 'prefix', as link target
      prefix: string put in front of the title in the '#...' link target
      span_id: suffix for the id of the span wrapping the shortcuts

    Returns:
      the navigation table as one string
    """
    shortcuts = []
    for div in divs:
        title = xml_get_title(ctx, div)
        shortcuts.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title))

    # Three values are formatted into the template (span id, the shortcut
    # list, the nav-link cells), so it must carry three placeholders.
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts">
<span id="nav_%s">
%s
</span>
</td>
%s
</tr>
</table>
""" % (span_id, '\n<span class="dim">|</span>\n'.join(shortcuts), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to 'result'.

    Besides the 'Top' shortcut, one shortcut is added per refsect1 that
    has an 'id' containing a '.' and whose role does not end in '_proto'.

    Args:
      ctx: conversion context
      refsect1s: the refsect1 elements of the refentry
      result: list of HTML fragments that is extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for s in refsect1s:
        # don't list TOC sections (role="xxx_proto")
        if s.attrib.get('role', '').endswith("_proto"):
            continue
        # skip sections without an 'id' attribute
        ref_id = s.attrib.get('id')
        if ref_id is None:
            continue

        # skip foreign sections
        if '.' not in ref_id:
            continue

        title = xml_get_title(ctx, s)
        span_id = ref_id.split('.')[1].replace('-', '_')

        # NOTE(review): the padding around the '|' separator was copied from
        # the original literal and appears to be non-breaking spaces; verify
        # the exact characters against the repository version.
        result.append("""
<span id="nav_%s">
   <span class="dim">|</span> 
<a href="#%s" class="shortcut">%s</a>
</span>
""" % (span_id, ref_id, title))
    # The closing template needs one '%s' for the nav-link cells; without it
    # the %-formatting below raises TypeError.
    result.append("""
</td>
%s
</tr>
</table>
""" % generate_nav_links(ctx))
def generate_footer(ctx):
    """Build the footnotes block for the bottom of a page.

    Returns:
      an empty list when ctx has no 'footnotes' entry (or it is None),
      otherwise the opening markup, every fragment of every footnote and
      the closing </div>
    """
    footnotes = ctx.get('footnotes')
    if footnotes is None:
        return []

    opening = """<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""
    fragments = [piece for footnote in footnotes for piece in footnote]
    return [opening] + fragments + ['</div>\n']
def get_id_path(node):
    """Generate the index path used to build an 'id'.

    First walk up the xml tree from node.xml, recording the 1-based
    position of each element among its siblings. Then walk up the chunk
    tree from 'node', recording each node's idx + 1. The chunk-tree
    indexes come first in the result, both parts ordered top-down.

    Returns:
      list of index strings
    """
    xml_indexes = []
    element = node.xml
    parent = element.getparent()
    while parent is not None:
        position = parent.getchildren().index(element) + 1
        xml_indexes.append(str(position))
        element = parent
        parent = element.getparent()

    chunk_indexes = []
    while node is not None:
        chunk_indexes.append(str(node.idx + 1))
        node = node.parent

    # both lists were collected bottom-up
    return chunk_indexes[::-1] + xml_indexes[::-1]
def get_id(node):
    """Return the id for a chunk node.

    Uses the 'id' attribute of node.xml when it is set and non-empty.
    Otherwise an id of the form 'id-<i>.<j>...' is generated from
    get_id_path() and the fact is logged at info level.
    """
    xml = node.xml
    explicit_id = xml.attrib.get('id')
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider
        # omitting the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     xml.sourceline, xml.tag)
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Render the current chunk as a complete page with an optional TOC.

    Args:
      ctx: conversion context; ctx['node'] is the chunk to render
      div_class: class attribute of the <div> wrapping the page body
      title_tag: HTML tag name used for the title heading

    Returns:
      list of HTML string fragments
    """
    chunk_node = ctx['node']
    page_title = chunk_node.title + ": " + chunk_node.root.title
    page = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    page.append(generate_basic_nav(ctx))
    page.append('<div class="%s">' % div_class)

    if chunk_node.title:
        page.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (title_tag, get_id(chunk_node), chunk_node.title, title_tag))

    toc_entries = generate_toc(ctx, chunk_node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        page.append("""<p><b>Table of Contents</b></p>
<div class="toc">
<dl class="toc">
""")
        page.extend(toc_entries)
        page.append("""</dl>
</div>
""")

    convert_inner(ctx, chunk_node.xml, page)
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
1447 # docbook chunks
def convert_book(ctx):
    """Render the 'book' chunk: title banner, bookinfo and the full TOC.

    The TOC is generated from the root of the chunk tree. The first
    'bookinfo' child of the book element is required.

    Returns:
      list of HTML string fragments
    """
    book = ctx['node']
    page = [HTML_HEADER % (book.title, generate_head_links(ctx))]
    page.append("""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
<tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % book.title)

    bookinfo = book.xml.findall('bookinfo')[0]
    page.extend(convert_bookinfo(ctx, bookinfo))

    page.append("""<div class="toc">
<dl class="toc">
""")
    page.extend(generate_toc(ctx, book.root))
    page.append("""</dl>
</div>
""")

    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
def convert_chapter(ctx):
    """Render a 'chapter' chunk as a page with TOC and an <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_glossary(ctx):
    """Render a 'glossary' chunk.

    The page gets one navigation shortcut per 'glossdiv' child, a title
    heading whose level is node.depth, and the converted glossdivs.

    Returns:
      list of HTML string fragments
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page = [HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))]
    page.append(generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'))
    page.append("""<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth))

    for glossdiv in glossdivs:
        page.extend(convert_glossdiv(ctx, glossdiv))
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
def convert_index(ctx):
    """Render an 'index' chunk.

    The indexdivs nested inside the first top-level 'indexdiv' each get a
    navigation shortcut and are converted into the page body. The title
    heading level is node.depth.

    Returns:
      list of HTML string fragments
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')

    page = [HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))]
    page.append(generate_alpha_nav(ctx, indexdivs, 'idx', 'index'))
    page.append("""<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth))

    for indexdiv in indexdivs:
        page.extend(convert_indexdiv(ctx, indexdiv))
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
def convert_part(ctx):
    """Render a 'part' chunk as a page with TOC and an <h1> title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
def convert_preface(ctx):
    """Render a 'preface' chunk: basic navigation, optional title, content.

    Returns:
      list of HTML string fragments
    """
    node = ctx['node']
    page = [HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))]
    page.append(generate_basic_nav(ctx))
    page.append('<div class="preface">')

    if node.title:
        page.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title))

    convert_inner(ctx, node.xml, page)
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
def convert_reference(ctx):
    """Render a 'reference' chunk as a page with TOC and an <h1> title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
def convert_refentry(ctx):
    """Render a 'refentry' chunk.

    The page consists of the refentry navigation, a name block with title
    and subtitle, an optional gallery image taken from
    refmeta/refmiscinfo/inlinegraphic, and all converted refsect1 children.

    Returns:
      list of HTML string fragments
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # The gallery image is optional at every level of the lookup.
    gallery = ''
    refmeta = node.xml.find('refmeta')
    refmiscinfo = refmeta.find('refmiscinfo') if refmeta is not None else None
    inlinegraphic = refmiscinfo.find('inlinegraphic') if refmiscinfo is not None else None
    if inlinegraphic is not None:
        gallery = ''.join(convert_inlinegraphic(ctx, inlinegraphic))

    page = [HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, page)
    page.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s %s</p>
</td>
<td class="gallery_image" valign="top" align="right">%s</td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        page.extend(convert_refsect1(ctx, refsect1))
    page.extend(generate_footer(ctx))
    page.append("""</div>
</body>
</html>""")
    return page
def convert_section(ctx):
    """Render a 'section' chunk as a page with TOC and an <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='section', title_tag='h2')
def convert_sect1(ctx):
    """Render a 'sect1' chunk as a page with TOC and an <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1600 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: chunk name (node.name) -> function that renders the whole
# page for that chunk. Looked up by convert().
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'section': convert_section,
    'sect1': convert_sect1,
}
def generate_nav_nodes(files, node):
    """Collect the navigation targets for 'node'.

    Args:
      files: list of all file nodes, in output order
      node: the node to compute navigation for (must be in 'files')

    Returns:
      dict with 'nav_home' (always) and, where they exist, 'nav_up',
      'nav_prev' and 'nav_next'
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    last = len(files) - 1
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    if position < last:
        nav['nav_next'] = files[position + 1]
    return nav
def convert(out_dir, module, files, node, src_lang):
    """Convert one docbook chunk to a html file.

    The output file is created even when no converter is registered for
    the chunk type; in that case a warning is logged and nothing is
    written to it.

    Args:
      out_dir: already created output dir
      module: stored in the conversion context as 'module'
      files: list of nodes in the tree in pre-order
      node: current tree node
      src_lang: stored in the conversion context as 'src-lang'
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
            'src-lang': src_lang,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add chunk converter for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Build the nested <sub> entries for the devhelp2 chapter list.

    Children without children of their own become self-closing <sub/>
    tags; the others wrap the entries of their own children.

    Returns:
      list of xml string fragments
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.raw_title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.raw_title, child.filename))
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into xml attributes.

    The condition is a '|'-separated list of 'key:value' items (e.g.
    since, deprecated); an item without ':' gets an empty value. Double
    quotes are replaced by '&quot;'.

    Returns:
      the attributes as a string with a leading space, or '' when the node
      has no 'condition' attribute
    """
    condition = node.attrib.get('condition')
    if condition is None:
        return ''

    keywords = []
    # condition -> since, deprecated, ... (separated with '|')
    for item in condition.replace('"', '&quot;').split('|'):
        # deprecated can have no description -> value stays empty
        key, _, value = item.partition(':')
        keywords.append('{}="{}"'.format(key, value))
    return ' ' + ' '.join(keywords)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 <keyword> line for a refsect2 node.

    The keyword type is the node's 'role', the name is the title stored in
    the module-level 'titles' map for the node's 'id', and the link is
    base_link followed by that id.
    """
    node_id = node.attrib['id']
    role = node.attrib['role']
    name = titles[node_id]['title']
    link = base_link + node_id
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, name, link, conditions)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 <keyword> line for an enum or struct member.

    The keyword type is the node's 'role', the name is 'title' and the
    link is base_link followed by 'name'.
    """
    role = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, title, link, conditions)
def create_devhelp2(out_dir, module, xml, files):
    """Write the <module>.devhelp2 index file.

    The file contains the chapter list (from the chunk tree) and one
    keyword per refsect2 with a 'role', plus one per enum member and
    struct member found below it.

    Args:
      out_dir: already created output dir
      module: module name; used for the file name and the book name
      xml: root element of the document
      files: list of file nodes; files[0].root is the top of the chunk tree
    """
    # Book metadata is optional: xpath() returns a (possibly empty) list,
    # so fall back to empty strings instead of indexing blindly.
    title = ''
    online_url = ''
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        title_texts = bookinfo.xpath('./title/text()')
        if title_texts:
            title = title_texts[0]
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_url = online_urls[0]

    # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
 <chapters>
""" % (title, module, online_url)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append(""" </chapters>
 <functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum members: the keyword name is the text of the details node
            for refsect3_node in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
            # struct members: the id serves as both keyword name and link
            for refsect3_node in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

    result.append(""" </functions>
</book>
""")

    # Only touch the output file once the content was built successfully.
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
def get_dirs(uninstalled):
    """Determine the gtk-doc data dir and the style dir.

    Installed: both are '<config.datadir>/gtk-doc/data'.
    Uninstalled: the data dir is the directory of sys.argv[0]; when that
    has no 'gtk-doc.xsl' but $ABS_TOP_SRCDIR does, $ABS_TOP_SRCDIR is used
    instead. The style dir is the 'style' subdirectory of the data dir.

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir != srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    # try 'srcdir' (set from makefiles) too
    if (not os.path.exists(gtkdocdir + '/gtk-doc.xsl') and
            os.path.exists(os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl')):
        gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled, src_lang, paths):
    """Convert a docbook document into chunked html plus a devhelp2 index.

    Each numbered step logs its duration (at warning level).

    Args:
      module: module name; passed on to chunking, conversion and devhelp2
      index_file: the main xml document to parse
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files
      src_lang: passed on to convert()
      paths: list of extra directories searched for assets
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(dtd_validation=False, collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    logging.warning("1a: %7.3lf: load doc", timer() - _t)
    _t = timer()
    tree.xinclude()
    logging.warning("1b: %7.3lf: xinclude doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the formatter's style definitions to the copied css
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    # can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # keep only the nodes without an anchor
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #   p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #   p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node, src_lang)
    logging.warning("7: %7.3lf: create html", timer() - _t)

    # 8) copy assets over
    _t = timer()
    # the current working directory is always searched as well
    paths = set(paths + [os.getcwd()])
    for a in assets:
        logging.info('trying %s in %s', a, str(paths))
        copied = False
        # NOTE(review): there is no 'break' after a successful copy, so the
        # asset is copied from every path that has it and, 'paths' being a
        # set, which copy ends up in out_dir is not deterministic - confirm
        # whether that is intended.
        for p in paths:
            try:
                shutil.copy(os.path.join(p, a), out_dir)
                copied = True
            except FileNotFoundError:
                pass
        if not copied:
            logging.warning('file %s not found in path (did you add --path?)', a)
    logging.warning("8: %7.3lf: copy assets", timer() - _t)
def run(options):
    """Entry point: derive the output dir from the options and run main().

    options.args[0] is the module name and options.args[1] the main
    document. The html is written to a 'db2html' directory next to the
    document; the directory is created if it does not exist yet. The
    process exits with the return value of main().
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine, anything else is not
        pass

    exit_code = main(module, document, out_dir, options.uninstalled,
                     options.src_lang, options.path)
    sys.exit(exit_code)