mkhtml2: add tag converters for sect2/sect3.
[gtk-doc.git] / gtkdoc / mkhtml2.py
blob062026cf6542dd0d61ff974e6ff4713c81543a8f
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool replaces both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - more chunk converters
39 - more tag converters:
40 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
41 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
42 attr on the <img> tag of the 'imageobject'
43 - check each docbook tag if it can contain #PCDATA, if not don't check for
44 xml.text
45 - consider some perf-warnings flag
46 - see 'No "id" attribute on'
48 OPTIONAL:
49 - minify html: https://pypi.python.org/pypi/htmlmin/
51 Requirements:
52 sudo pip3 install anytree lxml pygments
54 Example invocation:
55 cd tests/bugs/docs/
56 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
57 xdg-open db2html/index.html
58 meld html db2html
60 Benchmarking:
61 cd tests/bugs/docs/;
62 rm html-build.stamp; time make html-build.stamp
63 """
65 import argparse
66 import errno
67 import logging
68 import os
69 import shutil
70 import sys
72 from anytree import Node, PreOrderIter
73 from copy import deepcopy
74 from glob import glob
75 from lxml import etree
76 from pygments import highlight
77 from pygments.lexers import CLexer
78 from pygments.formatters import HtmlFormatter
80 from . import config, fixxref
82 # pygments setup
83 # lazily constructed lexer cache
# Maps a lower-case language name to a pygments lexer instance.
# convert_programlisting() adds further entries on demand.
LEXERS = {
    'c': CLexer()
}
# Formatter shared by all highlighted listings; convert_programlisting()
# supplies its own <pre> wrapper around the formatter output.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
89 # http://www.sagehill.net/docbookxsl/Chunking.html
# Tags that start a new chunk; chunk() creates one tree node (and one html
# file name) for each element whose tag is listed here.
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',  # in article or book
    'index',  # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',  # except first
    'section',  # if equivalent to sect1
    'set',
    'setindex',
]
class ChunkParams(object):
    """Naming parameters for chunks of one docbook tag that lack an 'id'.

    Attributes:
        prefix: string the generated chunk name starts with.
        parent: tag name of the enclosing chunk type, or None.
        count: number of names generated so far for this tag.
    """

    def __init__(self, prefix, parent=None):
        self.prefix = prefix
        # This used to be hard-coded to None, which silently dropped the
        # argument passed by every CHUNK_PARAMS entry.
        self.parent = parent
        self.count = 0
117 # TODO: look up the abbrevs and hierarchy for other tags
118 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
119 # https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
121 # If not defined, we can just create an example without an 'id' attr and see
122 # what docbook xsl does.
# Per-tag naming parameters used by gen_chunk_name() for elements without an
# 'id' attribute: (file name prefix, tag of the enclosing chunk type).
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter'),
    'section': ChunkParams('s', 'chapter'),
}
# tag -> (xpath for the title, xpath for the subtitle or None).
# The '_' entry is the default used by get_chunk_titles() for all other tags.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every 'id' attribute; used by add_id_links().
ID_XPATH = etree.XPath('//@id')

# Selects every glossentry element; used by build_glossary().
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary() and read by
# convert_acronym().
glossary = {}

# Next footnote number; incremented by convert_footnote().
footnote_idx = 1
def gen_chunk_name(node):
    """Return the chunk name (file name without extension) for an xml node.

    The node's 'id' attribute is used verbatim when present. Otherwise the
    name is the per-tag prefix plus a two-digit running counter kept in
    CHUNK_PARAMS. Tags without an entry get one created on the fly (prefix =
    first two characters of the tag) and a warning is logged.
    """
    attrs = node.attrib
    if 'id' in attrs:
        return attrs['id']

    tag = node.tag
    try:
        params = CHUNK_PARAMS[tag]
    except KeyError:
        params = CHUNK_PARAMS[tag] = ChunkParams(tag[:2])
        logging.warning('Add CHUNK_PARAMS for "%s"', tag)

    params.count += 1
    # TODO: handle parents to make names of nested tags unique. We only need
    # to prepend the parent's '<prefix><count>' if there is more than one
    # parent of that kind in the xml.
    return '%s%02d' % (params.prefix, params.count)
def get_chunk_titles(node):
    """Return the title information of a chunk as a dict.

    Keys are 'title', 'title_tag', 'subtitle' and 'subtitle_tag'. The xpaths
    come from TITLE_XPATHS, falling back to its '_' entry. 'title_tag' is the
    tag of the element the title was read from, or the chunk's own tag when
    that element is a plain <title>. The subtitle values are None for tags
    without a subtitle xpath.
    """
    tag = node.tag
    title_xpath, subtitle_xpath = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])

    title_xml = title_xpath(node)[0]
    titles = {
        'title': title_xml.text,
        'title_tag': tag if title_xml.tag == 'title' else title_xml.tag,
        'subtitle': None,
        'subtitle_tag': None,
    }
    if subtitle_xpath:
        subtitle_xml = subtitle_xpath(node)[0]
        titles['subtitle'] = subtitle_xml.text
        titles['subtitle_tag'] = subtitle_xml.tag
    return titles
def chunk(xml_node, parent=None):
    """Chunk the tree.

    Elements whose tag is in CHUNK_TAGS become a new Node below `parent`.
    Except for the first call (parent=None), such an element is deep-copied
    and the original is removed from its xml parent, so that the enclosing
    chunk no longer contains it.

    The first time, we're called with parent=None and in that case we return
    the new node as the root of the tree.
    """
    if xml_node.tag in CHUNK_TAGS:
        if parent:
            # detach: continue with a private copy, drop the original
            detached = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = detached

        titles = get_chunk_titles(xml_node)
        filename = gen_chunk_name(xml_node) + '.html'
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=filename, **titles)

    for child in xml_node:
        chunk(child, parent)

    return parent
def add_id_links(files, links):
    """Record a link target in `links` for every 'id' found in the chunks.

    An id equal to the chunk name maps to the chunk's file name; every other
    id maps to '<filename>#<id>'.
    """
    for node in files:
        filename = node.filename
        # the chunk name is the file name without the '.html' suffix
        own_id = filename[:-5]
        for attr in ID_XPATH(node.xml):
            links[attr] = filename if attr == own_id else filename + '#' + attr
def build_glossary(files):
    """Fill the module-level `glossary` dict from all 'glossary' chunks.

    Keys are the stripped text of each glossterm, values the stripped text of
    the matching glossdef.
    """
    def text_of(element):
        return etree.tostring(element, method="text", encoding=str).strip()

    for node in files:
        if node.xml.tag == 'glossary':
            for entry in GLOSSENTRY_XPATH(node.xml):
                # TODO: there can be all kind of things in a glossary. This
                # only supports what we commonly use
                key = text_of(entry.find('glossterm'))
                value = text_of(entry.find('glossdef'))
                glossary[key] = value
                # logging.debug('glosentry: %s:%s', key, value)
251 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert all children of `xml` and extend `result` with the output.

    The converter is looked up by tag in convert_tags; tags without an entry
    are handled by convert__unknown().
    """
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the element itself, only its converted children."""
    children_html = []
    convert_inner(ctx, xml, children_html)
    return children_html
def convert_skip(ctx, xml):
    """Drop the element and everything below it.

    Returns a list holding one empty string.
    """
    nothing = ''
    return [nothing]
def append_text(text, result):
    """Append `text` to `result` as escaped HTML character data.

    Nothing is appended when `text` is None, empty or whitespace only.
    '&', '<' and '>' are escaped. The previous implementation left '&'
    untouched, so text such as 'a && b' produced invalid markup.
    """
    if text and text.strip():
        # Function-scope import keeps this fix independent of the file's
        # import block.
        from html import escape
        result.append(escape(text, quote=False))
# tags we already warned about in convert__unknown()
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for tags without an entry in convert_tags.

    Sub-chunks produce nothing, xml comments are passed through as html
    comments. Any other element is bracketed by html comments naming the tag,
    with its children converted in between; a warning is logged the first
    time each such tag is seen.
    """
    # don't recurse on subchunks
    if xml.tag in CHUNK_TAGS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']

    tag = xml.tag
    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    html = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, html)
    html.append('<!-- /' + tag + '-->\n')
    return html
def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a section-like element into a <div> with an optional heading.

    A <title> child, if present, becomes the heading (tag name `h_tag`),
    preceded by an anchor when the section has an 'id'. The title is removed
    from `xml` so it is not converted a second time. `inner_func` converts
    the remaining children.
    """
    html = ['<div class="%s">\n' % xml.tag]
    heading = xml.find('title')
    if heading is not None:
        if 'id' in xml.attrib:
            html.append('<a name="%s"></a>' % xml.attrib['id'])
        html.append('<%s>%s</%s>' % (h_tag, heading.text, h_tag))
        xml.remove(heading)
    append_text(xml.text, html)
    inner_func(ctx, xml, html)
    html.append('</div>')
    append_text(xml.tail, html)
    return html
def xml_get_title(xml):
    """Return the text of the <title> child of `xml`.

    When there is no title child a warning is logged and '' is returned.
    """
    title = xml.find('title')
    if title is None:
        # TODO(ensonic): any way to get the file (includes) too?
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return title.text
319 # docbook tags
def convert_acronym(ctx, xml):
    """Convert <acronym>; the glossary definition (if any) becomes the title."""
    acronym = xml.text
    # TODO: print a sensible warning if missing
    definition = glossary.get(acronym, '')
    html = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (definition, acronym)]
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_bookinfo(ctx, xml):
    """Convert <bookinfo> into the title page <div>, closed by a rule."""
    html = ['<div class="titlepage">']
    convert_inner(ctx, xml, html)
    html.append("""<hr>
</div>""")
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_blockquote(ctx, xml):
    """Convert <blockquote> into a <blockquote> wrapped in a <div>."""
    html = ['<div class="blockquote">\n<blockquote class="blockquote">']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</blockquote>\n</div>')
    append_text(xml.tail, html)
    return html
def convert_code(ctx, xml):
    """Convert an inline element into <code class="TAG">...</code>."""
    opening = '<code class="%s">' % xml.tag
    html = [opening]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code>')
    append_text(xml.tail, html)
    return html
def convert_colspec(ctx, xml):
    """Convert <colspec> into a <col> tag.

    'colname' becomes the class and 'colwidth' the width attribute.
    """
    attrs = xml.attrib
    html = ['<col']
    for name, template in (('colname', ' class="%s"'),
                           ('colwidth', ' width="%s"')):
        if name in attrs:
            html.append(template % attrs[name])
    html.append('>\n')
    # is in tgroup and there can be no 'text'
    return html
def convert_command(ctx, xml):
    """Convert <command> into <span class="command"><strong>...</strong></span>.

    This function used to emit the markup labelled class="userinput", i.e.
    the markup belonging to <userinput>; the two converters had their bodies
    swapped.
    """
    result = ['<span class="command"><strong>']
    append_text(xml.text, result)
    convert_inner(ctx, xml, result)
    result.append('</strong></span>')
    append_text(xml.tail, result)
    return result
def convert_corpauthor(ctx, xml):
    """Convert <corpauthor> into an <h3 class="corpauthor"> inside a <div>."""
    html = ['<div><h3 class="corpauthor">\n']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</h3></div>\n')
    append_text(xml.tail, html)
    return html
def convert_div(ctx, xml):
    """Convert a block element into <div class="TAG">...</div>."""
    opening = '<div class="%s">\n' % xml.tag
    html = [opening]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(xml.tail, html)
    return html
def convert_em_class(ctx, xml):
    """Convert an inline element into <em class="TAG"><code>...</code></em>."""
    opening = '<em class="%s"><code>' % xml.tag
    html = [opening]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></em>')
    append_text(xml.tail, html)
    return html
def convert_entry(ctx, xml):
    """Convert a table <entry> into <td>; a 'role' becomes the class."""
    attrs = xml.attrib
    html = ['<td']
    html.append(' class="%s">' % attrs['role'] if 'role' in attrs else '>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</td>')
    append_text(xml.tail, html)
    return html
def convert_footnote(ctx, xml):
    """Convert <footnote> into a numbered marker and queue the note body.

    The body is appended to ctx['footnotes'] as a list of html fragments
    (emitted later by generate_footer()); the return value is the inline
    marker linking to it, followed by the text that trails the footnote.

    Fixes over the previous version:
    - a <para>/<simpara> without direct text contributed None to the body,
      which breaks joining the fragments later; it now contributes ''.
    - the tail text following the footnote element was dropped.
    """
    footnotes = ctx.get('footnotes', [])
    # footnotes idx is not per page, but per doc
    global footnote_idx
    idx = footnote_idx
    footnote_idx += 1

    # need a pair of ids for each footnote (docbook generates different ids)
    this_id = 'footnote-%d' % idx
    that_id = 'ftn.' + this_id

    inner = ['<div id="%s" class="footnote">' % that_id]
    inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
        this_id, idx))
    # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
    # get double nested paras :/.
    # convert_inner(ctx, xml, inner)
    para = xml.find('para')
    if para is None:
        para = xml.find('simpara')
    if para is not None:
        inner.append(para.text or '')
    else:
        logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
                        etree.tostring(xml, method="text", encoding=str).strip())
    inner.append('</p></div>')
    footnotes.append(inner)
    ctx['footnotes'] = footnotes

    result = ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
        that_id, this_id, idx)]
    append_text(xml.tail, result)
    return result
def convert_glossdef(ctx, xml):
    """Convert <glossdef> into <dd class="glossdef">...</dd>."""
    html = ['<dd class="glossdef">']
    convert_inner(ctx, xml, html)
    html += ['</dd>\n']
    return html
def convert_glossdiv(ctx, xml):
    """Convert <glossdiv>: an anchored heading followed by its entries.

    The <title> child supplies the heading text and the anchor name
    ('gls' + title); it is removed from `xml` before the rest is converted.
    """
    heading = xml.find('title')
    name = heading.text
    xml.remove(heading)
    html = ['<a name="gls%s"></a><h3 class="title">%s</h3>' % (name, name)]
    convert_inner(ctx, xml, html)
    return html
def convert_glossentry(ctx, xml):
    """Convert <glossentry>: emits only its converted children."""
    children_html = []
    convert_inner(ctx, xml, children_html)
    return children_html
def convert_glossterm(ctx, xml):
    """Convert <glossterm> into an anchored <dt>.

    The anchor name is the 'id' of a nested <anchor>, or 'glossterm-' plus
    the term text when there is none. The term text is the anchor's tail
    followed by the element's own text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        glossid, text = '', ''
    else:
        glossid = anchor.attrib.get('id', '')
        text = anchor.tail or ''
    text += xml.text or ''
    if not glossid:
        glossid = 'glossterm-' + text
    return [
        '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
            glossid, text)
    ]
def convert_imageobject(ctx, xml):
    """Convert <imageobject> into an <img> for its <imagedata> child.

    Returns an empty list when there is no <imagedata>.
    """
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    source = imagedata.attrib.get('fileref', '')
    return ['<img src="%s">' % source]
def convert_indexdiv(ctx, xml):
    """Convert <indexdiv>: an anchored heading followed by its entries.

    The <title> child supplies the heading text and the anchor name
    ('idx' + title); it is removed from `xml` before the rest is converted.
    """
    heading = xml.find('title')
    name = heading.text
    xml.remove(heading)
    html = ['<a name="idx%s"></a><h3 class="title">%s</h3>' % (name, name)]
    convert_inner(ctx, xml, html)
    return html
def convert_informaltable(ctx, xml):
    """Convert <informaltable> into a <table> wrapped in a <div>.

    pgwide="1" adds width="100%", frame="none" adds border="0".
    """
    attrs = xml.attrib
    html = ['<div class="informaltable"><table class="informaltable"']
    if attrs.get('pgwide') == '1':
        html.append(' width="100%"')
    if attrs.get('frame') == 'none':
        html.append(' border="0"')
    html.append('>\n')
    convert_inner(ctx, xml, html)
    html.append('</table></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_itemizedlist(ctx, xml):
    """Convert <itemizedlist> into a bulleted <ul> wrapped in a <div>."""
    html = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, html)
    html.append('</ul></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_link(ctx, xml):
    """Convert <link linkend="..."> into a cross reference.

    The link text is the element's own text followed by its converted
    children, in document order, and is passed to fixxref.MakeXRef().
    Targets listed in fixxref.NoLinks are not linked; their content is
    emitted as plain (converted) content.

    Fixes over the previous version:
    - children were emitted before the leading text, reversing the order;
    - for NoLinks targets the link content was dropped altogether.
    """
    linkend = xml.attrib['linkend']
    if linkend in fixxref.NoLinks:
        linkend = None
    result = []
    if linkend:
        link_text = []
        append_text(xml.text, link_text)
        convert_inner(ctx, xml, link_text)
        # TODO: fixxref does some weird checks in xml.text
        result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
    else:
        append_text(xml.text, result)
        convert_inner(ctx, xml, result)
    append_text(xml.tail, result)
    return result
def convert_listitem(ctx, xml):
    """Convert <listitem> into <li class="listitem">...</li>."""
    html = ['<li class="listitem">']
    convert_inner(ctx, xml, html)
    html += ['</li>']
    # is in itemizedlist and there can be no 'text'
    return html
def convert_orderedlist(ctx, xml):
    """Convert <orderedlist> into a numbered <ol> wrapped in a <div>.

    The class name was misspelled 'orderedlistlist'; it now follows the same
    tag-name-as-class scheme as convert_itemizedlist().
    """
    result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, result)
    result.append('</ol></div>')
    append_text(xml.tail, result)
    return result
def convert_para(ctx, xml):
    """Convert <para> into <p>, preceded by an anchor if it has an 'id'."""
    attrs = xml.attrib
    html = ['<a name="%s"></a>' % attrs['id']] if 'id' in attrs else []
    html.append('<p>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(xml.tail, html)
    return html
def convert_para_like(ctx, xml):
    """Convert an element into <p class="TAG">, with an anchor if it has an 'id'."""
    attrs = xml.attrib
    html = ['<a name="%s"></a>' % attrs['id']] if 'id' in attrs else []
    html.append('<p class="%s">' % xml.tag)
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(xml.tail, html)
    return html
def convert_phrase(ctx, xml):
    """Convert <phrase> into <span>; a 'role' becomes the class."""
    attrs = xml.attrib
    html = ['<span']
    html.append(' class="%s">' % attrs['role'] if 'role' in attrs else '>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(xml.tail, html)
    return html
def convert_primaryie(ctx, xml):
    """Convert <primaryie> into a <dt> followed by an empty <dd>."""
    html = ['<dt>\n']
    convert_inner(ctx, xml, html)
    html += ['\n</dt>\n<dd></dd>\n']
    return html
def convert_pre(ctx, xml):
    """Convert a verbatim element into <pre class="TAG">...</pre>."""
    opening = '<pre class="%s">\n' % xml.tag
    html = [opening]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</pre>')
    append_text(xml.tail, html)
    return html
def convert_programlisting(ctx, xml):
    """Convert <programlisting>.

    Listings with role="example" are syntax highlighted with pygments and
    rendered as a two-column table (line numbers, code). The lexer is chosen
    by the 'language' attribute (default 'c') and cached in LEXERS. All other
    listings become a plain <pre class="programlisting">.

    Fixes over the previous version:
    - get_lexer_by_name was called without ever being imported;
    - an unknown language raised ClassNotFound instead of reaching the
      plain-text fallback; the miss is now cached, so the warning is logged
      once per language;
    - logging.warn (deprecated) is replaced by logging.warning;
    - the fallback emitted the raw, unescaped text.
    """
    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', 'c').lower()
            if lang not in LEXERS:
                # pygments is already a dependency of this file; imported
                # here so the fix does not depend on the import block.
                from pygments.lexers import get_lexer_by_name
                from pygments.util import ClassNotFound
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    LEXERS[lang] = None
                    logging.warning('No pygments lexer for language="%s"', lang)
            lexer = LEXERS[lang]
            if lexer is not None:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join(str(i) for i in range(1, line_count + 1))
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
  <tbody>
    <tr>
      <td class="listing_lines" align="right"><pre>%s</pre></td>
      <td class="listing_code"><pre class="programlisting">%s</pre></td>
    </tr>
  </tbody>
</table>
""" % (source_lines, highlighted))
            else:
                result.append('<pre class="programlisting">')
                append_text(xml.text, result)
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        append_text(xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    append_text(xml.tail, result)
    return result
def convert_quote(ctx, xml):
    """Convert <quote> into nested spans with literal double quotes around it."""
    html = ['<span class="quote">"<span class="quote">']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>"</span>')
    append_text(xml.tail, html)
    return html
def convert_refsect1(ctx, xml):
    """Convert <refsect1> into a section with an <h2> heading.

    Uses convert_sect() with a child converter that puts an <hr> between two
    consecutive refsect2 children.
    """
    def inner_with_dividers(ctx, xml, result):
        previous_tag = None
        for child in xml:
            tag = child.tag
            # Add a divider between two consecutive refsect2
            if tag == 'refsect2' and previous_tag == 'refsect2':
                result.append('<hr>\n')
            result.extend(convert_tags.get(tag, convert__unknown)(ctx, child))
            previous_tag = tag

    return convert_sect(ctx, xml, 'h2', inner_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert <refsect2> into a section with an <h3> heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_refsect3(ctx, xml):
    """Convert <refsect3> into a section with an <h4> heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_row(ctx, xml):
    """Convert a table <row> into <tr>...</tr>."""
    html = ['<tr>\n']
    convert_inner(ctx, xml, html)
    html += ['</tr>\n']
    return html
def convert_sect2(ctx, xml):
    """Convert <sect2> into a section with an <h3> heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_sect3(ctx, xml):
    """Convert <sect3> into a section with an <h4> heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_simpara(ctx, xml):
    """Convert <simpara> into <p>...</p>.

    Child elements are now converted as well, as convert_para() does.
    Previously only the text before the first child was emitted, so inline
    markup and everything after it was lost.
    """
    result = ['<p>']
    append_text(xml.text, result)
    convert_inner(ctx, xml, result)
    result.append('</p>')
    append_text(xml.tail, result)
    return result
def convert_span(ctx, xml):
    """Convert an inline element into <span class="TAG">...</span>."""
    opening = '<span class="%s">' % xml.tag
    html = [opening]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(xml.tail, html)
    return html
def convert_tbody(ctx, xml):
    """Convert <tbody> into <tbody>...</tbody>."""
    html = ['<tbody>']
    convert_inner(ctx, xml, html)
    html += ['</tbody>']
    # is in tgroup and there can be no 'text'
    return html
def convert_tgroup(ctx, xml):
    """Convert <tgroup>.

    tgroup does not expand to anything itself, but its colspec children are
    collected into a <colgroup> and removed from `xml`; the remaining
    children are converted afterwards.
    """
    html = []
    colspecs = xml.findall('colspec')
    if colspecs:
        html.append('<colgroup>\n')
        for colspec in colspecs:
            html += convert_colspec(ctx, colspec)
            xml.remove(colspec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    # is in informaltable and there can be no 'text'
    return html
def convert_ulink(ctx, xml):
    """Convert <ulink url="..."> into an <a> element.

    The link text is the element's text. An empty ulink used to render the
    literal string 'None'; it now shows the url itself.
    The element's tail, if any, is appended unchanged.
    """
    url = xml.attrib['url']
    text = xml.text if xml.text else url
    result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, text)]
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_userinput(ctx, xml):
    """Convert <userinput> into <strong class="userinput"><code>...</code></strong>.

    This function used to emit the markup labelled class="command", i.e. the
    markup belonging to <command>; the two converters had their bodies
    swapped.
    """
    result = ['<strong class="userinput"><code>']
    append_text(xml.text, result)
    convert_inner(ctx, xml, result)
    result.append('</code></strong>')
    append_text(xml.tail, result)
    return result
def convert_variablelist(ctx, xml):
    """Convert <variablelist> into a two-column table wrapped in a <div>."""
    table_open = """<div class="variablelist"><table border="0" class="variablelist">
<colgroup>
<col align="left" valign="top">
<col>
</colgroup>
<tbody>"""
    table_close = """</tbody>
</table></div>"""
    html = [table_open]
    convert_inner(ctx, xml, html)
    html.append(table_close)
    return html
def convert_varlistentry(ctx, xml):
    """Convert <varlistentry> into one table row: term cell, definition cell.

    The row is now closed with '</tr>'; the previous version appended a
    second opening '<tr>' instead, producing unbalanced markup.
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    result.append('</tr>')
    return result
789 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: docbook tag name -> converter function(ctx, xml) returning
# a list of html fragments. convert_inner() falls back to convert__unknown()
# for tags that are not listed here.
convert_tags = {
    'acronym': convert_acronym,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'tbody': convert_tbody,
    'tgroup': convert_tgroup,
    'term': convert_span,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
}
860 # conversion helpers
# Start of every generated page. Takes two '%s' values: the page title and
# the <link> tags built by generate_head_links().
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
def generate_head_links(ctx):
    """Return the <link rel=...> tags for the page head as one string.

    'home' is always emitted from ctx['nav_home']; 'up', 'prev' and 'next'
    only when the matching 'nav_*' key is present in ctx.
    """
    targets = [('home', ctx['nav_home'])]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            targets.append((rel, ctx[key]))
    return ''.join(
        '<link rel="%s" href="%s" title="%s">\n' % (rel, n.filename, n.title)
        for rel, n in targets)
def generate_nav_links(ctx):
    """Return the navigation icon cells (home, up, prev, next) as one string.

    Home always links to ctx['nav_home']. The other three link to the
    matching 'nav_*' entry of ctx when present and show the insensitive
    icon otherwise.
    """
    active = ('<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" '
              'height="16" border="0" alt="%s"></a></td>')
    inactive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    for key, accesskey, icon, alt in (('nav_up', 'u', 'up', 'Up'),
                                      ('nav_prev', 'p', 'left', 'Prev'),
                                      ('nav_next', 'n', 'right', 'Next')):
        if key in ctx:
            cells.append(active % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(inactive % icon)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Return the table-of-contents entries for the children of `node`.

    Each child yields a <dt> with a link to its file, plus its subtitle when
    it has one. Children that have children of their own are followed by a
    nested <dd><dl> built recursively.
    """
    toc = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        toc.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, child.filename, child.title))
        if child.subtitle:
            toc.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        toc.append('</dt>\n')
        if child.children:
            toc += ['<dd><dl>'] + generate_toc(ctx, child) + ['</dl></dd>']
    return toc
def generate_basic_nav(ctx):
    """Return the navigation table with an empty shortcuts cell."""
    nav_links = generate_nav_links(ctx)
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
"""
    return template % nav_links
def generate_alpha_nav(ctx, divs, prefix):
    """Return the navigation table with one shortcut per division.

    Each element of `divs` contributes a link to '#<prefix><title>', using
    the title returned by xml_get_title(); the links are separated by a
    dimmed '|'.
    """
    shortcuts = []
    for div in divs:
        title = xml_get_title(div)
        shortcuts.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title))
    separator = '\n<span class="dim">|</span>\n'

    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_index">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
"""
    return template % (separator.join(shortcuts), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to `result`.

    Besides 'Top', a shortcut is added for every refsect1 that has an 'id'
    and whose role does not end in '_proto'.
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        attrs = sect.attrib
        # don't list TOC sections (role="xxx_proto"),
        # skip section without 'id' attrs
        if attrs.get('role', '').endswith("_proto") or 'id' not in attrs:
            continue

        title = xml_get_title(sect)
        # NOTE(review): the separator is assumed to be padded with
        # non-breaking spaces (two before, one after); they are written as
        # escapes so they stay visible - confirm against the original file.
        result.append("""
        \u00a0\u00a0<span class="dim">|</span>\u00a0
        <a href="#%s" class="shortcut">%s</a>
""" % (attrs['id'], title))
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % generate_nav_links(ctx))
def generate_footer(ctx):
    """Return the footnotes block of a page, or [] when there are none.

    ctx['footnotes'] is a list of fragment lists, as queued by
    convert_footnote().
    """
    if 'footnotes' not in ctx:
        return []
    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    for note in ctx['footnotes']:
        footer.extend(note)
    footer.append('</div>\n')
    return footer
def get_id(node):
    """Return the 'id' of the chunk's xml element, generating one if missing.

    A generated id is 'id-1.' followed by the dot-separated 1-based position
    of the element, and of each of its ancestors, among their siblings.
    NOTE(review): for an element without a parent the list of positions is
    empty and the result is 'id-1.' - confirm this is intended.

    Changes over the previous version:
    - the position is looked up with parent.index() instead of the
      deprecated getchildren(), which also avoids building a list per level;
    - the log message formats the source line with %s, so an element
      without a source line no longer triggers a logging format error.
    """
    xml = node.xml
    node_id = xml.attrib.get('id', None)
    if node_id:
        return node_id

    logging.info('%s: No "id" attribute on "%s", generating one',
                 xml.sourceline, xml.tag)
    ix = []
    # Generate the 'id'. We need to walk up the xml-tree and check the positions
    # for each sibling.
    parent = xml.getparent()
    while parent is not None:
        ix.insert(0, str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    # logging.warning('%s: id indexes: %s', node.filename, str(ix))
    return 'id-1.' + '.'.join(ix)
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert the chunk in ctx['node'] into a page that ends with a TOC.

    The page consists of the html header, the basic navigation, a
    <div class=div_class> holding the title (heading tag `title_tag`), the
    converted content, the table of contents of the chunk's children and the
    footnotes. The <title> element is removed from the chunk's xml once used.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    title = node.xml.find('title')
    if title is not None:
        page.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (
            title_tag, get_id(node), title.text, title_tag))
        node.xml.remove(title)
    convert_inner(ctx, node.xml, page)
    page.append("""<p>
  <b>Table of Contents</b>
</p>
<div class="toc">
  <dl class="toc">
""")
    page += generate_toc(ctx, node)
    page.append("""</dl>
</div>
""")
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1055 # docbook chunks
def convert_book(ctx):
    """Convert the book chunk in ctx['node'] into the index page.

    The page holds the title banner, the converted <bookinfo> (without its
    title, which is already shown in the banner), the table of contents of
    the whole tree and the footnotes.
    """
    node = ctx['node']
    banner = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
"""
    page = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        banner % node.title,
    ]
    bookinfo = node.xml.findall('bookinfo')[0]
    # we already used the title
    title = bookinfo.find('title')
    if title is not None:
        bookinfo.remove(title)
    page += convert_bookinfo(ctx, bookinfo)
    page.append("""<div class="toc">
  <dl class="toc">
""")
    page += generate_toc(ctx, node.root)
    page.append("""</dl>
</div>
""")
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
def convert_chapter(ctx):
    """Convert a chapter chunk: TOC page with an <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_glossary(ctx):
    """Convert the glossary chunk in ctx['node'] into a page.

    The page has an alphabetical navigation bar built from the glossdiv
    titles, the page title, each converted glossdiv and the footnotes.

    The wrapping div now carries class "glossary"; it used to say "index"
    (the class names of convert_glossary() and convert_index() were
    swapped).
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls'),
        """<div class="glossary">
<div class="titlepage"><h1 class="title">
<a name="%s"></a>%s</h1>
</div>""" % (get_id(node), node.title)
    ]
    for i in glossdivs:
        result.extend(convert_glossdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_index(ctx):
    """Convert the index chunk in ctx['node'] into a page.

    The page has an alphabetical navigation bar built from the titles of the
    indexdivs nested in the top-level indexdiv, the page title, each
    converted nested indexdiv and the footnotes.

    Fixes over the previous version:
    - an index without any indexdiv (e.g. an empty index) raised
      AttributeError on None; it now yields a page without entries;
    - the wrapping div now carries class "index"; it used to say "glossary"
      (the class names of convert_glossary() and convert_index() were
      swapped).
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    top_div = node.xml.find('indexdiv')
    indexdivs = top_div.findall('indexdiv') if top_div is not None else []

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx'),
        """<div class="index">
<div class="titlepage"><h2 class="title">
<a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_part(ctx):
    """Convert a 'part' chunk.

    Delegates to convert_chunk_with_toc() with the 'part' tag name and 'h1'
    and hands back whatever that returns.
    """
    part_html = convert_chunk_with_toc(ctx, 'part', 'h1')
    return part_html
def convert_preface(ctx):
    """Convert a 'preface' chunk into a html page.

    Emits the html header, the basic navigation, an optional title block
    (the 'title' element is removed from the xml once used), the converted
    remaining content and the footer.

    Args:
        ctx: conversion context; ctx['node'] is the chunk node of the preface

    Returns:
        a list of html fragments in output order
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title

    html_parts = []
    html_parts.append(HTML_HEADER % (page_title, generate_head_links(ctx)))
    html_parts.append(generate_basic_nav(ctx))
    html_parts.append('<div class="preface">')

    title_node = node.xml.find('title')
    if title_node is not None:
        html_parts.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), title_node.text))
        # the title is rendered above, keep it out of the generic conversion
        node.xml.remove(title_node)

    convert_inner(ctx, node.xml, html_parts)
    html_parts += generate_footer(ctx)
    html_parts.append("""</div>
</body>
</html>""")
    return html_parts
def convert_reference(ctx):
    """Convert a 'reference' chunk.

    Delegates to convert_chunk_with_toc() with the 'reference' tag name and
    'h1' and hands back whatever that returns.
    """
    reference_html = convert_chunk_with_toc(ctx, 'reference', 'h1')
    return reference_html
def convert_refentry(ctx):
    """Convert a 'refentry' chunk into a html page.

    Emits the html header, the refentry navigation, a title block with the
    one-line purpose of the entry, one converted block per 'refsect1' and
    the footer.

    Args:
        ctx: conversion context; ctx['node'] is the chunk node of the refentry

    Returns:
        a list of html fragments in output order
    """
    from xml.sax.saxutils import escape

    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # The short description comes from the document itself; it used to be a
    # fixed text that was only correct for the gtk-doc unit tests.
    summary = node.title
    refpurpose_node = node.xml.find('refnamediv/refpurpose')
    if refpurpose_node is not None:
        # flatten nested inline markup and normalise the whitespace
        refpurpose = ' '.join(''.join(refpurpose_node.itertext()).split())
        if refpurpose:
            summary = '%s — %s' % (node.title, escape(refpurpose))

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s</p>
</td>
<td class="gallery_image" valign="top" align="right"></td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, summary))

    for s in refsect1s:
        result.extend(convert_refsect1(ctx, s))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_sect1(ctx):
    """Convert a 'sect1' chunk.

    Delegates to convert_chunk_with_toc() with the 'sect1' tag name and 'h2'
    and hands back whatever that returns.
    """
    sect1_html = convert_chunk_with_toc(ctx, 'sect1', 'h2')
    return sect1_html
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table used by convert(): chunk tag name -> converter function.
convert_chunks = dict(
    book=convert_book,
    chapter=convert_chapter,
    glossary=convert_glossary,
    index=convert_index,
    part=convert_part,
    preface=convert_preface,
    reference=convert_reference,
    refentry=convert_refentry,
    sect1=convert_sect1,
)
def generate_nav_nodes(files, node):
    """Collect the navigation targets for a chunk.

    Args:
        files: list of all chunk nodes; node has to be one of them
        node: the chunk node to build the navigation for

    Returns:
        a dict that always has 'nav_home' (node.root) and, when they exist,
        'nav_up' (node.parent), 'nav_prev' and 'nav_next' (the neighbours
        of node in files)
    """
    targets = {'nav_home': node.root}
    # nav params: up, prev, next
    parent = node.parent
    if parent:
        targets['nav_up'] = parent
    pos = files.index(node)
    last_pos = len(files) - 1
    if pos > 0:
        targets['nav_prev'] = files[pos - 1]
    if pos < last_pos:
        targets['nav_next'] = files[pos + 1]
    return targets
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The output file is opened (and thereby created) in any case; when no
    converter is registered for the chunk type a warning is logged and the
    file stays empty.

    Args:
        out_dir: already created output dir
        module: module name, passed on to the converters as ctx['module']
        files: list of nodes in the tree in pre-order
        node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    out_path = os.path.join(out_dir, node.filename)
    with open(out_path, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Build the nested <sub> entries for the children of a chunk node.

    Children that have children themselves become an open/close <sub> pair
    wrapped around their own entries, leaves become a self-closing <sub/>.

    Args:
        node: chunk node whose children are listed

    Returns:
        a list of lines (each ending in a newline)
    """
    lines = []
    for child in node.children:
        attrs = (child.title, child.filename)
        if not child.children:
            lines.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        lines.append('<sub name="%s" link="%s">\n' % attrs)
        lines += create_devhelp2_toc(child)
        lines.append('</sub>\n')
    return lines
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into devhelp2 attributes.

    The condition holds '|' separated entries of the form 'name:description'
    or just 'name' (e.g. 'deprecated' without a text). Each entry becomes
    one name="description" xml attribute.

    Args:
        node: xml node; only node.attrib is read

    Returns:
        the attributes as one string with a leading space, or '' if the node
        has no condition or the condition holds no usable entry
    """
    from xml.sax.saxutils import escape

    condition = node.attrib.get('condition')
    if condition is None:
        return ''

    # The values end up inside double quoted xml attributes, so besides '"'
    # also '&' and '<' have to be escaped to keep the file well-formed.
    condition = escape(condition, {'"': '&quot;'})

    keywords = []
    # condition -> since, deprecated, ... (separated with '|')
    for entry in condition.split('|'):
        # deprecated can have no description, partition() yields '' then
        name, _, description = entry.partition(':')
        if not name.strip():
            # an empty entry would produce an attribute without a name
            continue
        keywords.append('{}="{}"'.format(name, description))
    if not keywords:
        return ''
    return ' ' + ' '.join(keywords)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Format the devhelp2 <keyword> line for a 'refsect2' node.

    Args:
        node: xml node; needs the 'role' and 'id' attributes
        base_link: link prefix, the node id gets appended to it

    Returns:
        the <keyword> element as one line of text
    """
    keyword_type = node.attrib['role']
    keyword_name = xml_get_title(node)
    keyword_link = base_link + node.attrib['id']
    condition_attribs = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, keyword_name, keyword_link, condition_attribs)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Format the devhelp2 <keyword> line for an entry below a 'refsect3'.

    Args:
        node: xml node; needs the 'role' attribute
        base_link: link prefix, name gets appended to it
        title: text for the name attribute of the keyword
        name: anchor that is appended to base_link

    Returns:
        the <keyword> element as one line of text
    """
    keyword_type = node.attrib['role']
    keyword_link = base_link + name
    condition_attribs = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, keyword_link, condition_attribs)
def create_devhelp2(out_dir, module, xml, files):
    """Write the <module>.devhelp2 index file.

    The file holds the book title and online location taken from
    '/book/bookinfo', the chapter tree and one keyword per 'refsect2' that
    has a role, plus keywords for its enum and struct members.

    Args:
        out_dir: already created output dir
        module: module name, used for the file name and the book name
        xml: root of the docbook xml tree
        files: list of chunk nodes; files[0].root is the root of the chunk tree
    """
    from xml.sax.saxutils import escape

    def attr_value(text):
        # xpath hands out decoded text, re-escape it for a quoted attribute
        return escape(text, {'"': '&quot;'})

    # xpath() returns a (possibly empty) list and never None, so test for
    # emptiness; a missing bookinfo, title or online location yields ''.
    title = ''
    online_url = ''
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        titles = bookinfo.xpath('./title/text()')
        if titles:
            title = attr_value(titles[0])
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_url = attr_value(online_urls[0])

    # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append(""" </chapters>
<functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum members are named by the text of their details node
            for refsect3_node in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(
                    refsect3_node, base_link, details_node.text, name))
            # struct members use their id both as title and as link anchor
            for refsect3_node in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(
                    refsect3_node, base_link, name, name))

    result.append(""" </functions>
</book>
""")
    # Only open the file once all content is there, so that a failure above
    # does not leave an empty index file behind.
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
def get_dirs(uninstalled):
    """Locate the gtk-doc data and style directories.

    Args:
        uninstalled: if true, look next to the running script (falling back
            to $ABS_TOP_SRCDIR) instead of using the configured datadir

    Returns:
        a tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    # this does not work from builddir != srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled):
    """Convert a docbook document into chunked html plus a devhelp2 index.

    Args:
        module: module name, passed on to the converters and used for the
            name of the devhelp2 file
        index_file: the main docbook xml file; xincludes get resolved
        out_dir: already created output dir
        uninstalled: passed to get_dirs() to locate the style files

    Returns:
        None (there is no explicit return statement)
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of HTML_FORMATTER to the copy
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    files = chunk(tree.getroot())
    files = list(PreOrderIter(files))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since the converters
    #    modify the tree
    create_devhelp2(out_dir, module, tree.getroot(), files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
        options: parsed command line options; options.args[0] is the module
            name, options.args[1] the main docbook document and
            options.uninstalled is passed on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # the dir is already there, any other error still propagates
        pass

    exit_status = main(module, document, out_dir, options.uninstalled)
    sys.exit(exit_status)