mkhtml2: skip sections without 'id' atts for refentry nav
[gtk-doc.git] / gtkdoc / mkhtml2.py
blob7653d9bb8fd13f19b1b7677f24c23597a67464c2
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - more chunk converters
39 - more tag converters:
40 - footnote: maybe track those in ctx and write them out at the end of the chunk
41 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
42 attr on the <img> tag of the 'imageobject'
43 - check each docbook tag if it can contain #PCDATA, if not don't check for
44 xml.text
45 - consider some perf-warnings flag
46 - see 'No "id" attribute on'
48 OPTIONAL:
49 - minify html: https://pypi.python.org/pypi/htmlmin/
51 Requirements:
52 sudo pip3 install anytree lxml pygments
54 Example invocation:
55 cd tests/bugs/docs/
56 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
57 xdg-open db2html/index.html
58 meld html db2html
60 Benchmarking:
61 cd tests/bugs/docs/;
62 rm html-build.stamp; time make html-build.stamp
63 """
65 import argparse
66 import errno
67 import logging
68 import os
69 import shutil
70 import sys
72 from anytree import Node, PreOrderIter
73 from copy import deepcopy
74 from glob import glob
75 from lxml import etree
76 from pygments import highlight
77 from pygments.lexers import CLexer
78 from pygments.formatters import HtmlFormatter
80 from . import config, fixxref
# pygments setup
# lazily constructed lexer cache, keyed by the lower-case language name
LEXERS = {
    'c': CLexer()
}
# formatter shared by all listings; created with nowrap=True
HTML_FORMATTER = HtmlFormatter(nowrap=True)
# Tags that start a new chunk, see
# http://www.sagehill.net/docbookxsl/Chunking.html
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',  # in article or book
    'index',  # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',  # except first
    'section',  # if equivalent to sect1
    'set',
    'setindex',
]
class ChunkParams(object):
    """Naming parameters for chunks of one tag that lack an 'id' attribute.

    Attributes:
      prefix: string put in front of the running number of a generated name
      parent: tag name of the parent chunk type or None
      count: number of names generated for this tag so far
    """

    def __init__(self, prefix, parent=None):
        self.prefix = prefix
        # Store the parent that was passed in; it used to be replaced by None.
        self.parent = parent
        self.count = 0
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
#
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'sect1': ChunkParams('s', 'chapter'),
    'section': ChunkParams('s', 'chapter'),
}
# Maps a chunk tag to a (title, subtitle) pair of compiled xpaths. The '_'
# entry is the default for tags not listed; None means there is no subtitle.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

ID_XPATH = etree.XPath('//@id')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossary terms and their definitions as plain text, filled by build_glossary()
glossary = {}
def gen_chunk_name(node):
    """Return the file base name (no extension) for a chunk.

    The 'id' attribute is used if present. Otherwise a name is built from the
    per-tag prefix in CHUNK_PARAMS and a running number.
    """
    try:
        return node.attrib['id']
    except KeyError:
        pass

    tag = node.tag
    params = CHUNK_PARAMS.get(tag)
    if params is None:
        # unknown tag: register a prefix made from its first two letters
        params = CHUNK_PARAMS[tag] = ChunkParams(tag[:2])
        logging.warning('Add CHUNK_PARAMS for "%s"', tag)

    params.count += 1
    # TODO: handle parents to make names of nested tags unique. We only need
    # to prepend the parent if there are > 1 of them in the xml.
    return '%s%02d' % (params.prefix, params.count)
def get_chunk_titles(node):
    """Collect title and subtitle of a chunk.

    Args:
      node: the xml element of the chunk

    Returns:
      A dict with the keys 'title', 'title_tag', 'subtitle' and
      'subtitle_tag'. The subtitle entries are None if the tag has no subtitle
      xpath or the document has no matching element. If the title element is
      missing, 'title' is an empty string and a warning is logged instead of
      failing with an IndexError.
    """
    tag = node.tag
    (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])

    result = {
        'title': '',
        'title_tag': tag,
        'subtitle': None,
        'subtitle_tag': None,
    }

    found = title(node)
    if found:
        xml = found[0]
        result['title'] = xml.text
        if xml.tag != 'title':
            result['title_tag'] = xml.tag
    else:
        logging.warning('No title found for chunk "%s"', tag)

    if subtitle:
        found = subtitle(node)
        if found:
            xml = found[0]
            result['subtitle'] = xml.text
            result['subtitle_tag'] = xml.tag
    return result
def chunk(xml_node, parent=None):
    """Chunk the tree.

    Walks the xml tree and creates one tree Node for each element whose tag
    is in CHUNK_TAGS. A chunk below another chunk is copied and removed from
    its xml parent.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree.
    """
    if xml_node.tag in CHUNK_TAGS:
        if parent:
            # work on a private copy and remove the xml-node from the parent
            detached = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = detached

        titles = get_chunk_titles(xml_node)
        filename = gen_chunk_name(xml_node) + '.html'
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=filename, **titles)

    for child in xml_node:
        chunk(child, parent)

    return parent
def add_id_links(files, links):
    """Add a link target to `links` for each 'id' attribute in the chunks.

    The id that names the chunk file maps to the file itself, all other ids
    map to an anchor in that file.
    """
    for node in files:
        filename = node.filename
        # filename without the '.html' extension
        own_id = filename[:-5]
        anchor_base = filename + '#'
        for attr in ID_XPATH(node.xml):
            links[attr] = filename if attr == own_id else anchor_base + attr
def build_glossary(files):
    """Fill the global `glossary` from all 'glossary' chunks in `files`."""

    def plain_text(elem):
        return etree.tostring(elem, method="text", encoding=str).strip()

    for node in files:
        if node.xml.tag == 'glossary':
            for term in GLOSSENTRY_XPATH(node.xml):
                # TODO: there can be all kind of things in a glossary. This
                # only supports what we commonly use
                key = plain_text(term.find('glossterm'))
                value = plain_text(term.find('glossdef'))
                glossary[key] = value
248 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert each child of `xml` and add the html to the `result` list.

    Tags without an entry in convert_tags are passed to convert__unknown.
    """
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only the html of its children."""
    children_html = []
    convert_inner(ctx, xml, children_html)
    return children_html
def convert_skip(ctx, xml):
    """Drop the tag and everything in it; returns a single empty string."""
    nothing = ['']
    return nothing
# tags we already warned about in convert__unknown()
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for tags without an entry in convert_tags.

    Sub-chunks produce no output, xml comments are copied and any other tag
    is wrapped into html comments that name the tag.
    """
    tag = xml.tag
    # don't recurse on subchunks
    if tag in CHUNK_TAGS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']

    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    wrapped = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, wrapped)
    wrapped.append('<!-- /' + tag + '-->\n')
    return wrapped
def convert_refsect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a refsect* tag to a div with a heading.

    Args:
      ctx: the conversion context
      xml: the refsect element; its 'title' child gets removed
      h_tag: html heading tag to use for the title
      inner_func: function used to convert the children

    Returns:
      list of html fragments
    """
    html = ['<div class="%s">\n' % xml.tag]
    heading = xml.find('title')
    if heading is not None:
        if 'id' in xml.attrib:
            html.append('<a name="%s"></a>' % xml.attrib['id'])
        html.append('<%s>%s</%s>' % (h_tag, heading.text, h_tag))
        # the title is handled, keep it away from inner_func
        xml.remove(heading)
    if xml.text:
        html.append(xml.text)
    inner_func(ctx, xml, html)
    html.append('</div>')
    if xml.tail:
        html.append(xml.tail)
    return html
def xml_get_title(xml):
    """Return the text of the 'title' child or '' (with a warning) if missing."""
    title = xml.find('title')
    if title is None:
        # TODO(ensonic): any way to get the file (includes) too?
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return title.text
313 # docbook tags
def convert_acronym(ctx, xml):
    """Convert an 'acronym' tag; the glossary entry becomes the tooltip."""
    term = xml.text
    # TODO: print a sensible warning if missing
    tooltip = glossary.get(term, '')
    html = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (tooltip, term)]
    if xml.tail:
        html.append(xml.tail)
    return html
def convert_bookinfo(ctx, xml):
    """Convert the 'bookinfo' tag into the title page div."""
    page = ['<div class="titlepage">']
    convert_inner(ctx, xml, page)
    page.append('<hr>\n</div>')
    if xml.tail:
        page.append(xml.tail)
    return page
def convert_blockquote(ctx, xml):
    """Convert a 'blockquote' tag with its text, children and tail."""
    quote = ['<div class="blockquote">\n<blockquote class="blockquote">']
    if xml.text:
        quote.append(xml.text)
    convert_inner(ctx, xml, quote)
    quote.append('</blockquote>\n</div>')
    if xml.tail:
        quote.append(xml.tail)
    return quote
def convert_colspec(ctx, xml):
    """Convert a 'colspec' tag into a html 'col' tag.

    The 'colname' attribute becomes the class and 'colwidth' the width.
    """
    # is in tgroup and there can be no 'text'
    attribs = xml.attrib
    col = ['<col']
    for name, template in (('colname', ' class="%s"'), ('colwidth', ' width="%s"')):
        if name in attribs:
            col.append(template % attribs[name])
    col.append('>\n')
    return col
def convert_corpauthor(ctx, xml):
    """Convert a 'corpauthor' tag into a h3 heading inside a div."""
    author = ['<div><h3 class="corpauthor">\n']
    if xml.text:
        author.append(xml.text)
    convert_inner(ctx, xml, author)
    author.append('</h3></div>\n')
    if xml.tail:
        author.append(xml.tail)
    return author
def convert_div(ctx, xml):
    """Convert a tag into a div that uses the tag name as the class."""
    block = ['<div class="%s">\n' % xml.tag]
    if xml.text:
        block.append(xml.text)
    convert_inner(ctx, xml, block)
    block.append('</div>')
    if xml.tail:
        block.append(xml.tail)
    return block
def convert_em_class(ctx, xml):
    """Convert a tag into <em><code> with the tag name as the class."""
    emphasized = ['<em class="%s"><code>' % xml.tag]
    if xml.text:
        emphasized.append(xml.text)
    convert_inner(ctx, xml, emphasized)
    emphasized.append('</code></em>')
    if xml.tail:
        emphasized.append(xml.tail)
    return emphasized
def convert_entry(ctx, xml):
    """Convert a table 'entry' into a 'td'; the 'role' becomes the class."""
    if 'role' in xml.attrib:
        open_end = ' class="%s">' % xml.attrib['role']
    else:
        open_end = '>'
    cell = ['<td', open_end]
    if xml.text:
        cell.append(xml.text)
    convert_inner(ctx, xml, cell)
    cell.append('</td>')
    if xml.tail:
        cell.append(xml.tail)
    return cell
def convert_glossdef(ctx, xml):
    """Convert a 'glossdef' tag into a 'dd' holding the converted children."""
    definition = ['<dd class="glossdef">']
    convert_inner(ctx, xml, definition)
    definition.append('</dd>\n')
    return definition
def convert_glossdiv(ctx, xml):
    """Convert a 'glossdiv': an anchored heading followed by the entries.

    The 'title' child is removed from `xml`.
    """
    heading = xml.find('title')
    label = heading.text
    xml.remove(heading)
    section = ['<a name="gls%s"></a><h3 class="title">%s</h3>' % (label, label)]
    convert_inner(ctx, xml, section)
    return section
def convert_glossentry(ctx, xml):
    """Convert a 'glossentry': only the converted children are emitted."""
    entry = []
    convert_inner(ctx, xml, entry)
    return entry
def convert_glossterm(ctx, xml):
    """Convert a 'glossterm' into a 'dt' with a named anchor.

    The anchor name is the 'id' of a nested 'anchor' tag if there is one,
    otherwise 'glossterm-' plus the term text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        anchor_id = ''
        anchor_tail = ''
    else:
        anchor_id = anchor.attrib.get('id', '')
        anchor_tail = anchor.tail or ''
    label = anchor_tail + (xml.text or '')
    name = anchor_id or ('glossterm-' + label)
    return [
        '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
            name, label)
    ]
def convert_imageobject(ctx, xml):
    """Convert an 'imageobject' into an 'img' tag using the 'imagedata' child."""
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
def convert_indexdiv(ctx, xml):
    """Convert an 'indexdiv': an anchored heading followed by the entries.

    The 'title' child is removed from `xml`.
    """
    heading = xml.find('title')
    label = heading.text
    xml.remove(heading)
    section = ['<a name="idx%s"></a><h3 class="title">%s</h3>' % (label, label)]
    convert_inner(ctx, xml, section)
    return section
def convert_informaltable(ctx, xml):
    """Convert an 'informaltable' into a html table wrapped in a div.

    pgwide="1" sets the width to 100% and frame="none" removes the border.
    """
    attribs = xml.attrib
    table = ['<div class="informaltable"><table class="informaltable"']
    if attribs.get('pgwide') == '1':
        table.append(' width="100%"')
    if attribs.get('frame') == 'none':
        table.append(' border="0"')
    table.append('>\n')
    convert_inner(ctx, xml, table)
    table.append('</table></div>')
    if xml.tail:
        table.append(xml.tail)
    return table
def convert_itemizedlist(ctx, xml):
    """Convert an 'itemizedlist' into a 'ul' wrapped in a div."""
    items = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, items)
    items.append('</ul></div>')
    if xml.tail:
        items.append(xml.tail)
    return items
def convert_link(ctx, xml):
    """Convert a 'link' tag into a cross reference.

    Args:
      ctx: the conversion context; ctx['module'] is passed to fixxref
      xml: the 'link' element, must have a 'linkend' attribute

    Returns:
      list of html fragments: the cross reference made by fixxref.MakeXRef()
      or, if 'linkend' is empty or listed in fixxref.NoLinks, the plain
      content of the tag, followed by the tail text if any.
    """
    linkend = xml.attrib['linkend']

    # The text of the tag comes first, then the html of the children.
    link_text = []
    if xml.text:
        link_text.append(xml.text)
    convert_inner(ctx, xml, link_text)
    text = ''.join(link_text)

    if linkend and linkend not in fixxref.NoLinks:
        # TODO: fixxref does some weird checks in xml.text
        result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, text)]
    else:
        # no link wanted, but the content must not get lost
        result = [text]
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_listitem(ctx, xml):
    """Convert a 'listitem' into a 'li' holding the converted children."""
    # is in itemizedlist and there can be no 'text'
    item = ['<li class="listitem">']
    convert_inner(ctx, xml, item)
    item.append('</li>')
    return item
def convert_literal(ctx, xml):
    """Convert a tag into 'code' that uses the tag name as the class."""
    code = ['<code class="%s">' % xml.tag]
    if xml.text:
        code.append(xml.text)
    convert_inner(ctx, xml, code)
    code.append('</code>')
    if xml.tail:
        code.append(xml.tail)
    return code
def convert_orderedlist(ctx, xml):
    """Convert an 'orderedlist' into an 'ol' wrapped in a div.

    Both html tags use the docbook tag name 'orderedlist' as the class, like
    the other list and block converters do (it used to be misspelled as
    'orderedlistlist').
    """
    result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, result)
    result.append('</ol></div>')
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_para(ctx, xml):
    """Convert a 'para' into a 'p', preceded by an anchor if it has an 'id'."""
    para = []
    if 'id' in xml.attrib:
        para.append('<a name="%s"></a>' % xml.attrib['id'])
    para.append('<p>')
    if xml.text:
        para.append(xml.text)
    convert_inner(ctx, xml, para)
    para.append('</p>')
    if xml.tail:
        para.append(xml.tail)
    return para
def convert_para_like(ctx, xml):
    """Like convert_para(), but the 'p' gets the tag name as the class."""
    para = []
    if 'id' in xml.attrib:
        para.append('<a name="%s"></a>' % xml.attrib['id'])
    para.append('<p class="%s">' % xml.tag)
    if xml.text:
        para.append(xml.text)
    convert_inner(ctx, xml, para)
    para.append('</p>')
    if xml.tail:
        para.append(xml.tail)
    return para
def convert_phrase(ctx, xml):
    """Convert a 'phrase' into a 'span'; the 'role' becomes the class."""
    if 'role' in xml.attrib:
        open_end = ' class="%s">' % xml.attrib['role']
    else:
        open_end = '>'
    span = ['<span', open_end]
    if xml.text:
        span.append(xml.text)
    convert_inner(ctx, xml, span)
    span.append('</span>')
    if xml.tail:
        span.append(xml.tail)
    return span
def convert_primaryie(ctx, xml):
    """Convert a 'primaryie' into a 'dt' followed by an empty 'dd'."""
    entry = ['<dt>\n']
    convert_inner(ctx, xml, entry)
    entry.append('\n</dt>\n<dd></dd>\n')
    return entry
def convert_pre(ctx, xml):
    """Convert a tag into 'pre' that uses the tag name as the class."""
    pre = ['<pre class="%s">\n' % xml.tag]
    if xml.text:
        pre.append(xml.text)
    convert_inner(ctx, xml, pre)
    pre.append('</pre>')
    if xml.tail:
        pre.append(xml.tail)
    return pre
def convert_programlisting(ctx, xml):
    """Convert a 'programlisting' tag.

    Listings with role="example" get syntax highlighting and line numbers.
    The language is taken from the 'language' attribute and defaults to 'c'.
    If pygments has no lexer for the language, a warning is logged and the
    text is emitted as a plain 'pre'. All other listings are emitted as a
    plain 'pre' with their children converted.

    Args:
      ctx: the conversion context
      xml: the 'programlisting' element

    Returns:
      list of html fragments
    """
    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', 'c').lower()
            if lang not in LEXERS:
                # The module only imports CLexer, so import the lookup here.
                from pygments.lexers import get_lexer_by_name
                from pygments.util import ClassNotFound
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    # remember the miss so that we don't search again
                    LEXERS[lang] = None
            lexer = LEXERS[lang]
            if lexer:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                result.append(xml.text)
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        if xml.text:
            result.append(xml.text)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_refsect1(ctx, xml):
    """Convert a 'refsect1' using a h2 heading."""

    # Add a divider between two consecutive refsect2
    def convert_with_dividers(ctx, xml, result):
        prev_tag = None
        for child in xml:
            tag = child.tag
            if tag == 'refsect2' and prev_tag == tag:
                result.append('<hr>\n')
            result.extend(convert_tags.get(tag, convert__unknown)(ctx, child))
            prev_tag = tag

    return convert_refsect(ctx, xml, 'h2', convert_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert a 'refsect2' using a h3 heading."""
    heading_tag = 'h3'
    return convert_refsect(ctx, xml, heading_tag)
def convert_refsect3(ctx, xml):
    """Convert a 'refsect3' using a h4 heading."""
    heading_tag = 'h4'
    return convert_refsect(ctx, xml, heading_tag)
def convert_row(ctx, xml):
    """Convert a table 'row' into a 'tr' holding the converted children."""
    row = ['<tr>\n']
    convert_inner(ctx, xml, row)
    row.append('</tr>\n')
    return row
def convert_simpara(ctx, xml):
    """Convert a 'simpara' into a 'p'; only text and tail are emitted."""
    para = ['<p>']
    for text, closing in ((xml.text, '</p>'), (xml.tail, None)):
        if text:
            para.append(text)
        if closing:
            para.append(closing)
    return para
def convert_span(ctx, xml):
    """Convert a tag into a 'span' that uses the tag name as the class."""
    span = ['<span class="%s">' % xml.tag]
    if xml.text:
        span.append(xml.text)
    convert_inner(ctx, xml, span)
    span.append('</span>')
    if xml.tail:
        span.append(xml.tail)
    return span
def convert_tbody(ctx, xml):
    """Convert a 'tbody' tag; the children are converted in between."""
    # is in tgroup and there can be no 'text'
    body = ['<tbody>']
    convert_inner(ctx, xml, body)
    body.append('</tbody>')
    return body
def convert_tgroup(ctx, xml):
    """Convert a 'tgroup' tag.

    tgroup does not expand to anything, but the nested colspecs need to be
    put into a colgroup. The colspecs are removed from `xml` before the
    remaining children are converted.
    """
    # is in informaltable and there can be no 'text'
    html = []
    colspecs = xml.findall('colspec')
    if colspecs:
        html.append('<colgroup>\n')
        for colspec in colspecs:
            html.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    return html
def convert_ulink(ctx, xml):
    """Convert a 'ulink' into an 'a' tag pointing to the 'url' attribute."""
    anchor = '<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)
    if xml.tail:
        return [anchor, xml.tail]
    return [anchor]
# Maps docbook tag names to their converter function. convert_inner() falls
# back to convert__unknown() for tags that are not listed here.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'acronym': convert_acronym,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'colspec': convert_colspec,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'function': convert_span,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_para_like,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_literal,
    'mediaobject': convert_div,
    'note': convert_div,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'tbody': convert_tbody,
    'tgroup': convert_tgroup,
    'type': convert_span,
    'ulink': convert_ulink,
    'warning': convert_div,
}
# conversion helpers

# Start of every html file. Takes two format arguments: the page title and
# the html of the <link> tags for the head (see generate_head_links()).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
def generate_head_links(ctx):
    """Return the <link> tags for the html head as one string.

    The 'home' link is always there, 'up', 'prev' and 'next' only if the
    ctx has an entry for them.
    """
    home = ctx['nav_home']
    links = ['<link rel="home" href="%s" title="%s">\n' % (home.filename, home.title)]
    optional = (
        ('nav_up', '<link rel="up" href="%s" title="%s">\n'),
        ('nav_prev', '<link rel="prev" href="%s" title="%s">\n'),
        ('nav_next', '<link rel="next" href="%s" title="%s">\n'),
    )
    for key, template in optional:
        if key in ctx:
            target = ctx[key]
            links.append(template % (target.filename, target.title))
    return ''.join(links)
def generate_nav_links(ctx):
    """Return the table cells with the home, up, prev and next buttons.

    Targets that are missing in the ctx get the insensitive image without
    a link.
    """
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % ctx['nav_home'].filename
    ]
    # (ctx key, cell with the link, cell used if there is no target)
    buttons = (
        ('nav_up',
         '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>',
         '<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_prev',
         '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>',
         '<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_next',
         '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>',
         '<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>'),
    )
    for key, linked, insensitive in buttons:
        if key in ctx:
            cells.append(linked % ctx[key].filename)
        else:
            cells.append(insensitive)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Return the toc entries for the children of `node` as html fragments.

    Children that have children themselves get a nested list.
    """
    toc = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        toc.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, child.filename, child.title))
        if child.subtitle:
            toc.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        toc.append('</dt>\n')
        if not child.children:
            continue
        toc.append('<dd><dl>')
        toc.extend(generate_toc(ctx, child))
        toc.append('</dl></dd>')
    return toc
def generate_basic_nav(ctx):
    """Return the navigation table that only has the nav buttons."""
    buttons = generate_nav_links(ctx)
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts"></td>
%s
</tr>
</table>
""" % buttons
def generate_alpha_nav(ctx, divs, prefix):
    """Return the navigation table with one shortcut for each div title.

    Args:
      ctx: the conversion context
      divs: the xml elements to link to; their titles are the link texts
      prefix: string put in front of the title to build the anchor name
    """
    titles = [xml_get_title(div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles
    ]
    separator = '\n<span class="dim">|</span>\n'

    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts">
<span id="nav_index">
%s
</span>
</td>
%s
</tr>
</table>
""" % (separator.join(shortcuts), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to `result`.

    Args:
      ctx: the conversion context
      refsect1s: the 'refsect1' elements of the refentry; each one gets a
        shortcut, except those with a role ending in '_proto' and those
        without an 'id' attribute
      result: list of html fragments to append to
    """
    # This literal is not %-formatted, so the percent sign must not be
    # doubled.
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for s in refsect1s:
        # don't list TOC sections (role="xxx_proto")
        if s.attrib.get('role', '').endswith("_proto"):
            continue
        # skip section without 'id' attrs
        if 'id' not in s.attrib:
            continue

        title = xml_get_title(s)
        result.append("""
<span id="nav_description">
  <span class="dim">|</span> 
<a href="#%s" class="shortcut">%s</a>
</span>""" % (s.attrib['id'], title))
    result.append("""
</td>
%s
</tr>
</table>
""" % generate_nav_links(ctx))
def get_id(node):
    """Return the 'id' of the xml element of a chunk, generating one if needed.

    Args:
      node: tree node of the chunk; node.xml is its xml element

    Returns:
      The 'id' attribute if it is set and not empty. Otherwise 'id-1.'
      followed by the dot-separated 1-based positions of the element and its
      ancestors among their siblings.
    """
    xml = node.xml
    node_id = xml.attrib.get('id', None)
    if node_id:
        return node_id

    # sourceline can be None, so don't format it with %d
    logging.info('%s: No "id" attribute on "%s", generating one',
                 xml.sourceline, xml.tag)
    ix = []
    # Generate the 'id'. We need to walk up the xml-tree and check the positions
    # for each sibling.
    parent = xml.getparent()
    while parent is not None:
        ix.insert(0, str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    return 'id-1.' + '.'.join(ix)
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk into a page with its content followed by a toc.

    Args:
      ctx: the conversion context; ctx['node'] is the chunk
      div_class: class of the div that wraps the page
      title_tag: html heading tag for the title

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    xml = node.xml
    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    title = xml.find('title')
    if title is not None:
        page.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (
            title_tag, get_id(node), title.text, title_tag))
        # the title is handled, don't convert it again below
        xml.remove(title)
    convert_inner(ctx, xml, page)
    page.append("""<p>
<b>Table of Contents</b>
</p>
<div class="toc">
<dl class="toc">
""")
    page.extend(generate_toc(ctx, node))
    page.append("""</dl>
</div>
</div>
</body>
</html>""")
    return page
949 # docbook chunks
def convert_book(ctx):
    """Convert the 'book' chunk into the index page with the full toc."""
    node = ctx['node']
    book_title = node.title
    page = [
        HTML_HEADER % (book_title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
<tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % book_title
    ]
    bookinfo = node.xml.findall('bookinfo')[0]
    # we already used the title
    used_title = bookinfo.find('title')
    if used_title is not None:
        bookinfo.remove(used_title)
    page.extend(convert_bookinfo(ctx, bookinfo))
    page.append("""<div class="toc">
<dl class="toc">
""")
    page.extend(generate_toc(ctx, node.root))
    page.append("""</dl>
</div>
</div>
</body>
</html>""")
    return page
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a toc page with a h2 title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_glossary(ctx):
    """Convert a 'glossary' chunk into a page with alphabetic shortcuts.

    Args:
      ctx: the conversion context; ctx['node'] is the glossary chunk

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    # The wrapper div is named after the docbook tag (it used to be 'index').
    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls'),
        """<div class="glossary">
<div class="titlepage"><h1 class="title">
<a name="%s"></a>%s</h1>
</div>""" % (get_id(node), node.title)
    ]

    for i in glossdivs:
        result.extend(convert_glossdiv(ctx, i))

    result.append("""</div>
</body>
</html>""")
    return result
def convert_index(ctx):
    """Convert an 'index' chunk into a page with alphabetic shortcuts.

    Args:
      ctx: the conversion context; ctx['node'] is the index chunk

    Returns:
      list of html fragments for the whole page. An index without an outer
      'indexdiv' yields a page without entries.
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer = node.xml.find('indexdiv')
    indexdivs = outer.findall('indexdiv') if outer is not None else []

    # The wrapper div is named after the docbook tag (it used to be
    # 'glossary').
    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx'),
        """<div class="index">
<div class="titlepage"><h2 class="title">
<a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_part(ctx):
    """Convert a 'part' chunk: a toc page with a h1 title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
def convert_preface(ctx):
    """Convert a 'preface' chunk into a page with a h2 title and no toc."""
    node = ctx['node']
    xml = node.xml
    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]
    title = xml.find('title')
    if title is not None:
        page.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), title.text))
        # the title is handled, don't convert it again below
        xml.remove(title)
    convert_inner(ctx, xml, page)
    page.append("""</div>
</body>
</html>""")
    return page
def convert_reference(ctx):
    """Convert a 'reference' chunk: a toc page with a h1 title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
def convert_refentry(ctx):
    """Convert a 'refentry' chunk into a page.

    The page has a navigation table with shortcuts to the sections, the name
    and purpose of the entry and all converted 'refsect1' children.

    Args:
      ctx: the conversion context; ctx['node'] is the refentry chunk

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Show the purpose from the document (the chunk's subtitle) instead of a
    # fixed text; leave it out if there is none.
    name_and_purpose = node.title
    if node.subtitle:
        name_and_purpose += ' — ' + node.subtitle

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s</p>
</td>
<td class="gallery_image" valign="top" align="right"></td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, name_and_purpose))

    for s in refsect1s:
        result.extend(convert_refsect1(ctx, s))
    result.append("""</div>
</body>
</html>""")
    return result
# Maps chunk tag names to the function that converts such a chunk into a
# page; convert() warns about chunks that are not listed here.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
}
def generate_nav_nodes(files, node):
    """Return the navigation targets for `node` as a dict.

    'nav_home' is always set. 'nav_up', 'nav_prev' and 'nav_next' are only
    set if `node` has a parent, a predecessor or a successor in `files`.
    """
    targets = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        targets['nav_up'] = node.parent
    pos = files.index(node)
    if pos > 0:
        targets['nav_prev'] = files[pos - 1]
    if pos + 1 < len(files):
        targets['nav_next'] = files[pos + 1]
    return targets
def convert(out_dir, module, files, node):
    """Convert the docbook chunks to a html file.

    The file is created even if there is no converter for the chunk; in that
    case it stays empty and a warning is logged.

    Args:
      out_dir: already created output dir
      module: the module name, stored in the ctx for the converters
      files: list of nodes in the tree in pre-order
      node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    path = os.path.join(out_dir, node.filename)
    with open(path, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            for line in converter(ctx):
                html.write(line)
def create_devhelp2_toc(node):
    """Return the nested <sub> tags for the children of `node`."""
    toc = []
    for child in node.children:
        if not child.children:
            toc.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        toc.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        toc.extend(create_devhelp2_toc(child))
        toc.append('</sub>\n')
    return toc
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute into xml attributes for a keyword.

    Returns:
      The attributes as a string with a leading space, or '' if `node` has
      no 'condition' attribute.
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    escaped = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for cond in escaped.split('|'):
        # deprecated can have no description, then the value is empty
        name, _, description = cond.partition(':')
        keywords.append('{}="{}"'.format(name, description))
    return ' ' + ' '.join(keywords)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Format the devhelp2 '<keyword>' line for a refsect2 node.

    Args:
      node: refsect2 xml node; its 'role' and 'id' attributes are required
      base_link: link prefix that the node's id is appended to

    Returns:
      The '<keyword .../>' line, including any condition attributes.
    """
    attrs = node.attrib
    fields = (
        attrs['role'],
        xml_get_title(node),
        base_link + attrs['id'],
        create_devhelp2_condition_attribs(node),
    )
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % fields
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Format the devhelp2 '<keyword>' line for a refsect3 table row.

    Args:
      node: xml node providing the 'role' and optional 'condition' attributes
      base_link: link prefix that name is appended to
      title: value for the keyword's 'name' attribute
      name: anchor that is appended to base_link

    Returns:
      The '<keyword .../>' line, including any condition attributes.
    """
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        node.attrib['role'], title, link, conditions)
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    The file lists the table of contents (from the chunk tree) and one
    keyword per refsect2 with a 'role', plus one per enum member / struct
    member row found below it.

    Args:
      out_dir: already created output dir
      module: module name; used for the file name and the book's 'name'
      xml: root element of the main document; '/book/bookinfo' is queried
           for the title and the online location
      files: list of nodes in the tree in pre-order; every node provides
             'filename' and 'xml', the first one is also asked for 'root'
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        # Both values are optional in the document; fall back to empty
        # attributes instead of failing.
        title = ''
        online_url = ''
        # xpath() returns a (possibly empty) list, never None, so test for
        # emptiness before indexing.
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            urls = bookinfo.xpath(
                './releaseinfo/ulink[@role="online-location"]/@url')
            if urls:
                online_url = urls[0]
        # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append(""" </chapters>
<functions>
""")
        # keywords from all refsect2 and refsect3
        # NOTE(review): '//refsect2' is an absolute path; confirm that
        # evaluating it on node.xml only yields the sections of that chunk.
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            for refsect2_node in refsect2(node.xml):
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: the keyword name is the text of the details
                # node, the link target is its id
                for refsect3_node in refsect3_enum(refsect2_node):
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members: the id is used for both name and link
                for refsect3_node in refsect3_struct(refsect2_node):
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append(""" </functions>
</book>
""")
        idx.writelines(result)
def get_dirs(uninstalled):
    """Locate the gtk-doc data dir and the style dir.

    Args:
      uninstalled: if true, look next to the running script (or in
                   $ABS_TOP_SRCDIR) instead of the configured datadir

    Returns:
      A (gtkdocdir, styledir) tuple.
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled):
    """Convert the docbook document into html chunks and a devhelp2 index.

    Args:
      module: module name
      index_file: path of the main xml document; xml-includes are resolved
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to pick the style directory

    Returns:
      None; nothing is returned explicitly.
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of HTML_FORMATTER to the copy
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    files = chunk(tree.getroot())
    files = list(PreOrderIter(files))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, tree.getroot(), files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed command line options; 'args' holds the module name and
               the path of the main document, 'uninstalled' is passed on to
               main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine, anything else is re-raised
        pass

    exit_code = main(module, document, out_dir, options.uninstalled)
    sys.exit(exit_code)