mkhtml2: handle language attr for highlighting
[gtk-doc.git] / gtkdoc / mkhtml2.py
blob3f26bea122c99793635314d787441cc182b36d01
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 TODO:
33 - more chunk converters
34 - more tag converters:
35 - footnote: maybe track those in ctx and write them out at the end of the chunk
36 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
37 attr on the <img> tag of the 'imageobject'
38 - check each docbook tag if it can contain #PCDATA, if not don't check for
39 xml.text
41 OPTIONAL:
42 - minify html: https://pypi.python.org/pypi/htmlmin/
44 Requirements:
45 sudo pip3 install anytree lxml pygments
47 Example invocation:
48 cd tests/bugs/docs/
49 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
50 xdg-open db2html/index.html
51 meld html db2html
53 Benchmarking:
54 cd tests/bugs/docs/;
55 rm html-build.stamp; time make html-build.stamp
56 """
58 import argparse
59 import errno
60 import logging
61 import os
62 import shutil
63 import sys
65 from anytree import Node, PreOrderIter
66 from copy import deepcopy
67 from glob import glob
68 from lxml import etree
69 from pygments import highlight
70 from pygments.lexers import CLexer
71 from pygments.formatters import HtmlFormatter
73 from . import config, fixxref
# pygments setup
# Lexer cache keyed by the lower-case language name. It starts out with the
# C lexer only; further lexers are added lazily on first use.
LEXERS = dict(c=CLexer())
# nowrap=True: the formatter emits the highlighted spans without a wrapper
HTML_FORMATTER = HtmlFormatter(nowrap=True)
# Tags that are split off into their own chunk, see
# http://www.sagehill.net/docbookxsl/Chunking.html
# Notes:
# - 'bibliography', 'glossary' and 'index': in article or book
# - 'sect1': except first
# - 'section': if equivalent to sect1
CHUNK_TAGS = [
    'appendix', 'article', 'bibliography', 'book',
    'chapter', 'colophon', 'glossary', 'index',
    'part', 'preface', 'refentry', 'reference',
    'sect1', 'section', 'set', 'setindex',
]
class ChunkParams(object):
    """Parameters for generating names of chunks that have no 'id' attribute.

    Attributes:
      prefix: short string the generated name starts with
      parent: tag name of the parent chunk type, or None
      count: running counter, starts at 0
    """

    def __init__(self, prefix, parent=None):
        self.prefix = prefix
        # Keep the caller supplied parent; it used to be dropped and was
        # always None regardless of the argument.
        self.parent = parent
        self.count = 0
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
#
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# Maps a chunk tag to its naming parameters: (tag, prefix, parent tag).
CHUNK_PARAMS = {
    tag: ChunkParams(prefix, parent)
    for tag, prefix, parent in (
        ('appendix', 'app', 'book'),
        ('book', 'bk', None),
        ('chapter', 'ch', 'book'),
        ('index', 'ix', 'book'),
        ('part', 'pt', 'book'),
        ('preface', 'pr', 'book'),
        ('sect1', 's', 'chapter'),
        ('section', 's', 'chapter'),
    )
}
# Per chunk tag: a pair of (title xpath, subtitle xpath or None).
# The '_' entry holds the defaults for tags without an own entry.
TITLE_XPATHS = dict(
    _=(etree.XPath('./title'), None),
    book=(etree.XPath('./bookinfo/title'), None),
    refentry=(
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose'),
    ),
)

# Selects all 'id' attributes.
ID_XPATH = etree.XPath('//@id')

# Selects all glossary entries.
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# Maps glossary terms to their definitions (plain text).
glossary = {}
def gen_chunk_name(node):
    """Return the base name (no extension) for the chunk of an xml node.

    The 'id' attribute is used if there is one. Otherwise the name is built
    from the prefix registered for the tag in CHUNK_PARAMS and a running,
    two-digit number. Unknown tags get registered with the first two letters
    of the tag as their prefix and a warning is logged.
    """
    chunk_id = node.attrib.get('id')
    if chunk_id is not None:
        return chunk_id

    tag = node.tag
    try:
        params = CHUNK_PARAMS[tag]
    except KeyError:
        params = CHUNK_PARAMS[tag] = ChunkParams(tag[:2])
        logging.warning('Add CHUNK_PARAMS for "%s"', tag)

    params.count += 1
    # TODO: handle parents to make names of nested tags unique. We only need
    # to prepend the parent name if there is more than one of them in the xml.
    return '%s%02d' % (params.prefix, params.count)
def get_chunk_titles(node):
    """Collect title and subtitle information for a chunk.

    Looks up the xpath pair for the tag of the node in TITLE_XPATHS (falling
    back to the '_' defaults) and returns a dict with the keys 'title',
    'title_tag', 'subtitle' and 'subtitle_tag'. The subtitle entries are None
    if no subtitle xpath is defined for the tag.
    """
    tag = node.tag
    title_xpath, subtitle_xpath = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])

    title_node = title_xpath(node)[0]
    titles = {
        'title': title_node.text,
        # a plain <title> is labelled with the tag of the chunk itself
        'title_tag': tag if title_node.tag == 'title' else title_node.tag,
        'subtitle': None,
        'subtitle_tag': None,
    }

    if subtitle_xpath:
        subtitle_node = subtitle_xpath(node)[0]
        titles['subtitle'] = subtitle_node.text
        titles['subtitle_tag'] = subtitle_node.tag
    return titles
def chunk(xml_node, parent=None):
    """Chunk the tree.

    Walks the xml tree recursively. Each node with a tag listed in CHUNK_TAGS
    becomes a new tree Node below 'parent' that carries the xml, the output
    filename and the titles.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree.
    """
    if xml_node.tag in CHUNK_TAGS:
        if parent:
            # detach a copy of the sub tree and remove the xml-node from
            # its parent
            detached = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = detached

        titles = get_chunk_titles(xml_node)
        filename = gen_chunk_name(xml_node) + '.html'
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=filename, **titles)

    for child_node in xml_node:
        chunk(child_node, parent)

    return parent
def add_id_links(files, links):
    """Add an entry to 'links' for each 'id' attribute found in the chunks.

    The id that equals the chunk name (filename without '.html') maps to the
    plain filename, all others map to 'filename#id'.
    """
    for node in files:
        filename = node.filename
        own_id = filename[:-5]
        for attr in ID_XPATH(node.xml):
            links[attr] = filename if attr == own_id else filename + '#' + attr
def build_glossary(files):
    """Fill the module level 'glossary' dict from all 'glossary' chunks.

    TODO: there can be all kind of things in a glossary. This only supports
    what we commonly use.
    """
    def plain_text(element):
        return etree.tostring(element, method="text", encoding=str).strip()

    for node in files:
        if node.xml.tag == 'glossary':
            for entry in GLOSSENTRY_XPATH(node.xml):
                term = plain_text(entry.find('glossterm'))
                definition = plain_text(entry.find('glossdef'))
                glossary[term] = definition
241 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert all children of 'xml' and append the output to 'result'.

    The converter is looked up by tag in convert_tags, tags without an entry
    are handled by convert__unknown.
    """
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only the converted children."""
    children = []
    convert_inner(ctx, xml, children)
    return children
def convert_skip(ctx, xml):
    """Drop the tag and everything in it; returns a single empty string."""
    return list(('',))
# Tags we already warned about (tag -> True), so that we warn only once.
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for tags without an entry in convert_tags.

    Wraps the converted children into html comments naming the tag and logs
    a warning the first time a tag is seen. Sub-chunks yield nothing.

    Nodes whose tag is not a string (comments, processing instructions and
    the like have a callable as their tag) are not rendered; only the text
    following them is kept. Previously those raised a TypeError when the tag
    was concatenated into the comment markers.
    """
    tag = xml.tag
    if not isinstance(tag, str):
        return [xml.tail] if xml.tail else []
    # don't recurse on subchunks
    if tag in CHUNK_TAGS:
        return []
    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    result = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, result)
    result.append('<!-- /' + tag + '-->\n')
    return result
def convert_refsect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a refsect into a <div> with a heading.

    Args:
      ctx: conversion context
      xml: the refsect node; a 'title' child gets removed from it
      h_tag: html heading tag to use for the title
      inner_func: function converting the children into the result list
    """
    parts = ['<div class="%s">\n' % xml.tag]
    title = xml.find('title')
    if title is not None:
        anchor_id = xml.attrib.get('id')
        if anchor_id is not None:
            parts.append('<a name="%s"></a>' % anchor_id)
        parts.append('<{0}>{1}</{0}>'.format(h_tag, title.text))
        # the title is done, keep it away from the child conversion
        xml.remove(title)
    if xml.text:
        parts.append(xml.text)
    inner_func(ctx, xml, parts)
    parts.append('</div>')
    if xml.tail:
        parts.append(xml.tail)
    return parts
def xml_get_title(xml):
    """Return the text of the 'title' child or '' (with a warning) if missing."""
    title = xml.find('title')
    if title is None:
        # TODO(ensonic): any way to get the file (includes) too?
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return title.text
303 # docbook tags
def convert_acronym(ctx, xml):
    """Convert an acronym, using the glossary definition as the tooltip."""
    acronym = xml.text
    # TODO: print a sensible warning if missing
    definition = glossary.get(acronym, '')
    html = '<acronym title="%s"><span class="acronym">%s</span></acronym>' % (definition, acronym)
    return [html, xml.tail] if xml.tail else [html]
def convert_bookinfo(ctx, xml):
    """Convert the bookinfo into the titlepage <div>, closed with a rule."""
    parts = ['<div class="titlepage">']
    convert_inner(ctx, xml, parts)
    parts.append('<hr>\n</div>')
    tail = xml.tail
    if tail:
        parts.append(tail)
    return parts
def convert_blockquote(ctx, xml):
    """Convert a blockquote into a <blockquote> wrapped in a <div>."""
    parts = ['<div class="blockquote">\n<blockquote class="blockquote">']
    parts += [xml.text] if xml.text else []
    convert_inner(ctx, xml, parts)
    parts.append('</blockquote>\n</div>')
    parts += [xml.tail] if xml.tail else []
    return parts
def convert_colspec(ctx, xml):
    """Convert a colspec into a <col> tag.

    'colname' becomes the class and 'colwidth' the width attribute.
    It is in tgroup and there can be no 'text'.
    """
    parts = ['<col']
    for attr_name, html_attr in (('colname', 'class'), ('colwidth', 'width')):
        if attr_name in xml.attrib:
            parts.append(' %s="%s"' % (html_attr, xml.attrib[attr_name]))
    parts.append('>\n')
    return parts
def convert_corpauthor(ctx, xml):
    """Convert a corpauthor into a <h3> heading inside a <div>."""
    parts = ['<div><h3 class="corpauthor">\n']
    parts += [xml.text] if xml.text else []
    convert_inner(ctx, xml, parts)
    parts.append('</h3></div>\n')
    parts += [xml.tail] if xml.tail else []
    return parts
def convert_div(ctx, xml):
    """Convert a tag into a <div> that has the tag name as its class."""
    parts = ['<div class="%s">\n' % xml.tag]
    parts += [xml.text] if xml.text else []
    convert_inner(ctx, xml, parts)
    parts.append('</div>')
    parts += [xml.tail] if xml.tail else []
    return parts
def convert_em_class(ctx, xml):
    """Convert a tag into <em><code> with the tag name as the class."""
    parts = ['<em class="%s"><code>' % xml.tag]
    parts += [xml.text] if xml.text else []
    convert_inner(ctx, xml, parts)
    parts.append('</code></em>')
    parts += [xml.tail] if xml.tail else []
    return parts
def convert_entry(ctx, xml):
    """Convert a table entry into a <td>, a 'role' becomes the class."""
    role = xml.attrib.get('role')
    opening = '>' if role is None else ' class="%s">' % role
    parts = ['<td', opening]
    if xml.text:
        parts.append(xml.text)
    convert_inner(ctx, xml, parts)
    parts.append('</td>')
    if xml.tail:
        parts.append(xml.tail)
    return parts
def convert_imageobject(ctx, xml):
    """Convert an imageobject into an <img> for its 'imagedata' child.

    Returns an empty list if there is no 'imagedata'.
    """
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    source = imagedata.attrib.get('fileref', '')
    return ['<img src="%s">' % source]
def convert_indexdiv(ctx, xml):
    """Convert an indexdiv into an anchored <h3> followed by its entries.

    The 'title' child is removed from the xml after it has been used.
    """
    title_node = xml.find('title')
    heading = title_node.text
    xml.remove(title_node)
    parts = ['<a name="idx{0}"></a><h3 class="title">{0}</h3>'.format(heading)]
    convert_inner(ctx, xml, parts)
    return parts
def convert_informaltable(ctx, xml):
    """Convert an informaltable into a <table> wrapped in a <div>.

    pgwide="1" makes the table full width, frame="none" removes the border.
    """
    attrs = xml.attrib
    parts = ['<div class="informaltable"><table class="informaltable"']
    if attrs.get('pgwide') == '1':
        parts.append(' width="100%"')
    if attrs.get('frame') == 'none':
        parts.append(' border="0"')
    parts.append('>\n')
    convert_inner(ctx, xml, parts)
    parts.append('</table></div>')
    if xml.tail:
        parts.append(xml.tail)
    return parts
def convert_itemizedlist(ctx, xml):
    """Convert an itemizedlist into an <ul> wrapped in a <div>."""
    parts = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, parts)
    parts.append('</ul></div>')
    parts += [xml.tail] if xml.tail else []
    return parts
def convert_link(ctx, xml):
    """Convert a link into a cross reference.

    The content of the link (leading text first, then the converted
    children) is turned into a cross reference by fixxref.MakeXRef. If the
    'linkend' is listed in fixxref.NoLinks, the content is emitted without a
    link instead of being dropped.

    Raises:
      KeyError: if the link has no 'linkend' attribute
    """
    linkend = xml.attrib['linkend']

    # Collect the content in document order: the text precedes the children.
    content = []
    if xml.text:
        content.append(xml.text)
    convert_inner(ctx, xml, content)

    if linkend and linkend not in fixxref.NoLinks:
        # TODO: fixxref does some weird checks in xml.text
        result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(content))]
    else:
        # no link target wanted, but keep what was written inside the link
        result = content
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_listitem(ctx, xml):
    """Convert a listitem into a <li>.

    It is in itemizedlist and there can be no 'text'.
    """
    item = ['<li class="listitem">']
    convert_inner(ctx, xml, item)
    return item + ['</li>']
def convert_literal(ctx, xml):
    """Convert a tag into <code> with the tag name as the class."""
    parts = ['<code class="%s">' % xml.tag]
    parts += [xml.text] if xml.text else []
    convert_inner(ctx, xml, parts)
    parts.append('</code>')
    parts += [xml.tail] if xml.tail else []
    return parts
def convert_orderedlist(ctx, xml):
    """Convert an orderedlist into a numbered <ol> wrapped in a <div>.

    Both elements get the class 'orderedlist' (was misspelled as
    'orderedlistlist'), in line with how the other list converter names its
    classes after the docbook tag.
    """
    result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, result)
    result.append('</ol></div>')
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_para(ctx, xml):
    """Convert a para into a <p>, preceded by an anchor if it has an 'id'."""
    anchor_id = xml.attrib.get('id')
    parts = [] if anchor_id is None else ['<a name="%s"></a>' % anchor_id]
    parts.append('<p>')
    if xml.text:
        parts.append(xml.text)
    convert_inner(ctx, xml, parts)
    parts.append('</p>')
    if xml.tail:
        parts.append(xml.tail)
    return parts
def convert_para_like(ctx, xml):
    """Convert a tag into a <p> that has the tag name as its class.

    An anchor is emitted first if the tag has an 'id'.
    """
    anchor_id = xml.attrib.get('id')
    parts = [] if anchor_id is None else ['<a name="%s"></a>' % anchor_id]
    parts.append('<p class="%s">' % xml.tag)
    if xml.text:
        parts.append(xml.text)
    convert_inner(ctx, xml, parts)
    parts.append('</p>')
    if xml.tail:
        parts.append(xml.tail)
    return parts
def convert_phrase(ctx, xml):
    """Convert a phrase into a <span>, a 'role' becomes the class."""
    role = xml.attrib.get('role')
    opening = '>' if role is None else ' class="%s">' % role
    parts = ['<span', opening]
    if xml.text:
        parts.append(xml.text)
    convert_inner(ctx, xml, parts)
    parts.append('</span>')
    if xml.tail:
        parts.append(xml.tail)
    return parts
def convert_primaryie(ctx, xml):
    """Convert a primaryie into a <dt> followed by an empty <dd>."""
    entry = ['<dt>\n']
    convert_inner(ctx, xml, entry)
    return entry + ['\n</dt>\n<dd></dd>\n']
def convert_pre(ctx, xml):
    """Convert a tag into <pre> with the tag name as the class."""
    parts = ['<pre class="%s">\n' % xml.tag]
    parts += [xml.text] if xml.text else []
    convert_inner(ctx, xml, parts)
    parts.append('</pre>')
    parts += [xml.tail] if xml.tail else []
    return parts
def convert_programlisting(ctx, xml):
    """Convert a programlisting into a <pre>.

    Listings with role="example" get syntax highlighted and line numbered.
    The language is taken from the 'language' attribute and defaults to 'c'.
    Lexers are looked up by name on first use and cached in LEXERS. If
    pygments has no lexer for the language, a warning is logged and the text
    is emitted as a plain <pre>.

    All other listings are emitted as a <pre> with converted children.
    """
    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', 'c').lower()
            if lang not in LEXERS:
                # Only CLexer is imported at module level, so get the lookup
                # function here.
                from pygments.lexers import get_lexer_by_name
                from pygments.util import ClassNotFound
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    # remember the miss, so that we don't search again
                    LEXERS[lang] = None
            lexer = LEXERS[lang]
            if lexer:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join(str(i) for i in range(1, line_count + 1))
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                result.append(xml.text)
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        if xml.text:
            result.append(xml.text)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_refsect1(ctx, xml):
    """Convert a refsect1 using a <h2> heading.

    Adds a divider between two consecutive refsect2.
    """
    def convert_children(ctx, xml, result):
        prev_tag = None
        for child in xml:
            if child.tag == 'refsect2' and prev_tag == 'refsect2':
                result.append('<hr>\n')
            converter = convert_tags.get(child.tag, convert__unknown)
            result.extend(converter(ctx, child))
            prev_tag = child.tag

    return convert_refsect(ctx, xml, 'h2', convert_children)
def convert_refsect2(ctx, xml):
    """Convert a refsect2 using a <h3> heading."""
    return convert_refsect(ctx, xml, h_tag='h3')
def convert_refsect3(ctx, xml):
    """Convert a refsect3 using a <h4> heading."""
    return convert_refsect(ctx, xml, h_tag='h4')
def convert_row(ctx, xml):
    """Convert a table row into a <tr>."""
    row = ['<tr>\n']
    convert_inner(ctx, xml, row)
    return row + ['</tr>\n']
def convert_simpara(ctx, xml):
    """Convert a simpara into a <p>; only the text is used, no children."""
    parts = ['<p>', xml.text, '</p>', xml.tail]
    # drop the text and/or tail slot if there is nothing to emit
    return [part for part in parts if part]
def convert_span(ctx, xml):
    """Convert a tag into a <span> that has the tag name as its class."""
    parts = ['<span class="%s">' % xml.tag]
    parts += [xml.text] if xml.text else []
    convert_inner(ctx, xml, parts)
    parts.append('</span>')
    parts += [xml.tail] if xml.tail else []
    return parts
def convert_tbody(ctx, xml):
    """Convert a tbody into a <tbody>.

    It is in tgroup and there can be no 'text'.
    """
    body = ['<tbody>']
    convert_inner(ctx, xml, body)
    return body + ['</tbody>']
def convert_tgroup(ctx, xml):
    """Convert a tgroup.

    tgroup does not expand to anything, but the nested colspecs need to be
    put into a colgroup. The colspecs are removed from the xml, before the
    remaining children get converted.
    It is in informaltable and there can be no 'text'.
    """
    parts = []
    colspecs = xml.findall('colspec')
    if colspecs:
        parts.append('<colgroup>\n')
        for colspec in colspecs:
            parts.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        parts.append('</colgroup>\n')
    convert_inner(ctx, xml, parts)
    return parts
def convert_ulink(ctx, xml):
    """Convert a ulink into an <a> pointing to the 'url' attribute.

    The link content is the text followed by the converted children. A ulink
    without any content shows the url itself instead of the string 'None'.

    Raises:
      KeyError: if the ulink has no 'url' attribute
    """
    url = xml.attrib['url']
    content = []
    if xml.text:
        content.append(xml.text)
    # only recurse if there is something to convert
    if len(xml):
        convert_inner(ctx, xml, content)
    if not content:
        content.append(url)
    result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, ''.join(content))]
    if xml.tail:
        result.append(xml.tail)
    return result
# Maps docbook tags to the function converting them. Tags that are not listed
# are handled by convert__unknown.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'acronym': convert_acronym,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'colspec': convert_colspec,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'function': convert_span,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_para_like,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_literal,
    'mediaobject': convert_div,
    'note': convert_div,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'tbody': convert_tbody,
    'tgroup': convert_tgroup,
    'type': convert_span,
    'ulink': convert_ulink,
    'warning': convert_div,
}
713 # conversion helpers
# Start of each html page. Takes two format args: the page title and the
# <link> tags for the head.
HTML_HEADER = (
    '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n'
    '<html>\n'
    '<head>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n'
    '<title>%s</title>\n'
    '%s<link rel="stylesheet" href="style.css" type="text/css">\n'
    '</head>\n'
    '<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">\n'
)
def generate_head_links(ctx):
    """Return the navigation <link> tags for the html head as one string.

    'nav_home' is required in ctx, 'nav_up', 'nav_prev' and 'nav_next' are
    only used if present.
    """
    template = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [template % ('home', home.filename, home.title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            links.append(template % (rel, target.filename, target.title))
    return ''.join(links)
def generate_nav_links(ctx):
    """Return the table cells with the home/up/prev/next icons as one string.

    'nav_home' is required in ctx. For each of 'nav_up', 'nav_prev' and
    'nav_next' that is missing, an insensitive icon without a link is used.
    """
    active = ('<td><a accesskey="%s" href="%s"><img src="%s.png" '
              'width="16" height="16" border="0" alt="%s"></a></td>')
    inactive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    for key, accesskey, image, label in (
            ('nav_up', 'u', 'up', 'Up'),
            ('nav_prev', 'p', 'left', 'Prev'),
            ('nav_next', 'n', 'right', 'Next')):
        if key in ctx:
            cells.append(active % (accesskey, ctx[key].filename, image, label))
        else:
            cells.append(inactive % image)
    return ''.join(cells)
def generate_toc(ctx, node):
    """Return the table of contents entries for all descendants of 'node'.

    Each child becomes a <dt> linking to its file, with the subtitle added
    if there is one. Children of the child are nested in a <dd><dl>.
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        link = '<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, child.filename, child.title)
        entries.append(link)
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries += ['<dd><dl>'] + generate_toc(ctx, child) + ['</dl></dd>']
    return entries
def generate_basic_nav(ctx):
    """Return the navigation table with an empty shortcuts cell."""
    template = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">\n'
        '<tr valign="middle">\n'
        '<td width="100%%" align="left" class="shortcuts"></td>\n'
        '%s\n'
        '</tr>\n'
        '</table>\n'
    )
    return template % generate_nav_links(ctx)
def generate_index_nav(ctx, indexdivs):
    """Return the navigation table for an index page.

    The shortcuts cell links to the anchor of each of the given indexdivs.
    """
    shortcuts = []
    for indexdiv in indexdivs:
        title = xml_get_title(indexdiv)
        shortcuts.append('<a class="shortcut" href="#idx{0}">{0}</a>'.format(title))

    template = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">\n'
        '<tr valign="middle">\n'
        '<td width="100%%" align="left" class="shortcuts">\n'
        '<span id="nav_index">\n'
        '%s\n'
        '</span>\n'
        '</td>\n'
        '%s\n'
        '</tr>\n'
        '</table>\n'
    )
    separator = '\n<span class="dim">|</span>\n'
    return template % (separator.join(shortcuts), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table for a refentry page to 'result'.

    The shortcuts cell gets a 'Top' link and a link for each refsect1, except
    for the TOC sections (role="xxx_proto").

    Raises:
      KeyError: if a listed refsect1 has no 'id' attribute
    """
    # This part is not %-formatted, so the percent signs must not be doubled
    # (they used to end up as a literal '100%%' in the output).
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for s in refsect1s:
        # don't list TOC sections (role="xxx_proto")
        if s.attrib.get('role', '').endswith("_proto"):
            continue

        title = xml_get_title(s)
        # the separator is surrounded by non-breaking spaces
        result.append("""
<span id="nav_description">
\u00a0<span class="dim">|</span>\u00a0
<a href="#%s" class="shortcut">%s</a>
</span>""" % (s.attrib['id'], title))
    result.append("""
</td>
%s
</tr>
</table>
""" % generate_nav_links(ctx))
def get_id(node):
    """Return the 'id' of the xml of a tree node, generating one if needed.

    If the xml has no (or an empty) 'id' attribute, a warning is logged and
    an id is built from the 1-based child positions on the path from the xml
    root down to the node.
    """
    xml = node.xml
    node_id = xml.attrib.get('id', None)
    if node_id:
        return node_id

    logging.warning('%d: No "id" attribute on "%s"', xml.sourceline, xml.tag)
    # Generate the 'id'. We need to walk up the xml-tree and check the
    # positions for each sibling.
    positions = []
    parent = xml.getparent()
    while parent is not None:
        positions.append(str(parent.getchildren().index(xml) + 1))
        xml, parent = parent, parent.getparent()
    # collected bottom-up, but the id lists them top-down
    positions.reverse()
    return 'id-1.' + '.'.join(positions)
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk that ends with a table of contents of its sub chunks.

    Args:
      ctx: conversion context, the chunk is ctx['node']
      div_class: class for the <div> around the content
      title_tag: html heading tag for the title

    The 'title' child is removed from the xml of the chunk.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    title = node.xml.find('title')
    if title is not None:
        titlepage = ('\n<div class="titlepage">\n'
                     '<%s class="title"><a name="%s"></a>%s</%s>\n'
                     '</div>')
        result.append(titlepage % (title_tag, get_id(node), title.text, title_tag))
        node.xml.remove(title)
    convert_inner(ctx, node.xml, result)
    result.append('<p>\n<b>Table of Contents</b>\n</p>\n<div class="toc">\n<dl class="toc">\n')
    result.extend(generate_toc(ctx, node))
    result.append('</dl>\n</div>\n</div>\n</body>\n</html>')
    return result
891 # docbook chunks
def convert_book(ctx):
    """Convert the book chunk: title, titlepage and the complete toc.

    The 'title' child is removed from the bookinfo, as it is already used
    for the page header.
    """
    node = ctx['node']
    book_title = node.title
    header_table = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">\n'
        '<tr><th valign="middle"><p class="title">%s</p></th></tr>\n'
        '</table>\n'
        '<div class="book">\n'
    )
    result = [
        HTML_HEADER % (book_title, generate_head_links(ctx)),
        header_table % book_title,
    ]
    bookinfo = node.xml.findall('bookinfo')[0]
    # we already used the title
    title = bookinfo.find('title')
    if title is not None:
        bookinfo.remove(title)
    result.extend(convert_bookinfo(ctx, bookinfo))
    result.append('<div class="toc">\n<dl class="toc">\n')
    result.extend(generate_toc(ctx, node.root))
    result.append('</dl>\n</div>\n</div>\n</body>\n</html>')
    return result
def convert_chapter(ctx):
    """Convert a chapter chunk: 'chapter' div with a <h2> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_index(ctx):
    """Convert an index chunk.

    Uses the indexdivs nested in the first indexdiv of the chunk, both for
    the navigation shortcuts and for the page content.
    """
    node = ctx['node']
    node_id = get_id(node)
    # Get all indexdivs under indexdiv
    indexdivs = node.xml.find('indexdiv').findall('indexdiv')

    titlepage = ('<div class="index">\n'
                 '<div class="titlepage"><h1 class="title">\n'
                 '<a name="%s"></a>%s</h1>\n'
                 '</div>')
    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_index_nav(ctx, indexdivs),
        titlepage % (node_id, node.title),
    ]
    for indexdiv in indexdivs:
        result.extend(convert_indexdiv(ctx, indexdiv))
    result.append('</div>\n</body>\n</html>')
    return result
def convert_part(ctx):
    """Convert a part chunk: 'part' div with a <h1> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
def convert_preface(ctx):
    """Convert a preface chunk: 'preface' div with a <h2> title, no toc.

    The 'title' child is removed from the xml of the chunk.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">',
    ]
    title = node.xml.find('title')
    if title is not None:
        titlepage = ('\n<div class="titlepage">\n'
                     '<h2 class="title"><a name="%s"></a>%s</h2>\n'
                     '</div>')
        result.append(titlepage % (get_id(node), title.text))
        node.xml.remove(title)
    convert_inner(ctx, node.xml, result)
    result.append('</div>\n</body>\n</html>')
    return result
def convert_reference(ctx):
    """Convert a reference chunk: 'reference' div with a <h1> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
def convert_refentry(ctx):
    """Convert a refentry chunk.

    Emits the page header, the navigation with shortcuts to the refsect1s,
    the refnamediv with title and purpose and then all refsect1s.

    The purpose line shows the subtitle of the chunk (the refpurpose)
    instead of a hard-coded text.
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s — %s</p>
</td>
<td class="gallery_image" valign="top" align="right"></td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle or ''))

    for s in refsect1s:
        result.extend(convert_refsect1(ctx, s))
    result.append("""</div>
</body>
</html>""")
    return result
# Maps chunk tags to the function generating the html page for the chunk.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = dict(
    book=convert_book,
    chapter=convert_chapter,
    index=convert_index,
    part=convert_part,
    preface=convert_preface,
    reference=convert_reference,
    refentry=convert_refentry,
)
def generate_nav_nodes(files, node):
    """Return a dict with the navigation targets for 'node'.

    'nav_home' is always set to the root. 'nav_up' (the parent), 'nav_prev'
    and 'nav_next' (the neighbours in 'files') are only set if they exist.
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent
    position = files.index(node)
    last = len(files) - 1
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    if position < last:
        nav['nav_next'] = files[position + 1]
    return nav
def convert(out_dir, module, files, node):
    """Convert the docbook chunks to a html file.

    The file is created even if there is no converter for the chunk type; in
    that case only a warning is logged.

    Args:
      out_dir: already created output dir
      module: the module name, passed on to the converters in the context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    # The html header declares charset=UTF-8, so write UTF-8 no matter what
    # the preferred encoding of the locale is.
    with open(os.path.join(out_dir, node.filename), 'wt', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
        }
        ctx.update(generate_nav_nodes(files, node))

        if node.name in convert_chunks:
            html.writelines(convert_chunks[node.name](ctx))
        else:
            logging.warning('Add converter/template for "%s"', node.name)
def create_devhelp2_toc(node):
    """Return the nested <sub> entries for all descendants of 'node'.

    Titles and filenames are escaped, since they end up in xml attributes
    and e.g. a '&' or '"' in a title would make the file ill-formed.
    """
    from xml.sax.saxutils import escape

    def attr(value):
        return escape(str(value), {'"': '&quot;'})

    result = []
    for c in node.children:
        if c.children:
            result.append('<sub name="%s" link="%s">\n' % (attr(c.title), attr(c.filename)))
            result.extend(create_devhelp2_toc(c))
            result.append('</sub>\n')
        else:
            result.append('<sub name="%s" link="%s"/>\n' % (attr(c.title), attr(c.filename)))
    return result
def create_devhelp2_condition_attribs(node):
    """Return the 'condition' attribute of 'node' as xml attributes.

    The condition holds 'name:value' pairs (since, deprecated, ...) that are
    separated with '|'. Each pair becomes ' name="value"'. A pair without a
    ':' gets an empty value instead of raising a TypeError. Returns '' if
    there is no condition.
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    attribs = []
    for cond in node.attrib['condition'].replace('"', '&quot;').split('|'):
        name, _, value = cond.partition(':')
        attribs.append('%s="%s"' % (name, value))
    return ' ' + ' '.join(attribs)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Return the <keyword> line for a refsect2.

    Type, name and link come from the 'role', the title and the 'id' of the
    node.
    """
    keyword_type = node.attrib['role']
    name = xml_get_title(node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, conditions)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Return the <keyword> line for an entry inside a refsect3.

    The type is the 'role' of the node, the link is base_link + name.
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, conditions)
def create_devhelp2(out_dir, module, xml, files):
    """Write the <module>.devhelp2 index file into out_dir.

    Args:
      out_dir: already created output dir
      module: the module name
      xml: root of the document, used to get the book title and online url
      files: list of nodes in the tree in pre-order

    Title and online url are left empty if the document has no matching
    /book/bookinfo entries (this used to raise).
    """
    title = ''
    online_url = ''
    # xpath() returns a list, that is empty if nothing matched
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        titles = bookinfo.xpath('./title/text()')
        if titles:
            title = titles[0]
        urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if urls:
            online_url = urls[0]
    # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append(""" </chapters>
<functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            for refsect3_node in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
            for refsect3_node in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

    result.append(""" </functions>
</book>
""")
    # The xml declaration says utf-8, so don't depend on the locale here.
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt', encoding='utf-8') as idx:
        idx.writelines(result)
def get_dirs(uninstalled):
    """Return the tuple (gtkdocdir, styledir).

    When running uninstalled, the dirs are derived from the location of the
    script, or from the ABS_TOP_SRCDIR environment variable if there is no
    'gtk-doc.xsl' next to the script. Otherwise both are the data dir below
    config.datadir.
    """
    if not uninstalled:
        data_dir = os.path.join(config.datadir, 'gtk-doc/data')
        return (data_dir, data_dir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled):
    """Generate the html files and the devhelp2 index for a document.

    Args:
      module: the module name
      index_file: the main docbook xml file
      out_dir: already created output dir
      uninstalled: whether to use the style files from the source tree
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and add the styles for the syntax highlighting to the copy
    with open(os.path.join(out_dir, 'style.css'), 'at') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    root = tree.getroot()
    files = list(PreOrderIter(chunk(root)))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, tree.getroot(), files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
def run(options):
    """Entry point: create the output dir, run main() and exit.

    The first two entries of options.args are the module name and the main
    docbook xml file. The output goes into 'db2html' next to that file.
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except OSError as error:
        # an already existing dir is fine, anything else is not
        if error.errno != errno.EEXIST:
            raise

    exit_code = main(module, document, out_dir, options.uninstalled)
    sys.exit(exit_code)