2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
this tool will replace both without relying on external tools such as xsltproc
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
xml can be easily supported, but for some combinations of tags we prefer
39 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
40 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
41 attr on the <img> tag of the 'imageobject'
42 - glossary/index: depending on the parents, the headings as h1/h2
43 - maybe track depth when chunking
44 - handle 'label' attributes on part/chapter/section-types
45 - the titles will have a generated prefix, such as 'Part I:'
- in the toc it would be only the label: 'I.'
47 - we need to separate the toc data from the chunking tree
- since we don't chunk first 'section'/'sect1' those are missing from the
50 - the toc also lists 'sect2' (TODO: check how deep it goes)
51 - replace get_title with a result.extend(convert_title(ctx, title_tag))
53 - check each docbook tag if it can contain #PCDATA, if not don't check for
55 - consider some perf-warnings flag
56 - see 'No "id" attribute on'
59 - minify html: https://pypi.python.org/pypi/htmlmin/
62 sudo pip3 install anytree lxml pygments
66 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
67 xdg-open db2html/index.html
72 rm html-build.stamp; time make html-build.stamp
82 from anytree
import Node
, PreOrderIter
83 from copy
import deepcopy
85 from lxml
import etree
86 from pygments
import highlight
87 from pygments
.lexers
import CLexer
88 from pygments
.formatters
import HtmlFormatter
90 from . import config
, fixxref
93 # lazily constructed lexer cache
97 HTML_FORMATTER
= HtmlFormatter(nowrap
=True)
100 class ChunkParams(object):
101 def __init__(self
, prefix
, parent
=None, min_idx
=0):
104 self
.min_idx
= min_idx
107 # TODO: look up the abbrevs and hierarchy for other tags
108 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
109 # https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
111 # If not defined, we can just create an example without an 'id' attr and see
114 'appendix': ChunkParams('app', 'book'),
115 'book': ChunkParams('bk'),
116 'chapter': ChunkParams('ch', 'book'),
117 'glossary': ChunkParams('go', 'book'),
118 'index': ChunkParams('ix', 'book'),
119 'part': ChunkParams('pt', 'book'),
120 'preface': ChunkParams('pr', 'book'),
121 'refentry': ChunkParams('re', 'book'),
122 'reference': ChunkParams('rn', 'book'),
123 'sect1': ChunkParams('s', 'chapter', 1),
124 'section': ChunkParams('s', 'chapter', 1),
126 # TAGS we don't support:
127 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
130 '_': (etree
.XPath('./title'), None),
131 'book': (etree
.XPath('./bookinfo/title'), None),
133 etree
.XPath('./refmeta/refentrytitle'),
134 etree
.XPath('./refnamediv/refpurpose')
# Precompiled XPath yielding the value of every 'id' attribute.
# NOTE(review): '//' is an absolute location path, so matches are taken from
# the whole document the context node belongs to, not only from below the
# node it is evaluated on - confirm that this is what the callers expect.
ID_XPATH = etree.XPath('//@id')

# Precompiled XPath selecting every 'glossentry' element (same '//' caveat as
# for ID_XPATH applies).
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
146 def gen_chunk_name(node
, chunk_params
, idx
):
147 """Generate a chunk file name
149 This is either based on the id or on the position in the doc. In the latter
150 case it uses a prefix from CHUNK_PARAMS.
152 if 'id' in node
.attrib
:
153 return node
.attrib
['id']
155 name
= ('%s%02d' % (chunk_params
.prefix
, idx
))
156 # handle parents to make names of nested tags unique
157 # TODO: we only need to prepend the parent if there are > 1 of them in the
158 # xml. None, the parents we have are not sufficient, e.g. 'index' can
159 # be in 'book' or 'part' or ... Maybe we can track the chunk_parents
160 # when we chunk explicitly and on each level maintain the 'idx'
161 # while naming.parent:
162 # parent = naming.parent
163 # if parent not in CHUNK_PARAMS:
165 # chunk_params = CHUNK_PARAMS[parent]
166 # name = ('%s%02d' % (naming.prefix, idx)) + name
167 logging
.info('Gen chunk name: "%s"', name
)
171 def get_chunk_titles(node
):
173 if tag
not in TITLE_XPATHS
:
175 (title
, subtitle
) = TITLE_XPATHS
['_']
177 (title
, subtitle
) = TITLE_XPATHS
[tag
]
188 result
['title'] = xml
.text
189 if xml
.tag
!= 'title':
190 result
['title_tag'] = xml
.tag
192 result
['title_tag'] = tag
198 result
['subtitle'] = xml
.text
199 result
['subtitle_tag'] = xml
.tag
203 def chunk(xml_node
, idx
=0, parent
=None):
206 The first time, we're called with parent=None and in that case we return
207 the new_node as the root of the tree
210 chunk_params
= CHUNK_PARAMS
.get(tag
)
211 # TODO: if this is None, we should stop traversing, right?
213 # also check idx to handle 'sect1'/'section' special casing
214 if chunk_params
and idx
>= chunk_params
.min_idx
:
215 logging
.info('chunk tag: "%s"[%d]', tag
, idx
)
217 # remove the xml-node from the parent
218 sub_tree
= etree
.ElementTree(deepcopy(xml_node
)).getroot()
219 xml_node
.getparent().remove(xml_node
)
222 title_args
= get_chunk_titles(xml_node
)
223 chunk_name
= gen_chunk_name(xml_node
, chunk_params
, (idx
+ 1))
224 parent
= Node(tag
, parent
=parent
, xml
=xml_node
,
225 filename
=chunk_name
+ '.html', **title_args
)
228 for child
in xml_node
:
229 chunk(child
, idx
, parent
)
230 if child
.tag
in CHUNK_PARAMS
:
236 def add_id_links(files
, links
):
238 chunk_name
= node
.filename
[:-5]
239 chunk_base
= node
.filename
+ '#'
240 for attr
in ID_XPATH(node
.xml
):
241 if attr
== chunk_name
:
242 links
[attr
] = node
.filename
244 links
[attr
] = chunk_base
+ attr
247 def build_glossary(files
):
249 if node
.xml
.tag
!= 'glossary':
251 for term
in GLOSSENTRY_XPATH(node
.xml
):
252 # TODO: there can be all kind of things in a glossary. This only supports
253 # what we commonly use
254 key
= etree
.tostring(term
.find('glossterm'), method
="text", encoding
=str).strip()
255 value
= etree
.tostring(term
.find('glossdef'), method
="text", encoding
=str).strip()
256 glossary
[key
] = value
257 # logging.debug('glosentry: %s:%s', key, value)
263 def convert_inner(ctx
, xml
, result
):
265 result
.extend(convert_tags
.get(child
.tag
, convert__unknown
)(ctx
, child
))
268 def convert_ignore(ctx
, xml
):
270 convert_inner(ctx
, xml
, result
)
274 def convert_skip(ctx
, xml
):
def append_text(text, result):
    """Append html-escaped text to the list of html fragments

    Text that is None, empty or whitespace-only is skipped. Otherwise the
    text is appended unmodified except for the characters that are special
    in html ('&', '<' and '>'), which are replaced by their entities.

    Args:
      text: the 'text' or 'tail' of an xml node, may be None
      result: list of html fragments that is extended in place
    """
    if text and text.strip():
        # '&' has to be replaced first, otherwise the '&' of the entities
        # generated for '<' and '>' would get escaped again.
        result.append(
            text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;'))
286 def convert__unknown(ctx
, xml
):
287 # don't recurse on subchunks
288 if xml
.tag
in CHUNK_PARAMS
:
290 if isinstance(xml
, etree
._Comment
):
291 return ['<!-- ' + xml
.text
+ '-->\n']
294 if xml
.tag
not in missing_tags
:
295 logging
.warning('Add tag converter for "%s"', xml
.tag
)
296 missing_tags
[xml
.tag
] = True
297 result
= ['<!-- ' + xml
.tag
+ '-->\n']
298 convert_inner(ctx
, xml
, result
)
299 result
.append('<!-- /' + xml
.tag
+ '-->\n')
303 def convert_sect(ctx
, xml
, h_tag
, inner_func
=convert_inner
):
304 result
= ['<div class="%s">\n' % xml
.tag
]
305 title
= xml
.find('title')
306 if title
is not None:
307 if 'id' in xml
.attrib
:
308 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
309 result
.append('<%s>%s</%s>' % (h_tag
, title
.text
, h_tag
))
311 append_text(xml
.text
, result
)
312 inner_func(ctx
, xml
, result
)
313 result
.append('</div>')
314 append_text(xml
.tail
, result
)
318 def xml_get_title(xml
):
319 title
= xml
.find('title')
320 if title
is not None:
    # TODO(ensonic): any way to get the file (includes) too?
324 logging
.warning('%s: Expected title tag under "%s %s"', xml
.sourceline
, xml
.tag
, str(xml
.attrib
))
331 def convert_abstract(ctx
, xml
):
332 result
= ["""<div class="abstract">
333 <p class="title"><b>Abstract</b></p>"""]
334 append_text(xml
.text
, result
)
335 convert_inner(ctx
, xml
, result
)
336 result
.append('</div>')
337 append_text(xml
.tail
, result
)
341 def convert_acronym(ctx
, xml
):
343 title
= glossary
.get(key
, '')
344 # TODO: print a sensible warning if missing
345 result
= ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title
, key
)]
347 result
.append(xml
.tail
)
def convert_anchor(ctx, xml):
    """Convert an 'anchor' element into a named html anchor

    Args:
      ctx: the conversion context (not used by this converter)
      xml: the 'anchor' element, it has to carry an 'id' attribute

    Returns:
      list of html fragments: the '<a name=...>' tag followed by the text
      trailing the element, if there is any

    Raises:
      KeyError: if the element has no 'id' attribute
    """
    result = ['<a name="%s"></a>' % xml.attrib['id']]
    # A parent converter only emits its own leading text and then the
    # fragments returned for each child, so the text following this element
    # has to be emitted here or it would be dropped from the output.
    append_text(xml.tail, result)
    return result
355 def convert_bookinfo(ctx
, xml
):
356 result
= ['<div class="titlepage">']
357 convert_inner(ctx
, xml
, result
)
358 result
.append("""<hr>
361 result
.append(xml
.tail
)
365 def convert_blockquote(ctx
, xml
):
366 result
= ['<div class="blockquote">\n<blockquote class="blockquote">']
367 append_text(xml
.text
, result
)
368 convert_inner(ctx
, xml
, result
)
369 result
.append('</blockquote>\n</div>')
370 append_text(xml
.tail
, result
)
374 def convert_code(ctx
, xml
):
375 result
= ['<code class="%s">' % xml
.tag
]
376 append_text(xml
.text
, result
)
377 convert_inner(ctx
, xml
, result
)
378 result
.append('</code>')
379 append_text(xml
.tail
, result
)
383 def convert_colspec(ctx
, xml
):
387 result
.append(' class="%s"' % a
['colname'])
389 result
.append(' width="%s"' % a
['colwidth'])
391 # is in tgroup and there can be no 'text'
395 def convert_command(ctx
, xml
):
396 result
= ['<strong class="userinput"><code>']
397 append_text(xml
.text
, result
)
398 convert_inner(ctx
, xml
, result
)
399 result
.append('</code></strong>')
400 append_text(xml
.tail
, result
)
404 def convert_corpauthor(ctx
, xml
):
405 result
= ['<div><h3 class="corpauthor">\n']
406 append_text(xml
.text
, result
)
407 convert_inner(ctx
, xml
, result
)
408 result
.append('</h3></div>\n')
409 append_text(xml
.tail
, result
)
413 def convert_div(ctx
, xml
):
414 result
= ['<div class="%s">\n' % xml
.tag
]
415 append_text(xml
.text
, result
)
416 convert_inner(ctx
, xml
, result
)
417 result
.append('</div>')
418 append_text(xml
.tail
, result
)
422 def convert_em_class(ctx
, xml
):
423 result
= ['<em class="%s"><code>' % xml
.tag
]
424 append_text(xml
.text
, result
)
425 convert_inner(ctx
, xml
, result
)
426 result
.append('</code></em>')
427 append_text(xml
.tail
, result
)
431 def convert_entry(ctx
, xml
):
432 entry_type
= ctx
['table.entry']
433 result
= ['<' + entry_type
]
434 if 'role' in xml
.attrib
:
435 result
.append(' class="%s"' % xml
.attrib
['role'])
436 if 'morerows' in xml
.attrib
:
437 result
.append(' rowspan="%s"' % (1 + int(xml
.attrib
['morerows'])))
439 append_text(xml
.text
, result
)
440 convert_inner(ctx
, xml
, result
)
441 result
.append('</' + entry_type
+ '>')
442 append_text(xml
.tail
, result
)
446 def convert_footnote(ctx
, xml
):
447 footnotes
= ctx
.get('footnotes', [])
448 # footnotes idx is not per page, but per doc
453 # need a pair of ids for each footnote (docbook generates different ids)
454 this_id
= 'footnote-%d' % idx
455 that_id
= 'ftn.' + this_id
457 inner
= ['<div id="%s" class="footnote">' % that_id
]
458 inner
.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
460 # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
461 # get double nested paras :/.
462 # convert_inner(ctx, xml, inner)
463 para
= xml
.find('para')
465 para
= xml
.find('simpara')
467 inner
.append(para
.text
)
469 logging
.warning('%s: Unhandled footnote content: %s', xml
.sourceline
,
470 etree
.tostring(xml
, method
="text", encoding
=str).strip())
471 inner
.append('</p></div>')
472 footnotes
.append(inner
)
473 ctx
['footnotes'] = footnotes
474 return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
475 that_id
, this_id
, idx
)]
478 def convert_formalpara(ctx
, xml
):
480 title_tag
= xml
.find('title')
481 result
= ['<p><b>%s</b>' % title_tag
.text
]
482 para_tag
= xml
.find('para')
483 append_text(para_tag
.text
, result
)
484 convert_inner(ctx
, para_tag
, result
)
485 append_text(para_tag
.tail
, result
)
486 result
.append('</p>')
487 append_text(xml
.tail
, result
)
491 def convert_glossdef(ctx
, xml
):
492 result
= ['<dd class="glossdef">']
493 convert_inner(ctx
, xml
, result
)
494 result
.append('</dd>\n')
498 def convert_glossdiv(ctx
, xml
):
499 title_tag
= xml
.find('title')
500 title
= title_tag
.text
501 xml
.remove(title_tag
)
503 '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title
, title
)
505 convert_inner(ctx
, xml
, result
)
509 def convert_glossentry(ctx
, xml
):
511 convert_inner(ctx
, xml
, result
)
515 def convert_glossterm(ctx
, xml
):
518 anchor
= xml
.find('anchor')
519 if anchor
is not None:
520 glossid
= anchor
.attrib
.get('id', '')
521 text
+= anchor
.tail
or ''
522 text
+= xml
.text
or ''
524 glossid
= 'glossterm-' + text
526 '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
531 def convert_imageobject(ctx
, xml
):
532 imagedata
= xml
.find('imagedata')
533 if imagedata
is not None:
534 # TODO(ensonic): warn on missing fileref attr?
535 return ['<img src="%s">' % imagedata
.attrib
.get('fileref', '')]
540 def convert_indexdiv(ctx
, xml
):
541 title_tag
= xml
.find('title')
542 title
= title_tag
.text
543 xml
.remove(title_tag
)
545 '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title
, title
)
547 convert_inner(ctx
, xml
, result
)
551 def convert_informaltable(ctx
, xml
):
552 result
= ['<div class="informaltable"><table class="informaltable"']
554 if 'pgwide' in a
and a
['pgwide'] == '1':
555 result
.append(' width="100%"')
556 if 'frame' in a
and a
['frame'] == 'none':
557 result
.append(' border="0"')
559 convert_inner(ctx
, xml
, result
)
560 result
.append('</table></div>')
562 result
.append(xml
.tail
)
566 def convert_itemizedlist(ctx
, xml
):
567 result
= ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
568 convert_inner(ctx
, xml
, result
)
569 result
.append('</ul></div>')
571 result
.append(xml
.tail
)
575 def convert_link(ctx
, xml
):
576 linkend
= xml
.attrib
['linkend']
577 if linkend
in fixxref
.NoLinks
:
582 convert_inner(ctx
, xml
, link_text
)
583 append_text(xml
.text
, link_text
)
584 # TODO: fixxref does some weird checks in xml.text
585 result
= [fixxref
.MakeXRef(ctx
['module'], '', 0, linkend
, ''.join(link_text
))]
586 append_text(xml
.tail
, result
)
590 def convert_listitem(ctx
, xml
):
591 result
= ['<li class="listitem">']
592 convert_inner(ctx
, xml
, result
)
593 result
.append('</li>')
594 # is in itemizedlist and there can be no 'text'
598 def convert_literallayout(ctx
, xml
):
599 result
= ['<div class="literallayout"><p><br>\n']
600 append_text(xml
.text
, result
)
601 convert_inner(ctx
, xml
, result
)
602 result
.append('</p></div>')
603 append_text(xml
.tail
, result
)
607 def convert_orderedlist(ctx
, xml
):
608 result
= ['<div class="orderedlistlist"><ol class="orderedlistlist" type="1">']
609 convert_inner(ctx
, xml
, result
)
610 result
.append('</ol></div>')
611 append_text(xml
.tail
, result
)
615 def convert_para(ctx
, xml
):
617 if 'id' in xml
.attrib
:
618 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
620 append_text(xml
.text
, result
)
621 convert_inner(ctx
, xml
, result
)
622 result
.append('</p>')
623 append_text(xml
.tail
, result
)
627 def convert_para_like(ctx
, xml
):
629 if 'id' in xml
.attrib
:
630 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
631 result
.append('<p class="%s">' % xml
.tag
)
632 append_text(xml
.text
, result
)
633 convert_inner(ctx
, xml
, result
)
634 result
.append('</p>')
635 append_text(xml
.tail
, result
)
639 def convert_phrase(ctx
, xml
):
641 if 'role' in xml
.attrib
:
642 result
.append(' class="%s">' % xml
.attrib
['role'])
645 append_text(xml
.text
, result
)
646 convert_inner(ctx
, xml
, result
)
647 result
.append('</span>')
648 append_text(xml
.tail
, result
)
652 def convert_primaryie(ctx
, xml
):
654 convert_inner(ctx
, xml
, result
)
655 result
.append('\n</dt>\n<dd></dd>\n')
659 def convert_pre(ctx
, xml
):
660 result
= ['<pre class="%s">\n' % xml
.tag
]
661 append_text(xml
.text
, result
)
662 convert_inner(ctx
, xml
, result
)
663 result
.append('</pre>')
664 append_text(xml
.tail
, result
)
668 def convert_programlisting(ctx
, xml
):
670 if xml
.attrib
.get('role', '') == 'example':
672 lang
= xml
.attrib
.get('language', 'c').lower()
673 if lang
not in LEXERS
:
674 LEXERS
[lang
] = get_lexer_by_name(lang
)
675 lexer
= LEXERS
.get(lang
, None)
677 highlighted
= highlight(xml
.text
, lexer
, HTML_FORMATTER
)
679 # we do own line-numbering
680 line_count
= highlighted
.count('\n')
681 source_lines
= '\n'.join([str(i
) for i
in range(1, line_count
+ 1)])
682 result
.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
685 <td class="listing_lines" align="right"><pre>%s</pre></td>
686 <td class="listing_code"><pre class="programlisting">%s</pre></td>
690 """ % (source_lines
, highlighted
))
692 logging
.warn('No pygments lexer for language="%s"', lang
)
693 result
.append('<pre class="programlisting">')
694 result
.append(xml
.text
)
695 result
.append('</pre>')
697 result
.append('<pre class="programlisting">')
698 append_text(xml
.text
, result
)
699 convert_inner(ctx
, xml
, result
)
700 result
.append('</pre>')
701 append_text(xml
.tail
, result
)
705 def convert_quote(ctx
, xml
):
706 result
= ['<span class="quote">"<span class="quote">']
707 append_text(xml
.text
, result
)
708 convert_inner(ctx
, xml
, result
)
709 result
.append('</span>"</span>')
710 append_text(xml
.tail
, result
)
714 def convert_refsect1(ctx
, xml
):
    # Add a divider between two consecutive refsect2
716 def convert_inner(ctx
, xml
, result
):
719 if child
.tag
== 'refsect2' and prev
is not None and prev
.tag
== child
.tag
:
720 result
.append('<hr>\n')
721 result
.extend(convert_tags
.get(child
.tag
, convert__unknown
)(ctx
, child
))
723 return convert_sect(ctx
, xml
, 'h2', convert_inner
)
def convert_refsect2(ctx, xml):
    """Convert a 'refsect2' element

    Hands the element over to convert_sect(), requesting 'h3' as the html
    tag for the section title.
    """
    return convert_sect(ctx, xml, h_tag='h3')
def convert_refsect3(ctx, xml):
    """Convert a 'refsect3' element

    Hands the element over to convert_sect(), requesting 'h4' as the html
    tag for the section title.
    """
    return convert_sect(ctx, xml, h_tag='h4')
734 def convert_row(ctx
, xml
):
736 convert_inner(ctx
, xml
, result
)
737 result
.append('</tr>\n')
def convert_sect1_tag(ctx, xml):
    """Convert a 'sect1' element that is rendered inline

    Hands the element over to convert_sect(), requesting 'h2' as the html
    tag for the section title.
    """
    return convert_sect(ctx, xml, h_tag='h2')
def convert_sect2(ctx, xml):
    """Convert a 'sect2' element

    Hands the element over to convert_sect(), requesting 'h3' as the html
    tag for the section title.
    """
    return convert_sect(ctx, xml, h_tag='h3')
def convert_sect3(ctx, xml):
    """Convert a 'sect3' element

    Hands the element over to convert_sect(), requesting 'h4' as the html
    tag for the section title.
    """
    return convert_sect(ctx, xml, h_tag='h4')
753 def convert_simpara(ctx
, xml
):
755 append_text(xml
.text
, result
)
756 result
.append('</p>')
757 append_text(xml
.tail
, result
)
761 def convert_span(ctx
, xml
):
762 result
= ['<span class="%s">' % xml
.tag
]
763 append_text(xml
.text
, result
)
764 convert_inner(ctx
, xml
, result
)
765 result
.append('</span>')
766 append_text(xml
.tail
, result
)
770 def convert_table(ctx
, xml
):
771 result
= ['<div class="table">']
772 if 'id' in xml
.attrib
:
773 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
774 title_tag
= xml
.find('title')
775 if title_tag
is not None:
776 result
.append('<p class="title"><b>')
777 # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
778 result
.extend(convert_title(ctx
, title_tag
))
779 result
.append('</b></p>')
780 xml
.remove(title_tag
)
781 result
.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
783 convert_inner(ctx
, xml
, result
)
785 result
.append('</table></div></div>')
786 append_text(xml
.tail
, result
)
790 def convert_tbody(ctx
, xml
):
792 ctx
['table.entry'] = 'td'
793 convert_inner(ctx
, xml
, result
)
794 result
.append('</tbody>')
795 # is in tgroup and there can be no 'text'
799 def convert_tgroup(ctx
, xml
):
800 # tgroup does not expand to anything, but the nested colspecs need to
801 # be put into a colgroup
802 cols
= xml
.findall('colspec')
805 result
.append('<colgroup>\n')
807 result
.extend(convert_colspec(ctx
, col
))
809 result
.append('</colgroup>\n')
810 convert_inner(ctx
, xml
, result
)
811 # is in informaltable and there can be no 'text'
815 def convert_thead(ctx
, xml
):
817 ctx
['table.entry'] = 'th'
818 convert_inner(ctx
, xml
, result
)
819 result
.append('</thead>')
820 # is in tgroup and there can be no 'text'
824 def convert_title(ctx
, xml
):
825 # This is always called from some context
827 append_text(xml
.text
, result
)
828 convert_inner(ctx
, xml
, result
)
829 append_text(xml
.tail
, result
)
833 def convert_ulink(ctx
, xml
):
834 result
= ['<a class="%s" href="%s">%s</a>' % (xml
.tag
, xml
.attrib
['url'], xml
.text
)]
836 result
.append(xml
.tail
)
840 def convert_userinput(ctx
, xml
):
841 result
= ['<span class="command"><strong>']
842 append_text(xml
.text
, result
)
843 convert_inner(ctx
, xml
, result
)
844 result
.append('</strong></span>')
845 append_text(xml
.tail
, result
)
849 def convert_variablelist(ctx
, xml
):
850 result
= ["""<div class="variablelist"><table border="0" class="variablelist">
852 <col align="left" valign="top">
856 convert_inner(ctx
, xml
, result
)
857 result
.append("""</tbody>
862 def convert_varlistentry(ctx
, xml
):
865 result
.append('<td><p>')
866 term
= xml
.find('term')
867 result
.extend(convert_span(ctx
, term
))
868 result
.append('</p></td>')
870 result
.append('<td>')
871 listitem
= xml
.find('listitem')
872 convert_inner(ctx
, listitem
, result
)
873 result
.append('</td>')
875 result
.append('<tr>')
879 # TODO(ensonic): turn into class with converters as functions and ctx as self
881 'abstract': convert_abstract
,
882 'acronym': convert_acronym
,
883 'anchor': convert_anchor
,
884 'application': convert_span
,
885 'bookinfo': convert_bookinfo
,
886 'blockquote': convert_blockquote
,
887 'caption': convert_div
,
888 'code': convert_code
,
889 'colspec': convert_colspec
,
890 'constant': convert_code
,
891 'command': convert_command
,
892 'corpauthor': convert_corpauthor
,
893 'emphasis': convert_span
,
894 'entry': convert_entry
,
895 'envar': convert_code
,
896 'footnote': convert_footnote
,
897 'filename': convert_code
,
898 'formalpara': convert_formalpara
,
899 'function': convert_code
,
900 'glossdef': convert_glossdef
,
901 'glossdiv': convert_glossdiv
,
902 'glossentry': convert_glossentry
,
903 'glossterm': convert_glossterm
,
904 'imageobject': convert_imageobject
,
905 'indexdiv': convert_indexdiv
,
906 'indexentry': convert_ignore
,
907 'indexterm': convert_skip
,
908 'informalexample': convert_div
,
909 'informaltable': convert_informaltable
,
910 'inlinemediaobject': convert_span
,
911 'itemizedlist': convert_itemizedlist
,
912 'legalnotice': convert_div
,
913 'link': convert_link
,
914 'listitem': convert_listitem
,
915 'literal': convert_code
,
916 'literallayout': convert_literallayout
,
917 'mediaobject': convert_div
,
919 'option': convert_code
,
920 'orderedlist': convert_orderedlist
,
921 'para': convert_para
,
922 'partintro': convert_div
,
923 'parameter': convert_em_class
,
924 'phrase': convert_phrase
,
925 'primaryie': convert_primaryie
,
926 'programlisting': convert_programlisting
,
927 'quote': convert_quote
,
928 'releaseinfo': convert_para_like
,
929 'refsect1': convert_refsect1
,
930 'refsect2': convert_refsect2
,
931 'refsect3': convert_refsect3
,
932 'replaceable': convert_em_class
,
933 'returnvalue': convert_span
,
935 'screen': convert_pre
,
936 'sect1': convert_sect1_tag
,
937 'sect2': convert_sect2
,
938 'sect3': convert_sect3
,
939 'simpara': convert_simpara
,
940 'structfield': convert_em_class
,
941 'structname': convert_span
,
942 'synopsis': convert_pre
,
943 'symbol': convert_span
,
944 'table': convert_table
,
945 'tbody': convert_tbody
,
946 'term': convert_span
,
947 'tgroup': convert_tgroup
,
948 'thead': convert_thead
,
949 'type': convert_span
,
950 'ulink': convert_ulink
,
951 'userinput': convert_userinput
,
952 'varname': convert_code
,
953 'variablelist': convert_variablelist
,
954 'varlistentry': convert_varlistentry
,
955 'warning': convert_div
,
960 HTML_HEADER
= """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
963 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
965 %s<link rel="stylesheet" href="style.css" type="text/css">
967 <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
971 def generate_head_links(ctx
):
974 '<link rel="home" href="%s" title="%s">\n' % (n
.filename
, n
.title
)
978 result
.append('<link rel="up" href="%s" title="%s">\n' % (n
.filename
, n
.title
))
979 if 'nav_prev' in ctx
:
981 result
.append('<link rel="prev" href="%s" title="%s">\n' % (n
.filename
, n
.title
))
982 if 'nav_next' in ctx
:
984 result
.append('<link rel="next" href="%s" title="%s">\n' % (n
.filename
, n
.title
))
985 return ''.join(result
)
988 def generate_nav_links(ctx
):
991 '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n
.filename
996 '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n
.filename
)
998 result
.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
999 if 'nav_prev' in ctx
:
1002 '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n
.filename
)
1004 result
.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
1005 if 'nav_next' in ctx
:
1008 '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n
.filename
)
1010 result
.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
1012 return ''.join(result
)
1015 def generate_toc(ctx
, node
):
1017 for c
in node
.children
:
1018 # TODO: urlencode the filename: urllib.parse.quote_plus()
1019 result
.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
1020 c
.title_tag
, c
.filename
, c
.title
))
1022 result
.append('<span class="%s"> — %s</span>' % (c
.subtitle_tag
, c
.subtitle
))
1023 result
.append('</dt>\n')
1025 result
.append('<dd><dl>')
1026 result
.extend(generate_toc(ctx
, c
))
1027 result
.append('</dl></dd>')
1031 def generate_basic_nav(ctx
):
1032 return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
1033 <tr valign="middle">
1034 <td width="100%%" align="left" class="shortcuts"></td>
1038 """ % generate_nav_links(ctx
)
1041 def generate_alpha_nav(ctx
, divs
, prefix
):
1044 title
= xml_get_title(s
)
1045 ix_nav
.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix
, title
, title
))
1047 return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
1048 <tr valign="middle">
1049 <td width="100%%" align="left" class="shortcuts">
1050 <span id="nav_index">
1057 """ % ('\n<span class="dim">|</span>\n'.join(ix_nav
), generate_nav_links(ctx
))
1060 def generate_refentry_nav(ctx
, refsect1s
, result
):
1061 result
.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
1062 <tr valign="middle">
1063 <td width="100%" align="left" class="shortcuts">
1064 <a href="#" class="shortcut">Top</a>""")
1067 # don't list TOC sections (role="xxx_proto")
1068 if s
.attrib
.get('role', '').endswith("_proto"):
1070 # skip section without 'id' attrs
1071 if 'id' not in s
.attrib
:
1074 title
= xml_get_title(s
)
1076 Â Â <span class="dim">|</span>Â
1077 <a href="#%s" class="shortcut">%s</a>
1078 """ % (s
.attrib
['id'], title
))
1084 """ % generate_nav_links(ctx
))
1087 def generate_footer(ctx
):
1089 if 'footnotes' in ctx
:
1090 result
.append("""<div class="footnotes">\n
1091 <br><hr style="width:100; text-align:left;margin-left: 0">
1093 for f
in ctx
['footnotes']:
1095 result
.append('</div>\n')
1101 node_id
= xml
.attrib
.get('id', None)
1105 logging
.info('%d: No "id" attribute on "%s", generating one',
1106 xml
.sourceline
, xml
.tag
)
1108 # Generate the 'id'. We need to walk up the xml-tree and check the positions
1110 parent
= xml
.getparent()
1111 while parent
is not None:
1112 children
= parent
.getchildren()
1113 ix
.insert(0, str(children
.index(xml
) + 1))
1115 parent
= xml
.getparent()
1116 # logging.warning('%s: id indexes: %s', node.filename, str(ix))
1117 return 'id-1.' + '.'.join(ix
)
1120 def convert_chunk_with_toc(ctx
, div_class
, title_tag
):
1123 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
)),
1124 generate_basic_nav(ctx
),
1125 '<div class="%s">' % div_class
,
1127 title
= node
.xml
.find('title')
1128 if title
is not None:
1130 <div class="titlepage">
1131 <%s class="title"><a name="%s"></a>%s</%s>
1133 title_tag
, get_id(node
), title
.text
, title_tag
))
1134 node
.xml
.remove(title
)
1136 toc
= generate_toc(ctx
, node
)
1138 # TODO: not all docbook page types use this extra heading
1139 result
.append("""<p><b>Table of Contents</b></p>
1144 result
.append("""</dl>
1147 convert_inner(ctx
, node
.xml
, result
)
1148 result
.extend(generate_footer(ctx
))
1149 result
.append("""</div>
1158 def convert_book(ctx
):
1161 HTML_HEADER
% (node
.title
, generate_head_links(ctx
)),
1162 """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
1163 <tr><th valign="middle"><p class="title">%s</p></th></tr>
1168 bookinfo
= node
.xml
.findall('bookinfo')[0]
1169 # we already used the title
1170 title
= bookinfo
.find('title')
1171 if title
is not None:
1172 bookinfo
.remove(title
)
1173 result
.extend(convert_bookinfo(ctx
, bookinfo
))
1174 result
.append("""<div class="toc">
1177 result
.extend(generate_toc(ctx
, node
.root
))
1178 result
.append("""</dl>
1181 result
.extend(generate_footer(ctx
))
1182 result
.append("""</div>
def convert_chapter(ctx):
    """Convert a 'chapter' chunk

    Hands the chunk over to convert_chunk_with_toc(), using 'chapter' as the
    class of the wrapping div and 'h2' as the html tag for the title.
    """
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1192 def convert_glossary(ctx
):
1194 glossdivs
= node
.xml
.findall('glossdiv')
1197 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
)),
1198 generate_alpha_nav(ctx
, glossdivs
, 'gls'),
1199 """<div class="glossary">
1200 <div class="titlepage"><h1 class="title">
1201 <a name="%s"></a>%s</h1>
1202 </div>""" % (get_id(node
), node
.title
)
1205 result
.extend(convert_glossdiv(ctx
, i
))
1206 result
.extend(generate_footer(ctx
))
1207 result
.append("""</div>
1213 def convert_index(ctx
):
1215 # Get all indexdivs under indexdiv
1216 indexdivs
= node
.xml
.find('indexdiv').findall('indexdiv')
1219 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
)),
1220 generate_alpha_nav(ctx
, indexdivs
, 'idx'),
1221 """<div class="index">
1222 <div class="titlepage"><h2 class="title">
1223 <a name="%s"></a>%s</h2>
1224 </div>""" % (get_id(node
), node
.title
)
1227 result
.extend(convert_indexdiv(ctx
, i
))
1228 result
.extend(generate_footer(ctx
))
1229 result
.append("""</div>
def convert_part(ctx):
    """Convert a 'part' chunk

    Hands the chunk over to convert_chunk_with_toc(), using 'part' as the
    class of the wrapping div and 'h1' as the html tag for the title.
    """
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1239 def convert_preface(ctx
):
1242 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
)),
1243 generate_basic_nav(ctx
),
1244 '<div class="preface">'
1246 title
= node
.xml
.find('title')
1247 if title
is not None:
1249 <div class="titlepage">
1250 <h2 class="title"><a name="%s"></a>%s</h2>
1251 </div>""" % (get_id(node
), title
.text
))
1252 node
.xml
.remove(title
)
1253 convert_inner(ctx
, node
.xml
, result
)
1254 result
.extend(generate_footer(ctx
))
1255 result
.append("""</div>
def convert_reference(ctx):
    """Convert a 'reference' chunk.

    Delegates to convert_chunk_with_toc() with the 'reference' div class and
    'h1' (presumably the heading tag used for the title).
    """
    div_class, heading = 'reference', 'h1'
    return convert_chunk_with_toc(ctx, div_class, heading)
1265 def convert_refentry(ctx
):
1267 node_id
= get_id(node
)
1268 refsect1s
= node
.xml
.findall('refsect1')
1271 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
))
1273 generate_refentry_nav(ctx
, refsect1s
, result
)
1275 <div class="refentry">
1277 <div class="refnamediv">
1278 <table width="100%%"><tr>
1280 <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
1281 <p>%s — module for gtk-doc unit test</p>
1283 <td class="gallery_image" valign="top" align="right"></td>
1286 """ % (node_id
, node_id
, node
.title
, node
.title
))
1289 result
.extend(convert_refsect1(ctx
, s
))
1290 result
.extend(generate_footer(ctx
))
1291 result
.append("""</div>
def convert_sect1(ctx):
    """Convert a 'sect1' chunk.

    Delegates to convert_chunk_with_toc() with the 'sect1' div class and
    'h2' (presumably the heading tag used for the title).
    """
    div_class, heading = 'sect1', 'h2'
    return convert_chunk_with_toc(ctx, div_class, heading)
# Dispatch table used by convert(): maps the name of a chunk node to the
# function that turns that chunk into a list of html strings.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = dict(
    book=convert_book,
    chapter=convert_chapter,
    glossary=convert_glossary,
    index=convert_index,
    part=convert_part,
    preface=convert_preface,
    reference=convert_reference,
    refentry=convert_refentry,
    sect1=convert_sect1,
)
def generate_nav_nodes(files, node):
    """Collect the navigation targets for one chunk.

    Args:
      files: list of all chunk nodes; must contain `node`
      node: the chunk to compute the navigation for

    Returns:
      A dict that always has 'nav_home' (node.root). 'nav_up' (node.parent)
      is added when the node has a parent, 'nav_prev'/'nav_next' are the
      neighbours of `node` in `files` and are added when they exist.
    """
    targets = {'nav_home': node.root}

    # nav params: up, prev, next
    if node.parent:
        targets['nav_up'] = node.parent

    pos = files.index(node)
    last = len(files) - 1
    if pos >= 1:
        targets['nav_prev'] = files[pos - 1]
    if pos < last:
        targets['nav_next'] = files[pos + 1]
    return targets
def convert(out_dir, module, files, node):
    """Convert the docbook chunks to a html file.

    The output file is opened (and thus created) before the converter lookup,
    so a chunk without a registered converter still yields an empty file and
    a warning.

    Args:
      out_dir: already created output dir
      module: the module name, passed on to the converters through the context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        # per-chunk context handed to the converter, extended with nav links
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        if node.name not in convert_chunks:
            logging.warning('Add converter/template for "%s"', node.name)
            return
        html.writelines(convert_chunks[node.name](ctx))
def create_devhelp2_toc(node):
    """Build the nested <sub> entries of the devhelp2 toc below `node`.

    Args:
      node: tree node whose children (recursively) become toc entries; each
            child needs 'title', 'filename' and 'children'

    Returns:
      list of strings; a child without children becomes a self-closing
      <sub/>, any other child an opening <sub>, its own entries and </sub>
    """
    entries = []
    for child in node.children:
        attrs = (child.title, child.filename)
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        entries.append('<sub name="%s" link="%s">\n' % attrs)
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into devhelp2 xml attributes.

    The condition holds entries such as since or deprecated, separated with
    '|'. An entry is either 'name:value' or just 'name' (e.g. deprecated can
    have no description), which gets an empty value.

    Args:
      node: object with an 'attrib' mapping (e.g. an xml element)

    Returns:
      '' when there is no 'condition' attribute, otherwise a string with a
      leading space followed by space separated name="value" pairs
    """
    condition = node.attrib.get('condition')
    if condition is None:
        return ''

    # The values end up inside double quoted xml attributes, so a literal
    # double quote has to be written as an entity or it would end the
    # attribute early.
    keywords = []
    for entry in condition.replace('"', '&quot;').split('|'):
        # only the first ':' separates name and value
        name, _, value = entry.partition(':')
        keywords.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(keywords)
1384 def create_devhelp2_refsect2_keyword(node
, base_link
):
1385 return' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
1386 node
.attrib
['role'], xml_get_title(node
), base_link
+ node
.attrib
['id'],
1387 create_devhelp2_condition_attribs(node
))
1390 def create_devhelp2_refsect3_keyword(node
, base_link
, title
, name
):
1391 return' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
1392 node
.attrib
['role'], title
, base_link
+ name
,
1393 create_devhelp2_condition_attribs(node
))
1396 def create_devhelp2(out_dir
, module
, xml
, files
):
1397 with
open(os
.path
.join(out_dir
, module
+ '.devhelp2'), 'wt',
1398 newline
='\n', encoding
='utf-8') as idx
:
1399 bookinfo_nodes
= xml
.xpath('/book/bookinfo')
1401 if bookinfo_nodes
is not None:
1402 bookinfo
= bookinfo_nodes
[0]
1403 title
= bookinfo
.xpath('./title/text()')[0]
1404 online_url
= bookinfo
.xpath('./releaseinfo/ulink[@role="online-location"]/@url')[0]
1405 # TODO: support author too (see devhelp2.xsl)
1406 # TODO: fixxref uses '--src-lang' to set the language
1408 """<?xml version="1.0" encoding="utf-8" standalone="no"?>
1409 <book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
1411 """ % (title
, module
, online_url
)
1414 result
.extend(create_devhelp2_toc(files
[0].root
))
1415 result
.append(""" </chapters>
1418 # keywords from all refsect2 and refsect3
1419 refsect2
= etree
.XPath('//refsect2[@role]')
1420 refsect3_enum
= etree
.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
1421 refsect3_enum_details
= etree
.XPath('entry[@role="enum_member_name"]/para')
1422 refsect3_struct
= etree
.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
1423 refsect3_struct_details
= etree
.XPath('entry[@role="struct_member_name"]/para/structfield')
1425 base_link
= node
.filename
+ '#'
1426 refsect2_nodes
= refsect2(node
.xml
)
1427 for refsect2_node
in refsect2_nodes
:
1428 result
.append(create_devhelp2_refsect2_keyword(refsect2_node
, base_link
))
1429 refsect3_nodes
= refsect3_enum(refsect2_node
)
1430 for refsect3_node
in refsect3_nodes
:
1431 details_node
= refsect3_enum_details(refsect3_node
)[0]
1432 name
= details_node
.attrib
['id']
1433 result
.append(create_devhelp2_refsect3_keyword(refsect3_node
, base_link
, details_node
.text
, name
))
1434 refsect3_nodes
= refsect3_struct(refsect2_node
)
1435 for refsect3_node
in refsect3_nodes
:
1436 details_node
= refsect3_struct_details(refsect3_node
)[0]
1437 name
= details_node
.attrib
['id']
1438 result
.append(create_devhelp2_refsect3_keyword(refsect3_node
, base_link
, name
, name
))
1440 result
.append(""" </functions>
def get_dirs(uninstalled):
    """Locate the gtk-doc data dir and the dir holding the style files.

    Args:
      uninstalled: if true, look next to the running script (sys.argv[0]) and
                   in $ABS_TOP_SRCDIR instead of the installed data dir

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    stylesheet = '/gtk-doc.xsl'
    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    if not os.path.exists(gtkdocdir + stylesheet):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + stylesheet):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled):
    """Generate the html files and the devhelp2 index for a module.

    Args:
      module: the module name; also used to name the .devhelp2 file
      index_file: path of the main docbook xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to pick the location of the style files
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and append the style definitions of HTML_FORMATTER to the copy
    copied_css = os.path.join(out_dir, 'style.css')
    with open(copied_css, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    root = tree.getroot()
    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    files = list(PreOrderIter(chunk(root)))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, root, files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
1500 logging
.info('options: %s', str(options
.__dict
__))
1501 module
= options
.args
[0]
1502 document
= options
.args
[1]
1504 # TODO: rename to 'html' later on
1505 # - right now in mkhtml, the dir is created by the Makefile and mkhtml
1506 # outputs into the working directory
1507 out_dir
= os
.path
.join(os
.path
.dirname(document
), 'db2html')
1510 except OSError as e
:
1511 if e
.errno
!= errno
.EEXIST
:
1514 sys
.exit(main(module
, document
, out_dir
, options
.uninstalled
))