2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
this tool will replace both without relying on external tools such as xsltproc
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
xml can be easily supported, but for some combinations of tags we prefer
39 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
40 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
41 attr on the <img> tag of the 'imageobject'
42 - glossary/index: depending on the parents, the headings as h1/h2
43 - maybe track depth when chunking
44 - handle 'label' attributes on part/chapter/section-types
45 - the titles will have a generated prefix, such as 'Part I:'
- in the toc it would be only the label: 'I.'
47 - we need to separate the toc data from the chunking tree
- since we don't chunk first 'section'/'sect1' those are missing from the
50 - the toc also lists 'sect2' (TODO: check how deep it goes)
51 - replace get_title with a result.extend(convert_title(ctx, title_tag))
53 - check each docbook tag if it can contain #PCDATA, if not don't check for
55 - consider some perf-warnings flag
56 - see 'No "id" attribute on'
59 - minify html: https://pypi.python.org/pypi/htmlmin/
62 sudo pip3 install anytree lxml pygments
66 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
67 xdg-open db2html/index.html
72 rm html-build.stamp; time make html-build.stamp
82 from anytree
import Node
, PreOrderIter
83 from copy
import deepcopy
85 from lxml
import etree
86 from pygments
import highlight
87 from pygments
.lexers
import CLexer
88 from pygments
.formatters
import HtmlFormatter
90 from . import config
, fixxref
93 # lazily constructed lexer cache
97 HTML_FORMATTER
= HtmlFormatter(nowrap
=True)
99 # http://www.sagehill.net/docbookxsl/Chunking.html
103 'bibliography', # in article or book
107 'glossary', # in article or book
108 'index', # in article or book
113 'sect1', # except first
114 'section', # if equivalent to sect1
120 class ChunkParams(object):
121 def __init__(self
, prefix
, parent
=None, min_idx
=0):
124 self
.min_idx
= min_idx
127 # TODO: look up the abbrevs and hierarchy for other tags
128 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
129 # https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
131 # If not defined, we can just create an example without an 'id' attr and see
134 'appendix': ChunkParams('app', 'book'),
135 'book': ChunkParams('bk'),
136 'chapter': ChunkParams('ch', 'book'),
137 'index': ChunkParams('ix', 'book'),
138 'part': ChunkParams('pt', 'book'),
139 'preface': ChunkParams('pr', 'book'),
140 'reference': ChunkParams('rn', 'book'),
141 'sect1': ChunkParams('s', 'chapter', 1),
142 'section': ChunkParams('s', 'chapter', 1),
146 '_': (etree
.XPath('./title'), None),
147 'book': (etree
.XPath('./bookinfo/title'), None),
149 etree
.XPath('./refmeta/refentrytitle'),
150 etree
.XPath('./refnamediv/refpurpose')
154 ID_XPATH
= etree
.XPath('//@id')
156 GLOSSENTRY_XPATH
= etree
.XPath('//glossentry')
162 def get_chunk_min_idx(tag
):
163 if tag
not in CHUNK_PARAMS
:
166 return CHUNK_PARAMS
[tag
].min_idx
169 def gen_chunk_name(node
, idx
):
170 if 'id' in node
.attrib
:
171 return node
.attrib
['id']
174 if tag
not in CHUNK_PARAMS
:
175 CHUNK_PARAMS
[tag
] = ChunkParams(node
.tag
[:2])
176 logging
.warning('Add CHUNK_PARAMS for "%s"', tag
)
178 naming
= CHUNK_PARAMS
[tag
]
179 name
= ('%s%02d' % (naming
.prefix
, idx
))
180 # handle parents to make names of nested tags unique
181 # TODO: we only need to prepend the parent if there are > 1 of them in the
183 # while naming.parent:
184 # parent = naming.parent
185 # if parent not in CHUNK_PARAMS:
187 # naming = CHUNK_PARAMS[parent]
188 # name = ('%s%02d' % (naming.prefix, idx)) + name
189 logging
.info('Gen chunk name: "%s"', name
)
193 def get_chunk_titles(node
):
195 if tag
not in TITLE_XPATHS
:
197 (title
, subtitle
) = TITLE_XPATHS
['_']
199 (title
, subtitle
) = TITLE_XPATHS
[tag
]
205 if xml
.tag
!= 'title':
206 result
['title_tag'] = xml
.tag
208 result
['title_tag'] = tag
211 xml
= subtitle(node
)[0]
212 result
['subtitle'] = xml
.text
213 result
['subtitle_tag'] = xml
.tag
215 result
['subtitle'] = None
216 result
['subtitle_tag'] = None
220 def chunk(xml_node
, idx
=0, parent
=None):
223 The first time, we're called with parent=None and in that case we return
224 the new_node as the root of the tree
227 # also check idx to handle 'sect1'/'section' special casing
228 if tag
in CHUNK_TAGS
and idx
>= get_chunk_min_idx(tag
):
229 logging
.info('chunk tag: "%s"[%d]', tag
, idx
)
231 # remove the xml-node from the parent
232 sub_tree
= etree
.ElementTree(deepcopy(xml_node
)).getroot()
233 xml_node
.getparent().remove(xml_node
)
236 title_args
= get_chunk_titles(xml_node
)
237 chunk_name
= gen_chunk_name(xml_node
, (idx
+ 1))
238 parent
= Node(tag
, parent
=parent
, xml
=xml_node
,
239 filename
=chunk_name
+ '.html', **title_args
)
242 for child
in xml_node
:
243 new_parent
= chunk(child
, idx
, parent
)
244 if child
.tag
in CHUNK_TAGS
:
250 def add_id_links(files
, links
):
252 chunk_name
= node
.filename
[:-5]
253 chunk_base
= node
.filename
+ '#'
254 for attr
in ID_XPATH(node
.xml
):
255 if attr
== chunk_name
:
256 links
[attr
] = node
.filename
258 links
[attr
] = chunk_base
+ attr
261 def build_glossary(files
):
263 if node
.xml
.tag
!= 'glossary':
265 for term
in GLOSSENTRY_XPATH(node
.xml
):
266 # TODO: there can be all kind of things in a glossary. This only supports
267 # what we commonly use
268 key
= etree
.tostring(term
.find('glossterm'), method
="text", encoding
=str).strip()
269 value
= etree
.tostring(term
.find('glossdef'), method
="text", encoding
=str).strip()
270 glossary
[key
] = value
271 # logging.debug('glosentry: %s:%s', key, value)
277 def convert_inner(ctx
, xml
, result
):
279 result
.extend(convert_tags
.get(child
.tag
, convert__unknown
)(ctx
, child
))
282 def convert_ignore(ctx
, xml
):
284 convert_inner(ctx
, xml
, result
)
288 def convert_skip(ctx
, xml
):
def append_text(text, result):
    """Append character data to the html output, escaping markup characters.

    Args:
        text: the 'text' or 'tail' of an xml node; may be None.
        result: list of html fragments, extended in place.

    Text that is None, empty or whitespace-only is dropped and 'result' is
    left untouched.
    """
    if text and text.strip():
        # The text comes from parsed xml, so a literal '<' or '>' in it is
        # content and must be written as an entity; otherwise it would be
        # interpreted as a tag in the generated html.
        # NOTE(review): '&' is not escaped here - confirm whether callers rely
        # on entity references being passed through verbatim.
        result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
300 def convert__unknown(ctx
, xml
):
301 # don't recurse on subchunks
302 if xml
.tag
in CHUNK_TAGS
:
304 if isinstance(xml
, etree
._Comment
):
305 return ['<!-- ' + xml
.text
+ '-->\n']
308 if xml
.tag
not in missing_tags
:
309 logging
.warning('Add tag converter for "%s"', xml
.tag
)
310 missing_tags
[xml
.tag
] = True
311 result
= ['<!-- ' + xml
.tag
+ '-->\n']
312 convert_inner(ctx
, xml
, result
)
313 result
.append('<!-- /' + xml
.tag
+ '-->\n')
317 def convert_sect(ctx
, xml
, h_tag
, inner_func
=convert_inner
):
318 result
= ['<div class="%s">\n' % xml
.tag
]
319 title
= xml
.find('title')
320 if title
is not None:
321 if 'id' in xml
.attrib
:
322 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
323 result
.append('<%s>%s</%s>' % (h_tag
, title
.text
, h_tag
))
325 append_text(xml
.text
, result
)
326 inner_func(ctx
, xml
, result
)
327 result
.append('</div>')
328 append_text(xml
.tail
, result
)
332 def xml_get_title(xml
):
333 title
= xml
.find('title')
334 if title
is not None:
337 # TODO(ensonic): any way to get the file (inlcudes) too?
338 logging
.warning('%s: Expected title tag under "%s %s"', xml
.sourceline
, xml
.tag
, str(xml
.attrib
))
345 def convert_abstract(ctx
, xml
):
346 result
= ["""<div class="abstract">
347 <p class="title"><b>Abstract</b></p>"""]
348 append_text(xml
.text
, result
)
349 convert_inner(ctx
, xml
, result
)
350 result
.append('</div>')
351 append_text(xml
.tail
, result
)
355 def convert_acronym(ctx
, xml
):
357 title
= glossary
.get(key
, '')
358 # TODO: print a sensible warning if missing
359 result
= ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title
, key
)]
361 result
.append(xml
.tail
)
def convert_anchor(ctx, xml):
    """Convert a docbook 'anchor' into an empty, named html anchor.

    Args:
        ctx: conversion context (unused here).
        xml: the 'anchor' node; its 'id' attribute becomes the anchor name.

    Returns:
        A single-element list with the html fragment.

    Raises:
        KeyError: if the node has no 'id' attribute.
    """
    anchor_name = xml.attrib['id']
    fragment = '<a name="%s"></a>' % anchor_name
    return [fragment]
369 def convert_bookinfo(ctx
, xml
):
370 result
= ['<div class="titlepage">']
371 convert_inner(ctx
, xml
, result
)
372 result
.append("""<hr>
375 result
.append(xml
.tail
)
379 def convert_blockquote(ctx
, xml
):
380 result
= ['<div class="blockquote">\n<blockquote class="blockquote">']
381 append_text(xml
.text
, result
)
382 convert_inner(ctx
, xml
, result
)
383 result
.append('</blockquote>\n</div>')
384 append_text(xml
.tail
, result
)
388 def convert_code(ctx
, xml
):
389 result
= ['<code class="%s">' % xml
.tag
]
390 append_text(xml
.text
, result
)
391 convert_inner(ctx
, xml
, result
)
392 result
.append('</code>')
393 append_text(xml
.tail
, result
)
397 def convert_colspec(ctx
, xml
):
401 result
.append(' class="%s"' % a
['colname'])
403 result
.append(' width="%s"' % a
['colwidth'])
405 # is in tgroup and there can be no 'text'
409 def convert_command(ctx
, xml
):
410 result
= ['<strong class="userinput"><code>']
411 append_text(xml
.text
, result
)
412 convert_inner(ctx
, xml
, result
)
413 result
.append('</code></strong>')
414 append_text(xml
.tail
, result
)
418 def convert_corpauthor(ctx
, xml
):
419 result
= ['<div><h3 class="corpauthor">\n']
420 append_text(xml
.text
, result
)
421 convert_inner(ctx
, xml
, result
)
422 result
.append('</h3></div>\n')
423 append_text(xml
.tail
, result
)
427 def convert_div(ctx
, xml
):
428 result
= ['<div class="%s">\n' % xml
.tag
]
429 append_text(xml
.text
, result
)
430 convert_inner(ctx
, xml
, result
)
431 result
.append('</div>')
432 append_text(xml
.tail
, result
)
436 def convert_em_class(ctx
, xml
):
437 result
= ['<em class="%s"><code>' % xml
.tag
]
438 append_text(xml
.text
, result
)
439 convert_inner(ctx
, xml
, result
)
440 result
.append('</code></em>')
441 append_text(xml
.tail
, result
)
445 def convert_entry(ctx
, xml
):
446 entry_type
= ctx
['table.entry']
447 result
= ['<' + entry_type
]
448 if 'role' in xml
.attrib
:
449 result
.append(' class="%s"' % xml
.attrib
['role'])
450 if 'morerows' in xml
.attrib
:
451 result
.append(' rowspan="%s"' % (1 + int(xml
.attrib
['morerows'])))
453 append_text(xml
.text
, result
)
454 convert_inner(ctx
, xml
, result
)
455 result
.append('</' + entry_type
+ '>')
456 append_text(xml
.tail
, result
)
460 def convert_footnote(ctx
, xml
):
461 footnotes
= ctx
.get('footnotes', [])
462 # footnotes idx is not per page, but per doc
467 # need a pair of ids for each footnote (docbook generates different ids)
468 this_id
= 'footnote-%d' % idx
469 that_id
= 'ftn.' + this_id
471 inner
= ['<div id="%s" class="footnote">' % that_id
]
472 inner
.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
474 # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
475 # get double nested paras :/.
476 # convert_inner(ctx, xml, inner)
477 para
= xml
.find('para')
479 para
= xml
.find('simpara')
481 inner
.append(para
.text
)
483 logging
.warning('%s: Unhandled footnote content: %s', xml
.sourceline
,
484 etree
.tostring(xml
, method
="text", encoding
=str).strip())
485 inner
.append('</p></div>')
486 footnotes
.append(inner
)
487 ctx
['footnotes'] = footnotes
488 return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
489 that_id
, this_id
, idx
)]
492 def convert_formalpara(ctx
, xml
):
494 title_tag
= xml
.find('title')
495 result
= ['<p><b>%s</b>' % title_tag
.text
]
496 para_tag
= xml
.find('para')
497 append_text(para_tag
.text
, result
)
498 convert_inner(ctx
, para_tag
, result
)
499 append_text(para_tag
.tail
, result
)
500 result
.append('</p>')
501 append_text(xml
.tail
, result
)
505 def convert_glossdef(ctx
, xml
):
506 result
= ['<dd class="glossdef">']
507 convert_inner(ctx
, xml
, result
)
508 result
.append('</dd>\n')
512 def convert_glossdiv(ctx
, xml
):
513 title_tag
= xml
.find('title')
514 title
= title_tag
.text
515 xml
.remove(title_tag
)
517 '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title
, title
)
519 convert_inner(ctx
, xml
, result
)
523 def convert_glossentry(ctx
, xml
):
525 convert_inner(ctx
, xml
, result
)
529 def convert_glossterm(ctx
, xml
):
532 anchor
= xml
.find('anchor')
533 if anchor
is not None:
534 glossid
= anchor
.attrib
.get('id', '')
535 text
+= anchor
.tail
or ''
536 text
+= xml
.text
or ''
538 glossid
= 'glossterm-' + text
540 '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
545 def convert_imageobject(ctx
, xml
):
546 imagedata
= xml
.find('imagedata')
547 if imagedata
is not None:
548 # TODO(ensonic): warn on missing fileref attr?
549 return ['<img src="%s">' % imagedata
.attrib
.get('fileref', '')]
554 def convert_indexdiv(ctx
, xml
):
555 title_tag
= xml
.find('title')
556 title
= title_tag
.text
557 xml
.remove(title_tag
)
559 '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title
, title
)
561 convert_inner(ctx
, xml
, result
)
565 def convert_informaltable(ctx
, xml
):
566 result
= ['<div class="informaltable"><table class="informaltable"']
568 if 'pgwide' in a
and a
['pgwide'] == '1':
569 result
.append(' width="100%"')
570 if 'frame' in a
and a
['frame'] == 'none':
571 result
.append(' border="0"')
573 convert_inner(ctx
, xml
, result
)
574 result
.append('</table></div>')
576 result
.append(xml
.tail
)
580 def convert_itemizedlist(ctx
, xml
):
581 result
= ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
582 convert_inner(ctx
, xml
, result
)
583 result
.append('</ul></div>')
585 result
.append(xml
.tail
)
589 def convert_link(ctx
, xml
):
590 linkend
= xml
.attrib
['linkend']
591 if linkend
in fixxref
.NoLinks
:
596 convert_inner(ctx
, xml
, link_text
)
597 append_text(xml
.text
, link_text
)
598 # TODO: fixxref does some weird checks in xml.text
599 result
= [fixxref
.MakeXRef(ctx
['module'], '', 0, linkend
, ''.join(link_text
))]
600 append_text(xml
.tail
, result
)
604 def convert_listitem(ctx
, xml
):
605 result
= ['<li class="listitem">']
606 convert_inner(ctx
, xml
, result
)
607 result
.append('</li>')
608 # is in itemizedlist and there can be no 'text'
612 def convert_literallayout(ctx
, xml
):
613 result
= ['<div class="literallayout"><p><br>\n']
614 append_text(xml
.text
, result
)
615 convert_inner(ctx
, xml
, result
)
616 result
.append('</p></div>')
617 append_text(xml
.tail
, result
)
def convert_orderedlist(ctx, xml):
    """Convert a docbook 'orderedlist' into a numbered html list.

    Args:
        ctx: conversion context, passed through to the child converters.
        xml: the 'orderedlist' node.

    Returns:
        List of html fragments: the opening div/ol, the converted children,
        the closing tags and the node's tail text (if any).
    """
    # The css class is the docbook tag name, same as for 'itemizedlist'.
    result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, result)
    result.append('</ol></div>')
    append_text(xml.tail, result)
    return result
629 def convert_para(ctx
, xml
):
631 if 'id' in xml
.attrib
:
632 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
634 append_text(xml
.text
, result
)
635 convert_inner(ctx
, xml
, result
)
636 result
.append('</p>')
637 append_text(xml
.tail
, result
)
641 def convert_para_like(ctx
, xml
):
643 if 'id' in xml
.attrib
:
644 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
645 result
.append('<p class="%s">' % xml
.tag
)
646 append_text(xml
.text
, result
)
647 convert_inner(ctx
, xml
, result
)
648 result
.append('</p>')
649 append_text(xml
.tail
, result
)
653 def convert_phrase(ctx
, xml
):
655 if 'role' in xml
.attrib
:
656 result
.append(' class="%s">' % xml
.attrib
['role'])
659 append_text(xml
.text
, result
)
660 convert_inner(ctx
, xml
, result
)
661 result
.append('</span>')
662 append_text(xml
.tail
, result
)
666 def convert_primaryie(ctx
, xml
):
668 convert_inner(ctx
, xml
, result
)
669 result
.append('\n</dt>\n<dd></dd>\n')
673 def convert_pre(ctx
, xml
):
674 result
= ['<pre class="%s">\n' % xml
.tag
]
675 append_text(xml
.text
, result
)
676 convert_inner(ctx
, xml
, result
)
677 result
.append('</pre>')
678 append_text(xml
.tail
, result
)
682 def convert_programlisting(ctx
, xml
):
684 if xml
.attrib
.get('role', '') == 'example':
686 lang
= xml
.attrib
.get('language', 'c').lower()
687 if lang
not in LEXERS
:
688 LEXERS
[lang
] = get_lexer_by_name(lang
)
689 lexer
= LEXERS
.get(lang
, None)
691 highlighted
= highlight(xml
.text
, lexer
, HTML_FORMATTER
)
693 # we do own line-numbering
694 line_count
= highlighted
.count('\n')
695 source_lines
= '\n'.join([str(i
) for i
in range(1, line_count
+ 1)])
696 result
.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
699 <td class="listing_lines" align="right"><pre>%s</pre></td>
700 <td class="listing_code"><pre class="programlisting">%s</pre></td>
704 """ % (source_lines
, highlighted
))
706 logging
.warn('No pygments lexer for language="%s"', lang
)
707 result
.append('<pre class="programlisting">')
708 result
.append(xml
.text
)
709 result
.append('</pre>')
711 result
.append('<pre class="programlisting">')
712 append_text(xml
.text
, result
)
713 convert_inner(ctx
, xml
, result
)
714 result
.append('</pre>')
715 append_text(xml
.tail
, result
)
719 def convert_quote(ctx
, xml
):
720 result
= ['<span class="quote">"<span class="quote">']
721 append_text(xml
.text
, result
)
722 convert_inner(ctx
, xml
, result
)
723 result
.append('</span>"</span>')
724 append_text(xml
.tail
, result
)
728 def convert_refsect1(ctx
, xml
):
729 # Add a divider between two consequitive refsect2
730 def convert_inner(ctx
, xml
, result
):
733 if child
.tag
== 'refsect2' and prev
is not None and prev
.tag
== child
.tag
:
734 result
.append('<hr>\n')
735 result
.extend(convert_tags
.get(child
.tag
, convert__unknown
)(ctx
, child
))
737 return convert_sect(ctx
, xml
, 'h2', convert_inner
)
def convert_refsect2(ctx, xml):
    """Convert a 'refsect2' node via convert_sect, using an 'h3' heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_refsect3(ctx, xml):
    """Convert a 'refsect3' node via convert_sect, using an 'h4' heading."""
    return convert_sect(ctx, xml, h_tag='h4')
748 def convert_row(ctx
, xml
):
750 convert_inner(ctx
, xml
, result
)
751 result
.append('</tr>\n')
def convert_sect1_tag(ctx, xml):
    """Convert a 'sect1' node via convert_sect, using an 'h2' heading."""
    return convert_sect(ctx, xml, h_tag='h2')
def convert_sect2(ctx, xml):
    """Convert a 'sect2' node via convert_sect, using an 'h3' heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_sect3(ctx, xml):
    """Convert a 'sect3' node via convert_sect, using an 'h4' heading."""
    return convert_sect(ctx, xml, h_tag='h4')
767 def convert_simpara(ctx
, xml
):
769 append_text(xml
.text
, result
)
770 result
.append('</p>')
771 append_text(xml
.tail
, result
)
775 def convert_span(ctx
, xml
):
776 result
= ['<span class="%s">' % xml
.tag
]
777 append_text(xml
.text
, result
)
778 convert_inner(ctx
, xml
, result
)
779 result
.append('</span>')
780 append_text(xml
.tail
, result
)
784 def convert_table(ctx
, xml
):
785 result
= ['<div class="table">']
786 if 'id' in xml
.attrib
:
787 result
.append('<a name="%s"></a>' % xml
.attrib
['id'])
788 title_tag
= xml
.find('title')
789 if title_tag
is not None:
790 result
.append('<p class="title"><b>')
791 # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
792 result
.extend(convert_title(ctx
, title_tag
))
793 result
.append('</b></p>')
794 xml
.remove(title_tag
)
795 result
.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
797 convert_inner(ctx
, xml
, result
)
799 result
.append('</table></div></div>')
800 append_text(xml
.tail
, result
)
804 def convert_tbody(ctx
, xml
):
806 ctx
['table.entry'] = 'td'
807 convert_inner(ctx
, xml
, result
)
808 result
.append('</tbody>')
809 # is in tgroup and there can be no 'text'
813 def convert_tgroup(ctx
, xml
):
814 # tgroup does not expand to anything, but the nested colspecs need to
815 # be put into a colgroup
816 cols
= xml
.findall('colspec')
819 result
.append('<colgroup>\n')
821 result
.extend(convert_colspec(ctx
, col
))
823 result
.append('</colgroup>\n')
824 convert_inner(ctx
, xml
, result
)
825 # is in informaltable and there can be no 'text'
829 def convert_thead(ctx
, xml
):
831 ctx
['table.entry'] = 'th'
832 convert_inner(ctx
, xml
, result
)
833 result
.append('</thead>')
834 # is in tgroup and there can be no 'text'
838 def convert_title(ctx
, xml
):
839 # This is always called from some context
841 append_text(xml
.text
, result
)
842 convert_inner(ctx
, xml
, result
)
843 append_text(xml
.tail
, result
)
847 def convert_ulink(ctx
, xml
):
848 result
= ['<a class="%s" href="%s">%s</a>' % (xml
.tag
, xml
.attrib
['url'], xml
.text
)]
850 result
.append(xml
.tail
)
854 def convert_userinput(ctx
, xml
):
855 result
= ['<span class="command"><strong>']
856 append_text(xml
.text
, result
)
857 convert_inner(ctx
, xml
, result
)
858 result
.append('</strong></span>')
859 append_text(xml
.tail
, result
)
863 def convert_variablelist(ctx
, xml
):
864 result
= ["""<div class="variablelist"><table border="0" class="variablelist">
866 <col align="left" valign="top">
870 convert_inner(ctx
, xml
, result
)
871 result
.append("""</tbody>
876 def convert_varlistentry(ctx
, xml
):
879 result
.append('<td><p>')
880 term
= xml
.find('term')
881 result
.extend(convert_span(ctx
, term
))
882 result
.append('</p></td>')
884 result
.append('<td>')
885 listitem
= xml
.find('listitem')
886 convert_inner(ctx
, listitem
, result
)
887 result
.append('</td>')
889 result
.append('<tr>')
893 # TODO(ensonic): turn into class with converters as functions and ctx as self
895 'abstract': convert_abstract
,
896 'acronym': convert_acronym
,
897 'anchor': convert_anchor
,
898 'application': convert_span
,
899 'bookinfo': convert_bookinfo
,
900 'blockquote': convert_blockquote
,
901 'caption': convert_div
,
902 'code': convert_code
,
903 'colspec': convert_colspec
,
904 'constant': convert_code
,
905 'command': convert_command
,
906 'corpauthor': convert_corpauthor
,
907 'emphasis': convert_span
,
908 'entry': convert_entry
,
909 'envar': convert_code
,
910 'footnote': convert_footnote
,
911 'filename': convert_code
,
912 'formalpara': convert_formalpara
,
913 'function': convert_code
,
914 'glossdef': convert_glossdef
,
915 'glossdiv': convert_glossdiv
,
916 'glossentry': convert_glossentry
,
917 'glossterm': convert_glossterm
,
918 'imageobject': convert_imageobject
,
919 'indexdiv': convert_indexdiv
,
920 'indexentry': convert_ignore
,
921 'indexterm': convert_skip
,
922 'informalexample': convert_div
,
923 'informaltable': convert_informaltable
,
924 'inlinemediaobject': convert_span
,
925 'itemizedlist': convert_itemizedlist
,
926 'legalnotice': convert_div
,
927 'link': convert_link
,
928 'listitem': convert_listitem
,
929 'literal': convert_code
,
930 'literallayout': convert_literallayout
,
931 'mediaobject': convert_div
,
933 'option': convert_code
,
934 'orderedlist': convert_orderedlist
,
935 'para': convert_para
,
936 'partintro': convert_div
,
937 'parameter': convert_em_class
,
938 'phrase': convert_phrase
,
939 'primaryie': convert_primaryie
,
940 'programlisting': convert_programlisting
,
941 'quote': convert_quote
,
942 'releaseinfo': convert_para_like
,
943 'refsect1': convert_refsect1
,
944 'refsect2': convert_refsect2
,
945 'refsect3': convert_refsect3
,
946 'replaceable': convert_em_class
,
947 'returnvalue': convert_span
,
949 'screen': convert_pre
,
950 'sect1': convert_sect1_tag
,
951 'sect2': convert_sect2
,
952 'sect3': convert_sect3
,
953 'simpara': convert_simpara
,
954 'structfield': convert_em_class
,
955 'structname': convert_span
,
956 'synopsis': convert_pre
,
957 'symbol': convert_span
,
958 'table': convert_table
,
959 'tbody': convert_tbody
,
960 'term': convert_span
,
961 'tgroup': convert_tgroup
,
962 'thead': convert_thead
,
963 'type': convert_span
,
964 'ulink': convert_ulink
,
965 'userinput': convert_userinput
,
966 'varname': convert_code
,
967 'variablelist': convert_variablelist
,
968 'varlistentry': convert_varlistentry
,
969 'warning': convert_div
,
974 HTML_HEADER
= """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
977 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
979 %s<link rel="stylesheet" href="style.css" type="text/css">
981 <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
985 def generate_head_links(ctx
):
988 '<link rel="home" href="%s" title="%s">\n' % (n
.filename
, n
.title
)
992 result
.append('<link rel="up" href="%s" title="%s">\n' % (n
.filename
, n
.title
))
993 if 'nav_prev' in ctx
:
995 result
.append('<link rel="prev" href="%s" title="%s">\n' % (n
.filename
, n
.title
))
996 if 'nav_next' in ctx
:
998 result
.append('<link rel="next" href="%s" title="%s">\n' % (n
.filename
, n
.title
))
999 return ''.join(result
)
1002 def generate_nav_links(ctx
):
1005 '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n
.filename
1010 '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n
.filename
)
1012 result
.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
1013 if 'nav_prev' in ctx
:
1016 '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n
.filename
)
1018 result
.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
1019 if 'nav_next' in ctx
:
1022 '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n
.filename
)
1024 result
.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
1026 return ''.join(result
)
1029 def generate_toc(ctx
, node
):
1031 for c
in node
.children
:
1032 # TODO: urlencode the filename: urllib.parse.quote_plus()
1033 result
.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
1034 c
.title_tag
, c
.filename
, c
.title
))
1036 result
.append('<span class="%s"> — %s</span>' % (c
.subtitle_tag
, c
.subtitle
))
1037 result
.append('</dt>\n')
1039 result
.append('<dd><dl>')
1040 result
.extend(generate_toc(ctx
, c
))
1041 result
.append('</dl></dd>')
1045 def generate_basic_nav(ctx
):
1046 return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
1047 <tr valign="middle">
1048 <td width="100%%" align="left" class="shortcuts"></td>
1052 """ % generate_nav_links(ctx
)
1055 def generate_alpha_nav(ctx
, divs
, prefix
):
1058 title
= xml_get_title(s
)
1059 ix_nav
.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix
, title
, title
))
1061 return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
1062 <tr valign="middle">
1063 <td width="100%%" align="left" class="shortcuts">
1064 <span id="nav_index">
1071 """ % ('\n<span class="dim">|</span>\n'.join(ix_nav
), generate_nav_links(ctx
))
1074 def generate_refentry_nav(ctx
, refsect1s
, result
):
1075 result
.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
1076 <tr valign="middle">
1077 <td width="100%" align="left" class="shortcuts">
1078 <a href="#" class="shortcut">Top</a>""")
1081 # don't list TOC sections (role="xxx_proto")
1082 if s
.attrib
.get('role', '').endswith("_proto"):
1084 # skip section without 'id' attrs
1085 if 'id' not in s
.attrib
:
1088 title
= xml_get_title(s
)
1090 Â Â <span class="dim">|</span>Â
1091 <a href="#%s" class="shortcut">%s</a>
1092 """ % (s
.attrib
['id'], title
))
1098 """ % generate_nav_links(ctx
))
1101 def generate_footer(ctx
):
1103 if 'footnotes' in ctx
:
1104 result
.append("""<div class="footnotes">\n
1105 <br><hr style="width:100; text-align:left;margin-left: 0">
1107 for f
in ctx
['footnotes']:
1109 result
.append('</div>\n')
1115 node_id
= xml
.attrib
.get('id', None)
1119 logging
.info('%d: No "id" attribute on "%s", generating one',
1120 xml
.sourceline
, xml
.tag
)
1122 # Generate the 'id'. We need to walk up the xml-tree and check the positions
1124 parent
= xml
.getparent()
1125 while parent
is not None:
1126 children
= parent
.getchildren()
1127 ix
.insert(0, str(children
.index(xml
) + 1))
1129 parent
= xml
.getparent()
1130 # logging.warning('%s: id indexes: %s', node.filename, str(ix))
1131 return 'id-1.' + '.'.join(ix
)
1134 def convert_chunk_with_toc(ctx
, div_class
, title_tag
):
1137 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
)),
1138 generate_basic_nav(ctx
),
1139 '<div class="%s">' % div_class
,
1141 title
= node
.xml
.find('title')
1142 if title
is not None:
1144 <div class="titlepage">
1145 <%s class="title"><a name="%s"></a>%s</%s>
1147 title_tag
, get_id(node
), title
.text
, title_tag
))
1148 node
.xml
.remove(title
)
1150 toc
= generate_toc(ctx
, node
)
1152 # TODO: not all docbook page types use this extra heading
1153 result
.append("""<p><b>Table of Contents</b></p>
1158 result
.append("""</dl>
1161 convert_inner(ctx
, node
.xml
, result
)
1162 result
.extend(generate_footer(ctx
))
1163 result
.append("""</div>
1172 def convert_book(ctx
):
1175 HTML_HEADER
% (node
.title
, generate_head_links(ctx
)),
1176 """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
1177 <tr><th valign="middle"><p class="title">%s</p></th></tr>
1182 bookinfo
= node
.xml
.findall('bookinfo')[0]
1183 # we already used the title
1184 title
= bookinfo
.find('title')
1185 if title
is not None:
1186 bookinfo
.remove(title
)
1187 result
.extend(convert_bookinfo(ctx
, bookinfo
))
1188 result
.append("""<div class="toc">
1191 result
.extend(generate_toc(ctx
, node
.root
))
1192 result
.append("""</dl>
1195 result
.extend(generate_footer(ctx
))
1196 result
.append("""</div>
def convert_chapter(ctx):
    """Convert a 'chapter' chunk.

    Delegates to convert_chunk_with_toc with the 'chapter' div class and an
    'h2' title tag; 'ctx' is passed through unchanged.
    """
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1206 def convert_glossary(ctx
):
1208 glossdivs
= node
.xml
.findall('glossdiv')
1211 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
)),
1212 generate_alpha_nav(ctx
, glossdivs
, 'gls'),
1213 """<div class="glossary">
1214 <div class="titlepage"><h1 class="title">
1215 <a name="%s"></a>%s</h1>
1216 </div>""" % (get_id(node
), node
.title
)
1219 result
.extend(convert_glossdiv(ctx
, i
))
1220 result
.extend(generate_footer(ctx
))
1221 result
.append("""</div>
1227 def convert_index(ctx
):
1229 # Get all indexdivs under indexdiv
1230 indexdivs
= node
.xml
.find('indexdiv').findall('indexdiv')
1233 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
)),
1234 generate_alpha_nav(ctx
, indexdivs
, 'idx'),
1235 """<div class="index">
1236 <div class="titlepage"><h2 class="title">
1237 <a name="%s"></a>%s</h2>
1238 </div>""" % (get_id(node
), node
.title
)
1241 result
.extend(convert_indexdiv(ctx
, i
))
1242 result
.extend(generate_footer(ctx
))
1243 result
.append("""</div>
def convert_part(ctx):
    """Convert a 'part' chunk.

    Delegates to convert_chunk_with_toc with the 'part' div class and an
    'h1' title tag; 'ctx' is passed through unchanged.
    """
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1253 def convert_preface(ctx
):
1256 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
)),
1257 generate_basic_nav(ctx
),
1258 '<div class="preface">'
1260 title
= node
.xml
.find('title')
1261 if title
is not None:
1263 <div class="titlepage">
1264 <h2 class="title"><a name="%s"></a>%s</h2>
1265 </div>""" % (get_id(node
), title
.text
))
1266 node
.xml
.remove(title
)
1267 convert_inner(ctx
, node
.xml
, result
)
1268 result
.extend(generate_footer(ctx
))
1269 result
.append("""</div>
def convert_reference(ctx):
    """Convert a 'reference' chunk.

    Thin wrapper that hands the context to convert_chunk_with_toc() together
    with 'reference' and 'h1' (presumably the css class and the heading tag
    used for the title - confirm against convert_chunk_with_toc()).
    """
    return convert_chunk_with_toc(ctx, 'reference', 'h1')
1279 def convert_refentry(ctx
):
1281 node_id
= get_id(node
)
1282 refsect1s
= node
.xml
.findall('refsect1')
1285 HTML_HEADER
% (node
.title
+ ": " + node
.root
.title
, generate_head_links(ctx
))
1287 generate_refentry_nav(ctx
, refsect1s
, result
)
1289 <div class="refentry">
1291 <div class="refnamediv">
1292 <table width="100%%"><tr>
1294 <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
1295 <p>%s — module for gtk-doc unit test</p>
1297 <td class="gallery_image" valign="top" align="right"></td>
1300 """ % (node_id
, node_id
, node
.title
, node
.title
))
1303 result
.extend(convert_refsect1(ctx
, s
))
1304 result
.extend(generate_footer(ctx
))
1305 result
.append("""</div>
def convert_sect1(ctx):
    """Convert a 'sect1' chunk.

    Thin wrapper that hands the context to convert_chunk_with_toc() together
    with 'sect1' and 'h2' (presumably the css class and the heading tag used
    for the title - confirm against convert_chunk_with_toc()).
    """
    return convert_chunk_with_toc(ctx, 'sect1', 'h2')
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: maps the name of a chunk node to the function converting it.
# convert() looks the node name up here and calls the converter with the ctx.
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
def generate_nav_nodes(files, node):
    """Collect the navigation targets for one chunk.

    Args:
      files: list of all chunk nodes; the neighbours of node in this list
             become the previous/next targets
      node: the chunk node to build the navigation for

    Returns:
      A dict that always has 'nav_home' (the root of node) and, when they
      exist, 'nav_up', 'nav_prev' and 'nav_next'.
    """
    targets = {'nav_home': node.root}

    # nav params: up, prev, next
    parent = node.parent
    if parent:
        targets['nav_up'] = parent

    pos = files.index(node)
    last = len(files) - 1
    if pos > 0:
        # without this guard the first node would wrap around to files[-1]
        targets['nav_prev'] = files[pos - 1]
    if pos < last:
        targets['nav_next'] = files[pos + 1]
    return targets
1344 def convert(out_dir
, module
, files
, node
):
1345 """Convert the docbook chunks to a html file.
1348 out_dir: already created output dir
1349 files: list of nodes in the tree in pre-order
1350 node: current tree node
1353 logging
.info('Writing: %s', node
.filename
)
1354 with
open(os
.path
.join(out_dir
, node
.filename
), 'wt',
1355 newline
='\n', encoding
='utf-8') as html
:
1361 ctx
.update(generate_nav_nodes(files
, node
))
1363 if node
.name
in convert_chunks
:
1364 for line
in convert_chunks
[node
.name
](ctx
):
1367 logging
.warning('Add converter/template for "%s"', node
.name
)
def create_devhelp2_toc(node):
    """Build the nested <sub> entries for the children of node.

    Args:
      node: tree node; each child needs 'title', 'filename' and 'children'

    Returns:
      A list of strings. A child without children yields one self-closing
      <sub/> line, a child with children yields an opening <sub> line, the
      entries of its own children (recursively) and a closing </sub> line.
    """
    entries = []
    for child in node.children:
        attrs = (child.title, child.filename)
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        entries.append('<sub name="%s" link="%s">\n' % attrs)
        entries.extend(create_devhelp2_toc(child))
        entries.append('</sub>\n')
    return entries
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into devhelp2 keyword attributes.

    The condition holds entries separated with '|', each either 'key:value'
    (e.g. 'since:2.0') or a bare key (e.g. 'deprecated').

    Args:
      node: element whose 'attrib' mapping may contain 'condition'

    Returns:
      A string of key="value" pairs, each preceded by a space so that it can
      be appended right after another attribute, or '' if node has no
      'condition' attribute.
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    # The values are written into double-quoted XML attributes, so a literal
    # double quote has to be emitted as an entity to keep the output valid.
    entries = node.attrib['condition'].replace('"', '&quot;').split('|')
    keywords = []
    for entry in entries:
        # deprecated can have no description: partition() then yields an
        # empty value, only the first ':' separates key and value
        key, _, value = entry.partition(':')
        keywords.append('{}="{}"'.format(key, value))
    return ' ' + ' '.join(keywords)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Format the devhelp2 <keyword> line for a refsect2 element.

    Args:
      node: element with 'role' and 'id' attributes; its title is looked up
            with xml_get_title()
      base_link: link prefix, the 'id' of node gets appended to it

    Returns:
      One '<keyword .../>' line ending with a newline, including the
      attributes derived from the 'condition' of node.
    """
    attrs = node.attrib
    kind = attrs['role']
    name = xml_get_title(node)
    link = base_link + attrs['id']
    extra = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (kind, name, link, extra)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Format the devhelp2 <keyword> line for a refsect3 member row.

    Args:
      node: element with a 'role' attribute (used as the keyword type)
      base_link: link prefix, name gets appended to it
      title: value for the 'name' attribute of the keyword
      name: anchor appended to base_link to form the link

    Returns:
      One '<keyword .../>' line ending with a newline, including the
      attributes derived from the 'condition' of node.
    """
    kind = node.attrib['role']
    link = base_link + name
    extra = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (kind, title, link, extra)
1410 def create_devhelp2(out_dir
, module
, xml
, files
):
1411 with
open(os
.path
.join(out_dir
, module
+ '.devhelp2'), 'wt',
1412 newline
='\n', encoding
='utf-8') as idx
:
1413 bookinfo_nodes
= xml
.xpath('/book/bookinfo')
1415 if bookinfo_nodes
is not None:
1416 bookinfo
= bookinfo_nodes
[0]
1417 title
= bookinfo
.xpath('./title/text()')[0]
1418 online_url
= bookinfo
.xpath('./releaseinfo/ulink[@role="online-location"]/@url')[0]
1419 # TODO: support author too (see devhelp2.xsl)
1420 # TODO: fixxref uses '--src-lang' to set the language
1422 """<?xml version="1.0" encoding="utf-8" standalone="no"?>
1423 <book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
1425 """ % (title
, module
, online_url
)
1428 result
.extend(create_devhelp2_toc(files
[0].root
))
1429 result
.append(""" </chapters>
1432 # keywords from all refsect2 and refsect3
1433 refsect2
= etree
.XPath('//refsect2[@role]')
1434 refsect3_enum
= etree
.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
1435 refsect3_enum_details
= etree
.XPath('entry[@role="enum_member_name"]/para')
1436 refsect3_struct
= etree
.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
1437 refsect3_struct_details
= etree
.XPath('entry[@role="struct_member_name"]/para/structfield')
1439 base_link
= node
.filename
+ '#'
1440 refsect2_nodes
= refsect2(node
.xml
)
1441 for refsect2_node
in refsect2_nodes
:
1442 result
.append(create_devhelp2_refsect2_keyword(refsect2_node
, base_link
))
1443 refsect3_nodes
= refsect3_enum(refsect2_node
)
1444 for refsect3_node
in refsect3_nodes
:
1445 details_node
= refsect3_enum_details(refsect3_node
)[0]
1446 name
= details_node
.attrib
['id']
1447 result
.append(create_devhelp2_refsect3_keyword(refsect3_node
, base_link
, details_node
.text
, name
))
1448 refsect3_nodes
= refsect3_struct(refsect2_node
)
1449 for refsect3_node
in refsect3_nodes
:
1450 details_node
= refsect3_struct_details(refsect3_node
)[0]
1451 name
= details_node
.attrib
['id']
1452 result
.append(create_devhelp2_refsect3_keyword(refsect3_node
, base_link
, name
, name
))
1454 result
.append(""" </functions>
def get_dirs(uninstalled):
    """Locate the gtk-doc data directory and the style directory.

    Args:
      uninstalled: if true, use the directory of the running script
                   (sys.argv[0]) or, when that has no 'gtk-doc.xsl', the
                   directory named by $ABS_TOP_SRCDIR; otherwise use the
                   installed data dir below config.datadir

    Returns:
      A tuple (gtkdocdir, styledir).
    """
    if uninstalled:
        # this does not work from builddir!=srcdir
        gtkdocdir = os.path.dirname(sys.argv[0])
        if not os.path.exists(os.path.join(gtkdocdir, 'gtk-doc.xsl')):
            # try 'srcdir' (set from makefiles) too; only when it is set,
            # as an empty value would otherwise probe the filesystem root
            srcdir = os.environ.get('ABS_TOP_SRCDIR')
            if srcdir and os.path.exists(os.path.join(srcdir, 'gtk-doc.xsl')):
                gtkdocdir = srcdir
        # join() keeps an empty gtkdocdir relative ('style') instead of
        # turning it into the absolute path '/style'
        styledir = os.path.join(gtkdocdir, 'style')
    else:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        styledir = gtkdocdir
    return (gtkdocdir, styledir)
1476 def main(module
, index_file
, out_dir
, uninstalled
):
1477 tree
= etree
.parse(index_file
)
1480 (gtkdocdir
, styledir
) = get_dirs(uninstalled
)
1481 # copy navigation images and stylesheets to html directory ...
1482 css_file
= os
.path
.join(styledir
, 'style.css')
1483 for f
in glob(os
.path
.join(styledir
, '*.png')) + [css_file
]:
1484 shutil
.copy(f
, out_dir
)
1485 css_file
= os
.path
.join(out_dir
, 'style.css')
1486 with
open(css_file
, 'at', newline
='\n', encoding
='utf-8') as css
:
1487 css
.write(HTML_FORMATTER
.get_style_defs())
1489 # TODO: migrate options from fixxref
1490 # TODO: do in parallel with loading the xml above.
1491 fixxref
.LoadIndicies(out_dir
, '/usr/share/gtk-doc/html', [])
1493 # We do multiple passes:
1494 # 1) recursively walk the tree and chunk it into a python tree so that we
1495 # can generate navigation and link tags.
1496 files
= chunk(tree
.getroot())
1497 files
= list(PreOrderIter(files
))
1498 # 2) extract tables:
1499 # TODO: use multiprocessing
1500 # - find all 'id' attribs and add them to the link map
1501 add_id_links(files
, fixxref
.Links
)
1502 # - build glossary dict
1503 build_glossary(files
)
1505 # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
1506 create_devhelp2(out_dir
, module
, tree
.getroot(), files
)
1507 # 4) iterate the tree and output files
1508 # TODO: use multiprocessing
1510 convert(out_dir
, module
, files
, node
)
1514 logging
.info('options: %s', str(options
.__dict
__))
1515 module
= options
.args
[0]
1516 document
= options
.args
[1]
1518 # TODO: rename to 'html' later on
1519 # - right now in mkhtml, the dir is created by the Makefile and mkhtml
1520 # outputs into the working directory
1521 out_dir
= os
.path
.join(os
.path
.dirname(document
), 'db2html')
1524 except OSError as e
:
1525 if e
.errno
!= errno
.EEXIST
:
1528 sys
.exit(main(module
, document
, out_dir
, options
.uninstalled
))