mkhtml2: handle label attrs for titles
[gtk-doc.git] / gtkdoc / mkhtml2.py
blobe98426aa1b8a9e4c67cb1a6bec4f7f6a648d0c25
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - tag converters:
39 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
40 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
41 attr on the <img> tag of the 'imageobject'
42 - handle the 'xref' tag
43 - this needs the title + the type of the target
44 - for the title, see add_id_links_and_titles(), we can also store the tag
45 in another map
46 - check each docbook tag if it can contain #PCDATA, if not don't check for
47 xml.text
48 - consider some perf-warnings flag
49 - see 'No "id" attribute on'
50 - find a better way to print context for warnings
51 - we use 'xml.sourceline', but this all does not help a lot due to xi:include
53 DIFFERENCES:
54 - titles
55 - we add the chunk label to both title in toc and title on the page
56 - docbook xsl only sometimes adds the label to the titles and when it does it
57 adds name chunk type too (e.g. 'Part I.' instead of 'I.')
58 - navigation
59 - we always add an up-link except on the first page
61 OPTIONAL:
62 - minify html: https://pypi.python.org/pypi/htmlmin/
64 Requirements:
65 sudo pip3 install anytree lxml pygments
67 Example invocation:
68 cd tests/bugs/docs/
69 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
70 xdg-open db2html/index.html
71 meld html db2html
73 Benchmarking:
74 cd tests/bugs/docs/;
75 rm html-build.stamp; time make html-build.stamp
76 """
78 import argparse
79 import errno
80 import logging
81 import os
82 import shutil
83 import sys
85 from anytree import Node, PreOrderIter
86 from copy import deepcopy
87 from glob import glob
88 from lxml import etree
89 from pygments import highlight
90 from pygments.lexers import CLexer
91 from pygments.formatters import HtmlFormatter
92 from timeit import default_timer as timer
94 from . import config, fixxref
# pygments setup
# Lexer cache keyed by lower-cased language name. Only the C lexer is created
# up front; convert_programlisting() adds further entries on first use.
LEXERS = {
    'c': CLexer()
}
# Formatter shared by all highlighted listings (created with nowrap=True).
HTML_FORMATTER = HtmlFormatter(nowrap=True)
class ChunkParams:
    """Chunking parameters for one docbook tag.

    prefix is the string gen_chunk_name() puts in front of generated names,
    parent is the tag name of the parent chunk type (or None) and min_idx is
    compared against a node's sibling index in chunk(). idx is the running
    sequence number for generated names and starts at 1.
    """

    def __init__(self, prefix, parent=None, min_idx=0):
        self.idx = 1
        self.min_idx = min_idx
        self.parent = parent
        self.prefix = prefix
# Used as min_idx for tags that must never get a file of their own: chunk()
# only starts a new file when idx >= min_idx, which infinity never satisfies.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
#   'Table of Contents' heading in convert_chunk_with_toc()
#
# Maps a docbook tag name to its ChunkParams(prefix, parent, min_idx).
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
# Per-tag (title, subtitle) XPath pairs used to look up chunk titles. The
# subtitle slot may be None; '_' is the fallback for tags without an entry.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every element that carries an 'id' attribute.
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# Number handed to the next footnote; advanced by convert_footnote().
footnote_idx = 1

# element id -> plain text title, filled by add_id_links_and_titles()
titles = {}
def gen_chunk_name(node, chunk_params):
    """Return the chunk file name (without extension) for a node.

    A node that has an 'id' attribute is named after that id. Otherwise the
    name is the prefix from chunk_params followed by a two-digit sequence
    number, and the sequence number in chunk_params is advanced by one.
    """
    attrs = node.attrib
    if 'id' in attrs:
        return attrs['id']

    seq = chunk_params.idx
    chunk_params.idx = seq + 1
    name = '%s%02d' % (chunk_params.prefix, seq)

    # handle parents to make names of nested tags like in docbook
    # - we only need to prepend the parent if there are > 1 of them in the
    #   xml. Note, the parents we have are not sufficient, e.g. 'index' can
    #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
    #   when we chunk explicitly and on each level maintain the 'idx'
    # while chunk_params.parent:
    #     parent = chunk_params.parent
    #     if parent not in CHUNK_PARAMS:
    #         break;
    #     chunk_params = CHUNK_PARAMS[parent]
    #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name

    logging.info('Gen chunk name: "%s"', name)
    return name
def get_chunk_titles(module, node):
    """Collect title and subtitle information for a chunk node.

    Returns a dict with the keys 'title', 'title_tag', 'subtitle' and
    'subtitle_tag'. Entries stay None when the matching element is missing.
    A 'label' attribute on the node is put in front of the title as
    '<label>. '.
    """
    tag = node.tag
    title_xpath, subtitle_xpath = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])

    # minimal conversion context for convert_title()
    ctx = {'module': module, 'files': []}
    result = dict.fromkeys(('title', 'title_tag', 'subtitle', 'subtitle_tag'))

    found = title_xpath(node)
    if found:
        title_xml = found[0]
        # handle chunk label for tocs
        label = node.attrib.get('label')
        prefix = label + '. ' if label else ''
        result['title'] = prefix + ''.join(convert_title(ctx, title_xml))
        # a plain <title> is reported under the tag of the chunk itself
        result['title_tag'] = tag if title_xml.tag == 'title' else title_xml.tag

    if subtitle_xpath:
        found = subtitle_xpath(node)
        if found:
            subtitle_xml = found[0]
            result['subtitle'] = ''.join(convert_title(ctx, subtitle_xml))
            result['subtitle_tag'] = subtitle_xml.tag
    return result
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.

    xml_node is the element to process, module is passed on to
    get_chunk_titles(), depth and idx are stored on the created Node and
    parent is the Node new nodes get attached to.

    Returns the Node created for xml_node if its tag is listed in
    CHUNK_PARAMS, otherwise the parent that was passed in.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent
                # (we keep working on a detached deep copy of it)
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            # this node gets its own html file
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # the node stays in the file of its parent and is reached through
            # an anchor
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        depth += 1
        idx = 0
    for child in xml_node:
        chunk(child, module, depth, idx, parent)
        # only children that are chunk tags advance the sibling index
        if child.tag in CHUNK_PARAMS:
            idx += 1

    return parent
def add_id_links_and_titles(files, links):
    """Record link targets and plain text titles for all elements with an id.

    For every element with an 'id' attribute in the xml of each node in
    files, links[id] is set to the node's filename (when the id equals the
    filename minus its last five characters) or to 'filename#id'. If the
    element has a title, its text content is stored in the global titles map.
    """
    fallback = TITLE_XPATHS['_']
    for node in files:
        filename = node.filename
        chunk_name = filename[:-5]
        for elem in ID_XPATH(node.xml):
            elem_id = elem.attrib['id']
            if elem_id == chunk_name:
                links[elem_id] = filename
            else:
                links[elem_id] = filename + '#' + elem_id

            title_xpath = TITLE_XPATHS.get(elem.tag, fallback)[0]
            found = title_xpath(elem)
            if found:
                # we need the plain text content
                plain = etree.tostring(found[0], method="text", encoding=str)
                titles[elem_id] = plain.strip()
def build_glossary(files):
    """Fill the global glossary map from all 'glossary' chunks in files.

    The stripped text of each glossentry's 'glossterm' becomes the key and
    the stripped text of its 'glossdef' the value.
    """
    def plain_text(elem):
        return etree.tostring(elem, method="text", encoding=str).strip()

    glossaries = (node.xml for node in files if node.xml.tag == 'glossary')
    for glossary_xml in glossaries:
        for entry in GLOSSENTRY_XPATH(glossary_xml):
            # TODO: there can be all kind of things in a glossary. This only
            # supports what we commonly use
            term = plain_text(entry.find('glossterm'))
            definition = plain_text(entry.find('glossdef'))
            glossary[term] = definition
            # logging.debug('glosentry: %s:%s', term, definition)
304 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert all children of xml and add their output to result.

    The converter is looked up by tag name in convert_tags; tags without an
    entry are handled by convert__unknown().
    """
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only the output of its children."""
    children_html = []
    convert_inner(ctx, xml, children_html)
    return children_html
def convert_skip(ctx, xml):
    """Produce no output at all; the children are not converted either."""
    return list()
def append_text(text, result):
    """Append text to result with HTML special characters escaped.

    Nothing is appended when text is None, empty or whitespace only.
    """
    if text and text.strip():
        # '&' has to be replaced first, otherwise the entities produced for
        # '<' and '>' would get escaped a second time.
        escaped = text.replace('&', '&amp;')
        escaped = escaped.replace('<', '&lt;').replace('>', '&gt;')
        result.append(escaped)
# tags we have already warned about in convert__unknown()
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for tags without an entry in convert_tags.

    Sub-chunks yield nothing, xml comments are passed through as html
    comments. Any other tag is logged once and its children are converted
    between a pair of html comments naming the tag.
    """
    tag = xml.tag
    # don't recurse on subchunks
    if tag in CHUNK_PARAMS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']

    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    html = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, html)
    html.append('<!-- /' + tag + '-->\n')
    return html
def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a section-like tag into a <div> with a heading.

    h_tag is the html heading tag (e.g. 'h2') used for the section title and
    inner_func converts the children (convert_inner by default). The anchor
    for the section 'id' and the heading are only emitted if the section has
    a 'title' child.
    """
    result = ['<div class="%s">\n' % xml.tag]
    title_tag = xml.find('title')
    if title_tag is not None:
        if 'id' in xml.attrib:
            result.append('<a name="%s"></a>' % xml.attrib['id'])
        # Go through convert_title() instead of using title_tag.text directly:
        # this escapes the text, keeps inline markup inside the title and
        # avoids printing 'None' for titles that start with a child tag.
        heading = ''.join(convert_title(ctx, title_tag))
        result.append('<%s>%s</%s>' % (h_tag, heading, h_tag))
    append_text(xml.text, result)
    inner_func(ctx, xml, result)
    result.append('</div>')
    append_text(xml.tail, result)
    return result
def xml_get_title(ctx, xml):
    """Return the converted 'title' child of xml as one string.

    Logs a warning and returns '' if there is no title.
    """
    title_tag = xml.find('title')
    if title_tag is None:
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return ''.join(convert_title(ctx, title_tag))
370 # docbook tags
def convert_abstract(ctx, xml):
    """Convert 'abstract' into a titled <div class="abstract">."""
    html = ['<div class="abstract">\n<p class="title"><b>Abstract</b></p>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(xml.tail, html)
    return html
def convert_acronym(ctx, xml):
    """Convert 'acronym', using the glossary entry (if any) as tooltip."""
    acronym = xml.text
    # TODO: print a sensible warning if missing
    expansion = glossary.get(acronym, '')
    html = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (expansion, acronym)]
    tail = xml.tail
    if tail:
        # NOTE(review): unlike most converters the tail is added as is (not
        # via append_text()), so it is not escaped here.
        html.append(tail)
    return html
def convert_anchor(ctx, xml):
    """Convert 'anchor' into an empty named <a>; requires an 'id' attr."""
    anchor_id = xml.attrib['id']
    return ['<a name="%s"></a>' % anchor_id]
def convert_bookinfo(ctx, xml):
    """Convert 'bookinfo' into the title page <div>, closed by a rule."""
    html = ['<div class="titlepage">']
    convert_inner(ctx, xml, html)
    html.append('<hr>\n</div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_blockquote(ctx, xml):
    """Convert 'blockquote' into a <blockquote> wrapped in a <div>."""
    html = ['<div class="blockquote">\n<blockquote class="blockquote">']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</blockquote>\n</div>')
    append_text(xml.tail, html)
    return html
def convert_code(ctx, xml):
    """Convert an inline tag into <code> with the tag name as css class."""
    html = ['<code class="%s">' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code>')
    append_text(xml.tail, html)
    return html
def convert_colspec(ctx, xml):
    """Convert 'colspec' into a <col> tag.

    'colname' becomes the class and 'colwidth' the width attribute.
    """
    attrs = xml.attrib
    html = ['<col']
    for docbook_attr, html_attr in (('colname', 'class'), ('colwidth', 'width')):
        if docbook_attr in attrs:
            html.append(' %s="%s"' % (html_attr, attrs[docbook_attr]))
    html.append('>\n')
    # is in tgroup and there can be no 'text'
    return html
def convert_command(ctx, xml):
    """Convert 'command' into <strong class="userinput"><code>."""
    html = ['<strong class="userinput"><code>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></strong>')
    append_text(xml.tail, html)
    return html
def convert_corpauthor(ctx, xml):
    """Convert 'corpauthor' into an <h3 class="corpauthor"> in a <div>."""
    html = ['<div><h3 class="corpauthor">\n']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</h3></div>\n')
    append_text(xml.tail, html)
    return html
def convert_div(ctx, xml):
    """Convert a block tag into a <div> with the tag name as css class."""
    html = ['<div class="%s">\n' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</div>')
    append_text(xml.tail, html)
    return html
def convert_emphasis(ctx, xml):
    """Convert 'emphasis' into <span class="emphasis"><em>."""
    html = ['<span class="emphasis"><em>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</em></span>')
    append_text(xml.tail, html)
    return html
def convert_em_class(ctx, xml):
    """Convert an inline tag into <em><code>, tag name as class of <em>."""
    html = ['<em class="%s"><code>' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</code></em>')
    append_text(xml.tail, html)
    return html
def convert_entry(ctx, xml):
    """Convert a table 'entry' into a table cell.

    The html tag is read from ctx['table.entry'] (set by convert_thead() and
    convert_tbody()). 'role' becomes the class and 'morerows' the rowspan.
    """
    cell_tag = ctx['table.entry']
    attrs = xml.attrib
    html = ['<' + cell_tag]
    if 'role' in attrs:
        html.append(' class="%s"' % attrs['role'])
    if 'morerows' in attrs:
        # morerows counts the additional rows, rowspan all of them
        html.append(' rowspan="%s"' % (int(attrs['morerows']) + 1))
    html.append('>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</' + cell_tag + '>')
    append_text(xml.tail, html)
    return html
def convert_footnote(ctx, xml):
    """Convert 'footnote' into a reference and queue the footnote body.

    The returned html is only the superscript link. The footnote body is
    appended to the list in ctx['footnotes'] (created on demand). Only the
    leading text of the first 'para' (or 'simpara') child is used.
    """
    footnotes = ctx.get('footnotes', [])
    # footnotes idx is not per page, but per doc
    global footnote_idx
    idx = footnote_idx
    footnote_idx += 1

    # need a pair of ids for each footnote (docbook generates different ids)
    this_id = 'footnote-%d' % idx
    that_id = 'ftn.' + this_id

    inner = ['<div id="%s" class="footnote">' % that_id]
    inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
        this_id, idx))
    # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
    # get double nested paras :/.
    # convert_inner(ctx, xml, inner)
    para = xml.find('para')
    if para is None:
        para = xml.find('simpara')
    if para is not None:
        # para.text is None if the paragraph is empty or starts with a child
        # tag. append_text() skips that case (instead of putting None into
        # the list of html strings) and escapes the text.
        append_text(para.text, inner)
    else:
        logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
                        etree.tostring(xml, method="text", encoding=str).strip())
    inner.append('</p></div>')
    footnotes.append(inner)
    ctx['footnotes'] = footnotes
    return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
        that_id, this_id, idx)]
def convert_formalpara(ctx, xml):
    """Convert 'formalpara' into a <p> that starts with the bold title.

    Expects both a 'title' and a 'para' child.
    """
    title = xml.find('title')
    para = xml.find('para')
    html = ['<p><b>%s</b>' % title.text]
    append_text(para.text, html)
    convert_inner(ctx, para, html)
    append_text(para.tail, html)
    html.append('</p>')
    append_text(xml.tail, html)
    return html
def convert_glossdef(ctx, xml):
    """Convert 'glossdef' into <dd class="glossdef">."""
    html = ['<dd class="glossdef">']
    convert_inner(ctx, xml, html)
    html.append('</dd>\n')
    return html
def convert_glossdiv(ctx, xml):
    """Convert 'glossdiv' into an anchored <h3> followed by its entries.

    The 'title' child is required; it is removed from xml before the
    remaining children are converted.
    """
    title_tag = xml.find('title')
    heading = title_tag.text
    xml.remove(title_tag)
    html = ['<a name="gls%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, html)
    return html
def convert_glossentry(ctx, xml):
    """Convert 'glossentry'; only the children produce output."""
    children_html = []
    convert_inner(ctx, xml, children_html)
    return children_html
def convert_glossterm(ctx, xml):
    """Convert 'glossterm' into a <dt> with a named anchor.

    The anchor name is the 'id' of a nested 'anchor' tag; without one (or
    with an empty id) it is 'glossterm-' followed by the term text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        glossid = ''
        text = ''
    else:
        glossid = anchor.attrib.get('id', '')
        text = anchor.tail or ''
    text += xml.text or ''
    if not glossid:
        glossid = 'glossterm-' + text
    return ['<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (glossid, text)]
def convert_imageobject(ctx, xml):
    """Convert 'imageobject' into an <img> for its 'imagedata' child.

    Returns no output if there is no 'imagedata' child.
    """
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    src = imagedata.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_indexdiv(ctx, xml):
    """Convert 'indexdiv' into an anchored <h3> followed by its entries.

    The 'title' child is required; it is removed from xml before the
    remaining children are converted.
    """
    title_tag = xml.find('title')
    heading = title_tag.text
    xml.remove(title_tag)
    html = ['<a name="idx%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, html)
    return html
def convert_informaltable(ctx, xml):
    """Convert 'informaltable' into a <table> wrapped in a <div>.

    pgwide="1" makes the table full width, frame="none" removes the border.
    """
    attrs = xml.attrib
    html = ['<div class="informaltable"><table class="informaltable"']
    if attrs.get('pgwide') == '1':
        html.append(' width="100%"')
    if attrs.get('frame') == 'none':
        html.append(' border="0"')
    html.append('>\n')
    convert_inner(ctx, xml, html)
    html.append('</table></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_inlinegraphic(ctx, xml):
    """Convert 'inlinegraphic' into an <img>; 'fileref' is the source."""
    # TODO(ensonic): warn on missing fileref attr?
    src = xml.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_itemizedlist(ctx, xml):
    """Convert 'itemizedlist' into a <ul> wrapped in a <div>."""
    html = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, html)
    html.append('</ul></div>')
    tail = xml.tail
    if tail:
        html.append(tail)
    return html
def convert_link(ctx, xml):
    """Convert 'link' into an <a> pointing at the resolved 'linkend'.

    If the linkend is empty or listed in fixxref.NoLinks, only the link
    content is emitted. If the linkend cannot be resolved, the problem is
    reported through fixxref.ReportBadXRef() and the content is emitted as
    plain text.
    """
    linkend = xml.attrib['linkend']
    suppressed = linkend in fixxref.NoLinks

    # the link content is converted the same way in all cases
    content = []
    append_text(xml.text, content)
    convert_inner(ctx, xml, content)

    if suppressed or not linkend:
        result = content
    else:
        text = ''.join(content)
        tid, href = fixxref.GetXRef(linkend)
        if href:
            title = titles.get(tid)
            title_attr = ' title="%s"' % title if title else ''
            href = fixxref.MakeRelativeXRef(ctx['module'], href)
            result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
        else:
            # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
            fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
            result = [text]
    append_text(xml.tail, result)
    return result
def convert_listitem(ctx, xml):
    """Convert 'listitem' into <li class="listitem">."""
    html = ['<li class="listitem">']
    convert_inner(ctx, xml, html)
    html.append('</li>')
    # is in itemizedlist and there can be no 'text'
    return html
def convert_literallayout(ctx, xml):
    """Convert 'literallayout' into a <p> inside <div class="literallayout">."""
    html = ['<div class="literallayout"><p><br>\n']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p></div>')
    append_text(xml.tail, html)
    return html
def convert_orderedlist(ctx, xml):
    """Convert 'orderedlist' into an <ol type="1"> wrapped in a <div>."""
    html = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, html)
    html.append('</ol></div>')
    append_text(xml.tail, html)
    return html
def convert_para(ctx, xml):
    """Convert 'para' into <p>.

    An 'id' attribute yields a named anchor in front of the paragraph and a
    'role' attribute becomes the css class.
    """
    attrs = xml.attrib
    html = []
    if 'id' in attrs:
        html.append('<a name="%s"></a>' % attrs['id'])
    html.append('<p class="%s">' % attrs['role'] if 'role' in attrs else '<p>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(xml.tail, html)
    return html
def convert_para_like(ctx, xml):
    """Convert a tag into <p> with the tag name as css class.

    An 'id' attribute yields a named anchor in front of the paragraph.
    """
    attrs = xml.attrib
    html = []
    if 'id' in attrs:
        html.append('<a name="%s"></a>' % attrs['id'])
    html.append('<p class="%s">' % xml.tag)
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</p>')
    append_text(xml.tail, html)
    return html
def convert_phrase(ctx, xml):
    """Convert 'phrase' into <span>; 'role' becomes the css class."""
    attrs = xml.attrib
    html = ['<span']
    html.append(' class="%s">' % attrs['role'] if 'role' in attrs else '>')
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(xml.tail, html)
    return html
def convert_primaryie(ctx, xml):
    """Convert 'primaryie' into a <dt> followed by an empty <dd>."""
    html = ['<dt>\n']
    convert_inner(ctx, xml, html)
    html.append('\n</dt>\n<dd></dd>\n')
    return html
def convert_pre(ctx, xml):
    """Convert a tag into <pre> with the tag name as css class."""
    html = ['<pre class="%s">\n' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</pre>')
    append_text(xml.tail, html)
    return html
def _get_lexer(lang):
    """Return the cached pygments lexer for lang, or None if there is none."""
    # Imported locally: the module only imports CLexer from pygments.lexers.
    from pygments.lexers import get_lexer_by_name
    from pygments.util import ClassNotFound

    if lang not in LEXERS:
        try:
            LEXERS[lang] = get_lexer_by_name(lang)
        except ClassNotFound:
            # remember the miss, so that we look up each language only once
            LEXERS[lang] = None
    return LEXERS[lang]


def convert_programlisting(ctx, xml):
    """Convert 'programlisting' into a <pre> or a highlighted listing.

    Listings with role="example" are syntax highlighted with pygments (the
    'language' attribute selects the lexer, default is 'c') and get line
    numbers. If pygments has no lexer for the language, a warning is logged
    and the escaped text is emitted in a plain <pre>. An example listing
    without text produces no output. All other listings are converted like
    regular markup inside a <pre>.
    """
    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', 'c').lower()
            lexer = _get_lexer(lang)
            if lexer is not None:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                result.append('<pre class="programlisting">')
                # not highlighted, so we have to escape the text ourselves
                append_text(xml.text, result)
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        append_text(xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    append_text(xml.tail, result)
    return result
def convert_quote(ctx, xml):
    """Convert 'quote' into nested quote spans with literal '"' around."""
    html = ['<span class="quote">"<span class="quote">']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>"</span>')
    append_text(xml.tail, html)
    return html
def convert_refsect1(ctx, xml):
    """Convert 'refsect1' into a section with an <h2> heading."""
    # Add a divider between two consecutive refsect2
    def inner_with_dividers(ctx, xml, result):
        prev_tag = None
        for child in xml:
            tag = child.tag
            if tag == 'refsect2' and prev_tag == tag:
                result.append('<hr>\n')
            converter = convert_tags.get(tag, convert__unknown)
            result.extend(converter(ctx, child))
            prev_tag = tag

    return convert_sect(ctx, xml, 'h2', inner_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert 'refsect2' into a section with an <h3> heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_refsect3(ctx, xml):
    """Convert 'refsect3' into a section with an <h4> heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_row(ctx, xml):
    """Convert a table 'row' into <tr>."""
    html = ['<tr>\n']
    convert_inner(ctx, xml, html)
    html.append('</tr>\n')
    return html
def convert_sect1_tag(ctx, xml):
    """Convert 'sect1' into a section with an <h2> heading."""
    return convert_sect(ctx, xml, h_tag='h2')
def convert_sect2(ctx, xml):
    """Convert 'sect2' into a section with an <h3> heading."""
    return convert_sect(ctx, xml, h_tag='h3')
def convert_sect3(ctx, xml):
    """Convert 'sect3' into a section with an <h4> heading."""
    return convert_sect(ctx, xml, h_tag='h4')
def convert_simpara(ctx, xml):
    """Convert 'simpara' into <p>; only its leading text is emitted."""
    html = ['<p>']
    # NOTE(review): children are not converted here, unlike convert_para()
    append_text(xml.text, html)
    html.append('</p>')
    append_text(xml.tail, html)
    return html
def convert_span(ctx, xml):
    """Convert an inline tag into <span> with the tag name as css class."""
    html = ['<span class="%s">' % xml.tag]
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</span>')
    append_text(xml.tail, html)
    return html
def convert_table(ctx, xml):
    """Convert 'table' into a titled <table> wrapped in two <div>s.

    An 'id' attribute yields a named anchor and a 'title' child a bold title
    paragraph in front of the table.
    """
    attrs = xml.attrib
    html = ['<div class="table">']
    if 'id' in attrs:
        html.append('<a name="%s"></a>' % attrs['id'])
    title_tag = xml.find('title')
    if title_tag is not None:
        html.append('<p class="title"><b>')
        # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
        html.extend(convert_title(ctx, title_tag))
        html.append('</b></p>')
    # NOTE(review): the summary attribute is a fixed string ("g_object_new")
    # for every table - looks like a leftover, confirm before relying on it.
    html.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')

    convert_inner(ctx, xml, html)

    html.append('</table></div></div>')
    append_text(xml.tail, html)
    return html
def convert_tbody(ctx, xml):
    """Convert 'tbody'; entries below it are rendered as <td>."""
    # read by convert_entry() to pick the cell tag
    ctx['table.entry'] = 'td'
    html = ['<tbody>']
    convert_inner(ctx, xml, html)
    html.append('</tbody>')
    # is in tgroup and there can be no 'text'
    return html
def convert_tgroup(ctx, xml):
    """Convert 'tgroup': a <colgroup> for the colspecs, then the children.

    The 'colspec' children are removed from xml once they are converted.
    """
    # tgroup does not expand to anything, but the nested colspecs need to
    # be put into a colgroup
    colspecs = xml.findall('colspec')
    html = []
    if colspecs:
        html.append('<colgroup>\n')
        for colspec in colspecs:
            html.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        html.append('</colgroup>\n')
    convert_inner(ctx, xml, html)
    # is in informaltable and there can be no 'text'
    return html
def convert_thead(ctx, xml):
    """Convert 'thead'; entries below it are rendered as <th>."""
    # read by convert_entry() to pick the cell tag
    ctx['table.entry'] = 'th'
    html = ['<thead>']
    convert_inner(ctx, xml, html)
    html.append('</thead>')
    # is in tgroup and there can be no 'text'
    return html
def convert_title(ctx, xml):
    """Convert the content of a title tag without any wrapping html tag."""
    # This is always explicitly called from some context
    # (the 'title' entry in convert_tags is convert_skip)
    parts = []
    append_text(xml.text, parts)
    convert_inner(ctx, xml, parts)
    append_text(xml.tail, parts)
    return parts
def convert_ulink(ctx, xml):
    """Convert 'ulink' into an <a> pointing at the 'url' attribute.

    The link content is the escaped text plus the converted children. A
    ulink without any content shows the url itself.
    """
    url = xml.attrib['url']
    content = []
    append_text(xml.text, content)
    convert_inner(ctx, xml, content)
    if not content:
        # Empty ulink: xml.text is None here, show the url instead of
        # formatting None into the link.
        append_text(url, content)
    result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, ''.join(content))]
    append_text(xml.tail, result)
    return result
def convert_userinput(ctx, xml):
    """Convert 'userinput' into <span class="command"><strong>."""
    html = ['<span class="command"><strong>']
    append_text(xml.text, html)
    convert_inner(ctx, xml, html)
    html.append('</strong></span>')
    append_text(xml.tail, html)
    return html
def convert_variablelist(ctx, xml):
    """Convert 'variablelist' into a two column table wrapped in a <div>."""
    html = ["""<div class="variablelist"><table border="0" class="variablelist">
<colgroup>
<col align="left" valign="top">
<col>
</colgroup>
<tbody>"""]
    convert_inner(ctx, xml, html)
    html.append("""</tbody>
</table></div>""")
    return html
def convert_varlistentry(ctx, xml):
    """Convert 'varlistentry' into a table row.

    The 'term' child fills the first cell and the content of the 'listitem'
    child the second one. Both children are expected to be present.
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # close the row (this used to emit a second opening '<tr>')
    result.append('</tr>')
    return result
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Maps a docbook tag name to its converter. Each converter is called as
# converter(ctx, xml) and returns a list of html strings. Tags that are not
# listed are handled by convert__unknown() (see convert_inner()).
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # titles are converted explicitly through convert_title()
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
}
1033 # conversion helpers
# Start of every html page up to and including the opening <body> tag. It has
# two %s placeholders: the first one is the content of <title>, the second one
# is inserted right in front of the stylesheet <link>.
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
def generate_head_links(ctx):
    """Return the <link rel=...> tags for the navigation targets in ctx.

    ctx['nav_home'] is required; 'nav_up', 'nav_prev' and 'nav_next' are
    only used if present. Each target needs 'filename' and 'title' attrs.
    """
    targets = [('home', ctx['nav_home'])]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            targets.append((rel, ctx[key]))
    return ''.join(
        '<link rel="%s" href="%s" title="%s">\n' % (rel, n.filename, n.title)
        for rel, n in targets)
def generate_nav_links(ctx):
    """Return the table cells with the home/up/prev/next navigation icons.

    ctx['nav_home'] is required. For 'nav_up', 'nav_prev' and 'nav_next' a
    linked icon is emitted if the key is in ctx and an insensitive icon
    otherwise.
    """
    # (ctx key, accesskey, icon base name, alt text)
    buttons = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % ctx['nav_home'].filename
    ]
    for key, accesskey, icon, alt in buttons:
        if key in ctx:
            cells.append(
                '<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" height="16" border="0" alt="%s"></a></td>' % (
                    accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append('<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>' % icon)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Build the table-of-contents entries for the children of *node*.

    Returns a list of html fragments: one <dt> per child (with an optional
    subtitle span) and, for children that have children themselves, a nested
    <dd><dl> holding the recursively generated entries. The enclosing <dl>
    is left to the caller.
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename
        if child.anchor:
            href = href + child.anchor
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries += ['<dd><dl>'] + generate_toc(ctx, child) + ['</dl></dd>']
    return entries
def generate_basic_nav(ctx):
    """Return the top navigation table without any shortcut links.

    The shortcuts cell is left empty; the navigation icon cells come from
    generate_nav_links().
    """
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts"></td>
%s
</tr>
</table>
"""
    return template % generate_nav_links(ctx)
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Return the top navigation table with one shortcut link per division.

    Args:
      ctx: conversion context, passed on to xml_get_title() and
           generate_nav_links()
      divs: xml nodes whose titles become the shortcut labels
      prefix: string put in front of the title to form the link anchor
      span_id: suffix for the id ('nav_<span_id>') of the shortcut span
    """
    titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles
    ]
    separator = '\n<span class="dim">|</span>\n'

    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts">
<span id="nav_%s">
%s
</span>
</td>
%s
</tr>
</table>
"""
    return template % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to *result*.

    Besides the fixed 'Top' shortcut, one shortcut is added per refsect1,
    except for sections that have a role ending in '_proto', that have no
    'id' attribute or whose id contains no '.'.

    Args:
      ctx: conversion context
      refsect1s: the refsect1 xml nodes of the refentry
      result: list of html fragments, extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        attrs = sect.attrib
        if (attrs.get('role', '').endswith("_proto")  # TOC sections (role="xxx_proto")
                or 'id' not in attrs                  # nothing to link to
                or '.' not in attrs['id']):           # foreign sections
            continue

        ref_id = attrs['id']
        title = xml_get_title(ctx, sect)
        span_id = ref_id.split('.')[1].replace('-', '_')

        # NOTE(review): the separator is padded with no-break spaces (written
        # as escapes to keep them visible) - confirm against the original file.
        result.append("""
<span id="nav_%s">
\u00a0\u00a0<span class="dim">|</span>\u00a0
<a href="#%s" class="shortcut">%s</a>
</span>
""" % (span_id, ref_id, title))
    result.append("""
</td>
%s
</tr>
</table>
""" % generate_nav_links(ctx))
def generate_footer(ctx):
    """Return the footnotes block of a page as a list of html fragments.

    The list is empty when ctx has no 'footnotes' entry. Otherwise every
    item of ctx['footnotes'] (itself a sequence of fragments) is flattened
    into a <div class="footnotes"> wrapper.
    """
    if 'footnotes' not in ctx:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    for footnote in ctx['footnotes']:
        footer += list(footnote)
    footer.append('</div>\n')
    return footer
def get_id_path(node):
    """Generate the index path used to build an 'id' for *node*.

    We walk up the xml-tree and record the 1-based position of each element
    among its siblings. After reaching the top of the xml-tree we continue
    up the chunked-tree and record the 1-based 'idx' of every chunk node.
    The returned list of strings is ordered from the outermost entry to the
    innermost one.
    """
    path = []

    # innermost first: positions inside the xml tree ...
    elem = node.xml
    parent = elem.getparent()
    while parent is not None:
        path.append(str(parent.getchildren().index(elem) + 1))
        elem, parent = parent, parent.getparent()

    # ... followed by the positions inside the chunked tree
    chunk_node = node
    while chunk_node is not None:
        path.append(str(chunk_node.idx + 1))
        chunk_node = chunk_node.parent

    path.reverse()
    return path
def get_id(node):
    """Return the 'id' for the xml element of a chunk node.

    A non-empty 'id' attribute is used as is. Otherwise an id of the form
    'id-<n>.<n>...' is generated from the index path of the node.
    """
    xml = node.xml
    explicit_id = xml.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to omit
        # the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     xml.sourceline, xml.tag)
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk that starts with a table of contents of its children.

    Args:
      ctx: conversion context; ctx['node'] is the chunk to convert
      div_class: css class for the <div> wrapping the page content
      title_tag: html tag name used for the page title (e.g. 'h1')

    Returns:
      the page as a list of html fragments
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    if node.title:
        result.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (
            title_tag, get_id(node), node.title, title_tag))

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        toc_open = """<p><b>Table of Contents</b></p>
<div class="toc">
<dl class="toc">
"""
        toc_close = """</dl>
</div>
"""
        result += [toc_open] + toc_entries + [toc_close]
    convert_inner(ctx, node.xml, result)
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1255 # docbook chunks
def convert_book(ctx):
    """Convert the 'book' chunk (the index page) to html.

    The page consists of the title banner, the converted 'bookinfo' (when
    the book has one) and the table of contents of the whole document.

    Returns:
      the page as a list of html fragments
    """
    node = ctx['node']
    result = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
<tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    ]
    # 'bookinfo' is optional; indexing findall()[0] would raise IndexError
    # for a book that does not have one.
    bookinfo = node.xml.find('bookinfo')
    if bookinfo is not None:
        result.extend(convert_bookinfo(ctx, bookinfo))
    result.append("""<div class="toc">
<dl class="toc">
""")
    result.extend(generate_toc(ctx, node.root))
    result.append("""</dl>
</div>
""")
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a toc page with an <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_glossary(ctx):
    """Convert the 'glossary' chunk to html.

    The 'glossdiv' children provide the shortcut links of the navigation
    bar and are converted one after the other below the title.

    Returns:
      the page as a list of html fragments
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')
    page_title = node.title + ": " + node.root.title

    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'),
        """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for glossdiv in glossdivs:
        result += convert_glossdiv(ctx, glossdiv)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
def convert_index(ctx):
    """Convert the 'index' chunk to html.

    The 'indexdiv' nodes nested below the outer 'indexdiv' provide the
    shortcut links of the navigation bar and are converted one after the
    other below the title. An index without an outer 'indexdiv' yields a
    page with just the title.

    Returns:
      the page as a list of html fragments
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv; find() returns None for an index
    # without one, so guard against that instead of crashing.
    outer_indexdiv = node.xml.find('indexdiv')
    indexdivs = []
    if outer_indexdiv is not None:
        indexdivs = outer_indexdiv.findall('indexdiv')

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_part(ctx):
    """Convert a 'part' chunk: a toc page with an <h1> title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
def convert_preface(ctx):
    """Convert the 'preface' chunk to html.

    Returns:
      the page as a list of html fragments
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]
    if node.title:
        titlepage = """
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>"""
        result.append(titlepage % (get_id(node), node.title))
    convert_inner(ctx, node.xml, result)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
def convert_reference(ctx):
    """Convert a 'reference' chunk: a toc page with an <h1> title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
def convert_refentry(ctx):
    """Convert a 'refentry' chunk to html.

    The page starts with the navigation bar listing the refsect1 sections,
    followed by the title block (with the optional gallery image taken from
    refmeta/refmiscinfo/inlinegraphic) and the converted refsect1 sections.

    Returns:
      the page as a list of html fragments
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Descend refmeta -> refmiscinfo -> inlinegraphic, always following the
    # first match; stop as soon as one level is missing.
    graphic = node.xml.find('refmeta')
    for tag in ('refmiscinfo', 'inlinegraphic'):
        if graphic is None:
            break
        graphic = graphic.find(tag)
    gallery = ''
    if graphic is not None:
        gallery = ''.join(convert_inlinegraphic(ctx, graphic))

    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s — %s</p>
</td>
<td class="gallery_image" valign="top" align="right">%s</td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for sect in refsect1s:
        result += convert_refsect1(ctx, sect)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: a toc page with an <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
# Maps the name of a chunk node to the function converting it into a page.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
def generate_nav_nodes(files, node):
    """Return the navigation targets for *node* as a dict.

    'nav_home' is always set (the root of the node). 'nav_up' is set when
    the node has a parent, 'nav_prev'/'nav_next' when the node has a
    predecessor/successor in *files*.

    Args:
      files: list of nodes in the tree in pre-order; must contain *node*
      node: current tree node
    """
    # nav params: up, prev, next
    targets = {'nav_home': node.root}
    if node.parent:
        targets['nav_up'] = node.parent
    pos = files.index(node)
    if pos > 0:
        targets['nav_prev'] = files[pos - 1]
    if pos + 1 < len(files):
        targets['nav_next'] = files[pos + 1]
    return targets
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The output file is named after node.filename. When no converter is
    registered for the node name, a warning is logged and the file stays
    empty.

    Args:
      out_dir: already created output dir
      module: the module name, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        if node.name not in convert_chunks:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            converter = convert_chunks[node.name]
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Return the nested <sub> entries for the children of *node*.

    Children without children of their own become self-closing <sub/> tags,
    the others wrap the recursively generated entries of their sub-tree.
    """
    lines = []
    for child in node.children:
        attrs = (child.title, child.filename)
        if not child.children:
            lines.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        lines.append('<sub name="%s" link="%s">\n' % attrs)
        lines += create_devhelp2_toc(child)
        lines.append('</sub>\n')
    return lines
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of *node* into xml attributes.

    The condition holds '|' separated entries of the form 'name:value' or
    just 'name' (e.g. since, deprecated, ...). Double quotes are replaced by
    '&quot;'. Returns the attributes with a leading space, or an empty
    string when the node has no 'condition' attribute.
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    condition = node.attrib['condition'].replace('"', '&quot;')
    attribs = []
    for entry in condition.split('|'):
        # entries without a ':' get an empty value (deprecated can have no
        # description)
        name, _, value = entry.partition(':')
        attribs.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(attribs)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Return the <keyword> line for a refsect2 node.

    The type comes from the 'role' attribute, the name from the title of the
    node and the link from *base_link* plus the 'id' attribute.
    """
    keyword_type = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, conditions)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Return the <keyword> line for a member row below a refsect3.

    The type comes from the 'role' attribute of *node*, the name is *title*
    and the link is *base_link* plus *name*.
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, conditions)
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into *out_dir*.

    The file contains the book header (title and online location taken from
    /book/bookinfo, empty when missing), the chapter tree and one keyword
    per refsect2 with a role plus one per enum member / struct member below.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: root element of the docbook document
      files: list of chunk nodes; the tree root is taken from the first one
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_url = ''
        # xpath() returns a (possibly empty) list, never None, so check for
        # emptiness before indexing; all three lookups are optional.
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
        # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append(""" </chapters>
<functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: named after the text of the details node
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members: named after the id of the details node
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append(""" </functions>
</book>
""")
        for line in result:
            idx.write(line)
def get_dirs(uninstalled):
    """Return the (gtkdocdir, styledir) tuple with the data file locations.

    When running installed both dirs are 'gtk-doc/data' below
    config.datadir. When running uninstalled, gtkdocdir is the directory of
    the running script or - if 'gtk-doc.xsl' is not found there but in
    $ABS_TOP_SRCDIR - that source dir; styledir is its 'style' sub-dir.
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir_xsl = os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'
        if os.path.exists(srcdir_xsl):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled):
    """Run the complete docbook to html conversion for one document.

    The duration of each of the 7 steps is logged.

    Args:
      module: the module name, passed on to the chunking, the devhelp2
              index and the html conversion
      index_file: path of the main docbook xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to select where the data files
                   are looked up
    """
    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    started = timer()
    # Parsing with etree.XMLParser(collect_ids=False) does not seem to be
    # faster.
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - started)

    # 2) copy datafiles
    started = timer()
    # TODO: handle additional images
    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    datafiles = glob(os.path.join(styledir, '*.png'))
    datafiles.append(os.path.join(styledir, 'style.css'))
    for datafile in datafiles:
        shutil.copy(datafile, out_dir)
    # ... and append the style definitions of the html formatter to the copy
    copied_css = os.path.join(out_dir, 'style.css')
    with open(copied_css, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - started)

    # 3) load xref targets
    started = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - started)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    # can generate navigation and link tags.
    started = timer()
    chunk_tree = chunk(tree.getroot(), module)
    # only nodes without an anchor are kept as entries of the files list
    files = [f for f in PreOrderIter(chunk_tree) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - started)

    # 5) extract tables:
    started = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - started)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    started = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - started)

    # 7) iterate the tree and output files
    started = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #   p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #   p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node)
    logging.warning("7: %7.3lf: create html", timer() - started)
def run(options):
    """Entry point: convert the document given in *options* and exit.

    options.args holds the module name and the path of the main document.
    The html is written to a 'db2html' dir next to the document, which is
    created if it does not exist yet. The process exits with the result of
    main().
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # the output dir is already there; any other OSError propagates
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))