mkhtml2: simplify looking up title xpath expressions
[gtk-doc.git] / gtkdoc / mkhtml2.py
blobcbdcf82675b21b91971639bd555c639c635f80ba
1 #!/usr/bin/env python3
2 # -*- python; coding: utf-8 -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2018 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 """Generate html from docbook
24 The tool loads the main xml document (<module>-docs.xml) and chunks it
25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
26 Each chunk is converted to html using python functions.
28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
29 this tool will replace both without relying on external tools such as xsltproc
30 and source-highlight.
32 Please note, that we're not aiming for complete docbook-xml support. All tags
33 used in the generated xml are of course handled. More tags used in handwritten
34 xml can be easily supported, but for some combinations of tags we prefer
35 simplicity.
37 TODO:
38 - tag converters:
39 - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
40 - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
41 attr on the <img> tag of the 'imageobject'
42 - handle 'label' attributes on part/chapter/section-types
43 - the titles will have a generated prefix, such as 'Part I:'
44 (locale dependent)
45 - in the toc it would only be the label: 'I.'
46 - 'link' adds a 'title' attr to 'a' if the target has a title.
47 - there is an implementation in convert_link() but it is slow
48 - we might need to collect titles as we chunk
49 - if we do this we'd need to keep iterating, but might be able to replace
50 add_id_links()
51 - handle the 'xref' tag
52 - this needs the title + the type of the target
53 - for the title, see convert_link()
54 - check each docbook tag if it can contain #PCDATA, if not don't check for
55 xml.text
56 - consider some perf-warnings flag
57 - see 'No "id" attribute on'
59 OPTIONAL:
60 - minify html: https://pypi.python.org/pypi/htmlmin/
62 Requirements:
63 sudo pip3 install anytree lxml pygments
65 Example invocation:
66 cd tests/bugs/docs/
67 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
68 xdg-open db2html/index.html
69 meld html db2html
71 Benchmarking:
72 cd tests/bugs/docs/;
73 rm html-build.stamp; time make html-build.stamp
74 """
76 import argparse
77 import errno
78 import logging
79 import os
80 import shutil
81 import sys
83 from anytree import Node, PreOrderIter
84 from copy import deepcopy
85 from glob import glob
86 from lxml import etree
87 from pygments import highlight
88 from pygments.lexers import CLexer
89 from pygments.formatters import HtmlFormatter
90 from timeit import default_timer as timer
92 from . import config, fixxref
94 # pygments setup
95 # lazily constructed lexer cache
# Lexer instances keyed by lower-case language name; starts with C only.
LEXERS = dict(c=CLexer())
# Shared formatter instance used for all highlighted listings.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
class ChunkParams:
    """Chunking parameters for one docbook tag.

    prefix: filename prefix used when a chunk has no 'id' attribute
    parent: tag name of the expected parent chunk (or None)
    min_idx: smallest child index at which the tag starts a file of its own
    idx: running sequence number for generated chunk names, starts at 1
    """

    def __init__(self, prefix, parent=None, min_idx=0):
        self.idx = 1
        self.min_idx = min_idx
        self.parent = parent
        self.prefix = prefix
# min_idx value that no child index can reach: such tags never get their own
# file (see the 'idx >= min_idx' check in chunk()).
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
CHUNK_PARAMS = {
    'appendix': ChunkParams(prefix='app', parent='book'),
    'book': ChunkParams(prefix='bk'),
    'chapter': ChunkParams(prefix='ch', parent='book'),
    'glossary': ChunkParams(prefix='go', parent='book'),
    'index': ChunkParams(prefix='ix', parent='book'),
    'part': ChunkParams(prefix='pt', parent='book'),
    'preface': ChunkParams(prefix='pr', parent='book'),
    'refentry': ChunkParams(prefix='re', parent='book'),
    'reference': ChunkParams(prefix='rn', parent='book'),
    'sect1': ChunkParams(prefix='s', parent='chapter', min_idx=1),
    'section': ChunkParams(prefix='s', parent='chapter', min_idx=1),
    'sect2': ChunkParams(prefix='s', parent='sect1', min_idx=DONT_CHUNK),
    'sect3': ChunkParams(prefix='s', parent='sect2', min_idx=DONT_CHUNK),
    'sect4': ChunkParams(prefix='s', parent='sect3', min_idx=DONT_CHUNK),
    'sect5': ChunkParams(prefix='s', parent='sect4', min_idx=DONT_CHUNK),
}
137 # TAGS we don't support:
138 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
# Per-tag (title, subtitle) xpath pairs; '_' is the fallback for all other
# tags. A subtitle of None means the tag has no subtitle.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose'),
    ),
}

# Selects every 'id' attribute value in a document.
ID_XPATH = etree.XPath('//@id')

# Selects every 'glossentry' element in a document.
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# Next footnote number; counted per document, not per page.
footnote_idx = 1
def gen_chunk_name(node, chunk_params):
    """Return the chunk file name (without extension) for a node.

    The node's 'id' attribute is used when present. Otherwise the name is
    built from the prefix in chunk_params plus a two-digit sequence number,
    and the sequence number in chunk_params is advanced.
    """
    try:
        return node.attrib['id']
    except KeyError:
        pass

    seq = chunk_params.idx
    chunk_params.idx = seq + 1
    name = '%s%02d' % (chunk_params.prefix, seq)

    # handle parents to make names of nested tags like in docbook
    # - we only need to prepend the parent if there are > 1 of them in the
    #   xml. Note, the parents we have are not sufficient, e.g. 'index' can
    #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
    #   when we chunk explicitly and on each level maintain the 'idx'
    # while chunk_params.parent:
    #     parent = chunk_params.parent
    #     if parent not in CHUNK_PARAMS:
    #         break;
    #     chunk_params = CHUNK_PARAMS[parent]
    #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name

    logging.info('Gen chunk name: "%s"', name)
    return name
def get_chunk_titles(module, node):
    """Look up title and subtitle of a chunk node.

    Returns a dict with the keys 'title', 'title_tag', 'subtitle' and
    'subtitle_tag'; entries stay None when nothing was found. 'title_tag' is
    the node's tag when the title came from a plain 'title' element and the
    tag of the title element otherwise.
    """
    node_tag = node.tag
    title_xpath, subtitle_xpath = TITLE_XPATHS.get(node_tag, TITLE_XPATHS['_'])

    # minimal context for convert_title()
    ctx = {'module': module, 'files': []}
    titles = dict.fromkeys(('title', 'title_tag', 'subtitle', 'subtitle_tag'))

    found = title_xpath(node)
    if found:
        title_node = found[0]
        titles['title'] = ''.join(convert_title(ctx, title_node))
        if title_node.tag == 'title':
            titles['title_tag'] = node_tag
        else:
            titles['title_tag'] = title_node.tag

    if subtitle_xpath:
        found = subtitle_xpath(node)
        if found:
            subtitle_node = found[0]
            titles['subtitle'] = ''.join(convert_title(ctx, subtitle_node))
            titles['subtitle_tag'] = subtitle_node.tag
    return titles
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first call is made with parent=None and returns the new tree-node,
    which is the root of the chunk tree. For every xml node whose tag is in
    CHUNK_PARAMS a tree-node with a filename is created, then the children
    are processed recursively.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # detach a copy and remove the xml-node from its parent
                detached = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = detached
            # the node gets a file of its own
            filename, anchor = chunk_name + '.html', None
        else:
            # the node stays in the parent's file and is reached by anchor
            filename, anchor = parent.filename, '#' + chunk_name

        parent = Node(tag, parent=parent, xml=xml_node, depth=depth, idx=idx,
                      filename=filename, anchor=anchor, **title_args)

        depth += 1
        idx = 0

    for child in xml_node:
        chunk(child, module, depth, idx, parent)
        # only chunkable children advance the index
        if child.tag in CHUNK_PARAMS:
            idx += 1

    return parent
def add_id_links(files, links):
    """Record a link target in 'links' for every 'id' found in the chunks.

    An id equal to the chunk name (filename without '.html') maps to the file
    itself, any other id maps to 'filename#id'.
    """
    for node in files:
        filename = node.filename
        chunk_name = filename[:-5]
        for id_value in ID_XPATH(node.xml):
            if id_value == chunk_name:
                target = filename
            else:
                target = filename + '#' + id_value
            links[id_value] = target
def build_glossary(files):
    """Fill the module-level 'glossary' dict from all 'glossary' chunks."""

    def text_of(element):
        # stripped plain-text content of an element
        return etree.tostring(element, method="text", encoding=str).strip()

    for node in files:
        if node.xml.tag == 'glossary':
            for entry in GLOSSENTRY_XPATH(node.xml):
                # TODO: there can be all kind of things in a glossary. This only supports
                # what we commonly use
                key = text_of(entry.find('glossterm'))
                value = text_of(entry.find('glossdef'))
                glossary[key] = value
                # logging.debug('glosentry: %s:%s', key, value)
285 # conversion helpers
def convert_inner(ctx, xml, result):
    """Convert all children of 'xml' and extend 'result' with the output.

    Tags without an entry in convert_tags go to convert__unknown().
    """
    for child in xml:
        converter = convert_tags.get(child.tag, convert__unknown)
        result.extend(converter(ctx, child))
def convert_ignore(ctx, xml):
    """Emit nothing for the tag itself, only its converted children."""
    converted = []
    convert_inner(ctx, xml, converted)
    return converted
def convert_skip(ctx, xml):
    """Drop the tag and everything below it; returns a single empty string."""
    nothing = ['']
    return nothing
def append_text(text, result):
    """Append 'text' to 'result' with html special chars escaped.

    None, empty and whitespace-only text is skipped. '&' is escaped first so
    that the entities produced for '<' and '>' are not escaped again.
    """
    if text and text.strip():
        escaped = text.replace('&', '&amp;')
        escaped = escaped.replace('<', '&lt;').replace('>', '&gt;')
        result.append(escaped)
# tags we already warned about in convert__unknown()
missing_tags = {}


def convert__unknown(ctx, xml):
    """Fallback converter for tags without an entry in convert_tags.

    Sub-chunks produce nothing, xml comments are passed through and any other
    tag is wrapped in html comments naming the tag, around its converted
    children.
    """
    # don't recurse on subchunks
    if xml.tag in CHUNK_PARAMS:
        return []
    if isinstance(xml, etree._Comment):
        return ['<!-- ' + xml.text + '-->\n']

    tag = xml.tag
    # warn only once
    if tag not in missing_tags:
        logging.warning('Add tag converter for "%s"', tag)
        missing_tags[tag] = True
    out = ['<!-- ' + tag + '-->\n']
    convert_inner(ctx, xml, out)
    out.append('<!-- /' + tag + '-->\n')
    return out
def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
    """Convert a section-like tag into a div with a heading.

    h_tag is the html heading tag for the title, inner_func converts the
    children. The heading (and the anchor for the section 'id') is only
    emitted when the section has a 'title' child.
    """
    out = ['<div class="%s">\n' % xml.tag]
    title = xml.find('title')
    if title is not None:
        section_id = xml.attrib.get('id')
        if 'id' in xml.attrib:
            out.append('<a name="%s"></a>' % section_id)
        out.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
    append_text(xml.text, out)
    inner_func(ctx, xml, out)
    out.append('</div>')
    append_text(xml.tail, out)
    return out
def xml_get_title(ctx, xml):
    """Return the converted 'title' child of 'xml' as one string.

    Logs a warning and returns '' when there is no title.
    """
    title_tag = xml.find('title')
    if title_tag is None:
        # TODO(ensonic): any way to get the file (includes) too?
        logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
        return ''
    return ''.join(convert_title(ctx, title_tag))
352 # docbook tags
def convert_abstract(ctx, xml):
    """Convert 'abstract' into a div with a fixed 'Abstract' title."""
    out = ['<div class="abstract">\n<p class="title"><b>Abstract</b></p>']
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</div>')
    append_text(xml.tail, out)
    return out
def convert_acronym(ctx, xml):
    """Convert 'acronym'; the glossary entry (if any) becomes the title attr."""
    term = xml.text
    # TODO: print a sensible warning if missing
    definition = glossary.get(term, '')
    out = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (definition, term)]
    tail = xml.tail
    if tail:
        out.append(tail)
    return out
def convert_anchor(ctx, xml):
    """Convert 'anchor' into a named html anchor; the 'id' attr is required."""
    anchor_id = xml.attrib['id']
    return ['<a name="%s"></a>' % anchor_id]
def convert_bookinfo(ctx, xml):
    """Convert 'bookinfo' into the titlepage div, closed by a rule."""
    out = ['<div class="titlepage">']
    convert_inner(ctx, xml, out)
    out.append('<hr>\n</div>')
    tail = xml.tail
    if tail:
        out.append(tail)
    return out
def convert_blockquote(ctx, xml):
    """Convert 'blockquote' into a div wrapping an html blockquote."""
    out = ['<div class="blockquote">\n<blockquote class="blockquote">']
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</blockquote>\n</div>')
    append_text(xml.tail, out)
    return out
def convert_code(ctx, xml):
    """Convert a code-like tag into <code> with the tag name as class."""
    opening = '<code class="%s">' % xml.tag
    out = [opening]
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</code>')
    append_text(xml.tail, out)
    return out
def convert_colspec(ctx, xml):
    """Convert 'colspec' into a <col> tag.

    'colname' becomes the class attr and 'colwidth' the width attr.
    """
    attrs = xml.attrib
    parts = ['<col']
    for docbook_attr, html_attr in (('colname', 'class'), ('colwidth', 'width')):
        if docbook_attr in attrs:
            parts.append(' %s="%s"' % (html_attr, attrs[docbook_attr]))
    parts.append('>\n')
    # is in tgroup and there can be no 'text'
    return parts
def convert_command(ctx, xml):
    """Convert 'command' into strong + code markup with class 'userinput'."""
    out = ['<strong class="userinput"><code>']
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</code></strong>')
    append_text(xml.tail, out)
    return out
def convert_corpauthor(ctx, xml):
    """Convert 'corpauthor' into a h3 heading inside a div."""
    out = ['<div><h3 class="corpauthor">\n']
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</h3></div>\n')
    append_text(xml.tail, out)
    return out
def convert_div(ctx, xml):
    """Convert a block tag into a div with the tag name as class."""
    opening = '<div class="%s">\n' % xml.tag
    out = [opening]
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</div>')
    append_text(xml.tail, out)
    return out
def convert_emphasis(ctx, xml):
    """Convert 'emphasis' into <em> inside a span of class 'emphasis'."""
    out = ['<span class="emphasis"><em>']
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</em></span>')
    append_text(xml.tail, out)
    return out
def convert_em_class(ctx, xml):
    """Convert a tag into <em><code> with the tag name as class of the em."""
    opening = '<em class="%s"><code>' % xml.tag
    out = [opening]
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</code></em>')
    append_text(xml.tail, out)
    return out
def convert_entry(ctx, xml):
    """Convert a table 'entry' into a cell.

    The html tag is taken from ctx['table.entry'], which convert_thead() and
    convert_tbody() set. 'role' becomes the class attr and 'morerows' is
    turned into a rowspan.
    """
    cell_tag = ctx['table.entry']
    attrs = xml.attrib
    out = ['<' + cell_tag]
    if 'role' in attrs:
        out.append(' class="%s"' % attrs['role'])
    if 'morerows' in attrs:
        rowspan = 1 + int(attrs['morerows'])
        out.append(' rowspan="%s"' % rowspan)
    out.append('>')
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</' + cell_tag + '>')
    append_text(xml.tail, out)
    return out
def convert_footnote(ctx, xml):
    """Convert 'footnote' into a reference and queue the footnote body.

    Returns the html for the superscript reference. The footnote body (a list
    of html fragments) is appended to ctx['footnotes'], which is created on
    first use. Only the leading text of a 'para'/'simpara' child is used.
    """
    # footnotes idx is not per page, but per doc
    global footnote_idx
    footnotes = ctx.get('footnotes', [])
    idx = footnote_idx
    footnote_idx += 1

    # need a pair of ids for each footnote (docbook generates different ids)
    this_id = 'footnote-%d' % idx
    that_id = 'ftn.' + this_id

    inner = ['<div id="%s" class="footnote">' % that_id]
    inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
        this_id, idx))
    # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
    # get double nested paras :/.
    # convert_inner(ctx, xml, inner)
    para = xml.find('para')
    if para is None:
        para = xml.find('simpara')
    if para is not None:
        # text is None when the paragraph starts with a child element; keep
        # the fragment list string-only
        inner.append(para.text or '')
    else:
        logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
                        etree.tostring(xml, method="text", encoding=str).strip())
    inner.append('</p></div>')
    footnotes.append(inner)
    ctx['footnotes'] = footnotes
    return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
        that_id, this_id, idx)]
def convert_formalpara(ctx, xml):
    """Convert 'formalpara' into a paragraph with a bold title.

    A missing 'title' (or one without text) gives an empty bold title and a
    missing 'para' child gives an empty paragraph body.
    """
    title_tag = xml.find('title')
    title = ''
    if title_tag is not None and title_tag.text:
        title = title_tag.text
    result = ['<p><b>%s</b>' % title]
    para_tag = xml.find('para')
    if para_tag is not None:
        append_text(para_tag.text, result)
        convert_inner(ctx, para_tag, result)
        append_text(para_tag.tail, result)
    result.append('</p>')
    append_text(xml.tail, result)
    return result
def convert_glossdef(ctx, xml):
    """Convert 'glossdef' into a <dd> holding the converted children."""
    out = ['<dd class="glossdef">']
    convert_inner(ctx, xml, out)
    out.append('</dd>\n')
    return out
def convert_glossdiv(ctx, xml):
    """Convert 'glossdiv': a 'gls<title>' anchor plus a h3 heading.

    The 'title' child is removed from 'xml' before the remaining children are
    converted.
    """
    title_tag = xml.find('title')
    heading = title_tag.text
    xml.remove(title_tag)
    out = ['<a name="gls%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, out)
    return out
def convert_glossentry(ctx, xml):
    """Convert 'glossentry'; only the children produce output."""
    converted = []
    convert_inner(ctx, xml, converted)
    return converted
def convert_glossterm(ctx, xml):
    """Convert 'glossterm' into a <dt> with a named anchor.

    The anchor name is the 'id' of a nested 'anchor' child, or
    'glossterm-<text>' when there is none. The text is the anchor's tail
    followed by the term's own text.
    """
    anchor = xml.find('anchor')
    if anchor is None:
        glossid, text = '', ''
    else:
        glossid = anchor.attrib.get('id', '')
        text = anchor.tail or ''
    text += xml.text or ''
    if not glossid:
        glossid = 'glossterm-' + text
    return [
        '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
            glossid, text)
    ]
def convert_imageobject(ctx, xml):
    """Convert 'imageobject' into an <img> for its 'imagedata' child.

    Returns an empty list when there is no 'imagedata' child.
    """
    imagedata = xml.find('imagedata')
    if imagedata is None:
        return []
    # TODO(ensonic): warn on missing fileref attr?
    src = imagedata.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_indexdiv(ctx, xml):
    """Convert 'indexdiv': an 'idx<title>' anchor plus a h3 heading.

    The 'title' child is removed from 'xml' before the remaining children are
    converted.
    """
    title_tag = xml.find('title')
    heading = title_tag.text
    xml.remove(title_tag)
    out = ['<a name="idx%s"></a><h3 class="title">%s</h3>' % (heading, heading)]
    convert_inner(ctx, xml, out)
    return out
def convert_informaltable(ctx, xml):
    """Convert 'informaltable' into a table wrapped in a div.

    pgwide="1" adds width="100%" and frame="none" adds border="0".
    """
    attrs = xml.attrib
    out = ['<div class="informaltable"><table class="informaltable"']
    if attrs.get('pgwide') == '1':
        out.append(' width="100%"')
    if attrs.get('frame') == 'none':
        out.append(' border="0"')
    out.append('>\n')
    convert_inner(ctx, xml, out)
    out.append('</table></div>')
    tail = xml.tail
    if tail:
        out.append(tail)
    return out
def convert_inlinegraphic(ctx, xml):
    """Convert 'inlinegraphic' into an <img>; src is the 'fileref' attr."""
    # TODO(ensonic): warn on missing fileref attr?
    src = xml.attrib.get('fileref', '')
    return ['<img src="%s">' % src]
def convert_itemizedlist(ctx, xml):
    """Convert 'itemizedlist' into a <ul> with disc bullets inside a div."""
    out = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
    convert_inner(ctx, xml, out)
    out.append('</ul></div>')
    tail = xml.tail
    if tail:
        out.append(tail)
    return out
def convert_link(ctx, xml):
    """Convert 'link' into a html link.

    A 'linkend' listed in fixxref.NoLinks is rendered as plain content. When
    fixxref cannot resolve the target, the bad xref is reported and only the
    link text is emitted.
    """
    linkend = xml.attrib['linkend']
    if linkend in fixxref.NoLinks:
        linkend = None

    result = []
    if not linkend:
        append_text(xml.text, result)
        convert_inner(ctx, xml, result)
    else:
        fragments = []
        append_text(xml.text, fragments)
        convert_inner(ctx, xml, fragments)
        text = ''.join(fragments)

        (tid, href) = fixxref.GetXRef(linkend)
        if href:
            module = ctx['module']
            title_attr = ''
            # search for a title under id='tid' in all chunks
            # NOTE: this will only work for local links
            # TODO: this works but is super slow
            # id_xpath = etree.XPath('//*[@id="%s"]' % tid)
            # for c in ctx['files']:
            #     nodes = id_xpath(c.xml)
            #     if nodes:
            #         title = get_chunk_titles(module, nodes[0])['title']
            #         if title:
            #             title_attr = ' title="%s"' % title
            #             logging.debug('Have title node: href=%s%s', tid, title_attr)
            #         break

            href = fixxref.MakeRelativeXRef(module, href)
            result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
        else:
            # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
            fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
            result = [text]
    append_text(xml.tail, result)
    return result
def convert_listitem(ctx, xml):
    """Convert 'listitem' into a <li> holding the converted children."""
    out = ['<li class="listitem">']
    convert_inner(ctx, xml, out)
    out.append('</li>')
    # is in itemizedlist and there can be no 'text'
    return out
def convert_literallayout(ctx, xml):
    """Convert 'literallayout' into a paragraph inside a div."""
    out = ['<div class="literallayout"><p><br>\n']
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</p></div>')
    append_text(xml.tail, out)
    return out
def convert_orderedlist(ctx, xml):
    """Convert 'orderedlist' into a numbered <ol> inside a div."""
    out = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
    convert_inner(ctx, xml, out)
    out.append('</ol></div>')
    append_text(xml.tail, out)
    return out
def convert_para(ctx, xml):
    """Convert 'para' into <p>.

    An 'id' attr adds a named anchor in front and a 'role' attr becomes the
    class of the paragraph.
    """
    attrs = xml.attrib
    out = []
    if 'id' in attrs:
        out.append('<a name="%s"></a>' % attrs['id'])
    out.append('<p class="%s">' % attrs['role'] if 'role' in attrs else '<p>')
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</p>')
    append_text(xml.tail, out)
    return out
def convert_para_like(ctx, xml):
    """Convert a tag into <p> with the tag name as class.

    An 'id' attr adds a named anchor in front of the paragraph.
    """
    attrs = xml.attrib
    out = []
    if 'id' in attrs:
        out.append('<a name="%s"></a>' % attrs['id'])
    out.append('<p class="%s">' % xml.tag)
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</p>')
    append_text(xml.tail, out)
    return out
def convert_phrase(ctx, xml):
    """Convert 'phrase' into a span; a 'role' attr becomes its class."""
    attrs = xml.attrib
    out = ['<span']
    # the second fragment closes the opening tag, with or without class
    out.append(' class="%s">' % attrs['role'] if 'role' in attrs else '>')
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</span>')
    append_text(xml.tail, out)
    return out
def convert_primaryie(ctx, xml):
    """Convert 'primaryie' into a <dt> followed by an empty <dd>."""
    out = ['<dt>\n']
    convert_inner(ctx, xml, out)
    out.append('\n</dt>\n<dd></dd>\n')
    return out
def convert_pre(ctx, xml):
    """Convert a preformatted tag into <pre> with the tag name as class."""
    opening = '<pre class="%s">\n' % xml.tag
    out = [opening]
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</pre>')
    append_text(xml.tail, out)
    return out
def convert_programlisting(ctx, xml):
    """Convert 'programlisting'.

    Listings with role="example" are syntax-highlighted with pygments using
    the 'language' attr (default 'c') and get a line-number column. If
    pygments has no lexer for the language, the escaped text is emitted in a
    plain <pre>. All other listings are converted like regular content inside
    a <pre>.
    """
    result = []
    if xml.attrib.get('role', '') == 'example':
        if xml.text:
            lang = xml.attrib.get('language', 'c').lower()
            if lang not in LEXERS:
                # local imports: the module only imports CLexer at file level
                from pygments.lexers import get_lexer_by_name
                from pygments.util import ClassNotFound
                try:
                    LEXERS[lang] = get_lexer_by_name(lang)
                except ClassNotFound:
                    # remember the miss so we don't look it up again
                    LEXERS[lang] = None
            lexer = LEXERS[lang]
            if lexer:
                highlighted = highlight(xml.text, lexer, HTML_FORMATTER)

                # we do own line-numbering
                line_count = highlighted.count('\n')
                source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
                result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
<tbody>
<tr>
<td class="listing_lines" align="right"><pre>%s</pre></td>
<td class="listing_code"><pre class="programlisting">%s</pre></td>
</tr>
</tbody>
</table>
""" % (source_lines, highlighted))
            else:
                logging.warning('No pygments lexer for language="%s"', lang)
                # not highlighted, so we have to escape the text ourselves
                escaped = xml.text.replace('&', '&amp;')
                escaped = escaped.replace('<', '&lt;').replace('>', '&gt;')
                result.append('<pre class="programlisting">')
                result.append(escaped)
                result.append('</pre>')
    else:
        result.append('<pre class="programlisting">')
        append_text(xml.text, result)
        convert_inner(ctx, xml, result)
        result.append('</pre>')
    append_text(xml.tail, result)
    return result
def convert_quote(ctx, xml):
    """Convert 'quote' into nested spans with literal double quotes."""
    out = ['<span class="quote">"<span class="quote">']
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</span>"</span>')
    append_text(xml.tail, out)
    return out
def convert_refsect1(ctx, xml):
    """Convert 'refsect1' into a section with a h2 heading."""

    def convert_children_with_dividers(ctx, xml, result):
        # Add a divider between two consecutive refsect2
        previous_tag = None
        for child in xml:
            if child.tag == 'refsect2' and previous_tag == 'refsect2':
                result.append('<hr>\n')
            converter = convert_tags.get(child.tag, convert__unknown)
            result.extend(converter(ctx, child))
            previous_tag = child.tag

    return convert_sect(ctx, xml, 'h2', convert_children_with_dividers)
def convert_refsect2(ctx, xml):
    """Convert 'refsect2' into a section with a h3 heading."""
    heading_tag = 'h3'
    return convert_sect(ctx, xml, heading_tag)
def convert_refsect3(ctx, xml):
    """Convert 'refsect3' into a section with a h4 heading."""
    heading_tag = 'h4'
    return convert_sect(ctx, xml, heading_tag)
def convert_row(ctx, xml):
    """Convert a table 'row' into <tr> holding the converted children."""
    out = ['<tr>\n']
    convert_inner(ctx, xml, out)
    out.append('</tr>\n')
    return out
def convert_sect1_tag(ctx, xml):
    """Convert 'sect1' into a section with a h2 heading."""
    heading_tag = 'h2'
    return convert_sect(ctx, xml, heading_tag)
def convert_sect2(ctx, xml):
    """Convert 'sect2' into a section with a h3 heading."""
    heading_tag = 'h3'
    return convert_sect(ctx, xml, heading_tag)
def convert_sect3(ctx, xml):
    """Convert 'sect3' into a section with a h4 heading."""
    heading_tag = 'h4'
    return convert_sect(ctx, xml, heading_tag)
def convert_simpara(ctx, xml):
    """Convert 'simpara' into <p>; only the text is used, no children."""
    out = ['<p>']
    append_text(xml.text, out)
    out.append('</p>')
    append_text(xml.tail, out)
    return out
def convert_span(ctx, xml):
    """Convert an inline tag into a span with the tag name as class."""
    opening = '<span class="%s">' % xml.tag
    out = [opening]
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</span>')
    append_text(xml.tail, out)
    return out
def convert_table(ctx, xml):
    """Convert 'table' into a titled html table inside a div.

    An 'id' attr adds a named anchor. A 'title' child is rendered as bold
    paragraph and removed from 'xml' before the remaining children are
    converted.
    """
    attrs = xml.attrib
    out = ['<div class="table">']
    if 'id' in attrs:
        out.append('<a name="%s"></a>' % attrs['id'])
    title_tag = xml.find('title')
    if title_tag is not None:
        out.append('<p class="title"><b>')
        # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
        out.extend(convert_title(ctx, title_tag))
        out.append('</b></p>')
        xml.remove(title_tag)
    # NOTE(review): the summary attr is hard-coded to "g_object_new" for every
    # table - confirm whether this is intended
    out.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')

    convert_inner(ctx, xml, out)

    out.append('</table></div></div>')
    append_text(xml.tail, out)
    return out
def convert_tbody(ctx, xml):
    """Convert 'tbody'; nested entries are rendered as 'td' cells."""
    # read by convert_entry()
    ctx['table.entry'] = 'td'
    out = ['<tbody>']
    convert_inner(ctx, xml, out)
    out.append('</tbody>')
    # is in tgroup and there can be no 'text'
    return out
def convert_tgroup(ctx, xml):
    """Convert 'tgroup'.

    tgroup does not expand to anything, but the nested colspecs need to be
    put into a colgroup. The colspecs are removed from 'xml' so they are not
    converted a second time with the remaining children.
    """
    out = []
    colspecs = xml.findall('colspec')
    if colspecs:
        out.append('<colgroup>\n')
        for colspec in colspecs:
            out.extend(convert_colspec(ctx, colspec))
            xml.remove(colspec)
        out.append('</colgroup>\n')
    convert_inner(ctx, xml, out)
    # is in informaltable and there can be no 'text'
    return out
def convert_thead(ctx, xml):
    """Convert 'thead'; nested entries are rendered as 'th' cells."""
    # read by convert_entry()
    ctx['table.entry'] = 'th'
    out = ['<thead>']
    convert_inner(ctx, xml, out)
    out.append('</thead>')
    # is in tgroup and there can be no 'text'
    return out
def convert_title(ctx, xml):
    """Convert the content of a title without any wrapping markup.

    This is always called from some context, which adds the markup.
    """
    fragments = []
    append_text(xml.text, fragments)
    convert_inner(ctx, xml, fragments)
    append_text(xml.tail, fragments)
    return fragments
def convert_ulink(ctx, xml):
    """Convert 'ulink' into a html link to the 'url' attr.

    An ulink without text uses the url itself as link text (instead of the
    string 'None').
    """
    url = xml.attrib['url']
    text = xml.text if xml.text else url
    result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, text)]
    if xml.tail:
        result.append(xml.tail)
    return result
def convert_userinput(ctx, xml):
    """Convert 'userinput' into <strong> inside a span of class 'command'."""
    out = ['<span class="command"><strong>']
    append_text(xml.text, out)
    convert_inner(ctx, xml, out)
    out.append('</strong></span>')
    append_text(xml.tail, out)
    return out
def convert_variablelist(ctx, xml):
    """Convert 'variablelist' into a two-column table inside a div."""
    opening = (
        '<div class="variablelist"><table border="0" class="variablelist">\n'
        '<colgroup>\n'
        '<col align="left" valign="top">\n'
        '<col>\n'
        '</colgroup>\n'
        '<tbody>'
    )
    out = [opening]
    convert_inner(ctx, xml, out)
    out.append('</tbody>\n</table></div>')
    return out
def convert_varlistentry(ctx, xml):
    """Convert 'varlistentry' into a table row.

    The 'term' child goes into the first cell and the children of the
    'listitem' child into the second cell.
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # close the row (this used to emit a second opening <tr>)
    result.append('</tr>')
    return result
946 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: docbook tag name -> converter function. Tags not listed
# here are handled by convert__unknown() (see convert_inner()).
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
}
1027 # conversion helpers
# Page header template; takes the page title and the html for the <link>
# tags (see generate_head_links()).
HTML_HEADER = (
    '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n'
    '<html>\n'
    '<head>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n'
    '<title>%s</title>\n'
    '%s<link rel="stylesheet" href="style.css" type="text/css">\n'
    '</head>\n'
    '<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">\n'
)
def generate_head_links(ctx):
    """Return the <link> tags for the html head as one string.

    ctx['nav_home'] is required; 'nav_up', 'nav_prev' and 'nav_next' are only
    emitted when present in ctx.
    """
    template = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [template % ('home', home.filename, home.title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            links.append(template % (rel, target.filename, target.title))
    return ''.join(links)
def generate_nav_links(ctx):
    """Return the table cells with the navigation icons as one string.

    Home is always linked. Up, prev and next are linked when the matching
    'nav_*' key is in ctx and shown as insensitive icon otherwise.
    """
    active = ('<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" '
              'height="16" border="0" alt="%s"></a></td>')
    inactive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    # (ctx key, accesskey, icon basename, alt text)
    targets = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )
    for key, accesskey, icon, alt in targets:
        if key in ctx:
            cells.append(active % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(inactive % icon)

    return ''.join(cells)
def generate_toc(ctx, node):
    """Build the table-of-contents entries for all children of a node.

    Children that have children themselves get a nested <dd><dl> list.

    Args:
      ctx: conversion context, only passed on to the recursive calls
      node: tree node whose 'children' are listed

    Returns:
      a list of html fragments
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries += ['<dd><dl>', *generate_toc(ctx, child), '</dl></dd>']
    return entries
def generate_basic_nav(ctx):
    """Build the navigation table for pages without shortcut links.

    Args:
      ctx: conversion context, passed on to generate_nav_links()

    Returns:
      the html for the navigation table as a string
    """
    nav_cells = generate_nav_links(ctx)
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts"></td>
%s
</tr>
</table>
""" % nav_cells
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the navigation table with one shortcut link per division.

    Args:
      ctx: conversion context
      divs: xml nodes of the divisions; the title of each one is used as
            link text and, together with prefix, as link target
      prefix: string put in front of the title inside the link target
      span_id: suffix for the 'nav_' id of the span wrapping the shortcuts

    Returns:
      the html for the navigation table as a string
    """
    titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles
    ]
    separator = '\n<span class="dim">|</span>\n'

    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%%" align="left" class="shortcuts">
<span id="nav_%s">
%s
</span>
</td>
%s
</tr>
</table>
""" % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to result.

    Besides the 'Top' link, a shortcut is added for each listed section.

    Args:
      ctx: conversion context
      refsect1s: the refsect1 xml nodes of the refentry
      result: list of html fragments, extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
<tr valign="middle">
<td width="100%" align="left" class="shortcuts">
<a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        attrs = sect.attrib
        # don't list TOC sections (role="xxx_proto")
        if attrs.get('role', '').endswith("_proto"):
            continue
        # skip sections without 'id' attrs and foreign sections (no '.')
        ref_id = attrs.get('id')
        if ref_id is None or '.' not in ref_id:
            continue

        title = xml_get_title(ctx, sect)
        span_id = ref_id.split('.')[1].replace('-', '_')

        result.append("""
<span id="nav_%s">
   <span class="dim">|</span> 
<a href="#%s" class="shortcut">%s</a>
</span>
""" % (span_id, ref_id, title))

    nav_cells = generate_nav_links(ctx)
    result.append("""
</td>
%s
</tr>
</table>
""" % nav_cells)
def generate_footer(ctx):
    """Build the footnotes section of a page.

    Args:
      ctx: conversion context; 'footnotes', when present, is a list whose
           items are themselves sequences of html fragments

    Returns:
      a list of html fragments, empty if ctx has no 'footnotes'
    """
    if 'footnotes' not in ctx:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    for footnote in ctx['footnotes']:
        footer.extend(footnote)
    footer.append('</div>\n')
    return footer
def get_id_path(node):
    """Generate the index path used to build an 'id' for a node.

    First walks up the xml-tree and records the 1-based position of each
    element among its siblings. When reaching the top of the xml-tree the
    1-based 'idx' of the node and of all its ancestors in the chunked-tree
    are added.

    Args:
      node: tree node providing 'xml', 'idx' and 'parent'

    Returns:
      a list of strings, outermost position first
    """
    # collect innermost first and flip the list once at the end
    reversed_path = []

    element = node.xml
    container = element.getparent()
    while container is not None:
        reversed_path.append(str(list(container).index(element) + 1))
        element, container = container, container.getparent()

    chunk = node
    while chunk is not None:
        reversed_path.append(str(chunk.idx + 1))
        chunk = chunk.parent

    reversed_path.reverse()
    return reversed_path
def get_id(node):
    """Return the 'id' of the node's xml element or generate one.

    Args:
      node: tree node providing the 'xml' element

    Returns:
      the non-empty 'id' attribute if there is one, else 'id-' followed by
      the dot-separated index path from get_id_path()
    """
    element = node.xml
    explicit_id = element.attrib.get('id')
    if explicit_id:
        return explicit_id

    # TODO: this is moot if nothing links to it, we could also consider to omit
    # the <a name="$id"></a> tag.
    logging.info('%d: No "id" attribute on "%s", generating one',
                 element.sourceline, element.tag)
    return 'id-' + '.'.join(get_id_path(node))
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk that shows a title, a table of contents and content.

    Args:
      ctx: conversion context; 'node' is the chunk to convert
      div_class: css class for the <div> wrapping the page content
      title_tag: html tag name used for the title, e.g. 'h1'

    Returns:
      a list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]

    title = node.xml.find('title')
    if title is not None:
        anchor = get_id(node)
        result.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (title_tag, anchor, title.text, title_tag))

    toc = generate_toc(ctx, node)
    if toc:
        # TODO: not all docbook page types use this extra heading
        toc_head = """<p><b>Table of Contents</b></p>
<div class="toc">
<dl class="toc">
"""
        toc_tail = """</dl>
</div>
"""
        result += [toc_head, *toc, toc_tail]

    # result is handed over so that the converted content can be added to it
    convert_inner(ctx, node.xml, result)
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1250 # docbook chunks
def convert_book(ctx):
    """Convert the book chunk (the start page) to html.

    The page consists of the title banner, the converted 'bookinfo' element
    (if the book has one) and the table of contents of the whole document.

    Args:
      ctx: conversion context; 'node' is the book node

    Returns:
      a list of html fragments for the whole page
    """
    node = ctx['node']
    result = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
<tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    ]
    # A book without <bookinfo> must not abort the conversion; find() gives
    # the first match (like findall()[0]) or None when there is none.
    bookinfo = node.xml.find('bookinfo')
    if bookinfo is not None:
        result.extend(convert_bookinfo(ctx, bookinfo))
    result.append("""<div class="toc">
<dl class="toc">
""")
    result.extend(generate_toc(ctx, node.root))
    result.append("""</dl>
</div>
""")
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_chapter(ctx):
    """Convert a chapter chunk: 'chapter' div with an <h2> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
def convert_glossary(ctx):
    """Convert a glossary chunk to html.

    The navigation gets one shortcut per 'glossdiv' and each 'glossdiv' is
    converted with convert_glossdiv().

    Args:
      ctx: conversion context; 'node' is the glossary node

    Returns:
      a list of html fragments for the whole page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')
    page_title = node.title + ": " + node.root.title
    level = node.depth

    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'),
        """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (level, get_id(node), node.title, level)
    ]
    for glossdiv in glossdivs:
        result.extend(convert_glossdiv(ctx, glossdiv))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_index(ctx):
    """Convert an index chunk to html.

    The navigation gets one shortcut per nested 'indexdiv' and each of them
    is converted with convert_indexdiv(). An index without an outer
    'indexdiv' results in a page with the title only.

    Args:
      ctx: conversion context; 'node' is the index node

    Returns:
      a list of html fragments for the whole page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv. find() returns None for an index
    # that has no outer indexdiv, so guard before descending.
    outer_indexdiv = node.xml.find('indexdiv')
    indexdivs = [] if outer_indexdiv is None else outer_indexdiv.findall('indexdiv')

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_part(ctx):
    """Convert a part chunk: 'part' div with an <h1> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
def convert_preface(ctx):
    """Convert a preface chunk to html.

    Args:
      ctx: conversion context; 'node' is the preface node

    Returns:
      a list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]

    title = node.xml.find('title')
    if title is not None:
        anchor = get_id(node)
        result.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (anchor, title.text))

    # result is handed over so that the converted content can be added to it
    convert_inner(ctx, node.xml, result)
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_reference(ctx):
    """Convert a reference chunk: 'reference' div with an <h1> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
def convert_refentry(ctx):
    """Convert a refentry chunk to html.

    The page has a navigation table with section shortcuts, a header with
    title, subtitle and an optional gallery image, followed by all
    'refsect1' sections.

    Args:
      ctx: conversion context; 'node' is the refentry node

    Returns:
      a list of html fragments for the whole page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # The gallery image is the inlinegraphic in refmeta/refmiscinfo; descend
    # one level at a time and give up as soon as a level is missing.
    gallery = ''
    graphic = node.xml
    for tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        graphic = graphic.find(tag)
        if graphic is None:
            break
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, graphic))

    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
<table width="100%%"><tr>
<td valign="top">
<h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
<p>%s — %s</p>
</td>
<td class="gallery_image" valign="top" align="right">%s</td>
</tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        result.extend(convert_refsect1(ctx, refsect1))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
def convert_sect1(ctx):
    """Convert a sect1 chunk: 'sect1' div with an <h2> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
# Dispatch table used by convert(): maps the name of a chunk node to the
# function that renders the page. Each function takes the conversion context
# and returns a list of html fragments.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
def generate_nav_nodes(files, node):
    """Determine the navigation targets for a node.

    Args:
      files: list of nodes; the neighbours of node in this list become the
             prev/next targets
      node: current tree node, has to be an item of files

    Returns:
      a dict with 'nav_home' and, where applicable, 'nav_up', 'nav_prev'
      and 'nav_next'
    """
    nav = {'nav_home': node.root}
    if node.parent:
        nav['nav_up'] = node.parent

    pos = files.index(node)
    last = len(files) - 1
    if pos > 0:
        nav['nav_prev'] = files[pos - 1]
    if pos < last:
        nav['nav_next'] = files[pos + 1]
    return nav
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The output file is created in any case; for node types without a
    converter it stays empty and a warning is logged.

    Args:
      out_dir: already created output dir
      module: the module name, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
def create_devhelp2_toc(node):
    """Build the <sub> entries of the devhelp2 chapter list.

    Args:
      node: tree node whose 'children' are listed recursively

    Returns:
      a list of xml fragments
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        entries.extend(create_devhelp2_toc(child))
        entries.append('</sub>\n')
    return entries
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into devhelp2 xml attributes.

    The condition is a '|' separated list of 'name:description' items, e.g.
    'deprecated:Use foo instead|since:2.4'. The description is optional.

    Args:
      node: xml node providing the 'attrib' mapping

    Returns:
      the attributes as a string with a leading space, or '' if there is
      nothing to add
    """
    from xml.sax.saxutils import escape

    condition = node.attrib.get('condition')
    if condition is None:
        return ''

    # The values end up inside double quoted xml attributes, so all markup
    # characters need escaping, not only the quotes. None of the generated
    # entities contains '|' or ':', so splitting afterwards is safe.
    escaped = escape(condition, {'"': '&quot;'})
    keywords = []
    for item in escaped.split('|'):
        if not item:
            # an empty item would give an attribute without a name
            continue
        # deprecated can have no description
        name, _, description = item.partition(':')
        keywords.append('{}="{}"'.format(name, description))
    if not keywords:
        return ''
    return ' ' + ' '.join(keywords)
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 <keyword> line for a refsect2 node.

    Args:
      node: refsect2 xml node with 'role' and 'id' attributes
      base_link: link prefix that is put in front of the node id

    Returns:
      the <keyword> element as a string
    """
    role = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, name, link, conditions)
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 <keyword> line for an enum or struct member.

    Args:
      node: xml node with the 'role' attribute
      base_link: link prefix that is put in front of name
      title: value for the 'name' attribute of the keyword
      name: anchor name that is appended to base_link

    Returns:
      the <keyword> element as a string
    """
    role = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return ' <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, title, link, conditions)
def create_devhelp2(out_dir, module, xml, files):
    """Write the <module>.devhelp2 index file.

    The file contains the chapter list and one keyword per 'refsect2' that
    has a 'role', plus keywords for the enum and struct members inside.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: root element of the document
      files: list of nodes in the tree in pre-order
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_url = ''
        # xpath() returns a (possibly empty) list and never None, so check
        # each lookup for results instead of indexing blindly.
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
        # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
<chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append(""" </chapters>
<functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members use the text of the details node as name
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members use the id as name and as link target
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append(""" </functions>
</book>
""")
        idx.writelines(result)
def get_dirs(uninstalled):
    """Determine the directories with the gtk-doc data and style files.

    Args:
      uninstalled: if true, look next to the running script (or in
                   $ABS_TOP_SRCDIR), else use the installed data dir

    Returns:
      a tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
def main(module, index_file, out_dir, uninstalled):
    """Convert a docbook document to chunked html and a devhelp2 index.

    The duration of each step is logged.

    Args:
      module: the module name
      index_file: path of the main docbook xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files

    There is no explicit return value.
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of the formatter to the copy
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    # can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # only nodes without an anchor are kept in the list of files
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    # p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    # p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node)
    logging.warning("7: %7.3lf: create html", timer() - _t)
def run(options):
    """Entry point: create the output dir, convert and exit the process.

    Args:
      options: parsed command line options; 'args' holds the module name
               and the path of the main document, 'uninstalled' is passed
               on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    # outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an OSError with errno EEXIST; everything else propagates
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))