gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
   29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
   34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42   - glossary/index: depending on the parents, the headings as h1/h2
  43     - maybe track depth when chunking
  44   - handle 'label' attributes on part/chapter/section-types
  45     - the titles will have a generated prefix, such as 'Part I:'
   46     - in the toc it would be only the label: 'I.'
  47   - we need to separate the toc data from the chunking tree
   48     - since we don't chunk first 'section'/'sect1' those are missing from the
  49       toc
  50     - the toc also lists 'sect2' (TODO: check how deep it goes)
  51   - replace get_title with a result.extend(convert_title(ctx, title_tag))
  52     - see convert_table()
  53 - check each docbook tag if it can contain #PCDATA, if not don't check for
  54   xml.text
  55 - consider some perf-warnings flag
  56   - see 'No "id" attribute on'
  57
  58 OPTIONAL:
  59 - minify html: https://pypi.python.org/pypi/htmlmin/
  60
  61 Requirements:
  62 sudo pip3 install anytree lxml pygments
  63
  64 Example invocation:
  65 cd tests/bugs/docs/
  66 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  67 xdg-open db2html/index.html
  68 meld html db2html
  69
  70 Benchmarking:
  71 cd tests/bugs/docs/;
  72 rm html-build.stamp; time make html-build.stamp
  73 """
  74
  75 import argparse
  76 import errno
  77 import logging
  78 import os
  79 import shutil
  80 import sys
  81
  82 from anytree import Node, PreOrderIter
  83 from copy import deepcopy
  84 from glob import glob
  85 from lxml import etree
  86 from pygments import highlight
  87 from pygments.lexers import CLexer
  88 from pygments.formatters import HtmlFormatter
  89
  90 from . import config, fixxref
  91
# pygments setup
# lazily constructed lexer cache
# Maps a lower-cased language name to a pygments lexer instance. Only the C
# lexer is created up front; convert_programlisting() adds further entries.
LEXERS = {
    'c': CLexer()
}
# Shared formatter; nowrap=True is passed because convert_programlisting()
# supplies its own surrounding <pre>/<table> markup.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
  98
  99
 100 class ChunkParams(object):
 101     def __init__(self, prefix, parent=None, min_idx=0):
 102         self.prefix = prefix
 103         self.parent = parent
 104         self.min_idx = min_idx
 105
 106
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
#
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# Maps a chunkable tag name to its ChunkParams(prefix, parent, min_idx).
# 'sect1'/'section' use min_idx=1: chunk() only starts a new chunk when the
# sibling index is at least min_idx, so the first one stays in its parent.
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
}
# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'

# Maps a tag name to a (title xpath, subtitle xpath or None) pair; the '_'
# entry is the default get_chunk_titles() uses for tags not listed here.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every 'id' attribute in a tree; used by add_id_links().
ID_XPATH = etree.XPath('//@id')

# Selects every 'glossentry'; build_glossary() stores the term -> definition
# text in 'glossary', which convert_acronym() reads for the title attribute.
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
glossary = {}

# Next footnote number; incremented by convert_footnote() and counted per
# document rather than per page.
footnote_idx = 1
 144
 145
 146 def gen_chunk_name(node, chunk_params, idx):
 147     """Generate a chunk file name
 148
 149     This is either based on the id or on the position in the doc. In the latter
 150     case it uses a prefix from CHUNK_PARAMS.
 151     """
 152     if 'id' in node.attrib:
 153         return node.attrib['id']
 154
 155     name = ('%s%02d' % (chunk_params.prefix, idx))
 156     # handle parents to make names of nested tags unique
 157     # TODO: we only need to prepend the parent if there are > 1 of them in the
 158     #       xml. None, the parents we have are not sufficient, e.g. 'index' can
 159     #       be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 160     #       when we chunk explicitly and on each level maintain the 'idx'
 161     # while naming.parent:
 162     #     parent = naming.parent
 163     #     if parent not in CHUNK_PARAMS:
 164     #         break;
 165     #     chunk_params = CHUNK_PARAMS[parent]
 166     #     name = ('%s%02d' % (naming.prefix, idx)) + name
 167     logging.info('Gen chunk name: "%s"', name)
 168     return name
 169
 170
 171 def get_chunk_titles(node):
 172     tag = node.tag
 173     if tag not in TITLE_XPATHS:
 174         # Use defaults
 175         (title, subtitle) = TITLE_XPATHS['_']
 176     else:
 177         (title, subtitle) = TITLE_XPATHS[tag]
 178
 179     result = {
 180         'title': None,
 181         'title_tag': None,
 182         'subtitle': None,
 183         'subtitle_tag': None
 184     }
 185     res = title(node)
 186     if res:
 187         xml = res[0]
 188         result['title'] = xml.text
 189         if xml.tag != 'title':
 190             result['title_tag'] = xml.tag
 191         else:
 192             result['title_tag'] = tag
 193
 194     if subtitle:
 195         res = subtitle(node)
 196         if res:
 197             xml = res[0]
 198             result['subtitle'] = xml.text
 199             result['subtitle_tag'] = xml.tag
 200     return result
 201
 202
def chunk(xml_node, idx=0, parent=None):
    """Chunk the tree.

    Walks 'xml_node' recursively. For every element whose tag is listed in
    CHUNK_PARAMS (and whose 'idx' is at least that tag's 'min_idx') a new
    anytree Node is created below 'parent'. The node carries the element as
    'xml', the output 'filename' and the title fields from get_chunk_titles().

    'idx' is the number of preceding siblings with a tag in CHUNK_PARAMS, as
    counted by the loop at the end of this function.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree

    Returns the node created for 'xml_node', or the unchanged 'parent' if no
    chunk was started here.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    # TODO: if this is None, we should stop traversing, right?

    # also check idx to handle 'sect1'/'section' special casing
    if chunk_params and idx >= chunk_params.min_idx:
        logging.info('chunk tag: "%s"[%d]', tag, idx)
        if parent:
            # remove the xml-node from the parent
            # (a deep copy becomes the root of its own tree, the original
            # element is detached from the enclosing chunk's xml)
            sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = sub_tree

        title_args = get_chunk_titles(xml_node)
        # generated names are 1-based, hence idx + 1
        chunk_name = gen_chunk_name(xml_node, chunk_params, (idx + 1))
        parent = Node(tag, parent=parent, xml=xml_node,
                      filename=chunk_name + '.html', **title_args)

    # restart counting for the children; only chunkable tags advance the index
    idx = 0
    for child in xml_node:
        chunk(child, idx, parent)
        if child.tag in CHUNK_PARAMS:
            idx += 1

    return parent
 234
 235
 236 def add_id_links(files, links):
 237     for node in files:
 238         chunk_name = node.filename[:-5]
 239         chunk_base = node.filename + '#'
 240         for attr in ID_XPATH(node.xml):
 241             if attr == chunk_name:
 242                 links[attr] = node.filename
 243             else:
 244                 links[attr] = chunk_base + attr
 245
 246
 247 def build_glossary(files):
 248     for node in files:
 249         if node.xml.tag != 'glossary':
 250             continue
 251         for term in GLOSSENTRY_XPATH(node.xml):
 252             # TODO: there can be all kind of things in a glossary. This only supports
 253             # what we commonly use
 254             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 255             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 256             glossary[key] = value
 257             # logging.debug('glosentry: %s:%s', key, value)
 258
 259
 260 # conversion helpers
 261
 262
 263 def convert_inner(ctx, xml, result):
 264     for child in xml:
 265         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 266
 267
 268 def convert_ignore(ctx, xml):
 269     result = []
 270     convert_inner(ctx, xml, result)
 271     return result
 272
 273
 274 def convert_skip(ctx, xml):
 275     return ['']
 276
 277
 278 def append_text(text, result):
 279     if text and text.strip():
 280         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 281
 282
 283 missing_tags = {}
 284
 285
 286 def convert__unknown(ctx, xml):
 287     # don't recurse on subchunks
 288     if xml.tag in CHUNK_PARAMS:
 289         return []
 290     if isinstance(xml, etree._Comment):
 291         return ['<!-- ' + xml.text + '-->\n']
 292     else:
 293         # warn only once
 294         if xml.tag not in missing_tags:
 295             logging.warning('Add tag converter for "%s"', xml.tag)
 296             missing_tags[xml.tag] = True
 297         result = ['<!-- ' + xml.tag + '-->\n']
 298         convert_inner(ctx, xml, result)
 299         result.append('<!-- /' + xml.tag + '-->\n')
 300         return result
 301
 302
 303 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 304     result = ['<div class="%s">\n' % xml.tag]
 305     title = xml.find('title')
 306     if title is not None:
 307         if 'id' in xml.attrib:
 308             result.append('<a name="%s"></a>' % xml.attrib['id'])
 309         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 310         xml.remove(title)
 311     append_text(xml.text, result)
 312     inner_func(ctx, xml, result)
 313     result.append('</div>')
 314     append_text(xml.tail, result)
 315     return result
 316
 317
 318 def xml_get_title(xml):
 319     title = xml.find('title')
 320     if title is not None:
 321         return title.text
 322     else:
 323         # TODO(ensonic): any way to get the file (inlcudes) too?
 324         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 325         return ''
 326
 327
 328 # docbook tags
 329
 330
 331 def convert_abstract(ctx, xml):
 332     result = ["""<div class="abstract">
 333     <p class="title"><b>Abstract</b></p>"""]
 334     append_text(xml.text, result)
 335     convert_inner(ctx, xml, result)
 336     result.append('</div>')
 337     append_text(xml.tail, result)
 338     return result
 339
 340
 341 def convert_acronym(ctx, xml):
 342     key = xml.text
 343     title = glossary.get(key, '')
 344     # TODO: print a sensible warning if missing
 345     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 346     if xml.tail:
 347         result.append(xml.tail)
 348     return result
 349
 350
 351 def convert_anchor(ctx, xml):
 352     return ['<a name="%s"></a>' % xml.attrib['id']]
 353
 354
 355 def convert_bookinfo(ctx, xml):
 356     result = ['<div class="titlepage">']
 357     convert_inner(ctx, xml, result)
 358     result.append("""<hr>
 359 </div>""")
 360     if xml.tail:
 361         result.append(xml.tail)
 362     return result
 363
 364
 365 def convert_blockquote(ctx, xml):
 366     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 367     append_text(xml.text, result)
 368     convert_inner(ctx, xml, result)
 369     result.append('</blockquote>\n</div>')
 370     append_text(xml.tail, result)
 371     return result
 372
 373
 374 def convert_code(ctx, xml):
 375     result = ['<code class="%s">' % xml.tag]
 376     append_text(xml.text, result)
 377     convert_inner(ctx, xml, result)
 378     result.append('</code>')
 379     append_text(xml.tail, result)
 380     return result
 381
 382
 383 def convert_colspec(ctx, xml):
 384     result = ['<col']
 385     a = xml.attrib
 386     if 'colname' in a:
 387         result.append(' class="%s"' % a['colname'])
 388     if 'colwidth' in a:
 389         result.append(' width="%s"' % a['colwidth'])
 390     result.append('>\n')
 391     # is in tgroup and there can be no 'text'
 392     return result
 393
 394
 395 def convert_command(ctx, xml):
 396     result = ['<strong class="userinput"><code>']
 397     append_text(xml.text, result)
 398     convert_inner(ctx, xml, result)
 399     result.append('</code></strong>')
 400     append_text(xml.tail, result)
 401     return result
 402
 403
 404 def convert_corpauthor(ctx, xml):
 405     result = ['<div><h3 class="corpauthor">\n']
 406     append_text(xml.text, result)
 407     convert_inner(ctx, xml, result)
 408     result.append('</h3></div>\n')
 409     append_text(xml.tail, result)
 410     return result
 411
 412
 413 def convert_div(ctx, xml):
 414     result = ['<div class="%s">\n' % xml.tag]
 415     append_text(xml.text, result)
 416     convert_inner(ctx, xml, result)
 417     result.append('</div>')
 418     append_text(xml.tail, result)
 419     return result
 420
 421
 422 def convert_em_class(ctx, xml):
 423     result = ['<em class="%s"><code>' % xml.tag]
 424     append_text(xml.text, result)
 425     convert_inner(ctx, xml, result)
 426     result.append('</code></em>')
 427     append_text(xml.tail, result)
 428     return result
 429
 430
 431 def convert_entry(ctx, xml):
 432     entry_type = ctx['table.entry']
 433     result = ['<' + entry_type]
 434     if 'role' in xml.attrib:
 435         result.append(' class="%s"' % xml.attrib['role'])
 436     if 'morerows' in xml.attrib:
 437         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 438     result.append('>')
 439     append_text(xml.text, result)
 440     convert_inner(ctx, xml, result)
 441     result.append('</' + entry_type + '>')
 442     append_text(xml.tail, result)
 443     return result
 444
 445
 446 def convert_footnote(ctx, xml):
 447     footnotes = ctx.get('footnotes', [])
 448     # footnotes idx is not per page, but per doc
 449     global footnote_idx
 450     idx = footnote_idx
 451     footnote_idx += 1
 452
 453     # need a pair of ids for each footnote (docbook generates different ids)
 454     this_id = 'footnote-%d' % idx
 455     that_id = 'ftn.' + this_id
 456
 457     inner = ['<div id="%s" class="footnote">' % that_id]
 458     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 459         this_id, idx))
 460     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 461     # get double nested paras :/.
 462     # convert_inner(ctx, xml, inner)
 463     para = xml.find('para')
 464     if para is None:
 465         para = xml.find('simpara')
 466     if para is not None:
 467         inner.append(para.text)
 468     else:
 469         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 470                         etree.tostring(xml, method="text", encoding=str).strip())
 471     inner.append('</p></div>')
 472     footnotes.append(inner)
 473     ctx['footnotes'] = footnotes
 474     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 475         that_id, this_id, idx)]
 476
 477
 478 def convert_formalpara(ctx, xml):
 479     result = None
 480     title_tag = xml.find('title')
 481     result = ['<p><b>%s</b>' % title_tag.text]
 482     para_tag = xml.find('para')
 483     append_text(para_tag.text, result)
 484     convert_inner(ctx, para_tag, result)
 485     append_text(para_tag.tail, result)
 486     result.append('</p>')
 487     append_text(xml.tail, result)
 488     return result
 489
 490
 491 def convert_glossdef(ctx, xml):
 492     result = ['<dd class="glossdef">']
 493     convert_inner(ctx, xml, result)
 494     result.append('</dd>\n')
 495     return result
 496
 497
 498 def convert_glossdiv(ctx, xml):
 499     title_tag = xml.find('title')
 500     title = title_tag.text
 501     xml.remove(title_tag)
 502     result = [
 503         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 504     ]
 505     convert_inner(ctx, xml, result)
 506     return result
 507
 508
 509 def convert_glossentry(ctx, xml):
 510     result = []
 511     convert_inner(ctx, xml, result)
 512     return result
 513
 514
 515 def convert_glossterm(ctx, xml):
 516     glossid = ''
 517     text = ''
 518     anchor = xml.find('anchor')
 519     if anchor is not None:
 520         glossid = anchor.attrib.get('id', '')
 521         text += anchor.tail or ''
 522     text += xml.text or ''
 523     if glossid == '':
 524         glossid = 'glossterm-' + text
 525     return [
 526         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 527             glossid, text)
 528     ]
 529
 530
 531 def convert_imageobject(ctx, xml):
 532     imagedata = xml.find('imagedata')
 533     if imagedata is not None:
 534         # TODO(ensonic): warn on missing fileref attr?
 535         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 536     else:
 537         return []
 538
 539
 540 def convert_indexdiv(ctx, xml):
 541     title_tag = xml.find('title')
 542     title = title_tag.text
 543     xml.remove(title_tag)
 544     result = [
 545         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 546     ]
 547     convert_inner(ctx, xml, result)
 548     return result
 549
 550
 551 def convert_informaltable(ctx, xml):
 552     result = ['<div class="informaltable"><table class="informaltable"']
 553     a = xml.attrib
 554     if 'pgwide' in a and a['pgwide'] == '1':
 555         result.append(' width="100%"')
 556     if 'frame' in a and a['frame'] == 'none':
 557         result.append(' border="0"')
 558     result.append('>\n')
 559     convert_inner(ctx, xml, result)
 560     result.append('</table></div>')
 561     if xml.tail:
 562         result.append(xml.tail)
 563     return result
 564
 565
 566 def convert_itemizedlist(ctx, xml):
 567     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 568     convert_inner(ctx, xml, result)
 569     result.append('</ul></div>')
 570     if xml.tail:
 571         result.append(xml.tail)
 572     return result
 573
 574
 575 def convert_link(ctx, xml):
 576     linkend = xml.attrib['linkend']
 577     if linkend in fixxref.NoLinks:
 578         linkend = None
 579     result = []
 580     if linkend:
 581         link_text = []
 582         convert_inner(ctx, xml, link_text)
 583         append_text(xml.text, link_text)
 584         # TODO: fixxref does some weird checks in xml.text
 585         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 586     append_text(xml.tail, result)
 587     return result
 588
 589
 590 def convert_listitem(ctx, xml):
 591     result = ['<li class="listitem">']
 592     convert_inner(ctx, xml, result)
 593     result.append('</li>')
 594     # is in itemizedlist and there can be no 'text'
 595     return result
 596
 597
 598 def convert_literallayout(ctx, xml):
 599     result = ['<div class="literallayout"><p><br>\n']
 600     append_text(xml.text, result)
 601     convert_inner(ctx, xml, result)
 602     result.append('</p></div>')
 603     append_text(xml.tail, result)
 604     return result
 605
 606
 607 def convert_orderedlist(ctx, xml):
 608     result = ['<div class="orderedlistlist"><ol class="orderedlistlist" type="1">']
 609     convert_inner(ctx, xml, result)
 610     result.append('</ol></div>')
 611     append_text(xml.tail, result)
 612     return result
 613
 614
 615 def convert_para(ctx, xml):
 616     result = []
 617     if 'id' in xml.attrib:
 618         result.append('<a name="%s"></a>' % xml.attrib['id'])
 619     result.append('<p>')
 620     append_text(xml.text, result)
 621     convert_inner(ctx, xml, result)
 622     result.append('</p>')
 623     append_text(xml.tail, result)
 624     return result
 625
 626
 627 def convert_para_like(ctx, xml):
 628     result = []
 629     if 'id' in xml.attrib:
 630         result.append('<a name="%s"></a>' % xml.attrib['id'])
 631     result.append('<p class="%s">' % xml.tag)
 632     append_text(xml.text, result)
 633     convert_inner(ctx, xml, result)
 634     result.append('</p>')
 635     append_text(xml.tail, result)
 636     return result
 637
 638
 639 def convert_phrase(ctx, xml):
 640     result = ['<span']
 641     if 'role' in xml.attrib:
 642         result.append(' class="%s">' % xml.attrib['role'])
 643     else:
 644         result.append('>')
 645     append_text(xml.text, result)
 646     convert_inner(ctx, xml, result)
 647     result.append('</span>')
 648     append_text(xml.tail, result)
 649     return result
 650
 651
 652 def convert_primaryie(ctx, xml):
 653     result = ['<dt>\n']
 654     convert_inner(ctx, xml, result)
 655     result.append('\n</dt>\n<dd></dd>\n')
 656     return result
 657
 658
 659 def convert_pre(ctx, xml):
 660     result = ['<pre class="%s">\n' % xml.tag]
 661     append_text(xml.text, result)
 662     convert_inner(ctx, xml, result)
 663     result.append('</pre>')
 664     append_text(xml.tail, result)
 665     return result
 666
 667
 668 def convert_programlisting(ctx, xml):
 669     result = []
 670     if xml.attrib.get('role', '') == 'example':
 671         if xml.text:
 672             lang = xml.attrib.get('language', 'c').lower()
 673             if lang not in LEXERS:
 674                 LEXERS[lang] = get_lexer_by_name(lang)
 675             lexer = LEXERS.get(lang, None)
 676             if lexer:
 677                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 678
 679                 # we do own line-numbering
 680                 line_count = highlighted.count('\n')
 681                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 682                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 683   <tbody>
 684     <tr>
 685       <td class="listing_lines" align="right"><pre>%s</pre></td>
 686       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 687     </tr>
 688   </tbody>
 689 </table>
 690 """ % (source_lines, highlighted))
 691             else:
 692                 logging.warn('No pygments lexer for language="%s"', lang)
 693                 result.append('<pre class="programlisting">')
 694                 result.append(xml.text)
 695                 result.append('</pre>')
 696     else:
 697         result.append('<pre class="programlisting">')
 698         append_text(xml.text, result)
 699         convert_inner(ctx, xml, result)
 700         result.append('</pre>')
 701     append_text(xml.tail, result)
 702     return result
 703
 704
 705 def convert_quote(ctx, xml):
 706     result = ['<span class="quote">"<span class="quote">']
 707     append_text(xml.text, result)
 708     convert_inner(ctx, xml, result)
 709     result.append('</span>"</span>')
 710     append_text(xml.tail, result)
 711     return result
 712
 713
 714 def convert_refsect1(ctx, xml):
 715     # Add a divider between two consequitive refsect2
 716     def convert_inner(ctx, xml, result):
 717         prev = None
 718         for child in xml:
 719             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 720                 result.append('<hr>\n')
 721             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 722             prev = child
 723     return convert_sect(ctx, xml, 'h2', convert_inner)
 724
 725
 726 def convert_refsect2(ctx, xml):
 727     return convert_sect(ctx, xml, 'h3')
 728
 729
 730 def convert_refsect3(ctx, xml):
 731     return convert_sect(ctx, xml, 'h4')
 732
 733
 734 def convert_row(ctx, xml):
 735     result = ['<tr>\n']
 736     convert_inner(ctx, xml, result)
 737     result.append('</tr>\n')
 738     return result
 739
 740
 741 def convert_sect1_tag(ctx, xml):
 742     return convert_sect(ctx, xml, 'h2')
 743
 744
 745 def convert_sect2(ctx, xml):
 746     return convert_sect(ctx, xml, 'h3')
 747
 748
 749 def convert_sect3(ctx, xml):
 750     return convert_sect(ctx, xml, 'h4')
 751
 752
 753 def convert_simpara(ctx, xml):
 754     result = ['<p>']
 755     append_text(xml.text, result)
 756     result.append('</p>')
 757     append_text(xml.tail, result)
 758     return result
 759
 760
 761 def convert_span(ctx, xml):
 762     result = ['<span class="%s">' % xml.tag]
 763     append_text(xml.text, result)
 764     convert_inner(ctx, xml, result)
 765     result.append('</span>')
 766     append_text(xml.tail, result)
 767     return result
 768
 769
 770 def convert_table(ctx, xml):
 771     result = ['<div class="table">']
 772     if 'id' in xml.attrib:
 773         result.append('<a name="%s"></a>' % xml.attrib['id'])
 774     title_tag = xml.find('title')
 775     if title_tag is not None:
 776         result.append('<p class="title"><b>')
 777         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 778         result.extend(convert_title(ctx, title_tag))
 779         result.append('</b></p>')
 780         xml.remove(title_tag)
 781     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 782
 783     convert_inner(ctx, xml, result)
 784
 785     result.append('</table></div></div>')
 786     append_text(xml.tail, result)
 787     return result
 788
 789
 790 def convert_tbody(ctx, xml):
 791     result = ['<tbody>']
 792     ctx['table.entry'] = 'td'
 793     convert_inner(ctx, xml, result)
 794     result.append('</tbody>')
 795     # is in tgroup and there can be no 'text'
 796     return result
 797
 798
 799 def convert_tgroup(ctx, xml):
 800     # tgroup does not expand to anything, but the nested colspecs need to
 801     # be put into a colgroup
 802     cols = xml.findall('colspec')
 803     result = []
 804     if cols:
 805         result.append('<colgroup>\n')
 806         for col in cols:
 807             result.extend(convert_colspec(ctx, col))
 808             xml.remove(col)
 809         result.append('</colgroup>\n')
 810     convert_inner(ctx, xml, result)
 811     # is in informaltable and there can be no 'text'
 812     return result
 813
 814
 815 def convert_thead(ctx, xml):
 816     result = ['<thead>']
 817     ctx['table.entry'] = 'th'
 818     convert_inner(ctx, xml, result)
 819     result.append('</thead>')
 820     # is in tgroup and there can be no 'text'
 821     return result
 822
 823
 824 def convert_title(ctx, xml):
 825     # This is always called from some context
 826     result = []
 827     append_text(xml.text, result)
 828     convert_inner(ctx, xml, result)
 829     append_text(xml.tail, result)
 830     return result
 831
 832
 833 def convert_ulink(ctx, xml):
 834     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 835     if xml.tail:
 836         result.append(xml.tail)
 837     return result
 838
 839
 840 def convert_userinput(ctx, xml):
 841     result = ['<span class="command"><strong>']
 842     append_text(xml.text, result)
 843     convert_inner(ctx, xml, result)
 844     result.append('</strong></span>')
 845     append_text(xml.tail, result)
 846     return result
 847
 848
 849 def convert_variablelist(ctx, xml):
 850     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 851 <colgroup>
 852 <col align="left" valign="top">
 853 <col>
 854 </colgroup>
 855 <tbody>"""]
 856     convert_inner(ctx, xml, result)
 857     result.append("""</tbody>
 858 </table></div>""")
 859     return result
 860
 861
 862 def convert_varlistentry(ctx, xml):
 863     result = ['<tr>']
 864
 865     result.append('<td><p>')
 866     term = xml.find('term')
 867     result.extend(convert_span(ctx, term))
 868     result.append('</p></td>')
 869
 870     result.append('<td>')
 871     listitem = xml.find('listitem')
 872     convert_inner(ctx, listitem, result)
 873     result.append('</td>')
 874
 875     result.append('<tr>')
 876     return result
 877
 878
# Dispatch table: docbook tag name -> converter function. convert_inner()
# looks up each child's tag here and falls back to convert__unknown() for
# tags without an entry. Every converter takes (ctx, xml) and returns a list
# of html strings.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
}
 957
 958 # conversion helpers
 959
# Start of every generated html page, up to and including the opening <body>.
# It is a %-format template with two '%s' slots: the text for <title> and the
# pre-rendered <link rel="..."> tags (see generate_head_links()).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
 969
 970
 971 def generate_head_links(ctx):
 972     n = ctx['nav_home']
 973     result = [
 974         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
 975     ]
 976     if 'nav_up' in ctx:
 977         n = ctx['nav_up']
 978         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
 979     if 'nav_prev' in ctx:
 980         n = ctx['nav_prev']
 981         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
 982     if 'nav_next' in ctx:
 983         n = ctx['nav_next']
 984         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
 985     return ''.join(result)
 986
 987
 988 def generate_nav_links(ctx):
 989     n = ctx['nav_home']
 990     result = [
 991         '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n.filename
 992     ]
 993     if 'nav_up' in ctx:
 994         n = ctx['nav_up']
 995         result.append(
 996             '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n.filename)
 997     else:
 998         result.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
 999     if 'nav_prev' in ctx:
1000         n = ctx['nav_prev']
1001         result.append(
1002             '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n.filename)
1003     else:
1004         result.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
1005     if 'nav_next' in ctx:
1006         n = ctx['nav_next']
1007         result.append(
1008             '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n.filename)
1009     else:
1010         result.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
1011
1012     return ''.join(result)
1013
1014
def generate_toc(ctx, node):
    """Build the nested table-of-contents entries for the children of node.

    Each child becomes a <dt> with a link to its file, plus the subtitle if it
    has one. Children that have children themselves are followed by a nested
    <dd><dl> list built recursively.

    Args:
      ctx: conversion context (only passed along to the recursion)
      node: tree node whose children are listed

    Returns:
      list of html fragments, empty if node has no children
    """
    toc = []
    emit = toc.append
    for entry in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        emit('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            entry.title_tag, entry.filename, entry.title))
        if entry.subtitle:
            emit('<span class="%s"> — %s</span>' % (entry.subtitle_tag, entry.subtitle))
        emit('</dt>\n')
        if not entry.children:
            continue
        emit('<dd><dl>')
        toc.extend(generate_toc(ctx, entry))
        emit('</dl></dd>')
    return toc
1029
1030
def generate_basic_nav(ctx):
    """Build the navigation bar for pages without shortcut links.

    Args:
      ctx: conversion context holding the navigation nodes

    Returns:
      html for the navigation table: an empty 'shortcuts' cell followed by the
      cells from generate_nav_links()
    """
    nav_cells = generate_nav_links(ctx)
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """
    return template % nav_cells
1039
1040
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the navigation bar with one shortcut link per division.

    Args:
      ctx: conversion context holding the navigation nodes
      divs: xml nodes of the divisions; their titles become the link texts
      prefix: string put in front of the title to form the link anchor
      span_id: suffix for the id ('nav_<span_id>') of the span with the links

    Returns:
      html for the navigation table
    """
    titles = [xml_get_title(div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles
    ]
    separator = '\n<span class="dim">|</span>\n'

    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """
    return template % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
1058
1059
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation bar of a refentry page to result.

    Besides a 'Top' link, the bar has one shortcut per 'refsect1'. Sections
    whose role ends in '_proto' and sections without an 'id' are left out.

    Args:
      ctx: conversion context holding the navigation nodes
      refsect1s: the 'refsect1' xml nodes of the page
      result: list of html fragments, extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    shortcut = """
          <span class="dim">|</span>
          <a href="#%s" class="shortcut">%s</a>
          """
    for section in refsect1s:
        attrib = section.attrib
        # TOC sections (role="xxx_proto") are not listed and a section without
        # an 'id' attr can't be linked
        if attrib.get('role', '').endswith("_proto") or 'id' not in attrib:
            continue
        section_title = xml_get_title(section)
        result.append(shortcut % (attrib['id'], section_title))

    closing = """
    </td>
    %s
  </tr>
</table>
"""
    result.append(closing % generate_nav_links(ctx))
1085
1086
def generate_footer(ctx):
    """Build the footnotes section of a page.

    Args:
      ctx: conversion context; 'footnotes', if present, is an iterable where
        each item is a sequence of html fragments

    Returns:
      list of html fragments; empty if ctx has no 'footnotes' key
    """
    if 'footnotes' not in ctx:
        return []

    footer = ['<div class="footnotes">\n\n<br><hr style="width:100; text-align:left;margin-left: 0">\n']
    for footnote in ctx['footnotes']:
        footer.extend(footnote)
    footer.append('</div>\n')
    return footer
1097
1098
def get_id(node):
    """Return the 'id' of the xml element of node, generating one if needed.

    A generated id has the form 'id-1.<a>.<b>...', where the numbers are the
    1-based positions of the element and of each of its ancestors among their
    siblings, listed from the top of the tree downwards.

    Args:
      node: tree node; node.xml is the xml element to get the id for

    Returns:
      the non-empty 'id' attribute or the generated id
    """
    element = node.xml
    explicit_id = element.attrib.get('id', None)
    if explicit_id:
        return explicit_id

    logging.info('%d: No "id" attribute on "%s", generating one',
                 element.sourceline, element.tag)
    # Walk up the xml-tree and record the position of each element among its
    # siblings. This collects the positions bottom-up.
    positions = []
    ancestor = element.getparent()
    while ancestor is not None:
        siblings = ancestor.getchildren()
        positions.append(str(siblings.index(element) + 1))
        element, ancestor = ancestor, ancestor.getparent()
    positions.reverse()
    # logging.warning('%s: id indexes: %s', node.filename, str(positions))
    return 'id-1.' + '.'.join(positions)
1118
1119
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk into a page that starts with a table of contents.

    The page consists of the html header, the basic navigation bar, the title
    (if the chunk has a 'title' child), the TOC of the child chunks (if there
    are any), the converted content and the footnotes.

    Args:
      ctx: conversion context; ctx['node'] is the chunk to convert
      div_class: css class of the <div> wrapping the page content
      title_tag: html tag to use for the title, e.g. 'h1'

    Returns:
      list of html fragments
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    result.append(generate_basic_nav(ctx))
    result.append('<div class="%s">' % div_class)

    title = node.xml.find('title')
    if title is not None:
        heading = """
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>"""
        result.append(heading % (title_tag, get_id(node), title.text, title_tag))
        # the title has been used for the heading, drop it from the tree
        # before the remaining content gets converted
        node.xml.remove(title)

    toc = generate_toc(ctx, node)
    if toc:
        # TODO: not all docbook page types use this extra heading
        toc_open = """<p><b>Table of Contents</b></p>
    <div class="toc">
      <dl class="toc">
    """
        toc_close = """</dl>
    </div>
    """
        result.extend([toc_open, *toc, toc_close])

    convert_inner(ctx, node.xml, result)
    result.extend(generate_footer(ctx))
    result.append('</div>\n</body>\n</html>')
    return result
1153
1154
1155 # docbook chunks
1156
1157
def convert_book(ctx):
    """Convert the 'book' chunk into the start page of the documentation.

    The page shows the book title, the converted 'bookinfo' (without its
    title) and the table of contents of the whole tree.

    Args:
      ctx: conversion context; ctx['node'] is the book chunk

    Returns:
      list of html fragments

    Raises:
      IndexError: if the book has no 'bookinfo' child
    """
    node = ctx['node']
    banner = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
"""
    result = [HTML_HEADER % (node.title, generate_head_links(ctx))]
    result.append(banner % node.title)

    bookinfo = node.xml.findall('bookinfo')[0]
    # the title is already shown in the banner, don't convert it again
    book_title = bookinfo.find('title')
    if book_title is not None:
        bookinfo.remove(book_title)
    result.extend(convert_bookinfo(ctx, bookinfo))

    result.append('<div class="toc">\n  <dl class="toc">\n')
    result.extend(generate_toc(ctx, node.root))
    result.append('</dl>\n</div>\n')

    result.extend(generate_footer(ctx))
    result.append('</div>\n</body>\n</html>')
    return result
1186
1187
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a TOC page in a 'chapter' div with a <h2> title."""
    return convert_chunk_with_toc(ctx, 'chapter', 'h2')
1190
1191
def convert_glossary(ctx):
    """Convert a 'glossary' chunk into a html page.

    The navigation bar gets one shortcut per 'glossdiv', followed by the page
    title, the converted glossdivs and the footnotes.

    Args:
      ctx: conversion context; ctx['node'] is the glossary chunk

    Returns:
      list of html fragments
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    result.append(generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'))
    titlepage = """<div class="glossary">
<div class="titlepage"><h1 class="title">
<a name="%s"></a>%s</h1>
</div>"""
    result.append(titlepage % (get_id(node), node.title))

    for glossdiv in glossdivs:
        result.extend(convert_glossdiv(ctx, glossdiv))
    result.extend(generate_footer(ctx))
    result.append('</div>\n</body>\n</html>')
    return result
1211
1212
def convert_index(ctx):
    """Convert an 'index' chunk into a html page.

    The navigation bar gets one shortcut per nested 'indexdiv', followed by
    the page title, the converted indexdivs and the footnotes.

    Args:
      ctx: conversion context; ctx['node'] is the index chunk

    Returns:
      list of html fragments

    Raises:
      AttributeError: if the index has no top-level 'indexdiv' child
    """
    node = ctx['node']
    # The divisions to show are the indexdivs nested in the first indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')

    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    result.append(generate_alpha_nav(ctx, indexdivs, 'idx', 'index'))
    titlepage = """<div class="index">
<div class="titlepage"><h2 class="title">
<a name="%s"></a>%s</h2>
</div>"""
    result.append(titlepage % (get_id(node), node.title))

    for indexdiv in indexdivs:
        result.extend(convert_indexdiv(ctx, indexdiv))
    result.extend(generate_footer(ctx))
    result.append('</div>\n</body>\n</html>')
    return result
1233
1234
def convert_part(ctx):
    """Convert a 'part' chunk: a TOC page in a 'part' div with a <h1> title."""
    return convert_chunk_with_toc(ctx, 'part', 'h1')
1237
1238
def convert_preface(ctx):
    """Convert a 'preface' chunk into a html page.

    The page consists of the html header, the basic navigation bar, the title
    (if the chunk has a 'title' child), the converted content and the
    footnotes.

    Args:
      ctx: conversion context; ctx['node'] is the preface chunk

    Returns:
      list of html fragments
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    result.append(generate_basic_nav(ctx))
    result.append('<div class="preface">')

    title = node.xml.find('title')
    if title is not None:
        heading = """
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>"""
        result.append(heading % (get_id(node), title.text))
        # the title has been used for the heading, drop it from the tree
        # before the remaining content gets converted
        node.xml.remove(title)

    convert_inner(ctx, node.xml, result)
    result.extend(generate_footer(ctx))
    result.append('</div>\n</body>\n</html>')
    return result
1259
1260
def convert_reference(ctx):
    """Convert a 'reference' chunk: a TOC page in a 'reference' div with a <h1> title."""
    return convert_chunk_with_toc(ctx, 'reference', 'h1')
1263
1264
def convert_refentry(ctx):
    """Convert a 'refentry' chunk into a html page.

    The page consists of the html header, a navigation bar with shortcuts to
    the 'refsect1' sections, a title block, the converted sections and the
    footnotes.

    The line below the title reads '<title> — <purpose>'. The purpose is taken
    from the 'refnamediv/refpurpose' element of the refentry. If there is no
    such element, or it is empty, only the title is shown.

    Args:
      ctx: conversion context; ctx['node'] is the refentry chunk

    Returns:
      list of html fragments
    """
    from html import escape

    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Use the purpose of this very refentry instead of a fixed text.
    summary = node.title
    refpurpose = node.xml.find('refnamediv/refpurpose')
    if refpurpose is not None:
        # itertext() also picks up text inside inline markup; the parsed text
        # is unescaped, so escape it again for the html output
        purpose = escape(''.join(refpurpose.itertext()).strip(), quote=False)
        if purpose:
            summary += ' — ' + purpose

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s</p>
    </td>
    <td class="gallery_image" valign="top" align="right"></td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, summary))

    for s in refsect1s:
        result.extend(convert_refsect1(ctx, s))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1295
1296
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: a TOC page in a 'sect1' div with a <h2> title."""
    return convert_chunk_with_toc(ctx, 'sect1', 'h2')
1299
1300
# Maps the name of a chunk (node.name) to the function that creates the page
# for it. Each converter takes the conversion context and returns the html as
# a list of strings. Chunks with a name not listed here are reported by
# convert() with a warning.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
1313
1314
def generate_nav_nodes(files, node):
    """Determine the navigation targets for the page of node.

    Args:
      files: list of all tree nodes, in the order the pages follow each other
      node: the tree node of the current page, must be in files

    Returns:
      dict with 'nav_home' (the root) and, where they exist, 'nav_up' (the
      parent), 'nav_prev' and 'nav_next' (the neighbours in files)
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    following = position + 1
    if following < len(files):
        nav['nav_next'] = files[following]
    return nav
1328
1329
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The file is created in any case. If there is no converter for the type of
    the chunk, a warning is logged and the file stays empty.

    Args:
      out_dir: already created output dir
      module: the module name, made available to the converters as ctx['module']
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
            return
        html.writelines(converter(ctx))
1354
1355
def create_devhelp2_toc(node):
    """Build the nested <sub> entries of the devhelp2 chapter list.

    Args:
      node: tree node whose children are listed

    Returns:
      list of xml fragments; children without children of their own become
      self-closing <sub/> tags, the others wrap the entries of their children
    """
    entries = []
    for child in node.children:
        attrs = (child.title, child.filename)
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        entries.append('<sub name="%s" link="%s">\n' % attrs)
        entries.extend(create_devhelp2_toc(child))
        entries.append('</sub>\n')
    return entries
1366
1367
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into xml attributes.

    The condition is a '|' separated list of items such as 'since:1.2' or
    'deprecated'. Each item becomes 'name="value"'; the value is what follows
    the first ':' and is empty for an item without ':'.

    Args:
      node: xml node to read the 'condition' attribute from

    Returns:
      the attributes with a leading space, or '' if node has no condition
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    condition = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for item in condition.split('|'):
        # deprecated can have no description, which gives an empty value
        name, _, value = item.partition(':')
        keywords.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(keywords)
1382
1383
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 <keyword> entry for a 'refsect2' node.

    Args:
      node: xml node with 'role' and 'id' attributes
      base_link: link prefix, the 'id' of node is appended to it

    Returns:
      the <keyword> tag as a string
    """
    attrib = node.attrib
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        attrib['role'],
        xml_get_title(node),
        base_link + attrib['id'],
        create_devhelp2_condition_attribs(node),
    )
1388
1389
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 <keyword> entry for a member listed in a 'refsect3'.

    Args:
      node: xml node with a 'role' attribute
      base_link: link prefix, name is appended to it
      title: value for the 'name' attribute of the keyword
      name: anchor of the member

    Returns:
      the <keyword> tag as a string
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, conditions)
1394
1395
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    The file lists the chapters (from the chunk tree) and one keyword for each
    'refsect2' with a 'role', plus keywords for the enum and struct members
    found in them.

    The book title and the online location are read from '/book/bookinfo'.
    Both are optional: a missing title gives an empty 'title' attribute and
    without an online location the 'online' attribute is left out.

    The content is fully generated before the file is opened, so that a
    failure does not leave a truncated file behind.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: root element of the docbook tree
      files: list of nodes in the tree in pre-order, must not be empty
    """
    title = ''
    online_attr = ''
    # xpath() returns a (possibly empty) list and never None
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        titles = bookinfo.xpath('./title/text()')
        if titles:
            title = titles[0]
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_attr = ' online="%s"' % online_urls[0]
        # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append("""  </chapters>
  <functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum members: the keyword name is the text of the member entry
            for refsect3_node in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
            # struct members: the keyword name is the id of the member
            for refsect3_node in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

    result.append("""  </functions>
</book>
""")

    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
1445
1446
def get_dirs(uninstalled):
    """Determine the gtk-doc data dir and the dir with the style files.

    Args:
      uninstalled: if true, locate the dirs relative to the running script
        (or $ABS_TOP_SRCDIR), otherwise use the installed data dir

    Returns:
      tuple of (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    srcdir_xsl = os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'
    # try 'srcdir' (set from makefiles) too
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl') and os.path.exists(srcdir_xsl):
        gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1460
1461
def main(module, index_file, out_dir, uninstalled):
    """Generate the html pages and the devhelp2 index for a docbook document.

    Args:
      module: the module name
      index_file: path of the main docbook xml file; xml-includes are resolved
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and append the style definitions of the formatter to the copy
    copied_css = os.path.join(out_dir, 'style.css')
    with open(copied_css, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #   can generate navigation and link tags.
    root = tree.getroot()
    files = list(PreOrderIter(chunk(root)))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, root, files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
1497
1498
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed command line options; options.args holds the module
        name and the path of the main document, options.uninstalled is passed
        on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))