gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - 'section'/'simplesect' - the first we convert as a chunk, the nested ones we
  40     need to convert as 'sect{2,3,4,...}, we can track depth in 'ctx'
  41   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  42 - check each docbook tag if it can contain #PCDATA, if not don't check for
  43   xml.text/xml.tail and add a comment (# no PCDATA allowed here)
  44 - consider some perf-warnings flag
  45   - see 'No "id" attribute on'
  46 - find a better way to print context for warnings
  47   - we use 'xml.sourceline', but this all does not help a lot due to xi:include
  48 - copy images
  49   - do we need to find them on the respective tags (inlinegraphic, imageobject)
  50     and search them in the path setup by '--path'
  51 - commandline options
  52   - mkhtml:
  53     --path 'Extra source directories' - used to find images
  54   - fixxref:
  55 - consolidate title handling:
  56   - always use the titles-dict
  57   - there only store what we have (xml, tag, ...)
  58   - when chunking generate 'id's and add entries to titles-dict
  59   - add accessors for title and raw_title that lazily get them
  60
  61 DIFFERENCES:
  62 - titles
  63   - we add the chunk label to the title in toc, on the page and in nav tooltips
  64   - docbook xsl only sometimes adds the label to the titles and when it does it
  65     adds name chunk type too (e.g. 'Part I.' instead of 'I.')
  66 - navigation
  67   - we always add an up-link except on the first page
  68 - footer
  - we're now omitting the footer
  70 - tocs
  - we always add 'Table of Contents' before a toc
  72   - docbook does that for some pages, it is configurable
  73
  74 OPTIONAL:
  75 - minify html: https://pypi.python.org/pypi/htmlmin/
  76
  77 Requirements:
  78 sudo pip3 install anytree lxml pygments
  79
  80 Example invocation:
  81 cd tests/bugs/docs/
  82 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  83 xdg-open db2html/index.html
  84 meld html db2html
  85
  86 Benchmarking:
  87 cd tests/bugs/docs/;
  88 rm html-build.stamp; time make html-build.stamp
  89 """
  90
  91 import argparse
  92 import errno
  93 import logging
  94 import os
  95 import shutil
  96 import sys
  97
  98 from anytree import Node, PreOrderIter
  99 from copy import deepcopy
 100 from glob import glob
 101 from lxml import etree
 102 from pygments import highlight
 103 from pygments.lexers import CLexer
 104 from pygments.formatters import HtmlFormatter
 105 from timeit import default_timer as timer
 106
 107 from . import config, fixxref
 108
# pygments setup
# Cache of pygments lexer instances keyed by lower-case language name. Only
# the C lexer is created up front; convert_programlisting() adds further
# entries on demand.
LEXERS = {
    'c': CLexer()
}
# Shared formatter; it is created with nowrap=True and
# convert_programlisting() puts its own <pre> markup around the output.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
 115
 116
 117 class ChunkParams(object):
 118     def __init__(self, prefix, parent=None, min_idx=0):
 119         self.prefix = prefix
 120         self.parent = parent
 121         self.min_idx = min_idx
 122         self.idx = 1
 123
 124
# 'min_idx' value for tags that never get a file of their own: no child index
# satisfies 'idx >= min_idx' in chunk() when min_idx is infinity.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# Maps a docbook tag to its ChunkParams(prefix, parent, min_idx). Only tags
# listed here are treated as chunks by chunk().
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
 154
# Per-tag pair of compiled xpaths (title, subtitle), both relative to the
# element they are applied to; the subtitle xpath may be None. The '_' entry
# is the fallback for tags without an entry of their own.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every element that has an 'id' attribute.
ID_XPATH = etree.XPath('//*[@id]')

# Selects all 'glossentry' elements.
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# Number for the next footnote; convert_footnote() counts per document, not
# per page.
footnote_idx = 1

# nested dict, keyed by element 'id' (see add_id_links_and_titles()), with
# subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
titles = {}
 176
 177
 178 def encode_entities(text):
 179     return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
 180
 181
 182 def raw_text(xml):
 183     return etree.tostring(xml, method="text", encoding=str).strip()
 184
 185
 186 def gen_chunk_name(node, chunk_params):
 187     """Generate a chunk file name
 188
 189     This is either based on the id or on the position in the doc. In the latter
 190     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 191     type.
 192     """
 193     if 'id' in node.attrib:
 194         return node.attrib['id']
 195
 196     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 197     chunk_params.idx += 1
 198
 199     # handle parents to make names of nested tags like in docbook
 200     # - we only need to prepend the parent if there are > 1 of them in the
 201     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 202     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 203     #   when we chunk explicitly and on each level maintain the 'idx'
 204     # while chunk_params.parent:
 205     #     parent = chunk_params.parent
 206     #     if parent not in CHUNK_PARAMS:
 207     #         break;
 208     #     chunk_params = CHUNK_PARAMS[parent]
 209     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 210
 211     logging.info('Gen chunk name: "%s"', name)
 212     return name
 213
 214
 215 def get_chunk_titles(module, node):
 216     tag = node.tag
 217     (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
 218
 219     ctx = {
 220         'module': module,
 221         'files': [],
 222     }
 223     result = {
 224         'title': None,
 225         'title_tag': None,
 226         'subtitle': None,
 227         'subtitle_tag': None
 228     }
 229     res = title(node)
 230     if res:
 231         # handle chunk label for tocs
 232         label = node.attrib.get('label')
 233         if label:
 234             label += '. '
 235         else:
 236             label = ''
 237
 238         xml = res[0]
 239         # TODO: consider to eval 'title'/'raw_title' lazily
 240         result['title'] = label + ''.join(convert_title(ctx, xml))
 241         result['raw_title'] = encode_entities(raw_text(xml))
 242         if xml.tag != 'title':
 243             result['title_tag'] = xml.tag
 244         else:
 245             result['title_tag'] = tag
 246
 247     if subtitle:
 248         res = subtitle(node)
 249         if res:
 250             xml = res[0]
 251             result['subtitle'] = ''.join(convert_title(ctx, xml))
 252             result['subtitle_tag'] = xml.tag
 253     return result
 254
 255
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.

    Only tags listed in CHUNK_PARAMS create a tree node; for any other tag
    the function returns 'parent' unchanged and does not descend into the
    children.

    xml_node: the xml element to process
    module: module name, passed on to get_chunk_titles()
    depth: nesting depth stored on the created node
    idx: index of this element among its chunkable siblings
    parent: the anytree Node to attach to (None for the root)
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        # NOTE: for elements without 'id' this advances chunk_params.idx, also
        # when the element ends up sharing the file of its parent below
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent
                # (the node keeps a detached deep copy of the sub tree)
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # not a separate file: reuse the file of the parent and address
            # the element via an anchor
            # NOTE(review): this reads parent.filename, so it assumes that
            # 'parent' is not None here - confirm against the callers
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        depth += 1
        idx = 0
        # NOTE(review): the recursive call can remove 'child' from 'xml_node'
        # while we iterate over it - confirm that lxml iteration tolerates it
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            # only chunkable children count for the sibling index
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
 297
 298
 299 def add_id_links_and_titles(files, links):
 300     for node in files:
 301         chunk_name = node.filename[:-5]
 302         chunk_base = node.filename + '#'
 303         for elem in ID_XPATH(node.xml):
 304             attr = elem.attrib['id']
 305             if attr == chunk_name:
 306                 links[attr] = node.filename
 307             else:
 308                 links[attr] = chunk_base + attr
 309
 310             title = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
 311             res = title(elem)
 312             if res:
 313                 xml = res[0]
 314                 # TODO: consider to eval 'title' lazily
 315                 titles[attr] = {
 316                     'title': encode_entities(raw_text(xml)),
 317                     'xml': xml,
 318                     'tag': elem.tag,
 319                 }
 320
 321
 322 def build_glossary(files):
 323     for node in files:
 324         if node.xml.tag != 'glossary':
 325             continue
 326         for term in GLOSSENTRY_XPATH(node.xml):
 327             # TODO: there can be all kind of things in a glossary. This only supports
 328             # what we commonly use, glossterm is mandatory
 329             key_node = term.find('glossterm')
 330             val_node = term.find('glossdef')
 331             if key_node is not None and val_node is not None:
 332                 glossary[raw_text(key_node)] = raw_text(val_node)
 333             else:
 334                 debug = []
 335                 if key_node is None:
 336                     debug.append('missing key')
 337                 if val_node is None:
 338                     debug.append('missing val')
 339                 logging.warning('Broken glossentry "%s": %s',
 340                                 term.attrib['id'], ','.join(debug))
 341
 342
 343 # conversion helpers
 344
 345
 346 def convert_inner(ctx, xml, result):
 347     for child in xml:
 348         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 349
 350
 351 def convert_ignore(ctx, xml):
 352     result = []
 353     convert_inner(ctx, xml, result)
 354     return result
 355
 356
 357 def convert_skip(ctx, xml):
 358     return []
 359
 360
 361 def append_idref(attrib, result):
 362     if 'id' in attrib:
 363         result.append('<a name="%s"></a>' % attrib['id'])
 364
 365
 366 def append_text(ctx, text, result):
 367     if text and ('no-strip' in ctx or text.strip()):
 368         result.append(encode_entities(text))
 369
 370
 371 missing_tags = {}
 372
 373
 374 def convert__unknown(ctx, xml):
 375     # don't recurse on subchunks
 376     if xml.tag in CHUNK_PARAMS:
 377         return []
 378     if isinstance(xml, etree._Comment):
 379         return ['<!-- ' + xml.text + '-->\n']
 380     else:
 381         # warn only once
 382         if xml.tag not in missing_tags:
 383             logging.warning('Add tag converter for "%s"', xml.tag)
 384             missing_tags[xml.tag] = True
 385         result = ['<!-- ' + xml.tag + '-->\n']
 386         convert_inner(ctx, xml, result)
 387         result.append('<!-- /' + xml.tag + '-->\n')
 388         return result
 389
 390
 391 def convert_mediaobject_children(ctx, xml, result):
 392     # look for textobject/phrase
 393     alt_text = ''
 394     textobject = xml.find('textobject')
 395     if textobject is not None:
 396         phrase = textobject.findtext('phrase')
 397         if phrase:
 398             alt_text = ' alt="%s"' % phrase
 399
 400     # look for imageobject/imagedata
 401     imageobject = xml.find('imageobject')
 402     if imageobject is not None:
 403         imagedata = imageobject.find('imagedata')
 404         if imagedata is not None:
 405             # TODO(ensonic): warn on missing fileref attr?
 406             result.append('<img src="%s"%s>' % (
 407                           imagedata.attrib.get('fileref', ''), alt_text))
 408
 409
 410 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 411     result = ['<div class="%s">\n' % xml.tag]
 412     title_tag = xml.find('title')
 413     if title_tag is not None:
 414         if 'id' in xml.attrib:
 415             result.append('<a name="%s"></a>' % xml.attrib['id'])
 416         result.append('<%s>%s</%s>' % (
 417             h_tag, ''.join(convert_title(ctx, title_tag)), h_tag))
 418     append_text(ctx, xml.text, result)
 419     inner_func(ctx, xml, result)
 420     result.append('</div>')
 421     append_text(ctx, xml.tail, result)
 422     return result
 423
 424
 425 def xml_get_title(ctx, xml):
 426     title_tag = xml.find('title')
 427     if title_tag is not None:
 428         return ''.join(convert_title(ctx, title_tag))
 429     else:
 430         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 431         return ''
 432
 433
 434 # docbook tags
 435
 436
 437 def convert_abstract(ctx, xml):
 438     result = ["""<div class="abstract">
 439     <p class="title"><b>Abstract</b></p>"""]
 440     append_text(ctx, xml.text, result)
 441     convert_inner(ctx, xml, result)
 442     result.append('</div>')
 443     append_text(ctx, xml.tail, result)
 444     return result
 445
 446
 447 def convert_acronym(ctx, xml):
 448     key = xml.text
 449     title = glossary.get(key, '')
 450     # TODO: print a sensible warning if missing
 451     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 452     if xml.tail:
 453         result.append(xml.tail)
 454     return result
 455
 456
 457 def convert_anchor(ctx, xml):
 458     return ['<a name="%s"></a>' % xml.attrib['id']]
 459
 460
 461 def convert_bookinfo(ctx, xml):
 462     result = ['<div class="titlepage">']
 463     convert_inner(ctx, xml, result)
 464     result.append("""<hr>
 465 </div>""")
 466     if xml.tail:
 467         result.append(xml.tail)
 468     return result
 469
 470
 471 def convert_blockquote(ctx, xml):
 472     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 473     append_text(ctx, xml.text, result)
 474     convert_inner(ctx, xml, result)
 475     result.append('</blockquote>\n</div>')
 476     append_text(ctx, xml.tail, result)
 477     return result
 478
 479
 480 def convert_code(ctx, xml):
 481     result = ['<code class="%s">' % xml.tag]
 482     append_text(ctx, xml.text, result)
 483     convert_inner(ctx, xml, result)
 484     result.append('</code>')
 485     append_text(ctx, xml.tail, result)
 486     return result
 487
 488
 489 def convert_colspec(ctx, xml):
 490     result = ['<col']
 491     a = xml.attrib
 492     if 'colname' in a:
 493         result.append(' class="%s"' % a['colname'])
 494     if 'colwidth' in a:
 495         result.append(' width="%s"' % a['colwidth'])
 496     result.append('>\n')
 497     # is in tgroup and there can be no 'text'
 498     return result
 499
 500
 501 def convert_command(ctx, xml):
 502     result = ['<strong class="userinput"><code>']
 503     append_text(ctx, xml.text, result)
 504     convert_inner(ctx, xml, result)
 505     result.append('</code></strong>')
 506     append_text(ctx, xml.tail, result)
 507     return result
 508
 509
 510 def convert_corpauthor(ctx, xml):
 511     result = ['<div><h3 class="corpauthor">\n']
 512     append_text(ctx, xml.text, result)
 513     convert_inner(ctx, xml, result)
 514     result.append('</h3></div>\n')
 515     append_text(ctx, xml.tail, result)
 516     return result
 517
 518
 519 def convert_div(ctx, xml):
 520     result = ['<div class="%s">\n' % xml.tag]
 521     append_text(ctx, xml.text, result)
 522     convert_inner(ctx, xml, result)
 523     result.append('</div>')
 524     append_text(ctx, xml.tail, result)
 525     return result
 526
 527
 528 def convert_emphasis(ctx, xml):
 529     if 'role' in xml.attrib:
 530         result = ['<span class="%s">' % xml.attrib['role']]
 531         end = '</span>'
 532     else:
 533         result = ['<span class="emphasis"><em>']
 534         end = '</em></span>'
 535     append_text(ctx, xml.text, result)
 536     convert_inner(ctx, xml, result)
 537     result.append(end)
 538     append_text(ctx, xml.tail, result)
 539     return result
 540
 541
 542 def convert_em(ctx, xml):
 543     result = ['<em class="%s">' % xml.tag]
 544     append_text(ctx, xml.text, result)
 545     convert_inner(ctx, xml, result)
 546     result.append('</em>')
 547     append_text(ctx, xml.tail, result)
 548     return result
 549
 550
 551 def convert_em_code(ctx, xml):
 552     result = ['<em class="%s"><code>' % xml.tag]
 553     append_idref(xml.attrib, result)
 554     append_text(ctx, xml.text, result)
 555     convert_inner(ctx, xml, result)
 556     result.append('</code></em>')
 557     append_text(ctx, xml.tail, result)
 558     return result
 559
 560
 561 def convert_entry(ctx, xml):
 562     entry_type = ctx['table.entry']
 563     result = ['<' + entry_type]
 564     if 'role' in xml.attrib:
 565         result.append(' class="%s"' % xml.attrib['role'])
 566     if 'morerows' in xml.attrib:
 567         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 568     result.append('>')
 569     append_text(ctx, xml.text, result)
 570     convert_inner(ctx, xml, result)
 571     result.append('</' + entry_type + '>')
 572     append_text(ctx, xml.tail, result)
 573     return result
 574
 575
 576 def convert_footnote(ctx, xml):
 577     footnotes = ctx.get('footnotes', [])
 578     # footnotes idx is not per page, but per doc
 579     global footnote_idx
 580     idx = footnote_idx
 581     footnote_idx += 1
 582
 583     # need a pair of ids for each footnote (docbook generates different ids)
 584     this_id = 'footnote-%d' % idx
 585     that_id = 'ftn.' + this_id
 586
 587     inner = ['<div id="%s" class="footnote">' % that_id]
 588     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 589         this_id, idx))
 590     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 591     # get double nested paras :/.
 592     # convert_inner(ctx, xml, inner)
 593     para = xml.find('para')
 594     if para is None:
 595         para = xml.find('simpara')
 596     if para is not None:
 597         inner.append(para.text)
 598     else:
 599         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline, raw_text(xml))
 600     inner.append('</p></div>')
 601     footnotes.append(inner)
 602     ctx['footnotes'] = footnotes
 603     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 604         that_id, this_id, idx)]
 605
 606
 607 def convert_formalpara(ctx, xml):
 608     result = None
 609     title_tag = xml.find('title')
 610     result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
 611     para_tag = xml.find('para')
 612     append_text(ctx, para_tag.text, result)
 613     convert_inner(ctx, para_tag, result)
 614     append_text(ctx, para_tag.tail, result)
 615     result.append('</p>')
 616     append_text(ctx, xml.tail, result)
 617     return result
 618
 619
 620 def convert_glossdef(ctx, xml):
 621     result = ['<dd class="glossdef">']
 622     convert_inner(ctx, xml, result)
 623     result.append('</dd>\n')
 624     return result
 625
 626
 627 def convert_glossdiv(ctx, xml):
 628     title_tag = xml.find('title')
 629     title = title_tag.text
 630     xml.remove(title_tag)
 631     result = [
 632         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 633     ]
 634     convert_inner(ctx, xml, result)
 635     return result
 636
 637
 638 def convert_glossentry(ctx, xml):
 639     result = []
 640     convert_inner(ctx, xml, result)
 641     return result
 642
 643
 644 def convert_glossterm(ctx, xml):
 645     glossid = ''
 646     text = ''
 647     anchor = xml.find('anchor')
 648     if anchor is not None:
 649         glossid = anchor.attrib.get('id', '')
 650         text += anchor.tail or ''
 651     text += xml.text or ''
 652     if glossid == '':
 653         glossid = 'glossterm-' + text
 654     return [
 655         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 656             glossid, text)
 657     ]
 658
 659
 660 def convert_indexdiv(ctx, xml):
 661     title_tag = xml.find('title')
 662     title = title_tag.text
 663     xml.remove(title_tag)
 664     result = [
 665         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 666     ]
 667     convert_inner(ctx, xml, result)
 668     return result
 669
 670
 671 def convert_informaltable(ctx, xml):
 672     result = ['<div class="informaltable"><table class="informaltable"']
 673     a = xml.attrib
 674     if 'pgwide' in a and a['pgwide'] == '1':
 675         result.append(' width="100%"')
 676     if 'frame' in a and a['frame'] == 'none':
 677         result.append(' border="0"')
 678     result.append('>\n')
 679     convert_inner(ctx, xml, result)
 680     result.append('</table></div>')
 681     if xml.tail:
 682         result.append(xml.tail)
 683     return result
 684
 685
 686 def convert_inlinegraphic(ctx, xml):
 687     # TODO(ensonic): warn on missing fileref attr?
 688     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 689
 690
 691 def convert_inlinemediaobject(ctx, xml):
 692     result = ['<span class="inlinemediaobject">']
 693     # no PCDATA allowed here
 694     convert_mediaobject_children(ctx, xml, result)
 695     result.append('</span>')
 696     append_text(ctx, xml.tail, result)
 697     return result
 698
 699
 700 def convert_itemizedlist(ctx, xml):
 701     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 702     convert_inner(ctx, xml, result)
 703     result.append('</ul></div>')
 704     if xml.tail:
 705         result.append(xml.tail)
 706     return result
 707
 708
 709 def convert_link(ctx, xml):
 710     linkend = xml.attrib['linkend']
 711     result = []
 712     if linkend:
 713         link_text = []
 714         append_text(ctx, xml.text, link_text)
 715         convert_inner(ctx, xml, link_text)
 716         text = ''.join(link_text)
 717
 718         (tid, href) = fixxref.GetXRef(linkend)
 719         if href:
 720             title_attr = ''
 721             title = titles.get(tid)
 722             if title:
 723                 title_attr = ' title="%s"' % title['title']
 724
 725             href = fixxref.MakeRelativeXRef(ctx['module'], href)
 726             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 727         else:
 728             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 729             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 730             result = [text]
 731     else:
 732         append_text(ctx, xml.text, result)
 733         convert_inner(ctx, xml, result)
 734     append_text(ctx, xml.tail, result)
 735     return result
 736
 737
 738 def convert_listitem(ctx, xml):
 739     result = ['<li class="listitem">']
 740     convert_inner(ctx, xml, result)
 741     result.append('</li>')
 742     # no PCDATA allowed here, is in itemizedlist
 743     return result
 744
 745
 746 def convert_literallayout(ctx, xml):
 747     result = ['<div class="literallayout"><p><br>\n']
 748     append_text(ctx, xml.text, result)
 749     convert_inner(ctx, xml, result)
 750     result.append('</p></div>')
 751     append_text(ctx, xml.tail, result)
 752     return result
 753
 754
 755 def convert_mediaobject(ctx, xml):
 756     result = ['<div class="mediaobject">\n']
 757     # no PCDATA allowed here
 758     convert_mediaobject_children(ctx, xml, result)
 759     result.append('</div>')
 760     append_text(ctx, xml.tail, result)
 761     return result
 762
 763
 764 def convert_orderedlist(ctx, xml):
 765     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 766     convert_inner(ctx, xml, result)
 767     result.append('</ol></div>')
 768     append_text(ctx, xml.tail, result)
 769     return result
 770
 771
 772 def convert_para(ctx, xml):
 773     result = []
 774     if 'role' in xml.attrib:
 775         result.append('<p class="%s">' % xml.attrib['role'])
 776     else:
 777         result.append('<p>')
 778     if 'id' in xml.attrib:
 779         result.append('<a name="%s"></a>' % xml.attrib['id'])
 780     append_text(ctx, xml.text, result)
 781     convert_inner(ctx, xml, result)
 782     result.append('</p>')
 783     append_text(ctx, xml.tail, result)
 784     return result
 785
 786
 787 def convert_para_like(ctx, xml):
 788     result = []
 789     if 'id' in xml.attrib:
 790         result.append('<a name="%s"></a>' % xml.attrib['id'])
 791     result.append('<p class="%s">' % xml.tag)
 792     append_text(ctx, xml.text, result)
 793     convert_inner(ctx, xml, result)
 794     result.append('</p>')
 795     append_text(ctx, xml.tail, result)
 796     return result
 797
 798
 799 def convert_phrase(ctx, xml):
 800     result = ['<span']
 801     if 'role' in xml.attrib:
 802         result.append(' class="%s">' % xml.attrib['role'])
 803     else:
 804         result.append('>')
 805     append_text(ctx, xml.text, result)
 806     convert_inner(ctx, xml, result)
 807     result.append('</span>')
 808     append_text(ctx, xml.tail, result)
 809     return result
 810
 811
 812 def convert_primaryie(ctx, xml):
 813     result = ['<dt>\n']
 814     convert_inner(ctx, xml, result)
 815     result.append('\n</dt>\n<dd></dd>\n')
 816     return result
 817
 818
 819 def convert_pre(ctx, xml):
 820     # Since we're inside <pre> don't skip newlines
 821     ctx['no-strip'] = True
 822     result = ['<pre class="%s">' % xml.tag]
 823     append_text(ctx, xml.text, result)
 824     convert_inner(ctx, xml, result)
 825     result.append('</pre>')
 826     del ctx['no-strip']
 827     append_text(ctx, xml.tail, result)
 828     return result
 829
 830
 831 def convert_programlisting(ctx, xml):
 832     result = []
 833     if xml.attrib.get('role', '') == 'example':
 834         if xml.text:
 835             lang = xml.attrib.get('language', ctx['src-lang']).lower()
 836             if lang not in LEXERS:
 837                 LEXERS[lang] = get_lexer_by_name(lang)
 838             lexer = LEXERS.get(lang, None)
 839             if lexer:
 840                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 841
 842                 # we do own line-numbering
 843                 line_count = highlighted.count('\n')
 844                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 845                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 846   <tbody>
 847     <tr>
 848       <td class="listing_lines" align="right"><pre>%s</pre></td>
 849       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 850     </tr>
 851   </tbody>
 852 </table>
 853 """ % (source_lines, highlighted))
 854             else:
 855                 logging.warn('No pygments lexer for language="%s"', lang)
 856                 result.append('<pre class="programlisting">')
 857                 result.append(xml.text)
 858                 result.append('</pre>')
 859     else:
 860         result.append('<pre class="programlisting">')
 861         append_text(ctx, xml.text, result)
 862         convert_inner(ctx, xml, result)
 863         result.append('</pre>')
 864     append_text(ctx, xml.tail, result)
 865     return result
 866
 867
 868 def convert_quote(ctx, xml):
 869     result = ['<span class="quote">"<span class="quote">']
 870     append_text(ctx, xml.text, result)
 871     convert_inner(ctx, xml, result)
 872     result.append('</span>"</span>')
 873     append_text(ctx, xml.tail, result)
 874     return result
 875
 876
 877 def convert_refsect1(ctx, xml):
 878     # Add a divider between two consequitive refsect2
 879     def convert_inner(ctx, xml, result):
 880         prev = None
 881         for child in xml:
 882             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 883                 result.append('<hr>\n')
 884             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 885             prev = child
 886     return convert_sect(ctx, xml, 'h2', convert_inner)
 887
 888
 889 def convert_refsect2(ctx, xml):
 890     return convert_sect(ctx, xml, 'h3')
 891
 892
 893 def convert_refsect3(ctx, xml):
 894     return convert_sect(ctx, xml, 'h4')
 895
 896
 897 def convert_row(ctx, xml):
 898     result = ['<tr>\n']
 899     convert_inner(ctx, xml, result)
 900     result.append('</tr>\n')
 901     return result
 902
 903
 904 def convert_sbr(ctx, xml):
 905     return ['<br>']
 906
 907
 908 def convert_sect1_tag(ctx, xml):
 909     return convert_sect(ctx, xml, 'h2')
 910
 911
 912 def convert_sect2(ctx, xml):
 913     return convert_sect(ctx, xml, 'h3')
 914
 915
 916 def convert_sect3(ctx, xml):
 917     return convert_sect(ctx, xml, 'h4')
 918
 919
 920 def convert_simpara(ctx, xml):
 921     result = ['<p>']
 922     append_text(ctx, xml.text, result)
 923     convert_inner(ctx, xml, result)
 924     result.append('</p>')
 925     append_text(ctx, xml.tail, result)
 926     return result
 927
 928
 929 def convert_span(ctx, xml):
 930     result = ['<span class="%s">' % xml.tag]
 931     append_text(ctx, xml.text, result)
 932     convert_inner(ctx, xml, result)
 933     result.append('</span>')
 934     append_text(ctx, xml.tail, result)
 935     return result
 936
 937
 938 def convert_table(ctx, xml):
 939     result = ['<div class="table">']
 940     append_idref(xml.attrib, result)
 941     title_tag = xml.find('title')
 942     if title_tag is not None:
 943         result.append('<p class="title"><b>')
 944         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 945         result.extend(convert_title(ctx, title_tag))
 946         result.append('</b></p>')
 947     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 948
 949     convert_inner(ctx, xml, result)
 950
 951     result.append('</table></div></div>')
 952     append_text(ctx, xml.tail, result)
 953     return result
 954
 955
 956 def convert_tbody(ctx, xml):
 957     result = ['<tbody>']
 958     ctx['table.entry'] = 'td'
 959     convert_inner(ctx, xml, result)
 960     result.append('</tbody>')
 961     # is in tgroup and there can be no 'text'
 962     return result
 963
 964
 965 def convert_tgroup(ctx, xml):
 966     # tgroup does not expand to anything, but the nested colspecs need to
 967     # be put into a colgroup
 968     cols = xml.findall('colspec')
 969     result = []
 970     if cols:
 971         result.append('<colgroup>\n')
 972         for col in cols:
 973             result.extend(convert_colspec(ctx, col))
 974             xml.remove(col)
 975         result.append('</colgroup>\n')
 976     convert_inner(ctx, xml, result)
 977     # is in informaltable and there can be no 'text'
 978     return result
 979
 980
 981 def convert_thead(ctx, xml):
 982     result = ['<thead>']
 983     ctx['table.entry'] = 'th'
 984     convert_inner(ctx, xml, result)
 985     result.append('</thead>')
 986     # is in tgroup and there can be no 'text'
 987     return result
 988
 989
 990 def convert_title(ctx, xml):
 991     # This is always explicitly called from some context
 992     result = []
 993     append_text(ctx, xml.text, result)
 994     convert_inner(ctx, xml, result)
 995     append_text(ctx, xml.tail, result)
 996     return result
 997
 998
 999 def convert_ulink(ctx, xml):
1000     if xml.text:
1001         result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
1002     else:
1003         url = xml.attrib['url']
1004         result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, url)]
1005     append_text(ctx, xml.tail, result)
1006     return result
1007
1008
def convert_userinput(ctx, xml):
    """Convert a docbook <userinput> into a strong 'command' span.

    The element text and tail are handed to append_text(), the children to
    convert_inner(). Returns the list of html fragments.
    """
    opening = '<span class="command"><strong>'
    closing = '</strong></span>'
    fragments = [opening]
    append_text(ctx, xml.text, fragments)
    convert_inner(ctx, xml, fragments)
    fragments.append(closing)
    append_text(ctx, xml.tail, fragments)
    return fragments
1016
1017
def convert_variablelist(ctx, xml):
    """Convert a docbook <variablelist> into a two column html table.

    The children are converted in between the fixed table header and footer.
    """
    table_open = (
        '<div class="variablelist"><table border="0" class="variablelist">\n'
        '<colgroup>\n'
        '<col align="left" valign="top">\n'
        '<col>\n'
        '</colgroup>\n'
        '<tbody>'
    )
    table_close = '</tbody>\n</table></div>'
    rows = [table_open]
    convert_inner(ctx, xml, rows)
    rows.append(table_close)
    return rows
1029
1030
def convert_varlistentry(ctx, xml):
    """Convert a <varlistentry> into one table row of a variablelist.

    The first cell holds the <term> (rendered through convert_span), the
    second cell the converted children of the <listitem>.

    Args:
      ctx: conversion context passed on to the nested converters
      xml: the <varlistentry> element

    Returns:
      list of html fragments
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # close the row; this used to emit a second opening '<tr>'
    result.append('</tr>')
    return result
1046
1047
def convert_xref(ctx, xml):
    """Convert a docbook <xref> into a link named after its target.

    The 'linkend' attribute is resolved through fixxref.GetXRef() and the
    target's entry in 'titles' provides the tag name and the title.
    """
    target_id, href = fixxref.GetXRef(xml.attrib['linkend'])
    # NOTE(review): looks like an id that is missing in 'titles' gives None
    # here and then fails on title['tag'] below - confirm.
    title = titles.get(target_id)
    # all sectN need to become 'section'
    tag = title['tag']
    if tag in ('sect1', 'sect2', 'sect3', 'sect4', 'sect5'):
        tag = 'section'
    title_html = ''.join(convert_title(ctx, title['xml']))
    link = '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' % (
        href, title['title'], tag, title_html)
    result = [link]

    append_text(ctx, xml.tail, result)
    return result
1068
1069
# Maps a docbook tag name to the function converting such an element.
# Converters are called as converter(ctx, xml) and return the html fragments
# for the element. Lookups use convert_tags.get(tag, convert__unknown), so
# tags without an entry end up in convert__unknown.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'classname': convert_code,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'firstterm': convert_em,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_inlinemediaobject,
    'interfacename': convert_code,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_mediaobject,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_code,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_code,
    'returnvalue': convert_span,
    'row': convert_row,
    'sbr': convert_sbr,
    'screen': convert_pre,
    'section': convert_sect2,      # FIXME: need tracking of nesting
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'simplesect': convert_sect2,   # FIXME: need tracking of nesting
    'structfield': convert_em_code,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # titles are rendered explicitly by their parent (see convert_title)
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1156
1157 # conversion helpers
1158
# Start of every generated page up to and including the opening <body> tag.
# Takes two '%s' values: the page title and the pre-rendered '<link>' tags.
HTML_HEADER = (
    '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n'
    '<html>\n'
    '<head>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n'
    '<title>%s</title>\n'
    '%s<link rel="stylesheet" href="style.css" type="text/css">\n'
    '</head>\n'
    '<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">\n'
)
1168
1169
def generate_head_links(ctx):
    """Build the '<link rel=...>' navigation tags for the html head.

    'nav_home' is required in ctx, while 'nav_up', 'nav_prev' and 'nav_next'
    are only emitted when present. Each node provides 'filename' and
    'raw_title'. Returns the tags joined into one string.
    """
    template = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [template % ('home', home.filename, home.raw_title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            links.append(template % (rel, target.filename, target.raw_title))
    return ''.join(links)
1185
1186
def generate_nav_links(ctx):
    """Build the table cells with the home/up/prev/next navigation icons.

    'nav_home' is required in ctx. For each of 'nav_up', 'nav_prev' and
    'nav_next' a linked icon is emitted when the key is present and the
    '-insensitive' icon without a link otherwise. Returns one string.
    """
    active = ('<td><a accesskey="%s" href="%s"><img src="%s.png" '
              'width="16" height="16" border="0" alt="%s"></a></td>')
    inactive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'
    buttons = (
        # ctx key, accesskey, icon basename, alt text
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    for key, accesskey, icon, alt in buttons:
        if key in ctx:
            cells.append(active % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(inactive % (icon,))

    return ''.join(cells)
1212
1213
def generate_toc(ctx, node):
    """Build the '<dt>'/'<dd>' entries of the table of contents below node.

    Children that have children themselves get a nested '<dl>'. ctx is only
    passed on to the recursive calls. Returns a list of html fragments.
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (
                child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if not child.children:
            continue
        entries.append('<dd><dl>')
        entries += generate_toc(ctx, child)
        entries.append('</dl></dd>')
    return entries
1231
1232
def generate_basic_nav(ctx):
    """Build the navigation table with an empty 'shortcuts' cell.

    The cells from generate_nav_links() fill the rest of the row.
    """
    template = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">\n'
        '  <tr valign="middle">\n'
        '    <td width="100%%" align="left" class="shortcuts"></td>\n'
        '    %s\n'
        '  </tr>\n'
        '</table>\n'
        '    '
    )
    nav_cells = generate_nav_links(ctx)
    return template % (nav_cells,)
1241
1242
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the navigation table with one shortcut link per division.

    Each entry of divs becomes a link to '#<prefix><title>' labelled with the
    title from xml_get_title(). The links sit in a span with the id
    'nav_<span_id>' and are followed by the generate_nav_links() cells.
    """
    div_titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in div_titles
    ]
    separator = '\n<span class="dim">|</span>\n'
    template = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">\n'
        '  <tr valign="middle">\n'
        '    <td width="100%%" align="left" class="shortcuts">\n'
        '      <span id="nav_%s">\n'
        '        %s\n'
        '      </span>\n'
        '    </td>\n'
        '    %s\n'
        '  </tr>\n'
        '</table>\n'
        '    '
    )
    return template % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
1260
1261
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to result.

    Besides the 'Top' shortcut, a shortcut is added for every refsect1 that
    has an 'id' containing a '.' and whose 'role' does not end in '_proto'.
    The generate_nav_links() cells close the row. Nothing is returned.
    """
    table_open = (
        '<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">\n'
        '  <tr valign="middle">\n'
        '    <td width="100%" align="left" class="shortcuts">\n'
        '      <a href="#" class="shortcut">Top</a>'
    )
    shortcut = (
        '\n'
        '          <span id="nav_%s">\n'
        '            <span class="dim">|</span>\n'
        '            <a href="#%s" class="shortcut">%s</a>\n'
        '          </span>\n'
        '          '
    )
    table_close = (
        '\n'
        '    </td>\n'
        '    %s\n'
        '  </tr>\n'
        '</table>\n'
    )

    result.append(table_open)
    for sect in refsect1s:
        # don't list TOC sections (role="xxx_proto")
        if sect.attrib.get('role', '').endswith("_proto"):
            continue
        # skip sections without an 'id' and foreign sections (no '.' in the id)
        ref_id = sect.attrib.get('id')
        if ref_id is None or '.' not in ref_id:
            continue

        title = xml_get_title(ctx, sect)
        span_id = ref_id.split('.')[1].replace('-', '_')
        result.append(shortcut % (span_id, ref_id, title))
    result.append(table_close % (generate_nav_links(ctx),))
1296
1297
def generate_footer(ctx):
    """Build the footnotes block shown at the end of a page.

    Returns an empty list when ctx has no 'footnotes' key. Otherwise the
    fragments of every entry in ctx['footnotes'] are wrapped into a
    'footnotes' div.
    """
    if 'footnotes' not in ctx:
        return []

    footer = [
        '<div class="footnotes">\n\n'
        '<br><hr style="width:100; text-align:left;margin-left: 0">\n'
    ]
    for footnote in ctx['footnotes']:
        footer += list(footnote)
    footer.append('</div>\n')
    return footer
1308
1309
def get_id_path(node):
    """Generate the index path used to build an 'id' for a node.

    We walk up the xml-tree and record the 1-based position of each element
    among its siblings. When reaching the top of the xml-tree we collect the
    remaining index entries from the chunked-tree (node.idx of the node and
    all its parents).

    Args:
      node: chunk tree node providing 'xml', 'idx' and 'parent'

    Returns:
      list of 1-based indexes as strings, outermost level first
    """
    # collected innermost first and reversed once at the end
    ix = []
    xml = node.xml
    parent = xml.getparent()
    while parent is not None:
        # parent.index() replaces the deprecated getchildren().index()
        ix.append(str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    while node is not None:
        ix.append(str(node.idx + 1))
        node = node.parent

    ix.reverse()
    return ix
1329
1330
def get_id(node):
    """Return the 'id' of the node's xml element, generating one if needed.

    A non-empty 'id' attribute is returned as is. Otherwise the fallback is
    'id-' followed by the dot-joined get_id_path() indexes.
    """
    element = node.xml
    explicit_id = element.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     element.sourceline, element.tag)
        # logging.warning('%s: id indexes: %s', node.filename, str(ix))
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1344
1345
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Render a complete html page for a chunk that carries a table of contents.

    Args:
      ctx: conversion context, ctx['node'] is the chunk to render
      div_class: css class of the div wrapping the page content
      title_tag: html element used for the page title (e.g. 'h2')

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    result.append(generate_basic_nav(ctx))
    result.append('<div class="%s">' % div_class)
    if node.title:
        titlepage = (
            '\n'
            '<div class="titlepage">\n'
            '<%s class="title"><a name="%s"></a>%s</%s>\n'
            '</div>'
        )
        result.append(titlepage % (title_tag, get_id(node), node.title, title_tag))

    toc = generate_toc(ctx, node)
    if toc:
        # TODO: not all docbook page types use this extra heading
        result.append(
            '<p><b>Table of Contents</b></p>\n'
            '    <div class="toc">\n'
            '      <dl class="toc">\n'
            '    ')
        result += toc
        result.append(
            '</dl>\n'
            '    </div>\n'
            '    ')
    convert_inner(ctx, node.xml, result)
    result += generate_footer(ctx)
    result.append('</div>\n</body>\n</html>')
    return result
1377
1378
1379 # docbook chunks
1380
1381
def convert_book(ctx):
    """Render the html page for the 'book' chunk in ctx['node'].

    The page consists of the title banner, the converted first <bookinfo>
    and the table of contents generated from the root node.
    """
    node = ctx['node']
    banner = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">\n'
        '    <tr><th valign="middle"><p class="title">%s</p></th></tr>\n'
        '</table>\n'
        '<div class="book">\n'
    )
    result = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        banner % node.title,
    ]
    bookinfo = node.xml.findall('bookinfo')[0]
    result += convert_bookinfo(ctx, bookinfo)
    result.append('<div class="toc">\n  <dl class="toc">\n')
    result += generate_toc(ctx, node.root)
    result.append('</dl>\n</div>\n')
    result += generate_footer(ctx)
    result.append('</div>\n</body>\n</html>')
    return result
1406
1407
def convert_chapter(ctx):
    """Render a 'chapter' chunk: div class 'chapter', title in 'h2'."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1410
1411
def convert_glossary(ctx):
    """Render the html page for the 'glossary' chunk in ctx['node'].

    The direct <glossdiv> children provide the alphabetical navigation and
    the page content; the heading level is taken from node.depth.
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')
    titlepage = (
        '<div class="glossary">\n'
        '<div class="titlepage"><h%1d class="title">\n'
        '<a name="%s"></a>%s</h%1d>\n'
        '</div>'
    )

    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    result.append(generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'))
    result.append(titlepage % (node.depth, get_id(node), node.title, node.depth))
    for glossdiv in glossdivs:
        result += convert_glossdiv(ctx, glossdiv)
    result += generate_footer(ctx)
    result.append('</div>\n</body>\n</html>')
    return result
1431
1432
def convert_index(ctx):
    """Render the html page for the 'index' chunk in ctx['node'].

    The <indexdiv> elements nested in the first <indexdiv> provide the
    alphabetical navigation and the page content; the heading level is taken
    from node.depth.
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')
    titlepage = (
        '<div class="index">\n'
        '<div class="titlepage"><h%1d class="title">\n'
        '<a name="%s"></a>%s</h%1d>\n'
        '</div>'
    )

    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    result.append(generate_alpha_nav(ctx, indexdivs, 'idx', 'index'))
    result.append(titlepage % (node.depth, get_id(node), node.title, node.depth))
    for indexdiv in indexdivs:
        result += convert_indexdiv(ctx, indexdiv)
    result += generate_footer(ctx)
    result.append('</div>\n</body>\n</html>')
    return result
1453
1454
def convert_part(ctx):
    """Render a 'part' chunk: div class 'part', title in 'h1'."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1457
1458
def convert_preface(ctx):
    """Render the html page for the 'preface' chunk in ctx['node'].

    Like a chunk with toc, but without the table of contents and with a
    fixed 'h2' title.
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    result.append(generate_basic_nav(ctx))
    result.append('<div class="preface">')
    if node.title:
        titlepage = (
            '\n'
            '<div class="titlepage">\n'
            '<h2 class="title"><a name="%s"></a>%s</h2>\n'
            '</div>'
        )
        result.append(titlepage % (get_id(node), node.title))
    convert_inner(ctx, node.xml, result)
    result += generate_footer(ctx)
    result.append('</div>\n</body>\n</html>')
    return result
1477
1478
def convert_reference(ctx):
    """Render a 'reference' chunk: div class 'reference', title in 'h1'."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1481
1482
def convert_refentry(ctx):
    """Render the html page for the 'refentry' chunk in ctx['node'].

    The page has the refentry navigation, a title block with an optional
    gallery image (refmeta/refmiscinfo/inlinegraphic) and all direct
    <refsect1> children.
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # look up the optional gallery image step by step, any level may be missing
    refmeta = node.xml.find('refmeta')
    refmiscinfo = refmeta.find('refmiscinfo') if refmeta is not None else None
    inlinegraphic = None
    if refmiscinfo is not None:
        inlinegraphic = refmiscinfo.find('inlinegraphic')
    gallery = ''
    if inlinegraphic is not None:
        gallery = ''.join(convert_inlinegraphic(ctx, inlinegraphic))

    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, result)
    title_block = (
        '\n'
        '<div class="refentry">\n'
        '<a name="%s"></a>\n'
        '<div class="refnamediv">\n'
        '  <table width="100%%"><tr>\n'
        '    <td valign="top">\n'
        '      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>\n'
        '      <p>%s — %s</p>\n'
        '    </td>\n'
        '    <td class="gallery_image" valign="top" align="right">%s</td>\n'
        '  </tr></table>\n'
        '</div>\n'
    )
    result.append(title_block % (
        node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        result += convert_refsect1(ctx, refsect1)
    result += generate_footer(ctx)
    result.append('</div>\n</body>\n</html>')
    return result
1522
1523
def convert_section(ctx):
    """Render a 'section' chunk: div class 'section', title in 'h2'."""
    return convert_chunk_with_toc(ctx, div_class='section', title_tag='h2')
1526
1527
def convert_sect1(ctx):
    """Render a 'sect1' chunk: div class 'sect1', title in 'h2'."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1530
1531
# Maps the name of a chunk node to the function rendering its html page.
# The functions are called as converter(ctx) from convert(); for node names
# without an entry convert() only logs a warning.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'section': convert_section,
    'sect1': convert_sect1,
}
1545
1546
def generate_nav_nodes(files, node):
    """Collect the navigation targets for node.

    Returns a dict that always has 'nav_home' (the root node). 'nav_up' is
    added when the node has a parent, 'nav_prev' and 'nav_next' when there is
    a neighbour before or after the node in files.
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent
    position = files.index(node)
    last_position = len(files) - 1
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    if position < last_position:
        nav['nav_next'] = files[position + 1]
    return nav
1560
1561
def convert(out_dir, module, files, node, src_lang):
    """Convert one docbook chunk to a html file.

    The file is opened before the converter is looked up, so a chunk type
    without a converter still leaves an empty file behind.

    Args:
      out_dir: already created output dir
      module: module name, stored as ctx['module']
      files: list of nodes in the tree in pre-order
      node: current tree node
      src_lang: stored as ctx['src-lang']
    """

    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
            'src-lang': src_lang,
        }
        ctx.update(generate_nav_nodes(files, node))

        if node.name not in convert_chunks:
            logging.warning('Add converter/template for "%s"', node.name)
            return
        html.writelines(convert_chunks[node.name](ctx))
1587
1588
def create_devhelp2_toc(node):
    """Build the nested '<sub>' elements for the children of node.

    A child without children becomes a self-closing '<sub .../>', any other
    child wraps the entries of its own children. Returns a list of strings.
    """
    entries = []
    for child in node.children:
        attrs = (child.raw_title, child.filename)
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        entries.append('<sub name="%s" link="%s">\n' % attrs)
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
1599
1600
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into xml attributes.

    The condition holds '|' separated 'key:value' items, e.g. since or
    deprecated. Each item becomes key="value" and an item without ':' gets an
    empty value. Double quotes are replaced by '&quot;' first. Returns the
    attributes with a leading space, or '' if there is no 'condition'.
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    escaped = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for item in escaped.split('|'):
        # deprecated can have no description, partition() then gives ''
        key, _, value = item.partition(':')
        keywords.append('{}="{}"'.format(key, value))
    return ' ' + ' '.join(keywords)
1615
1616
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 '<keyword>' line for a refsect2 element.

    The type comes from the 'role' attribute, the name from the 'titles'
    entry of the element's 'id' and the link is base_link plus that id.
    """
    node_id = node.attrib['id']
    keyword_type = node.attrib['role']
    name = titles[node_id]['title']
    link = base_link + node_id
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, conditions)
1622
1623
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 '<keyword>' line for an enum or struct member row.

    The type comes from the 'role' attribute of node, title is used as the
    keyword name and the link is base_link plus name.
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, conditions)
1628
1629
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    The file lists the chapters (from the chunk tree) and one keyword for
    each refsect2 with a 'role' plus its enum and struct members.

    Args:
      out_dir: already created output dir
      module: module name, used for the file name and the book 'name'
      xml: root element of the document
      files: list of chunk nodes, files[0].root is the root of the tree
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        # Defaults for documents without bookinfo, title or online location.
        # xpath() returns a (possibly empty) list and never None, so all
        # lookups are checked for emptiness before taking the first item.
        title = ''
        online_url = ''
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            title_nodes = bookinfo.xpath('./title/text()')
            if title_nodes:
                title = title_nodes[0]
            online_nodes = bookinfo.xpath(
                './releaseinfo/ulink[@role="online-location"]/@url')
            if online_nodes:
                online_url = online_nodes[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
  <chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        idx.writelines(result)
1679
1680
def get_dirs(uninstalled):
    """Return the tuple (gtkdocdir, styledir) with the gtk-doc data files.

    When installed, both are 'gtk-doc/data' below config.datadir. When
    uninstalled, gtkdocdir is the directory of the running script, or
    $ABS_TOP_SRCDIR if only that one contains 'gtk-doc.xsl', and styledir is
    its 'style' subdirectory.
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    # try 'srcdir' (set from makefiles) too
    if (not os.path.exists(gtkdocdir + '/gtk-doc.xsl') and
            os.path.exists(os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl')):
        gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1694
1695
def main(module, index_file, out_dir, uninstalled, src_lang):
    """Convert the docbook document index_file into html files in out_dir.

    Runs seven steps and logs the time each one took: load the document,
    copy the data files, load the xref targets, chunk the document, extract
    the link/title/glossary tables, write the devhelp2 file and write the
    html files.

    Args:
      module: module name
      index_file: main docbook xml file
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the data files
      src_lang: passed on to convert() for each chunk
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    started = timer()
    # parsing with etree.XMLParser(collect_ids=False) does not seem to be
    # faster
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - started)

    # 2) copy datafiles
    started = timer()
    # TODO: handle additional images
    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    datafiles = glob(os.path.join(styledir, '*.png')) + [os.path.join(styledir, 'style.css')]
    for datafile in datafiles:
        shutil.copy(datafile, out_dir)
    # ... and append the style definitions of the html formatter to the copy
    copied_css = os.path.join(out_dir, 'style.css')
    with open(copied_css, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - started)

    # 3) load xref targets
    started = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - started)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    started = timer()
    chunk_tree = chunk(tree.getroot(), module)
    # only nodes without an anchor are kept in the list of files
    files = [n for n in PreOrderIter(chunk_tree) if n.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - started)

    # 5) extract tables:
    started = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - started)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    started = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - started)

    # 7) iterate the tree and output files
    started = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # (tried multiprocessing.pool.Pool(4) and ThreadPool(4) with
    # apply_async(convert, ...))
    for node in files:
        convert(out_dir, module, files, node, src_lang)
    logging.warning("7: %7.3lf: create html", timer() - started)
1770
1771
def run(options):
    """Entry point: create the output dir, run main() and exit.

    options.args holds the module name and the main document. The html is
    written into a 'db2html' directory next to the document. The process
    exits with the return value of main().
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except OSError as err:
        # an already existing output dir is fine, anything else is not
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise

    exit_code = main(module, document, out_dir, options.uninstalled, options.src_lang)
    sys.exit(exit_code)