gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42   - handle 'label' attributes on part/chapter/section-types
  43     - the titles will have a generated prefix, such as 'Part I:'
  44       (locale dependent)
  45     - in the toc it would only be the label: 'I.'
  46   - 'link' adds a 'title' attr to 'a' if the target has a title.
  47     - there is an implementation in convert_link() but it is slow
  48     - we might need to collect titles as we chunk
  49     - if we do this we'd need to keep iterating, but might be able to replace
  50       add_id_links()
  51   - handle the 'xref' tag
  52     - this needs the title + the type of the target
  53     - for the title, see convert_link()
  54 - check each docbook tag if it can contain #PCDATA, if not don't check for
  55   xml.text
  56 - consider some perf-warnings flag
  57   - see 'No "id" attribute on'
  58
  59 OPTIONAL:
  60 - minify html: https://pypi.python.org/pypi/htmlmin/
  61
  62 Requirements:
  63 sudo pip3 install anytree lxml pygments
  64
  65 Example invocation:
  66 cd tests/bugs/docs/
  67 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  68 xdg-open db2html/index.html
  69 meld html db2html
  70
  71 Benchmarking:
  72 cd tests/bugs/docs/;
  73 rm html-build.stamp; time make html-build.stamp
  74 """
  75
  76 import argparse
  77 import errno
  78 import logging
  79 import os
  80 import shutil
  81 import sys
  82
  83 from anytree import Node, PreOrderIter
  84 from copy import deepcopy
  85 from glob import glob
  86 from lxml import etree
  87 from pygments import highlight
  88 from pygments.lexers import CLexer
  89 from pygments.formatters import HtmlFormatter
  90 from timeit import default_timer as timer
  91
  92 from . import config, fixxref
  93
# pygments setup
# Lexer cache keyed by lower-case language name. It is pre-seeded with the C
# lexer; convert_programlisting() adds further entries on demand.
LEXERS = {
    'c': CLexer()
}
# Shared formatter instance; convert_programlisting() supplies its own <pre>
# wrapper around the highlighted output.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
 100
 101
 102 class ChunkParams(object):
 103     def __init__(self, prefix, parent=None, min_idx=0):
 104         self.prefix = prefix
 105         self.parent = parent
 106         self.min_idx = min_idx
 107         self.idx = 1
 108
 109
# Sentinel for ChunkParams.min_idx: no child index is ever >= infinity, so
# tags using it are never split into their own file by chunk().
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# Maps a docbook tag name to its ChunkParams(prefix, parent, min_idx).
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
 137 # TAGS we don't support:
 138 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
 139
# Maps a tag name to a (title xpath, subtitle xpath or None) pair; the '_'
# entry is the fallback used by get_chunk_titles() for all other tags.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every 'id' attribute value in a tree, see add_id_links().
ID_XPATH = etree.XPath('//@id')

# Selects all glossary entries; build_glossary() fills 'glossary' with a
# term -> definition text mapping that convert_acronym() reads.
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
glossary = {}

# Next footnote number; incremented by convert_footnote() for every footnote.
footnote_idx = 1
 155
 156
 157 def gen_chunk_name(node, chunk_params):
 158     """Generate a chunk file name
 159
 160     This is either based on the id or on the position in the doc. In the latter
 161     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 162     type.
 163     """
 164     if 'id' in node.attrib:
 165         return node.attrib['id']
 166
 167     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 168     chunk_params.idx += 1
 169
 170     # handle parents to make names of nested tags like in docbook
 171     # - we only need to prepend the parent if there are > 1 of them in the
 172     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 173     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 174     #   when we chunk explicitly and on each level maintain the 'idx'
 175     # while chunk_params.parent:
 176     #     parent = chunk_params.parent
 177     #     if parent not in CHUNK_PARAMS:
 178     #         break;
 179     #     chunk_params = CHUNK_PARAMS[parent]
 180     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 181
 182     logging.info('Gen chunk name: "%s"', name)
 183     return name
 184
 185
 186 def get_chunk_titles(module, node):
 187     tag = node.tag
 188     (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
 189
 190     ctx = {
 191         'module': module,
 192         'files': [],
 193     }
 194     result = {
 195         'title': None,
 196         'title_tag': None,
 197         'subtitle': None,
 198         'subtitle_tag': None
 199     }
 200     res = title(node)
 201     if res:
 202         xml = res[0]
 203         result['title'] = ''.join(convert_title(ctx, xml))
 204         if xml.tag != 'title':
 205             result['title_tag'] = xml.tag
 206         else:
 207             result['title_tag'] = tag
 208
 209     if subtitle:
 210         res = subtitle(node)
 211         if res:
 212             xml = res[0]
 213             result['subtitle'] = ''.join(convert_title(ctx, xml))
 214             result['subtitle_tag'] = xml.tag
 215     return result
 216
 217
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.

    Args:
      xml_node: the xml element to inspect.
      module: the module name, passed on to get_chunk_titles().
      depth: nesting level stored on the created tree node.
      idx: position of xml_node among the chunk-tag children of its parent.
      parent: the tree node of the enclosing chunk or None for the root.

    Returns:
      The tree node created for xml_node, or the unchanged 'parent' if the
      tag of xml_node is not listed in CHUNK_PARAMS.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        # titles are collected before the node is detached from its parent
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent
                # (we continue with a detached deep copy of the subtree)
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            # this node gets its own output file
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # the node stays in the file of its parent and is addressed
            # through an anchor
            # NOTE(review): this reads parent.filename and would fail for a
            # root node (parent=None) that ends up in this branch - confirm
            # this cannot happen for supported documents.
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        depth += 1
        idx = 0
        # NOTE(review): the recursive call may remove 'child' from xml_node
        # while we iterate over it - presumably safe with lxml, confirm.
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            # only children that are chunk tags advance the index
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
 259
 260
 261 def add_id_links(files, links):
 262     for node in files:
 263         chunk_name = node.filename[:-5]
 264         chunk_base = node.filename + '#'
 265         for attr in ID_XPATH(node.xml):
 266             if attr == chunk_name:
 267                 links[attr] = node.filename
 268             else:
 269                 links[attr] = chunk_base + attr
 270
 271
 272 def build_glossary(files):
 273     for node in files:
 274         if node.xml.tag != 'glossary':
 275             continue
 276         for term in GLOSSENTRY_XPATH(node.xml):
 277             # TODO: there can be all kind of things in a glossary. This only supports
 278             # what we commonly use
 279             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 280             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 281             glossary[key] = value
 282             # logging.debug('glosentry: %s:%s', key, value)
 283
 284
 285 # conversion helpers
 286
 287
 288 def convert_inner(ctx, xml, result):
 289     for child in xml:
 290         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 291
 292
 293 def convert_ignore(ctx, xml):
 294     result = []
 295     convert_inner(ctx, xml, result)
 296     return result
 297
 298
 299 def convert_skip(ctx, xml):
 300     return ['']
 301
 302
 303 def append_text(text, result):
 304     if text and text.strip():
 305         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 306
 307
# Tags for which convert__unknown() already logged a warning (tag -> True).
missing_tags = {}
 309
 310
 311 def convert__unknown(ctx, xml):
 312     # don't recurse on subchunks
 313     if xml.tag in CHUNK_PARAMS:
 314         return []
 315     if isinstance(xml, etree._Comment):
 316         return ['<!-- ' + xml.text + '-->\n']
 317     else:
 318         # warn only once
 319         if xml.tag not in missing_tags:
 320             logging.warning('Add tag converter for "%s"', xml.tag)
 321             missing_tags[xml.tag] = True
 322         result = ['<!-- ' + xml.tag + '-->\n']
 323         convert_inner(ctx, xml, result)
 324         result.append('<!-- /' + xml.tag + '-->\n')
 325         return result
 326
 327
 328 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 329     result = ['<div class="%s">\n' % xml.tag]
 330     title = xml.find('title')
 331     if title is not None:
 332         if 'id' in xml.attrib:
 333             result.append('<a name="%s"></a>' % xml.attrib['id'])
 334         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 335     append_text(xml.text, result)
 336     inner_func(ctx, xml, result)
 337     result.append('</div>')
 338     append_text(xml.tail, result)
 339     return result
 340
 341
 342 def xml_get_title(ctx, xml):
 343     title_tag = xml.find('title')
 344     if title_tag is not None:
 345         return ''.join(convert_title(ctx, title_tag))
 346     else:
 347         # TODO(ensonic): any way to get the file (inlcudes) too?
 348         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 349         return ''
 350
 351
 352 # docbook tags
 353
 354
 355 def convert_abstract(ctx, xml):
 356     result = ["""<div class="abstract">
 357     <p class="title"><b>Abstract</b></p>"""]
 358     append_text(xml.text, result)
 359     convert_inner(ctx, xml, result)
 360     result.append('</div>')
 361     append_text(xml.tail, result)
 362     return result
 363
 364
 365 def convert_acronym(ctx, xml):
 366     key = xml.text
 367     title = glossary.get(key, '')
 368     # TODO: print a sensible warning if missing
 369     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 370     if xml.tail:
 371         result.append(xml.tail)
 372     return result
 373
 374
 375 def convert_anchor(ctx, xml):
 376     return ['<a name="%s"></a>' % xml.attrib['id']]
 377
 378
 379 def convert_bookinfo(ctx, xml):
 380     result = ['<div class="titlepage">']
 381     convert_inner(ctx, xml, result)
 382     result.append("""<hr>
 383 </div>""")
 384     if xml.tail:
 385         result.append(xml.tail)
 386     return result
 387
 388
 389 def convert_blockquote(ctx, xml):
 390     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 391     append_text(xml.text, result)
 392     convert_inner(ctx, xml, result)
 393     result.append('</blockquote>\n</div>')
 394     append_text(xml.tail, result)
 395     return result
 396
 397
 398 def convert_code(ctx, xml):
 399     result = ['<code class="%s">' % xml.tag]
 400     append_text(xml.text, result)
 401     convert_inner(ctx, xml, result)
 402     result.append('</code>')
 403     append_text(xml.tail, result)
 404     return result
 405
 406
 407 def convert_colspec(ctx, xml):
 408     result = ['<col']
 409     a = xml.attrib
 410     if 'colname' in a:
 411         result.append(' class="%s"' % a['colname'])
 412     if 'colwidth' in a:
 413         result.append(' width="%s"' % a['colwidth'])
 414     result.append('>\n')
 415     # is in tgroup and there can be no 'text'
 416     return result
 417
 418
 419 def convert_command(ctx, xml):
 420     result = ['<strong class="userinput"><code>']
 421     append_text(xml.text, result)
 422     convert_inner(ctx, xml, result)
 423     result.append('</code></strong>')
 424     append_text(xml.tail, result)
 425     return result
 426
 427
 428 def convert_corpauthor(ctx, xml):
 429     result = ['<div><h3 class="corpauthor">\n']
 430     append_text(xml.text, result)
 431     convert_inner(ctx, xml, result)
 432     result.append('</h3></div>\n')
 433     append_text(xml.tail, result)
 434     return result
 435
 436
 437 def convert_div(ctx, xml):
 438     result = ['<div class="%s">\n' % xml.tag]
 439     append_text(xml.text, result)
 440     convert_inner(ctx, xml, result)
 441     result.append('</div>')
 442     append_text(xml.tail, result)
 443     return result
 444
 445
 446 def convert_emphasis(ctx, xml):
 447     result = ['<span class="emphasis"><em>']
 448     append_text(xml.text, result)
 449     convert_inner(ctx, xml, result)
 450     result.append('</em></span>')
 451     append_text(xml.tail, result)
 452     return result
 453
 454
 455 def convert_em_class(ctx, xml):
 456     result = ['<em class="%s"><code>' % xml.tag]
 457     append_text(xml.text, result)
 458     convert_inner(ctx, xml, result)
 459     result.append('</code></em>')
 460     append_text(xml.tail, result)
 461     return result
 462
 463
 464 def convert_entry(ctx, xml):
 465     entry_type = ctx['table.entry']
 466     result = ['<' + entry_type]
 467     if 'role' in xml.attrib:
 468         result.append(' class="%s"' % xml.attrib['role'])
 469     if 'morerows' in xml.attrib:
 470         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 471     result.append('>')
 472     append_text(xml.text, result)
 473     convert_inner(ctx, xml, result)
 474     result.append('</' + entry_type + '>')
 475     append_text(xml.tail, result)
 476     return result
 477
 478
 479 def convert_footnote(ctx, xml):
 480     footnotes = ctx.get('footnotes', [])
 481     # footnotes idx is not per page, but per doc
 482     global footnote_idx
 483     idx = footnote_idx
 484     footnote_idx += 1
 485
 486     # need a pair of ids for each footnote (docbook generates different ids)
 487     this_id = 'footnote-%d' % idx
 488     that_id = 'ftn.' + this_id
 489
 490     inner = ['<div id="%s" class="footnote">' % that_id]
 491     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 492         this_id, idx))
 493     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 494     # get double nested paras :/.
 495     # convert_inner(ctx, xml, inner)
 496     para = xml.find('para')
 497     if para is None:
 498         para = xml.find('simpara')
 499     if para is not None:
 500         inner.append(para.text)
 501     else:
 502         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 503                         etree.tostring(xml, method="text", encoding=str).strip())
 504     inner.append('</p></div>')
 505     footnotes.append(inner)
 506     ctx['footnotes'] = footnotes
 507     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 508         that_id, this_id, idx)]
 509
 510
 511 def convert_formalpara(ctx, xml):
 512     result = None
 513     title_tag = xml.find('title')
 514     result = ['<p><b>%s</b>' % title_tag.text]
 515     para_tag = xml.find('para')
 516     append_text(para_tag.text, result)
 517     convert_inner(ctx, para_tag, result)
 518     append_text(para_tag.tail, result)
 519     result.append('</p>')
 520     append_text(xml.tail, result)
 521     return result
 522
 523
 524 def convert_glossdef(ctx, xml):
 525     result = ['<dd class="glossdef">']
 526     convert_inner(ctx, xml, result)
 527     result.append('</dd>\n')
 528     return result
 529
 530
 531 def convert_glossdiv(ctx, xml):
 532     title_tag = xml.find('title')
 533     title = title_tag.text
 534     xml.remove(title_tag)
 535     result = [
 536         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 537     ]
 538     convert_inner(ctx, xml, result)
 539     return result
 540
 541
 542 def convert_glossentry(ctx, xml):
 543     result = []
 544     convert_inner(ctx, xml, result)
 545     return result
 546
 547
 548 def convert_glossterm(ctx, xml):
 549     glossid = ''
 550     text = ''
 551     anchor = xml.find('anchor')
 552     if anchor is not None:
 553         glossid = anchor.attrib.get('id', '')
 554         text += anchor.tail or ''
 555     text += xml.text or ''
 556     if glossid == '':
 557         glossid = 'glossterm-' + text
 558     return [
 559         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 560             glossid, text)
 561     ]
 562
 563
 564 def convert_imageobject(ctx, xml):
 565     imagedata = xml.find('imagedata')
 566     if imagedata is not None:
 567         # TODO(ensonic): warn on missing fileref attr?
 568         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 569     else:
 570         return []
 571
 572
 573 def convert_indexdiv(ctx, xml):
 574     title_tag = xml.find('title')
 575     title = title_tag.text
 576     xml.remove(title_tag)
 577     result = [
 578         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 579     ]
 580     convert_inner(ctx, xml, result)
 581     return result
 582
 583
 584 def convert_informaltable(ctx, xml):
 585     result = ['<div class="informaltable"><table class="informaltable"']
 586     a = xml.attrib
 587     if 'pgwide' in a and a['pgwide'] == '1':
 588         result.append(' width="100%"')
 589     if 'frame' in a and a['frame'] == 'none':
 590         result.append(' border="0"')
 591     result.append('>\n')
 592     convert_inner(ctx, xml, result)
 593     result.append('</table></div>')
 594     if xml.tail:
 595         result.append(xml.tail)
 596     return result
 597
 598
 599 def convert_inlinegraphic(ctx, xml):
 600     # TODO(ensonic): warn on missing fileref attr?
 601     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 602
 603
 604 def convert_itemizedlist(ctx, xml):
 605     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 606     convert_inner(ctx, xml, result)
 607     result.append('</ul></div>')
 608     if xml.tail:
 609         result.append(xml.tail)
 610     return result
 611
 612
 613 def convert_link(ctx, xml):
 614     linkend = xml.attrib['linkend']
 615     if linkend in fixxref.NoLinks:
 616         linkend = None
 617     result = []
 618     if linkend:
 619         link_text = []
 620         append_text(xml.text, link_text)
 621         convert_inner(ctx, xml, link_text)
 622         text = ''.join(link_text)
 623
 624         (tid, href) = fixxref.GetXRef(linkend)
 625         if href:
 626             module = ctx['module']
 627             title_attr = ''
 628             # search for a title under id='tid' in all chunks
 629             # NOTE: this will only work for local links
 630             # TODO: this works but is super slow
 631             # id_xpath = etree.XPath('//*[@id="%s"]' % tid)
 632             # for c in ctx['files']:
 633             #     nodes = id_xpath(c.xml)
 634             #     if nodes:
 635             #         title = get_chunk_titles(module, nodes[0])['title']
 636             #         if title:
 637             #             title_attr = ' title="%s"' % title
 638             #             logging.debug('Have title node: href=%s%s', tid, title_attr)
 639             #         break
 640
 641             href = fixxref.MakeRelativeXRef(module, href)
 642             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 643         else:
 644             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 645             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 646             result = [text]
 647     else:
 648         append_text(xml.text, result)
 649         convert_inner(ctx, xml, result)
 650     append_text(xml.tail, result)
 651     return result
 652
 653
 654 def convert_listitem(ctx, xml):
 655     result = ['<li class="listitem">']
 656     convert_inner(ctx, xml, result)
 657     result.append('</li>')
 658     # is in itemizedlist and there can be no 'text'
 659     return result
 660
 661
 662 def convert_literallayout(ctx, xml):
 663     result = ['<div class="literallayout"><p><br>\n']
 664     append_text(xml.text, result)
 665     convert_inner(ctx, xml, result)
 666     result.append('</p></div>')
 667     append_text(xml.tail, result)
 668     return result
 669
 670
 671 def convert_orderedlist(ctx, xml):
 672     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 673     convert_inner(ctx, xml, result)
 674     result.append('</ol></div>')
 675     append_text(xml.tail, result)
 676     return result
 677
 678
 679 def convert_para(ctx, xml):
 680     result = []
 681     if 'id' in xml.attrib:
 682         result.append('<a name="%s"></a>' % xml.attrib['id'])
 683     if 'role' in xml.attrib:
 684         result.append('<p class="%s">' % xml.attrib['role'])
 685     else:
 686         result.append('<p>')
 687     append_text(xml.text, result)
 688     convert_inner(ctx, xml, result)
 689     result.append('</p>')
 690     append_text(xml.tail, result)
 691     return result
 692
 693
 694 def convert_para_like(ctx, xml):
 695     result = []
 696     if 'id' in xml.attrib:
 697         result.append('<a name="%s"></a>' % xml.attrib['id'])
 698     result.append('<p class="%s">' % xml.tag)
 699     append_text(xml.text, result)
 700     convert_inner(ctx, xml, result)
 701     result.append('</p>')
 702     append_text(xml.tail, result)
 703     return result
 704
 705
 706 def convert_phrase(ctx, xml):
 707     result = ['<span']
 708     if 'role' in xml.attrib:
 709         result.append(' class="%s">' % xml.attrib['role'])
 710     else:
 711         result.append('>')
 712     append_text(xml.text, result)
 713     convert_inner(ctx, xml, result)
 714     result.append('</span>')
 715     append_text(xml.tail, result)
 716     return result
 717
 718
 719 def convert_primaryie(ctx, xml):
 720     result = ['<dt>\n']
 721     convert_inner(ctx, xml, result)
 722     result.append('\n</dt>\n<dd></dd>\n')
 723     return result
 724
 725
 726 def convert_pre(ctx, xml):
 727     result = ['<pre class="%s">\n' % xml.tag]
 728     append_text(xml.text, result)
 729     convert_inner(ctx, xml, result)
 730     result.append('</pre>')
 731     append_text(xml.tail, result)
 732     return result
 733
 734
 735 def convert_programlisting(ctx, xml):
 736     result = []
 737     if xml.attrib.get('role', '') == 'example':
 738         if xml.text:
 739             lang = xml.attrib.get('language', 'c').lower()
 740             if lang not in LEXERS:
 741                 LEXERS[lang] = get_lexer_by_name(lang)
 742             lexer = LEXERS.get(lang, None)
 743             if lexer:
 744                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 745
 746                 # we do own line-numbering
 747                 line_count = highlighted.count('\n')
 748                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 749                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 750   <tbody>
 751     <tr>
 752       <td class="listing_lines" align="right"><pre>%s</pre></td>
 753       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 754     </tr>
 755   </tbody>
 756 </table>
 757 """ % (source_lines, highlighted))
 758             else:
 759                 logging.warn('No pygments lexer for language="%s"', lang)
 760                 result.append('<pre class="programlisting">')
 761                 result.append(xml.text)
 762                 result.append('</pre>')
 763     else:
 764         result.append('<pre class="programlisting">')
 765         append_text(xml.text, result)
 766         convert_inner(ctx, xml, result)
 767         result.append('</pre>')
 768     append_text(xml.tail, result)
 769     return result
 770
 771
 772 def convert_quote(ctx, xml):
 773     result = ['<span class="quote">"<span class="quote">']
 774     append_text(xml.text, result)
 775     convert_inner(ctx, xml, result)
 776     result.append('</span>"</span>')
 777     append_text(xml.tail, result)
 778     return result
 779
 780
 781 def convert_refsect1(ctx, xml):
 782     # Add a divider between two consequitive refsect2
 783     def convert_inner(ctx, xml, result):
 784         prev = None
 785         for child in xml:
 786             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 787                 result.append('<hr>\n')
 788             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 789             prev = child
 790     return convert_sect(ctx, xml, 'h2', convert_inner)
 791
 792
 793 def convert_refsect2(ctx, xml):
 794     return convert_sect(ctx, xml, 'h3')
 795
 796
 797 def convert_refsect3(ctx, xml):
 798     return convert_sect(ctx, xml, 'h4')
 799
 800
 801 def convert_row(ctx, xml):
 802     result = ['<tr>\n']
 803     convert_inner(ctx, xml, result)
 804     result.append('</tr>\n')
 805     return result
 806
 807
 808 def convert_sect1_tag(ctx, xml):
 809     return convert_sect(ctx, xml, 'h2')
 810
 811
 812 def convert_sect2(ctx, xml):
 813     return convert_sect(ctx, xml, 'h3')
 814
 815
 816 def convert_sect3(ctx, xml):
 817     return convert_sect(ctx, xml, 'h4')
 818
 819
 820 def convert_simpara(ctx, xml):
 821     result = ['<p>']
 822     append_text(xml.text, result)
 823     result.append('</p>')
 824     append_text(xml.tail, result)
 825     return result
 826
 827
 828 def convert_span(ctx, xml):
 829     result = ['<span class="%s">' % xml.tag]
 830     append_text(xml.text, result)
 831     convert_inner(ctx, xml, result)
 832     result.append('</span>')
 833     append_text(xml.tail, result)
 834     return result
 835
 836
 837 def convert_table(ctx, xml):
 838     result = ['<div class="table">']
 839     if 'id' in xml.attrib:
 840         result.append('<a name="%s"></a>' % xml.attrib['id'])
 841     title_tag = xml.find('title')
 842     if title_tag is not None:
 843         result.append('<p class="title"><b>')
 844         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 845         result.extend(convert_title(ctx, title_tag))
 846         result.append('</b></p>')
 847         xml.remove(title_tag)
 848     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 849
 850     convert_inner(ctx, xml, result)
 851
 852     result.append('</table></div></div>')
 853     append_text(xml.tail, result)
 854     return result
 855
 856
 857 def convert_tbody(ctx, xml):
 858     result = ['<tbody>']
 859     ctx['table.entry'] = 'td'
 860     convert_inner(ctx, xml, result)
 861     result.append('</tbody>')
 862     # is in tgroup and there can be no 'text'
 863     return result
 864
 865
 866 def convert_tgroup(ctx, xml):
 867     # tgroup does not expand to anything, but the nested colspecs need to
 868     # be put into a colgroup
 869     cols = xml.findall('colspec')
 870     result = []
 871     if cols:
 872         result.append('<colgroup>\n')
 873         for col in cols:
 874             result.extend(convert_colspec(ctx, col))
 875             xml.remove(col)
 876         result.append('</colgroup>\n')
 877     convert_inner(ctx, xml, result)
 878     # is in informaltable and there can be no 'text'
 879     return result
 880
 881
 882 def convert_thead(ctx, xml):
 883     result = ['<thead>']
 884     ctx['table.entry'] = 'th'
 885     convert_inner(ctx, xml, result)
 886     result.append('</thead>')
 887     # is in tgroup and there can be no 'text'
 888     return result
 889
 890
 891 def convert_title(ctx, xml):
 892     # This is always called from some context
 893     result = []
 894     append_text(xml.text, result)
 895     convert_inner(ctx, xml, result)
 896     append_text(xml.tail, result)
 897     return result
 898
 899
 900 def convert_ulink(ctx, xml):
 901     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 902     if xml.tail:
 903         result.append(xml.tail)
 904     return result
 905
 906
 907 def convert_userinput(ctx, xml):
 908     result = ['<span class="command"><strong>']
 909     append_text(xml.text, result)
 910     convert_inner(ctx, xml, result)
 911     result.append('</strong></span>')
 912     append_text(xml.tail, result)
 913     return result
 914
 915
 916 def convert_variablelist(ctx, xml):
 917     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 918 <colgroup>
 919 <col align="left" valign="top">
 920 <col>
 921 </colgroup>
 922 <tbody>"""]
 923     convert_inner(ctx, xml, result)
 924     result.append("""</tbody>
 925 </table></div>""")
 926     return result
 927
 928
 929 def convert_varlistentry(ctx, xml):
 930     result = ['<tr>']
 931
 932     result.append('<td><p>')
 933     term = xml.find('term')
 934     result.extend(convert_span(ctx, term))
 935     result.append('</p></td>')
 936
 937     result.append('<td>')
 938     listitem = xml.find('listitem')
 939     convert_inner(ctx, listitem, result)
 940     result.append('</td>')
 941
 942     result.append('<tr>')
 943     return result
 944
 945
# TODO(ensonic): turn into class with converters as functions and ctx as self
#
# Dispatch table: docbook tag name -> converter function for that tag.
# The converters defined in this file take (ctx, xml) and return a list of
# html fragments. Several tags share a generic converter (e.g. convert_span,
# convert_code, convert_div).
# NOTE(review): presumably consulted by the generic child conversion code
# (convert_inner, not visible here) - confirm before relying on it.
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
}
1026
1027 # conversion helpers
1028
# Common start of every generated html page, up to and including the opening
# <body> tag. It is a %-format template with two '%s' slots:
#   1) the page title
#   2) extra markup for <head>; the chunk converters pass the result of
#      generate_head_links(), which ends each link with a newline
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
1038
1039
def generate_head_links(ctx):
    """Build the <link rel=...> tags for the html <head>.

    A 'home' link is always generated from ctx['nav_home']. 'up', 'prev' and
    'next' links are added, in that order, when ctx has the matching
    'nav_up', 'nav_prev' or 'nav_next' entry. Each nav entry needs 'filename'
    and 'title' attributes.

    Returns:
      the link tags as a single string, one tag per line
    """
    template = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [template % ('home', home.filename, home.title)]
    optional = (('nav_up', 'up'), ('nav_prev', 'prev'), ('nav_next', 'next'))
    for key, rel in optional:
        if key in ctx:
            target = ctx[key]
            links.append(template % (rel, target.filename, target.title))
    return ''.join(links)
1055
1056
def generate_nav_links(ctx):
    """Build the table cells with the home/up/prev/next navigation icons.

    The home cell is always a link to ctx['nav_home']. For 'nav_up',
    'nav_prev' and 'nav_next' a linked icon is generated when the entry is
    in ctx, otherwise the matching '-insensitive' icon without a link.

    Returns:
      the four <td> cells as a single string
    """
    linked = ('<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" '
              'height="16" border="0" alt="%s"></a></td>')
    disabled = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [linked % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    directions = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )
    for key, accesskey, icon, alt in directions:
        if key in ctx:
            cells.append(linked % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(disabled % icon)

    return ''.join(cells)
1082
1083
def generate_toc(ctx, node):
    """Build the table-of-contents entries for all descendants of node.

    Every child becomes a <dt> entry linking to its filename (plus anchor,
    when it has one), with an optional subtitle span. Children that have
    children of their own are followed by a nested <dd><dl> list.

    Args:
      ctx: conversion context, only handed down to the recursive calls
      node: chunk tree node whose children are listed

    Returns:
      list of html fragments; empty if node has no children
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = (child.filename + child.anchor) if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries.append('<dd><dl>')
            entries += generate_toc(ctx, child)
            entries.append('</dl></dd>')
    return entries
1101
1102
def generate_basic_nav(ctx):
    """Build the top navigation table with an empty shortcuts cell.

    Returns:
      the navigation table as a string, including the cells produced by
      generate_nav_links()
    """
    nav_cells = generate_nav_links(ctx)
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """
    return template % nav_cells
1111
1112
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the top navigation table with one shortcut per division.

    Args:
      ctx: conversion context
      divs: the division elements; their titles become the shortcut labels
      prefix: string put in front of the title to form the link anchor
      span_id: suffix for the id ('nav_<span_id>') of the shortcuts span

    Returns:
      the navigation table as a string
    """
    shortcuts = []
    for div in divs:
        label = xml_get_title(ctx, div)
        shortcuts.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, label, label))
    separator = '\n<span class="dim">|</span>\n'

    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """
    return template % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
1130
1131
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to result.

    Besides the 'Top' shortcut, one shortcut is added per refsect1 that
    - has no role ending in '_proto' (those are the TOC sections),
    - has an 'id' attribute, and
    - has a '.' in that id (others are treated as foreign sections).

    Args:
      ctx: conversion context
      refsect1s: the 'refsect1' elements of the refentry
      result: list of html fragments, extended in place

    Returns:
      None
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for section in refsect1s:
        attrs = section.attrib
        if attrs.get('role', '').endswith("_proto"):
            continue
        if 'id' not in attrs or '.' not in attrs['id']:
            continue

        ref_id = attrs['id']
        title = xml_get_title(ctx, section)
        # the part after the first '.' names the section, e.g. 'object-tree'
        span_id = ref_id.split('.')[1].replace('-', '_')

        result.append("""
          <span id="nav_%s">
            <span class="dim">|</span>
            <a href="#%s" class="shortcut">%s</a>
          </span>
          """ % (span_id, ref_id, title))

    nav_cells = generate_nav_links(ctx)
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % nav_cells)
1166
1167
def generate_footer(ctx):
    """Build the footnotes section of a page.

    Returns:
      list of html fragments: empty when ctx has no 'footnotes' entry,
      otherwise a 'footnotes' div wrapping the fragments of every footnote
    """
    if 'footnotes' not in ctx:
        return []

    footer = [
        '<div class="footnotes">\n\n'
        '<br><hr style="width:100; text-align:left;margin-left: 0">\n'
    ]
    for footnote in ctx['footnotes']:
        footer.extend(footnote)
    footer.append('</div>\n')
    return footer
1178
1179
def get_id_path(node):
    """Compute the index path that identifies node.

    First the xml tree is walked from node.xml up to its root, recording the
    1-based position of each element among its siblings. Then the chunk tree
    is walked from node up to its root, recording each node's idx + 1.

    Returns:
      list of strings, outermost chunk first and the position of node.xml
      within its parent last
    """
    # Collect innermost-first and flip once at the end.
    reversed_path = []

    element = node.xml
    parent = element.getparent()
    while parent is not None:
        position = parent.getchildren().index(element)
        reversed_path.append(str(position + 1))
        element, parent = parent, parent.getparent()

    chunk = node
    while chunk is not None:
        reversed_path.append(str(chunk.idx + 1))
        chunk = chunk.parent

    reversed_path.reverse()
    return reversed_path
1199
1200
def get_id(node):
    """Return the id used for the anchor of node.

    The 'id' attribute of node.xml is used when it is present and non-empty.
    Otherwise an id of the form 'id-<n>.<n>...' is generated from the
    position of the node (see get_id_path()) and the fact is logged.
    """
    element = node.xml
    explicit_id = element.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider
        # to omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     element.sourceline, element.tag)
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1214
1215
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Render a chunk as a complete html page with a table of contents.

    The page consists of the html header, the basic navigation, an optional
    title block (when the chunk has a 'title' child), the table of contents
    of the chunk's children (when there are any), the converted content and
    the footer.

    Args:
      ctx: conversion context; ctx['node'] is the chunk to render
      div_class: css class of the <div> wrapping the page content
      title_tag: html tag used for the title, e.g. 'h1'

    Returns:
      list of html fragments
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_basic_nav(ctx))
    html.append('<div class="%s">' % div_class)

    title = node.xml.find('title')
    if title is not None:
        html.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (
            title_tag, get_id(node), title.text, title_tag))

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        html.append("""<p><b>Table of Contents</b></p>
    <div class="toc">
      <dl class="toc">
    """)
        html += toc_entries
        html.append("""</dl>
    </div>
    """)

    convert_inner(ctx, node.xml, html)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1248
1249
1250 # docbook chunks
1251
1252
def convert_book(ctx):
    """Render the 'book' chunk: the index page of the documentation.

    The page shows the book title, the converted first 'bookinfo' element and
    the table of contents of the whole chunk tree.

    Args:
      ctx: conversion context; ctx['node'] is the book chunk

    Returns:
      list of html fragments

    Raises:
      IndexError: if the book has no 'bookinfo' child
    """
    node = ctx['node']
    html = [HTML_HEADER % (node.title, generate_head_links(ctx))]
    html.append("""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title)

    bookinfo = node.xml.findall('bookinfo')[0]
    html += convert_bookinfo(ctx, bookinfo)

    html.append("""<div class="toc">
  <dl class="toc">
""")
    html += generate_toc(ctx, node.root)
    html.append("""</dl>
</div>
""")

    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1277
1278
def convert_chapter(ctx):
    """Render the 'chapter' chunk in ctx as a page with table of contents."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1281
1282
def convert_glossary(ctx):
    """Render the 'glossary' chunk as a complete html page.

    The navigation gets one shortcut per 'glossdiv'; the page body is the
    title block followed by the converted glossdivs and the footer.

    Args:
      ctx: conversion context; ctx['node'] is the glossary chunk

    Returns:
      list of html fragments
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'))
    html.append("""<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth))

    for glossdiv in glossdivs:
        html += convert_glossdiv(ctx, glossdiv)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1302
1303
def convert_index(ctx):
    """Render the 'index' chunk as a complete html page.

    Only the 'indexdiv' elements nested inside the first top-level 'indexdiv'
    are used: each gets a navigation shortcut and is converted into the page
    body.

    Args:
      ctx: conversion context; ctx['node'] is the index chunk

    Returns:
      list of html fragments
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')

    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_alpha_nav(ctx, indexdivs, 'idx', 'index'))
    html.append("""<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth))

    for indexdiv in indexdivs:
        html += convert_indexdiv(ctx, indexdiv)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1324
1325
def convert_part(ctx):
    """Render the 'part' chunk in ctx as a page with table of contents."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1328
1329
def convert_preface(ctx):
    """Render the 'preface' chunk as a complete html page.

    The page consists of the html header, the basic navigation, an optional
    title block (when the chunk has a 'title' child), the converted content
    and the footer.

    Args:
      ctx: conversion context; ctx['node'] is the preface chunk

    Returns:
      list of html fragments
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_basic_nav(ctx))
    html.append('<div class="preface">')

    title = node.xml.find('title')
    if title is not None:
        html.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), title.text))

    convert_inner(ctx, node.xml, html)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1349
1350
def convert_reference(ctx):
    """Render the 'reference' chunk in ctx as a page with table of contents."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1353
1354
def convert_refentry(ctx):
    """Render the 'refentry' chunk as a complete html page.

    The page has a navigation table with shortcuts to the refsect1 sections,
    a name block with title, subtitle and an optional gallery image, followed
    by all converted 'refsect1' children and the footer. The gallery image is
    the 'inlinegraphic' found under refmeta/refmiscinfo, if any.

    Args:
      ctx: conversion context; ctx['node'] is the refentry chunk

    Returns:
      list of html fragments
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # optional gallery image: refmeta/refmiscinfo/inlinegraphic
    gallery = ''
    refmeta = node.xml.find('refmeta')
    refmiscinfo = refmeta.find('refmiscinfo') if refmeta is not None else None
    inlinegraphic = refmiscinfo.find('inlinegraphic') if refmiscinfo is not None else None
    if inlinegraphic is not None:
        gallery = ''.join(convert_inlinegraphic(ctx, inlinegraphic))

    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, html)
    html.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for section in refsect1s:
        html += convert_refsect1(ctx, section)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1394
1395
def convert_sect1(ctx):
    """Render the 'sect1' chunk in ctx as a page with table of contents."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1398
1399
# TODO(ensonic): turn into class with converters as functions and ctx as self
#
# Dispatch table: chunk name -> function rendering that chunk as a html page.
# Each function takes the conversion context and returns a list of html
# fragments. convert() looks up node.name here and logs a warning for names
# that have no entry.
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
1412
1413
def generate_nav_nodes(files, node):
    """Determine the navigation targets for node.

    Args:
      files: list of all chunk nodes in output order; must contain node
      node: the chunk node the navigation is built for

    Returns:
      dict with 'nav_home' (the root node) and, where applicable, 'nav_up'
      (the parent), 'nav_prev' and 'nav_next' (the neighbours in files)
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    last = len(files) - 1
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    if position < last:
        nav['nav_next'] = files[position + 1]
    return nav
1427
1428
def convert(out_dir, module, files, node):
    """Convert one docbook chunk and write it to its html file.

    The output file is created even when no converter is registered for the
    chunk type; in that case it stays empty and a warning is logged.

    Args:
      out_dir: already created output dir
      module: name of the module, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
1452             logging.warning('Add converter/template for "%s"', node.name)
1453
1454
def create_devhelp2_toc(node):
    """Build the nested <sub> entries of a devhelp2 file for node's children.

    Children without children of their own become self-closing <sub/> tags,
    all others wrap the entries of their own children.

    Returns:
      list of xml lines
    """
    lines = []
    for child in node.children:
        if not child.children:
            lines.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        lines.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        lines += create_devhelp2_toc(child)
        lines.append('</sub>\n')
    return lines
1465
1466
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into devhelp2 xml attributes.

    The condition is a '|' separated list of 'name:value' items (since,
    deprecated, ...); an item without ':' gets an empty value. Double quotes
    are replaced by '&quot;'.

    Returns:
      the attributes as a string with a leading space, or '' when node has
      no 'condition' attribute
    """
    if 'condition' not in node.attrib:
        return ''

    escaped = node.attrib['condition'].replace('"', '&quot;')
    attribs = []
    for item in escaped.split('|'):
        # partition() yields an empty value when there is no ':', which
        # covers e.g. a 'deprecated' without description
        name, _, value = item.partition(':')
        attribs.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(attribs)
1481
1482
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 <keyword> line for a 'refsect2' element.

    Args:
      node: the 'refsect2' element; needs 'role' and 'id' attributes
      base_link: link prefix, the element's id is appended to it

    Returns:
      the keyword tag as one line of xml
    """
    keyword_type = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, conditions)
1487
1488
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 <keyword> line for a member listed in a 'refsect3'.

    Args:
      node: the element describing the member; needs a 'role' attribute
      base_link: link prefix, name is appended to it
      title: value of the keyword's 'name' attribute
      name: anchor of the member

    Returns:
      the keyword tag as one line of xml
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, conditions)
1493
1494
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file.

    The file lists the chapters (from the chunk tree) and one keyword per
    'refsect2' with a 'role' attribute, plus keywords for the enum and struct
    members documented below those sections.

    The book title and the online location are taken from /book/bookinfo.
    Both are optional: a missing title results in an empty 'title' attribute
    and a missing online location omits the 'online' attribute.

    Args:
      out_dir: already created output dir
      module: name of the module, used for the file name and the book name
      xml: root element of the document
      files: list of chunk nodes; files[0].root is used as the toc root
    """
    title = ''
    online_attr = ''
    # xpath() returns a (possibly empty) list for node queries, never None
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        titles = bookinfo.xpath('./title/text()')
        if titles:
            title = titles[0]
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_attr = ' online="%s"' % online_urls[0]
        # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append("""  </chapters>
  <functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum members: the keyword name is the text of the details node
            for refsect3_node in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
            # struct members: the id is used as keyword name and as anchor
            for refsect3_node in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

    result.append("""  </functions>
</book>
""")

    # Open the file only once the content is complete, so a failure above
    # does not leave a truncated index behind.
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
1544
1545
def get_dirs(uninstalled):
    """Locate the gtk-doc data and style directories.

    Args:
      uninstalled: if true, look next to the running script (falling back to
        $ABS_TOP_SRCDIR when 'gtk-doc.xsl' is not found there) instead of
        the installed data dir

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir != srcdir
    gtkdocdir, _ = os.path.split(sys.argv[0])
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1559
1560
def main(module, index_file, out_dir, uninstalled):
    """Generate the html pages and the devhelp2 index for one document.

    The work is split into seven numbered steps; the time each step took is
    logged (at warning level).

    Args:
      module: name of the module
      index_file: path of the main docbook xml file; xincludes are resolved
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files

    Returns:
      None (there is no explicit return value)
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of HTML_FORMATTER to the copied
    # stylesheet (the file is opened in append mode)
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    # NOTE(review): the path '/usr/share/gtk-doc/html' is hard-coded here
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # keep only the nodes without an anchor, in pre-order
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node)
    logging.warning("7: %7.3lf: create html", timer() - _t)
1631
1632
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed command line options; 'args' holds the module name and
        the path of the main document, 'uninstalled' is passed on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine, other errors propagate
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))