gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
  34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42   - handle 'label' attributes on part/chapter/section-types
  43     - the titles will have a generated prefix, such as 'Part I:'
  44       (locale dependent)
  45     - in the toc it would only be the label: 'I.'
  46   - 'link' adds a 'title' attr to 'a' if the target has a title.
  47     - there is an implementation in convert_link() but it is slow
  48     - we might need to collect titles as we chunk
  49     - if we do this we'd need to keep iterating, but might be able to replace
  50       add_id_links()
  51   - handle the 'xref' tag
  52     - this needs the title + the type of the target
  53     - for the title, see convert_link()
  54 - check each docbook tag if it can contain #PCDATA, if not don't check for
  55   xml.text
  56 - consider some perf-warnings flag
  57   - see 'No "id" attribute on'
  58
  59 OPTIONAL:
  60 - minify html: https://pypi.python.org/pypi/htmlmin/
  61
  62 Requirements:
  63 sudo pip3 install anytree lxml pygments
  64
  65 Example invocation:
  66 cd tests/bugs/docs/
  67 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  68 xdg-open db2html/index.html
  69 meld html db2html
  70
  71 Benchmarking:
  72 cd tests/bugs/docs/;
  73 rm html-build.stamp; time make html-build.stamp
  74 """
  75
  76 import argparse
  77 import errno
  78 import logging
  79 import os
  80 import shutil
  81 import sys
  82
  83 from anytree import Node, PreOrderIter
  84 from copy import deepcopy
  85 from glob import glob
  86 from lxml import etree
  87 from pygments import highlight
  88 from pygments.lexers import CLexer
  89 from pygments.formatters import HtmlFormatter
  90
  91 from . import config, fixxref
  92
# pygments setup
# Lexer cache keyed by lower-case language name. It is seeded with the C lexer;
# convert_programlisting() adds further entries on demand.
LEXERS = {
    'c': CLexer()
}
# Shared formatter for highlighted listings. convert_programlisting() wraps the
# highlighted markup in its own <pre> element.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
  99
 100
 101 class ChunkParams(object):
 102     def __init__(self, prefix, parent=None, min_idx=0):
 103         self.prefix = prefix
 104         self.parent = parent
 105         self.min_idx = min_idx
 106         self.idx = 1
 107
 108
# Used as 'min_idx' for tags that never get a file of their own: no sibling
# index can reach infinity, so chunk() always takes the anchor-only branch.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# Maps a docbook tag to ChunkParams(prefix, parent, min_idx).
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# Tags we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
 138
# Maps a chunk tag to a (title xpath, subtitle xpath or None) pair, evaluated
# relative to the chunk element. The '_' entry is the default for tags that
# are not listed (see get_chunk_titles()).
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every 'id' attribute value (used by add_id_links()).
ID_XPATH = etree.XPath('//@id')

# Selects all glossary entries; build_glossary() fills 'glossary' with
# term -> definition text, which convert_acronym() reads for the title attr.
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
glossary = {}

# Next footnote number; incremented by convert_footnote() (per document, not
# per page).
footnote_idx = 1
 154
 155
 156 def gen_chunk_name(node, chunk_params):
 157     """Generate a chunk file name
 158
 159     This is either based on the id or on the position in the doc. In the latter
 160     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 161     type.
 162     """
 163     if 'id' in node.attrib:
 164         return node.attrib['id']
 165
 166     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 167     chunk_params.idx += 1
 168
 169     # handle parents to make names of nested tags like in docbook
 170     # - we only need to prepend the parent if there are > 1 of them in the
 171     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 172     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 173     #   when we chunk explicitly and on each level maintain the 'idx'
 174     # while chunk_params.parent:
 175     #     parent = chunk_params.parent
 176     #     if parent not in CHUNK_PARAMS:
 177     #         break;
 178     #     chunk_params = CHUNK_PARAMS[parent]
 179     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 180
 181     logging.info('Gen chunk name: "%s"', name)
 182     return name
 183
 184
 185 def get_chunk_titles(module, node):
 186     tag = node.tag
 187     if tag not in TITLE_XPATHS:
 188         # Use defaults
 189         (title, subtitle) = TITLE_XPATHS['_']
 190     else:
 191         (title, subtitle) = TITLE_XPATHS[tag]
 192
 193     ctx = {
 194         'module': module,
 195         'files': [],
 196     }
 197     result = {
 198         'title': None,
 199         'title_tag': None,
 200         'subtitle': None,
 201         'subtitle_tag': None
 202     }
 203     res = title(node)
 204     if res:
 205         xml = res[0]
 206         result['title'] = ''.join(convert_title(ctx, xml))
 207         if xml.tag != 'title':
 208             result['title_tag'] = xml.tag
 209         else:
 210             result['title_tag'] = tag
 211
 212     if subtitle:
 213         res = subtitle(node)
 214         if res:
 215             xml = res[0]
 216             result['subtitle'] = ''.join(convert_title(ctx, xml))
 217             result['subtitle_tag'] = xml.tag
 218     return result
 219
 220
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the newly created node as the root of the tree. For each tree-node we
    generate a filename and process the children.

    Args:
      xml_node: the xml element to process
      module: the module name, passed on to get_chunk_titles()
      depth: nesting depth stored on the created node
      idx: index of this element among its chunkable siblings
      parent: the tree node of the enclosing chunk or None for the root

    Returns:
      The node created for xml_node, or 'parent' unchanged if the tag has no
      entry in CHUNK_PARAMS.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent
                # (the new chunk keeps a detached deep copy, so the parent's
                # xml no longer contains the sub-chunk)
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # Not a file of its own: the node stays in the parent's xml and is
            # addressed through an anchor in the parent's file.
            # NOTE(review): this dereferences parent.filename, so it assumes
            # 'parent' is not None here - confirm for unusual root tags.
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        depth += 1
        # children are counted per chunkable sibling, starting over at 0
        idx = 0
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
 262
 263
 264 def add_id_links(files, links):
 265     for node in files:
 266         chunk_name = node.filename[:-5]
 267         chunk_base = node.filename + '#'
 268         for attr in ID_XPATH(node.xml):
 269             if attr == chunk_name:
 270                 links[attr] = node.filename
 271             else:
 272                 links[attr] = chunk_base + attr
 273
 274
 275 def build_glossary(files):
 276     for node in files:
 277         if node.xml.tag != 'glossary':
 278             continue
 279         for term in GLOSSENTRY_XPATH(node.xml):
 280             # TODO: there can be all kind of things in a glossary. This only supports
 281             # what we commonly use
 282             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 283             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 284             glossary[key] = value
 285             # logging.debug('glosentry: %s:%s', key, value)
 286
 287
 288 # conversion helpers
 289
 290
 291 def convert_inner(ctx, xml, result):
 292     for child in xml:
 293         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 294
 295
 296 def convert_ignore(ctx, xml):
 297     result = []
 298     convert_inner(ctx, xml, result)
 299     return result
 300
 301
 302 def convert_skip(ctx, xml):
 303     return ['']
 304
 305
 306 def append_text(text, result):
 307     if text and text.strip():
 308         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 309
 310
# Tags for which convert__unknown() already logged a 'missing converter'
# warning; used to warn only once per tag.
missing_tags = {}
 312
 313
 314 def convert__unknown(ctx, xml):
 315     # don't recurse on subchunks
 316     if xml.tag in CHUNK_PARAMS:
 317         return []
 318     if isinstance(xml, etree._Comment):
 319         return ['<!-- ' + xml.text + '-->\n']
 320     else:
 321         # warn only once
 322         if xml.tag not in missing_tags:
 323             logging.warning('Add tag converter for "%s"', xml.tag)
 324             missing_tags[xml.tag] = True
 325         result = ['<!-- ' + xml.tag + '-->\n']
 326         convert_inner(ctx, xml, result)
 327         result.append('<!-- /' + xml.tag + '-->\n')
 328         return result
 329
 330
 331 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 332     result = ['<div class="%s">\n' % xml.tag]
 333     title = xml.find('title')
 334     if title is not None:
 335         if 'id' in xml.attrib:
 336             result.append('<a name="%s"></a>' % xml.attrib['id'])
 337         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 338     append_text(xml.text, result)
 339     inner_func(ctx, xml, result)
 340     result.append('</div>')
 341     append_text(xml.tail, result)
 342     return result
 343
 344
 345 def xml_get_title(ctx, xml):
 346     title_tag = xml.find('title')
 347     if title_tag is not None:
 348         return ''.join(convert_title(ctx, title_tag))
 349     else:
 350         # TODO(ensonic): any way to get the file (inlcudes) too?
 351         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 352         return ''
 353
 354
 355 # docbook tags
 356
 357
 358 def convert_abstract(ctx, xml):
 359     result = ["""<div class="abstract">
 360     <p class="title"><b>Abstract</b></p>"""]
 361     append_text(xml.text, result)
 362     convert_inner(ctx, xml, result)
 363     result.append('</div>')
 364     append_text(xml.tail, result)
 365     return result
 366
 367
 368 def convert_acronym(ctx, xml):
 369     key = xml.text
 370     title = glossary.get(key, '')
 371     # TODO: print a sensible warning if missing
 372     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 373     if xml.tail:
 374         result.append(xml.tail)
 375     return result
 376
 377
 378 def convert_anchor(ctx, xml):
 379     return ['<a name="%s"></a>' % xml.attrib['id']]
 380
 381
 382 def convert_bookinfo(ctx, xml):
 383     result = ['<div class="titlepage">']
 384     convert_inner(ctx, xml, result)
 385     result.append("""<hr>
 386 </div>""")
 387     if xml.tail:
 388         result.append(xml.tail)
 389     return result
 390
 391
 392 def convert_blockquote(ctx, xml):
 393     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 394     append_text(xml.text, result)
 395     convert_inner(ctx, xml, result)
 396     result.append('</blockquote>\n</div>')
 397     append_text(xml.tail, result)
 398     return result
 399
 400
 401 def convert_code(ctx, xml):
 402     result = ['<code class="%s">' % xml.tag]
 403     append_text(xml.text, result)
 404     convert_inner(ctx, xml, result)
 405     result.append('</code>')
 406     append_text(xml.tail, result)
 407     return result
 408
 409
 410 def convert_colspec(ctx, xml):
 411     result = ['<col']
 412     a = xml.attrib
 413     if 'colname' in a:
 414         result.append(' class="%s"' % a['colname'])
 415     if 'colwidth' in a:
 416         result.append(' width="%s"' % a['colwidth'])
 417     result.append('>\n')
 418     # is in tgroup and there can be no 'text'
 419     return result
 420
 421
 422 def convert_command(ctx, xml):
 423     result = ['<strong class="userinput"><code>']
 424     append_text(xml.text, result)
 425     convert_inner(ctx, xml, result)
 426     result.append('</code></strong>')
 427     append_text(xml.tail, result)
 428     return result
 429
 430
 431 def convert_corpauthor(ctx, xml):
 432     result = ['<div><h3 class="corpauthor">\n']
 433     append_text(xml.text, result)
 434     convert_inner(ctx, xml, result)
 435     result.append('</h3></div>\n')
 436     append_text(xml.tail, result)
 437     return result
 438
 439
 440 def convert_div(ctx, xml):
 441     result = ['<div class="%s">\n' % xml.tag]
 442     append_text(xml.text, result)
 443     convert_inner(ctx, xml, result)
 444     result.append('</div>')
 445     append_text(xml.tail, result)
 446     return result
 447
 448
 449 def convert_emphasis(ctx, xml):
 450     result = ['<span class="emphasis"><em>']
 451     append_text(xml.text, result)
 452     convert_inner(ctx, xml, result)
 453     result.append('</em></span>')
 454     append_text(xml.tail, result)
 455     return result
 456
 457
 458 def convert_em_class(ctx, xml):
 459     result = ['<em class="%s"><code>' % xml.tag]
 460     append_text(xml.text, result)
 461     convert_inner(ctx, xml, result)
 462     result.append('</code></em>')
 463     append_text(xml.tail, result)
 464     return result
 465
 466
 467 def convert_entry(ctx, xml):
 468     entry_type = ctx['table.entry']
 469     result = ['<' + entry_type]
 470     if 'role' in xml.attrib:
 471         result.append(' class="%s"' % xml.attrib['role'])
 472     if 'morerows' in xml.attrib:
 473         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 474     result.append('>')
 475     append_text(xml.text, result)
 476     convert_inner(ctx, xml, result)
 477     result.append('</' + entry_type + '>')
 478     append_text(xml.tail, result)
 479     return result
 480
 481
 482 def convert_footnote(ctx, xml):
 483     footnotes = ctx.get('footnotes', [])
 484     # footnotes idx is not per page, but per doc
 485     global footnote_idx
 486     idx = footnote_idx
 487     footnote_idx += 1
 488
 489     # need a pair of ids for each footnote (docbook generates different ids)
 490     this_id = 'footnote-%d' % idx
 491     that_id = 'ftn.' + this_id
 492
 493     inner = ['<div id="%s" class="footnote">' % that_id]
 494     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 495         this_id, idx))
 496     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 497     # get double nested paras :/.
 498     # convert_inner(ctx, xml, inner)
 499     para = xml.find('para')
 500     if para is None:
 501         para = xml.find('simpara')
 502     if para is not None:
 503         inner.append(para.text)
 504     else:
 505         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 506                         etree.tostring(xml, method="text", encoding=str).strip())
 507     inner.append('</p></div>')
 508     footnotes.append(inner)
 509     ctx['footnotes'] = footnotes
 510     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 511         that_id, this_id, idx)]
 512
 513
 514 def convert_formalpara(ctx, xml):
 515     result = None
 516     title_tag = xml.find('title')
 517     result = ['<p><b>%s</b>' % title_tag.text]
 518     para_tag = xml.find('para')
 519     append_text(para_tag.text, result)
 520     convert_inner(ctx, para_tag, result)
 521     append_text(para_tag.tail, result)
 522     result.append('</p>')
 523     append_text(xml.tail, result)
 524     return result
 525
 526
 527 def convert_glossdef(ctx, xml):
 528     result = ['<dd class="glossdef">']
 529     convert_inner(ctx, xml, result)
 530     result.append('</dd>\n')
 531     return result
 532
 533
 534 def convert_glossdiv(ctx, xml):
 535     title_tag = xml.find('title')
 536     title = title_tag.text
 537     xml.remove(title_tag)
 538     result = [
 539         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 540     ]
 541     convert_inner(ctx, xml, result)
 542     return result
 543
 544
 545 def convert_glossentry(ctx, xml):
 546     result = []
 547     convert_inner(ctx, xml, result)
 548     return result
 549
 550
 551 def convert_glossterm(ctx, xml):
 552     glossid = ''
 553     text = ''
 554     anchor = xml.find('anchor')
 555     if anchor is not None:
 556         glossid = anchor.attrib.get('id', '')
 557         text += anchor.tail or ''
 558     text += xml.text or ''
 559     if glossid == '':
 560         glossid = 'glossterm-' + text
 561     return [
 562         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 563             glossid, text)
 564     ]
 565
 566
 567 def convert_imageobject(ctx, xml):
 568     imagedata = xml.find('imagedata')
 569     if imagedata is not None:
 570         # TODO(ensonic): warn on missing fileref attr?
 571         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 572     else:
 573         return []
 574
 575
 576 def convert_indexdiv(ctx, xml):
 577     title_tag = xml.find('title')
 578     title = title_tag.text
 579     xml.remove(title_tag)
 580     result = [
 581         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 582     ]
 583     convert_inner(ctx, xml, result)
 584     return result
 585
 586
 587 def convert_informaltable(ctx, xml):
 588     result = ['<div class="informaltable"><table class="informaltable"']
 589     a = xml.attrib
 590     if 'pgwide' in a and a['pgwide'] == '1':
 591         result.append(' width="100%"')
 592     if 'frame' in a and a['frame'] == 'none':
 593         result.append(' border="0"')
 594     result.append('>\n')
 595     convert_inner(ctx, xml, result)
 596     result.append('</table></div>')
 597     if xml.tail:
 598         result.append(xml.tail)
 599     return result
 600
 601
 602 def convert_inlinegraphic(ctx, xml):
 603     # TODO(ensonic): warn on missing fileref attr?
 604     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 605
 606
 607 def convert_itemizedlist(ctx, xml):
 608     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 609     convert_inner(ctx, xml, result)
 610     result.append('</ul></div>')
 611     if xml.tail:
 612         result.append(xml.tail)
 613     return result
 614
 615
 616 def convert_link(ctx, xml):
 617     linkend = xml.attrib['linkend']
 618     if linkend in fixxref.NoLinks:
 619         linkend = None
 620     result = []
 621     if linkend:
 622         link_text = []
 623         append_text(xml.text, link_text)
 624         convert_inner(ctx, xml, link_text)
 625         text = ''.join(link_text)
 626
 627         (tid, href) = fixxref.GetXRef(linkend)
 628         if href:
 629             module = ctx['module']
 630             title_attr = ''
 631             # search for a title under id='tid' in all chunks
 632             # NOTE: this will only work for local links
 633             # TODO: this works but is super slow
 634             # id_xpath = etree.XPath('//*[@id="%s"]' % tid)
 635             # for c in ctx['files']:
 636             #     nodes = id_xpath(c.xml)
 637             #     if nodes:
 638             #         title = get_chunk_titles(module, nodes[0])['title']
 639             #         if title:
 640             #             title_attr = ' title="%s"' % title
 641             #             logging.debug('Have title node: href=%s%s', tid, title_attr)
 642             #         break
 643
 644             href = fixxref.MakeRelativeXRef(module, href)
 645             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 646         else:
 647             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 648             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 649             result = [text]
 650     else:
 651         append_text(xml.text, result)
 652         convert_inner(ctx, xml, result)
 653     append_text(xml.tail, result)
 654     return result
 655
 656
 657 def convert_listitem(ctx, xml):
 658     result = ['<li class="listitem">']
 659     convert_inner(ctx, xml, result)
 660     result.append('</li>')
 661     # is in itemizedlist and there can be no 'text'
 662     return result
 663
 664
 665 def convert_literallayout(ctx, xml):
 666     result = ['<div class="literallayout"><p><br>\n']
 667     append_text(xml.text, result)
 668     convert_inner(ctx, xml, result)
 669     result.append('</p></div>')
 670     append_text(xml.tail, result)
 671     return result
 672
 673
 674 def convert_orderedlist(ctx, xml):
 675     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 676     convert_inner(ctx, xml, result)
 677     result.append('</ol></div>')
 678     append_text(xml.tail, result)
 679     return result
 680
 681
 682 def convert_para(ctx, xml):
 683     result = []
 684     if 'id' in xml.attrib:
 685         result.append('<a name="%s"></a>' % xml.attrib['id'])
 686     if 'role' in xml.attrib:
 687         result.append('<p class="%s">' % xml.attrib['role'])
 688     else:
 689         result.append('<p>')
 690     append_text(xml.text, result)
 691     convert_inner(ctx, xml, result)
 692     result.append('</p>')
 693     append_text(xml.tail, result)
 694     return result
 695
 696
 697 def convert_para_like(ctx, xml):
 698     result = []
 699     if 'id' in xml.attrib:
 700         result.append('<a name="%s"></a>' % xml.attrib['id'])
 701     result.append('<p class="%s">' % xml.tag)
 702     append_text(xml.text, result)
 703     convert_inner(ctx, xml, result)
 704     result.append('</p>')
 705     append_text(xml.tail, result)
 706     return result
 707
 708
 709 def convert_phrase(ctx, xml):
 710     result = ['<span']
 711     if 'role' in xml.attrib:
 712         result.append(' class="%s">' % xml.attrib['role'])
 713     else:
 714         result.append('>')
 715     append_text(xml.text, result)
 716     convert_inner(ctx, xml, result)
 717     result.append('</span>')
 718     append_text(xml.tail, result)
 719     return result
 720
 721
 722 def convert_primaryie(ctx, xml):
 723     result = ['<dt>\n']
 724     convert_inner(ctx, xml, result)
 725     result.append('\n</dt>\n<dd></dd>\n')
 726     return result
 727
 728
 729 def convert_pre(ctx, xml):
 730     result = ['<pre class="%s">\n' % xml.tag]
 731     append_text(xml.text, result)
 732     convert_inner(ctx, xml, result)
 733     result.append('</pre>')
 734     append_text(xml.tail, result)
 735     return result
 736
 737
 738 def convert_programlisting(ctx, xml):
 739     result = []
 740     if xml.attrib.get('role', '') == 'example':
 741         if xml.text:
 742             lang = xml.attrib.get('language', 'c').lower()
 743             if lang not in LEXERS:
 744                 LEXERS[lang] = get_lexer_by_name(lang)
 745             lexer = LEXERS.get(lang, None)
 746             if lexer:
 747                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 748
 749                 # we do own line-numbering
 750                 line_count = highlighted.count('\n')
 751                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 752                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 753   <tbody>
 754     <tr>
 755       <td class="listing_lines" align="right"><pre>%s</pre></td>
 756       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 757     </tr>
 758   </tbody>
 759 </table>
 760 """ % (source_lines, highlighted))
 761             else:
 762                 logging.warn('No pygments lexer for language="%s"', lang)
 763                 result.append('<pre class="programlisting">')
 764                 result.append(xml.text)
 765                 result.append('</pre>')
 766     else:
 767         result.append('<pre class="programlisting">')
 768         append_text(xml.text, result)
 769         convert_inner(ctx, xml, result)
 770         result.append('</pre>')
 771     append_text(xml.tail, result)
 772     return result
 773
 774
 775 def convert_quote(ctx, xml):
 776     result = ['<span class="quote">"<span class="quote">']
 777     append_text(xml.text, result)
 778     convert_inner(ctx, xml, result)
 779     result.append('</span>"</span>')
 780     append_text(xml.tail, result)
 781     return result
 782
 783
 784 def convert_refsect1(ctx, xml):
 785     # Add a divider between two consequitive refsect2
 786     def convert_inner(ctx, xml, result):
 787         prev = None
 788         for child in xml:
 789             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 790                 result.append('<hr>\n')
 791             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 792             prev = child
 793     return convert_sect(ctx, xml, 'h2', convert_inner)
 794
 795
 796 def convert_refsect2(ctx, xml):
 797     return convert_sect(ctx, xml, 'h3')
 798
 799
 800 def convert_refsect3(ctx, xml):
 801     return convert_sect(ctx, xml, 'h4')
 802
 803
 804 def convert_row(ctx, xml):
 805     result = ['<tr>\n']
 806     convert_inner(ctx, xml, result)
 807     result.append('</tr>\n')
 808     return result
 809
 810
 811 def convert_sect1_tag(ctx, xml):
 812     return convert_sect(ctx, xml, 'h2')
 813
 814
 815 def convert_sect2(ctx, xml):
 816     return convert_sect(ctx, xml, 'h3')
 817
 818
 819 def convert_sect3(ctx, xml):
 820     return convert_sect(ctx, xml, 'h4')
 821
 822
 823 def convert_simpara(ctx, xml):
 824     result = ['<p>']
 825     append_text(xml.text, result)
 826     result.append('</p>')
 827     append_text(xml.tail, result)
 828     return result
 829
 830
 831 def convert_span(ctx, xml):
 832     result = ['<span class="%s">' % xml.tag]
 833     append_text(xml.text, result)
 834     convert_inner(ctx, xml, result)
 835     result.append('</span>')
 836     append_text(xml.tail, result)
 837     return result
 838
 839
 840 def convert_table(ctx, xml):
 841     result = ['<div class="table">']
 842     if 'id' in xml.attrib:
 843         result.append('<a name="%s"></a>' % xml.attrib['id'])
 844     title_tag = xml.find('title')
 845     if title_tag is not None:
 846         result.append('<p class="title"><b>')
 847         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 848         result.extend(convert_title(ctx, title_tag))
 849         result.append('</b></p>')
 850         xml.remove(title_tag)
 851     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 852
 853     convert_inner(ctx, xml, result)
 854
 855     result.append('</table></div></div>')
 856     append_text(xml.tail, result)
 857     return result
 858
 859
 860 def convert_tbody(ctx, xml):
 861     result = ['<tbody>']
 862     ctx['table.entry'] = 'td'
 863     convert_inner(ctx, xml, result)
 864     result.append('</tbody>')
 865     # is in tgroup and there can be no 'text'
 866     return result
 867
 868
 869 def convert_tgroup(ctx, xml):
 870     # tgroup does not expand to anything, but the nested colspecs need to
 871     # be put into a colgroup
 872     cols = xml.findall('colspec')
 873     result = []
 874     if cols:
 875         result.append('<colgroup>\n')
 876         for col in cols:
 877             result.extend(convert_colspec(ctx, col))
 878             xml.remove(col)
 879         result.append('</colgroup>\n')
 880     convert_inner(ctx, xml, result)
 881     # is in informaltable and there can be no 'text'
 882     return result
 883
 884
 885 def convert_thead(ctx, xml):
 886     result = ['<thead>']
 887     ctx['table.entry'] = 'th'
 888     convert_inner(ctx, xml, result)
 889     result.append('</thead>')
 890     # is in tgroup and there can be no 'text'
 891     return result
 892
 893
 894 def convert_title(ctx, xml):
 895     # This is always called from some context
 896     result = []
 897     append_text(xml.text, result)
 898     convert_inner(ctx, xml, result)
 899     append_text(xml.tail, result)
 900     return result
 901
 902
 903 def convert_ulink(ctx, xml):
 904     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 905     if xml.tail:
 906         result.append(xml.tail)
 907     return result
 908
 909
 910 def convert_userinput(ctx, xml):
 911     result = ['<span class="command"><strong>']
 912     append_text(xml.text, result)
 913     convert_inner(ctx, xml, result)
 914     result.append('</strong></span>')
 915     append_text(xml.tail, result)
 916     return result
 917
 918
 919 def convert_variablelist(ctx, xml):
 920     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 921 <colgroup>
 922 <col align="left" valign="top">
 923 <col>
 924 </colgroup>
 925 <tbody>"""]
 926     convert_inner(ctx, xml, result)
 927     result.append("""</tbody>
 928 </table></div>""")
 929     return result
 930
 931
 932 def convert_varlistentry(ctx, xml):
 933     result = ['<tr>']
 934
 935     result.append('<td><p>')
 936     term = xml.find('term')
 937     result.extend(convert_span(ctx, term))
 938     result.append('</p></td>')
 939
 940     result.append('<td>')
 941     listitem = xml.find('listitem')
 942     convert_inner(ctx, listitem, result)
 943     result.append('</td>')
 944
 945     result.append('<tr>')
 946     return result
 947
 948
# Dispatch table: docbook tag name -> converter function for that tag.
# Several tags share a generic converter (e.g. convert_span, convert_code,
# convert_div). Note that 'sect1' maps to convert_sect1_tag here, while the
# chunk level converter convert_sect1 is registered in convert_chunks.
# NOTE(review): presumably looked up by convert_inner() for every child
# element - confirm against its definition.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
}
1029
1030 # conversion helpers
1031
# Start of every generated html page, up to and including the opening <body>.
# Takes two '%s' values: the text for <title> and the already formatted
# <link> tags for the head (the chunk converters pass the result of
# generate_head_links() for the latter).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
1041
1042
def generate_head_links(ctx):
    """Build the <link rel=...> tags for the html <head>.

    The 'home' link is always emitted from ctx['nav_home']. The 'up', 'prev'
    and 'next' links are only emitted if the matching 'nav_*' key is in ctx.

    Returns:
      all link tags joined into one string
    """
    optional_links = (
        ('nav_up', '<link rel="up" href="%s" title="%s">\n'),
        ('nav_prev', '<link rel="prev" href="%s" title="%s">\n'),
        ('nav_next', '<link rel="next" href="%s" title="%s">\n'),
    )
    home = ctx['nav_home']
    links = ['<link rel="home" href="%s" title="%s">\n' % (home.filename, home.title)]
    for key, template in optional_links:
        if key in ctx:
            target = ctx[key]
            links.append(template % (target.filename, target.title))
    return ''.join(links)
1058
1059
def generate_nav_links(ctx):
    """Build the table cells with the home/up/prev/next navigation icons.

    The home cell always links to ctx['nav_home']. For 'nav_up', 'nav_prev'
    and 'nav_next' a linked icon is emitted if the key is in ctx, otherwise
    the matching '-insensitive' icon without a link.

    Returns:
      all <td> cells joined into one string
    """
    # (ctx key, cell with link, cell without link)
    optional_cells = (
        ('nav_up',
         '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>',
         '<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_prev',
         '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>',
         '<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_next',
         '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>',
         '<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>'),
    )
    home = ctx['nav_home']
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % home.filename
    ]
    for key, linked, insensitive in optional_cells:
        if key in ctx:
            cells.append(linked % ctx[key].filename)
        else:
            cells.append(insensitive)

    return ''.join(cells)
1085
1086
def generate_toc(ctx, node):
    """Build the <dt>/<dd> entries of a table of contents.

    One <dt> is emitted per child of node, linking to the child's filename
    (plus anchor if set) and followed by the subtitle if set. Children that
    have children themselves get a nested <dl> built recursively.

    Args:
      ctx: conversion context, only passed on to the recursive calls
      node: tree node whose children are listed

    Returns:
      list of html fragments
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if not child.children:
            continue
        entries.append('<dd><dl>')
        entries.extend(generate_toc(ctx, child))
        entries.append('</dl></dd>')
    return entries
1104
1105
def generate_basic_nav(ctx):
    """Return the navigation table with an empty 'shortcuts' cell.

    The navigation icons come from generate_nav_links().
    """
    nav_links = generate_nav_links(ctx)
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """ % nav_links
1114
1115
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Return the navigation table with one shortcut link per div.

    Args:
      ctx: conversion context
      divs: elements to link to; their title is the link text
      prefix: string put in front of the title to form the link anchor
      span_id: suffix for the id ('nav_<span_id>') of the shortcuts span

    Returns:
      the html of the navigation table as one string
    """
    titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles
    ]
    separator = '\n<span class="dim">|</span>\n'

    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """ % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
1133
1134
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to result.

    Besides the 'Top' shortcut, one shortcut is added for each refsect1 that
    is listed (see the skip rules in the loop).

    Args:
      ctx: conversion context
      refsect1s: the 'refsect1' elements of the refentry
      result: list of html fragments that is extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        attrs = sect.attrib
        # don't list TOC sections (role="xxx_proto")
        if attrs.get('role', '').endswith("_proto"):
            continue
        # skip sections without 'id' attrs and foreign sections (no '.' in id)
        if 'id' not in attrs or '.' not in attrs['id']:
            continue

        ref_id = attrs['id']
        title = xml_get_title(ctx, sect)
        # second component of the id, made usable as part of an identifier
        span_id = ref_id.split('.')[1].replace('-', '_')

        result.append("""
          <span id="nav_%s">
            <span class="dim">|</span>
            <a href="#%s" class="shortcut">%s</a>
          </span>
          """ % (span_id, ref_id, title))

    nav_links = generate_nav_links(ctx)
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % nav_links)
1169
1170
def generate_footer(ctx):
    """Build the footnotes block for the end of a page.

    Returns:
      an empty list if ctx has no 'footnotes' key, otherwise a 'footnotes'
      div holding the fragments of all lists in ctx['footnotes']
    """
    if 'footnotes' not in ctx:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    for footnote in ctx['footnotes']:
        footer += footnote
    footer.append('</div>\n')
    return footer
1181
1182
def get_id_path(node):
    """Generate the index path used to build an 'id'.

    We need to walk up the xml-tree and check the positions for each sibling.
    When reaching the top of the tree we collect remaining index entries from
    the chunked-tree.

    Args:
      node: chunk tree node; needs 'xml' (the element), 'idx' and 'parent'

    Returns:
      list of 1-based positions as strings, outermost first: the 'idx' of the
      chunk tree nodes from the root down to node, followed by the element
      positions from the top of the xml tree down to node.xml
    """
    # collected innermost first and reversed at the end
    ix = []
    xml = node.xml
    parent = xml.getparent()
    while parent is not None:
        # parent.index() replaces the deprecated getchildren().index()
        ix.append(str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    while node is not None:
        ix.append(str(node.idx + 1))
        node = node.parent

    ix.reverse()
    return ix
1202
1203
def get_id(node):
    """Return the 'id' attribute of the node's element or a generated id.

    A generated id has the form 'id-' followed by the dot separated entries
    of get_id_path().
    """
    xml = node.xml
    explicit_id = xml.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     xml.sourceline, xml.tag)
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1217
1218
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert the chunk in ctx['node'] to a page with a table of contents.

    Args:
      ctx: conversion context
      div_class: class of the <div> that wraps the page content
      title_tag: html tag name used for the page title (e.g. 'h2')

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_basic_nav(ctx))
    html.append('<div class="%s">' % div_class)

    heading = node.xml.find('title')
    if heading is not None:
        html.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (
            title_tag, get_id(node), heading.text, title_tag))

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        html.append("""<p><b>Table of Contents</b></p>
    <div class="toc">
      <dl class="toc">
    """)
        html.extend(toc_entries)
        html.append("""</dl>
    </div>
    """)

    convert_inner(ctx, node.xml, html)
    html.extend(generate_footer(ctx))
    html.append("""</div>
</body>
</html>""")
    return html
1251
1252
1253 # docbook chunks
1254
1255
def convert_book(ctx):
    """Convert the 'book' chunk in ctx['node'] to the start page.

    The page consists of the title banner, the converted first 'bookinfo'
    element, the table of contents of the whole tree and the footer.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    html = [HTML_HEADER % (node.title, generate_head_links(ctx))]
    html.append("""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title)

    bookinfo = node.xml.findall('bookinfo')[0]
    html.extend(convert_bookinfo(ctx, bookinfo))

    html.append("""<div class="toc">
  <dl class="toc">
""")
    html.extend(generate_toc(ctx, node.root))
    html.append("""</dl>
</div>
""")

    html.extend(generate_footer(ctx))
    html.append("""</div>
</body>
</html>""")
    return html
1280
1281
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: div class 'chapter', title in <h2>."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1284
1285
def convert_glossary(ctx):
    """Convert the 'glossary' chunk in ctx['node'] to a page.

    The navigation gets one shortcut per 'glossdiv'; the heading level is
    taken from node.depth.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'))
    html.append("""<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth))

    for glossdiv in glossdivs:
        html.extend(convert_glossdiv(ctx, glossdiv))
    html.extend(generate_footer(ctx))
    html.append("""</div>
</body>
</html>""")
    return html
1305
1306
def convert_index(ctx):
    """Convert the 'index' chunk in ctx['node'] to a page.

    The navigation gets one shortcut per nested 'indexdiv'; the heading level
    is taken from node.depth. An index without an outer 'indexdiv' (e.g. an
    empty index) yields a page with just the title instead of failing.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    indexdivs = []
    outer_indexdiv = node.xml.find('indexdiv')
    if outer_indexdiv is not None:
        indexdivs = outer_indexdiv.findall('indexdiv')

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1327
1328
def convert_part(ctx):
    """Convert a 'part' chunk: div class 'part', title in <h1>."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1331
1332
def convert_preface(ctx):
    """Convert the 'preface' chunk in ctx['node'] to a page.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_basic_nav(ctx))
    html.append('<div class="preface">')

    heading = node.xml.find('title')
    if heading is not None:
        html.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), heading.text))

    convert_inner(ctx, node.xml, html)
    html.extend(generate_footer(ctx))
    html.append("""</div>
</body>
</html>""")
    return html
1352
1353
def convert_reference(ctx):
    """Convert a 'reference' chunk: div class 'reference', title in <h1>."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1356
1357
def convert_refentry(ctx):
    """Convert the 'refentry' chunk in ctx['node'] to a page.

    The page has a navigation table with shortcuts to the refsect1 sections,
    a title block with an optional gallery image and the converted refsect1
    sections.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # the gallery image is the first refmeta/refmiscinfo/inlinegraphic,
    # following only the first match on each level
    graphic = node.xml
    for tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        graphic = graphic.find(tag)
        if graphic is None:
            break
    if graphic is None:
        gallery = ''
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, graphic))

    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, html)
    html.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        html.extend(convert_refsect1(ctx, refsect1))
    html.extend(generate_footer(ctx))
    html.append("""</div>
</body>
</html>""")
    return html
1397
1398
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: div class 'sect1', title in <h2>."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1401
1402
# Dispatch table: chunk name -> function that converts the chunk into the
# html fragments of a whole page. convert() looks up node.name here and logs
# a warning for names that are missing.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
1415
1416
def generate_nav_nodes(files, node):
    """Collect the navigation targets for a node.

    Args:
      files: list of all nodes; neighbours in this list become prev/next
      node: the node to build the navigation for; must be in files

    Returns:
      dict with 'nav_home' (node.root) and, where available, 'nav_up'
      (node.parent), 'nav_prev' and 'nav_next'
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent
    pos = files.index(node)
    if pos > 0:
        nav['nav_prev'] = files[pos - 1]
    if pos + 1 < len(files):
        nav['nav_next'] = files[pos + 1]
    return nav
1430
1431
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The file is created even if there is no converter for the chunk; in that
    case it stays empty and a warning is logged.

    Args:
      out_dir: already created output dir
      module: the module name, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as out:
        ctx = {'module': module, 'files': files, 'node': node}
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            out.writelines(converter(ctx))
1456
1457
def create_devhelp2_toc(node):
    """Build the nested <sub> entries for the devhelp2 chapters section.

    Children without children become a self-closing <sub/>, all others wrap
    the entries of their own children.

    Returns:
      list of xml lines
    """
    lines = []
    for child in node.children:
        if not child.children:
            lines.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        lines.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        lines.extend(create_devhelp2_toc(child))
        lines.append('</sub>\n')
    return lines
1468
1469
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute into devhelp2 keyword attributes.

    The condition holds '|' separated entries of the form 'name:value' or
    just 'name' (e.g. since, deprecated). Double quotes are replaced with
    '&quot;' first.

    Returns:
      the attributes as one string with a leading space, or '' if the node
      has no 'condition' attribute
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    condition = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for entry in condition.split('|'):
        # deprecated can have no description, then the value is empty
        name, _, value = entry.partition(':')
        keywords.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(keywords)
1484
1485
def create_devhelp2_refsect2_keyword(node, base_link):
    """Format the devhelp2 <keyword> line for a refsect2 element.

    Args:
      node: the element; its 'role' is the keyword type, its title the name
      base_link: link prefix, the 'id' of the element is appended

    Returns:
      the xml line as a string
    """
    keyword_type = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    condition_attribs = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, condition_attribs)
1490
1491
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Format the devhelp2 <keyword> line for a member row.

    Args:
      node: the element; its 'role' is the keyword type
      base_link: link prefix, name is appended
      title: value for the 'name' attribute of the keyword
      name: anchor that is appended to base_link

    Returns:
      the xml line as a string
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    condition_attribs = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, condition_attribs)
1496
1497
def create_devhelp2(out_dir, module, xml, files):
    """Write the <module>.devhelp2 index file into out_dir.

    The file lists the chapters (from the chunk tree) and one keyword for
    each 'refsect2' with a role, plus keywords for its enum and struct
    members.

    A missing bookinfo or title results in an empty book title and a missing
    online-location ulink in no 'online' attribute. The content is built
    completely before the file is opened, so a failure does not leave a
    truncated file behind.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: root element of the document
      files: list of chunk tree nodes; must not be empty
    """
    title = ''
    online_attr = ''
    # xpath() returns a (possibly empty) list of matches, never None
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        titles = bookinfo.xpath('./title/text()')
        if titles:
            title = titles[0]
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_attr = ' online="%s"' % online_urls[0]
        # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append("""  </chapters>
  <functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        for refsect2_node in refsect2(node.xml):
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            for refsect3_node in refsect3_enum(refsect2_node):
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
            for refsect3_node in refsect3_struct(refsect2_node):
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

    result.append("""  </functions>
</book>
""")

    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
1547
1548
def get_dirs(uninstalled):
    """Locate the gtk-doc data directory and the style directory.

    Args:
      uninstalled: if true, derive the directories from the location of the
        running script (or from ABS_TOP_SRCDIR), otherwise from
        config.datadir

    Returns:
      a (gtkdocdir, styledir) tuple
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir_xsl = os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'
        if os.path.exists(srcdir_xsl):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1562
1563
def main(module, index_file, out_dir, uninstalled):
    """Generate the html pages and the devhelp2 index for a module.

    Args:
      module: the module name
      index_file: path of the main docbook xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and append the style definitions of the formatter to the copied css
    copied_css = os.path.join(out_dir, 'style.css')
    with open(copied_css, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #   can generate navigation and link tags.
    chunk_tree = chunk(tree.getroot(), module)
    files = [n for n in PreOrderIter(chunk_tree) if n.anchor is None]

    # 2) extract tables:
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file (could be done in parallel with 4)
    create_devhelp2(out_dir, module, tree.getroot(), files)

    # 4) iterate the tree and output files
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for page in files:
        convert(out_dir, module, files, page)
1609
1610
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed options; 'args' holds the module name and the path of
        the main document, 'uninstalled' is passed on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an existing output dir is reused
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))