gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
   29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
   34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42   - handle the 'xref' tag
  43     - this needs the title + the type of the target
  44     - for the title, see add_id_links_and_titles(), we can also store the tag
  45       in another map
  46 - check each docbook tag if it can contain #PCDATA, if not don't check for
  47   xml.text
  48 - consider some perf-warnings flag
  49   - see 'No "id" attribute on'
  50 - find a better way to print context for warnings
  51   - we use 'xml.sourceline', but this all does not help a lot due to xi:include
  52
  53 DIFFERENCES:
  54 - titles
   55   - we add the chunk label to both the title in the toc and the title on the page
  56   - docbook xsl only sometimes adds the label to the titles and when it does it
  57     adds name chunk type too (e.g. 'Part I.' instead of 'I.')
  58 - navigation
  59   - we always add an up-link except on the first page
  60
  61 OPTIONAL:
  62 - minify html: https://pypi.python.org/pypi/htmlmin/
  63
  64 Requirements:
  65 sudo pip3 install anytree lxml pygments
  66
  67 Example invocation:
  68 cd tests/bugs/docs/
  69 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  70 xdg-open db2html/index.html
  71 meld html db2html
  72
  73 Benchmarking:
  74 cd tests/bugs/docs/;
  75 rm html-build.stamp; time make html-build.stamp
  76 """
  77
  78 import argparse
  79 import errno
  80 import logging
  81 import os
  82 import shutil
  83 import sys
  84
  85 from anytree import Node, PreOrderIter
  86 from copy import deepcopy
  87 from glob import glob
  88 from lxml import etree
  89 from pygments import highlight
  90 from pygments.lexers import CLexer
  91 from pygments.formatters import HtmlFormatter
  92 from timeit import default_timer as timer
  93
  94 from . import config, fixxref
  95
  96 # pygments setup
  97 # lazily constructed lexer cache
  98 LEXERS = {
  99     'c': CLexer()
 100 }
 101 HTML_FORMATTER = HtmlFormatter(nowrap=True)
 102
 103
 104 class ChunkParams(object):
 105     def __init__(self, prefix, parent=None, min_idx=0):
 106         self.prefix = prefix
 107         self.parent = parent
 108         self.min_idx = min_idx
 109         self.idx = 1
 110
 111
 112 DONT_CHUNK = float('inf')
 113 # docbook-xsl defines the chunk tags here.
 114 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
 115 # https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
 116 # If not defined, we can just create an example without an 'id' attr and see
 117 # docbook xsl does.
 118 #
 119 # For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
 120 # TODO: this list has also a flag that controls wheter we add the
 121 # 'Table of Contents' heading in convert_chunk_with_toc()
 122 CHUNK_PARAMS = {
 123     'appendix': ChunkParams('app', 'book'),
 124     'book': ChunkParams('bk'),
 125     'chapter': ChunkParams('ch', 'book'),
 126     'glossary': ChunkParams('go', 'book'),
 127     'index': ChunkParams('ix', 'book'),
 128     'part': ChunkParams('pt', 'book'),
 129     'preface': ChunkParams('pr', 'book'),
 130     'refentry': ChunkParams('re', 'book'),
 131     'reference': ChunkParams('rn', 'book'),
 132     'sect1': ChunkParams('s', 'chapter', 1),
 133     'section': ChunkParams('s', 'chapter', 1),
 134     'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
 135     'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
 136     'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
 137     'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
 138 }
 139 # TAGS we don't support:
 140 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
 141
 142 TITLE_XPATHS = {
 143     '_': (etree.XPath('./title'), None),
 144     'book': (etree.XPath('./bookinfo/title'), None),
 145     'refentry': (
 146         etree.XPath('./refmeta/refentrytitle'),
 147         etree.XPath('./refnamediv/refpurpose')
 148     ),
 149 }
 150
 151 ID_XPATH = etree.XPath('//*[@id]')
 152
 153 GLOSSENTRY_XPATH = etree.XPath('//glossentry')
 154 glossary = {}
 155
 156 footnote_idx = 1
 157
 158 titles = {}
 159
 160
 161 def gen_chunk_name(node, chunk_params):
 162     """Generate a chunk file name
 163
 164     This is either based on the id or on the position in the doc. In the latter
 165     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 166     type.
 167     """
 168     if 'id' in node.attrib:
 169         return node.attrib['id']
 170
 171     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 172     chunk_params.idx += 1
 173
 174     # handle parents to make names of nested tags like in docbook
 175     # - we only need to prepend the parent if there are > 1 of them in the
 176     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 177     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 178     #   when we chunk explicitly and on each level maintain the 'idx'
 179     # while chunk_params.parent:
 180     #     parent = chunk_params.parent
 181     #     if parent not in CHUNK_PARAMS:
 182     #         break;
 183     #     chunk_params = CHUNK_PARAMS[parent]
 184     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 185
 186     logging.info('Gen chunk name: "%s"', name)
 187     return name
 188
 189
 190 def get_chunk_titles(module, node):
 191     tag = node.tag
 192     (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
 193
 194     ctx = {
 195         'module': module,
 196         'files': [],
 197     }
 198     result = {
 199         'title': None,
 200         'title_tag': None,
 201         'subtitle': None,
 202         'subtitle_tag': None
 203     }
 204     res = title(node)
 205     if res:
 206         # handle chunk label for tocs
 207         label = node.attrib.get('label')
 208         if label:
 209             label += '. '
 210         else:
 211             label = ''
 212
 213         xml = res[0]
 214         result['title'] = label + ''.join(convert_title(ctx, xml))
 215         if xml.tag != 'title':
 216             result['title_tag'] = xml.tag
 217         else:
 218             result['title_tag'] = tag
 219
 220     if subtitle:
 221         res = subtitle(node)
 222         if res:
 223             xml = res[0]
 224             result['subtitle'] = ''.join(convert_title(ctx, xml))
 225             result['subtitle_tag'] = xml.tag
 226     return result
 227
 228
 229 def chunk(xml_node, module, depth=0, idx=0, parent=None):
 230     """Chunk the tree.
 231
 232     The first time, we're called with parent=None and in that case we return
 233     the new_node as the root of the tree. For each tree-node we generate a
 234     filename and process the children.
 235     """
 236     tag = xml_node.tag
 237     chunk_params = CHUNK_PARAMS.get(tag)
 238     if chunk_params:
 239         title_args = get_chunk_titles(module, xml_node)
 240         chunk_name = gen_chunk_name(xml_node, chunk_params)
 241
 242         # check idx to handle 'sect1'/'section' special casing and title-only
 243         # segments
 244         if idx >= chunk_params.min_idx:
 245             logging.info('chunk tag: "%s"[%d]', tag, idx)
 246             if parent:
 247                 # remove the xml-node from the parent
 248                 sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
 249                 xml_node.getparent().remove(xml_node)
 250                 xml_node = sub_tree
 251
 252             parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
 253                           idx=idx,
 254                           filename=chunk_name + '.html', anchor=None,
 255                           **title_args)
 256         else:
 257             parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
 258                           idx=idx,
 259                           filename=parent.filename, anchor='#' + chunk_name,
 260                           **title_args)
 261
 262         depth += 1
 263         idx = 0
 264         for child in xml_node:
 265             chunk(child, module, depth, idx, parent)
 266             if child.tag in CHUNK_PARAMS:
 267                 idx += 1
 268
 269     return parent
 270
 271
 272 def add_id_links_and_titles(files, links):
 273     for node in files:
 274         chunk_name = node.filename[:-5]
 275         chunk_base = node.filename + '#'
 276         for elem in ID_XPATH(node.xml):
 277             attr = elem.attrib['id']
 278             if attr == chunk_name:
 279                 links[attr] = node.filename
 280             else:
 281                 links[attr] = chunk_base + attr
 282
 283             title = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
 284             res = title(elem)
 285             if res:
 286                 # we need the plain text content
 287                 titles[attr] = etree.tostring(res[0], method="text",
 288                                               encoding=str).strip()
 289
 290
 291 def build_glossary(files):
 292     for node in files:
 293         if node.xml.tag != 'glossary':
 294             continue
 295         for term in GLOSSENTRY_XPATH(node.xml):
 296             # TODO: there can be all kind of things in a glossary. This only supports
 297             # what we commonly use
 298             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 299             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 300             glossary[key] = value
 301             # logging.debug('glosentry: %s:%s', key, value)
 302
 303
 304 # conversion helpers
 305
 306
 307 def convert_inner(ctx, xml, result):
 308     for child in xml:
 309         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 310
 311
 312 def convert_ignore(ctx, xml):
 313     result = []
 314     convert_inner(ctx, xml, result)
 315     return result
 316
 317
 318 def convert_skip(ctx, xml):
 319     return []
 320
 321
 322 def append_text(text, result):
 323     if text and text.strip():
 324         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 325
 326
 327 missing_tags = {}
 328
 329
 330 def convert__unknown(ctx, xml):
 331     # don't recurse on subchunks
 332     if xml.tag in CHUNK_PARAMS:
 333         return []
 334     if isinstance(xml, etree._Comment):
 335         return ['<!-- ' + xml.text + '-->\n']
 336     else:
 337         # warn only once
 338         if xml.tag not in missing_tags:
 339             logging.warning('Add tag converter for "%s"', xml.tag)
 340             missing_tags[xml.tag] = True
 341         result = ['<!-- ' + xml.tag + '-->\n']
 342         convert_inner(ctx, xml, result)
 343         result.append('<!-- /' + xml.tag + '-->\n')
 344         return result
 345
 346
 347 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 348     result = ['<div class="%s">\n' % xml.tag]
 349     title = xml.find('title')
 350     if title is not None:
 351         if 'id' in xml.attrib:
 352             result.append('<a name="%s"></a>' % xml.attrib['id'])
 353         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 354     append_text(xml.text, result)
 355     inner_func(ctx, xml, result)
 356     result.append('</div>')
 357     append_text(xml.tail, result)
 358     return result
 359
 360
 361 def xml_get_title(ctx, xml):
 362     title_tag = xml.find('title')
 363     if title_tag is not None:
 364         return ''.join(convert_title(ctx, title_tag))
 365     else:
 366         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 367         return ''
 368
 369
 370 # docbook tags
 371
 372
 373 def convert_abstract(ctx, xml):
 374     result = ["""<div class="abstract">
 375     <p class="title"><b>Abstract</b></p>"""]
 376     append_text(xml.text, result)
 377     convert_inner(ctx, xml, result)
 378     result.append('</div>')
 379     append_text(xml.tail, result)
 380     return result
 381
 382
 383 def convert_acronym(ctx, xml):
 384     key = xml.text
 385     title = glossary.get(key, '')
 386     # TODO: print a sensible warning if missing
 387     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 388     if xml.tail:
 389         result.append(xml.tail)
 390     return result
 391
 392
 393 def convert_anchor(ctx, xml):
 394     return ['<a name="%s"></a>' % xml.attrib['id']]
 395
 396
 397 def convert_bookinfo(ctx, xml):
 398     result = ['<div class="titlepage">']
 399     convert_inner(ctx, xml, result)
 400     result.append("""<hr>
 401 </div>""")
 402     if xml.tail:
 403         result.append(xml.tail)
 404     return result
 405
 406
 407 def convert_blockquote(ctx, xml):
 408     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 409     append_text(xml.text, result)
 410     convert_inner(ctx, xml, result)
 411     result.append('</blockquote>\n</div>')
 412     append_text(xml.tail, result)
 413     return result
 414
 415
 416 def convert_code(ctx, xml):
 417     result = ['<code class="%s">' % xml.tag]
 418     append_text(xml.text, result)
 419     convert_inner(ctx, xml, result)
 420     result.append('</code>')
 421     append_text(xml.tail, result)
 422     return result
 423
 424
 425 def convert_colspec(ctx, xml):
 426     result = ['<col']
 427     a = xml.attrib
 428     if 'colname' in a:
 429         result.append(' class="%s"' % a['colname'])
 430     if 'colwidth' in a:
 431         result.append(' width="%s"' % a['colwidth'])
 432     result.append('>\n')
 433     # is in tgroup and there can be no 'text'
 434     return result
 435
 436
 437 def convert_command(ctx, xml):
 438     result = ['<strong class="userinput"><code>']
 439     append_text(xml.text, result)
 440     convert_inner(ctx, xml, result)
 441     result.append('</code></strong>')
 442     append_text(xml.tail, result)
 443     return result
 444
 445
 446 def convert_corpauthor(ctx, xml):
 447     result = ['<div><h3 class="corpauthor">\n']
 448     append_text(xml.text, result)
 449     convert_inner(ctx, xml, result)
 450     result.append('</h3></div>\n')
 451     append_text(xml.tail, result)
 452     return result
 453
 454
 455 def convert_div(ctx, xml):
 456     result = ['<div class="%s">\n' % xml.tag]
 457     append_text(xml.text, result)
 458     convert_inner(ctx, xml, result)
 459     result.append('</div>')
 460     append_text(xml.tail, result)
 461     return result
 462
 463
 464 def convert_emphasis(ctx, xml):
 465     result = ['<span class="emphasis"><em>']
 466     append_text(xml.text, result)
 467     convert_inner(ctx, xml, result)
 468     result.append('</em></span>')
 469     append_text(xml.tail, result)
 470     return result
 471
 472
 473 def convert_em_class(ctx, xml):
 474     result = ['<em class="%s"><code>' % xml.tag]
 475     append_text(xml.text, result)
 476     convert_inner(ctx, xml, result)
 477     result.append('</code></em>')
 478     append_text(xml.tail, result)
 479     return result
 480
 481
 482 def convert_entry(ctx, xml):
 483     entry_type = ctx['table.entry']
 484     result = ['<' + entry_type]
 485     if 'role' in xml.attrib:
 486         result.append(' class="%s"' % xml.attrib['role'])
 487     if 'morerows' in xml.attrib:
 488         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 489     result.append('>')
 490     append_text(xml.text, result)
 491     convert_inner(ctx, xml, result)
 492     result.append('</' + entry_type + '>')
 493     append_text(xml.tail, result)
 494     return result
 495
 496
 497 def convert_footnote(ctx, xml):
 498     footnotes = ctx.get('footnotes', [])
 499     # footnotes idx is not per page, but per doc
 500     global footnote_idx
 501     idx = footnote_idx
 502     footnote_idx += 1
 503
 504     # need a pair of ids for each footnote (docbook generates different ids)
 505     this_id = 'footnote-%d' % idx
 506     that_id = 'ftn.' + this_id
 507
 508     inner = ['<div id="%s" class="footnote">' % that_id]
 509     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 510         this_id, idx))
 511     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 512     # get double nested paras :/.
 513     # convert_inner(ctx, xml, inner)
 514     para = xml.find('para')
 515     if para is None:
 516         para = xml.find('simpara')
 517     if para is not None:
 518         inner.append(para.text)
 519     else:
 520         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 521                         etree.tostring(xml, method="text", encoding=str).strip())
 522     inner.append('</p></div>')
 523     footnotes.append(inner)
 524     ctx['footnotes'] = footnotes
 525     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 526         that_id, this_id, idx)]
 527
 528
 529 def convert_formalpara(ctx, xml):
 530     result = None
 531     title_tag = xml.find('title')
 532     result = ['<p><b>%s</b>' % title_tag.text]
 533     para_tag = xml.find('para')
 534     append_text(para_tag.text, result)
 535     convert_inner(ctx, para_tag, result)
 536     append_text(para_tag.tail, result)
 537     result.append('</p>')
 538     append_text(xml.tail, result)
 539     return result
 540
 541
 542 def convert_glossdef(ctx, xml):
 543     result = ['<dd class="glossdef">']
 544     convert_inner(ctx, xml, result)
 545     result.append('</dd>\n')
 546     return result
 547
 548
 549 def convert_glossdiv(ctx, xml):
 550     title_tag = xml.find('title')
 551     title = title_tag.text
 552     xml.remove(title_tag)
 553     result = [
 554         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 555     ]
 556     convert_inner(ctx, xml, result)
 557     return result
 558
 559
 560 def convert_glossentry(ctx, xml):
 561     result = []
 562     convert_inner(ctx, xml, result)
 563     return result
 564
 565
 566 def convert_glossterm(ctx, xml):
 567     glossid = ''
 568     text = ''
 569     anchor = xml.find('anchor')
 570     if anchor is not None:
 571         glossid = anchor.attrib.get('id', '')
 572         text += anchor.tail or ''
 573     text += xml.text or ''
 574     if glossid == '':
 575         glossid = 'glossterm-' + text
 576     return [
 577         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 578             glossid, text)
 579     ]
 580
 581
 582 def convert_imageobject(ctx, xml):
 583     imagedata = xml.find('imagedata')
 584     if imagedata is not None:
 585         # TODO(ensonic): warn on missing fileref attr?
 586         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 587     else:
 588         return []
 589
 590
 591 def convert_indexdiv(ctx, xml):
 592     title_tag = xml.find('title')
 593     title = title_tag.text
 594     xml.remove(title_tag)
 595     result = [
 596         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 597     ]
 598     convert_inner(ctx, xml, result)
 599     return result
 600
 601
 602 def convert_informaltable(ctx, xml):
 603     result = ['<div class="informaltable"><table class="informaltable"']
 604     a = xml.attrib
 605     if 'pgwide' in a and a['pgwide'] == '1':
 606         result.append(' width="100%"')
 607     if 'frame' in a and a['frame'] == 'none':
 608         result.append(' border="0"')
 609     result.append('>\n')
 610     convert_inner(ctx, xml, result)
 611     result.append('</table></div>')
 612     if xml.tail:
 613         result.append(xml.tail)
 614     return result
 615
 616
 617 def convert_inlinegraphic(ctx, xml):
 618     # TODO(ensonic): warn on missing fileref attr?
 619     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 620
 621
 622 def convert_itemizedlist(ctx, xml):
 623     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 624     convert_inner(ctx, xml, result)
 625     result.append('</ul></div>')
 626     if xml.tail:
 627         result.append(xml.tail)
 628     return result
 629
 630
 631 def convert_link(ctx, xml):
 632     linkend = xml.attrib['linkend']
 633     if linkend in fixxref.NoLinks:
 634         linkend = None
 635     result = []
 636     if linkend:
 637         link_text = []
 638         append_text(xml.text, link_text)
 639         convert_inner(ctx, xml, link_text)
 640         text = ''.join(link_text)
 641
 642         (tid, href) = fixxref.GetXRef(linkend)
 643         if href:
 644             title_attr = ''
 645             title = titles.get(tid)
 646             if title:
 647                 title_attr = ' title="%s"' % title
 648
 649             href = fixxref.MakeRelativeXRef(ctx['module'], href)
 650             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 651         else:
 652             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 653             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 654             result = [text]
 655     else:
 656         append_text(xml.text, result)
 657         convert_inner(ctx, xml, result)
 658     append_text(xml.tail, result)
 659     return result
 660
 661
 662 def convert_listitem(ctx, xml):
 663     result = ['<li class="listitem">']
 664     convert_inner(ctx, xml, result)
 665     result.append('</li>')
 666     # is in itemizedlist and there can be no 'text'
 667     return result
 668
 669
 670 def convert_literallayout(ctx, xml):
 671     result = ['<div class="literallayout"><p><br>\n']
 672     append_text(xml.text, result)
 673     convert_inner(ctx, xml, result)
 674     result.append('</p></div>')
 675     append_text(xml.tail, result)
 676     return result
 677
 678
 679 def convert_orderedlist(ctx, xml):
 680     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 681     convert_inner(ctx, xml, result)
 682     result.append('</ol></div>')
 683     append_text(xml.tail, result)
 684     return result
 685
 686
 687 def convert_para(ctx, xml):
 688     result = []
 689     if 'id' in xml.attrib:
 690         result.append('<a name="%s"></a>' % xml.attrib['id'])
 691     if 'role' in xml.attrib:
 692         result.append('<p class="%s">' % xml.attrib['role'])
 693     else:
 694         result.append('<p>')
 695     append_text(xml.text, result)
 696     convert_inner(ctx, xml, result)
 697     result.append('</p>')
 698     append_text(xml.tail, result)
 699     return result
 700
 701
 702 def convert_para_like(ctx, xml):
 703     result = []
 704     if 'id' in xml.attrib:
 705         result.append('<a name="%s"></a>' % xml.attrib['id'])
 706     result.append('<p class="%s">' % xml.tag)
 707     append_text(xml.text, result)
 708     convert_inner(ctx, xml, result)
 709     result.append('</p>')
 710     append_text(xml.tail, result)
 711     return result
 712
 713
 714 def convert_phrase(ctx, xml):
 715     result = ['<span']
 716     if 'role' in xml.attrib:
 717         result.append(' class="%s">' % xml.attrib['role'])
 718     else:
 719         result.append('>')
 720     append_text(xml.text, result)
 721     convert_inner(ctx, xml, result)
 722     result.append('</span>')
 723     append_text(xml.tail, result)
 724     return result
 725
 726
 727 def convert_primaryie(ctx, xml):
 728     result = ['<dt>\n']
 729     convert_inner(ctx, xml, result)
 730     result.append('\n</dt>\n<dd></dd>\n')
 731     return result
 732
 733
 734 def convert_pre(ctx, xml):
 735     result = ['<pre class="%s">\n' % xml.tag]
 736     append_text(xml.text, result)
 737     convert_inner(ctx, xml, result)
 738     result.append('</pre>')
 739     append_text(xml.tail, result)
 740     return result
 741
 742
 743 def convert_programlisting(ctx, xml):
 744     result = []
 745     if xml.attrib.get('role', '') == 'example':
 746         if xml.text:
 747             lang = xml.attrib.get('language', 'c').lower()
 748             if lang not in LEXERS:
 749                 LEXERS[lang] = get_lexer_by_name(lang)
 750             lexer = LEXERS.get(lang, None)
 751             if lexer:
 752                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 753
 754                 # we do own line-numbering
 755                 line_count = highlighted.count('\n')
 756                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 757                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 758   <tbody>
 759     <tr>
 760       <td class="listing_lines" align="right"><pre>%s</pre></td>
 761       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 762     </tr>
 763   </tbody>
 764 </table>
 765 """ % (source_lines, highlighted))
 766             else:
 767                 logging.warn('No pygments lexer for language="%s"', lang)
 768                 result.append('<pre class="programlisting">')
 769                 result.append(xml.text)
 770                 result.append('</pre>')
 771     else:
 772         result.append('<pre class="programlisting">')
 773         append_text(xml.text, result)
 774         convert_inner(ctx, xml, result)
 775         result.append('</pre>')
 776     append_text(xml.tail, result)
 777     return result
 778
 779
 780 def convert_quote(ctx, xml):
 781     result = ['<span class="quote">"<span class="quote">']
 782     append_text(xml.text, result)
 783     convert_inner(ctx, xml, result)
 784     result.append('</span>"</span>')
 785     append_text(xml.tail, result)
 786     return result
 787
 788
 789 def convert_refsect1(ctx, xml):
 790     # Add a divider between two consequitive refsect2
 791     def convert_inner(ctx, xml, result):
 792         prev = None
 793         for child in xml:
 794             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 795                 result.append('<hr>\n')
 796             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 797             prev = child
 798     return convert_sect(ctx, xml, 'h2', convert_inner)
 799
 800
 801 def convert_refsect2(ctx, xml):
 802     return convert_sect(ctx, xml, 'h3')
 803
 804
 805 def convert_refsect3(ctx, xml):
 806     return convert_sect(ctx, xml, 'h4')
 807
 808
 809 def convert_row(ctx, xml):
 810     result = ['<tr>\n']
 811     convert_inner(ctx, xml, result)
 812     result.append('</tr>\n')
 813     return result
 814
 815
 816 def convert_sect1_tag(ctx, xml):
 817     return convert_sect(ctx, xml, 'h2')
 818
 819
 820 def convert_sect2(ctx, xml):
 821     return convert_sect(ctx, xml, 'h3')
 822
 823
 824 def convert_sect3(ctx, xml):
 825     return convert_sect(ctx, xml, 'h4')
 826
 827
 828 def convert_simpara(ctx, xml):
 829     result = ['<p>']
 830     append_text(xml.text, result)
 831     result.append('</p>')
 832     append_text(xml.tail, result)
 833     return result
 834
 835
 836 def convert_span(ctx, xml):
 837     result = ['<span class="%s">' % xml.tag]
 838     append_text(xml.text, result)
 839     convert_inner(ctx, xml, result)
 840     result.append('</span>')
 841     append_text(xml.tail, result)
 842     return result
 843
 844
 845 def convert_table(ctx, xml):
 846     result = ['<div class="table">']
 847     if 'id' in xml.attrib:
 848         result.append('<a name="%s"></a>' % xml.attrib['id'])
 849     title_tag = xml.find('title')
 850     if title_tag is not None:
 851         result.append('<p class="title"><b>')
 852         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 853         result.extend(convert_title(ctx, title_tag))
 854         result.append('</b></p>')
 855     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 856
 857     convert_inner(ctx, xml, result)
 858
 859     result.append('</table></div></div>')
 860     append_text(xml.tail, result)
 861     return result
 862
 863
 864 def convert_tbody(ctx, xml):
 865     result = ['<tbody>']
 866     ctx['table.entry'] = 'td'
 867     convert_inner(ctx, xml, result)
 868     result.append('</tbody>')
 869     # is in tgroup and there can be no 'text'
 870     return result
 871
 872
 873 def convert_tgroup(ctx, xml):
 874     # tgroup does not expand to anything, but the nested colspecs need to
 875     # be put into a colgroup
 876     cols = xml.findall('colspec')
 877     result = []
 878     if cols:
 879         result.append('<colgroup>\n')
 880         for col in cols:
 881             result.extend(convert_colspec(ctx, col))
 882             xml.remove(col)
 883         result.append('</colgroup>\n')
 884     convert_inner(ctx, xml, result)
 885     # is in informaltable and there can be no 'text'
 886     return result
 887
 888
 889 def convert_thead(ctx, xml):
 890     result = ['<thead>']
 891     ctx['table.entry'] = 'th'
 892     convert_inner(ctx, xml, result)
 893     result.append('</thead>')
 894     # is in tgroup and there can be no 'text'
 895     return result
 896
 897
 898 def convert_title(ctx, xml):
 899     # This is always explicitly called from some context
 900     result = []
 901     append_text(xml.text, result)
 902     convert_inner(ctx, xml, result)
 903     append_text(xml.tail, result)
 904     return result
 905
 906
 907 def convert_ulink(ctx, xml):
 908     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 909     append_text(xml.tail, result)
 910     return result
 911
 912
 913 def convert_userinput(ctx, xml):
 914     result = ['<span class="command"><strong>']
 915     append_text(xml.text, result)
 916     convert_inner(ctx, xml, result)
 917     result.append('</strong></span>')
 918     append_text(xml.tail, result)
 919     return result
 920
 921
 922 def convert_variablelist(ctx, xml):
 923     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 924 <colgroup>
 925 <col align="left" valign="top">
 926 <col>
 927 </colgroup>
 928 <tbody>"""]
 929     convert_inner(ctx, xml, result)
 930     result.append("""</tbody>
 931 </table></div>""")
 932     return result
 933
 934
 935 def convert_varlistentry(ctx, xml):
 936     result = ['<tr>']
 937
 938     result.append('<td><p>')
 939     term = xml.find('term')
 940     result.extend(convert_span(ctx, term))
 941     result.append('</p></td>')
 942
 943     result.append('<td>')
 944     listitem = xml.find('listitem')
 945     convert_inner(ctx, listitem, result)
 946     result.append('</td>')
 947
 948     result.append('<tr>')
 949     return result
 950
 951
# Dispatch table for inline/block docbook tags: maps the tag name to the
# converter function. All converters referenced here take (ctx, xml).
# Several tags share a generic converter (e.g. convert_span, convert_code,
# convert_div), and 'title' maps to convert_skip since titles are converted
# explicitly via convert_title().
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
}
1032
1033 # conversion helpers
1034
# Common start of every generated html page, up to and including the opening
# <body> tag. Takes two '%s' arguments: the text for the <title> tag and the
# already rendered <link> tags that are placed in front of the stylesheet
# link (the chunk converters pass the result of generate_head_links()).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
1044
1045
def generate_head_links(ctx):
    """Build the <link rel="..."> tags for the html <head>.

    The 'home' link is always generated from ctx['nav_home']. The 'up',
    'prev' and 'next' links are only generated if the matching 'nav_up',
    'nav_prev' or 'nav_next' key is present in ctx.

    Returns:
      one string with a newline terminated <link> tag per relation
    """
    link_tag = '<link rel="%s" href="%s" title="%s">\n'

    home = ctx['nav_home']
    tags = [link_tag % ('home', home.filename, home.title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            tags.append(link_tag % (rel, target.filename, target.title))
    return ''.join(tags)
1061
1062
def generate_nav_links(ctx):
    """Build the table cells with the home/up/prev/next navigation icons.

    The home cell is always a link to ctx['nav_home']. For 'nav_up',
    'nav_prev' and 'nav_next' a linked icon is generated if the key is
    present in ctx, otherwise the cell shows the '-insensitive' icon without
    a link.

    Returns:
      one string with the four <td> cells
    """
    linked_cell = ('<td><a accesskey="%s" href="%s"><img src="%s.png" '
                   'width="16" height="16" border="0" alt="%s"></a></td>')
    plain_cell = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'
    # (ctx key, access key, icon base name, alt text)
    optional_targets = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )

    cells = [linked_cell % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    for key, accesskey, icon, alt in optional_targets:
        if key in ctx:
            cells.append(linked_cell % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(plain_cell % icon)

    return ''.join(cells)
1088
1089
def generate_toc(ctx, node):
    """Build the table-of-contents entries for the children of 'node'.

    Each child yields a <dt> entry linking to its file (plus anchor, if it
    has one) and, if present, its subtitle. Children that have children
    themselves are followed by a nested <dd><dl> list generated recursively.

    Returns:
      the list of html fragments (empty if the node has no children)
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (
                child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries.append('<dd><dl>')
            entries += generate_toc(ctx, child)
            entries.append('</dl></dd>')
    return entries
1107
1108
def generate_basic_nav(ctx):
    """Build the navigation table with an empty shortcuts cell.

    Returns:
      the html string of the table; the cells after the shortcuts cell come
      from generate_nav_links()
    """
    template = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">\n'
        '  <tr valign="middle">\n'
        '    <td width="100%%" align="left" class="shortcuts"></td>\n'
        '    %s\n'
        '  </tr>\n'
        '</table>\n'
        '    '
    )
    return template % generate_nav_links(ctx)
1117
1118
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the navigation table with one shortcut per div.

    Args:
      ctx: the conversion context
      divs: the xml elements to create shortcuts for; their titles are used
            as link text and, prepended with 'prefix', as link target
      prefix: string put in front of the title in the '#...' link target
      span_id: suffix for the id ('nav_<span_id>') of the shortcuts span

    Returns:
      the html string of the table
    """
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """
    titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = '\n<span class="dim">|</span>\n'.join(
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles)
    nav_links = generate_nav_links(ctx)
    return template % (span_id, shortcuts, nav_links)
1136
1137
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to 'result'.

    Besides the 'Top' shortcut, one shortcut is added for each refsect1 that
    has an 'id' attribute containing a '.', unless its role ends in '_proto'.

    Args:
      ctx: the conversion context
      refsect1s: the refsect1 xml elements of the page
      result: the list of html fragments to append to
    """
    shortcut = """
          <span id="nav_%s">
            <span class="dim">|</span>
            <a href="#%s" class="shortcut">%s</a>
          </span>
          """

    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        attrs = sect.attrib
        # don't list TOC sections (role="xxx_proto") and skip sections
        # without 'id' attrs
        if attrs.get('role', '').endswith("_proto") or 'id' not in attrs:
            continue

        ref_id = attrs['id']
        # foreign sections (no '.' in the id) are skipped
        if '.' in ref_id:
            title = xml_get_title(ctx, sect)
            # the part after the first '.' names the section
            span_id = ref_id.split('.')[1].replace('-', '_')
            result.append(shortcut % (span_id, ref_id, title))

    nav_links = generate_nav_links(ctx)
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % nav_links)
1172
1173
def generate_footer(ctx):
    """Build the footnotes block of a page.

    Returns:
      an empty list if ctx has no 'footnotes' key, otherwise the html
      fragments of all entries in ctx['footnotes'] wrapped into a
      <div class="footnotes">
    """
    if 'footnotes' not in ctx:
        return []

    footer = [
        '<div class="footnotes">\n\n'
        '<br><hr style="width:100; text-align:left;margin-left: 0">\n'
    ]
    for footnote in ctx['footnotes']:
        footer += footnote
    footer.append('</div>\n')
    return footer
1184
1185
def get_id_path(node):
    """Generate the index path used for a synthetic 'id'.

    Starting at the xml element of the node, the xml-tree is walked up and
    the 1-based position of each element among its siblings is recorded.
    After reaching the top of the xml-tree, the 1-based 'idx' of the node and
    of all its ancestors in the chunked-tree are recorded as well.

    Returns:
      the list of positions as strings, outermost first
    """
    # Collected innermost first and reversed once at the end.
    path = []

    elem = node.xml
    while True:
        parent = elem.getparent()
        if parent is None:
            break
        path.append(str(parent.getchildren().index(elem) + 1))
        elem = parent

    chunk_node = node
    while chunk_node is not None:
        path.append(str(chunk_node.idx + 1))
        chunk_node = chunk_node.parent

    path.reverse()
    return path
1205
1206
def get_id(node):
    """Return the id for a node of the chunked-tree.

    A non-empty 'id' attribute on the xml element of the node is used as is.
    Otherwise an id of the form 'id-<n>.<n>...' is generated from the
    position of the node (see get_id_path()) and the missing attribute is
    logged at info level.
    """
    elem = node.xml
    explicit_id = elem.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     elem.sourceline, elem.tag)
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1220
1221
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert the chunk in ctx['node'] into a page with a table of contents.

    Args:
      ctx: the conversion context
      div_class: the class of the <div> that wraps the page content
      title_tag: the html tag (e.g. 'h1') used for the title, if there is one

    Returns:
      the list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_basic_nav(ctx))
    html.append('<div class="%s">' % div_class)

    if node.title:
        titlepage = (
            '\n'
            '<div class="titlepage">\n'
            '<%s class="title"><a name="%s"></a>%s</%s>\n'
            '</div>'
        )
        html.append(titlepage % (title_tag, get_id(node), node.title, title_tag))

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        html.append(
            '<p><b>Table of Contents</b></p>\n'
            '    <div class="toc">\n'
            '      <dl class="toc">\n'
            '    ')
        html += toc_entries
        html.append(
            '</dl>\n'
            '    </div>\n'
            '    ')

    convert_inner(ctx, node.xml, html)
    # the footer is generated after the content has been converted
    html += generate_footer(ctx)
    html.append('</div>\n</body>\n</html>')
    return html
1253
1254
1255 # docbook chunks
1256
1257
def convert_book(ctx):
    """Convert the 'book' chunk in ctx['node'] into the index page.

    The page consists of the title, the converted first 'bookinfo' child and
    the table of contents generated from the root of the chunked-tree.

    Returns:
      the list of html fragments for the whole page

    Raises:
      IndexError: if the book has no 'bookinfo' child
    """
    node = ctx['node']
    title_table = (
        '<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">\n'
        '    <tr><th valign="middle"><p class="title">%s</p></th></tr>\n'
        '</table>\n'
        '<div class="book">\n'
    )
    html = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        title_table % node.title,
    ]

    bookinfo = node.xml.findall('bookinfo')[0]
    html += convert_bookinfo(ctx, bookinfo)

    html.append('<div class="toc">\n  <dl class="toc">\n')
    html += generate_toc(ctx, node.root)
    html.append('</dl>\n</div>\n')

    html += generate_footer(ctx)
    html.append('</div>\n</body>\n</html>')
    return html
1282
1283
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a page with toc and a <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1286
1287
def convert_glossary(ctx):
    """Convert the 'glossary' chunk in ctx['node'] into a page.

    The navigation gets one shortcut per 'glossdiv' child and the page body
    consists of the converted glossdivs.

    Returns:
      the list of html fragments for the whole page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'),
    ]
    titlepage = (
        '<div class="glossary">\n'
        '<div class="titlepage"><h%1d class="title">\n'
        '<a name="%s"></a>%s</h%1d>\n'
        '</div>'
    )
    html.append(titlepage % (node.depth, get_id(node), node.title, node.depth))

    for glossdiv in glossdivs:
        html += convert_glossdiv(ctx, glossdiv)
    html += generate_footer(ctx)
    html.append('</div>\n</body>\n</html>')
    return html
1307
1308
def convert_index(ctx):
    """Convert the 'index' chunk in ctx['node'] into a page.

    The navigation gets one shortcut per 'indexdiv' nested in the first
    top-level 'indexdiv' and the page body consists of those converted
    indexdivs.

    Returns:
      the list of html fragments for the whole page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')

    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
    ]
    titlepage = (
        '<div class="index">\n'
        '<div class="titlepage"><h%1d class="title">\n'
        '<a name="%s"></a>%s</h%1d>\n'
        '</div>'
    )
    html.append(titlepage % (node.depth, get_id(node), node.title, node.depth))

    for indexdiv in indexdivs:
        html += convert_indexdiv(ctx, indexdiv)
    html += generate_footer(ctx)
    html.append('</div>\n</body>\n</html>')
    return html
1329
1330
def convert_part(ctx):
    """Convert a 'part' chunk: a page with toc and a <h1> title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1333
1334
def convert_preface(ctx):
    """Convert the 'preface' chunk in ctx['node'] into a page.

    Returns:
      the list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    html.append(generate_basic_nav(ctx))
    html.append('<div class="preface">')

    if node.title:
        titlepage = (
            '\n'
            '<div class="titlepage">\n'
            '<h2 class="title"><a name="%s"></a>%s</h2>\n'
            '</div>'
        )
        html.append(titlepage % (get_id(node), node.title))

    convert_inner(ctx, node.xml, html)
    # the footer is generated after the content has been converted
    html += generate_footer(ctx)
    html.append('</div>\n</body>\n</html>')
    return html
1353
1354
def convert_reference(ctx):
    """Convert a 'reference' chunk: a page with toc and a <h1> title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1357
1358
def convert_refentry(ctx):
    """Convert the 'refentry' chunk in ctx['node'] into a page.

    The page consists of the navigation with shortcuts to the refsect1s, a
    title block (with an optional gallery image taken from
    refmeta/refmiscinfo/inlinegraphic) and the converted refsect1s.

    Returns:
      the list of html fragments for the whole page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Descend refmeta -> refmiscinfo -> inlinegraphic, always following the
    # first match and stopping as soon as one level is missing.
    graphic = node.xml.find('refmeta')
    for tag in ('refmiscinfo', 'inlinegraphic'):
        if graphic is None:
            break
        graphic = graphic.find(tag)
    gallery = ''
    if graphic is not None:
        gallery = ''.join(convert_inlinegraphic(ctx, graphic))

    page_title = node.title + ": " + node.root.title
    html = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, html)

    title_block = """
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
"""
    html.append(title_block % (
        node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        html += convert_refsect1(ctx, refsect1)
    html += generate_footer(ctx)
    html.append('</div>\n</body>\n</html>')
    return html
1398
1399
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: a page with toc and a <h2> title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1402
1403
# Dispatch table for the chunks (the docbook elements that become a html
# file of their own): maps the name of a node in the chunked-tree to the
# converter that takes the ctx and returns the html fragments of the page.
# convert() looks up node.name here and logs a warning for unknown names.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
1416
1417
def generate_nav_nodes(files, node):
    """Determine the navigation targets for a node.

    Args:
      files: list of all nodes; the neighbours of 'node' in this list become
             the prev/next targets
      node: the node to build the navigation for

    Returns:
      a dict that always has 'nav_home' (the root) and, where they exist,
      'nav_up' (the parent), 'nav_prev' and 'nav_next'
    """
    targets = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        targets['nav_up'] = node.parent
    pos = files.index(node)
    last = len(files) - 1
    if pos > 0:
        targets['nav_prev'] = files[pos - 1]
    if pos < last:
        targets['nav_next'] = files[pos + 1]
    return targets
1431
1432
def convert(out_dir, module, files, node):
    """Convert one docbook chunk and write it as html file to out_dir.

    The output file is named after node.filename. If there is no converter
    for node.name, a warning is logged and the file stays empty.

    Args:
      out_dir: already created output dir
      module: the module name, made available to the converters via the ctx
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    html_path = os.path.join(out_dir, node.filename)
    with open(html_path, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {'module': module, 'files': files, 'node': node}
        ctx.update(generate_nav_nodes(files, node))

        if node.name not in convert_chunks:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            converter = convert_chunks[node.name]
            html.writelines(converter(ctx))
1457
1458
def create_devhelp2_toc(node):
    """Build the nested <sub> entries for the children of 'node'.

    Children without children of their own become self-closing <sub/> tags,
    all others wrap the recursively generated entries of their children.

    Returns:
      the list of xml fragments
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
1469
1470
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into xml attributes.

    The condition holds '|' separated entries of the form 'name:value' or
    just 'name' (e.g. deprecated can have no description). Double quotes are
    replaced with '&quot;' before splitting.

    Returns:
      ' name="value" ...' with a leading space, or '' if the node has no
      'condition' attribute
    """
    if 'condition' not in node.attrib:
        return ''

    # condition -> since, deprecated, ... (separated with '|')
    escaped = node.attrib['condition'].replace('"', '&quot;')
    attribs = []
    for entry in escaped.split('|'):
        # entries without ':' get an empty value
        name, _, value = entry.partition(':')
        attribs.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(attribs)
1485
1486
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the <keyword> entry for a refsect2 element.

    The type comes from the 'role' attribute, the name from the title of the
    element and the link is base_link followed by the 'id' attribute.
    """
    keyword_type = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, conditions)
1491
1492
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the <keyword> entry for a member listed in a refsect3.

    The type comes from the 'role' attribute of the node, 'title' is used as
    keyword name and the link is base_link followed by 'name'.
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, conditions)
1497
1498
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    The file lists the chapters (from the chunked-tree that files[0] belongs
    to) and one keyword for each refsect2 with a 'role' attribute, plus
    keywords for the enum and struct members listed in its refsect3s.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: the root element of the document
      files: list of nodes in the tree

    A missing 'bookinfo', title or online-location url is tolerated: the
    title is left empty and the 'online' attribute is omitted.
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_attr = ''
        # xpath() returns a (possibly empty) list and never None, so test for
        # emptiness before indexing
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_attr = ' online="%s"' % online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: keyword name is the text of the para
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members: the id is used as keyword name and link
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        idx.writelines(result)
1548
1549
def get_dirs(uninstalled):
    """Determine the directories holding the gtk-doc data and style files.

    Args:
      uninstalled: if true, look next to the running script (falling back to
                   $ABS_TOP_SRCDIR if 'gtk-doc.xsl' is found there but not
                   next to the script), otherwise use the configured datadir

    Returns:
      a (gtkdocdir, styledir) tuple
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1563
1564
def main(module, index_file, out_dir, uninstalled):
    """Generate the html files and the devhelp2 index for a document.

    The work is split into a loading phase (parse the document, copy the
    data files, load the xref targets), a processing phase (chunk the
    document, extract links/titles/glossary) and an output phase (write the
    devhelp2 file and one html file per chunk). The time spent in each step
    is logged with logging.warning().

    Args:
      module: the module name
      index_file: path of the main docbook xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to select where the data files are
                   looked up

    There is no explicit return value (the function returns None).
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    # resolve the xml-includes in place
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of the formatter to the copied
    # stylesheet (hence the 'at' append mode)
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us
    # the globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # flatten the tree in pre-order and keep only nodes without an anchor
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node)
    logging.warning("7: %7.3lf: create html", timer() - _t)
1639
1640
def run(options):
    """Entry point: create the output dir and run the conversion.

    options.args holds the module name and the path of the main document.
    The html is written into a 'db2html' directory next to the document.
    The process exits with the return value of main().
    """
    logging.info('options: %s', str(options.__dict__))
    module = options.args[0]
    document = options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except OSError as err:
        # an already existing output dir is fine, anything else is an error
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise

    exit_status = main(module, document, out_dir, options.uninstalled)
    sys.exit(exit_status)