gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
   29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
   34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42 - check each docbook tag if it can contain #PCDATA, if not don't check for
  43   xml.text
  44 - consider some perf-warnings flag
  45   - see 'No "id" attribute on'
  46 - find a better way to print context for warnings
  47   - we use 'xml.sourceline', but this all does not help a lot due to xi:include
  48
  49 DIFFERENCES:
  50 - titles
   51   - we add the chunk label to both title in toc and title on the page
  52   - docbook xsl only sometimes adds the label to the titles and when it does it
  53     adds name chunk type too (e.g. 'Part I.' instead of 'I.')
  54 - navigation
  55   - we always add an up-link except on the first page
  56
  57 OPTIONAL:
  58 - minify html: https://pypi.python.org/pypi/htmlmin/
  59
  60 Requirements:
  61 sudo pip3 install anytree lxml pygments
  62
  63 Example invocation:
  64 cd tests/bugs/docs/
  65 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  66 xdg-open db2html/index.html
  67 meld html db2html
  68
  69 Benchmarking:
  70 cd tests/bugs/docs/;
  71 rm html-build.stamp; time make html-build.stamp
  72 """
  73
  74 import argparse
  75 import errno
  76 import logging
  77 import os
  78 import shutil
  79 import sys
  80
  81 from anytree import Node, PreOrderIter
  82 from copy import deepcopy
  83 from glob import glob
  84 from lxml import etree
  85 from pygments import highlight
  86 from pygments.lexers import CLexer
  87 from pygments.formatters import HtmlFormatter
  88 from timeit import default_timer as timer
  89
  90 from . import config, fixxref
  91
# pygments setup
# lazily constructed lexer cache, keyed by the lower-cased language name;
# only the C lexer is created up front, convert_programlisting() adds others
# on first use
LEXERS = {
    'c': CLexer()
}
# one shared formatter for all listings; convert_programlisting() emits its own
# <pre> wrapper around the highlighted output
HTML_FORMATTER = HtmlFormatter(nowrap=True)
  98
  99
 100 class ChunkParams(object):
 101     def __init__(self, prefix, parent=None, min_idx=0):
 102         self.prefix = prefix
 103         self.parent = parent
 104         self.min_idx = min_idx
 105         self.idx = 1
 106
 107
# Used as 'min_idx': no child index is ever >= infinity, so tags using it are
# never put into a file of their own.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# Maps a docbook tag name to its ChunkParams(prefix, parent, min_idx).
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
 137
# Maps a tag name to a (title-xpath, subtitle-xpath or None) pair. The '_'
# entry is the fallback for all tags that have no entry of their own.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# selects every element that carries an 'id' attribute
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# document-wide footnote counter, advanced by convert_footnote()
footnote_idx = 1

# nested dict with subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
# (keyed by the element 'id', filled by add_id_links_and_titles())
titles = {}
 159
 160
 161 def gen_chunk_name(node, chunk_params):
 162     """Generate a chunk file name
 163
 164     This is either based on the id or on the position in the doc. In the latter
 165     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 166     type.
 167     """
 168     if 'id' in node.attrib:
 169         return node.attrib['id']
 170
 171     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 172     chunk_params.idx += 1
 173
 174     # handle parents to make names of nested tags like in docbook
 175     # - we only need to prepend the parent if there are > 1 of them in the
 176     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 177     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 178     #   when we chunk explicitly and on each level maintain the 'idx'
 179     # while chunk_params.parent:
 180     #     parent = chunk_params.parent
 181     #     if parent not in CHUNK_PARAMS:
 182     #         break;
 183     #     chunk_params = CHUNK_PARAMS[parent]
 184     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 185
 186     logging.info('Gen chunk name: "%s"', name)
 187     return name
 188
 189
 190 def get_chunk_titles(module, node):
 191     tag = node.tag
 192     (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
 193
 194     ctx = {
 195         'module': module,
 196         'files': [],
 197     }
 198     result = {
 199         'title': None,
 200         'title_tag': None,
 201         'subtitle': None,
 202         'subtitle_tag': None
 203     }
 204     res = title(node)
 205     if res:
 206         # handle chunk label for tocs
 207         label = node.attrib.get('label')
 208         if label:
 209             label += '. '
 210         else:
 211             label = ''
 212
 213         xml = res[0]
 214         result['title'] = label + ''.join(convert_title(ctx, xml))
 215         if xml.tag != 'title':
 216             result['title_tag'] = xml.tag
 217         else:
 218             result['title_tag'] = tag
 219
 220     if subtitle:
 221         res = subtitle(node)
 222         if res:
 223             xml = res[0]
 224             result['subtitle'] = ''.join(convert_title(ctx, xml))
 225             result['subtitle_tag'] = xml.tag
 226     return result
 227
 228
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.

    Args:
      xml_node: the xml element to inspect
      module: module name, passed on to get_chunk_titles()
      depth: nesting depth stored on the created tree node
      idx: position of xml_node among its chunk-tag siblings
      parent: tree node of the enclosing chunk (None for the root)

    Returns:
      The tree node created for xml_node if its tag is listed in CHUNK_PARAMS,
      otherwise the 'parent' argument unchanged.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        # both helpers run for every chunk tag, also for those that end up as
        # an anchor in the parent file (gen_chunk_name() may advance the
        # sequence counter)
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent
                # (we continue with a detached deep copy of the subtree)
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            # a chunk with a file of its own
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # stays inside the parent's file and is addressed via an anchor;
            # this reads parent.filename, so it needs a non-None parent
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        depth += 1
        idx = 0
        # NOTE(review): the recursive call may detach 'child' from xml_node
        # while we iterate over it - confirm the iteration tolerates that
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            # only chunk tags count for the child index
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
 270
 271
 272 def add_id_links_and_titles(files, links):
 273     for node in files:
 274         chunk_name = node.filename[:-5]
 275         chunk_base = node.filename + '#'
 276         for elem in ID_XPATH(node.xml):
 277             attr = elem.attrib['id']
 278             if attr == chunk_name:
 279                 links[attr] = node.filename
 280             else:
 281                 links[attr] = chunk_base + attr
 282
 283             title = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
 284             res = title(elem)
 285             if res:
 286                 xml = res[0]
 287                 # TODO: consider to eval those lazily
 288                 titles[attr] = {
 289                     'title': etree.tostring(xml, method="text", encoding=str).strip(),
 290                     'xml': xml,
 291                     'tag': elem.tag,
 292                 }
 293
 294
 295 def build_glossary(files):
 296     for node in files:
 297         if node.xml.tag != 'glossary':
 298             continue
 299         for term in GLOSSENTRY_XPATH(node.xml):
 300             # TODO: there can be all kind of things in a glossary. This only supports
 301             # what we commonly use
 302             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 303             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 304             glossary[key] = value
 305             # logging.debug('glosentry: %s:%s', key, value)
 306
 307
 308 # conversion helpers
 309
 310
 311 def convert_inner(ctx, xml, result):
 312     for child in xml:
 313         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 314
 315
 316 def convert_ignore(ctx, xml):
 317     result = []
 318     convert_inner(ctx, xml, result)
 319     return result
 320
 321
 322 def convert_skip(ctx, xml):
 323     return []
 324
 325
 326 def append_text(text, result):
 327     if text and text.strip():
 328         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 329
 330
# tags we already warned about in convert__unknown() (tag name -> True)
missing_tags = {}
 332
 333
 334 def convert__unknown(ctx, xml):
 335     # don't recurse on subchunks
 336     if xml.tag in CHUNK_PARAMS:
 337         return []
 338     if isinstance(xml, etree._Comment):
 339         return ['<!-- ' + xml.text + '-->\n']
 340     else:
 341         # warn only once
 342         if xml.tag not in missing_tags:
 343             logging.warning('Add tag converter for "%s"', xml.tag)
 344             missing_tags[xml.tag] = True
 345         result = ['<!-- ' + xml.tag + '-->\n']
 346         convert_inner(ctx, xml, result)
 347         result.append('<!-- /' + xml.tag + '-->\n')
 348         return result
 349
 350
 351 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 352     result = ['<div class="%s">\n' % xml.tag]
 353     title = xml.find('title')
 354     if title is not None:
 355         if 'id' in xml.attrib:
 356             result.append('<a name="%s"></a>' % xml.attrib['id'])
 357         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 358     append_text(xml.text, result)
 359     inner_func(ctx, xml, result)
 360     result.append('</div>')
 361     append_text(xml.tail, result)
 362     return result
 363
 364
 365 def xml_get_title(ctx, xml):
 366     title_tag = xml.find('title')
 367     if title_tag is not None:
 368         return ''.join(convert_title(ctx, title_tag))
 369     else:
 370         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 371         return ''
 372
 373
 374 # docbook tags
 375
 376
 377 def convert_abstract(ctx, xml):
 378     result = ["""<div class="abstract">
 379     <p class="title"><b>Abstract</b></p>"""]
 380     append_text(xml.text, result)
 381     convert_inner(ctx, xml, result)
 382     result.append('</div>')
 383     append_text(xml.tail, result)
 384     return result
 385
 386
 387 def convert_acronym(ctx, xml):
 388     key = xml.text
 389     title = glossary.get(key, '')
 390     # TODO: print a sensible warning if missing
 391     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 392     if xml.tail:
 393         result.append(xml.tail)
 394     return result
 395
 396
 397 def convert_anchor(ctx, xml):
 398     return ['<a name="%s"></a>' % xml.attrib['id']]
 399
 400
 401 def convert_bookinfo(ctx, xml):
 402     result = ['<div class="titlepage">']
 403     convert_inner(ctx, xml, result)
 404     result.append("""<hr>
 405 </div>""")
 406     if xml.tail:
 407         result.append(xml.tail)
 408     return result
 409
 410
 411 def convert_blockquote(ctx, xml):
 412     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 413     append_text(xml.text, result)
 414     convert_inner(ctx, xml, result)
 415     result.append('</blockquote>\n</div>')
 416     append_text(xml.tail, result)
 417     return result
 418
 419
 420 def convert_code(ctx, xml):
 421     result = ['<code class="%s">' % xml.tag]
 422     append_text(xml.text, result)
 423     convert_inner(ctx, xml, result)
 424     result.append('</code>')
 425     append_text(xml.tail, result)
 426     return result
 427
 428
 429 def convert_colspec(ctx, xml):
 430     result = ['<col']
 431     a = xml.attrib
 432     if 'colname' in a:
 433         result.append(' class="%s"' % a['colname'])
 434     if 'colwidth' in a:
 435         result.append(' width="%s"' % a['colwidth'])
 436     result.append('>\n')
 437     # is in tgroup and there can be no 'text'
 438     return result
 439
 440
 441 def convert_command(ctx, xml):
 442     result = ['<strong class="userinput"><code>']
 443     append_text(xml.text, result)
 444     convert_inner(ctx, xml, result)
 445     result.append('</code></strong>')
 446     append_text(xml.tail, result)
 447     return result
 448
 449
 450 def convert_corpauthor(ctx, xml):
 451     result = ['<div><h3 class="corpauthor">\n']
 452     append_text(xml.text, result)
 453     convert_inner(ctx, xml, result)
 454     result.append('</h3></div>\n')
 455     append_text(xml.tail, result)
 456     return result
 457
 458
 459 def convert_div(ctx, xml):
 460     result = ['<div class="%s">\n' % xml.tag]
 461     append_text(xml.text, result)
 462     convert_inner(ctx, xml, result)
 463     result.append('</div>')
 464     append_text(xml.tail, result)
 465     return result
 466
 467
 468 def convert_emphasis(ctx, xml):
 469     result = ['<span class="emphasis"><em>']
 470     append_text(xml.text, result)
 471     convert_inner(ctx, xml, result)
 472     result.append('</em></span>')
 473     append_text(xml.tail, result)
 474     return result
 475
 476
 477 def convert_em_class(ctx, xml):
 478     result = ['<em class="%s"><code>' % xml.tag]
 479     append_text(xml.text, result)
 480     convert_inner(ctx, xml, result)
 481     result.append('</code></em>')
 482     append_text(xml.tail, result)
 483     return result
 484
 485
 486 def convert_entry(ctx, xml):
 487     entry_type = ctx['table.entry']
 488     result = ['<' + entry_type]
 489     if 'role' in xml.attrib:
 490         result.append(' class="%s"' % xml.attrib['role'])
 491     if 'morerows' in xml.attrib:
 492         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 493     result.append('>')
 494     append_text(xml.text, result)
 495     convert_inner(ctx, xml, result)
 496     result.append('</' + entry_type + '>')
 497     append_text(xml.tail, result)
 498     return result
 499
 500
 501 def convert_footnote(ctx, xml):
 502     footnotes = ctx.get('footnotes', [])
 503     # footnotes idx is not per page, but per doc
 504     global footnote_idx
 505     idx = footnote_idx
 506     footnote_idx += 1
 507
 508     # need a pair of ids for each footnote (docbook generates different ids)
 509     this_id = 'footnote-%d' % idx
 510     that_id = 'ftn.' + this_id
 511
 512     inner = ['<div id="%s" class="footnote">' % that_id]
 513     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 514         this_id, idx))
 515     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 516     # get double nested paras :/.
 517     # convert_inner(ctx, xml, inner)
 518     para = xml.find('para')
 519     if para is None:
 520         para = xml.find('simpara')
 521     if para is not None:
 522         inner.append(para.text)
 523     else:
 524         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 525                         etree.tostring(xml, method="text", encoding=str).strip())
 526     inner.append('</p></div>')
 527     footnotes.append(inner)
 528     ctx['footnotes'] = footnotes
 529     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 530         that_id, this_id, idx)]
 531
 532
 533 def convert_formalpara(ctx, xml):
 534     result = None
 535     title_tag = xml.find('title')
 536     result = ['<p><b>%s</b>' % title_tag.text]
 537     para_tag = xml.find('para')
 538     append_text(para_tag.text, result)
 539     convert_inner(ctx, para_tag, result)
 540     append_text(para_tag.tail, result)
 541     result.append('</p>')
 542     append_text(xml.tail, result)
 543     return result
 544
 545
 546 def convert_glossdef(ctx, xml):
 547     result = ['<dd class="glossdef">']
 548     convert_inner(ctx, xml, result)
 549     result.append('</dd>\n')
 550     return result
 551
 552
 553 def convert_glossdiv(ctx, xml):
 554     title_tag = xml.find('title')
 555     title = title_tag.text
 556     xml.remove(title_tag)
 557     result = [
 558         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 559     ]
 560     convert_inner(ctx, xml, result)
 561     return result
 562
 563
 564 def convert_glossentry(ctx, xml):
 565     result = []
 566     convert_inner(ctx, xml, result)
 567     return result
 568
 569
 570 def convert_glossterm(ctx, xml):
 571     glossid = ''
 572     text = ''
 573     anchor = xml.find('anchor')
 574     if anchor is not None:
 575         glossid = anchor.attrib.get('id', '')
 576         text += anchor.tail or ''
 577     text += xml.text or ''
 578     if glossid == '':
 579         glossid = 'glossterm-' + text
 580     return [
 581         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 582             glossid, text)
 583     ]
 584
 585
 586 def convert_imageobject(ctx, xml):
 587     imagedata = xml.find('imagedata')
 588     if imagedata is not None:
 589         # TODO(ensonic): warn on missing fileref attr?
 590         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 591     else:
 592         return []
 593
 594
 595 def convert_indexdiv(ctx, xml):
 596     title_tag = xml.find('title')
 597     title = title_tag.text
 598     xml.remove(title_tag)
 599     result = [
 600         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 601     ]
 602     convert_inner(ctx, xml, result)
 603     return result
 604
 605
 606 def convert_informaltable(ctx, xml):
 607     result = ['<div class="informaltable"><table class="informaltable"']
 608     a = xml.attrib
 609     if 'pgwide' in a and a['pgwide'] == '1':
 610         result.append(' width="100%"')
 611     if 'frame' in a and a['frame'] == 'none':
 612         result.append(' border="0"')
 613     result.append('>\n')
 614     convert_inner(ctx, xml, result)
 615     result.append('</table></div>')
 616     if xml.tail:
 617         result.append(xml.tail)
 618     return result
 619
 620
 621 def convert_inlinegraphic(ctx, xml):
 622     # TODO(ensonic): warn on missing fileref attr?
 623     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 624
 625
 626 def convert_itemizedlist(ctx, xml):
 627     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 628     convert_inner(ctx, xml, result)
 629     result.append('</ul></div>')
 630     if xml.tail:
 631         result.append(xml.tail)
 632     return result
 633
 634
 635 def convert_link(ctx, xml):
 636     linkend = xml.attrib['linkend']
 637     if linkend in fixxref.NoLinks:
 638         linkend = None
 639     result = []
 640     if linkend:
 641         link_text = []
 642         append_text(xml.text, link_text)
 643         convert_inner(ctx, xml, link_text)
 644         text = ''.join(link_text)
 645
 646         (tid, href) = fixxref.GetXRef(linkend)
 647         if href:
 648             title_attr = ''
 649             title = titles.get(tid)
 650             if title:
 651                 title_attr = ' title="%s"' % title['title']
 652
 653             href = fixxref.MakeRelativeXRef(ctx['module'], href)
 654             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 655         else:
 656             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 657             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 658             result = [text]
 659     else:
 660         append_text(xml.text, result)
 661         convert_inner(ctx, xml, result)
 662     append_text(xml.tail, result)
 663     return result
 664
 665
 666 def convert_listitem(ctx, xml):
 667     result = ['<li class="listitem">']
 668     convert_inner(ctx, xml, result)
 669     result.append('</li>')
 670     # is in itemizedlist and there can be no 'text'
 671     return result
 672
 673
 674 def convert_literallayout(ctx, xml):
 675     result = ['<div class="literallayout"><p><br>\n']
 676     append_text(xml.text, result)
 677     convert_inner(ctx, xml, result)
 678     result.append('</p></div>')
 679     append_text(xml.tail, result)
 680     return result
 681
 682
 683 def convert_orderedlist(ctx, xml):
 684     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 685     convert_inner(ctx, xml, result)
 686     result.append('</ol></div>')
 687     append_text(xml.tail, result)
 688     return result
 689
 690
 691 def convert_para(ctx, xml):
 692     result = []
 693     if 'id' in xml.attrib:
 694         result.append('<a name="%s"></a>' % xml.attrib['id'])
 695     if 'role' in xml.attrib:
 696         result.append('<p class="%s">' % xml.attrib['role'])
 697     else:
 698         result.append('<p>')
 699     append_text(xml.text, result)
 700     convert_inner(ctx, xml, result)
 701     result.append('</p>')
 702     append_text(xml.tail, result)
 703     return result
 704
 705
 706 def convert_para_like(ctx, xml):
 707     result = []
 708     if 'id' in xml.attrib:
 709         result.append('<a name="%s"></a>' % xml.attrib['id'])
 710     result.append('<p class="%s">' % xml.tag)
 711     append_text(xml.text, result)
 712     convert_inner(ctx, xml, result)
 713     result.append('</p>')
 714     append_text(xml.tail, result)
 715     return result
 716
 717
 718 def convert_phrase(ctx, xml):
 719     result = ['<span']
 720     if 'role' in xml.attrib:
 721         result.append(' class="%s">' % xml.attrib['role'])
 722     else:
 723         result.append('>')
 724     append_text(xml.text, result)
 725     convert_inner(ctx, xml, result)
 726     result.append('</span>')
 727     append_text(xml.tail, result)
 728     return result
 729
 730
 731 def convert_primaryie(ctx, xml):
 732     result = ['<dt>\n']
 733     convert_inner(ctx, xml, result)
 734     result.append('\n</dt>\n<dd></dd>\n')
 735     return result
 736
 737
 738 def convert_pre(ctx, xml):
 739     result = ['<pre class="%s">\n' % xml.tag]
 740     append_text(xml.text, result)
 741     convert_inner(ctx, xml, result)
 742     result.append('</pre>')
 743     append_text(xml.tail, result)
 744     return result
 745
 746
 747 def convert_programlisting(ctx, xml):
 748     result = []
 749     if xml.attrib.get('role', '') == 'example':
 750         if xml.text:
 751             lang = xml.attrib.get('language', 'c').lower()
 752             if lang not in LEXERS:
 753                 LEXERS[lang] = get_lexer_by_name(lang)
 754             lexer = LEXERS.get(lang, None)
 755             if lexer:
 756                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 757
 758                 # we do own line-numbering
 759                 line_count = highlighted.count('\n')
 760                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 761                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 762   <tbody>
 763     <tr>
 764       <td class="listing_lines" align="right"><pre>%s</pre></td>
 765       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 766     </tr>
 767   </tbody>
 768 </table>
 769 """ % (source_lines, highlighted))
 770             else:
 771                 logging.warn('No pygments lexer for language="%s"', lang)
 772                 result.append('<pre class="programlisting">')
 773                 result.append(xml.text)
 774                 result.append('</pre>')
 775     else:
 776         result.append('<pre class="programlisting">')
 777         append_text(xml.text, result)
 778         convert_inner(ctx, xml, result)
 779         result.append('</pre>')
 780     append_text(xml.tail, result)
 781     return result
 782
 783
 784 def convert_quote(ctx, xml):
 785     result = ['<span class="quote">"<span class="quote">']
 786     append_text(xml.text, result)
 787     convert_inner(ctx, xml, result)
 788     result.append('</span>"</span>')
 789     append_text(xml.tail, result)
 790     return result
 791
 792
 793 def convert_refsect1(ctx, xml):
 794     # Add a divider between two consequitive refsect2
 795     def convert_inner(ctx, xml, result):
 796         prev = None
 797         for child in xml:
 798             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 799                 result.append('<hr>\n')
 800             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 801             prev = child
 802     return convert_sect(ctx, xml, 'h2', convert_inner)
 803
 804
 805 def convert_refsect2(ctx, xml):
 806     return convert_sect(ctx, xml, 'h3')
 807
 808
 809 def convert_refsect3(ctx, xml):
 810     return convert_sect(ctx, xml, 'h4')
 811
 812
 813 def convert_row(ctx, xml):
 814     result = ['<tr>\n']
 815     convert_inner(ctx, xml, result)
 816     result.append('</tr>\n')
 817     return result
 818
 819
 820 def convert_sect1_tag(ctx, xml):
 821     return convert_sect(ctx, xml, 'h2')
 822
 823
 824 def convert_sect2(ctx, xml):
 825     return convert_sect(ctx, xml, 'h3')
 826
 827
 828 def convert_sect3(ctx, xml):
 829     return convert_sect(ctx, xml, 'h4')
 830
 831
 832 def convert_simpara(ctx, xml):
 833     result = ['<p>']
 834     append_text(xml.text, result)
 835     convert_inner(ctx, xml, result)
 836     result.append('</p>')
 837     append_text(xml.tail, result)
 838     return result
 839
 840
 841 def convert_span(ctx, xml):
 842     result = ['<span class="%s">' % xml.tag]
 843     append_text(xml.text, result)
 844     convert_inner(ctx, xml, result)
 845     result.append('</span>')
 846     append_text(xml.tail, result)
 847     return result
 848
 849
 850 def convert_table(ctx, xml):
 851     result = ['<div class="table">']
 852     if 'id' in xml.attrib:
 853         result.append('<a name="%s"></a>' % xml.attrib['id'])
 854     title_tag = xml.find('title')
 855     if title_tag is not None:
 856         result.append('<p class="title"><b>')
 857         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 858         result.extend(convert_title(ctx, title_tag))
 859         result.append('</b></p>')
 860     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 861
 862     convert_inner(ctx, xml, result)
 863
 864     result.append('</table></div></div>')
 865     append_text(xml.tail, result)
 866     return result
 867
 868
 869 def convert_tbody(ctx, xml):
 870     result = ['<tbody>']
 871     ctx['table.entry'] = 'td'
 872     convert_inner(ctx, xml, result)
 873     result.append('</tbody>')
 874     # is in tgroup and there can be no 'text'
 875     return result
 876
 877
 878 def convert_tgroup(ctx, xml):
 879     # tgroup does not expand to anything, but the nested colspecs need to
 880     # be put into a colgroup
 881     cols = xml.findall('colspec')
 882     result = []
 883     if cols:
 884         result.append('<colgroup>\n')
 885         for col in cols:
 886             result.extend(convert_colspec(ctx, col))
 887             xml.remove(col)
 888         result.append('</colgroup>\n')
 889     convert_inner(ctx, xml, result)
 890     # is in informaltable and there can be no 'text'
 891     return result
 892
 893
 894 def convert_thead(ctx, xml):
 895     result = ['<thead>']
 896     ctx['table.entry'] = 'th'
 897     convert_inner(ctx, xml, result)
 898     result.append('</thead>')
 899     # is in tgroup and there can be no 'text'
 900     return result
 901
 902
 903 def convert_title(ctx, xml):
 904     # This is always explicitly called from some context
 905     result = []
 906     append_text(xml.text, result)
 907     convert_inner(ctx, xml, result)
 908     append_text(xml.tail, result)
 909     return result
 910
 911
 912 def convert_ulink(ctx, xml):
 913     if xml.text:
 914         result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 915     else:
 916         url = xml.attrib['url']
 917         result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, url)]
 918     append_text(xml.tail, result)
 919     return result
 920
 921
 922 def convert_userinput(ctx, xml):
 923     result = ['<span class="command"><strong>']
 924     append_text(xml.text, result)
 925     convert_inner(ctx, xml, result)
 926     result.append('</strong></span>')
 927     append_text(xml.tail, result)
 928     return result
 929
 930
 931 def convert_variablelist(ctx, xml):
 932     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 933 <colgroup>
 934 <col align="left" valign="top">
 935 <col>
 936 </colgroup>
 937 <tbody>"""]
 938     convert_inner(ctx, xml, result)
 939     result.append("""</tbody>
 940 </table></div>""")
 941     return result
 942
 943
 944 def convert_varlistentry(ctx, xml):
 945     result = ['<tr>']
 946
 947     result.append('<td><p>')
 948     term = xml.find('term')
 949     result.extend(convert_span(ctx, term))
 950     result.append('</p></td>')
 951
 952     result.append('<td>')
 953     listitem = xml.find('listitem')
 954     convert_inner(ctx, listitem, result)
 955     result.append('</td>')
 956
 957     result.append('<tr>')
 958     return result
 959
 960
 961 def convert_xref(ctx, xml):
 962     linkend = xml.attrib['linkend']
 963     (tid, href) = fixxref.GetXRef(linkend)
 964     title = titles.get(tid)
 965     # all sectN need to become 'section
 966     tag = title['tag']
 967     tag = {
 968         'sect1': 'section',
 969         'sect2': 'section',
 970         'sect3': 'section',
 971         'sect4': 'section',
 972         'sect5': 'section',
 973     }.get(tag, tag)
 974     result = [
 975         '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
 976         (href, title['title'], tag, ''.join(convert_title(ctx, title['xml'])))
 977     ]
 978
 979     append_text(xml.tail, result)
 980     return result
 981
 982
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: docbook tag name -> converter function. The converters
# defined in this file take (ctx, xml) and return a list of html fragments.
# NOTE(review): presumably consulted by convert_inner() for every child
# element - confirm against its implementation.
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # convert_title() is called explicitly by the code that needs a title,
    # so the generic dispatch maps 'title' to convert_skip
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1064
1065 # conversion helpers
1066
# Common start of every generated html page, up to and including the opening
# <body> tag. It is a %-format template with two '%s' slots: the first is the
# page title, the second the pre-rendered <link rel=...> tags (see
# generate_head_links()).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
1076
1077
def generate_head_links(ctx):
    """Build the <link rel="..."> tags for the html <head>.

    A 'home' link is always generated from ctx['nav_home']. 'up', 'prev' and
    'next' links are added, in that order, for each of 'nav_up', 'nav_prev'
    and 'nav_next' that is present in ctx.

    Returns:
      a single string with one newline terminated tag per link
    """
    targets = [('home', ctx['nav_home'])]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            targets.append((rel, ctx[key]))
    return ''.join(
        '<link rel="%s" href="%s" title="%s">\n' % (rel, target.filename, target.title)
        for rel, target in targets)
1093
1094
def generate_nav_links(ctx):
    """Build the table cells with the home/up/prev/next navigation icons.

    The home cell is always a link to ctx['nav_home']. For 'nav_up',
    'nav_prev' and 'nav_next' a linked icon is generated when the key is
    present in ctx, otherwise the matching '-insensitive' icon without a
    link.

    Returns:
      a single string with four <td> cells
    """
    # (ctx key, accesskey, icon basename, alt text)
    buttons = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' %
        ctx['nav_home'].filename
    ]
    for key, accesskey, icon, alt in buttons:
        if key in ctx:
            cells.append(
                '<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" height="16" border="0" alt="%s"></a></td>' %
                (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append('<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>' % icon)

    return ''.join(cells)
1120
1121
def generate_toc(ctx, node):
    """Build the table-of-contents entries for the children of a node.

    Each child becomes a <dt> entry linking to its file (plus anchor, if it
    has one), optionally followed by its subtitle. Children that have
    children themselves get a nested <dd><dl> with their own entries.

    Returns:
      list of html fragments; empty if the node has no children
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if not child.children:
            continue
        entries.append('<dd><dl>')
        entries += generate_toc(ctx, child)
        entries.append('</dl></dd>')
    return entries
1139
1140
def generate_basic_nav(ctx):
    """Build the top navigation table without any shortcut links.

    The shortcuts cell is left empty; the navigation icons come from
    generate_nav_links().

    Returns:
      the navigation table as a single string
    """
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """
    nav_cells = generate_nav_links(ctx)
    return template % nav_cells
1149
1150
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the top navigation table with one shortcut link per division.

    Args:
      ctx: conversion context
      divs: elements whose titles become the shortcut labels
      prefix: string put in front of the title to form the link anchor
      span_id: suffix for the id ('nav_<span_id>') of the shortcuts span

    Returns:
      the navigation table as a single string
    """
    div_titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, div_title, div_title)
        for div_title in div_titles
    ]
    separator = '\n<span class="dim">|</span>\n'

    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """
    return template % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
1168
1169
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to result.

    Besides the 'Top' shortcut, one shortcut is added for every refsect1
    that has an 'id' containing a '.', unless its 'role' ends with '_proto'.

    Args:
      ctx: conversion context
      refsect1s: the refsect1 elements of the refentry
      result: list of html fragments that is extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for section in refsect1s:
        attribs = section.attrib
        # don't list TOC sections (role="xxx_proto") and sections without
        # an 'id' attr
        if attribs.get('role', '').endswith("_proto") or 'id' not in attribs:
            continue

        section_id = attribs['id']
        # skip foreign sections
        if '.' not in section_id:
            continue

        section_title = xml_get_title(ctx, section)
        nav_id = section_id.split('.')[1].replace('-', '_')

        result.append("""
          <span id="nav_%s">
            <span class="dim">|</span>
            <a href="#%s" class="shortcut">%s</a>
          </span>
          """ % (nav_id, section_id, section_title))

    nav_cells = generate_nav_links(ctx)
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % nav_cells)
1204
1205
def generate_footer(ctx):
    """Build the footnotes block at the end of a page.

    Returns:
      list of html fragments; empty if ctx has no 'footnotes' entry,
      otherwise a <div class="footnotes"> wrapping the fragments of all
      entries in ctx['footnotes']
    """
    if 'footnotes' not in ctx:
        return []

    footer = [
        '<div class="footnotes">\n\n'
        '<br><hr style="width:100; text-align:left;margin-left: 0">\n'
    ]
    footer.extend(fragment for footnote in ctx['footnotes'] for fragment in footnote)
    footer.append('</div>\n')
    return footer
1216
1217
def get_id_path(node):
    """ Generate the index path used to build an 'id'.

    We walk up the xml-tree from node.xml and record the 1-based position of
    each element among its siblings. After reaching the top of the xml-tree
    we walk up the chunked-tree from node and record each node's 1-based
    'idx'.

    Returns:
      list of number strings, outermost first: the chunked-tree positions
      followed by the xml-tree positions
    """
    # collect innermost-first and flip once at the end
    reversed_path = []

    element = node.xml
    container = element.getparent()
    while container is not None:
        position = container.getchildren().index(element)
        reversed_path.append(str(position + 1))
        element, container = container, container.getparent()

    chunk_node = node
    while chunk_node is not None:
        reversed_path.append(str(chunk_node.idx + 1))
        chunk_node = chunk_node.parent

    reversed_path.reverse()
    return reversed_path
1237
1238
def get_id(node):
    """Return the id for a node.

    The 'id' attribute of node.xml is used if it is set and non-empty.
    Otherwise an id of the form 'id-<n>.<n>...' is generated from the
    position path of the node (see get_id_path()) and this is logged at info
    level.
    """
    element = node.xml
    explicit_id = element.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider
        # to omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     element.sourceline, element.tag)
        # logging.warning('%s: id indexes: %s', node.filename, str(ix))
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1252
1253
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk into a complete html page with a table of contents.

    Args:
      ctx: conversion context, ctx['node'] is the chunk to convert
      div_class: css class of the <div> wrapping the page content
      title_tag: html tag name used for the title heading (e.g. 'h2')

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    page.append(generate_basic_nav(ctx))
    page.append('<div class="%s">' % div_class)
    if node.title:
        heading = """
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (
            title_tag, get_id(node), node.title, title_tag)
        page.append(heading)

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        page.append("""<p><b>Table of Contents</b></p>
    <div class="toc">
      <dl class="toc">
    """)
        page += toc_entries
        page.append("""</dl>
    </div>
    """)
    convert_inner(ctx, node.xml, page)
    # the footer is generated after the content has been converted
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1285
1286
1287 # docbook chunks
1288
1289
def convert_book(ctx):
    """Convert the 'book' chunk into the main html page.

    The page consists of the title banner, the converted first 'bookinfo'
    element, the table of contents for the whole tree (node.root) and the
    footer.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page = [HTML_HEADER % (node.title, generate_head_links(ctx))]
    page.append("""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title)

    first_bookinfo = node.xml.findall('bookinfo')[0]
    page += convert_bookinfo(ctx, first_bookinfo)

    page.append("""<div class="toc">
  <dl class="toc">
""")
    page += generate_toc(ctx, node.root)
    page.append("""</dl>
</div>
""")

    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1314
1315
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a toc page with class 'chapter' and a h2 title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1318
1319
def convert_glossary(ctx):
    """Convert the 'glossary' chunk into a complete html page.

    The direct 'glossdiv' children provide both the shortcut navigation at
    the top and the page content. The heading level is taken from
    node.depth.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    divisions = node.xml.findall('glossdiv')

    page = [HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))]
    page.append(generate_alpha_nav(ctx, divisions, 'gls', 'glossary'))
    page.append("""<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth))

    for division in divisions:
        page += convert_glossdiv(ctx, division)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1339
1340
def convert_index(ctx):
    """Convert the 'index' chunk into a complete html page.

    The 'indexdiv' elements nested inside the first top-level 'indexdiv'
    provide both the shortcut navigation at the top and the page content.
    The heading level is taken from node.depth. An index without any
    'indexdiv' yields a page with the title only.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv. find() returns None if the index has
    # no indexdiv at all; treat that as an empty index instead of failing
    # with an AttributeError.
    top_indexdiv = node.xml.find('indexdiv')
    if top_indexdiv is not None:
        indexdivs = top_indexdiv.findall('indexdiv')
    else:
        indexdivs = []

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1361
1362
def convert_part(ctx):
    """Convert a 'part' chunk: a toc page with class 'part' and a h1 title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1365
1366
def convert_preface(ctx):
    """Convert the 'preface' chunk into a complete html page.

    Unlike the chunks handled by convert_chunk_with_toc() no table of
    contents is generated.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page = [HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))]
    page.append(generate_basic_nav(ctx))
    page.append('<div class="preface">')
    if node.title:
        heading = """
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title)
        page.append(heading)
    convert_inner(ctx, node.xml, page)
    # the footer is generated after the content has been converted
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1385
1386
def convert_reference(ctx):
    """Convert a 'reference' chunk: a toc page with class 'reference' and a h1 title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1389
1390
def convert_refentry(ctx):
    """Convert a 'refentry' chunk into a complete html page.

    The page gets a shortcut navigation built from the 'refsect1' children,
    a header with title and subtitle and - if there is a
    refmeta/refmiscinfo/inlinegraphic - a gallery image. The page content
    consists of the converted 'refsect1' elements.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    node_id = get_id(node)
    sections = node.xml.findall('refsect1')

    # the optional gallery image lives in refmeta/refmiscinfo/inlinegraphic
    gallery = ''
    refmeta = node.xml.find('refmeta')
    refmiscinfo = refmeta.find('refmiscinfo') if refmeta is not None else None
    inlinegraphic = refmiscinfo.find('inlinegraphic') if refmiscinfo is not None else None
    if inlinegraphic is not None:
        gallery = ''.join(convert_inlinegraphic(ctx, inlinegraphic))

    page = [HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, sections, page)
    page.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for section in sections:
        page += convert_refsect1(ctx, section)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1430
1431
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: a toc page with class 'sect1' and a h2 title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1434
1435
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Dispatch table: chunk node name -> page converter. Each converter takes the
# conversion context (with the chunk in ctx['node']) and returns the list of
# html fragments for one complete page. convert() looks the converter up by
# node.name and logs a warning for names that are missing here.
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
1448
1449
def generate_nav_nodes(files, node):
    """Determine the navigation targets for a node.

    Args:
      files: list of all nodes that become files, in output order
      node: the node to generate the navigation for; must be in files

    Returns:
      dict with 'nav_home' (the root node) and, where applicable, 'nav_up'
      (the parent), 'nav_prev' and 'nav_next' (the neighbours in files)
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    has_prev = position >= 1
    has_next = position + 1 < len(files)
    if has_prev:
        nav['nav_prev'] = files[position - 1]
    if has_next:
        nav['nav_next'] = files[position + 1]
    return nav
1463
1464
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The output file is created (or truncated) in any case; if there is no
    converter registered for node.name a warning is logged and the file
    stays empty.

    Args:
      out_dir: already created output dir
      module: the module name, made available to converters as ctx['module']
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    html_path = os.path.join(out_dir, node.filename)
    with open(html_path, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
1489
1490
def create_devhelp2_toc(node):
    """Build the nested <sub> entries for the devhelp2 <chapters> section.

    Children without children of their own become self-closing <sub/>
    entries, the others wrap the entries of their children.

    Returns:
      list of xml fragments, one per line
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
1501
1502
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into xml attributes.

    The condition holds '|' separated entries of the form 'name:value' or
    just 'name' (e.g. since, deprecated, ...). Double quotes are replaced by
    '&quot;' before splitting.

    Returns:
      a string of ' name="value"' pairs with a leading space, or '' if the
      node has no 'condition' attribute
    """
    if 'condition' not in node.attrib:
        return ''

    quoted = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for entry in quoted.split('|'):
        # entries without a ':' get an empty value (deprecated can have no
        # description)
        name, _, value = entry.partition(':')
        keywords.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(keywords)
1517
1518
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 <keyword> line for a refsect2 element.

    The type comes from the 'role' attribute, the name from the title of
    the element and the link is base_link plus the 'id' attribute.
    """
    keyword_type = node.attrib['role']
    keyword_name = xml_get_title({}, node)
    keyword_link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, keyword_name, keyword_link, conditions)
1523
1524
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 <keyword> line for a member inside a refsect3.

    The type comes from the 'role' attribute of node, the name is the given
    title and the link is base_link plus name.
    """
    keyword_type = node.attrib['role']
    keyword_link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, keyword_link, conditions)
1529
1530
def create_devhelp2(out_dir, module, xml, files):
    """Write the <module>.devhelp2 index file into out_dir.

    The file contains the book header (title and online location taken
    from /book/bookinfo, empty if not available), the chapter tree and one
    keyword for each refsect2 with a 'role' plus keywords for its enum and
    struct members.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: root element of the loaded document
      files: list of nodes in the tree in pre-order
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        # xpath() returns lists (never None), so each lookup is checked for
        # emptiness; missing values stay empty instead of raising.
        title = ''
        online_url = ''
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            title_nodes = bookinfo.xpath('./title/text()')
            if title_nodes:
                title = title_nodes[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
  <chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: the keyword name is the text of the <para>
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members: the 'id' is used as keyword name and link
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        for line in result:
            idx.write(line)
1580
1581
def get_dirs(uninstalled):
    """Determine the gtk-doc data dir and the style dir.

    Args:
      uninstalled: if true, locate the dirs relative to the script
        (sys.argv[0]) or, if gtk-doc.xsl is not found there, relative to
        $ABS_TOP_SRCDIR; otherwise use the installed data dir from config

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    found_next_to_script = os.path.exists(gtkdocdir + '/gtk-doc.xsl')
    if not found_next_to_script:
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1595
1596
def main(module, index_file, out_dir, uninstalled):
    """Run the whole docbook to html conversion for one module.

    The steps are: load the document and resolve xincludes, copy the images
    and the stylesheet into out_dir, load the xref targets, chunk the
    document, extract the link/title/glossary tables, write the devhelp2
    file and finally write one html file per chunk. The time spent in each
    step is logged at warning level.

    Args:
      module: the module name
      index_file: path of the main docbook xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the data files

    Returns:
      None (there is no explicit return value)
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of HTML_FORMATTER to the copy of
    # the stylesheet
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # only nodes without an anchor are kept in the list of files
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node)
    logging.warning("7: %7.3lf: create html", timer() - _t)
1671
1672
def run(options):
    """Entry point: convert the document given in the options and exit.

    options.args[0] is the module name and options.args[1] the path of the
    main docbook document. The html is written to a 'db2html' directory next
    to the document, which is created if needed. The process exits with the
    return value of main().
    """
    logging.info('options: %s', str(options.__dict__))
    module = options.args[0]
    document = options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an OSError with errno EEXIST; a leftover dir from a previous run is
        # fine, any other error still propagates
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))
1687
1688     sys.exit(main(module, document, out_dir, options.uninstalled))