gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Prototype for builtin docbook processing
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 TODO:
  33 - more chunk converters
  34 - check each docbook tag if it can contain #PCDATA, if not don't check for
  35   xml.text
  36
  37 OPTIONAL:
  38 - minify html: https://pypi.python.org/pypi/htmlmin/
  39
  40 Requirements:
  41 sudo pip3 install anytree lxml pygments
  42
  43 Example invocation:
  44 cd tests/bugs/docs/
  45 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  46 xdg-open db2html/index.html
  47 meld html db2html
  48
  49 Benchmarking:
  50 cd tests/bugs/docs/;
  51 rm html-build.stamp; time make html-build.stamp
  52 """
  53
  54 import argparse
  55 import errno
  56 import logging
  57 import os
  58 import shutil
  59 import sys
  60
  61 from anytree import Node, PreOrderIter
  62 from copy import deepcopy
  63 from glob import glob
  64 from lxml import etree
  65 from pygments import highlight
  66 from pygments.lexers import CLexer
  67 from pygments.formatters import HtmlFormatter
  68
  69 from . import config, fixxref
  70
# pygments setup
# TODO: maybe use get_lexer_for_filename()
# Lexer used by convert_programlisting(); listings are always lexed as C.
LEXER = CLexer()
# Shared formatter: used for highlighting listings and, in main(), as the
# source of the style definitions appended to style.css.
HTML_FORMATTER = HtmlFormatter(nowrap=False, linenos='table')
  75
# Tags that start a new chunk; chunk() creates one tree node (and thus one
# output file name) for each element with one of these tags.
# http://www.sagehill.net/docbookxsl/Chunking.html
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',      # in article or book
    'index',         # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',         # except first
    'section',       # if equivalent to sect1
    'set',
    'setindex',
]
  95
  96
  97 class ChunkParams(object):
  98     def __init__(self, prefix, parent=None):
  99         self.prefix = prefix
 100         self.parent = None
 101         self.count = 0
 102
 103
# Per-tag naming parameters used by gen_chunk_name() for chunks that have no
# 'id' attribute: (file name prefix, tag name of the parent chunk type).
# gen_chunk_name() adds entries for unknown tags at runtime.
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'sect1': ChunkParams('s', 'chapter'),
    'section': ChunkParams('s', 'chapter'),
}
 116
# Maps a chunk tag to a pair of compiled xpaths (title, subtitle) evaluated
# relative to the chunk element by get_chunk_titles(). The subtitle xpath may
# be None. The '_' entry is the default for tags that are not listed.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every 'id' attribute; used by add_id_links() to build the link map.
ID_XPATH = etree.XPath('//@id')
 127
 128
 129 def gen_chunk_name(node):
 130     if 'id' in node.attrib:
 131         return node.attrib['id']
 132
 133     tag = node.tag
 134     if tag not in CHUNK_PARAMS:
 135         CHUNK_PARAMS[tag] = ChunkParams(node.tag[:2])
 136         logging.warning('Add CHUNK_PARAMS for "%s"', tag)
 137
 138     naming = CHUNK_PARAMS[tag]
 139     naming.count += 1
 140     name = ('%s%02d' % (naming.prefix, naming.count))
 141     # handle parents to make names of nested tags unique
 142     # TODO: we only need to prepend the parent if there are > 1 of them in the
 143     #       xml
 144     # while naming.parent:
 145     #     parent = naming.parent
 146     #     if parent not in CHUNK_PARAMS:
 147     #         break;
 148     #     naming = CHUNK_PARAMS[parent]
 149     #     name = ('%s%02d' % (naming.prefix, naming.count)) + name
 150     return name
 151
 152
 153 def get_chunk_titles(node):
 154     tag = node.tag
 155     if tag not in TITLE_XPATHS:
 156         # Use defaults
 157         (title, subtitle) = TITLE_XPATHS['_']
 158     else:
 159         (title, subtitle) = TITLE_XPATHS[tag]
 160
 161     xml = title(node)[0]
 162     result = {
 163         'title': xml.text
 164     }
 165     if xml.tag != 'title':
 166         result['title_tag'] = xml.tag
 167     else:
 168         result['title_tag'] = tag
 169
 170     if subtitle:
 171         xml = subtitle(node)[0]
 172         result['subtitle'] = xml.text
 173         result['subtitle_tag'] = xml.tag
 174     else:
 175         result['subtitle'] = None
 176         result['subtitle_tag'] = None
 177     return result
 178
 179
def chunk(xml_node, parent=None):
    """Chunk the tree.

    Recursively walks the xml tree and builds a tree of anytree Nodes, one for
    each element whose tag is in CHUNK_TAGS. Each Node carries the xml
    sub-tree ('xml'), the output file name ('filename') and the values
    returned by get_chunk_titles().

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree

    Args:
      xml_node: the xml element to inspect
      parent: the Node of the enclosing chunk, or None at the top

    Returns:
      The Node created for xml_node if it is a chunk, otherwise the parent
      that was passed in.
    """
    if xml_node.tag in CHUNK_TAGS:
        if parent:
            # remove the xml-node from the parent
            # (a deep copy, wrapped in its own ElementTree, replaces it as the
            # chunk's xml)
            sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = sub_tree

        title_args = get_chunk_titles(xml_node)
        chunk_name = gen_chunk_name(xml_node)
        # from here on 'parent' is the new Node, so children attach to it
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=chunk_name + '.html', **title_args)

    for child in xml_node:
        chunk(child, parent)

    return parent
 202
 203
 204 def add_id_links(files, links):
 205     for node in files:
 206         chunk_name = node.filename[:-5]
 207         chunk_base = node.filename + '#'
 208         for attr in ID_XPATH(node.xml):
 209             if attr == chunk_name:
 210                 links[attr] = node.filename
 211             else:
 212                 links[attr] = chunk_base + attr
 213
 214
 215 # conversion helpers
 216
 217
 218 def convert_inner(ctx, xml, result):
 219     for child in xml:
 220         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 221
 222
 223 def convert_ignore(ctx, xml):
 224     result = []
 225     convert_inner(ctx, xml, result)
 226     return result
 227
 228
 229 def convert_skip(ctx, xml):
 230     return ['']
 231
 232
# Tags for which convert__unknown() has already logged a warning.
missing_tags = {}
 234
 235
 236 def convert__unknown(ctx, xml):
 237     # don't recurse on subchunks
 238     if xml.tag in CHUNK_TAGS:
 239         return []
 240     # warn only once
 241     if xml.tag not in missing_tags:
 242         logging.warning('Add tag converter for "%s"', xml.tag)
 243         missing_tags[xml.tag] = True
 244     result = ['<!-- ' + xml.tag + '-->\n']
 245     convert_inner(ctx, xml, result)
 246     result.append('<!-- /' + xml.tag + '-->\n')
 247     return result
 248
 249
 250 def convert_refsect(ctx, xml, h_tag, inner_func=convert_inner):
 251     result = ['<div class="%s">\n' % xml.tag]
 252     title = xml.find('title')
 253     if title is not None:
 254         if 'id' in xml.attrib:
 255             result.append('<a name="%s"></a>' % xml.attrib['id'])
 256         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 257         xml.remove(title)
 258     if xml.text:
 259         result.append(xml.text)
 260     inner_func(ctx, xml, result)
 261     result.append('</div>')
 262     if xml.tail:
 263         result.append(xml.tail)
 264     return result
 265
 266
 267 def xml_get_title(xml):
 268     title = xml.find('title')
 269     if title is not None:
 270         return title.text
 271     else:
 272         # TODO(ensonic): any way to get the file (inlcudes) too?
 273         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 274         return ''
 275
 276
 277 # docbook tags
 278
 279 def convert_bookinfo(ctx, xml):
 280     result = ['<div class="titlepage">']
 281     for releaseinfo in xml.findall('releaseinfo'):
 282         result.extend(convert_para(ctx, releaseinfo))
 283     result.append("""<hr>
 284 </div>""")
 285     if xml.tail:
 286         result.append(xml.tail)
 287     return result
 288
 289
 290 def convert_colspec(ctx, xml):
 291     result = ['<col']
 292     a = xml.attrib
 293     if 'colname' in a:
 294         result.append(' class="%s"' % a['colname'])
 295     if 'colwidth' in a:
 296         result.append(' width="%s"' % a['colwidth'])
 297     result.append('>\n')
 298     # is in tgroup and there can be no 'text'
 299     return result
 300
 301
 302 def convert_div(ctx, xml):
 303     result = ['<div class="%s">\n' % xml.tag]
 304     if xml.text:
 305         result.append(xml.text)
 306     convert_inner(ctx, xml, result)
 307     result.append('</div>')
 308     if xml.tail:
 309         result.append(xml.tail)
 310     return result
 311
 312
 313 def convert_em_class(ctx, xml):
 314     result = ['<em class="%s"><code>' % xml.tag]
 315     if xml.text:
 316         result.append(xml.text)
 317     convert_inner(ctx, xml, result)
 318     result.append('</code></em>')
 319     if xml.tail:
 320         result.append(xml.tail)
 321     return result
 322
 323
 324 def convert_entry(ctx, xml):
 325     result = ['<td']
 326     if 'role' in xml.attrib:
 327         result.append(' class="%s">' % xml.attrib['role'])
 328     else:
 329         result.append('>')
 330     if xml.text:
 331         result.append(xml.text)
 332     convert_inner(ctx, xml, result)
 333     result.append('</td>')
 334     if xml.tail:
 335         result.append(xml.tail)
 336     return result
 337
 338
 339 def convert_indexdiv(ctx, xml):
 340     title_tag = xml.find('title')
 341     title = title_tag.text
 342     xml.remove(title_tag)
 343     result = [
 344         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 345     ]
 346     convert_inner(ctx, xml, result)
 347     return result
 348
 349
 350 def convert_informaltable(ctx, xml):
 351     result = ['<div class="informaltable"><table class="informaltable"']
 352     a = xml.attrib
 353     if 'pgwide' in a and a['pgwide'] == '1':
 354         result.append(' width="100%"')
 355     if 'frame' in a and a['frame'] == 'none':
 356         result.append(' border="0"')
 357     result.append('>\n')
 358     convert_inner(ctx, xml, result)
 359     result.append('</table></div>')
 360     if xml.tail:
 361         result.append(xml.tail)
 362     return result
 363
 364
 365 def convert_itemizedlist(ctx, xml):
 366     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 367     convert_inner(ctx, xml, result)
 368     result.append('</ul></div>')
 369     if xml.tail:
 370         result.append(xml.tail)
 371     return result
 372
 373
 374 def convert_link(ctx, xml):
 375     linkend = xml.attrib['linkend']
 376     if linkend in fixxref.NoLinks:
 377         linkend = None
 378     result = []
 379     if linkend:
 380         link_text = []
 381         convert_inner(ctx, xml, link_text)
 382         if xml.text:
 383             link_text.append(xml.text)
 384         # TODO: fixxref does some weird checks in xml.text
 385         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 386     if xml.tail:
 387         result.append(xml.tail)
 388     return result
 389
 390
 391 def convert_listitem(ctx, xml):
 392     result = ['<li class="listitem">']
 393     convert_inner(ctx, xml, result)
 394     result.append('</li>')
 395     # is in itemizedlist and there can be no 'text'
 396     return result
 397
 398
 399 def convert_literal(ctx, xml):
 400     result = ['<code class="%s">' % xml.tag]
 401     if xml.text:
 402         result.append(xml.text)
 403     convert_inner(ctx, xml, result)
 404     result.append('</code>')
 405     if xml.tail:
 406         result.append(xml.tail)
 407     return result
 408
 409
 410 def convert_para(ctx, xml):
 411     result = ['<p>']
 412     if xml.tag != 'para':
 413         result = ['<p class="%s">' % xml.tag]
 414     if xml.text:
 415         result.append(xml.text)
 416     convert_inner(ctx, xml, result)
 417     result.append('</p>')
 418     if xml.tail:
 419         result.append(xml.tail)
 420     return result
 421
 422
 423 def convert_phrase(ctx, xml):
 424     result = ['<span']
 425     if 'role' in xml.attrib:
 426         result.append(' class="%s">' % xml.attrib['role'])
 427     else:
 428         result.append('>')
 429     if xml.text:
 430         result.append(xml.text)
 431     convert_inner(ctx, xml, result)
 432     result.append('</span>')
 433     if xml.tail:
 434         result.append(xml.tail)
 435     return result
 436
 437
 438 def convert_primaryie(ctx, xml):
 439     result = ['<dt>\n']
 440     convert_inner(ctx, xml, result)
 441     result.append('\n</dt>\n<dd></dd>\n')
 442     return result
 443
 444
 445 def convert_programlisting(ctx, xml):
 446     result = ['<pre class="programlisting">']
 447
 448     # TODO: only do this if parent is 'informalexample'?
 449     # Right now we also get programlisting node that are already marked.
 450     # problem: there is no xml.parent :/
 451     # 1) we could pass an option parent node when traversion the tree
 452     # 2) we could set some attributes on this node in mkdb to indicate wheter
 453     #    we'd like to colorize it (e.g. role="example"
 454     # 3) we could also skip doing markup in mkdb and apply it entierly here
 455     # 4) we could check for programlisting-children in informalexample
 456     #
 457     # we're trying 2) below
 458     if xml.attrib.get('role', '') == 'example':
 459         if xml.text:
 460             result.append(highlight(xml.text, LEXER, HTML_FORMATTER))
 461     else:
 462         if xml.text:
 463             result.append(xml.text)
 464         convert_inner(ctx, xml, result)
 465     result.append('</pre>')
 466     if xml.tail:
 467         result.append(xml.tail)
 468     return result
 469
 470
 471 def convert_refsect1(ctx, xml):
 472     # Add a divider between two consequitive refsect2
 473     def convert_inner(ctx, xml, result):
 474         prev = None
 475         for child in xml:
 476             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 477                 result.append('<hr>\n')
 478             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 479             prev = child
 480     return convert_refsect(ctx, xml, 'h2', convert_inner)
 481
 482
 483 def convert_refsect2(ctx, xml):
 484     return convert_refsect(ctx, xml, 'h3')
 485
 486
 487 def convert_refsect3(ctx, xml):
 488     return convert_refsect(ctx, xml, 'h4')
 489
 490
 491 def convert_row(ctx, xml):
 492     result = ['<tr>\n']
 493     convert_inner(ctx, xml, result)
 494     result.append('</tr>\n')
 495     return result
 496
 497
 498 def convert_span(ctx, xml):
 499     result = ['<span class="%s">' % xml.tag]
 500     if xml.text:
 501         result.append(xml.text)
 502     convert_inner(ctx, xml, result)
 503     result.append('</span>')
 504     if xml.tail:
 505         result.append(xml.tail)
 506     return result
 507
 508
 509 def convert_tbody(ctx, xml):
 510     result = ['<tbody>']
 511     convert_inner(ctx, xml, result)
 512     result.append('</tbody>')
 513     # is in tgroup and there can be no 'text'
 514     return result
 515
 516
 517 def convert_tgroup(ctx, xml):
 518     # tgroup does not expand to anything, but the nested colspecs need to
 519     # be put into a colgroup
 520     cols = xml.findall('colspec')
 521     result = []
 522     if cols:
 523         result.append('<colgroup>\n')
 524         for col in cols:
 525             result.extend(convert_colspec(ctx, col))
 526             xml.remove(col)
 527         result.append('</colgroup>\n')
 528     convert_inner(ctx, xml, result)
 529     # is in informaltable and there can be no 'text'
 530     return result
 531
 532
 533 def convert_ulink(ctx, xml):
 534     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 535     if xml.tail:
 536         result.append(xml.tail)
 537     return result
 538
 539
# Dispatch table: docbook tag name -> converter function(ctx, xml) returning a
# list of html fragments. Tags that are not listed are handled by
# convert__unknown() (see convert_inner()).
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'bookinfo': convert_bookinfo,
    'colspec': convert_colspec,
    'entry': convert_entry,
    'function': convert_span,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'itemizedlist': convert_itemizedlist,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_literal,
    'para': convert_para,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'releaseinfo': convert_para,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'returnvalue': convert_span,
    'row': convert_row,
    'structfield': convert_em_class,
    'tbody': convert_tbody,
    'tgroup': convert_tgroup,
    'type': convert_span,
    'ulink': convert_ulink,
    'warning': convert_div,
}
 573
 574 # conversion helpers
 575
# Start of every generated html page. %-format template with two
# placeholders: the page title and the <link> tags from generate_head_links().
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
 585
 586
 587 def generate_head_links(ctx):
 588     n = ctx['nav_home']
 589     result = [
 590         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
 591     ]
 592     if 'nav_up' in ctx:
 593         n = ctx['nav_up']
 594         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
 595     if 'nav_prev' in ctx:
 596         n = ctx['nav_prev']
 597         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
 598     if 'nav_next' in ctx:
 599         n = ctx['nav_next']
 600         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
 601     return ''.join(result)
 602
 603
 604 def generate_nav_links(ctx):
 605     n = ctx['nav_home']
 606     result = [
 607         '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n.filename
 608     ]
 609     if 'nav_up' in ctx:
 610         n = ctx['nav_up']
 611         result.append(
 612             '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n.filename)
 613     else:
 614         result.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
 615     if 'nav_prev' in ctx:
 616         n = ctx['nav_prev']
 617         result.append(
 618             '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n.filename)
 619     else:
 620         result.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
 621     if 'nav_next' in ctx:
 622         n = ctx['nav_next']
 623         result.append(
 624             '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n.filename)
 625     else:
 626         result.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
 627
 628     return ''.join(result)
 629
 630
 631 def generate_toc(ctx, node):
 632     result = []
 633     for c in node.children:
 634         # TODO: urlencode the filename: urllib.parse.quote_plus()
 635         result.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
 636             c.title_tag, c.filename, c.title))
 637         if c.subtitle:
 638             result.append('<span class="%s"> — %s</span>' % (c.subtitle_tag, c.subtitle))
 639         result.append('</dt>\n')
 640         if c.children:
 641             result.append('<dd><dl>')
 642             result.extend(generate_toc(ctx, c))
 643             result.append('</dl></dd>')
 644     return result
 645
 646
 647 def generate_basic_nav(ctx):
 648     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 649   <tr valign="middle">
 650     <td width="100%%" align="left" class="shortcuts"></td>
 651     %s
 652   </tr>
 653 </table>
 654     """ % generate_nav_links(ctx)
 655
 656
 657 def generate_index_nav(ctx, indexdivs):
 658     ix_nav = []
 659     for s in indexdivs:
 660         title = xml_get_title(s)
 661         ix_nav.append('<a class="shortcut" href="#idx%s">%s</a>' % (title, title))
 662
 663     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 664   <tr valign="middle">
 665     <td width="100%%" align="left" class="shortcuts">
 666       <span id="nav_index">
 667         %s
 668       </span>
 669     </td>
 670     %s
 671   </tr>
 672 </table>
 673     """ % ('\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
 674
 675
 676 def generate_refentry_nav(ctx, refsect1s, result):
 677     result.append("""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 678   <tr valign="middle">
 679     <td width="100%%" align="left" class="shortcuts">
 680       <a href="#" class="shortcut">Top</a>""")
 681
 682     for s in refsect1s:
 683         # don't list TOC sections (role="xxx_proto")
 684         if s.attrib.get('role', '').endswith("_proto"):
 685             continue
 686
 687         title = xml_get_title(s)
 688         result.append("""
 689           <span id="nav_description">
 690             <span class="dim">|</span>
 691             <a href="#%s" class="shortcut">%s</a>
 692           </span>""" % (s.attrib['id'], title))
 693     result.append("""
 694     </td>
 695     %s
 696   </tr>
 697 </table>
 698 """ % generate_nav_links(ctx))
 699
 700
 701 def get_id(node):
 702     xml = node.xml
 703     node_id = xml.attrib.get('id', None)
 704     if node_id:
 705         return node_id
 706
 707     logging.warning('%d: No "id" attribute on "%s"', xml.sourceline, xml.tag)
 708     ix = []
 709     # Generate the 'id'. We need to walk up the xml-tree and check the positions
 710     # for each sibling.
 711     parent = xml.getparent()
 712     while parent is not None:
 713         children = parent.getchildren()
 714         ix.insert(0, str(children.index(xml) + 1))
 715         xml = parent
 716         parent = xml.getparent()
 717     # logging.warning('%s: id indexes: %s', node.filename, str(ix))
 718     return 'id-1.' + '.'.join(ix)
 719
 720
 721 # docbook chunks
 722
 723
 724 def convert_book(ctx):
 725     node = ctx['node']
 726     result = [
 727         HTML_HEADER % (node.title, generate_head_links(ctx)),
 728         """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
 729     <tr><th valign="middle"><p class="title">%s</p></th></tr>
 730 </table>
 731 <div class="book">
 732 """ % node.title
 733     ]
 734     bookinfo = node.xml.findall('bookinfo')[0]
 735     result.extend(convert_bookinfo(ctx, bookinfo))
 736     result.append("""<div class="toc">
 737   <dl class="toc">
 738 """)
 739     result.extend(generate_toc(ctx, node.root))
 740     result.append("""</dl>
 741 </div>
 742 </div>
 743 </body>
 744 </html>""")
 745     return result
 746
 747
 748 def convert_chapter(ctx):
 749     node = ctx['node']
 750     result = [
 751         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 752         generate_basic_nav(ctx),
 753         '<div class="chapter">',
 754     ]
 755     title = node.xml.find('title')
 756     if title is not None:
 757         result.append('<div class="titlepage"><h1 class="title"><a name="%s"></a>%s</h1></div>' % (
 758             get_id(node), title.text))
 759         node.xml.remove(title)
 760     convert_inner(ctx, node.xml, result)
 761     result.append("""<div class="toc">
 762   <dl class="toc">
 763 """)
 764     result.extend(generate_toc(ctx, node))
 765     result.append("""</dl>
 766 </div>
 767 </div>
 768 </body>
 769 </html>""")
 770     return result
 771
 772
 773 def convert_index(ctx):
 774     node = ctx['node']
 775     node_id = get_id(node)
 776     # Get all indexdivs under indexdiv
 777     indexdivs = node.xml.find('indexdiv').findall('indexdiv')
 778
 779     result = [
 780         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 781         generate_index_nav(ctx, indexdivs),
 782         """<div class="index">
 783 <div class="titlepage"><h1 class="title">
 784 <a name="%s"></a>%s</h1>
 785 </div>""" % (node_id, node.title)
 786     ]
 787     for i in indexdivs:
 788         result.extend(convert_indexdiv(ctx, i))
 789     result.append("""</div>
 790 </body>
 791 </html>""")
 792     return result
 793
 794
 795 def convert_refentry(ctx):
 796     node = ctx['node']
 797     node_id = get_id(node)
 798     refsect1s = node.xml.findall('refsect1')
 799
 800     result = [
 801         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
 802     ]
 803     generate_refentry_nav(ctx, refsect1s, result)
 804     result.append("""
 805 <div class="refentry">
 806 <a name="%s"></a>
 807 <div class="refnamediv">
 808   <table width="100%%"><tr>
 809     <td valign="top">
 810       <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
 811       <p>%s — module for gtk-doc unit test</p>
 812     </td>
 813     <td class="gallery_image" valign="top" align="right"></td>
 814   </tr></table>
 815 </div>
 816 """ % (node_id, node_id, node.title, node.title))
 817
 818     for s in refsect1s:
 819         result.extend(convert_refsect1(ctx, s))
 820     result.append("""</div>
 821 </body>
 822 </html>""")
 823     return result
 824
 825
# Dispatch table: chunk tag name -> page generator function(ctx) returning the
# list of html fragments for the whole page. convert() logs a warning for
# chunk tags that are not listed.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'index': convert_index,
    'refentry': convert_refentry,
}
 833
 834
 835 def generate_nav_nodes(files, node):
 836     nav = {
 837         'nav_home': node.root,
 838     }
 839     # nav params: up, prev, next
 840     if node.parent:
 841         nav['nav_up'] = node.parent
 842     ix = files.index(node)
 843     if ix > 0:
 844         nav['nav_prev'] = files[ix - 1]
 845     if ix < len(files) - 1:
 846         nav['nav_next'] = files[ix + 1]
 847     return nav
 848
 849
 850 def convert(out_dir, module, files, node):
 851     """Convert the docbook chunks to a html file.
 852
 853     Args:
 854       out_dir: already created output dir
 855       files: list of nodes in the tree in pre-order
 856       node: current tree node
 857     """
 858
 859     logging.info('Writing: %s', node.filename)
 860     with open(os.path.join(out_dir, node.filename), 'wt') as html:
 861         ctx = {
 862             'module': module,
 863             'files': files,
 864             'node': node,
 865         }
 866         ctx.update(generate_nav_nodes(files, node))
 867
 868         if node.name in convert_chunks:
 869             for line in convert_chunks[node.name](ctx):
 870                 html.write(line)
 871         else:
 872             logging.warning('Add converter/template for "%s"', node.name)
 873
 874
 875 def create_devhelp2_toc(node):
 876     result = []
 877     for c in node.children:
 878         if c.children:
 879             result.append('<sub name="%s" link="%s">\n' % (c.title, c.filename))
 880             result.extend(create_devhelp2_toc(c))
 881             result.append('</sub>\n')
 882         else:
 883             result.append('<sub name="%s" link="%s"/>\n' % (c.title, c.filename))
 884     return result
 885
 886
 887 def create_devhelp2_condition_attribs(node):
 888     if 'condition' in node.attrib:
 889         # condition -> since, deprecated, ... (separated with '|')
 890         cond = node.attrib['condition'].replace('"', '&quot;').split('|')
 891         return' ' + ' '.join(['%s="%s"' % tuple(c.split(':', 1)) for c in cond])
 892     else:
 893         return ''
 894
 895
 896 def create_devhelp2_refsect2_keyword(node, base_link):
 897     return'    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
 898         node.attrib['role'], xml_get_title(node), base_link + node.attrib['id'],
 899         create_devhelp2_condition_attribs(node))
 900
 901
 902 def create_devhelp2_refsect3_keyword(node, base_link, title, name):
 903     return'    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
 904         node.attrib['role'], title, base_link + name,
 905         create_devhelp2_condition_attribs(node))
 906
 907
 908 def create_devhelp2(out_dir, module, xml, files):
 909     with open(os.path.join(out_dir, module + '.devhelp2'), 'wt') as idx:
 910         bookinfo_nodes = xml.xpath('/book/bookinfo')
 911         title = ''
 912         if bookinfo_nodes is not None:
 913             bookinfo = bookinfo_nodes[0]
 914             title = bookinfo.xpath('./title/text()')[0]
 915             online_url = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')[0]
 916             # TODO: support author too (see devhelp2.xsl)
 917         # TODO: fixxref uses '--src-lang' to set the language
 918         result = [
 919             """<?xml version="1.0" encoding="utf-8" standalone="no"?>
 920 <book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
 921   <chapters>
 922 """ % (title, module, online_url)
 923         ]
 924         # toc
 925         result.extend(create_devhelp2_toc(files[0].root))
 926         result.append("""  </chapters>
 927   <functions>
 928 """)
 929         # keywords from all refsect2 and refsect3
 930         refsect2 = etree.XPath('//refsect2[@role]')
 931         refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
 932         refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
 933         refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
 934         refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
 935         for node in files:
 936             base_link = node.filename + '#'
 937             refsect2_nodes = refsect2(node.xml)
 938             for refsect2_node in refsect2_nodes:
 939                 result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
 940                 refsect3_nodes = refsect3_enum(refsect2_node)
 941                 for refsect3_node in refsect3_nodes:
 942                     details_node = refsect3_enum_details(refsect3_node)[0]
 943                     name = details_node.attrib['id']
 944                     result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
 945                 refsect3_nodes = refsect3_struct(refsect2_node)
 946                 for refsect3_node in refsect3_nodes:
 947                     details_node = refsect3_struct_details(refsect3_node)[0]
 948                     name = details_node.attrib['id']
 949                     result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))
 950
 951         result.append("""  </functions>
 952 </book>
 953 """)
 954         for line in result:
 955             idx.write(line)
 956
 957
 958 def get_dirs(uninstalled):
 959     if uninstalled:
 960         # this does not work from buiddir!=srcdir
 961         gtkdocdir = os.path.split(sys.argv[0])[0]
 962         if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
 963             # try 'srcdir' (set from makefiles) too
 964             if os.path.exists(os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'):
 965                 gtkdocdir = os.environ['ABS_TOP_SRCDIR']
 966         styledir = gtkdocdir + '/style'
 967     else:
 968         gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
 969         styledir = gtkdocdir
 970     return (gtkdocdir, styledir)
 971
 972
 973 def main(module, index_file, out_dir, uninstalled):
 974     tree = etree.parse(index_file)
 975     tree.xinclude()
 976
 977     (gtkdocdir, styledir) = get_dirs(uninstalled)
 978     # copy navigation images and stylesheets to html directory ...
 979     css_file = os.path.join(styledir, 'style.css')
 980     for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
 981         shutil.copy(f, out_dir)
 982     css_file = os.path.join(out_dir, 'style.css')
 983     with open(css_file, 'at') as css:
 984         css.write(HTML_FORMATTER.get_style_defs())
 985
 986     # TODO: migrate options from fixxref
 987     # TODO: do in parallel with loading the xml above.
 988     fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
 989
 990     # We do multiple passes:
 991     # 1) recursively walk the tree and chunk it into a python tree so that we
 992     #   can generate navigation and link tags.
 993     files = chunk(tree.getroot())
 994     files = list(PreOrderIter(files))
 995     # 2) find all 'id' attribs and add them to the link map
 996     add_id_links(files, fixxref.Links)
 997     # 3) create a xxx.devhelp2 file, do this before 3), since we modify the tree
 998     create_devhelp2(out_dir, module, tree.getroot(), files)
 999     # 4) iterate the tree and output files
1000     # TODO: use multiprocessing
1001     for node in files:
1002         convert(out_dir, module, files, node)
1003
1004
def run(options):
    """Entry point: run the conversion for the parsed command line options.

    options.args holds the module name and the path of the main document.
    The output goes to a 'db2html' directory next to the document, which is
    created if needed. The process exits with the result of main().
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an existing output dir is fine; any other OSError propagates
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))