tools/db2html.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2017  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Prototype for builtin docbook processing
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26
  27 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  28 this tool will replace both without relying on external tools such as xsltproc
  29 and source-highlight.
  30
  31 TODO: convert the docbook-xml to html
  32 - more chunk converters
  33 - refentry/index nav headers
  34 - check each docbook tag if it can contain #PCDATA, if not don't check for
  35   xml.text
  36 - integrate syntax-highlighting from fixxref
  37   - maybe handle the combination <informalexample><programlisting> directly
  38   - switch to http://pygments.org/docs/quickstart/?
  39 - integrate MakeXRef from fixxref
  40   - first create devhelp2 output
  41
  42 OPTIONAL:
  43 - minify html: https://pypi.python.org/pypi/htmlmin/
  44
  45 Requirements:
  46 sudo pip3 install anytree lxml
  47
  48 Examples:
  49 python3 tools/db2html.py tests/gobject/docs/tester-docs.xml
  50 ll tests/gobject/docs/db2html
  51
  52 python3 tools/db2html.py tests/bugs/docs/tester-docs.xml
  53 ll tests/bugs/docs/db2html
  54 cp tests/bugs/docs/html/*.{css,png} tests/bugs/docs/db2html/
  55 xdg-open tests/bugs/docs/db2html/index.html
  56 meld tests/bugs/docs/{html,db2html}
  57
  58 Benchmarking:
  59 (cd tests/bugs/docs/; rm html-build.stamp; time make html-build.stamp)
  60 """
  61
  62 import argparse
  63 import errno
  64 import logging
  65 import os
  66 import sys
  67
  68 from anytree import Node, PreOrderIter
  69 from lxml import etree
  70
  71 # TODO(ensonic): requires gtk-doc to be installed, rewrite later
  72 sys.path.append('/usr/share/gtk-doc/python')
  73 from gtkdoc.fixxref import NoLinks
  74 from gtkdoc import common
  75
  76
  77 # http://www.sagehill.net/docbookxsl/Chunking.html
  78 CHUNK_TAGS = [
  79     'appendix',
  80     'article',
  81     'bibliography',  # in article or book
  82     'book',
  83     'chapter',
  84     'colophon',
  85     'glossary',      # in article or book
  86     'index',         # in article or book
  87     'part',
  88     'preface',
  89     'refentry',
  90     'reference',
  91     'sect1',         # except first
  92     'section',       # if equivalent to sect1
  93     'set',
  94     'setindex',
  95 ]
  96
  97
  98 class ChunkParams(object):
  99     def __init__(self, prefix, parent=None):
 100         self.prefix = prefix
 101         self.parent = None
 102         self.count = 0
 103
 104
 105 # TODO: look up the abbrevs and hierarchy for other tags
 106 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
 107 # https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
 108 CHUNK_PARAMS = {
 109     'appendix': ChunkParams('app', 'book'),
 110     'book': ChunkParams('bk'),
 111     'chapter': ChunkParams('ch', 'book'),
 112     'index': ChunkParams('ix', 'book'),
 113     'part': ChunkParams('pt', 'book'),
 114     'sect1': ChunkParams('s', 'chapter'),
 115     'section': ChunkParams('s', 'chapter'),
 116 }
 117
 118 TITLE_XPATHS = {
 119     '_': (etree.XPath('./title'), None),
 120     'book': (etree.XPath('./bookinfo/title'), None),
 121     'refentry': (
 122         etree.XPath('./refmeta/refentrytitle'),
 123         etree.XPath('./refnamediv/refpurpose')
 124     ),
 125 }
 126
 127
 128 def gen_chunk_name(node):
 129     if 'id' in node.attrib:
 130         return node.attrib['id']
 131
 132     tag = node.tag
 133     if tag not in CHUNK_PARAMS:
 134         CHUNK_PARAMS[tag] = ChunkParams(node.tag[:2])
 135         logging.warning('Add CHUNK_PARAMS for "%s"', tag)
 136
 137     naming = CHUNK_PARAMS[tag]
 138     naming.count += 1
 139     name = ('%s%02d' % (naming.prefix, naming.count))
 140     # handle parents to make names of nested tags unique
 141     # TODO: we only need to prepend the parent if there are > 1 of them in the
 142     #       xml
 143     # while naming.parent:
 144     #     parent = naming.parent
 145     #     if parent not in CHUNK_PARAMS:
 146     #         break;
 147     #     naming = CHUNK_PARAMS[parent]
 148     #     name = ('%s%02d' % (naming.prefix, naming.count)) + name
 149     return name
 150
 151
 152 def get_chunk_titles(node):
 153     tag = node.tag
 154     if tag not in TITLE_XPATHS:
 155         # Use defaults
 156         (title, subtitle) = TITLE_XPATHS['_']
 157     else:
 158         (title, subtitle) = TITLE_XPATHS[tag]
 159
 160     xml = title(node)[0]
 161     result = {
 162         'title': xml.text
 163     }
 164     if xml.tag != 'title':
 165         result['title_tag'] = xml.tag
 166     else:
 167         result['title_tag'] = tag
 168
 169     if subtitle:
 170         xml = subtitle(node)[0]
 171         result['subtitle'] = xml.text
 172         result['subtitle_tag'] = xml.tag
 173     else:
 174         result['subtitle'] = None
 175         result['subtitle_tag'] = None
 176     return result
 177
 178
 179 def chunk(xml_node, parent=None):
 180     """Chunk the tree.
 181
 182     The first time, we're called with parent=None and in that case we return
 183     the new_node as the root of the tree
 184     """
 185     # print('<%s %s>' % (xml_node.tag, xml_node.attrib))
 186     if xml_node.tag in CHUNK_TAGS:
 187         # TODO: do we need to remove the xml-node from the parent?
 188         #
 189         # from copy import deepcopy
 190         # sub_tree = deepcopy(xml_node)
 191         # xml_node.getparent().remove(xml_node)
 192         # # or:
 193         # sub_tree = etree.ElementTree(xml_node).getroot()
 194         title_args = get_chunk_titles(xml_node)
 195         parent = Node(xml_node.tag, parent=parent, xml=xml_node,
 196                       filename=gen_chunk_name(xml_node) + '.html',
 197                       **title_args)
 198     for child in xml_node:
 199         chunk(child, parent)
 200
 201     return parent
 202
 203 # conversion helpers
 204
 205
 206 def escape_entities(text):
 207     return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
 208
 209
 210 def convert_inner(ctx, xml, result):
 211     for child in xml:
 212         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 213
 214
 215 def convert_ignore(ctx, xml):
 216     result = []
 217     convert_inner(ctx, xml, result)
 218     return result
 219
 220
 221 def convert_skip(ctx, xml):
 222     return ['']
 223
 224
 225 missing_tags = {}
 226
 227
 228 def convert__unknown(ctx, xml):
 229     # don't recurse on subchunks
 230     if xml.tag in CHUNK_TAGS:
 231         return []
 232     # warn only once
 233     if xml.tag not in missing_tags:
 234         logging.warning('Add tag converter for "%s"', xml.tag)
 235         missing_tags[xml.tag] = True
 236     result = ['<!-- ' + xml.tag + '-->\n']
 237     convert_inner(ctx, xml, result)
 238     result.append('<!-- /' + xml.tag + '-->\n')
 239     return result
 240
 241
 242 def convert_refsect(ctx, xml, h_tag, inner_func=convert_inner):
 243     result = ['<div class="%s">\n' % xml.tag]
 244     title = xml.find('title')
 245     if title is not None:
 246         if 'id' in xml.attrib:
 247             result.append('<a name="%s"></a>' % xml.attrib['id'])
 248         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 249         xml.remove(title)
 250     if xml.text:
 251         result.append(xml.text)
 252     inner_func(ctx, xml, result)
 253     result.append('</div>')
 254     if xml.tail:
 255         result.append(xml.tail)
 256     return result
 257
 258
 259 def xml_get_title(xml):
 260     title = xml.find('title')
 261     if title is not None:
 262         return title.text
 263     else:
 264         # TODO(ensonic): any way to get the file (inlcudes) too?
 265         logging.warning('%s: Expected title tag under "%s"', xml.sourceline, xml.tag)
 266         return ''
 267
 268
 269 # docbook tags
 270
 271 def convert_bookinfo(ctx, xml):
 272     result = ['<div class="titlepage">']
 273     for releaseinfo in xml.findall('releaseinfo'):
 274         result.extend(convert_para(ctx, releaseinfo))
 275     result.append("""<hr>
 276 </div>""")
 277     if xml.tail:
 278         result.append(xml.tail)
 279     return result
 280
 281
 282 def convert_colspec(ctx, xml):
 283     result = ['<col']
 284     a = xml.attrib
 285     if 'colname' in a:
 286         result.append(' class="%s"' % a['colname'])
 287     if 'colwidth' in a:
 288         result.append(' width="%s"' % a['colwidth'])
 289     result.append('>\n')
 290     # is in tgroup and there can be no 'text'
 291     return result
 292
 293
 294 def convert_div(ctx, xml):
 295     result = ['<div class="%s">\n' % xml.tag]
 296     if xml.text:
 297         result.append(xml.text)
 298     convert_inner(ctx, xml, result)
 299     result.append('</div>')
 300     if xml.tail:
 301         result.append(xml.tail)
 302     return result
 303
 304
 305 def convert_em_class(ctx, xml):
 306     result = ['<em class="%s"><code>' % xml.tag]
 307     if xml.text:
 308         result.append(xml.text)
 309     convert_inner(ctx, xml, result)
 310     result.append('</code></em>')
 311     if xml.tail:
 312         result.append(xml.tail)
 313     return result
 314
 315
 316 def convert_entry(ctx, xml):
 317     result = ['<td']
 318     if 'role' in xml.attrib:
 319         result.append(' class="%s">' % xml.attrib['role'])
 320     else:
 321         result.append('>')
 322     if xml.text:
 323         result.append(xml.text)
 324     convert_inner(ctx, xml, result)
 325     result.append('</td>')
 326     if xml.tail:
 327         result.append(xml.tail)
 328     return result
 329
 330
 331 def convert_indexdiv(ctx, xml):
 332     title_tag = xml.find('title')
 333     title = title_tag.text
 334     xml.remove(title_tag)
 335     result = [
 336         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 337     ]
 338     convert_inner(ctx, xml, result)
 339     return result
 340
 341
 342 def convert_informaltable(ctx, xml):
 343     result = ['<div class="informaltable"><table class="informaltable"']
 344     a = xml.attrib
 345     if 'pgwide' in a and a['pgwide'] == '1':
 346         result.append(' width="100%"')
 347     if 'frame' in a and a['frame'] == 'none':
 348         result.append(' border="0"')
 349     result.append('>\n')
 350     convert_inner(ctx, xml, result)
 351     result.append('</table></div>')
 352     if xml.tail:
 353         result.append(xml.tail)
 354     return result
 355
 356
 357 def convert_itemizedlist(ctx, xml):
 358     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 359     convert_inner(ctx, xml, result)
 360     result.append('</ul></div>')
 361     if xml.tail:
 362         result.append(xml.tail)
 363     return result
 364
 365
 366 def convert_link(ctx, xml):
 367     # TODO: inline more fixxref functionality
 368     # TODO: need to build an 'id' map and resolve against internal links too
 369     linkend = xml.attrib['linkend']
 370     if linkend in NoLinks:
 371         linkend = None
 372     result = []
 373     if linkend:
 374         result = ['<!-- GTKDOCLINK HREF="%s" -->' % linkend]
 375     if xml.text:
 376         result.append(xml.text)
 377     convert_inner(ctx, xml, result)
 378     if linkend:
 379         result.append('<!-- /GTKDOCLINK -->')
 380     if xml.tail:
 381         result.append(xml.tail)
 382     return result
 383
 384
 385 def convert_listitem(ctx, xml):
 386     result = ['<li class="listitem">']
 387     convert_inner(ctx, xml, result)
 388     result.append('</li>')
 389     # is in itemizedlist and there can be no 'text'
 390     return result
 391
 392
 393 def convert_literal(ctx, xml):
 394     result = ['<code class="%s">' % xml.tag]
 395     if xml.text:
 396         result.append(xml.text)
 397     convert_inner(ctx, xml, result)
 398     result.append('</code>')
 399     if xml.tail:
 400         result.append(xml.tail)
 401     return result
 402
 403
 404 def convert_para(ctx, xml):
 405     result = ['<p>']
 406     if xml.tag != 'para':
 407         result = ['<p class="%s">' % xml.tag]
 408     if xml.text:
 409         result.append(xml.text)
 410     convert_inner(ctx, xml, result)
 411     result.append('</p>')
 412     if xml.tail:
 413         result.append(xml.tail)
 414     return result
 415
 416
 417 def convert_phrase(ctx, xml):
 418     result = ['<span']
 419     if 'role' in xml.attrib:
 420         result.append(' class="%s">' % xml.attrib['role'])
 421     else:
 422         result.append('>')
 423     if xml.text:
 424         result.append(xml.text)
 425     convert_inner(ctx, xml, result)
 426     result.append('</span>')
 427     if xml.tail:
 428         result.append(xml.tail)
 429     return result
 430
 431
 432 def convert_primaryie(ctx, xml):
 433     result = ['<dt>']
 434     convert_inner(ctx, xml, result)
 435     result.append('</dt>\n<dd></dd>\n')
 436     return result
 437
 438
 439 def convert_programlisting(ctx, xml):
 440     result = ['<pre class="programlisting">']
 441     if xml.text:
 442         result.append(escape_entities(xml.text))
 443     convert_inner(ctx, xml, result)
 444     result.append('</pre>')
 445     if xml.tail:
 446         result.append(xml.tail)
 447     return result
 448
 449
 450 def convert_refsect1(ctx, xml):
 451     # Add a divider between two consequitive refsect2
 452     def convert_inner(ctx, xml, result):
 453         prev = None
 454         for child in xml:
 455             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 456                 result.append('<hr>\n')
 457             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 458             prev = child
 459     return convert_refsect(ctx, xml, 'h2', convert_inner)
 460
 461
 462 def convert_refsect2(ctx, xml):
 463     return convert_refsect(ctx, xml, 'h3')
 464
 465
 466 def convert_refsect3(ctx, xml):
 467     return convert_refsect(ctx, xml, 'h4')
 468
 469
 470 def convert_row(ctx, xml):
 471     result = ['<tr>\n']
 472     convert_inner(ctx, xml, result)
 473     result.append('</tr>\n')
 474     return result
 475
 476
 477 def convert_span(ctx, xml):
 478     result = ['<span class="%s">' % xml.tag]
 479     if xml.text:
 480         result.append(xml.text)
 481     convert_inner(ctx, xml, result)
 482     result.append('</span>')
 483     if xml.tail:
 484         result.append(xml.tail)
 485     return result
 486
 487
 488 def convert_tbody(ctx, xml):
 489     result = ['<tbody>']
 490     convert_inner(ctx, xml, result)
 491     result.append('</tbody>')
 492     # is in tgroup and there can be no 'text'
 493     return result
 494
 495
 496 def convert_tgroup(ctx, xml):
 497     # tgroup does not expand to anything, but the nested colspecs need to
 498     # be put into a colgroup
 499     cols = xml.findall('colspec')
 500     result = []
 501     if cols:
 502         result.append('<colgroup>\n')
 503         for col in cols:
 504             result.extend(convert_colspec(ctx, col))
 505             xml.remove(col)
 506         result.append('</colgroup>\n')
 507     convert_inner(ctx, xml, result)
 508     # is in informaltable and there can be no 'text'
 509     return result
 510
 511
 512 def convert_ulink(ctx, xml):
 513     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 514     if xml.tail:
 515         result.append(xml.tail)
 516     return result
 517
 518
 519 # TODO(ensonic): turn into class with converters as functions and ctx as self
 520 convert_tags = {
 521     'bookinfo': convert_bookinfo,
 522     'colspec': convert_colspec,
 523     'entry': convert_entry,
 524     'function': convert_span,
 525     'indexdiv': convert_indexdiv,
 526     'indexentry': convert_ignore,
 527     'indexterm': convert_skip,
 528     'informalexample': convert_div,
 529     'informaltable': convert_informaltable,
 530     'itemizedlist': convert_itemizedlist,
 531     'link': convert_link,
 532     'listitem': convert_listitem,
 533     'literal': convert_literal,
 534     'para': convert_para,
 535     'parameter': convert_em_class,
 536     'phrase': convert_phrase,
 537     'primaryie': convert_primaryie,
 538     'programlisting': convert_programlisting,
 539     'releaseinfo': convert_para,
 540     'refsect1': convert_refsect1,
 541     'refsect2': convert_refsect2,
 542     'refsect3': convert_refsect3,
 543     'returnvalue': convert_span,
 544     'row': convert_row,
 545     'structfield': convert_em_class,
 546     'tbody': convert_tbody,
 547     'tgroup': convert_tgroup,
 548     'type': convert_span,
 549     'ulink': convert_ulink,
 550     'warning': convert_div,
 551 }
 552
 553 # conversion helpers
 554
 555 HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 556 <html>
 557 <head>
 558 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 559 <title>%s</title>
 560 %s<link rel="stylesheet" href="style.css" type="text/css">
 561 </head>
 562 <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
 563 """
 564
 565
 566 def generate_head_links(ctx):
 567     n = ctx['nav_home']
 568     result = [
 569         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
 570     ]
 571     if 'nav_up' in ctx:
 572         n = ctx['nav_up']
 573         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
 574     if 'nav_prev' in ctx:
 575         n = ctx['nav_prev']
 576         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
 577     if 'nav_next' in ctx:
 578         n = ctx['nav_next']
 579         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
 580     return ''.join(result)
 581
 582
 583 def generate_nav_links(ctx):
 584     n = ctx['nav_home']
 585     result = [
 586         '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n.filename
 587     ]
 588     if 'nav_up' in ctx:
 589         n = ctx['nav_up']
 590         result.append(
 591             '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n.filename)
 592     else:
 593         result.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
 594     if 'nav_prev' in ctx:
 595         n = ctx['nav_prev']
 596         result.append(
 597             '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n.filename)
 598     else:
 599         result.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
 600     if 'nav_next' in ctx:
 601         n = ctx['nav_next']
 602         result.append(
 603             '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n.filename)
 604     else:
 605         result.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
 606
 607     return ''.join(result)
 608
 609
 610 def generate_toc(ctx, node):
 611     result = []
 612     for c in node.children:
 613         # TODO: urlencode the filename: urllib.parse.quote_plus()
 614         result.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
 615             c.title_tag, c.filename, c.title))
 616         if c.subtitle:
 617             result.append('<span class="%s"> — %s</span>' % (c.subtitle_tag, c.subtitle))
 618         result.append('</dt>\n')
 619         if c.children:
 620             result.append('<dd><dl>')
 621             result.extend(generate_toc(ctx, c))
 622             result.append('</dl></dd>')
 623     return result
 624
 625
 626 def generate_basic_nav(ctx):
 627     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 628   <tr valign="middle">
 629     <td width="100%%" align="left" class="shortcuts"></td>
 630     %s
 631   </tr>
 632 </table>
 633     """ % generate_nav_links(ctx)
 634
 635
 636 def generate_index_nav(ctx, indexdivs):
 637     ix_nav = []
 638     for s in indexdivs:
 639         title = xml_get_title(s)
 640         ix_nav.append('<a class="shortcut" href="#idx%s">%s</a>' % (title, title))
 641
 642     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 643   <tr valign="middle">
 644     <td width="100%%" align="left" class="shortcuts">
 645       <span id="nav_index">
 646         %s
 647       </span>
 648     </td>
 649     %s
 650   </tr>
 651 </table>
 652     """ % ('\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
 653
 654
 655 def generate_refentry_nav(ctx, refsect1s):
 656     result = ["""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 657   <tr valign="middle">
 658     <td width="100%%" align="left" class="shortcuts">
 659       <a href="#" class="shortcut">Top</a>"""
 660               ]
 661     for s in refsect1s:
 662         # don't list TOC sections (role="xxx_proto")
 663         if s.attrib.get('role', '').endswith("_proto"):
 664             continue
 665
 666         title = xml_get_title(s)
 667         result.append("""
 668           <span id="nav_description">
 669             <span class="dim">|</span>
 670             <a href="#%s" class="shortcut">%s</a>
 671           </span>""" % (s.attrib['id'], title))
 672     result.append("""
 673     </td>
 674     %s
 675   </tr>
 676 </table>
 677 """ % generate_nav_links(ctx))
 678     return ''.join(result)
 679
 680
 681 def get_id(node):
 682     xml = node.xml
 683     node_id = xml.attrib.get('id', None)
 684     if node_id:
 685         return node_id
 686
 687     logging.warning('%d: No "id" attribute on "%s"', xml.sourceline, xml.tag)
 688     ix = []
 689     # Generate the 'id'. We need to walk up the xml-tree and check the positions
 690     # for each sibling.
 691     parent = xml.getparent()
 692     while parent is not None:
 693         children = parent.getchildren()
 694         ix.insert(0, str(children.index(xml) + 1))
 695         xml = parent
 696         parent = xml.getparent()
 697     # logging.warning('%s: id indexes: %s', node.filename, str(ix))
 698     return 'id-1.' + '.'.join(ix)
 699
 700 # docbook chunks
 701
 702
 703 def convert_book(ctx):
 704     node = ctx['node']
 705     result = [
 706         HTML_HEADER % (node.title, generate_head_links(ctx)),
 707         """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
 708     <tr><th valign="middle"><p class="title">%s</p></th></tr>
 709 </table>
 710 <div class="book">
 711 """ % node.title
 712     ]
 713     bookinfo = node.xml.findall('bookinfo')[0]
 714     result.extend(convert_bookinfo(ctx, bookinfo))
 715     result.append("""<div class="toc">
 716   <dl class="toc">
 717 """)
 718     result.extend(generate_toc(ctx, node.root))
 719     result.append("""</dl>
 720 </div>
 721 </div>
 722 </body>
 723 </html>""")
 724     return ''.join(result)
 725
 726
 727 def convert_chapter(ctx):
 728     node = ctx['node']
 729     result = [
 730         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 731         generate_basic_nav(ctx),
 732         '<div class="chapter">',
 733     ]
 734     title = node.xml.find('title')
 735     if title is not None:
 736         result.append('<div class="titlepage"><h1 class="title"><a name="%s"></a>%s</h1></div>' % (
 737             get_id(node), title.text))
 738         node.xml.remove(title)
 739     convert_inner(ctx, node.xml, result)
 740     result.append("""<div class="toc">
 741   <dl class="toc">
 742 """)
 743     result.extend(generate_toc(ctx, node))
 744     result.append("""</dl>
 745 </div>
 746 </div>
 747 </body>
 748 </html>""")
 749     return ''.join(result)
 750
 751
 752 def convert_index(ctx):
 753     node = ctx['node']
 754     node_id = get_id(node)
 755     # Get all indexdivs under indexdiv
 756     indexdivs = node.xml.find('indexdiv').findall('indexdiv')
 757
 758     result = [
 759         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 760         generate_index_nav(ctx, indexdivs),
 761     ]
 762     result.append("""<div class="index">
 763 <div class="titlepage"><h1 class="title">
 764 <a name="%s"></a>%s</h1>
 765 </div>""" % (node_id, node.title))
 766     for i in indexdivs:
 767         result.extend(convert_indexdiv(ctx, i))
 768     result.append("""</div>
 769 </body>
 770 </html>""")
 771     return ''.join(result)
 772
 773
 774 def convert_refentry(ctx):
 775     node = ctx['node']
 776     node_id = get_id(node)
 777     refsect1s = node.xml.findall('refsect1')
 778
 779     result = [
 780         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 781         generate_refentry_nav(ctx, refsect1s),
 782         """
 783 <div class="refentry">
 784 <a name="%s"></a>
 785 <div class="refnamediv">
 786   <table width="100%%"><tr>
 787     <td valign="top">
 788       <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
 789       <p>%s — module for gtk-doc unit test</p>
 790     </td>
 791     <td class="gallery_image" valign="top" align="right"></td>
 792   </tr></table>
 793 </div>
 794 """ % (node_id, node_id, node.title, node.title)
 795     ]
 796
 797     for s in refsect1s:
 798         result.extend(convert_refsect1(ctx, s))
 799     result.append("""</div>
 800 </body>
 801 </html>""")
 802     return ''.join(result)
 803
 804
 805 # TODO(ensonic): turn into class with converters as functions and ctx as self
 806 convert_chunks = {
 807     'book': convert_book,
 808     'chapter': convert_chapter,
 809     'index': convert_index,
 810     'refentry': convert_refentry,
 811 }
 812
 813
 814 def generate_nav_nodes(files, node):
 815     nav = {
 816         'nav_home': node.root,
 817     }
 818     # nav params: up, prev, next
 819     if node.parent:
 820         nav['nav_up'] = node.parent
 821     ix = files.index(node)
 822     if ix > 0:
 823         nav['nav_prev'] = files[ix - 1]
 824     if ix < len(files) - 1:
 825         nav['nav_next'] = files[ix + 1]
 826     return nav
 827
 828
 829 def convert(out_dir, files, node):
 830     """Convert the docbook chunks to a html file.
 831
 832     Args:
 833       out_dir: already created output dir
 834       files: list of nodes in the tree in pre-order
 835       node: current tree node
 836     """
 837
 838     def jinja_convert(ctx, xml):
 839         return ''.join(convert_tags.get(xml.tag, convert__unknown)(ctx, xml))
 840
 841     logging.info('Writing: %s', node.filename)
 842     with open(os.path.join(out_dir, node.filename), 'wt') as html:
 843         ctx = {
 844             'files': files,
 845             'node': node,
 846         }
 847         ctx.update(generate_nav_nodes(files, node))
 848
 849         if node.name in convert_chunks:
 850             # TODO(ensonic): try returning the array of string and loop over them to write them
 851             html.write(convert_chunks[node.name](ctx))
 852         else:
 853             logging.warning('Add converter/template for "%s"', node.name)
 854
 855
 856 def main(index_file):
 857     tree = etree.parse(index_file)
 858     tree.xinclude()
 859
 860     dir_name = os.path.dirname(index_file)
 861
 862     # for testing: dump to output file
 863     # out_file = os.path.join(dir_name, 'db2html.xml')
 864     # tree.write(out_file)
 865
 866     # TODO: rename to 'html' later on
 867     out_dir = os.path.join(dir_name, 'db2html')
 868     try:
 869         os.mkdir(out_dir)
 870     except OSError as e:
 871         if e.errno != errno.EEXIST:
 872             raise
 873
 874     # We need multiple passes:
 875     # 1) recursively walk the tree and chunk it into a python tree so that we
 876     #   can generate navigation and link tags.
 877     #   also collect all 'id' attributes on the way and build map of
 878     #   id:rel-link (in fixxref is is Links[])
 879     files = chunk(tree.getroot())
 880     # 2) iterate the tree and output files
 881     # TODO: use multiprocessing
 882     files = list(PreOrderIter(files))
 883     for node in files:
 884         convert(out_dir, files, node)
 885     # 3) create a devhelp2.xsl
 886     # - toc under 'chapter'
 887     # - keywords under 'functions' from all refsect2 and refsect3
 888
 889
 890 if __name__ == '__main__':
 891     parser = argparse.ArgumentParser(
 892         description='db2html - chunk docbook')
 893     parser.add_argument('sources', nargs='*')
 894     options = parser.parse_args()
 895     if len(options.sources) != 1:
 896         sys.exit('Expect one source file argument.')
 897
 898     common.setup_logging()
 899
 900     sys.exit(main(options.sources[0]))