gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tools will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
  34 xml can be easilly supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42   - handle 'label' attributes on part/chapter/section-types
  43     - the titles will have a generated prefix, such as 'Part I:'
  44       (locale dependent)
  45     - in the toc it would only be the label: 'I.'
  46   - 'link' seems to add a 'title' attr to 'a' if the target has a title.
  47     - we're using fixxref.MakeXRef to generate the 'a' tag, we could pass the
  48       title with a default value of "" there to inject it
  49     - we might need to split this a bit to first run the 'id' transform logic
  50       and then do the linking.
  51     - initially we could generate this as needed (we need to run the xpath on
  52       each of the chunks though
  53   - handle the 'xref' tag, this also need the title + the type of the target
  54 - check each docbook tag if it can contain #PCDATA, if not don't check for
  55   xml.text
  56 - consider some perf-warnings flag
  57   - see 'No "id" attribute on'
  58
  59 OPTIONAL:
  60 - minify html: https://pypi.python.org/pypi/htmlmin/
  61
  62 Requirements:
  63 sudo pip3 install anytree lxml pygments
  64
  65 Example invocation:
  66 cd tests/bugs/docs/
  67 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  68 xdg-open db2html/index.html
  69 meld html db2html
  70
  71 Benchmarking:
  72 cd tests/bugs/docs/;
  73 rm html-build.stamp; time make html-build.stamp
  74 """
  75
  76 import argparse
  77 import errno
  78 import logging
  79 import os
  80 import shutil
  81 import sys
  82
  83 from anytree import Node, PreOrderIter
  84 from copy import deepcopy
  85 from glob import glob
  86 from lxml import etree
  87 from pygments import highlight
  88 from pygments.lexers import CLexer
  89 from pygments.formatters import HtmlFormatter
  90
  91 from . import config, fixxref
  92
  93 # pygments setup
  94 # lazily constructed lexer cache
  95 LEXERS = {
  96     'c': CLexer()
  97 }
  98 HTML_FORMATTER = HtmlFormatter(nowrap=True)
  99
 100
 101 class ChunkParams(object):
 102     def __init__(self, prefix, parent=None, min_idx=0):
 103         self.prefix = prefix
 104         self.parent = parent
 105         self.min_idx = min_idx
 106         self.idx = 1
 107
 108
 109 DONT_CHUNK = float('inf')
 110 # docbook-xsl defines the chunk tags here.
 111 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
 112 # https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
 113 # If not defined, we can just create an example without an 'id' attr and see
 114 # docbook xsl does.
 115 #
 116 # For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
 117 # TODO: this list has also a flag that controls wheter we add the
 118 # 'Table of Contents' heading in convert_chunk_with_toc()
 119 CHUNK_PARAMS = {
 120     'appendix': ChunkParams('app', 'book'),
 121     'book': ChunkParams('bk'),
 122     'chapter': ChunkParams('ch', 'book'),
 123     'glossary': ChunkParams('go', 'book'),
 124     'index': ChunkParams('ix', 'book'),
 125     'part': ChunkParams('pt', 'book'),
 126     'preface': ChunkParams('pr', 'book'),
 127     'refentry': ChunkParams('re', 'book'),
 128     'reference': ChunkParams('rn', 'book'),
 129     'sect1': ChunkParams('s', 'chapter', 1),
 130     'section': ChunkParams('s', 'chapter', 1),
 131     'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
 132     'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
 133     'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
 134     'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
 135 }
 136 # TAGS we don't support:
 137 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
 138
 139 TITLE_XPATHS = {
 140     '_': (etree.XPath('./title'), None),
 141     'book': (etree.XPath('./bookinfo/title'), None),
 142     'refentry': (
 143         etree.XPath('./refmeta/refentrytitle'),
 144         etree.XPath('./refnamediv/refpurpose')
 145     ),
 146 }
 147
 148 ID_XPATH = etree.XPath('//@id')
 149
 150 GLOSSENTRY_XPATH = etree.XPath('//glossentry')
 151 glossary = {}
 152
 153 footnote_idx = 1
 154
 155
 156 def gen_chunk_name(node, chunk_params):
 157     """Generate a chunk file name
 158
 159     This is either based on the id or on the position in the doc. In the latter
 160     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 161     type.
 162     """
 163     if 'id' in node.attrib:
 164         return node.attrib['id']
 165
 166     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 167     chunk_params.idx += 1
 168
 169     # handle parents to make names of nested tags like in docbook
 170     # - we only need to prepend the parent if there are > 1 of them in the
 171     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 172     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 173     #   when we chunk explicitly and on each level maintain the 'idx'
 174     # while chunk_params.parent:
 175     #     parent = chunk_params.parent
 176     #     if parent not in CHUNK_PARAMS:
 177     #         break;
 178     #     chunk_params = CHUNK_PARAMS[parent]
 179     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 180
 181     logging.info('Gen chunk name: "%s"', name)
 182     return name
 183
 184
 185 def get_chunk_titles(module, node):
 186     tag = node.tag
 187     if tag not in TITLE_XPATHS:
 188         # Use defaults
 189         (title, subtitle) = TITLE_XPATHS['_']
 190     else:
 191         (title, subtitle) = TITLE_XPATHS[tag]
 192
 193     ctx = {
 194         'module': module,
 195     }
 196     result = {
 197         'title': None,
 198         'title_tag': None,
 199         'subtitle': None,
 200         'subtitle_tag': None
 201     }
 202     res = title(node)
 203     if res:
 204         xml = res[0]
 205         result['title'] = ''.join(convert_title(ctx, xml))
 206         if xml.tag != 'title':
 207             result['title_tag'] = xml.tag
 208         else:
 209             result['title_tag'] = tag
 210
 211     if subtitle:
 212         res = subtitle(node)
 213         if res:
 214             xml = res[0]
 215             result['subtitle'] = ''.join(convert_title(ctx, xml))
 216             result['subtitle_tag'] = xml.tag
 217     return result
 218
 219
 220 def chunk(xml_node, module, depth=0, idx=0, parent=None):
 221     """Chunk the tree.
 222
 223     The first time, we're called with parent=None and in that case we return
 224     the new_node as the root of the tree. For each tree-node we generate a
 225     filename and process the children.
 226     """
 227     tag = xml_node.tag
 228     chunk_params = CHUNK_PARAMS.get(tag)
 229     if chunk_params:
 230         title_args = get_chunk_titles(module, xml_node)
 231         chunk_name = gen_chunk_name(xml_node, chunk_params)
 232
 233         # check idx to handle 'sect1'/'section' special casing and title-only
 234         # segments
 235         if idx >= chunk_params.min_idx:
 236             logging.info('chunk tag: "%s"[%d]', tag, idx)
 237             if parent:
 238                 # remove the xml-node from the parent
 239                 sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
 240                 xml_node.getparent().remove(xml_node)
 241                 xml_node = sub_tree
 242
 243             parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
 244                           idx=idx,
 245                           filename=chunk_name + '.html', anchor=None,
 246                           **title_args)
 247         else:
 248             parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
 249                           idx=idx,
 250                           filename=parent.filename, anchor='#' + chunk_name,
 251                           **title_args)
 252
 253         depth += 1
 254         idx = 0
 255         for child in xml_node:
 256             chunk(child, module, depth, idx, parent)
 257             if child.tag in CHUNK_PARAMS:
 258                 idx += 1
 259
 260     return parent
 261
 262
 263 def add_id_links(files, links):
 264     for node in files:
 265         chunk_name = node.filename[:-5]
 266         chunk_base = node.filename + '#'
 267         for attr in ID_XPATH(node.xml):
 268             if attr == chunk_name:
 269                 links[attr] = node.filename
 270             else:
 271                 links[attr] = chunk_base + attr
 272
 273
 274 def build_glossary(files):
 275     for node in files:
 276         if node.xml.tag != 'glossary':
 277             continue
 278         for term in GLOSSENTRY_XPATH(node.xml):
 279             # TODO: there can be all kind of things in a glossary. This only supports
 280             # what we commonly use
 281             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 282             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 283             glossary[key] = value
 284             # logging.debug('glosentry: %s:%s', key, value)
 285
 286
 287 # conversion helpers
 288
 289
 290 def convert_inner(ctx, xml, result):
 291     for child in xml:
 292         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 293
 294
 295 def convert_ignore(ctx, xml):
 296     result = []
 297     convert_inner(ctx, xml, result)
 298     return result
 299
 300
 301 def convert_skip(ctx, xml):
 302     return ['']
 303
 304
 305 def append_text(text, result):
 306     if text and text.strip():
 307         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 308
 309
 310 missing_tags = {}
 311
 312
 313 def convert__unknown(ctx, xml):
 314     # don't recurse on subchunks
 315     if xml.tag in CHUNK_PARAMS:
 316         return []
 317     if isinstance(xml, etree._Comment):
 318         return ['<!-- ' + xml.text + '-->\n']
 319     else:
 320         # warn only once
 321         if xml.tag not in missing_tags:
 322             logging.warning('Add tag converter for "%s"', xml.tag)
 323             missing_tags[xml.tag] = True
 324         result = ['<!-- ' + xml.tag + '-->\n']
 325         convert_inner(ctx, xml, result)
 326         result.append('<!-- /' + xml.tag + '-->\n')
 327         return result
 328
 329
 330 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 331     result = ['<div class="%s">\n' % xml.tag]
 332     title = xml.find('title')
 333     if title is not None:
 334         if 'id' in xml.attrib:
 335             result.append('<a name="%s"></a>' % xml.attrib['id'])
 336         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 337         xml.remove(title)
 338     append_text(xml.text, result)
 339     inner_func(ctx, xml, result)
 340     result.append('</div>')
 341     append_text(xml.tail, result)
 342     return result
 343
 344
 345 def xml_get_title(ctx, xml):
 346     title_tag = xml.find('title')
 347     if title_tag is not None:
 348         return ''.join(convert_title(ctx, title_tag))
 349     else:
 350         # TODO(ensonic): any way to get the file (inlcudes) too?
 351         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 352         return ''
 353
 354
 355 # docbook tags
 356
 357
 358 def convert_abstract(ctx, xml):
 359     result = ["""<div class="abstract">
 360     <p class="title"><b>Abstract</b></p>"""]
 361     append_text(xml.text, result)
 362     convert_inner(ctx, xml, result)
 363     result.append('</div>')
 364     append_text(xml.tail, result)
 365     return result
 366
 367
 368 def convert_acronym(ctx, xml):
 369     key = xml.text
 370     title = glossary.get(key, '')
 371     # TODO: print a sensible warning if missing
 372     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 373     if xml.tail:
 374         result.append(xml.tail)
 375     return result
 376
 377
 378 def convert_anchor(ctx, xml):
 379     return ['<a name="%s"></a>' % xml.attrib['id']]
 380
 381
 382 def convert_bookinfo(ctx, xml):
 383     result = ['<div class="titlepage">']
 384     convert_inner(ctx, xml, result)
 385     result.append("""<hr>
 386 </div>""")
 387     if xml.tail:
 388         result.append(xml.tail)
 389     return result
 390
 391
 392 def convert_blockquote(ctx, xml):
 393     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 394     append_text(xml.text, result)
 395     convert_inner(ctx, xml, result)
 396     result.append('</blockquote>\n</div>')
 397     append_text(xml.tail, result)
 398     return result
 399
 400
 401 def convert_code(ctx, xml):
 402     result = ['<code class="%s">' % xml.tag]
 403     append_text(xml.text, result)
 404     convert_inner(ctx, xml, result)
 405     result.append('</code>')
 406     append_text(xml.tail, result)
 407     return result
 408
 409
 410 def convert_colspec(ctx, xml):
 411     result = ['<col']
 412     a = xml.attrib
 413     if 'colname' in a:
 414         result.append(' class="%s"' % a['colname'])
 415     if 'colwidth' in a:
 416         result.append(' width="%s"' % a['colwidth'])
 417     result.append('>\n')
 418     # is in tgroup and there can be no 'text'
 419     return result
 420
 421
 422 def convert_command(ctx, xml):
 423     result = ['<strong class="userinput"><code>']
 424     append_text(xml.text, result)
 425     convert_inner(ctx, xml, result)
 426     result.append('</code></strong>')
 427     append_text(xml.tail, result)
 428     return result
 429
 430
 431 def convert_corpauthor(ctx, xml):
 432     result = ['<div><h3 class="corpauthor">\n']
 433     append_text(xml.text, result)
 434     convert_inner(ctx, xml, result)
 435     result.append('</h3></div>\n')
 436     append_text(xml.tail, result)
 437     return result
 438
 439
 440 def convert_div(ctx, xml):
 441     result = ['<div class="%s">\n' % xml.tag]
 442     append_text(xml.text, result)
 443     convert_inner(ctx, xml, result)
 444     result.append('</div>')
 445     append_text(xml.tail, result)
 446     return result
 447
 448
 449 def convert_emphasis(ctx, xml):
 450     result = ['<span class="emphasis"><em>']
 451     append_text(xml.text, result)
 452     convert_inner(ctx, xml, result)
 453     result.append('</em></span>')
 454     append_text(xml.tail, result)
 455     return result
 456
 457
 458 def convert_em_class(ctx, xml):
 459     result = ['<em class="%s"><code>' % xml.tag]
 460     append_text(xml.text, result)
 461     convert_inner(ctx, xml, result)
 462     result.append('</code></em>')
 463     append_text(xml.tail, result)
 464     return result
 465
 466
 467 def convert_entry(ctx, xml):
 468     entry_type = ctx['table.entry']
 469     result = ['<' + entry_type]
 470     if 'role' in xml.attrib:
 471         result.append(' class="%s"' % xml.attrib['role'])
 472     if 'morerows' in xml.attrib:
 473         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 474     result.append('>')
 475     append_text(xml.text, result)
 476     convert_inner(ctx, xml, result)
 477     result.append('</' + entry_type + '>')
 478     append_text(xml.tail, result)
 479     return result
 480
 481
 482 def convert_footnote(ctx, xml):
 483     footnotes = ctx.get('footnotes', [])
 484     # footnotes idx is not per page, but per doc
 485     global footnote_idx
 486     idx = footnote_idx
 487     footnote_idx += 1
 488
 489     # need a pair of ids for each footnote (docbook generates different ids)
 490     this_id = 'footnote-%d' % idx
 491     that_id = 'ftn.' + this_id
 492
 493     inner = ['<div id="%s" class="footnote">' % that_id]
 494     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 495         this_id, idx))
 496     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 497     # get double nested paras :/.
 498     # convert_inner(ctx, xml, inner)
 499     para = xml.find('para')
 500     if para is None:
 501         para = xml.find('simpara')
 502     if para is not None:
 503         inner.append(para.text)
 504     else:
 505         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 506                         etree.tostring(xml, method="text", encoding=str).strip())
 507     inner.append('</p></div>')
 508     footnotes.append(inner)
 509     ctx['footnotes'] = footnotes
 510     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 511         that_id, this_id, idx)]
 512
 513
 514 def convert_formalpara(ctx, xml):
 515     result = None
 516     title_tag = xml.find('title')
 517     result = ['<p><b>%s</b>' % title_tag.text]
 518     para_tag = xml.find('para')
 519     append_text(para_tag.text, result)
 520     convert_inner(ctx, para_tag, result)
 521     append_text(para_tag.tail, result)
 522     result.append('</p>')
 523     append_text(xml.tail, result)
 524     return result
 525
 526
 527 def convert_glossdef(ctx, xml):
 528     result = ['<dd class="glossdef">']
 529     convert_inner(ctx, xml, result)
 530     result.append('</dd>\n')
 531     return result
 532
 533
 534 def convert_glossdiv(ctx, xml):
 535     title_tag = xml.find('title')
 536     title = title_tag.text
 537     xml.remove(title_tag)
 538     result = [
 539         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 540     ]
 541     convert_inner(ctx, xml, result)
 542     return result
 543
 544
 545 def convert_glossentry(ctx, xml):
 546     result = []
 547     convert_inner(ctx, xml, result)
 548     return result
 549
 550
 551 def convert_glossterm(ctx, xml):
 552     glossid = ''
 553     text = ''
 554     anchor = xml.find('anchor')
 555     if anchor is not None:
 556         glossid = anchor.attrib.get('id', '')
 557         text += anchor.tail or ''
 558     text += xml.text or ''
 559     if glossid == '':
 560         glossid = 'glossterm-' + text
 561     return [
 562         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 563             glossid, text)
 564     ]
 565
 566
 567 def convert_imageobject(ctx, xml):
 568     imagedata = xml.find('imagedata')
 569     if imagedata is not None:
 570         # TODO(ensonic): warn on missing fileref attr?
 571         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 572     else:
 573         return []
 574
 575
 576 def convert_indexdiv(ctx, xml):
 577     title_tag = xml.find('title')
 578     title = title_tag.text
 579     xml.remove(title_tag)
 580     result = [
 581         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 582     ]
 583     convert_inner(ctx, xml, result)
 584     return result
 585
 586
 587 def convert_informaltable(ctx, xml):
 588     result = ['<div class="informaltable"><table class="informaltable"']
 589     a = xml.attrib
 590     if 'pgwide' in a and a['pgwide'] == '1':
 591         result.append(' width="100%"')
 592     if 'frame' in a and a['frame'] == 'none':
 593         result.append(' border="0"')
 594     result.append('>\n')
 595     convert_inner(ctx, xml, result)
 596     result.append('</table></div>')
 597     if xml.tail:
 598         result.append(xml.tail)
 599     return result
 600
 601
 602 def convert_inlinegraphic(ctx, xml):
 603     # TODO(ensonic): warn on missing fileref attr?
 604     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 605
 606
 607 def convert_itemizedlist(ctx, xml):
 608     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 609     convert_inner(ctx, xml, result)
 610     result.append('</ul></div>')
 611     if xml.tail:
 612         result.append(xml.tail)
 613     return result
 614
 615
 616 def convert_link(ctx, xml):
 617     linkend = xml.attrib['linkend']
 618     if linkend in fixxref.NoLinks:
 619         linkend = None
 620     result = []
 621     if linkend:
 622         link_text = []
 623         append_text(xml.text, link_text)
 624         convert_inner(ctx, xml, link_text)
 625         # TODO: fixxref does some weird checks in xml.text
 626         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 627     else:
 628         append_text(xml.text, result)
 629         convert_inner(ctx, xml, result)
 630     append_text(xml.tail, result)
 631     return result
 632
 633
 634 def convert_listitem(ctx, xml):
 635     result = ['<li class="listitem">']
 636     convert_inner(ctx, xml, result)
 637     result.append('</li>')
 638     # is in itemizedlist and there can be no 'text'
 639     return result
 640
 641
 642 def convert_literallayout(ctx, xml):
 643     result = ['<div class="literallayout"><p><br>\n']
 644     append_text(xml.text, result)
 645     convert_inner(ctx, xml, result)
 646     result.append('</p></div>')
 647     append_text(xml.tail, result)
 648     return result
 649
 650
 651 def convert_orderedlist(ctx, xml):
 652     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 653     convert_inner(ctx, xml, result)
 654     result.append('</ol></div>')
 655     append_text(xml.tail, result)
 656     return result
 657
 658
 659 def convert_para(ctx, xml):
 660     result = []
 661     if 'id' in xml.attrib:
 662         result.append('<a name="%s"></a>' % xml.attrib['id'])
 663     if 'role' in xml.attrib:
 664         result.append('<p class="%s">' % xml.attrib['role'])
 665     else:
 666         result.append('<p>')
 667     append_text(xml.text, result)
 668     convert_inner(ctx, xml, result)
 669     result.append('</p>')
 670     append_text(xml.tail, result)
 671     return result
 672
 673
 674 def convert_para_like(ctx, xml):
 675     result = []
 676     if 'id' in xml.attrib:
 677         result.append('<a name="%s"></a>' % xml.attrib['id'])
 678     result.append('<p class="%s">' % xml.tag)
 679     append_text(xml.text, result)
 680     convert_inner(ctx, xml, result)
 681     result.append('</p>')
 682     append_text(xml.tail, result)
 683     return result
 684
 685
 686 def convert_phrase(ctx, xml):
 687     result = ['<span']
 688     if 'role' in xml.attrib:
 689         result.append(' class="%s">' % xml.attrib['role'])
 690     else:
 691         result.append('>')
 692     append_text(xml.text, result)
 693     convert_inner(ctx, xml, result)
 694     result.append('</span>')
 695     append_text(xml.tail, result)
 696     return result
 697
 698
 699 def convert_primaryie(ctx, xml):
 700     result = ['<dt>\n']
 701     convert_inner(ctx, xml, result)
 702     result.append('\n</dt>\n<dd></dd>\n')
 703     return result
 704
 705
 706 def convert_pre(ctx, xml):
 707     result = ['<pre class="%s">\n' % xml.tag]
 708     append_text(xml.text, result)
 709     convert_inner(ctx, xml, result)
 710     result.append('</pre>')
 711     append_text(xml.tail, result)
 712     return result
 713
 714
 715 def convert_programlisting(ctx, xml):
 716     result = []
 717     if xml.attrib.get('role', '') == 'example':
 718         if xml.text:
 719             lang = xml.attrib.get('language', 'c').lower()
 720             if lang not in LEXERS:
 721                 LEXERS[lang] = get_lexer_by_name(lang)
 722             lexer = LEXERS.get(lang, None)
 723             if lexer:
 724                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 725
 726                 # we do own line-numbering
 727                 line_count = highlighted.count('\n')
 728                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 729                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 730   <tbody>
 731     <tr>
 732       <td class="listing_lines" align="right"><pre>%s</pre></td>
 733       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 734     </tr>
 735   </tbody>
 736 </table>
 737 """ % (source_lines, highlighted))
 738             else:
 739                 logging.warn('No pygments lexer for language="%s"', lang)
 740                 result.append('<pre class="programlisting">')
 741                 result.append(xml.text)
 742                 result.append('</pre>')
 743     else:
 744         result.append('<pre class="programlisting">')
 745         append_text(xml.text, result)
 746         convert_inner(ctx, xml, result)
 747         result.append('</pre>')
 748     append_text(xml.tail, result)
 749     return result
 750
 751
 752 def convert_quote(ctx, xml):
 753     result = ['<span class="quote">"<span class="quote">']
 754     append_text(xml.text, result)
 755     convert_inner(ctx, xml, result)
 756     result.append('</span>"</span>')
 757     append_text(xml.tail, result)
 758     return result
 759
 760
 761 def convert_refsect1(ctx, xml):
 762     # Add a divider between two consequitive refsect2
 763     def convert_inner(ctx, xml, result):
 764         prev = None
 765         for child in xml:
 766             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 767                 result.append('<hr>\n')
 768             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 769             prev = child
 770     return convert_sect(ctx, xml, 'h2', convert_inner)
 771
 772
 773 def convert_refsect2(ctx, xml):
 774     return convert_sect(ctx, xml, 'h3')
 775
 776
 777 def convert_refsect3(ctx, xml):
 778     return convert_sect(ctx, xml, 'h4')
 779
 780
 781 def convert_row(ctx, xml):
 782     result = ['<tr>\n']
 783     convert_inner(ctx, xml, result)
 784     result.append('</tr>\n')
 785     return result
 786
 787
 788 def convert_sect1_tag(ctx, xml):
 789     return convert_sect(ctx, xml, 'h2')
 790
 791
 792 def convert_sect2(ctx, xml):
 793     return convert_sect(ctx, xml, 'h3')
 794
 795
 796 def convert_sect3(ctx, xml):
 797     return convert_sect(ctx, xml, 'h4')
 798
 799
 800 def convert_simpara(ctx, xml):
 801     result = ['<p>']
 802     append_text(xml.text, result)
 803     result.append('</p>')
 804     append_text(xml.tail, result)
 805     return result
 806
 807
 808 def convert_span(ctx, xml):
 809     result = ['<span class="%s">' % xml.tag]
 810     append_text(xml.text, result)
 811     convert_inner(ctx, xml, result)
 812     result.append('</span>')
 813     append_text(xml.tail, result)
 814     return result
 815
 816
 817 def convert_table(ctx, xml):
 818     result = ['<div class="table">']
 819     if 'id' in xml.attrib:
 820         result.append('<a name="%s"></a>' % xml.attrib['id'])
 821     title_tag = xml.find('title')
 822     if title_tag is not None:
 823         result.append('<p class="title"><b>')
 824         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 825         result.extend(convert_title(ctx, title_tag))
 826         result.append('</b></p>')
 827         xml.remove(title_tag)
 828     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 829
 830     convert_inner(ctx, xml, result)
 831
 832     result.append('</table></div></div>')
 833     append_text(xml.tail, result)
 834     return result
 835
 836
 837 def convert_tbody(ctx, xml):
 838     result = ['<tbody>']
 839     ctx['table.entry'] = 'td'
 840     convert_inner(ctx, xml, result)
 841     result.append('</tbody>')
 842     # is in tgroup and there can be no 'text'
 843     return result
 844
 845
 846 def convert_tgroup(ctx, xml):
 847     # tgroup does not expand to anything, but the nested colspecs need to
 848     # be put into a colgroup
 849     cols = xml.findall('colspec')
 850     result = []
 851     if cols:
 852         result.append('<colgroup>\n')
 853         for col in cols:
 854             result.extend(convert_colspec(ctx, col))
 855             xml.remove(col)
 856         result.append('</colgroup>\n')
 857     convert_inner(ctx, xml, result)
 858     # is in informaltable and there can be no 'text'
 859     return result
 860
 861
 862 def convert_thead(ctx, xml):
 863     result = ['<thead>']
 864     ctx['table.entry'] = 'th'
 865     convert_inner(ctx, xml, result)
 866     result.append('</thead>')
 867     # is in tgroup and there can be no 'text'
 868     return result
 869
 870
 871 def convert_title(ctx, xml):
 872     # This is always called from some context
 873     result = []
 874     append_text(xml.text, result)
 875     convert_inner(ctx, xml, result)
 876     append_text(xml.tail, result)
 877     return result
 878
 879
 880 def convert_ulink(ctx, xml):
 881     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 882     if xml.tail:
 883         result.append(xml.tail)
 884     return result
 885
 886
 887 def convert_userinput(ctx, xml):
 888     result = ['<span class="command"><strong>']
 889     append_text(xml.text, result)
 890     convert_inner(ctx, xml, result)
 891     result.append('</strong></span>')
 892     append_text(xml.tail, result)
 893     return result
 894
 895
 896 def convert_variablelist(ctx, xml):
 897     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 898 <colgroup>
 899 <col align="left" valign="top">
 900 <col>
 901 </colgroup>
 902 <tbody>"""]
 903     convert_inner(ctx, xml, result)
 904     result.append("""</tbody>
 905 </table></div>""")
 906     return result
 907
 908
 909 def convert_varlistentry(ctx, xml):
 910     result = ['<tr>']
 911
 912     result.append('<td><p>')
 913     term = xml.find('term')
 914     result.extend(convert_span(ctx, term))
 915     result.append('</p></td>')
 916
 917     result.append('<td>')
 918     listitem = xml.find('listitem')
 919     convert_inner(ctx, listitem, result)
 920     result.append('</td>')
 921
 922     result.append('<tr>')
 923     return result
 924
 925
 926 # TODO(ensonic): turn into class with converters as functions and ctx as self
 927 convert_tags = {
 928     'abstract': convert_abstract,
 929     'acronym': convert_acronym,
 930     'anchor': convert_anchor,
 931     'application': convert_span,
 932     'bookinfo': convert_bookinfo,
 933     'blockquote': convert_blockquote,
 934     'caption': convert_div,
 935     'code': convert_code,
 936     'colspec': convert_colspec,
 937     'constant': convert_code,
 938     'command': convert_command,
 939     'corpauthor': convert_corpauthor,
 940     'emphasis': convert_emphasis,
 941     'entry': convert_entry,
 942     'envar': convert_code,
 943     'footnote': convert_footnote,
 944     'filename': convert_code,
 945     'formalpara': convert_formalpara,
 946     'function': convert_code,
 947     'glossdef': convert_glossdef,
 948     'glossdiv': convert_glossdiv,
 949     'glossentry': convert_glossentry,
 950     'glossterm': convert_glossterm,
 951     'imageobject': convert_imageobject,
 952     'indexdiv': convert_indexdiv,
 953     'indexentry': convert_ignore,
 954     'indexterm': convert_skip,
 955     'informalexample': convert_div,
 956     'informaltable': convert_informaltable,
 957     'inlinegraphic': convert_inlinegraphic,
 958     'inlinemediaobject': convert_span,
 959     'itemizedlist': convert_itemizedlist,
 960     'legalnotice': convert_div,
 961     'link': convert_link,
 962     'listitem': convert_listitem,
 963     'literal': convert_code,
 964     'literallayout': convert_literallayout,
 965     'mediaobject': convert_div,
 966     'note': convert_div,
 967     'option': convert_code,
 968     'orderedlist': convert_orderedlist,
 969     'para': convert_para,
 970     'partintro': convert_div,
 971     'parameter': convert_em_class,
 972     'phrase': convert_phrase,
 973     'primaryie': convert_primaryie,
 974     'programlisting': convert_programlisting,
 975     'quote': convert_quote,
 976     'releaseinfo': convert_para_like,
 977     'refsect1': convert_refsect1,
 978     'refsect2': convert_refsect2,
 979     'refsect3': convert_refsect3,
 980     'replaceable': convert_em_class,
 981     'returnvalue': convert_span,
 982     'row': convert_row,
 983     'screen': convert_pre,
 984     'sect1': convert_sect1_tag,
 985     'sect2': convert_sect2,
 986     'sect3': convert_sect3,
 987     'simpara': convert_simpara,
 988     'structfield': convert_em_class,
 989     'structname': convert_span,
 990     'synopsis': convert_pre,
 991     'symbol': convert_span,
 992     'table': convert_table,
 993     'tbody': convert_tbody,
 994     'term': convert_span,
 995     'tgroup': convert_tgroup,
 996     'thead': convert_thead,
 997     'type': convert_span,
 998     'ulink': convert_ulink,
 999     'userinput': convert_userinput,
1000     'varname': convert_code,
1001     'variablelist': convert_variablelist,
1002     'varlistentry': convert_varlistentry,
1003     'warning': convert_div,
1004 }
1005
1006 # conversion helpers
1007
1008 HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
1009 <html>
1010 <head>
1011 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
1012 <title>%s</title>
1013 %s<link rel="stylesheet" href="style.css" type="text/css">
1014 </head>
1015 <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
1016 """
1017
1018
1019 def generate_head_links(ctx):
1020     n = ctx['nav_home']
1021     result = [
1022         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
1023     ]
1024     if 'nav_up' in ctx:
1025         n = ctx['nav_up']
1026         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
1027     if 'nav_prev' in ctx:
1028         n = ctx['nav_prev']
1029         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
1030     if 'nav_next' in ctx:
1031         n = ctx['nav_next']
1032         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
1033     return ''.join(result)
1034
1035
1036 def generate_nav_links(ctx):
1037     n = ctx['nav_home']
1038     result = [
1039         '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n.filename
1040     ]
1041     if 'nav_up' in ctx:
1042         n = ctx['nav_up']
1043         result.append(
1044             '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n.filename)
1045     else:
1046         result.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
1047     if 'nav_prev' in ctx:
1048         n = ctx['nav_prev']
1049         result.append(
1050             '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n.filename)
1051     else:
1052         result.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
1053     if 'nav_next' in ctx:
1054         n = ctx['nav_next']
1055         result.append(
1056             '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n.filename)
1057     else:
1058         result.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
1059
1060     return ''.join(result)
1061
1062
1063 def generate_toc(ctx, node):
1064     result = []
1065     for c in node.children:
1066         # TODO: urlencode the filename: urllib.parse.quote_plus()
1067         link = c.filename
1068         if c.anchor:
1069             link += c.anchor
1070         result.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
1071             c.title_tag, link, c.title))
1072         if c.subtitle:
1073             result.append('<span class="%s"> — %s</span>' % (c.subtitle_tag, c.subtitle))
1074         result.append('</dt>\n')
1075         if c.children:
1076             result.append('<dd><dl>')
1077             result.extend(generate_toc(ctx, c))
1078             result.append('</dl></dd>')
1079     return result
1080
1081
1082 def generate_basic_nav(ctx):
1083     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
1084   <tr valign="middle">
1085     <td width="100%%" align="left" class="shortcuts"></td>
1086     %s
1087   </tr>
1088 </table>
1089     """ % generate_nav_links(ctx)
1090
1091
1092 def generate_alpha_nav(ctx, divs, prefix, span_id):
1093     ix_nav = []
1094     for s in divs:
1095         title = xml_get_title(ctx, s)
1096         ix_nav.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title))
1097
1098     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
1099   <tr valign="middle">
1100     <td width="100%%" align="left" class="shortcuts">
1101       <span id="nav_%s">
1102         %s
1103       </span>
1104     </td>
1105     %s
1106   </tr>
1107 </table>
1108     """ % (span_id, '\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
1109
1110
1111 def generate_refentry_nav(ctx, refsect1s, result):
1112     result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
1113   <tr valign="middle">
1114     <td width="100%" align="left" class="shortcuts">
1115       <a href="#" class="shortcut">Top</a>""")
1116
1117     for s in refsect1s:
1118         # don't list TOC sections (role="xxx_proto")
1119         if s.attrib.get('role', '').endswith("_proto"):
1120             continue
1121         # skip section without 'id' attrs
1122         if 'id' not in s.attrib:
1123             continue
1124
1125         ref_id = s.attrib['id']
1126         # skip foreign sections
1127         if '.' not in ref_id:
1128             continue
1129
1130         title = xml_get_title(ctx, s)
1131         span_id = ref_id.split('.')[1].replace('-', '_')
1132
1133         result.append("""
1134           <span id="nav_%s">
1135             <span class="dim">|</span>
1136             <a href="#%s" class="shortcut">%s</a>
1137           </span>
1138           """ % (span_id, ref_id, title))
1139     result.append("""
1140     </td>
1141     %s
1142   </tr>
1143 </table>
1144 """ % generate_nav_links(ctx))
1145
1146
1147 def generate_footer(ctx):
1148     result = []
1149     if 'footnotes' in ctx:
1150         result.append("""<div class="footnotes">\n
1151 <br><hr style="width:100; text-align:left;margin-left: 0">
1152 """)
1153         for f in ctx['footnotes']:
1154             result.extend(f)
1155         result.append('</div>\n')
1156     return result
1157
1158
1159 def get_id_path(node):
1160     """ Generate the 'id'.
1161     We need to walk up the xml-tree and check the positions for each sibling.
1162     When reaching the top of the tree we collect remaining index entries from
1163     the chunked-tree.
1164     """
1165     ix = []
1166     xml = node.xml
1167     parent = xml.getparent()
1168     while parent is not None:
1169         children = parent.getchildren()
1170         ix.insert(0, str(children.index(xml) + 1))
1171         xml = parent
1172         parent = xml.getparent()
1173     while node is not None:
1174         ix.insert(0, str(node.idx + 1))
1175         node = node.parent
1176
1177     return ix
1178
1179
1180 def get_id(node):
1181     xml = node.xml
1182     node_id = xml.attrib.get('id', None)
1183     if node_id:
1184         return node_id
1185
1186     # TODO: this is moot if nothing links to it, we could also consider to omit
1187     # the <a name="$id"></a> tag.
1188     logging.info('%d: No "id" attribute on "%s", generating one',
1189                  xml.sourceline, xml.tag)
1190     ix = get_id_path(node)
1191     # logging.warning('%s: id indexes: %s', node.filename, str(ix))
1192     return 'id-' + '.'.join(ix)
1193
1194
1195 def convert_chunk_with_toc(ctx, div_class, title_tag):
1196     node = ctx['node']
1197     result = [
1198         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
1199         generate_basic_nav(ctx),
1200         '<div class="%s">' % div_class,
1201     ]
1202     title = node.xml.find('title')
1203     if title is not None:
1204         result.append("""
1205 <div class="titlepage">
1206 <%s class="title"><a name="%s"></a>%s</%s>
1207 </div>""" % (
1208             title_tag, get_id(node), title.text, title_tag))
1209         node.xml.remove(title)
1210
1211     toc = generate_toc(ctx, node)
1212     if toc:
1213         # TODO: not all docbook page types use this extra heading
1214         result.append("""<p><b>Table of Contents</b></p>
1215     <div class="toc">
1216       <dl class="toc">
1217     """)
1218         result.extend(toc)
1219         result.append("""</dl>
1220     </div>
1221     """)
1222     convert_inner(ctx, node.xml, result)
1223     result.extend(generate_footer(ctx))
1224     result.append("""</div>
1225 </body>
1226 </html>""")
1227     return result
1228
1229
1230 # docbook chunks
1231
1232
1233 def convert_book(ctx):
1234     node = ctx['node']
1235     result = [
1236         HTML_HEADER % (node.title, generate_head_links(ctx)),
1237         """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
1238     <tr><th valign="middle"><p class="title">%s</p></th></tr>
1239 </table>
1240 <div class="book">
1241 """ % node.title
1242     ]
1243     bookinfo = node.xml.findall('bookinfo')[0]
1244     # we already used the title
1245     title = bookinfo.find('title')
1246     if title is not None:
1247         bookinfo.remove(title)
1248     result.extend(convert_bookinfo(ctx, bookinfo))
1249     result.append("""<div class="toc">
1250   <dl class="toc">
1251 """)
1252     result.extend(generate_toc(ctx, node.root))
1253     result.append("""</dl>
1254 </div>
1255 """)
1256     result.extend(generate_footer(ctx))
1257     result.append("""</div>
1258 </body>
1259 </html>""")
1260     return result
1261
1262
1263 def convert_chapter(ctx):
1264     return convert_chunk_with_toc(ctx, 'chapter', 'h2')
1265
1266
1267 def convert_glossary(ctx):
1268     node = ctx['node']
1269     glossdivs = node.xml.findall('glossdiv')
1270
1271     result = [
1272         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
1273         generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'),
1274         """<div class="glossary">
1275 <div class="titlepage"><h%1d class="title">
1276 <a name="%s"></a>%s</h%1d>
1277 </div>""" % (node.depth, get_id(node), node.title, node.depth)
1278     ]
1279     for i in glossdivs:
1280         result.extend(convert_glossdiv(ctx, i))
1281     result.extend(generate_footer(ctx))
1282     result.append("""</div>
1283 </body>
1284 </html>""")
1285     return result
1286
1287
1288 def convert_index(ctx):
1289     node = ctx['node']
1290     # Get all indexdivs under indexdiv
1291     indexdivs = node.xml.find('indexdiv').findall('indexdiv')
1292
1293     result = [
1294         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
1295         generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
1296         """<div class="index">
1297 <div class="titlepage"><h%1d class="title">
1298 <a name="%s"></a>%s</h%1d>
1299 </div>""" % (node.depth, get_id(node), node.title, node.depth)
1300     ]
1301     for i in indexdivs:
1302         result.extend(convert_indexdiv(ctx, i))
1303     result.extend(generate_footer(ctx))
1304     result.append("""</div>
1305 </body>
1306 </html>""")
1307     return result
1308
1309
1310 def convert_part(ctx):
1311     return convert_chunk_with_toc(ctx, 'part', 'h1')
1312
1313
1314 def convert_preface(ctx):
1315     node = ctx['node']
1316     result = [
1317         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
1318         generate_basic_nav(ctx),
1319         '<div class="preface">'
1320     ]
1321     title = node.xml.find('title')
1322     if title is not None:
1323         result.append("""
1324 <div class="titlepage">
1325 <h2 class="title"><a name="%s"></a>%s</h2>
1326 </div>""" % (get_id(node), title.text))
1327         node.xml.remove(title)
1328     convert_inner(ctx, node.xml, result)
1329     result.extend(generate_footer(ctx))
1330     result.append("""</div>
1331 </body>
1332 </html>""")
1333     return result
1334
1335
1336 def convert_reference(ctx):
1337     return convert_chunk_with_toc(ctx, 'reference', 'h1')
1338
1339
1340 def convert_refentry(ctx):
1341     node = ctx['node']
1342     node_id = get_id(node)
1343     refsect1s = node.xml.findall('refsect1')
1344
1345     gallery = ''
1346     refmeta = node.xml.find('refmeta')
1347     if refmeta is not None:
1348         refmiscinfo = refmeta.find('refmiscinfo')
1349         if refmiscinfo is not None:
1350             inlinegraphic = refmiscinfo.find('inlinegraphic')
1351             if inlinegraphic is not None:
1352                 gallery = ''.join(convert_inlinegraphic(ctx, inlinegraphic))
1353
1354     result = [
1355         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
1356     ]
1357     generate_refentry_nav(ctx, refsect1s, result)
1358     result.append("""
1359 <div class="refentry">
1360 <a name="%s"></a>
1361 <div class="refnamediv">
1362   <table width="100%%"><tr>
1363     <td valign="top">
1364       <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
1365       <p>%s — %s</p>
1366     </td>
1367     <td class="gallery_image" valign="top" align="right">%s</td>
1368   </tr></table>
1369 </div>
1370 """ % (node_id, node_id, node.title, node.title, node.subtitle, gallery))
1371
1372     for s in refsect1s:
1373         result.extend(convert_refsect1(ctx, s))
1374     result.extend(generate_footer(ctx))
1375     result.append("""</div>
1376 </body>
1377 </html>""")
1378     return result
1379
1380
1381 def convert_sect1(ctx):
1382     return convert_chunk_with_toc(ctx, 'sect1', 'h2')
1383
1384
1385 # TODO(ensonic): turn into class with converters as functions and ctx as self
1386 convert_chunks = {
1387     'book': convert_book,
1388     'chapter': convert_chapter,
1389     'glossary': convert_glossary,
1390     'index': convert_index,
1391     'part': convert_part,
1392     'preface': convert_preface,
1393     'reference': convert_reference,
1394     'refentry': convert_refentry,
1395     'sect1': convert_sect1,
1396 }
1397
1398
1399 def generate_nav_nodes(files, node):
1400     nav = {
1401         'nav_home': node.root,
1402     }
1403     # nav params: up, prev, next
1404     if node.parent:
1405         nav['nav_up'] = node.parent
1406     ix = files.index(node)
1407     if ix > 0:
1408         nav['nav_prev'] = files[ix - 1]
1409     if ix < len(files) - 1:
1410         nav['nav_next'] = files[ix + 1]
1411     return nav
1412
1413
1414 def convert(out_dir, module, files, node):
1415     """Convert the docbook chunks to a html file.
1416
1417     Args:
1418       out_dir: already created output dir
1419       files: list of nodes in the tree in pre-order
1420       node: current tree node
1421     """
1422
1423     logging.info('Writing: %s', node.filename)
1424     with open(os.path.join(out_dir, node.filename), 'wt',
1425               newline='\n', encoding='utf-8') as html:
1426         ctx = {
1427             'module': module,
1428             'files': files,
1429             'node': node,
1430         }
1431         ctx.update(generate_nav_nodes(files, node))
1432
1433         if node.name in convert_chunks:
1434             for line in convert_chunks[node.name](ctx):
1435                 html.write(line)
1436         else:
1437             logging.warning('Add converter/template for "%s"', node.name)
1438
1439
1440 def create_devhelp2_toc(node):
1441     result = []
1442     for c in node.children:
1443         if c.children:
1444             result.append('<sub name="%s" link="%s">\n' % (c.title, c.filename))
1445             result.extend(create_devhelp2_toc(c))
1446             result.append('</sub>\n')
1447         else:
1448             result.append('<sub name="%s" link="%s"/>\n' % (c.title, c.filename))
1449     return result
1450
1451
1452 def create_devhelp2_condition_attribs(node):
1453     if 'condition' in node.attrib:
1454         # condition -> since, deprecated, ... (separated with '|')
1455         cond = node.attrib['condition'].replace('"', '&quot;').split('|')
1456         keywords = []
1457         for c in cond:
1458             if ':' in c:
1459                 keywords.append('{}="{}"'.format(*c.split(':', 1)))
1460             else:
1461                 # deprecated can have no description
1462                 keywords.append('{}="{}"'.format(c, ''))
1463         return ' ' + ' '.join(keywords)
1464     else:
1465         return ''
1466
1467
1468 def create_devhelp2_refsect2_keyword(node, base_link):
1469     return'    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
1470         node.attrib['role'], xml_get_title({}, node), base_link + node.attrib['id'],
1471         create_devhelp2_condition_attribs(node))
1472
1473
1474 def create_devhelp2_refsect3_keyword(node, base_link, title, name):
1475     return'    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
1476         node.attrib['role'], title, base_link + name,
1477         create_devhelp2_condition_attribs(node))
1478
1479
1480 def create_devhelp2(out_dir, module, xml, files):
1481     with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
1482               newline='\n', encoding='utf-8') as idx:
1483         bookinfo_nodes = xml.xpath('/book/bookinfo')
1484         title = ''
1485         if bookinfo_nodes is not None:
1486             bookinfo = bookinfo_nodes[0]
1487             title = bookinfo.xpath('./title/text()')[0]
1488             online_url = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')[0]
1489             # TODO: support author too (see devhelp2.xsl)
1490         # TODO: fixxref uses '--src-lang' to set the language
1491         result = [
1492             """<?xml version="1.0" encoding="utf-8" standalone="no"?>
1493 <book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
1494   <chapters>
1495 """ % (title, module, online_url)
1496         ]
1497         # toc
1498         result.extend(create_devhelp2_toc(files[0].root))
1499         result.append("""  </chapters>
1500   <functions>
1501 """)
1502         # keywords from all refsect2 and refsect3
1503         refsect2 = etree.XPath('//refsect2[@role]')
1504         refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
1505         refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
1506         refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
1507         refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
1508         for node in files:
1509             base_link = node.filename + '#'
1510             refsect2_nodes = refsect2(node.xml)
1511             for refsect2_node in refsect2_nodes:
1512                 result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
1513                 refsect3_nodes = refsect3_enum(refsect2_node)
1514                 for refsect3_node in refsect3_nodes:
1515                     details_node = refsect3_enum_details(refsect3_node)[0]
1516                     name = details_node.attrib['id']
1517                     result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
1518                 refsect3_nodes = refsect3_struct(refsect2_node)
1519                 for refsect3_node in refsect3_nodes:
1520                     details_node = refsect3_struct_details(refsect3_node)[0]
1521                     name = details_node.attrib['id']
1522                     result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))
1523
1524         result.append("""  </functions>
1525 </book>
1526 """)
1527         for line in result:
1528             idx.write(line)
1529
1530
1531 def get_dirs(uninstalled):
1532     if uninstalled:
1533         # this does not work from buiddir!=srcdir
1534         gtkdocdir = os.path.split(sys.argv[0])[0]
1535         if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
1536             # try 'srcdir' (set from makefiles) too
1537             if os.path.exists(os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'):
1538                 gtkdocdir = os.environ['ABS_TOP_SRCDIR']
1539         styledir = gtkdocdir + '/style'
1540     else:
1541         gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
1542         styledir = gtkdocdir
1543     return (gtkdocdir, styledir)
1544
1545
1546 def main(module, index_file, out_dir, uninstalled):
1547     tree = etree.parse(index_file)
1548     tree.xinclude()
1549
1550     (gtkdocdir, styledir) = get_dirs(uninstalled)
1551     # copy navigation images and stylesheets to html directory ...
1552     css_file = os.path.join(styledir, 'style.css')
1553     for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
1554         shutil.copy(f, out_dir)
1555     css_file = os.path.join(out_dir, 'style.css')
1556     with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
1557         css.write(HTML_FORMATTER.get_style_defs())
1558
1559     # TODO: migrate options from fixxref
1560     # TODO: do in parallel with loading the xml above.
1561     # TODO: ideally explicity specify the files we need, this will save us the
1562     # globbing and we'll load less files.
1563     fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
1564
1565     # We do multiple passes:
1566     # 1) recursively walk the tree and chunk it into a python tree so that we
1567     #   can generate navigation and link tags.
1568     files = chunk(tree.getroot(), module)
1569     files = [f for f in PreOrderIter(files) if f.anchor is None]
1570
1571     # 2) extract tables:
1572     # TODO: can be done in parallel
1573     # - find all 'id' attribs and add them to the link map
1574     add_id_links(files, fixxref.Links)
1575     # - build glossary dict
1576     build_glossary(files)
1577
1578     # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
1579     create_devhelp2(out_dir, module, tree.getroot(), files)
1580
1581     # 4) iterate the tree and output files
1582     # TODO: can be done in parallel, figure out why this is not faster
1583     # from multiprocessing.pool import Pool
1584     # with Pool(4) as p:
1585     #     p.apply_async(convert, args=(out_dir, module, files))
1586     # from multiprocessing.pool import ThreadPool
1587     # with ThreadPool(4) as p:
1588     #     p.apply_async(convert, args=(out_dir, module, files))
1589     for node in files:
1590         convert(out_dir, module, files, node)
1591
1592
1593 def run(options):
1594     logging.info('options: %s', str(options.__dict__))
1595     module = options.args[0]
1596     document = options.args[1]
1597
1598     # TODO: rename to 'html' later on
1599     # - right now in mkhtml, the dir is created by the Makefile and mkhtml
1600     #   outputs into the working directory
1601     out_dir = os.path.join(os.path.dirname(document), 'db2html')
1602     try:
1603         os.mkdir(out_dir)
1604     except OSError as e:
1605         if e.errno != errno.EEXIST:
1606             raise
1607
1608     sys.exit(main(module, document, out_dir, options.uninstalled))