gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
  34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - 'section'/'simplesect' - the first we convert as a chunk, the nested ones we
  40     need to convert as 'sect{2,3,4,...}, we can track depth in 'ctx'
  41   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  42   - inside 'glossentry' we're only handling 'glossterm' and 'glossdef'
  43   - convert_{figure,table} need counters.
  44 - check each docbook tag if it can contain #PCDATA, if not don't check for
  45   xml.text/xml.tail and add a comment (# no PCDATA allowed here)
  46 - consider some perf-warnings flag
  47   - see 'No "id" attribute on'
  48 - find a better way to print context for warnings
  49   - we use 'xml.sourceline', but this all does not help a lot due to xi:include
  50 - consolidate title handling:
  51   - always use the titles-dict
  52   - there only store what we have (xml, tag, ...)
  53   - when chunking generate 'id's and add entries to titles-dict
  54   - add accessors for title and raw_title that lazily get them
  55
  56 DIFFERENCES:
  57 - titles
  58   - we add the chunk label to the title in toc, on the page and in nav tooltips
  59   - docbook xsl only sometimes adds the label to the titles and when it does it
  60     adds the chunk type name too (e.g. 'Part I.' instead of 'I.')
  61 - navigation
  62   - we always add an up-link except on the first page
  63 - footer
  64   - we're now omitting the footer
  65 - tocs
  66   - we always add "Table of Contents" before a toc
  67   - docbook does that for some pages, it is configurable
  68
  69 OPTIONAL:
  70 - minify html: https://pypi.python.org/pypi/htmlmin/
  71
  72 Requirements:
  73 sudo pip3 install anytree lxml pygments
  74
  75 Example invocation:
  76 cd tests/bugs/docs/
  77 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  78 xdg-open db2html/index.html
  79 meld html db2html
  80
  81 Benchmarking:
  82 cd tests/bugs/docs/;
  83 rm html-build.stamp; time make html-build.stamp
  84 """
  85
  86 import argparse
  87 import errno
  88 import logging
  89 import os
  90 import shutil
  91 import sys
  92
  93 from anytree import Node, PreOrderIter
  94 from copy import deepcopy
  95 from glob import glob
  96 from lxml import etree
  97 from pygments import highlight
  98 from pygments.lexers import CLexer
  99 from pygments.formatters import HtmlFormatter
 100 from timeit import default_timer as timer
 101
 102 from . import config, fixxref
 103
# pygments setup
# Lazily constructed lexer cache, keyed by lower-cased language name.
# convert_programlisting() adds further entries on demand.
LEXERS = {
    'c': CLexer()
}
# nowrap=True: the formatter emits only the highlighted markup; the callers
# supply the surrounding <pre> themselves.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
 110
 111
 112 class ChunkParams(object):
 113     def __init__(self, prefix, parent=None, min_idx=0):
 114         self.prefix = prefix
 115         self.parent = parent
 116         self.min_idx = min_idx
 117         self.idx = 1
 118
 119
# Used as 'min_idx': chunk() compares 'idx >= min_idx', so with infinity the
# tag never starts a new file and is always emitted as an anchor instead.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list also has a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# Maps tag name -> ChunkParams(prefix, parent, min_idx).
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
 149
# Maps tag name -> (title xpath, subtitle xpath or None). The '_' entry is the
# fallback that lookups use for tags without their own entry.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}
 158
# selects every element that carries an 'id' attribute
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# number of the next footnote; convert_footnote() counts per document, not
# per page
footnote_idx = 1

# nested dict with subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
titles = {}

# files to copy
assets = set()
 174
 175
 176 def encode_entities(text):
 177     return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
 178
 179
 180 def raw_text(xml):
 181     return etree.tostring(xml, method="text", encoding=str).strip()
 182
 183
 184 def gen_chunk_name(node, chunk_params):
 185     """Generate a chunk file name
 186
 187     This is either based on the id or on the position in the doc. In the latter
 188     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 189     type.
 190     """
 191     idval = node.attrib.get('id')
 192     if idval is not None:
 193         return idval
 194
 195     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 196     chunk_params.idx += 1
 197
 198     # handle parents to make names of nested tags like in docbook
 199     # - we only need to prepend the parent if there are > 1 of them in the
 200     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 201     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 202     #   when we chunk explicitly and on each level maintain the 'idx'
 203     # while chunk_params.parent:
 204     #     parent = chunk_params.parent
 205     #     if parent not in CHUNK_PARAMS:
 206     #         break;
 207     #     chunk_params = CHUNK_PARAMS[parent]
 208     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 209
 210     logging.info('Gen chunk name: "%s"', name)
 211     return name
 212
 213
 214 def get_chunk_titles(module, node):
 215     tag = node.tag
 216     (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
 217
 218     ctx = {
 219         'module': module,
 220         'files': [],
 221     }
 222     result = {
 223         'title': None,
 224         'title_tag': None,
 225         'subtitle': None,
 226         'subtitle_tag': None
 227     }
 228     res = title(node)
 229     if res:
 230         # handle chunk label for tocs
 231         label = node.attrib.get('label')
 232         if label:
 233             label += '. '
 234         else:
 235             label = ''
 236
 237         xml = res[0]
 238         # TODO: consider to eval 'title'/'raw_title' lazily
 239         result['title'] = label + ''.join(convert_title(ctx, xml))
 240         result['raw_title'] = encode_entities(raw_text(xml))
 241         if xml.tag != 'title':
 242             result['title_tag'] = xml.tag
 243         else:
 244             result['title_tag'] = tag
 245
 246     if subtitle:
 247         res = subtitle(node)
 248         if res:
 249             xml = res[0]
 250             result['subtitle'] = ''.join(convert_title(ctx, xml))
 251             result['subtitle_tag'] = xml.tag
 252     return result
 253
 254
 255 def chunk(xml_node, module, depth=0, idx=0, parent=None):
 256     """Chunk the tree.
 257
 258     The first time, we're called with parent=None and in that case we return
 259     the new_node as the root of the tree. For each tree-node we generate a
 260     filename and process the children.
 261     """
 262     tag = xml_node.tag
 263     chunk_params = CHUNK_PARAMS.get(tag)
 264     if chunk_params:
 265         title_args = get_chunk_titles(module, xml_node)
 266         chunk_name = gen_chunk_name(xml_node, chunk_params)
 267
 268         # check idx to handle 'sect1'/'section' special casing and title-only
 269         # segments
 270         if idx >= chunk_params.min_idx:
 271             logging.info('chunk tag: "%s"[%d]', tag, idx)
 272             if parent:
 273                 # remove the xml-node from the parent
 274                 sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
 275                 xml_node.getparent().remove(xml_node)
 276                 xml_node = sub_tree
 277
 278             parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
 279                           idx=idx,
 280                           filename=chunk_name + '.html', anchor=None,
 281                           **title_args)
 282         else:
 283             parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
 284                           idx=idx,
 285                           filename=parent.filename, anchor='#' + chunk_name,
 286                           **title_args)
 287
 288         depth += 1
 289         idx = 0
 290         for child in xml_node:
 291             chunk(child, module, depth, idx, parent)
 292             if child.tag in CHUNK_PARAMS:
 293                 idx += 1
 294
 295     return parent
 296
 297
 298 def add_id_links_and_titles(files, links):
 299     for node in files:
 300         chunk_name = node.filename[:-5]
 301         chunk_base = node.filename + '#'
 302         for elem in ID_XPATH(node.xml):
 303             attr = elem.attrib['id']
 304             if attr == chunk_name:
 305                 links[attr] = node.filename
 306             else:
 307                 links[attr] = chunk_base + attr
 308
 309             title = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
 310             res = title(elem)
 311             if res:
 312                 xml = res[0]
 313                 # TODO: consider to eval 'title' lazily
 314                 titles[attr] = {
 315                     'title': encode_entities(raw_text(xml)),
 316                     'xml': xml,
 317                     'tag': elem.tag,
 318                 }
 319
 320
 321 def build_glossary(files):
 322     for node in files:
 323         if node.xml.tag != 'glossary':
 324             continue
 325         for term in GLOSSENTRY_XPATH(node.xml):
 326             # TODO: there can be all kind of things in a glossary. This only supports
 327             # what we commonly use, glossterm is mandatory
 328             key_node = term.find('glossterm')
 329             val_node = term.find('glossdef')
 330             if key_node is not None and val_node is not None:
 331                 glossary[raw_text(key_node)] = raw_text(val_node)
 332             else:
 333                 debug = []
 334                 if key_node is None:
 335                     debug.append('missing key')
 336                 if val_node is None:
 337                     debug.append('missing val')
 338                 logging.warning('Broken glossentry "%s": %s',
 339                                 term.attrib['id'], ','.join(debug))
 340
 341
 342 # conversion helpers
 343
 344
 345 def convert_inner(ctx, xml, result):
 346     for child in xml:
 347         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 348
 349
 350 def convert_ignore(ctx, xml):
 351     result = []
 352     convert_inner(ctx, xml, result)
 353     return result
 354
 355
 356 def convert_skip(ctx, xml):
 357     return []
 358
 359
 360 def append_idref(attrib, result):
 361     idval = attrib.get('id')
 362     if idval is not None:
 363         result.append('<a name="%s"></a>' % idval)
 364
 365
 366 def append_text(ctx, text, result):
 367     if text and ('no-strip' in ctx or text.strip()):
 368         result.append(encode_entities(text))
 369
 370
 371 missing_tags = {}
 372
 373
 374 def convert__unknown(ctx, xml):
 375     # don't recurse on subchunks
 376     if xml.tag in CHUNK_PARAMS:
 377         return []
 378     if isinstance(xml, etree._Comment):
 379         return ['<!-- ' + xml.text + '-->\n']
 380     else:
 381         # warn only once
 382         if xml.tag not in missing_tags:
 383             logging.warning('Add tag converter for "%s"', xml.tag)
 384             missing_tags[xml.tag] = True
 385         result = ['<!-- ' + xml.tag + '-->\n']
 386         convert_inner(ctx, xml, result)
 387         result.append('<!-- /' + xml.tag + '-->\n')
 388         return result
 389
 390
 391 def convert_mediaobject_children(ctx, xml, result):
 392     # look for textobject/phrase
 393     alt_text = ''
 394     textobject = xml.find('textobject')
 395     if textobject is not None:
 396         phrase = textobject.findtext('phrase')
 397         if phrase:
 398             alt_text = ' alt="%s"' % phrase
 399
 400     # look for imageobject/imagedata
 401     imageobject = xml.find('imageobject')
 402     if imageobject is not None:
 403         imagedata = imageobject.find('imagedata')
 404         if imagedata is not None:
 405             # TODO(ensonic): warn on missing fileref attr?
 406             fileref = imagedata.attrib.get('fileref', '')
 407             if fileref:
 408                 assets.add(fileref)
 409             result.append('<img src="%s"%s>' % (fileref, alt_text))
 410
 411
 412 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 413     result = ['<div class="%s">\n' % xml.tag]
 414     title_tag = xml.find('title')
 415     if title_tag is not None:
 416         append_idref(xml.attrib, result)
 417         result.append('<%s>%s</%s>' % (
 418             h_tag, ''.join(convert_title(ctx, title_tag)), h_tag))
 419     append_text(ctx, xml.text, result)
 420     inner_func(ctx, xml, result)
 421     result.append('</div>')
 422     append_text(ctx, xml.tail, result)
 423     return result
 424
 425
 426 def xml_get_title(ctx, xml):
 427     title_tag = xml.find('title')
 428     if title_tag is not None:
 429         return ''.join(convert_title(ctx, title_tag))
 430     else:
 431         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 432         return ''
 433
 434
 435 # docbook tags
 436
 437
 438 def convert_abstract(ctx, xml):
 439     result = ["""<div class="abstract">
 440     <p class="title"><b>Abstract</b></p>"""]
 441     append_text(ctx, xml.text, result)
 442     convert_inner(ctx, xml, result)
 443     result.append('</div>')
 444     append_text(ctx, xml.tail, result)
 445     return result
 446
 447
 448 def convert_acronym(ctx, xml):
 449     key = xml.text
 450     title = glossary.get(key, '')
 451     # TODO: print a sensible warning if missing
 452     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 453     if xml.tail:
 454         result.append(xml.tail)
 455     return result
 456
 457
 458 def convert_anchor(ctx, xml):
 459     return ['<a name="%s"></a>' % xml.attrib['id']]
 460
 461
 462 def convert_bookinfo(ctx, xml):
 463     result = ['<div class="titlepage">']
 464     convert_inner(ctx, xml, result)
 465     result.append("""<hr>
 466 </div>""")
 467     if xml.tail:
 468         result.append(xml.tail)
 469     return result
 470
 471
 472 def convert_blockquote(ctx, xml):
 473     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 474     append_text(ctx, xml.text, result)
 475     convert_inner(ctx, xml, result)
 476     result.append('</blockquote>\n</div>')
 477     append_text(ctx, xml.tail, result)
 478     return result
 479
 480
 481 def convert_code(ctx, xml):
 482     result = ['<code class="%s">' % xml.tag]
 483     append_text(ctx, xml.text, result)
 484     convert_inner(ctx, xml, result)
 485     result.append('</code>')
 486     append_text(ctx, xml.tail, result)
 487     return result
 488
 489
 490 def convert_colspec(ctx, xml):
 491     result = ['<col']
 492     colname = xml.attrib.get('colname')
 493     if colname is not None:
 494         result.append(' class="%s"' % colname)
 495     colwidth = xml.attrib.get('colwidth')
 496     if colwidth is not None:
 497         result.append(' width="%s"' % colwidth)
 498     result.append('>\n')
 499     # is in tgroup and there can be no 'text'
 500     return result
 501
 502
 503 def convert_command(ctx, xml):
 504     result = ['<strong class="userinput"><code>']
 505     append_text(ctx, xml.text, result)
 506     convert_inner(ctx, xml, result)
 507     result.append('</code></strong>')
 508     append_text(ctx, xml.tail, result)
 509     return result
 510
 511
 512 def convert_corpauthor(ctx, xml):
 513     result = ['<div><h3 class="corpauthor">\n']
 514     append_text(ctx, xml.text, result)
 515     convert_inner(ctx, xml, result)
 516     result.append('</h3></div>\n')
 517     append_text(ctx, xml.tail, result)
 518     return result
 519
 520
 521 def convert_div(ctx, xml):
 522     result = ['<div class="%s">\n' % xml.tag]
 523     append_text(ctx, xml.text, result)
 524     convert_inner(ctx, xml, result)
 525     result.append('</div>')
 526     append_text(ctx, xml.tail, result)
 527     return result
 528
 529
 530 def convert_emphasis(ctx, xml):
 531     role = xml.attrib.get('role')
 532     if role is not None:
 533         result = ['<span class="%s">' % role]
 534         end = '</span>'
 535     else:
 536         result = ['<span class="emphasis"><em>']
 537         end = '</em></span>'
 538     append_text(ctx, xml.text, result)
 539     convert_inner(ctx, xml, result)
 540     result.append(end)
 541     append_text(ctx, xml.tail, result)
 542     return result
 543
 544
 545 def convert_em(ctx, xml):
 546     result = ['<em class="%s">' % xml.tag]
 547     append_text(ctx, xml.text, result)
 548     convert_inner(ctx, xml, result)
 549     result.append('</em>')
 550     append_text(ctx, xml.tail, result)
 551     return result
 552
 553
 554 def convert_em_code(ctx, xml):
 555     result = ['<em class="%s"><code>' % xml.tag]
 556     append_idref(xml.attrib, result)
 557     append_text(ctx, xml.text, result)
 558     convert_inner(ctx, xml, result)
 559     result.append('</code></em>')
 560     append_text(ctx, xml.tail, result)
 561     return result
 562
 563
 564 def convert_entry(ctx, xml):
 565     entry_type = ctx['table.entry']
 566     result = ['<' + entry_type]
 567     role = xml.attrib.get('role')
 568     if role is not None:
 569         result.append(' class="%s"' % role)
 570     morerows = xml.attrib.get('morerows')
 571     if morerows is not None:
 572         result.append(' rowspan="%s"' % (1 + int(morerows)))
 573     result.append('>')
 574     append_text(ctx, xml.text, result)
 575     convert_inner(ctx, xml, result)
 576     result.append('</' + entry_type + '>')
 577     append_text(ctx, xml.tail, result)
 578     return result
 579
 580
 581 def convert_figure(ctx, xml):
 582     result = ['<div class="figure">\n']
 583     append_idref(xml.attrib, result)
 584     title_tag = xml.find('title')
 585     if title_tag is not None:
 586         # TODO(ensonic): Add a 'Figure X. ' prefix, needs a figure counter
 587         result.append('<p><b>%s</b></p>' % ''.join(convert_title(ctx, title_tag)))
 588     result.append('<div class="figure-contents">')
 589     # TODO(ensonic): title can become alt on inner 'graphic' element
 590     convert_inner(ctx, xml, result)
 591     result.append('</div></div><br class="figure-break"/>')
 592     append_text(ctx, xml.tail, result)
 593     return result
 594
 595
 596 def convert_footnote(ctx, xml):
 597     footnotes = ctx.get('footnotes', [])
 598     # footnotes idx is not per page, but per doc
 599     global footnote_idx
 600     idx = footnote_idx
 601     footnote_idx += 1
 602
 603     # need a pair of ids for each footnote (docbook generates different ids)
 604     this_id = 'footnote-%d' % idx
 605     that_id = 'ftn.' + this_id
 606
 607     inner = ['<div id="%s" class="footnote">' % that_id]
 608     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 609         this_id, idx))
 610     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 611     # get double nested paras :/.
 612     # convert_inner(ctx, xml, inner)
 613     para = xml.find('para')
 614     if para is None:
 615         para = xml.find('simpara')
 616     if para is not None:
 617         inner.append(para.text)
 618     else:
 619         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline, raw_text(xml))
 620     inner.append('</p></div>')
 621     footnotes.append(inner)
 622     ctx['footnotes'] = footnotes
 623     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 624         that_id, this_id, idx)]
 625
 626
 627 def convert_formalpara(ctx, xml):
 628     result = None
 629     title_tag = xml.find('title')
 630     result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
 631     para_tag = xml.find('para')
 632     append_text(ctx, para_tag.text, result)
 633     convert_inner(ctx, para_tag, result)
 634     append_text(ctx, para_tag.tail, result)
 635     result.append('</p>')
 636     append_text(ctx, xml.tail, result)
 637     return result
 638
 639
 640 def convert_glossdef(ctx, xml):
 641     result = ['<dd class="glossdef">']
 642     convert_inner(ctx, xml, result)
 643     result.append('</dd>\n')
 644     return result
 645
 646
 647 def convert_glossdiv(ctx, xml):
 648     title_tag = xml.find('title')
 649     title = title_tag.text
 650     xml.remove(title_tag)
 651     result = [
 652         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 653     ]
 654     convert_inner(ctx, xml, result)
 655     return result
 656
 657
 658 def convert_glossentry(ctx, xml):
 659     result = []
 660     convert_inner(ctx, xml, result)
 661     return result
 662
 663
 664 def convert_glossterm(ctx, xml):
 665     glossid = ''
 666     text = ''
 667     anchor = xml.find('anchor')
 668     if anchor is not None:
 669         glossid = anchor.attrib.get('id', '')
 670         text += anchor.tail or ''
 671     text += xml.text or ''
 672     if glossid == '':
 673         glossid = 'glossterm-' + text
 674     return [
 675         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 676             glossid, text)
 677     ]
 678
 679
 680 def convert_graphic(ctx, xml):
 681     # TODO(ensonic): warn on missing fileref attr?
 682     fileref = xml.attrib.get('fileref', '')
 683     if fileref:
 684         assets.add(fileref)
 685     return ['<div><img src="%s"></div>' % fileref]
 686
 687
 688 def convert_indexdiv(ctx, xml):
 689     title_tag = xml.find('title')
 690     title = title_tag.text
 691     xml.remove(title_tag)
 692     result = [
 693         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 694     ]
 695     convert_inner(ctx, xml, result)
 696     return result
 697
 698
 699 def convert_informaltable(ctx, xml):
 700     result = ['<div class="informaltable"><table class="informaltable"']
 701     if xml.attrib.get('pgwide') == '1':
 702         result.append(' width="100%"')
 703     if xml.attrib.get('frame') == 'none':
 704         result.append(' border="0"')
 705     result.append('>\n')
 706     convert_inner(ctx, xml, result)
 707     result.append('</table></div>')
 708     if xml.tail:
 709         result.append(xml.tail)
 710     return result
 711
 712
 713 def convert_inlinegraphic(ctx, xml):
 714     # TODO(ensonic): warn on missing fileref attr?
 715     fileref = xml.attrib.get('fileref', '')
 716     if fileref:
 717         assets.add(fileref)
 718     return ['<img src="%s">' % fileref]
 719
 720
 721 def convert_inlinemediaobject(ctx, xml):
 722     result = ['<span class="inlinemediaobject">']
 723     # no PCDATA allowed here
 724     convert_mediaobject_children(ctx, xml, result)
 725     result.append('</span>')
 726     append_text(ctx, xml.tail, result)
 727     return result
 728
 729
 730 def convert_itemizedlist(ctx, xml):
 731     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 732     convert_inner(ctx, xml, result)
 733     result.append('</ul></div>')
 734     if xml.tail:
 735         result.append(xml.tail)
 736     return result
 737
 738
 739 def convert_link(ctx, xml):
 740     linkend = xml.attrib['linkend']
 741     result = []
 742     if linkend:
 743         link_text = []
 744         append_text(ctx, xml.text, link_text)
 745         convert_inner(ctx, xml, link_text)
 746         text = ''.join(link_text)
 747
 748         (tid, href) = fixxref.GetXRef(linkend)
 749         if href:
 750             title_attr = ''
 751             title = titles.get(tid)
 752             if title:
 753                 title_attr = ' title="%s"' % title['title']
 754
 755             href = fixxref.MakeRelativeXRef(ctx['module'], href)
 756             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 757         else:
 758             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 759             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 760             result = [text]
 761     else:
 762         append_text(ctx, xml.text, result)
 763         convert_inner(ctx, xml, result)
 764     append_text(ctx, xml.tail, result)
 765     return result
 766
 767
 768 def convert_listitem(ctx, xml):
 769     result = ['<li class="listitem">']
 770     convert_inner(ctx, xml, result)
 771     result.append('</li>')
 772     # no PCDATA allowed here, is in itemizedlist
 773     return result
 774
 775
 776 def convert_literallayout(ctx, xml):
 777     result = ['<div class="literallayout"><p><br>\n']
 778     append_text(ctx, xml.text, result)
 779     convert_inner(ctx, xml, result)
 780     result.append('</p></div>')
 781     append_text(ctx, xml.tail, result)
 782     return result
 783
 784
 785 def convert_mediaobject(ctx, xml):
 786     result = ['<div class="mediaobject">\n']
 787     # no PCDATA allowed here
 788     convert_mediaobject_children(ctx, xml, result)
 789     result.append('</div>')
 790     append_text(ctx, xml.tail, result)
 791     return result
 792
 793
 794 def convert_orderedlist(ctx, xml):
 795     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 796     convert_inner(ctx, xml, result)
 797     result.append('</ol></div>')
 798     append_text(ctx, xml.tail, result)
 799     return result
 800
 801
 802 def convert_para(ctx, xml):
 803     result = []
 804     role = xml.attrib.get('role')
 805     if role is not None:
 806         result.append('<p class="%s">' % role)
 807     else:
 808         result.append('<p>')
 809     append_idref(xml.attrib, result)
 810     append_text(ctx, xml.text, result)
 811     convert_inner(ctx, xml, result)
 812     result.append('</p>')
 813     append_text(ctx, xml.tail, result)
 814     return result
 815
 816
 817 def convert_para_like(ctx, xml):
 818     result = []
 819     append_idref(xml.attrib, result)
 820     result.append('<p class="%s">' % xml.tag)
 821     append_text(ctx, xml.text, result)
 822     convert_inner(ctx, xml, result)
 823     result.append('</p>')
 824     append_text(ctx, xml.tail, result)
 825     return result
 826
 827
 828 def convert_phrase(ctx, xml):
 829     result = ['<span']
 830     role = xml.attrib.get('role')
 831     if role is not None:
 832         result.append(' class="%s">' % role)
 833     else:
 834         result.append('>')
 835     append_text(ctx, xml.text, result)
 836     convert_inner(ctx, xml, result)
 837     result.append('</span>')
 838     append_text(ctx, xml.tail, result)
 839     return result
 840
 841
 842 def convert_primaryie(ctx, xml):
 843     result = ['<dt>\n']
 844     convert_inner(ctx, xml, result)
 845     result.append('\n</dt>\n<dd></dd>\n')
 846     return result
 847
 848
 849 def convert_pre(ctx, xml):
 850     # Since we're inside <pre> don't skip newlines
 851     ctx['no-strip'] = True
 852     result = ['<pre class="%s">' % xml.tag]
 853     append_text(ctx, xml.text, result)
 854     convert_inner(ctx, xml, result)
 855     result.append('</pre>')
 856     del ctx['no-strip']
 857     append_text(ctx, xml.tail, result)
 858     return result
 859
 860
 861 def convert_programlisting(ctx, xml):
 862     result = []
 863     if xml.attrib.get('role', '') == 'example':
 864         if xml.text:
 865             lang = xml.attrib.get('language', ctx['src-lang']).lower()
 866             if lang not in LEXERS:
 867                 LEXERS[lang] = get_lexer_by_name(lang)
 868             lexer = LEXERS.get(lang, None)
 869             if lexer:
 870                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 871
 872                 # we do own line-numbering
 873                 line_count = highlighted.count('\n')
 874                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 875                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 876   <tbody>
 877     <tr>
 878       <td class="listing_lines" align="right"><pre>%s</pre></td>
 879       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 880     </tr>
 881   </tbody>
 882 </table>
 883 """ % (source_lines, highlighted))
 884             else:
 885                 logging.warn('No pygments lexer for language="%s"', lang)
 886                 result.append('<pre class="programlisting">')
 887                 result.append(xml.text)
 888                 result.append('</pre>')
 889     else:
 890         result.append('<pre class="programlisting">')
 891         append_text(ctx, xml.text, result)
 892         convert_inner(ctx, xml, result)
 893         result.append('</pre>')
 894     append_text(ctx, xml.tail, result)
 895     return result
 896
 897
 898 def convert_quote(ctx, xml):
 899     result = ['<span class="quote">"<span class="quote">']
 900     append_text(ctx, xml.text, result)
 901     convert_inner(ctx, xml, result)
 902     result.append('</span>"</span>')
 903     append_text(ctx, xml.tail, result)
 904     return result
 905
 906
 907 def convert_refsect1(ctx, xml):
 908     # Add a divider between two consequitive refsect2
 909     def convert_inner(ctx, xml, result):
 910         prev = None
 911         for child in xml:
 912             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 913                 result.append('<hr>\n')
 914             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 915             prev = child
 916     return convert_sect(ctx, xml, 'h2', convert_inner)
 917
 918
 919 def convert_refsect2(ctx, xml):
 920     return convert_sect(ctx, xml, 'h3')
 921
 922
 923 def convert_refsect3(ctx, xml):
 924     return convert_sect(ctx, xml, 'h4')
 925
 926
 927 def convert_row(ctx, xml):
 928     result = ['<tr>\n']
 929     convert_inner(ctx, xml, result)
 930     result.append('</tr>\n')
 931     return result
 932
 933
 934 def convert_sbr(ctx, xml):
 935     return ['<br>']
 936
 937
 938 def convert_sect1_tag(ctx, xml):
 939     return convert_sect(ctx, xml, 'h2')
 940
 941
 942 def convert_sect2(ctx, xml):
 943     return convert_sect(ctx, xml, 'h3')
 944
 945
 946 def convert_sect3(ctx, xml):
 947     return convert_sect(ctx, xml, 'h4')
 948
 949
 950 def convert_simpara(ctx, xml):
 951     result = ['<p>']
 952     append_text(ctx, xml.text, result)
 953     convert_inner(ctx, xml, result)
 954     result.append('</p>')
 955     append_text(ctx, xml.tail, result)
 956     return result
 957
 958
 959 def convert_span(ctx, xml):
 960     result = ['<span class="%s">' % xml.tag]
 961     append_text(ctx, xml.text, result)
 962     convert_inner(ctx, xml, result)
 963     result.append('</span>')
 964     append_text(ctx, xml.tail, result)
 965     return result
 966
 967
 968 def convert_table(ctx, xml):
 969     result = ['<div class="table">']
 970     append_idref(xml.attrib, result)
 971     title_tag = xml.find('title')
 972     if title_tag is not None:
 973         result.append('<p class="title"><b>')
 974         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 975         result.extend(convert_title(ctx, title_tag))
 976         result.append('</b></p>')
 977     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 978
 979     convert_inner(ctx, xml, result)
 980
 981     result.append('</table></div></div>')
 982     append_text(ctx, xml.tail, result)
 983     return result
 984
 985
 986 def convert_tag(ctx, xml):
 987     classval = xml.attrib.get('class')
 988     if classval is not None:
 989         result = ['<code class="sgmltag-%s">' % classval]
 990     else:
 991         result = ['<code>']
 992     append_text(ctx, xml.text, result)
 993     result.append('</code>')
 994     append_text(ctx, xml.tail, result)
 995     return result
 996
 997
 998 def convert_tbody(ctx, xml):
 999     result = ['<tbody>']
1000     ctx['table.entry'] = 'td'
1001     convert_inner(ctx, xml, result)
1002     result.append('</tbody>')
1003     # is in tgroup and there can be no 'text'
1004     return result
1005
1006
def convert_tgroup(ctx, xml):
    """Convert a 'tgroup' element.

    The tgroup itself does not expand to anything, but its direct 'colspec'
    children are collected into a leading <colgroup>. Each colspec is
    removed from the tgroup once it has been converted, so that the generic
    child conversion afterwards does not see it again. Neither text nor tail
    are emitted: the element lives in an informaltable and there can be no
    'text'.
    """
    colspecs = xml.findall('colspec')
    group = []
    if colspecs:
        group.append('<colgroup>\n')
        for colspec in colspecs:
            group += convert_colspec(ctx, colspec)
            xml.remove(colspec)
        group.append('</colgroup>\n')
    convert_inner(ctx, xml, group)
    return group
1021
1022
def convert_thead(ctx, xml):
    """Convert a 'thead' element and its children.

    Sets ctx['table.entry'] to 'th' before the children are converted.
    Neither text nor tail are emitted: the element lives in a tgroup and
    there can be no 'text'.
    """
    ctx['table.entry'] = 'th'
    head = ['<thead>']
    convert_inner(ctx, xml, head)
    head.append('</thead>')
    return head
1030
1031
def convert_title(ctx, xml):
    """Convert the content of a 'title' element without any wrapping markup.

    This is always explicitly called from some context; the generic
    dispatch table maps 'title' to convert_skip instead.

    Returns:
      list of html fragments for text, children and tail of the element
    """
    content = []
    append_text(ctx, xml.text, content)
    convert_inner(ctx, xml, content)
    append_text(ctx, xml.tail, content)
    return content
1039
1040
def convert_ulink(ctx, xml):
    """Convert a 'ulink' element into a html anchor.

    The link target is the mandatory 'url' attribute. The link label is the
    text of the element, or the url itself if there is no text. Child
    elements are not converted.

    Raises:
      KeyError: if the element has no 'url' attribute
    """
    url = xml.attrib['url']
    label = xml.text if xml.text else url
    anchor = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, label)]
    append_text(ctx, xml.tail, anchor)
    return anchor
1049
1050
def convert_userinput(ctx, xml):
    """Convert a 'userinput' element into a bold 'command' span.

    Returns:
      list of html fragments, followed by the element's tail text
    """
    opening = '<span class="command"><strong>'
    closing = '</strong></span>'

    parts = [opening]
    append_text(ctx, xml.text, parts)
    convert_inner(ctx, xml, parts)
    parts.append(closing)
    append_text(ctx, xml.tail, parts)
    return parts
1058
1059
def convert_variablelist(ctx, xml):
    """Convert a 'variablelist' element into a two-column html table.

    The children are converted into the table body. Neither text nor tail
    of the element are emitted.
    """
    table_open = """<div class="variablelist"><table border="0" class="variablelist">
<colgroup>
<col align="left" valign="top">
<col>
</colgroup>
<tbody>"""
    table_close = """</tbody>
</table></div>"""

    table = [table_open]
    convert_inner(ctx, xml, table)
    table.append(table_close)
    return table
1071
1072
def convert_varlistentry(ctx, xml):
    """Convert a 'varlistentry' element into one two-cell table row.

    The first cell holds the 'term' child (converted as a span), the second
    cell holds the converted children of the 'listitem' child.

    Args:
      ctx: conversion context dict
      xml: the 'varlistentry' element

    Returns:
      list of html fragments forming a complete <tr>...</tr> row
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # Close the row; this used to emit a second opening '<tr>' instead.
    result.append('</tr>')
    return result
1088
1089
def convert_xref(ctx, xml):
    """Convert an 'xref' element into a link labelled with the target title.

    The 'linkend' attribute is resolved through fixxref.GetXRef() and the
    title of the target is looked up in the 'titles' table. If a lookup
    fails with a KeyError, a warning is logged and no link is emitted.

    Returns:
      list of html fragments, followed by the element's tail text
    """
    target_id, href = fixxref.GetXRef(xml.attrib['linkend'])
    result = []
    try:
        entry = titles[target_id]
        # all sectN need to become 'section'
        kind = entry['tag']
        if kind in ('sect1', 'sect2', 'sect3', 'sect4', 'sect5'):
            kind = 'section'
        link = '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' % (
            href, entry['title'], kind, ''.join(convert_title(ctx, entry['xml'])))
        result = [link]
    except KeyError:
        logging.warning('invalid linkend "%s"', target_id)

    append_text(ctx, xml.tail, result)
    return result
1114
1115
# Dispatch table for inline/block conversion: maps a docbook tag name to the
# function that converts such an element. Every converter is called as
# converter(ctx, xml) and returns a list of html fragments. Lookups use
# convert__unknown as the fallback for tags that are not listed here.
# Several tags intentionally share one generic converter (e.g. convert_code,
# convert_span, convert_div).
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'classname': convert_code,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'figure': convert_figure,
    'filename': convert_code,
    'firstterm': convert_em,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'graphic': convert_graphic,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_inlinemediaobject,
    'interfacename': convert_code,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_mediaobject,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_code,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_code,
    'returnvalue': convert_span,
    'row': convert_row,
    'sbr': convert_sbr,
    'screen': convert_pre,
    'section': convert_sect2,      # FIXME: need tracking of nesting
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'simplesect': convert_sect2,   # FIXME: need tracking of nesting
    'structfield': convert_em_code,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tag': convert_tag,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # titles are converted explicitly by their parent (see convert_title)
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1205
1206 # conversion helpers
1207
# Start of every generated html page, up to and including the opening <body>.
# Takes two '%s' arguments: the page title and the pre-rendered <link>
# elements for the <head> (may be an empty string).
HTML_HEADER = (
    '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">\n'
    '<html>\n'
    '<head>\n'
    '<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">\n'
    '<title>%s</title>\n'
    '%s<link rel="stylesheet" href="style.css" type="text/css">\n'
    '</head>\n'
    '<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">\n'
)
1217
1218
def generate_head_links(ctx):
    """Generate the navigation <link> elements for the html <head>.

    Args:
      ctx: conversion context dict; 'nav_home' is required, 'nav_up',
        'nav_prev' and 'nav_next' are optional. Each value needs the
        attributes 'filename' and 'raw_title'.

    Returns:
      one string with a newline-terminated <link> element per target
    """
    link = '<link rel="%s" href="%s" title="%s">\n'

    home = ctx['nav_home']
    links = [link % ('home', home.filename, home.raw_title)]
    for rel in ('up', 'prev', 'next'):
        target = ctx.get('nav_' + rel)
        if target is not None:
            links.append(link % (rel, target.filename, target.raw_title))
    return ''.join(links)
1238
1239
def generate_nav_links(ctx):
    """Generate the table cells with the home/up/prev/next navigation icons.

    The home cell is always a link. The up, prev and next cells are links
    when ctx has the respective 'nav_*' entry and show the insensitive icon
    without a link otherwise.

    Args:
      ctx: conversion context dict; 'nav_home' is required, 'nav_up',
        'nav_prev' and 'nav_next' are optional. Each value needs a
        'filename' attribute.

    Returns:
      one string with four <td> elements
    """
    active = ('<td><a accesskey="%s" href="%s"><img src="%s.png" '
              'width="16" height="16" border="0" alt="%s"></a></td>')
    insensitive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    for key, accesskey, icon, alt in (
            ('nav_up', 'u', 'up', 'Up'),
            ('nav_prev', 'p', 'left', 'Prev'),
            ('nav_next', 'n', 'right', 'Next')):
        target = ctx.get(key)
        if target is None:
            cells.append(insensitive % icon)
        else:
            cells.append(active % (accesskey, target.filename, icon, alt))
    return ''.join(cells)
1268
1269
def generate_toc(ctx, node):
    """Generate the nested table-of-contents entries below a node.

    Args:
      ctx: conversion context dict, only passed on to the recursion
      node: tree node; each of its 'children' needs the attributes
        'filename', 'anchor', 'title', 'title_tag', 'subtitle',
        'subtitle_tag' and 'children'

    Returns:
      list of html fragments: one <dt> per child, followed by a <dd><dl>
      with the entries of the child's own children, if there are any
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = (child.filename + child.anchor) if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries += ['<dd><dl>'] + generate_toc(ctx, child) + ['</dl></dd>']
    return entries
1287
1288
def generate_basic_nav(ctx):
    """Generate the top navigation table with an empty shortcuts cell.

    Returns:
      one html string; the navigation icon cells come from
      generate_nav_links()
    """
    nav_cells = generate_nav_links(ctx)
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """
    return template % nav_cells
1297
1298
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Generate the top navigation table with one shortcut per division.

    Args:
      ctx: conversion context dict
      divs: elements whose titles (from xml_get_title()) become shortcuts
      prefix: prefix put in front of the title to form the link anchor
      span_id: suffix for the id ('nav_<span_id>') of the shortcuts span

    Returns:
      one html string
    """
    div_titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = '\n<span class="dim">|</span>\n'.join(
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in div_titles)
    nav_cells = generate_nav_links(ctx)

    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """ % (span_id, shortcuts, nav_cells)
1316
1317
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to result.

    Besides the 'Top' shortcut, one shortcut is added for each refsect1 that
    qualifies. Sections are skipped if their role ends with '_proto' (TOC
    sections), if they have no 'id' attribute, or if the id contains no '.'
    (foreign sections).

    Args:
      ctx: conversion context dict
      refsect1s: the 'refsect1' elements of the page
      result: list of html fragments that is appended to
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        is_toc_section = sect.attrib.get('role', '').endswith("_proto")
        ref_id = sect.attrib.get('id')
        if is_toc_section or ref_id is None or '.' not in ref_id:
            continue

        title = xml_get_title(ctx, sect)
        # the part after the first '.' names the section
        span_id = ref_id.split('.')[1].replace('-', '_')

        result.append("""
          <span id="nav_%s">
            <span class="dim">|</span>
            <a href="#%s" class="shortcut">%s</a>
          </span>
          """ % (span_id, ref_id, title))

    nav_cells = generate_nav_links(ctx)
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % nav_cells)
1352
1353
def generate_footer(ctx):
    """Generate the footnotes section of a page.

    Args:
      ctx: conversion context dict; the optional 'footnotes' entry is a
        list of footnotes, each being a list of html fragments

    Returns:
      list of html fragments; empty if ctx has no footnotes
    """
    footnotes = ctx.get('footnotes')
    if footnotes is None:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    footer.extend(fragment for footnote in footnotes for fragment in footnote)
    footer.append('</div>\n')
    return footer
1366
1367
def get_id_path(node):
    """Generate the index path that is used to build an 'id' for a node.

    We walk up the xml-tree and record the 1-based position of each element
    among its siblings. When reaching the top of the xml-tree we continue
    with the 1-based 'idx' of the node and of all its ancestors in the
    chunked-tree.

    Args:
      node: chunked-tree node with the attributes 'xml', 'idx' and 'parent'

    Returns:
      list of position strings, outermost level first
    """
    # Collected innermost-first and reversed once at the end.
    ix = []
    xml = node.xml
    parent = xml.getparent()
    while parent is not None:
        # parent.index() replaces the deprecated getchildren().index()
        ix.append(str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    while node is not None:
        ix.append(str(node.idx + 1))
        node = node.parent

    ix.reverse()
    return ix
1387
1388
def get_id(node):
    """Return the 'id' of the node's xml element, generating one if needed.

    A missing or empty 'id' attribute is logged and replaced with
    'id-' followed by the dot-separated index path from get_id_path().
    """
    element = node.xml
    explicit_id = element.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     element.sourceline, element.tag)
        # logging.warning('%s: id indexes: %s', node.filename, str(ix))
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1402
1403
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert the chunk in ctx['node'] into a page with a table of contents.

    Args:
      ctx: conversion context dict; 'node' is the chunk to convert
      div_class: css class for the <div> that wraps the page content
      title_tag: html tag name for the page title, e.g. 'h2'

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    if node.title:
        heading = """
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (title_tag, get_id(node), node.title, title_tag)
        page.append(heading)

    toc = generate_toc(ctx, node)
    if toc:
        # TODO: not all docbook page types use this extra heading
        page.append("""<p><b>Table of Contents</b></p>
    <div class="toc">
      <dl class="toc">
    """)
        page += toc
        page.append("""</dl>
    </div>
    """)
    convert_inner(ctx, node.xml, page)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1435
1436
1437 # docbook chunks
1438
1439
def convert_book(ctx):
    """Convert the 'book' chunk in ctx['node'] into the main index page.

    The page consists of the title, the converted first 'bookinfo' child and
    the table of contents for the whole tree (starting at node.root).

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    head = HTML_HEADER % (node.title, generate_head_links(ctx))
    title_bar = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    page = [head, title_bar]

    bookinfo = node.xml.findall('bookinfo')[0]
    page += convert_bookinfo(ctx, bookinfo)

    page.append("""<div class="toc">
  <dl class="toc">
""")
    page += generate_toc(ctx, node.root)
    page.append("""</dl>
</div>
""")

    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1464
1465
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a TOC page in a 'chapter' div, 'h2' title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1468
1469
def convert_glossary(ctx):
    """Convert the 'glossary' chunk in ctx['node'] into a page.

    The direct 'glossdiv' children provide the shortcuts in the navigation
    and are converted one after another into the page body. The heading
    level of the page title is node.depth.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page_title = node.title + ": " + node.root.title
    head = HTML_HEADER % (page_title, generate_head_links(ctx))
    nav = generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary')
    heading = """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)

    page = [head, nav, heading]
    for glossdiv in glossdivs:
        page += convert_glossdiv(ctx, glossdiv)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1489
1490
def convert_index(ctx):
    """Convert the 'index' chunk in ctx['node'] into a page.

    The 'indexdiv' elements nested in the first top-level 'indexdiv' provide
    the shortcuts in the navigation and are converted one after another into
    the page body. The heading level of the page title is node.depth.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')

    page_title = node.title + ": " + node.root.title
    head = HTML_HEADER % (page_title, generate_head_links(ctx))
    nav = generate_alpha_nav(ctx, indexdivs, 'idx', 'index')
    heading = """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)

    page = [head, nav, heading]
    for indexdiv in indexdivs:
        page += convert_indexdiv(ctx, indexdiv)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1511
1512
def convert_part(ctx):
    """Convert a 'part' chunk: a TOC page in a 'part' div, 'h1' title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1515
1516
def convert_preface(ctx):
    """Convert the 'preface' chunk in ctx['node'] into a page without a TOC.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">',
    ]
    if node.title:
        heading = """
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title)
        page.append(heading)
    convert_inner(ctx, node.xml, page)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1535
1536
def convert_reference(ctx):
    """Convert a 'reference' chunk: a TOC page in a 'reference' div, 'h1' title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1539
1540
def convert_refentry(ctx):
    """Convert the 'refentry' chunk in ctx['node'] into a page.

    The page has a navigation with shortcuts to the refsect1 sections, a
    title block with an optional gallery image (taken from
    refmeta/refmiscinfo/inlinegraphic) and the converted direct 'refsect1'
    children.

    Returns:
      list of html fragments for the complete page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Walk down to the optional gallery image; stop at the first missing level.
    gallery = ''
    cursor = node.xml
    for child_tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        cursor = cursor.find(child_tag)
        if cursor is None:
            break
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, cursor))

    page_title = node.title + ": " + node.root.title
    page = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, page)
    page.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        page += convert_refsect1(ctx, refsect1)
    page += generate_footer(ctx)
    page.append("""</div>
</body>
</html>""")
    return page
1580
1581
def convert_section(ctx):
    """Convert a 'section' chunk: a TOC page in a 'section' div, 'h2' title."""
    return convert_chunk_with_toc(ctx, div_class='section', title_tag='h2')
1584
1585
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: a TOC page in a 'sect1' div, 'h2' title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1588
1589
# Dispatch table for page conversion: maps the name of a chunk node to the
# function that renders the whole html page for it. Each converter is called
# as converter(ctx) and returns a list of html fragments; convert() looks the
# converter up by node.name and logs a warning for names not listed here.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'section': convert_section,
    'sect1': convert_sect1,
}
1603
1604
def generate_nav_nodes(files, node):
    """Collect the navigation targets for a node.

    Args:
      files: list of all nodes in page order; must contain node
      node: current tree node with the attributes 'root' and 'parent'

    Returns:
      dict with 'nav_home' and, where they exist, 'nav_up' (the parent),
      'nav_prev' and 'nav_next' (the neighbours in files)
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    last = len(files) - 1
    if position > 0:
        nav['nav_prev'] = files[position - 1]
    if position < last:
        nav['nav_next'] = files[position + 1]
    return nav
1618
1619
def convert(out_dir, module, files, node, src_lang):
    """Convert one docbook chunk to a html file.

    The output file is created (or truncated) even if there is no chunk
    converter for the node; in that case a warning is logged and the file
    stays empty.

    Args:
      out_dir: already created output dir
      module: module name, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
      src_lang: default source language, stored in the context as 'src-lang'
    """
    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
            'src-lang': src_lang,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add chunk converter for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
1646
1647
def create_devhelp2_toc(node):
    """Generate the nested <sub> elements for the devhelp2 chapters section.

    Args:
      node: tree node; each of its 'children' needs the attributes
        'raw_title', 'filename' and 'children'

    Returns:
      list of xml fragments; children without children of their own become
      self-closing <sub/> elements
    """
    entries = []
    for child in node.children:
        attribs = (child.raw_title, child.filename)
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % attribs)
            continue
        entries.append('<sub name="%s" link="%s">\n' % attribs)
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
1658
1659
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into devhelp2 attributes.

    The condition holds '|' separated entries of the form 'name:value' or
    just 'name' (e.g. since, deprecated). Double quotes are replaced with
    '&quot;' before splitting.

    Returns:
      a string of ' name="value"' attributes with a leading space, or an
      empty string if the node has no 'condition' attribute
    """
    condition = node.attrib.get('condition')
    if condition is None:
        return ''

    keywords = []
    # condition -> since, deprecated, ... (separated with '|')
    for entry in condition.replace('"', '&quot;').split('|'):
        # deprecated can have no description; the value is empty then
        name, _, value = entry.partition(':')
        keywords.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(keywords)
1675
1676
def create_devhelp2_refsect2_keyword(node, base_link):
    """Generate the devhelp2 <keyword> element for a refsect2 node.

    The keyword type is the 'role' attribute, the name is the title
    registered for the node 'id' in the titles table and the link is
    base_link followed by the id.

    Raises:
      KeyError: if the node has no 'id' or 'role' attribute, or if there is
        no title registered for the id
    """
    node_id = node.attrib['id']
    role = node.attrib['role']
    name = titles[node_id]['title']
    link = base_link + node_id
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, name, link, conditions)
1682
1683
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Generate the devhelp2 <keyword> element for a member row.

    Args:
      node: element providing the 'role' and optional 'condition' attributes
      base_link: link prefix that name is appended to
      title: value for the 'name' attribute of the keyword
      name: anchor name that is appended to base_link

    Raises:
      KeyError: if the node has no 'role' attribute
    """
    role = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (role, title, link, conditions)
1688
1689
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    The file contains the chapter structure of the tree and one keyword for
    each 'refsect2' with a role, plus keywords for the enum and struct
    members documented inside those sections. Book title and online url are
    taken from /book/bookinfo and default to empty strings when missing.

    Args:
      out_dir: already created output dir
      module: module name; used for the file name and the book name
      xml: root element of the loaded document
      files: list of chunk nodes; must not be empty
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_url = ''
        # xpath() returns a (possibly empty) list, never None, so each result
        # has to be checked for emptiness before taking the first item.
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            title_texts = bookinfo.xpath('./title/text()')
            if title_texts:
                title = title_texts[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
  <chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        idx.writelines(result)
1739
1740
def get_dirs(uninstalled):
    """Determine the gtk-doc data directory and the style directory.

    Args:
      uninstalled: if true, use the directory of the running script, or
        $ABS_TOP_SRCDIR when only that one contains 'gtk-doc.xsl'; the style
        directory is its 'style' subdirectory. Otherwise both directories
        are 'gtk-doc/data' below config.datadir.

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        installed_dir = os.path.join(config.datadir, 'gtk-doc/data')
        return (installed_dir, installed_dir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1754
1755
def main(module, index_file, out_dir, uninstalled, src_lang, paths):
    """Generate the html pages and the devhelp2 index for a module.

    Runs the loading, processing and output phases in order and logs the
    time taken by each step.

    Args:
      module: module name, passed on to chunk(), convert() and
        create_devhelp2()
      index_file: the main docbook xml document to load
      out_dir: output directory; receives all generated and copied files
      uninstalled: passed to get_dirs() to select the data file location
      src_lang: default source language, passed on to convert()
      paths: list of extra directories to search for assets. They are
        searched in the given order, followed by the current working
        directory; the first directory that has the asset wins.
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the syntax highlighting styles to the copied stylesheet
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node, src_lang)
    logging.warning("7: %7.3lf: create html", timer() - _t)

    # 8) copy assets over
    _t = timer()
    # De-duplicate the search path but keep its order. A set would make the
    # lookup order arbitrary, and with it the choice of which file is used
    # when an asset exists in more than one directory.
    search_paths = []
    for p in paths + [os.getcwd()]:
        if p not in search_paths:
            search_paths.append(p)
    for a in assets:
        logging.info('trying %s in %s', a, str(search_paths))
        for p in search_paths:
            try:
                shutil.copy(os.path.join(p, a), out_dir)
            except FileNotFoundError:
                continue
            # first match wins, don't let a later directory overwrite it
            break
        else:
            logging.warning('file %s not found in path (did you add --path?)', a)
    logging.warning("8: %7.3lf: copy assets", timer() - _t)
1846
1847
def run(options):
    """Entry point: set up the output directory and run main().

    Reads the module name and the path of the main document from
    `options.args`, makes sure a 'db2html' directory exists next to the
    document and exits the process with the result of main().
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # a left-over from a previous run is fine, everything else is fatal
        pass

    status = main(module, document, out_dir, options.uninstalled,
                  options.src_lang, options.path)
    sys.exit(status)