gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
  34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - 'section'/'simplesect' - the first we convert as a chunk, the nested ones we
  40     need to convert as 'sect{2,3,4,...}, we can track depth in 'ctx'
  41   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  42   - inside 'glossentry' we're only handling 'glossterm' and 'glossdef'
  43   - convert_{figure,table} need counters.
  44 - check each docbook tag if it can contain #PCDATA, if not don't check for
  45   xml.text/xml.tail and add a comment (# no PCDATA allowed here)
  46 - find a better way to print context for warnings
  47   - we use 'xml.sourceline', but this all does not help a lot due to xi:include
  48 - consolidate title handling:
  49   - always use the titles-dict
  50     - convert_title(): uses titles.get(tid)['title']
  51     - convert_xref(): uses titles[tid]['tag'], ['title'] and ['xml']
  52     - create_devhelp2_refsect2_keyword(): uses titles[tid]['title']
  53   - there only store what we have (xml, tag, ...)
  54   - when chunking generate 'id's and add entries to titles-dict
  55   - add accessors for title and raw_title that lazily get them
  56   - see if any of the other ~10 places that call convert_title() could use this
  57     cache
  58 - performance
  59   - consider some perf-warnings flag
  60     - see 'No "id" attribute on'
  61   - xinclude processing in libxml2 is slow
  62     - if we disable it, we get '{http://www.w3.org/2003/XInclude}include' tags
  63       and we could try handling them ourself, in some cases those are subtrees
  64       that we extract for chunking anyway
  65
  66 DIFFERENCES:
  67 - titles
  68   - we add the chunk label to the title in toc, on the page and in nav tooltips
  69   - docbook xsl only sometimes adds the label to the titles and when it does it
  70     adds name chunk type too (e.g. 'Part I.' instead of 'I.')
  71 - navigation
  72   - we always add an up-link except on the first page
  73 - footer
  74   - we're now omitting the footer
  75 - tocs
  76   - we always add "Table of Contents" before a toc
  77   - docbook does that for some pages, it is configurable
  78
  79 OPTIONAL:
  80 - minify html: https://pypi.python.org/pypi/htmlmin/
  81
  82 Requirements:
  83 sudo pip3 install anytree lxml pygments
  84
  85 Example invocation:
  86 cd tests/bugs/docs/
  87 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  88 xdg-open db2html/index.html
  89 meld html db2html
  90
  91 Benchmarking:
  92 cd tests/bugs/docs/;
  93 rm html-build.stamp; time make html-build.stamp
  94 """
  95
  96 import argparse
  97 import errno
  98 import logging
  99 import os
 100 import shutil
 101 import sys
 102
 103 from anytree import Node, PreOrderIter
 104 from copy import deepcopy
 105 from glob import glob
 106 from lxml import etree
 107 from pygments import highlight
 108 from pygments.lexers import CLexer
 109 from pygments.formatters import HtmlFormatter
 110 from timeit import default_timer as timer
 111
 112 from . import config, fixxref
 113
# pygments setup
# Cache of pygments lexer instances keyed by the lower-case language name.
# Only the C lexer is created up front, convert_programlisting() adds the
# lexers for other languages on first use.
LEXERS = {
    'c': CLexer()
}
# Formatter shared by all listings. It is created with nowrap=True, the
# surrounding <pre> markup is written by convert_programlisting() itself.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
 120
 121
 122 class ChunkParams(object):
 123     def __init__(self, prefix, parent=None, min_idx=0):
 124         self.prefix = prefix
 125         self.parent = parent
 126         self.min_idx = min_idx
 127         self.idx = 1
 128
 129
# Used as 'min_idx' for tags that should never start a chunk of their own:
# chunk() compares the child index against 'min_idx' and no index reaches
# infinity.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# TAGS we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'

# Maps a tag name to a (title-xpath, subtitle-xpath) pair. The subtitle xpath
# can be None and '_' is the fallback entry for all tags not listed here.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# selects all elements that have an 'id' attribute
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# maps the text of a 'glossterm' to the text of its 'glossdef', filled by
# build_glossary()
glossary = {}

# number of the next footnote, incremented by convert_footnote()
footnote_idx = 1

# nested dict with subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
titles = {}

# files to copy
assets = set()
 184
 185
 186 def encode_entities(text):
 187     return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
 188
 189
 190 def raw_text(xml):
 191     return etree.tostring(xml, method="text", encoding=str).strip()
 192
 193
 194 def gen_chunk_name(node, chunk_params):
 195     """Generate a chunk file name
 196
 197     This is either based on the id or on the position in the doc. In the latter
 198     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 199     type.
 200     """
 201     idval = node.attrib.get('id')
 202     if idval is not None:
 203         return idval
 204
 205     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 206     chunk_params.idx += 1
 207
 208     # handle parents to make names of nested tags like in docbook
 209     # - we only need to prepend the parent if there are > 1 of them in the
 210     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 211     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 212     #   when we chunk explicitly and on each level maintain the 'idx'
 213     # while chunk_params.parent:
 214     #     parent = chunk_params.parent
 215     #     if parent not in CHUNK_PARAMS:
 216     #         break;
 217     #     chunk_params = CHUNK_PARAMS[parent]
 218     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 219
 220     logging.info('Gen chunk name: "%s"', name)
 221     return name
 222
 223
 224 def get_chunk_titles(module, node):
 225     tag = node.tag
 226     (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
 227
 228     ctx = {
 229         'module': module,
 230         'files': [],
 231     }
 232     result = {
 233         'title': None,
 234         'title_tag': None,
 235         'subtitle': None,
 236         'subtitle_tag': None
 237     }
 238     res = title(node)
 239     if res:
 240         # handle chunk label for tocs
 241         label = node.attrib.get('label')
 242         if label:
 243             label += '. '
 244         else:
 245             label = ''
 246
 247         xml = res[0]
 248         # TODO: consider to eval 'title'/'raw_title' lazily
 249         result['title'] = label + ''.join(convert_title(ctx, xml))
 250         result['raw_title'] = encode_entities(raw_text(xml))
 251         if xml.tag != 'title':
 252             result['title_tag'] = xml.tag
 253         else:
 254             result['title_tag'] = tag
 255
 256     if subtitle:
 257         res = subtitle(node)
 258         if res:
 259             xml = res[0]
 260             result['subtitle'] = ''.join(convert_title(ctx, xml))
 261             result['subtitle_tag'] = xml.tag
 262     return result
 263
 264
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.

    Only tags listed in CHUNK_PARAMS create tree nodes. For any other tag the
    function returns the parent unchanged and does not descend into the
    children.

    Args:
      xml_node: the xml element to process
      module: the module name, passed on to get_chunk_titles()
      depth: nesting level, stored on the created tree node
      idx: the number of siblings with a chunk tag that precede xml_node
      parent: the tree node of the enclosing chunk or None

    Returns:
      the tree node created for xml_node or parent if none was created
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent
                # The chunk continues to live as a copy that is the root of
                # its own tree.
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # Not a page of its own: the node shares the file of the parent
            # and is addressed through an anchor.
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        depth += 1
        # idx counts the children that have a chunk tag
        idx = 0
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
 306
 307
 308 def add_id_links_and_titles(files, links):
 309     for node in files:
 310         chunk_name = node.filename[:-5]
 311         chunk_base = node.filename + '#'
 312         for elem in ID_XPATH(node.xml):
 313             attr = elem.attrib['id']
 314             if attr == chunk_name:
 315                 links[attr] = node.filename
 316             else:
 317                 links[attr] = chunk_base + attr
 318
 319             title = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
 320             res = title(elem)
 321             if res:
 322                 xml = res[0]
 323                 # TODO: consider to eval 'title' lazily
 324                 titles[attr] = {
 325                     'title': encode_entities(raw_text(xml)),
 326                     'xml': xml,
 327                     'tag': elem.tag,
 328                 }
 329
 330
 331 def build_glossary(files):
 332     for node in files:
 333         if node.xml.tag != 'glossary':
 334             continue
 335         for term in GLOSSENTRY_XPATH(node.xml):
 336             # TODO: there can be all kind of things in a glossary. This only supports
 337             # what we commonly use, glossterm is mandatory
 338             key_node = term.find('glossterm')
 339             val_node = term.find('glossdef')
 340             if key_node is not None and val_node is not None:
 341                 glossary[raw_text(key_node)] = raw_text(val_node)
 342             else:
 343                 debug = []
 344                 if key_node is None:
 345                     debug.append('missing key')
 346                 if val_node is None:
 347                     debug.append('missing val')
 348                 logging.warning('Broken glossentry "%s": %s',
 349                                 term.attrib['id'], ','.join(debug))
 350
 351
 352 # conversion helpers
 353
 354
 355 def convert_inner(ctx, xml, result):
 356     for child in xml:
 357         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 358
 359
 360 def convert_ignore(ctx, xml):
 361     result = []
 362     convert_inner(ctx, xml, result)
 363     return result
 364
 365
 366 def convert_skip(ctx, xml):
 367     return []
 368
 369
 370 def append_idref(attrib, result):
 371     idval = attrib.get('id')
 372     if idval is not None:
 373         result.append('<a name="%s"></a>' % idval)
 374
 375
 376 def append_text(ctx, text, result):
 377     if text and ('no-strip' in ctx or text.strip()):
 378         result.append(encode_entities(text))
 379
 380
# tags for which convert__unknown() has already logged a warning
missing_tags = {}
 382
 383
 384 def convert__unknown(ctx, xml):
 385     # don't recurse on subchunks
 386     if xml.tag in CHUNK_PARAMS:
 387         return []
 388     if isinstance(xml, etree._Comment):
 389         return ['<!-- ' + xml.text + '-->\n']
 390     else:
 391         # warn only once
 392         if xml.tag not in missing_tags:
 393             logging.warning('Add tag converter for "%s"', xml.tag)
 394             missing_tags[xml.tag] = True
 395         result = ['<!-- ' + xml.tag + '-->\n']
 396         convert_inner(ctx, xml, result)
 397         result.append('<!-- /' + xml.tag + '-->\n')
 398         return result
 399
 400
 401 def convert_mediaobject_children(ctx, xml, result):
 402     # look for textobject/phrase
 403     alt_text = ''
 404     textobject = xml.find('textobject')
 405     if textobject is not None:
 406         phrase = textobject.findtext('phrase')
 407         if phrase:
 408             alt_text = ' alt="%s"' % phrase
 409
 410     # look for imageobject/imagedata
 411     imageobject = xml.find('imageobject')
 412     if imageobject is not None:
 413         imagedata = imageobject.find('imagedata')
 414         if imagedata is not None:
 415             # TODO(ensonic): warn on missing fileref attr?
 416             fileref = imagedata.attrib.get('fileref', '')
 417             if fileref:
 418                 assets.add(fileref)
 419             result.append('<img src="%s"%s>' % (fileref, alt_text))
 420
 421
 422 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 423     result = ['<div class="%s">\n' % xml.tag]
 424     title_tag = xml.find('title')
 425     if title_tag is not None:
 426         append_idref(xml.attrib, result)
 427         result.append('<%s>%s</%s>' % (
 428             h_tag, ''.join(convert_title(ctx, title_tag)), h_tag))
 429     append_text(ctx, xml.text, result)
 430     inner_func(ctx, xml, result)
 431     result.append('</div>')
 432     append_text(ctx, xml.tail, result)
 433     return result
 434
 435
 436 def xml_get_title(ctx, xml):
 437     title_tag = xml.find('title')
 438     if title_tag is not None:
 439         return ''.join(convert_title(ctx, title_tag))
 440     else:
 441         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 442         return ''
 443
 444
 445 # docbook tags
 446
 447
 448 def convert_abstract(ctx, xml):
 449     result = ["""<div class="abstract">
 450     <p class="title"><b>Abstract</b></p>"""]
 451     append_text(ctx, xml.text, result)
 452     convert_inner(ctx, xml, result)
 453     result.append('</div>')
 454     append_text(ctx, xml.tail, result)
 455     return result
 456
 457
 458 def convert_acronym(ctx, xml):
 459     key = xml.text
 460     title = glossary.get(key, '')
 461     # TODO: print a sensible warning if missing
 462     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 463     if xml.tail:
 464         result.append(xml.tail)
 465     return result
 466
 467
 468 def convert_anchor(ctx, xml):
 469     return ['<a name="%s"></a>' % xml.attrib['id']]
 470
 471
 472 def convert_bookinfo(ctx, xml):
 473     result = ['<div class="titlepage">']
 474     convert_inner(ctx, xml, result)
 475     result.append("""<hr>
 476 </div>""")
 477     if xml.tail:
 478         result.append(xml.tail)
 479     return result
 480
 481
 482 def convert_blockquote(ctx, xml):
 483     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 484     append_text(ctx, xml.text, result)
 485     convert_inner(ctx, xml, result)
 486     result.append('</blockquote>\n</div>')
 487     append_text(ctx, xml.tail, result)
 488     return result
 489
 490
 491 def convert_code(ctx, xml):
 492     result = ['<code class="%s">' % xml.tag]
 493     append_text(ctx, xml.text, result)
 494     convert_inner(ctx, xml, result)
 495     result.append('</code>')
 496     append_text(ctx, xml.tail, result)
 497     return result
 498
 499
 500 def convert_colspec(ctx, xml):
 501     result = ['<col']
 502     colname = xml.attrib.get('colname')
 503     if colname is not None:
 504         result.append(' class="%s"' % colname)
 505     colwidth = xml.attrib.get('colwidth')
 506     if colwidth is not None:
 507         result.append(' width="%s"' % colwidth)
 508     result.append('>\n')
 509     # is in tgroup and there can be no 'text'
 510     return result
 511
 512
 513 def convert_command(ctx, xml):
 514     result = ['<strong class="userinput"><code>']
 515     append_text(ctx, xml.text, result)
 516     convert_inner(ctx, xml, result)
 517     result.append('</code></strong>')
 518     append_text(ctx, xml.tail, result)
 519     return result
 520
 521
 522 def convert_corpauthor(ctx, xml):
 523     result = ['<div><h3 class="corpauthor">\n']
 524     append_text(ctx, xml.text, result)
 525     convert_inner(ctx, xml, result)
 526     result.append('</h3></div>\n')
 527     append_text(ctx, xml.tail, result)
 528     return result
 529
 530
 531 def convert_div(ctx, xml):
 532     result = ['<div class="%s">\n' % xml.tag]
 533     append_text(ctx, xml.text, result)
 534     convert_inner(ctx, xml, result)
 535     result.append('</div>')
 536     append_text(ctx, xml.tail, result)
 537     return result
 538
 539
 540 def convert_emphasis(ctx, xml):
 541     role = xml.attrib.get('role')
 542     if role is not None:
 543         result = ['<span class="%s">' % role]
 544         end = '</span>'
 545     else:
 546         result = ['<span class="emphasis"><em>']
 547         end = '</em></span>'
 548     append_text(ctx, xml.text, result)
 549     convert_inner(ctx, xml, result)
 550     result.append(end)
 551     append_text(ctx, xml.tail, result)
 552     return result
 553
 554
 555 def convert_em(ctx, xml):
 556     result = ['<em class="%s">' % xml.tag]
 557     append_text(ctx, xml.text, result)
 558     convert_inner(ctx, xml, result)
 559     result.append('</em>')
 560     append_text(ctx, xml.tail, result)
 561     return result
 562
 563
 564 def convert_em_code(ctx, xml):
 565     result = ['<em class="%s"><code>' % xml.tag]
 566     append_idref(xml.attrib, result)
 567     append_text(ctx, xml.text, result)
 568     convert_inner(ctx, xml, result)
 569     result.append('</code></em>')
 570     append_text(ctx, xml.tail, result)
 571     return result
 572
 573
 574 def convert_entry(ctx, xml):
 575     entry_type = ctx['table.entry']
 576     result = ['<' + entry_type]
 577     role = xml.attrib.get('role')
 578     if role is not None:
 579         result.append(' class="%s"' % role)
 580     morerows = xml.attrib.get('morerows')
 581     if morerows is not None:
 582         result.append(' rowspan="%s"' % (1 + int(morerows)))
 583     result.append('>')
 584     append_text(ctx, xml.text, result)
 585     convert_inner(ctx, xml, result)
 586     result.append('</' + entry_type + '>')
 587     append_text(ctx, xml.tail, result)
 588     return result
 589
 590
 591 def convert_figure(ctx, xml):
 592     result = ['<div class="figure">\n']
 593     append_idref(xml.attrib, result)
 594     title_tag = xml.find('title')
 595     if title_tag is not None:
 596         # TODO(ensonic): Add a 'Figure X. ' prefix, needs a figure counter
 597         result.append('<p><b>%s</b></p>' % ''.join(convert_title(ctx, title_tag)))
 598     result.append('<div class="figure-contents">')
 599     # TODO(ensonic): title can become alt on inner 'graphic' element
 600     convert_inner(ctx, xml, result)
 601     result.append('</div></div><br class="figure-break"/>')
 602     append_text(ctx, xml.tail, result)
 603     return result
 604
 605
 606 def convert_footnote(ctx, xml):
 607     footnotes = ctx.get('footnotes', [])
 608     # footnotes idx is not per page, but per doc
 609     global footnote_idx
 610     idx = footnote_idx
 611     footnote_idx += 1
 612
 613     # need a pair of ids for each footnote (docbook generates different ids)
 614     this_id = 'footnote-%d' % idx
 615     that_id = 'ftn.' + this_id
 616
 617     inner = ['<div id="%s" class="footnote">' % that_id]
 618     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 619         this_id, idx))
 620     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 621     # get double nested paras :/.
 622     # convert_inner(ctx, xml, inner)
 623     para = xml.find('para')
 624     if para is None:
 625         para = xml.find('simpara')
 626     if para is not None:
 627         inner.append(para.text)
 628     else:
 629         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline, raw_text(xml))
 630     inner.append('</p></div>')
 631     footnotes.append(inner)
 632     ctx['footnotes'] = footnotes
 633     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 634         that_id, this_id, idx)]
 635
 636
 637 def convert_formalpara(ctx, xml):
 638     result = None
 639     title_tag = xml.find('title')
 640     result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
 641     para_tag = xml.find('para')
 642     append_text(ctx, para_tag.text, result)
 643     convert_inner(ctx, para_tag, result)
 644     append_text(ctx, para_tag.tail, result)
 645     result.append('</p>')
 646     append_text(ctx, xml.tail, result)
 647     return result
 648
 649
 650 def convert_glossdef(ctx, xml):
 651     result = ['<dd class="glossdef">']
 652     convert_inner(ctx, xml, result)
 653     result.append('</dd>\n')
 654     return result
 655
 656
 657 def convert_glossdiv(ctx, xml):
 658     title_tag = xml.find('title')
 659     title = title_tag.text
 660     xml.remove(title_tag)
 661     result = [
 662         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 663     ]
 664     convert_inner(ctx, xml, result)
 665     return result
 666
 667
 668 def convert_glossentry(ctx, xml):
 669     result = []
 670     convert_inner(ctx, xml, result)
 671     return result
 672
 673
 674 def convert_glossterm(ctx, xml):
 675     glossid = ''
 676     text = ''
 677     anchor = xml.find('anchor')
 678     if anchor is not None:
 679         glossid = anchor.attrib.get('id', '')
 680         text += anchor.tail or ''
 681     text += xml.text or ''
 682     if glossid == '':
 683         glossid = 'glossterm-' + text
 684     return [
 685         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 686             glossid, text)
 687     ]
 688
 689
 690 def convert_graphic(ctx, xml):
 691     # TODO(ensonic): warn on missing fileref attr?
 692     fileref = xml.attrib.get('fileref', '')
 693     if fileref:
 694         assets.add(fileref)
 695     return ['<div><img src="%s"></div>' % fileref]
 696
 697
 698 def convert_indexdiv(ctx, xml):
 699     title_tag = xml.find('title')
 700     title = title_tag.text
 701     xml.remove(title_tag)
 702     result = [
 703         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 704     ]
 705     convert_inner(ctx, xml, result)
 706     return result
 707
 708
 709 def convert_informaltable(ctx, xml):
 710     result = ['<div class="informaltable"><table class="informaltable"']
 711     if xml.attrib.get('pgwide') == '1':
 712         result.append(' width="100%"')
 713     if xml.attrib.get('frame') == 'none':
 714         result.append(' border="0"')
 715     result.append('>\n')
 716     convert_inner(ctx, xml, result)
 717     result.append('</table></div>')
 718     if xml.tail:
 719         result.append(xml.tail)
 720     return result
 721
 722
 723 def convert_inlinegraphic(ctx, xml):
 724     # TODO(ensonic): warn on missing fileref attr?
 725     fileref = xml.attrib.get('fileref', '')
 726     if fileref:
 727         assets.add(fileref)
 728     return ['<img src="%s">' % fileref]
 729
 730
 731 def convert_inlinemediaobject(ctx, xml):
 732     result = ['<span class="inlinemediaobject">']
 733     # no PCDATA allowed here
 734     convert_mediaobject_children(ctx, xml, result)
 735     result.append('</span>')
 736     append_text(ctx, xml.tail, result)
 737     return result
 738
 739
 740 def convert_itemizedlist(ctx, xml):
 741     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 742     convert_inner(ctx, xml, result)
 743     result.append('</ul></div>')
 744     if xml.tail:
 745         result.append(xml.tail)
 746     return result
 747
 748
 749 def convert_link(ctx, xml):
 750     linkend = xml.attrib['linkend']
 751     result = []
 752     if linkend:
 753         link_text = []
 754         append_text(ctx, xml.text, link_text)
 755         convert_inner(ctx, xml, link_text)
 756         text = ''.join(link_text)
 757
 758         (tid, href) = fixxref.GetXRef(linkend)
 759         if href:
 760             title_attr = ''
 761             title = titles.get(tid)
 762             if title:
 763                 title_attr = ' title="%s"' % title['title']
 764
 765             href = fixxref.MakeRelativeXRef(ctx['module'], href)
 766             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 767         else:
 768             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 769             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 770             result = [text]
 771     else:
 772         append_text(ctx, xml.text, result)
 773         convert_inner(ctx, xml, result)
 774     append_text(ctx, xml.tail, result)
 775     return result
 776
 777
 778 def convert_listitem(ctx, xml):
 779     result = ['<li class="listitem">']
 780     convert_inner(ctx, xml, result)
 781     result.append('</li>')
 782     # no PCDATA allowed here, is in itemizedlist
 783     return result
 784
 785
 786 def convert_literallayout(ctx, xml):
 787     result = ['<div class="literallayout"><p><br>\n']
 788     append_text(ctx, xml.text, result)
 789     convert_inner(ctx, xml, result)
 790     result.append('</p></div>')
 791     append_text(ctx, xml.tail, result)
 792     return result
 793
 794
 795 def convert_mediaobject(ctx, xml):
 796     result = ['<div class="mediaobject">\n']
 797     # no PCDATA allowed here
 798     convert_mediaobject_children(ctx, xml, result)
 799     result.append('</div>')
 800     append_text(ctx, xml.tail, result)
 801     return result
 802
 803
 804 def convert_orderedlist(ctx, xml):
 805     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 806     convert_inner(ctx, xml, result)
 807     result.append('</ol></div>')
 808     append_text(ctx, xml.tail, result)
 809     return result
 810
 811
 812 def convert_para(ctx, xml):
 813     result = []
 814     role = xml.attrib.get('role')
 815     if role is not None:
 816         result.append('<p class="%s">' % role)
 817     else:
 818         result.append('<p>')
 819     append_idref(xml.attrib, result)
 820     append_text(ctx, xml.text, result)
 821     convert_inner(ctx, xml, result)
 822     result.append('</p>')
 823     append_text(ctx, xml.tail, result)
 824     return result
 825
 826
 827 def convert_para_like(ctx, xml):
 828     result = []
 829     append_idref(xml.attrib, result)
 830     result.append('<p class="%s">' % xml.tag)
 831     append_text(ctx, xml.text, result)
 832     convert_inner(ctx, xml, result)
 833     result.append('</p>')
 834     append_text(ctx, xml.tail, result)
 835     return result
 836
 837
 838 def convert_phrase(ctx, xml):
 839     result = ['<span']
 840     role = xml.attrib.get('role')
 841     if role is not None:
 842         result.append(' class="%s">' % role)
 843     else:
 844         result.append('>')
 845     append_text(ctx, xml.text, result)
 846     convert_inner(ctx, xml, result)
 847     result.append('</span>')
 848     append_text(ctx, xml.tail, result)
 849     return result
 850
 851
 852 def convert_primaryie(ctx, xml):
 853     result = ['<dt>\n']
 854     convert_inner(ctx, xml, result)
 855     result.append('\n</dt>\n<dd></dd>\n')
 856     return result
 857
 858
 859 def convert_pre(ctx, xml):
 860     # Since we're inside <pre> don't skip newlines
 861     ctx['no-strip'] = True
 862     result = ['<pre class="%s">' % xml.tag]
 863     append_text(ctx, xml.text, result)
 864     convert_inner(ctx, xml, result)
 865     result.append('</pre>')
 866     del ctx['no-strip']
 867     append_text(ctx, xml.tail, result)
 868     return result
 869
 870
 871 def convert_programlisting(ctx, xml):
 872     result = []
 873     if xml.attrib.get('role', '') == 'example':
 874         if xml.text:
 875             lang = xml.attrib.get('language', ctx['src-lang']).lower()
 876             if lang not in LEXERS:
 877                 LEXERS[lang] = get_lexer_by_name(lang)
 878             lexer = LEXERS.get(lang, None)
 879             if lexer:
 880                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 881
 882                 # we do own line-numbering
 883                 line_count = highlighted.count('\n')
 884                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 885                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 886   <tbody>
 887     <tr>
 888       <td class="listing_lines" align="right"><pre>%s</pre></td>
 889       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 890     </tr>
 891   </tbody>
 892 </table>
 893 """ % (source_lines, highlighted))
 894             else:
 895                 logging.warn('No pygments lexer for language="%s"', lang)
 896                 result.append('<pre class="programlisting">')
 897                 result.append(xml.text)
 898                 result.append('</pre>')
 899     else:
 900         result.append('<pre class="programlisting">')
 901         append_text(ctx, xml.text, result)
 902         convert_inner(ctx, xml, result)
 903         result.append('</pre>')
 904     append_text(ctx, xml.tail, result)
 905     return result
 906
 907
 908 def convert_quote(ctx, xml):
 909     result = ['<span class="quote">"<span class="quote">']
 910     append_text(ctx, xml.text, result)
 911     convert_inner(ctx, xml, result)
 912     result.append('</span>"</span>')
 913     append_text(ctx, xml.tail, result)
 914     return result
 915
 916
 917 def convert_refsect1(ctx, xml):
 918     # Add a divider between two consequitive refsect2
 919     def convert_inner(ctx, xml, result):
 920         prev = None
 921         for child in xml:
 922             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 923                 result.append('<hr>\n')
 924             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 925             prev = child
 926     return convert_sect(ctx, xml, 'h2', convert_inner)
 927
 928
 929 def convert_refsect2(ctx, xml):
 930     return convert_sect(ctx, xml, 'h3')
 931
 932
 933 def convert_refsect3(ctx, xml):
 934     return convert_sect(ctx, xml, 'h4')
 935
 936
 937 def convert_row(ctx, xml):
 938     result = ['<tr>\n']
 939     convert_inner(ctx, xml, result)
 940     result.append('</tr>\n')
 941     return result
 942
 943
 944 def convert_sbr(ctx, xml):
 945     return ['<br>']
 946
 947
 948 def convert_sect1_tag(ctx, xml):
 949     return convert_sect(ctx, xml, 'h2')
 950
 951
 952 def convert_sect2(ctx, xml):
 953     return convert_sect(ctx, xml, 'h3')
 954
 955
 956 def convert_sect3(ctx, xml):
 957     return convert_sect(ctx, xml, 'h4')
 958
 959
 960 def convert_simpara(ctx, xml):
 961     result = ['<p>']
 962     append_text(ctx, xml.text, result)
 963     convert_inner(ctx, xml, result)
 964     result.append('</p>')
 965     append_text(ctx, xml.tail, result)
 966     return result
 967
 968
 969 def convert_span(ctx, xml):
 970     result = ['<span class="%s">' % xml.tag]
 971     append_text(ctx, xml.text, result)
 972     convert_inner(ctx, xml, result)
 973     result.append('</span>')
 974     append_text(ctx, xml.tail, result)
 975     return result
 976
 977
 978 def convert_table(ctx, xml):
 979     result = ['<div class="table">']
 980     append_idref(xml.attrib, result)
 981     title_tag = xml.find('title')
 982     if title_tag is not None:
 983         result.append('<p class="title"><b>')
 984         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 985         result.extend(convert_title(ctx, title_tag))
 986         result.append('</b></p>')
 987     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 988
 989     convert_inner(ctx, xml, result)
 990
 991     result.append('</table></div></div>')
 992     append_text(ctx, xml.tail, result)
 993     return result
 994
 995
 996 def convert_tag(ctx, xml):
 997     classval = xml.attrib.get('class')
 998     if classval is not None:
 999         result = ['<code class="sgmltag-%s">' % classval]
1000     else:
1001         result = ['<code>']
1002     append_text(ctx, xml.text, result)
1003     result.append('</code>')
1004     append_text(ctx, xml.tail, result)
1005     return result
1006
1007
def convert_tbody(ctx, xml):
    """Convert a 'tbody' and mark nested table entries as 'td' in ctx.

    ctx['table.entry'] stays set to 'td' after the call.
    """
    # presumably read by the 'entry' converter to pick the cell tag
    ctx['table.entry'] = 'td'
    rows = ['<tbody>']
    convert_inner(ctx, xml, rows)
    rows += ['</tbody>']
    # is in tgroup and there can be no 'text'
    return rows
1015
1016
def convert_tgroup(ctx, xml):
    """Convert a 'tgroup', collecting its 'colspec' children in a <colgroup>.

    The tgroup itself does not expand to any html. The 'colspec' children
    are removed from the xml tree once converted, before the remaining
    children are converted via convert_inner().
    """
    out = []
    colspecs = xml.findall('colspec')
    if colspecs:
        out.append('<colgroup>\n')
        for colspec in colspecs:
            out += convert_colspec(ctx, colspec)
            xml.remove(colspec)
        out.append('</colgroup>\n')
    convert_inner(ctx, xml, out)
    # is in informaltable and there can be no 'text'
    return out
1031
1032
def convert_thead(ctx, xml):
    """Convert a 'thead' and mark nested table entries as 'th' in ctx.

    ctx['table.entry'] stays set to 'th' after the call.
    """
    # presumably read by the 'entry' converter to pick the cell tag
    ctx['table.entry'] = 'th'
    rows = ['<thead>']
    convert_inner(ctx, xml, rows)
    rows += ['</thead>']
    # is in tgroup and there can be no 'text'
    return rows
1040
1041
def convert_title(ctx, xml):
    """Convert the content of a 'title' without adding any wrapping markup.

    This is always explicitly called from some context; the generic tag
    table maps 'title' to convert_skip.
    """
    fragments = []
    append_text(ctx, xml.text, fragments)
    convert_inner(ctx, xml, fragments)
    append_text(ctx, xml.tail, fragments)
    return fragments
1049
1050
def convert_ulink(ctx, xml):
    """Convert a 'ulink' to a html anchor.

    The link text is the text of the element; if there is none, the url
    itself is shown. Both the url and the link text are html-escaped, since
    they are plain decoded text (e.g. a '&' in a query string).

    Args:
      ctx: the conversion context dict
      xml: the 'ulink' element

    Returns:
      list of html fragments, including the converted tail text

    Raises:
      KeyError: if the element has no 'url' attribute
    """
    from html import escape

    url = xml.attrib['url']
    label = xml.text or url
    result = ['<a class="%s" href="%s">%s</a>' % (
        xml.tag, escape(url, quote=True), escape(label, quote=False))]
    append_text(ctx, xml.tail, result)
    return result
1059
1060
def convert_userinput(ctx, xml):
    """Convert a 'userinput' to a bold 'command' span.

    Returns the html fragments followed by the converted tail text.
    """
    out = ['<span class="command"><strong>']
    append_text(ctx, xml.text, out)
    convert_inner(ctx, xml, out)
    out += ['</strong></span>']
    append_text(ctx, xml.tail, out)
    return out
1068
1069
def convert_variablelist(ctx, xml):
    """Convert a 'variablelist' to a two column html table.

    The children are converted via convert_inner() and placed in the table
    body.
    """
    table_open = """<div class="variablelist"><table border="0" class="variablelist">
<colgroup>
<col align="left" valign="top">
<col>
</colgroup>
<tbody>"""
    table_close = """</tbody>
</table></div>"""
    out = [table_open]
    convert_inner(ctx, xml, out)
    out.append(table_close)
    return out
1081
1082
def convert_varlistentry(ctx, xml):
    """Convert a 'varlistentry' to a table row with two cells.

    The first cell holds the 'term' child (converted with convert_span), the
    second cell holds the converted children of the 'listitem' child.

    Args:
      ctx: the conversion context dict
      xml: the 'varlistentry' element

    Returns:
      list of html fragments
    """
    result = ['<tr>']

    result.append('<td><p>')
    term = xml.find('term')
    result.extend(convert_span(ctx, term))
    result.append('</p></td>')

    result.append('<td>')
    listitem = xml.find('listitem')
    convert_inner(ctx, listitem, result)
    result.append('</td>')

    # The row has to be closed here; emitting another opening '<tr>' would
    # leave every row of the list unterminated.
    result.append('</tr>')
    return result
1098
1099
def convert_xref(ctx, xml):
    """Convert an 'xref' to a link that names the referenced element.

    The 'linkend' attribute is resolved with fixxref.GetXRef() and the link
    text is built from the entry in 'titles'. If a lookup raises KeyError, a
    warning is logged and only the tail text is returned.
    """
    linkend = xml.attrib['linkend']
    (tid, href) = fixxref.GetXRef(linkend)
    result = []
    try:
        title = titles[tid]
        tag = title['tag']
        # all sectN need to become 'section'
        if tag in ('sect1', 'sect2', 'sect3', 'sect4', 'sect5'):
            tag = 'section'
        title_attr = title['title']
        label = ''.join(convert_title(ctx, title['xml']))
        result = [
            '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
            (href, title_attr, tag, label)
        ]
    except KeyError:
        logging.warning('invalid linkend "%s"', tid)

    append_text(ctx, xml.tail, result)
    return result
1124
1125
# Dispatch table for the tag converters: maps a docbook tag name to the
# function that converts such an element. Each converter is called with
# (ctx, xml) and returns a list of html fragments. convert_refsect1() looks
# up its children here and falls back to convert__unknown for missing tags.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'classname': convert_code,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'figure': convert_figure,
    'filename': convert_code,
    'firstterm': convert_em,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'graphic': convert_graphic,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_inlinemediaobject,
    'interfacename': convert_code,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_mediaobject,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_code,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_code,
    'returnvalue': convert_span,
    'row': convert_row,
    'sbr': convert_sbr,
    'screen': convert_pre,
    'section': convert_sect2,      # FIXME: need tracking of nesting
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'simplesect': convert_sect2,   # FIXME: need tracking of nesting
    'structfield': convert_em_code,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tag': convert_tag,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    # titles are converted explicitly by their parent, see convert_title()
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1215
1216 # conversion helpers
1217
# Common start of every generated html page, up to and including the opening
# <body> tag. It has two '%s' placeholders: the page title and the <link>
# tags for the navigation (see generate_head_links()).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
1227
1228
def generate_head_links(ctx):
    """Generate the <link> tags for the html head.

    The 'home' link is always generated from ctx['nav_home']. The 'up',
    'prev' and 'next' links are only generated if the respective 'nav_*'
    entry is in ctx and is not None.

    Returns:
      the tags as one string, one tag per line
    """
    link_fmt = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [link_fmt % ('home', home.filename, home.raw_title)]
    for rel in ('up', 'prev', 'next'):
        target = ctx.get('nav_' + rel)
        if target is not None:
            links.append(link_fmt % (rel, target.filename, target.raw_title))
    return ''.join(links)
1248
1249
def generate_nav_links(ctx):
    """Generate the table cells with the navigation icons.

    The 'home' cell is always a link to ctx['nav_home']. For 'up', 'prev'
    and 'next' a link is generated if the respective 'nav_*' entry is in ctx
    and is not None; otherwise the insensitive icon is shown without a link.

    Returns:
      the cells as one string
    """
    active = ('<td><a accesskey="%s" href="%s"><img src="%s.png" '
              'width="16" height="16" border="0" alt="%s"></a></td>')
    inactive = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'
    # ctx key, access key, icon name, alt text
    buttons = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )

    cells = [active % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    for key, accesskey, icon, alt in buttons:
        target = ctx.get(key)
        if target is None:
            cells.append(inactive % icon)
        else:
            cells.append(active % (accesskey, target.filename, icon, alt))
    return ''.join(cells)
1278
1279
def generate_toc(ctx, node):
    """Generate the table-of-contents entries for the children of node.

    Each child becomes a <dt> with a link to its file (plus anchor, if any)
    and, if it has one, its subtitle. Children that have children of their
    own are followed by a nested list generated recursively.

    Returns:
      list of html fragments, empty if node has no children
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = (child.filename + child.anchor) if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries.append('<dd><dl>')
            entries += generate_toc(ctx, child)
            entries.append('</dl></dd>')
    return entries
1297
1298
def generate_basic_nav(ctx):
    """Generate the navigation table with an empty shortcuts cell.

    The navigation icons come from generate_nav_links().
    """
    nav_cells = generate_nav_links(ctx)
    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """ % nav_cells
1307
1308
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Generate the navigation table with one shortcut per div.

    Args:
      ctx: the conversion context dict
      divs: the elements to link to; their titles are the link texts
      prefix: prepended to the title to form the link target
      span_id: suffix for the id of the span wrapping the shortcuts

    Returns:
      the html for the navigation table as one string
    """
    labels = [xml_get_title(ctx, div) for div in divs]
    shortcuts = '\n<span class="dim">|</span>\n'.join(
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, label, label)
        for label in labels)
    nav_cells = generate_nav_links(ctx)

    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """ % (span_id, shortcuts, nav_cells)
1326
1327
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table for a refentry page to result.

    Besides the 'Top' shortcut, one shortcut is added for each refsect1
    that is not a TOC section (role ending in '_proto') and that has an
    'id' attribute containing a '.'.

    Args:
      ctx: the conversion context dict
      refsect1s: the 'refsect1' elements of the refentry
      result: list of html fragments that is extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        attribs = sect.attrib
        # don't list TOC sections (role="xxx_proto")
        if attribs.get('role', '').endswith("_proto"):
            continue
        # skip sections without 'id' attrs and foreign sections
        ref_id = attribs.get('id')
        if ref_id is None or '.' not in ref_id:
            continue

        title = xml_get_title(ctx, sect)
        span_id = ref_id.split('.')[1].replace('-', '_')

        result.append("""
          <span id="nav_%s">
            <span class="dim">|</span>
            <a href="#%s" class="shortcut">%s</a>
          </span>
          """ % (span_id, ref_id, title))

    nav_cells = generate_nav_links(ctx)
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % nav_cells)
1362
1363
def generate_footer(ctx):
    """Generate the footnotes block for the end of a page.

    Returns:
      an empty list if ctx has no 'footnotes' entry (or it is None),
      otherwise the fragments of all footnotes wrapped in a 'footnotes' div
    """
    footnotes = ctx.get('footnotes')
    if footnotes is None:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    footer += [fragment for footnote in footnotes for fragment in footnote]
    footer.append('</div>\n')
    return footer
1376
1377
def get_id_path(node):
    """Generate the index path used to build an 'id'.

    First the xml-tree is walked up from node.xml, recording the 1-based
    position of each element among the children of its parent. Then the
    chunked-tree is walked up from node, recording the 1-based 'idx' of
    each chunk.

    Returns:
      list of number strings: the chunk indexes from the root chunk down to
      node, followed by the xml positions from the outermost element down
      to node.xml
    """
    xml_part = []
    elem = node.xml
    parent = elem.getparent()
    while parent is not None:
        position = parent.getchildren().index(elem)
        xml_part.append(str(position + 1))
        elem, parent = parent, parent.getparent()

    chunk_part = []
    chunk = node
    while chunk is not None:
        chunk_part.append(str(chunk.idx + 1))
        chunk = chunk.parent

    # both lists were collected bottom-up
    return chunk_part[::-1] + xml_part[::-1]
1397
1398
def get_id(node):
    """Return the 'id' for the chunk node.

    This is the non-empty 'id' attribute of node.xml if there is one.
    Otherwise an id of the form 'id-<n>.<n>...' is generated from the
    position of the node, see get_id_path().
    """
    elem = node.xml
    explicit_id = elem.attrib.get('id', None)
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider to
        # omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     elem.sourceline, elem.tag)
        # logging.warning('%s: id indexes: %s', node.filename, str(ix))
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1412
1413
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert the chunk ctx['node'] to a page with a table of contents.

    Args:
      ctx: the conversion context dict
      div_class: html class of the div wrapping the page content
      title_tag: html tag used for the title, e.g. 'h2'

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    header = HTML_HEADER % (page_title, generate_head_links(ctx))
    navigation = generate_basic_nav(ctx)
    result = [header, navigation, '<div class="%s">' % div_class]

    if node.title:
        anchor = get_id(node)
        result.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (title_tag, anchor, node.title, title_tag))

    toc = generate_toc(ctx, node)
    if toc:
        # TODO: not all docbook page types use this extra heading
        result.append("""<p><b>Table of Contents</b></p>
    <div class="toc">
      <dl class="toc">
    """)
        result += toc
        result.append("""</dl>
    </div>
    """)

    convert_inner(ctx, node.xml, result)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
1445
1446
1447 # docbook chunks
1448
1449
def convert_book(ctx):
    """Convert the 'book' chunk ctx['node'] to the start page.

    The page consists of the title, the converted first 'bookinfo' child
    and the table of contents for the whole document.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    header = HTML_HEADER % (node.title, generate_head_links(ctx))
    title_block = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    result = [header, title_block]

    bookinfo = node.xml.findall('bookinfo')[0]
    result += convert_bookinfo(ctx, bookinfo)

    result.append("""<div class="toc">
  <dl class="toc">
""")
    result += generate_toc(ctx, node.root)
    result.append("""</dl>
</div>
""")
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
1474
1475
def convert_chapter(ctx):
    """Convert a 'chapter' chunk to a page with toc and an 'h2' title."""
    div_class, title_tag = 'chapter', 'h2'
    return convert_chunk_with_toc(ctx, div_class, title_tag)
1478
1479
def convert_glossary(ctx):
    """Convert the 'glossary' chunk ctx['node'] to a page.

    The navigation gets a shortcut for each 'glossdiv' child and only the
    'glossdiv' children are converted for the page body.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    page_title = node.title + ": " + node.root.title
    header = HTML_HEADER % (page_title, generate_head_links(ctx))
    navigation = generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary')
    title_block = """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)

    result = [header, navigation, title_block]
    for glossdiv in glossdivs:
        result += convert_glossdiv(ctx, glossdiv)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
1499
1500
def convert_index(ctx):
    """Convert the 'index' chunk ctx['node'] to a page.

    The navigation gets a shortcut for each 'indexdiv' nested in the first
    'indexdiv' child and only those nested divs are converted for the page
    body.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')

    page_title = node.title + ": " + node.root.title
    header = HTML_HEADER % (page_title, generate_head_links(ctx))
    navigation = generate_alpha_nav(ctx, indexdivs, 'idx', 'index')
    title_block = """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)

    result = [header, navigation, title_block]
    for indexdiv in indexdivs:
        result += convert_indexdiv(ctx, indexdiv)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
1521
1522
def convert_part(ctx):
    """Convert a 'part' chunk to a page with toc and an 'h1' title."""
    div_class, title_tag = 'part', 'h1'
    return convert_chunk_with_toc(ctx, div_class, title_tag)
1525
1526
def convert_preface(ctx):
    """Convert the 'preface' chunk ctx['node'] to a page without a toc.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    header = HTML_HEADER % (page_title, generate_head_links(ctx))
    navigation = generate_basic_nav(ctx)
    result = [header, navigation, '<div class="preface">']

    if node.title:
        anchor = get_id(node)
        result.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (anchor, node.title))

    convert_inner(ctx, node.xml, result)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
1545
1546
def convert_reference(ctx):
    """Convert a 'reference' chunk to a page with toc and an 'h1' title."""
    div_class, title_tag = 'reference', 'h1'
    return convert_chunk_with_toc(ctx, div_class, title_tag)
1549
1550
def convert_refentry(ctx):
    """Convert the 'refentry' chunk ctx['node'] to a page.

    The page has a navigation with shortcuts to the 'refsect1' children, a
    title block (with a gallery image if refmeta/refmiscinfo contains an
    'inlinegraphic') and the converted 'refsect1' children.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # the gallery image is optional on every level
    refmeta = node.xml.find('refmeta')
    refmiscinfo = refmeta.find('refmiscinfo') if refmeta is not None else None
    inlinegraphic = None
    if refmiscinfo is not None:
        inlinegraphic = refmiscinfo.find('inlinegraphic')
    gallery = ''
    if inlinegraphic is not None:
        gallery = ''.join(convert_inlinegraphic(ctx, inlinegraphic))

    page_title = node.title + ": " + node.root.title
    result = [HTML_HEADER % (page_title, generate_head_links(ctx))]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        result += convert_refsect1(ctx, refsect1)
    result += generate_footer(ctx)
    result.append("""</div>
</body>
</html>""")
    return result
1590
1591
def convert_section(ctx):
    """Convert a 'section' chunk to a page with toc and an 'h2' title."""
    div_class, title_tag = 'section', 'h2'
    return convert_chunk_with_toc(ctx, div_class, title_tag)
1594
1595
def convert_sect1(ctx):
    """Convert a 'sect1' chunk to a page with toc and an 'h2' title."""
    div_class, title_tag = 'sect1', 'h2'
    return convert_chunk_with_toc(ctx, div_class, title_tag)
1598
1599
# Dispatch table for the chunk converters: maps the name of a chunk node to
# the function that generates its page. Each converter is called with the
# ctx dict only and returns the html fragments of the page, see convert().
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'section': convert_section,
    'sect1': convert_sect1,
}
1613
1614
def generate_nav_nodes(files, node):
    """Determine the navigation targets for node.

    Args:
      files: list of all chunk nodes in page order
      node: the current node, must be in files

    Returns:
      dict with 'nav_home' (the root node) and, where available, 'nav_up'
      (the parent), 'nav_prev' and 'nav_next' (the neighbours in files)
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent
    pos = files.index(node)
    last = len(files) - 1
    if pos > 0:
        nav['nav_prev'] = files[pos - 1]
    if pos < last:
        nav['nav_next'] = files[pos + 1]
    return nav
1628
1629
def convert(out_dir, module, files, node, src_lang):
    """Convert one docbook chunk to a html file.

    The file is created even if there is no chunk converter for the node;
    in that case a warning is logged and the file stays empty.

    Args:
      out_dir: already created output dir
      module: the module name, stored in the context as 'module'
      files: list of nodes in the tree in pre-order
      node: current tree node
      src_lang: default source language, stored in the context as 'src-lang'
    """

    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
            'src-lang': src_lang,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add chunk converter for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
1656
1657
def create_devhelp2_toc(node):
    """Generate the nested <sub> tags for the children of node.

    Children without children of their own become self-closing tags.

    Returns:
      list of xml fragments, one tag per fragment
    """
    tags = []
    for child in node.children:
        attribs = (child.raw_title, child.filename)
        if not child.children:
            tags.append('<sub name="%s" link="%s"/>\n' % attribs)
            continue
        tags.append('<sub name="%s" link="%s">\n' % attribs)
        tags += create_devhelp2_toc(child)
        tags.append('</sub>\n')
    return tags
1668
1669
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into xml attributes.

    The condition is a '|' separated list of 'name:value' items (since,
    deprecated, ...). Items without a ':' get an empty value, double quotes
    are replaced by '&quot;'.

    Returns:
      the attributes as a string with a leading space, or '' if node has no
      'condition' attribute
    """
    condition = node.attrib.get('condition')
    if condition is None:
        return ''

    keywords = []
    for item in condition.replace('"', '&quot;').split('|'):
        # deprecated can have no description, then the value is empty
        name, _, value = item.partition(':')
        keywords.append('{}="{}"'.format(name, value))
    return ' ' + ' '.join(keywords)
1685
1686
def create_devhelp2_refsect2_keyword(node, base_link):
    """Generate the <keyword> tag for a 'refsect2' element.

    The name is the title registered in 'titles' for the id of the node,
    the link is base_link plus that id.
    """
    node_id = node.attrib['id']
    keyword_type = node.attrib['role']
    name = titles[node_id]['title']
    link = base_link + node_id
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, conditions)
1692
1693
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Generate the <keyword> tag for a member row of a 'refsect3'.

    The name of the keyword is title, the link is base_link plus name.
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, conditions)
1698
1699
def create_devhelp2(out_dir, module, xml, files):
    """Write the <module>.devhelp2 index file to out_dir.

    The file contains the chapter structure of the chunked tree and one
    keyword for each 'refsect2' with a 'role' attribute, plus keywords for
    the enum and struct members documented in their 'refsect3' children.

    The book title and the online url are taken from /book/bookinfo. Both
    are left empty if the document does not provide them.

    Args:
      out_dir: already created output dir
      module: the module name
      xml: the root element of the document
      files: list of chunk nodes, must not be empty
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_url = ''
        # xpath() returns a (possibly empty) list for these expressions, so
        # test for emptiness before picking the first match
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            title_texts = bookinfo.xpath('./title/text()')
            if title_texts:
                title = title_texts[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
  <chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        for line in result:
            idx.write(line)
1749
1750
def get_dirs(uninstalled):
    """Determine the gtk-doc data dir and the style dir.

    Args:
      uninstalled: if true, derive the dirs from the location of the script
        (or from the ABS_TOP_SRCDIR environment variable, if 'gtk-doc.xsl'
        is not next to the script but in that dir); otherwise use the
        installed data dir from the config

    Returns:
      tuple of (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1764
1765
def main(module, index_file, out_dir, uninstalled, src_lang, paths):
    """Convert a docbook document into chunked html files in out_dir.

    Runs a loading, a processing and an output phase in sequence and logs
    the time spent in each numbered step.

    Args:
        module: module name; passed on to chunk(), create_devhelp2() and
            convert().
        index_file: the main xml document to parse; xincludes are resolved
            after loading.
        out_dir: directory that receives the style files, the generated
            output and the copied assets.
        uninstalled: passed to get_dirs() to select where the data files
            are taken from.
        src_lang: passed through to convert().
        paths: list of directories that are searched for assets; the current
            working directory is appended as the last search location.
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(dtd_validation=False, collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    logging.warning("1a: %7.3lf: load doc", timer() - _t)
    _t = timer()
    tree.xinclude()
    logging.warning("1b: %7.3lf: xinclude doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the formatter's style definitions to the copied
    # stylesheet
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us
    # the globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # flatten the tree in pre-order, keeping only nodes without an anchor
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node, src_lang)
    logging.warning("7: %7.3lf: create html", timer() - _t)

    # 8) copy assets over
    _t = timer()
    # De-duplicate the search path but keep its order (a set would iterate in
    # arbitrary order), so that it is deterministic which directory an asset
    # is taken from: the given paths first, the working directory last.
    search_paths = []
    for p in paths + [os.getcwd()]:
        if p not in search_paths:
            search_paths.append(p)
    for a in assets:
        logging.info('trying %s in %s', a, str(search_paths))
        for p in search_paths:
            try:
                shutil.copy(os.path.join(p, a), out_dir)
            except FileNotFoundError:
                continue
            # first hit wins, don't overwrite it from later directories
            break
        else:
            logging.warning('file %s not found in path (did you add --path?)', a)
    logging.warning("8: %7.3lf: copy assets", timer() - _t)
1858
1859
def run(options):
    """Prepare the output directory and run the conversion.

    Args:
        options: parsed options object. Reads 'args' (first entry is used as
            the module, second as the main document), 'uninstalled',
            'src_lang' and 'path'.

    Does not return: the process is terminated through sys.exit() with the
    value returned by main().
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    doc_dir = os.path.dirname(document)
    out_dir = os.path.join(doc_dir, 'db2html')
    try:
        os.mkdir(out_dir)
    except OSError as err:
        # an already existing output directory is fine, anything else is not
        if err.errno != errno.EEXIST:
            raise

    status = main(module, document, out_dir, options.uninstalled,
                  options.src_lang, options.path)
    sys.exit(status)