gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
  34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42 - check each docbook tag if it can contain #PCDATA, if not don't check for
  43   xml.text
  44 - consider some perf-warnings flag
  45   - see 'No "id" attribute on'
  46 - find a better way to print context for warnings
  47   - we use 'xml.sourceline', but this all does not help a lot due to xi:include
  48 - copy images
  49   - do we need to find them on the respective tags and search them in the path
  50     setup by '--path'
  51 - commandline options
  52   - mkhtml:
  53     --path 'Extra source directories' - used to find images
  54   - fixxref:
  55
  56 DIFFERENCES:
  57 - titles
  58   - we add the chunk label to the title in toc, on the page and in nav tooltips
  59   - docbook xsl only sometimes adds the label to the titles and when it does it
  60     adds name chunk type too (e.g. 'Part I.' instead of 'I.')
  61 - navigation
  62   - we always add an up-link except on the first page
  63 - footer
  64   - we're now omitting the footer
  65 - tocs
  66   - we always add 'Table of Contents' before a toc
  67   - docbook does that for some pages, it is configurable
  68
  69 OPTIONAL:
  70 - minify html: https://pypi.python.org/pypi/htmlmin/
  71
  72 Requirements:
  73 sudo pip3 install anytree lxml pygments
  74
  75 Example invocation:
  76 cd tests/bugs/docs/
  77 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  78 xdg-open db2html/index.html
  79 meld html db2html
  80
  81 Benchmarking:
  82 cd tests/bugs/docs/;
  83 rm html-build.stamp; time make html-build.stamp
  84 """
  85
  86 import argparse
  87 import errno
  88 import logging
  89 import os
  90 import shutil
  91 import sys
  92
  93 from anytree import Node, PreOrderIter
  94 from copy import deepcopy
  95 from glob import glob
  96 from lxml import etree
  97 from pygments import highlight
  98 from pygments.lexers import CLexer
  99 from pygments.formatters import HtmlFormatter
 100 from timeit import default_timer as timer
 101
 102 from . import config, fixxref
 103
# pygments setup
# Cache of pygments lexer instances keyed by lower-case language name. Only the
# C lexer is created up-front; convert_programlisting() adds further entries
# the first time a language is requested.
LEXERS = {
    'c': CLexer()
}
# Shared formatter instance; convert_programlisting() emits the surrounding
# <pre> markup itself.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
 110
 111
 112 class ChunkParams(object):
 113     def __init__(self, prefix, parent=None, min_idx=0):
 114         self.prefix = prefix
 115         self.parent = parent
 116         self.min_idx = min_idx
 117         self.idx = 1
 118
 119
# Used as 'min_idx' for tags that never get their own file: chunk() compares
# 'idx >= min_idx' and no index reaches infinity.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If a tag is not defined there, we can just create an example without an 'id'
# attr and see what docbook xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list should also have a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# Maps a chunkable tag to its ChunkParams(prefix, parent, min_idx). The
# instances are shared module state: gen_chunk_name() increments their 'idx'.
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
# Tags we don't support:
# 'article', 'bibliography', 'colophon', 'set', 'setindex'
 149
# Maps a tag to a pair of compiled xpaths (title, subtitle-or-None) evaluated
# relative to the element. The '_' entry is the fallback for all other tags.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every element that carries an 'id' attribute.
ID_XPATH = etree.XPath('//*[@id]')

GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# Next footnote number; convert_footnote() increments it on each call.
footnote_idx = 1

# nested dict (keyed by element id, filled by add_id_links_and_titles()) with
# subkeys:
# title: textual title
# tag: chunk tag
# xml: title xml node
titles = {}
 171
 172
 173 def gen_chunk_name(node, chunk_params):
 174     """Generate a chunk file name
 175
 176     This is either based on the id or on the position in the doc. In the latter
 177     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 178     type.
 179     """
 180     if 'id' in node.attrib:
 181         return node.attrib['id']
 182
 183     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 184     chunk_params.idx += 1
 185
 186     # handle parents to make names of nested tags like in docbook
 187     # - we only need to prepend the parent if there are > 1 of them in the
 188     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 189     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 190     #   when we chunk explicitly and on each level maintain the 'idx'
 191     # while chunk_params.parent:
 192     #     parent = chunk_params.parent
 193     #     if parent not in CHUNK_PARAMS:
 194     #         break;
 195     #     chunk_params = CHUNK_PARAMS[parent]
 196     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 197
 198     logging.info('Gen chunk name: "%s"', name)
 199     return name
 200
 201
 202 def get_chunk_titles(module, node):
 203     tag = node.tag
 204     (title, subtitle) = TITLE_XPATHS.get(tag, TITLE_XPATHS['_'])
 205
 206     ctx = {
 207         'module': module,
 208         'files': [],
 209     }
 210     result = {
 211         'title': None,
 212         'title_tag': None,
 213         'subtitle': None,
 214         'subtitle_tag': None
 215     }
 216     res = title(node)
 217     if res:
 218         # handle chunk label for tocs
 219         label = node.attrib.get('label')
 220         if label:
 221             label += '. '
 222         else:
 223             label = ''
 224
 225         xml = res[0]
 226         result['title'] = label + ''.join(convert_title(ctx, xml))
 227         if xml.tag != 'title':
 228             result['title_tag'] = xml.tag
 229         else:
 230             result['title_tag'] = tag
 231
 232     if subtitle:
 233         res = subtitle(node)
 234         if res:
 235             xml = res[0]
 236             result['subtitle'] = ''.join(convert_title(ctx, xml))
 237             result['subtitle_tag'] = xml.tag
 238     return result
 239
 240
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.

    Only tags listed in CHUNK_PARAMS create a tree node; for any other tag the
    function returns 'parent' unchanged and does not descend into children.

    Args:
      xml_node: the xml element to process
      module: module name, passed on to get_chunk_titles()
      depth: nesting depth stored on the created tree node
      idx: index of this element among the chunkable children of its parent
      parent: the anytree Node of the enclosing chunk or None for the root

    Returns:
      The Node created for xml_node, or 'parent' if xml_node is not chunkable.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        # NOTE: this advances the sequence number in chunk_params when the node
        # has no 'id', also for nodes that end up as an anchor below.
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent, the chunk continues to
                # work on a detached deep copy
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # not a separate file: the node lives in the file of its parent and
            # is addressed through an anchor
            # NOTE(review): this reads parent.filename and thus expects a
            # parent; confirm this branch is never reached for the root call
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          idx=idx,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        depth += 1
        idx = 0
        # NOTE(review): the recursive call may remove 'child' from xml_node
        # while we iterate over it - presumably safe with lxml, confirm
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            # only chunkable children count for the index
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
 282
 283
 284 def add_id_links_and_titles(files, links):
 285     for node in files:
 286         chunk_name = node.filename[:-5]
 287         chunk_base = node.filename + '#'
 288         for elem in ID_XPATH(node.xml):
 289             attr = elem.attrib['id']
 290             if attr == chunk_name:
 291                 links[attr] = node.filename
 292             else:
 293                 links[attr] = chunk_base + attr
 294
 295             title = TITLE_XPATHS.get(elem.tag, TITLE_XPATHS['_'])[0]
 296             res = title(elem)
 297             if res:
 298                 xml = res[0]
 299                 # TODO: consider to eval those lazily
 300                 titles[attr] = {
 301                     'title': etree.tostring(xml, method="text", encoding=str).strip(),
 302                     'xml': xml,
 303                     'tag': elem.tag,
 304                 }
 305
 306
 307 def build_glossary(files):
 308     for node in files:
 309         if node.xml.tag != 'glossary':
 310             continue
 311         for term in GLOSSENTRY_XPATH(node.xml):
 312             # TODO: there can be all kind of things in a glossary. This only supports
 313             # what we commonly use, glossterm is mandatory
 314             key_node = term.find('glossterm')
 315             val_node = term.find('glossdef')
 316             if key_node is not None and val_node is not None:
 317                 key = etree.tostring(key_node, method="text", encoding=str).strip()
 318                 val = etree.tostring(val_node, method="text", encoding=str).strip()
 319                 glossary[key] = val
 320                 # logging.debug('glosentry: %s:%s', key, val)
 321             else:
 322                 debug = []
 323                 if key_node is None:
 324                     debug.append('missing key')
 325                 if val_node is None:
 326                     debug.append('missing val')
 327                 logging.warning('Unexpected glossentry %s:', term.attrib['id'], ','.join(debug))
 328
 329
 330 # conversion helpers
 331
 332
 333 def convert_inner(ctx, xml, result):
 334     for child in xml:
 335         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 336
 337
 338 def convert_ignore(ctx, xml):
 339     result = []
 340     convert_inner(ctx, xml, result)
 341     return result
 342
 343
 344 def convert_skip(ctx, xml):
 345     return []
 346
 347
 348 def append_idref(attrib, result):
 349     if 'id' in attrib:
 350         result.append('<a name="%s"></a>' % attrib['id'])
 351
 352
 353 def append_text(ctx, text, result):
 354     if text and ('no-strip' in ctx or text.strip()):
 355         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 356
 357
# Tags for which no converter exists. convert__unknown() records each tag here
# so that the warning is logged only once per tag.
missing_tags = {}
 359
 360
 361 def convert__unknown(ctx, xml):
 362     # don't recurse on subchunks
 363     if xml.tag in CHUNK_PARAMS:
 364         return []
 365     if isinstance(xml, etree._Comment):
 366         return ['<!-- ' + xml.text + '-->\n']
 367     else:
 368         # warn only once
 369         if xml.tag not in missing_tags:
 370             logging.warning('Add tag converter for "%s"', xml.tag)
 371             missing_tags[xml.tag] = True
 372         result = ['<!-- ' + xml.tag + '-->\n']
 373         convert_inner(ctx, xml, result)
 374         result.append('<!-- /' + xml.tag + '-->\n')
 375         return result
 376
 377
 378 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 379     result = ['<div class="%s">\n' % xml.tag]
 380     title_tag = xml.find('title')
 381     if title_tag is not None:
 382         if 'id' in xml.attrib:
 383             result.append('<a name="%s"></a>' % xml.attrib['id'])
 384         result.append('<%s>%s</%s>' % (
 385             h_tag, ''.join(convert_title(ctx, title_tag)), h_tag))
 386     append_text(ctx, xml.text, result)
 387     inner_func(ctx, xml, result)
 388     result.append('</div>')
 389     append_text(ctx, xml.tail, result)
 390     return result
 391
 392
 393 def xml_get_title(ctx, xml):
 394     title_tag = xml.find('title')
 395     if title_tag is not None:
 396         return ''.join(convert_title(ctx, title_tag))
 397     else:
 398         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 399         return ''
 400
 401
 402 # docbook tags
 403
 404
 405 def convert_abstract(ctx, xml):
 406     result = ["""<div class="abstract">
 407     <p class="title"><b>Abstract</b></p>"""]
 408     append_text(ctx, xml.text, result)
 409     convert_inner(ctx, xml, result)
 410     result.append('</div>')
 411     append_text(ctx, xml.tail, result)
 412     return result
 413
 414
 415 def convert_acronym(ctx, xml):
 416     key = xml.text
 417     title = glossary.get(key, '')
 418     # TODO: print a sensible warning if missing
 419     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 420     if xml.tail:
 421         result.append(xml.tail)
 422     return result
 423
 424
 425 def convert_anchor(ctx, xml):
 426     return ['<a name="%s"></a>' % xml.attrib['id']]
 427
 428
 429 def convert_bookinfo(ctx, xml):
 430     result = ['<div class="titlepage">']
 431     convert_inner(ctx, xml, result)
 432     result.append("""<hr>
 433 </div>""")
 434     if xml.tail:
 435         result.append(xml.tail)
 436     return result
 437
 438
 439 def convert_blockquote(ctx, xml):
 440     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 441     append_text(ctx, xml.text, result)
 442     convert_inner(ctx, xml, result)
 443     result.append('</blockquote>\n</div>')
 444     append_text(ctx, xml.tail, result)
 445     return result
 446
 447
 448 def convert_code(ctx, xml):
 449     result = ['<code class="%s">' % xml.tag]
 450     append_text(ctx, xml.text, result)
 451     convert_inner(ctx, xml, result)
 452     result.append('</code>')
 453     append_text(ctx, xml.tail, result)
 454     return result
 455
 456
 457 def convert_colspec(ctx, xml):
 458     result = ['<col']
 459     a = xml.attrib
 460     if 'colname' in a:
 461         result.append(' class="%s"' % a['colname'])
 462     if 'colwidth' in a:
 463         result.append(' width="%s"' % a['colwidth'])
 464     result.append('>\n')
 465     # is in tgroup and there can be no 'text'
 466     return result
 467
 468
 469 def convert_command(ctx, xml):
 470     result = ['<strong class="userinput"><code>']
 471     append_text(ctx, xml.text, result)
 472     convert_inner(ctx, xml, result)
 473     result.append('</code></strong>')
 474     append_text(ctx, xml.tail, result)
 475     return result
 476
 477
 478 def convert_corpauthor(ctx, xml):
 479     result = ['<div><h3 class="corpauthor">\n']
 480     append_text(ctx, xml.text, result)
 481     convert_inner(ctx, xml, result)
 482     result.append('</h3></div>\n')
 483     append_text(ctx, xml.tail, result)
 484     return result
 485
 486
 487 def convert_div(ctx, xml):
 488     result = ['<div class="%s">\n' % xml.tag]
 489     append_text(ctx, xml.text, result)
 490     convert_inner(ctx, xml, result)
 491     result.append('</div>')
 492     append_text(ctx, xml.tail, result)
 493     return result
 494
 495
 496 def convert_emphasis(ctx, xml):
 497     if 'role' in xml.attrib:
 498         result = ['<span class="%s">' % xml.attrib['role']]
 499         end = '</span>'
 500     else:
 501         result = ['<span class="emphasis"><em>']
 502         end = '</em></span>'
 503     append_text(ctx, xml.text, result)
 504     convert_inner(ctx, xml, result)
 505     result.append(end)
 506     append_text(ctx, xml.tail, result)
 507     return result
 508
 509
 510 def convert_em_class(ctx, xml):
 511     result = ['<em class="%s"><code>' % xml.tag]
 512     append_idref(xml.attrib, result)
 513     append_text(ctx, xml.text, result)
 514     convert_inner(ctx, xml, result)
 515     result.append('</code></em>')
 516     append_text(ctx, xml.tail, result)
 517     return result
 518
 519
 520 def convert_entry(ctx, xml):
 521     entry_type = ctx['table.entry']
 522     result = ['<' + entry_type]
 523     if 'role' in xml.attrib:
 524         result.append(' class="%s"' % xml.attrib['role'])
 525     if 'morerows' in xml.attrib:
 526         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 527     result.append('>')
 528     append_text(ctx, xml.text, result)
 529     convert_inner(ctx, xml, result)
 530     result.append('</' + entry_type + '>')
 531     append_text(ctx, xml.tail, result)
 532     return result
 533
 534
 535 def convert_footnote(ctx, xml):
 536     footnotes = ctx.get('footnotes', [])
 537     # footnotes idx is not per page, but per doc
 538     global footnote_idx
 539     idx = footnote_idx
 540     footnote_idx += 1
 541
 542     # need a pair of ids for each footnote (docbook generates different ids)
 543     this_id = 'footnote-%d' % idx
 544     that_id = 'ftn.' + this_id
 545
 546     inner = ['<div id="%s" class="footnote">' % that_id]
 547     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 548         this_id, idx))
 549     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 550     # get double nested paras :/.
 551     # convert_inner(ctx, xml, inner)
 552     para = xml.find('para')
 553     if para is None:
 554         para = xml.find('simpara')
 555     if para is not None:
 556         inner.append(para.text)
 557     else:
 558         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 559                         etree.tostring(xml, method="text", encoding=str).strip())
 560     inner.append('</p></div>')
 561     footnotes.append(inner)
 562     ctx['footnotes'] = footnotes
 563     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 564         that_id, this_id, idx)]
 565
 566
 567 def convert_formalpara(ctx, xml):
 568     result = None
 569     title_tag = xml.find('title')
 570     result = ['<p><b>%s</b>' % ''.join(convert_title(ctx, title_tag))]
 571     para_tag = xml.find('para')
 572     append_text(ctx, para_tag.text, result)
 573     convert_inner(ctx, para_tag, result)
 574     append_text(ctx, para_tag.tail, result)
 575     result.append('</p>')
 576     append_text(ctx, xml.tail, result)
 577     return result
 578
 579
 580 def convert_glossdef(ctx, xml):
 581     result = ['<dd class="glossdef">']
 582     convert_inner(ctx, xml, result)
 583     result.append('</dd>\n')
 584     return result
 585
 586
 587 def convert_glossdiv(ctx, xml):
 588     title_tag = xml.find('title')
 589     title = title_tag.text
 590     xml.remove(title_tag)
 591     result = [
 592         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 593     ]
 594     convert_inner(ctx, xml, result)
 595     return result
 596
 597
 598 def convert_glossentry(ctx, xml):
 599     result = []
 600     convert_inner(ctx, xml, result)
 601     return result
 602
 603
 604 def convert_glossterm(ctx, xml):
 605     glossid = ''
 606     text = ''
 607     anchor = xml.find('anchor')
 608     if anchor is not None:
 609         glossid = anchor.attrib.get('id', '')
 610         text += anchor.tail or ''
 611     text += xml.text or ''
 612     if glossid == '':
 613         glossid = 'glossterm-' + text
 614     return [
 615         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 616             glossid, text)
 617     ]
 618
 619
 620 def convert_imageobject(ctx, xml):
 621     imagedata = xml.find('imagedata')
 622     if imagedata is not None:
 623         # TODO(ensonic): warn on missing fileref attr?
 624         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 625     else:
 626         return []
 627
 628
 629 def convert_indexdiv(ctx, xml):
 630     title_tag = xml.find('title')
 631     title = title_tag.text
 632     xml.remove(title_tag)
 633     result = [
 634         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 635     ]
 636     convert_inner(ctx, xml, result)
 637     return result
 638
 639
 640 def convert_informaltable(ctx, xml):
 641     result = ['<div class="informaltable"><table class="informaltable"']
 642     a = xml.attrib
 643     if 'pgwide' in a and a['pgwide'] == '1':
 644         result.append(' width="100%"')
 645     if 'frame' in a and a['frame'] == 'none':
 646         result.append(' border="0"')
 647     result.append('>\n')
 648     convert_inner(ctx, xml, result)
 649     result.append('</table></div>')
 650     if xml.tail:
 651         result.append(xml.tail)
 652     return result
 653
 654
 655 def convert_inlinegraphic(ctx, xml):
 656     # TODO(ensonic): warn on missing fileref attr?
 657     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 658
 659
 660 def convert_itemizedlist(ctx, xml):
 661     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 662     convert_inner(ctx, xml, result)
 663     result.append('</ul></div>')
 664     if xml.tail:
 665         result.append(xml.tail)
 666     return result
 667
 668
 669 def convert_link(ctx, xml):
 670     linkend = xml.attrib['linkend']
 671     result = []
 672     if linkend:
 673         link_text = []
 674         append_text(ctx, xml.text, link_text)
 675         convert_inner(ctx, xml, link_text)
 676         text = ''.join(link_text)
 677
 678         (tid, href) = fixxref.GetXRef(linkend)
 679         if href:
 680             title_attr = ''
 681             title = titles.get(tid)
 682             if title:
 683                 title_attr = ' title="%s"' % title['title']
 684
 685             href = fixxref.MakeRelativeXRef(ctx['module'], href)
 686             result = ['<a href="%s"%s>%s</a>' % (href, title_attr, text)]
 687         else:
 688             # TODO: filename is for the output and xml.sourceline is on the masterdoc ...
 689             fixxref.ReportBadXRef(ctx['node'].filename, 0, linkend, text)
 690             result = [text]
 691     else:
 692         append_text(ctx, xml.text, result)
 693         convert_inner(ctx, xml, result)
 694     append_text(ctx, xml.tail, result)
 695     return result
 696
 697
 698 def convert_listitem(ctx, xml):
 699     result = ['<li class="listitem">']
 700     convert_inner(ctx, xml, result)
 701     result.append('</li>')
 702     # is in itemizedlist and there can be no 'text'
 703     return result
 704
 705
 706 def convert_literallayout(ctx, xml):
 707     result = ['<div class="literallayout"><p><br>\n']
 708     append_text(ctx, xml.text, result)
 709     convert_inner(ctx, xml, result)
 710     result.append('</p></div>')
 711     append_text(ctx, xml.tail, result)
 712     return result
 713
 714
 715 def convert_orderedlist(ctx, xml):
 716     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 717     convert_inner(ctx, xml, result)
 718     result.append('</ol></div>')
 719     append_text(ctx, xml.tail, result)
 720     return result
 721
 722
 723 def convert_para(ctx, xml):
 724     result = []
 725     if 'role' in xml.attrib:
 726         result.append('<p class="%s">' % xml.attrib['role'])
 727     else:
 728         result.append('<p>')
 729     if 'id' in xml.attrib:
 730         result.append('<a name="%s"></a>' % xml.attrib['id'])
 731     append_text(ctx, xml.text, result)
 732     convert_inner(ctx, xml, result)
 733     result.append('</p>')
 734     append_text(ctx, xml.tail, result)
 735     return result
 736
 737
 738 def convert_para_like(ctx, xml):
 739     result = []
 740     if 'id' in xml.attrib:
 741         result.append('<a name="%s"></a>' % xml.attrib['id'])
 742     result.append('<p class="%s">' % xml.tag)
 743     append_text(ctx, xml.text, result)
 744     convert_inner(ctx, xml, result)
 745     result.append('</p>')
 746     append_text(ctx, xml.tail, result)
 747     return result
 748
 749
 750 def convert_phrase(ctx, xml):
 751     result = ['<span']
 752     if 'role' in xml.attrib:
 753         result.append(' class="%s">' % xml.attrib['role'])
 754     else:
 755         result.append('>')
 756     append_text(ctx, xml.text, result)
 757     convert_inner(ctx, xml, result)
 758     result.append('</span>')
 759     append_text(ctx, xml.tail, result)
 760     return result
 761
 762
 763 def convert_primaryie(ctx, xml):
 764     result = ['<dt>\n']
 765     convert_inner(ctx, xml, result)
 766     result.append('\n</dt>\n<dd></dd>\n')
 767     return result
 768
 769
 770 def convert_pre(ctx, xml):
 771     # Since we're inside <pre> don't skip newlines
 772     ctx['no-strip'] = True
 773     result = ['<pre class="%s">' % xml.tag]
 774     append_text(ctx, xml.text, result)
 775     convert_inner(ctx, xml, result)
 776     result.append('</pre>')
 777     del ctx['no-strip']
 778     append_text(ctx, xml.tail, result)
 779     return result
 780
 781
 782 def convert_programlisting(ctx, xml):
 783     result = []
 784     if xml.attrib.get('role', '') == 'example':
 785         if xml.text:
 786             lang = xml.attrib.get('language', ctx['src-lang']).lower()
 787             if lang not in LEXERS:
 788                 LEXERS[lang] = get_lexer_by_name(lang)
 789             lexer = LEXERS.get(lang, None)
 790             if lexer:
 791                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 792
 793                 # we do own line-numbering
 794                 line_count = highlighted.count('\n')
 795                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 796                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 797   <tbody>
 798     <tr>
 799       <td class="listing_lines" align="right"><pre>%s</pre></td>
 800       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 801     </tr>
 802   </tbody>
 803 </table>
 804 """ % (source_lines, highlighted))
 805             else:
 806                 logging.warn('No pygments lexer for language="%s"', lang)
 807                 result.append('<pre class="programlisting">')
 808                 result.append(xml.text)
 809                 result.append('</pre>')
 810     else:
 811         result.append('<pre class="programlisting">')
 812         append_text(ctx, xml.text, result)
 813         convert_inner(ctx, xml, result)
 814         result.append('</pre>')
 815     append_text(ctx, xml.tail, result)
 816     return result
 817
 818
 819 def convert_quote(ctx, xml):
 820     result = ['<span class="quote">"<span class="quote">']
 821     append_text(ctx, xml.text, result)
 822     convert_inner(ctx, xml, result)
 823     result.append('</span>"</span>')
 824     append_text(ctx, xml.tail, result)
 825     return result
 826
 827
 828 def convert_refsect1(ctx, xml):
 829     # Add a divider between two consequitive refsect2
 830     def convert_inner(ctx, xml, result):
 831         prev = None
 832         for child in xml:
 833             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 834                 result.append('<hr>\n')
 835             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 836             prev = child
 837     return convert_sect(ctx, xml, 'h2', convert_inner)
 838
 839
 840 def convert_refsect2(ctx, xml):
 841     return convert_sect(ctx, xml, 'h3')
 842
 843
 844 def convert_refsect3(ctx, xml):
 845     return convert_sect(ctx, xml, 'h4')
 846
 847
 848 def convert_row(ctx, xml):
 849     result = ['<tr>\n']
 850     convert_inner(ctx, xml, result)
 851     result.append('</tr>\n')
 852     return result
 853
 854
 855 def convert_sect1_tag(ctx, xml):
 856     return convert_sect(ctx, xml, 'h2')
 857
 858
 859 def convert_sect2(ctx, xml):
 860     return convert_sect(ctx, xml, 'h3')
 861
 862
 863 def convert_sect3(ctx, xml):
 864     return convert_sect(ctx, xml, 'h4')
 865
 866
 867 def convert_simpara(ctx, xml):
 868     result = ['<p>']
 869     append_text(ctx, xml.text, result)
 870     convert_inner(ctx, xml, result)
 871     result.append('</p>')
 872     append_text(ctx, xml.tail, result)
 873     return result
 874
 875
 876 def convert_span(ctx, xml):
 877     result = ['<span class="%s">' % xml.tag]
 878     append_text(ctx, xml.text, result)
 879     convert_inner(ctx, xml, result)
 880     result.append('</span>')
 881     append_text(ctx, xml.tail, result)
 882     return result
 883
 884
 885 def convert_table(ctx, xml):
 886     result = ['<div class="table">']
 887     append_idref(xml.attrib, result)
 888     title_tag = xml.find('title')
 889     if title_tag is not None:
 890         result.append('<p class="title"><b>')
 891         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 892         result.extend(convert_title(ctx, title_tag))
 893         result.append('</b></p>')
 894     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 895
 896     convert_inner(ctx, xml, result)
 897
 898     result.append('</table></div></div>')
 899     append_text(ctx, xml.tail, result)
 900     return result
 901
 902
 903 def convert_tbody(ctx, xml):
 904     result = ['<tbody>']
 905     ctx['table.entry'] = 'td'
 906     convert_inner(ctx, xml, result)
 907     result.append('</tbody>')
 908     # is in tgroup and there can be no 'text'
 909     return result
 910
 911
 912 def convert_tgroup(ctx, xml):
 913     # tgroup does not expand to anything, but the nested colspecs need to
 914     # be put into a colgroup
 915     cols = xml.findall('colspec')
 916     result = []
 917     if cols:
 918         result.append('<colgroup>\n')
 919         for col in cols:
 920             result.extend(convert_colspec(ctx, col))
 921             xml.remove(col)
 922         result.append('</colgroup>\n')
 923     convert_inner(ctx, xml, result)
 924     # is in informaltable and there can be no 'text'
 925     return result
 926
 927
 928 def convert_thead(ctx, xml):
 929     result = ['<thead>']
 930     ctx['table.entry'] = 'th'
 931     convert_inner(ctx, xml, result)
 932     result.append('</thead>')
 933     # is in tgroup and there can be no 'text'
 934     return result
 935
 936
 937 def convert_title(ctx, xml):
 938     # This is always explicitly called from some context
 939     result = []
 940     append_text(ctx, xml.text, result)
 941     convert_inner(ctx, xml, result)
 942     append_text(ctx, xml.tail, result)
 943     return result
 944
 945
 946 def convert_ulink(ctx, xml):
 947     if xml.text:
 948         result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 949     else:
 950         url = xml.attrib['url']
 951         result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, url, url)]
 952     append_text(ctx, xml.tail, result)
 953     return result
 954
 955
 956 def convert_userinput(ctx, xml):
 957     result = ['<span class="command"><strong>']
 958     append_text(ctx, xml.text, result)
 959     convert_inner(ctx, xml, result)
 960     result.append('</strong></span>')
 961     append_text(ctx, xml.tail, result)
 962     return result
 963
 964
 965 def convert_variablelist(ctx, xml):
 966     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 967 <colgroup>
 968 <col align="left" valign="top">
 969 <col>
 970 </colgroup>
 971 <tbody>"""]
 972     convert_inner(ctx, xml, result)
 973     result.append("""</tbody>
 974 </table></div>""")
 975     return result
 976
 977
 978 def convert_varlistentry(ctx, xml):
 979     result = ['<tr>']
 980
 981     result.append('<td><p>')
 982     term = xml.find('term')
 983     result.extend(convert_span(ctx, term))
 984     result.append('</p></td>')
 985
 986     result.append('<td>')
 987     listitem = xml.find('listitem')
 988     convert_inner(ctx, listitem, result)
 989     result.append('</td>')
 990
 991     result.append('<tr>')
 992     return result
 993
 994
 995 def convert_xref(ctx, xml):
 996     linkend = xml.attrib['linkend']
 997     (tid, href) = fixxref.GetXRef(linkend)
 998     title = titles.get(tid)
 999     # all sectN need to become 'section
1000     tag = title['tag']
1001     tag = {
1002         'sect1': 'section',
1003         'sect2': 'section',
1004         'sect3': 'section',
1005         'sect4': 'section',
1006         'sect5': 'section',
1007     }.get(tag, tag)
1008     result = [
1009         '<a class="xref" href="%s" title="%s">the %s called “%s”</a>' %
1010         (href, title['title'], tag, ''.join(convert_title(ctx, title['xml'])))
1011     ]
1012
1013     append_text(ctx, xml.tail, result)
1014     return result
1015
1016
# Dispatch table: docbook tag name -> converter function.
# Every converter takes (ctx, xml) and, as far as visible in this file,
# returns a list of html fragments. Several tags share a generic converter
# (e.g. convert_span, convert_code, convert_div). 'title' maps to
# convert_skip since titles are converted explicitly by the converter of
# the surrounding element (see convert_title).
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_emphasis,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinegraphic': convert_inlinegraphic,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'title': convert_skip,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
    'xref': convert_xref,
}
1098
1099 # conversion helpers
1100
# Common start of every generated html page. It is a %-format template with
# two '%s' placeholders: the page title and the already formatted <link>
# tags for the <head> (see generate_head_links()).
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
1110
1111
def generate_head_links(ctx):
    """Build the <link rel=...> tags for the html <head>.

    A 'home' link is always written from ctx['nav_home']. 'up', 'prev' and
    'next' links are added when the matching 'nav_*' key is in ctx.

    Returns:
      the link tags as one string, one tag per line
    """
    link_fmt = '<link rel="%s" href="%s" title="%s">\n'
    home = ctx['nav_home']
    links = [link_fmt % ('home', home.filename, home.title)]
    for rel in ('up', 'prev', 'next'):
        key = 'nav_' + rel
        if key in ctx:
            target = ctx[key]
            links.append(link_fmt % (rel, target.filename, target.title))
    return ''.join(links)
1127
1128
def generate_nav_links(ctx):
    """Build the table cells with the navigation icons.

    The 'home' cell is always a link to ctx['nav_home']. For 'up', 'prev'
    and 'next' a linked icon is written when the matching 'nav_*' key is in
    ctx, otherwise the '-insensitive' variant of the icon is shown unlinked.

    Returns:
      the <td> cells as one string
    """
    linked = ('<td><a accesskey="%s" href="%s"><img src="%s.png" width="16" '
              'height="16" border="0" alt="%s"></a></td>')
    unlinked = '<td><img src="%s-insensitive.png" width="16" height="16" border="0"></td>'

    cells = [linked % ('h', ctx['nav_home'].filename, 'home', 'Home')]
    # (ctx key, accesskey, icon base name, alt text)
    buttons = (
        ('nav_up', 'u', 'up', 'Up'),
        ('nav_prev', 'p', 'left', 'Prev'),
        ('nav_next', 'n', 'right', 'Next'),
    )
    for key, accesskey, icon, alt in buttons:
        if key in ctx:
            cells.append(linked % (accesskey, ctx[key].filename, icon, alt))
        else:
            cells.append(unlinked % icon)

    return ''.join(cells)
1154
1155
def generate_toc(ctx, node):
    """Build the table-of-contents entries for the children of a node.

    Each child becomes a <dt> with a link (filename plus optional anchor)
    and, if set, its subtitle. Children that have children themselves get
    a nested <dd><dl> produced by a recursive call.

    Returns:
      list of html fragments; empty if the node has no children
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = (child.filename + child.anchor) if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (
                child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if child.children:
            entries.append('<dd><dl>')
            entries += generate_toc(ctx, child)
            entries.append('</dl></dd>')
    return entries
1173
1174
def generate_basic_nav(ctx):
    """Build the navigation table with an empty shortcuts cell.

    Returns:
      the html table as one string; the navigation icons come from
      generate_nav_links()
    """
    nav_cells = generate_nav_links(ctx)
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """
    return template % nav_cells
1183
1184
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the navigation table with one shortcut per given division.

    Args:
      ctx: the conversion context dict
      divs: the xml elements to create shortcuts for; their titles are used
            as the link label and, prepended with prefix, as the anchor
      prefix: string put in front of the title in the shortcut anchor
      span_id: suffix for the id ('nav_<span_id>') of the shortcuts span

    Returns:
      the html table as one string
    """
    shortcut_fmt = '<a class="shortcut" href="#%s%s">%s</a>'
    titles_of_divs = (xml_get_title(ctx, div) for div in divs)
    shortcuts = [shortcut_fmt % (prefix, title, title) for title in titles_of_divs]
    separator = '\n<span class="dim">|</span>\n'

    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """
    return template % (span_id, separator.join(shortcuts), generate_nav_links(ctx))
1202
1203
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to result.

    Besides a 'Top' link the table gets a shortcut for every given refsect1
    that is not a '*_proto' section and that has an 'id' containing a '.'.

    Args:
      ctx: the conversion context dict
      refsect1s: the 'refsect1' elements of the refentry
      result: list of html fragments that is extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    shortcut_fmt = """
          <span id="nav_%s">
            <span class="dim">|</span>
            <a href="#%s" class="shortcut">%s</a>
          </span>
          """
    for sect in refsect1s:
        # don't list TOC sections (role="xxx_proto")
        if sect.attrib.get('role', '').endswith("_proto"):
            continue
        # skip sections without 'id' attrs and foreign sections (no '.')
        ref_id = sect.attrib.get('id')
        if ref_id is None or '.' not in ref_id:
            continue

        title = xml_get_title(ctx, sect)
        span_id = ref_id.split('.')[1].replace('-', '_')
        result.append(shortcut_fmt % (span_id, ref_id, title))

    table_close = """
    </td>
    %s
  </tr>
</table>
"""
    result.append(table_close % generate_nav_links(ctx))
1238
1239
def generate_footer(ctx):
    """Build the footnotes block for the end of a page.

    Returns:
      list of html fragments: all entries of ctx['footnotes'] wrapped in a
      '<div class="footnotes">', or an empty list if ctx has no 'footnotes'
    """
    if 'footnotes' not in ctx:
        return []

    footer = [
        '<div class="footnotes">\n\n'
        '<br><hr style="width:100; text-align:left;margin-left: 0">\n'
    ]
    for footnote in ctx['footnotes']:
        footer += footnote
    footer.append('</div>\n')
    return footer
1250
1251
def get_id_path(node):
    """Generate the index path that is used to build an 'id' for a node.

    We walk up the xml-tree from node.xml and record the 1-based position
    of each element among the children of its parent. When reaching the top
    of the xml-tree we continue with the chunked-tree and record the 1-based
    'idx' of the node and each of its ancestors.

    Args:
      node: a node of the chunked-tree; needs 'xml', 'idx' and 'parent'

    Returns:
      list of index strings, outermost level first
    """
    # Collect innermost-first and reverse once at the end instead of
    # inserting at the front of the list for every level.
    path = []
    xml = node.xml
    parent = xml.getparent()
    while parent is not None:
        # parent.index() replaces the deprecated getchildren().index()
        path.append(str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    while node is not None:
        path.append(str(node.idx + 1))
        node = node.parent

    path.reverse()
    return path
1271
1272
def get_id(node):
    """Return the 'id' of a node of the chunked-tree.

    If the xml element of the node has a non-empty 'id' attribute it is
    returned as is. Otherwise an id of the form 'id-<n>.<n>...' is generated
    from the position of the node (see get_id_path()).
    """
    xml = node.xml
    explicit_id = xml.attrib.get('id')
    if not explicit_id:
        # TODO: this is moot if nothing links to it, we could also consider
        # to omit the <a name="$id"></a> tag.
        logging.info('%d: No "id" attribute on "%s", generating one',
                     xml.sourceline, xml.tag)
        # logging.warning('%s: id indexes: %s', node.filename, str(ix))
        return 'id-' + '.'.join(get_id_path(node))
    return explicit_id
1286
1287
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk into a page with title, table of contents and body.

    Args:
      ctx: the conversion context dict; ctx['node'] is the chunk to convert
      div_class: css class for the <div> that wraps the page content
      title_tag: html tag (e.g. 'h1') for the page title

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]
    if node.title:
        page.append(
            '\n<div class="titlepage">\n'
            '<%s class="title"><a name="%s"></a>%s</%s>\n'
            '</div>' % (title_tag, get_id(node), node.title, title_tag))

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        page.append('<p><b>Table of Contents</b></p>\n'
                    '    <div class="toc">\n'
                    '      <dl class="toc">\n'
                    '    ')
        page.extend(toc_entries)
        page.append('</dl>\n'
                    '    </div>\n'
                    '    ')
    convert_inner(ctx, node.xml, page)
    page.extend(generate_footer(ctx))
    page.append('</div>\n</body>\n</html>')
    return page
1319
1320
1321 # docbook chunks
1322
1323
def convert_book(ctx):
    """Convert the 'book' chunk into the start page.

    The page consists of the title banner, the converted 'bookinfo' and the
    table of contents for the whole tree.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    banner = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
"""
    page = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        banner % node.title,
    ]
    # a book is expected to have a 'bookinfo' child
    bookinfo = node.xml.findall('bookinfo')[0]
    page.extend(convert_bookinfo(ctx, bookinfo))
    page.append('<div class="toc">\n  <dl class="toc">\n')
    page.extend(generate_toc(ctx, node.root))
    page.append('</dl>\n</div>\n')
    page.extend(generate_footer(ctx))
    page.append('</div>\n</body>\n</html>')
    return page
1348
1349
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: a page with toc, div class 'chapter', h2 title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1352
1353
def convert_glossary(ctx):
    """Convert the 'glossary' chunk into a page.

    The direct 'glossdiv' children provide the shortcuts of the navigation
    and are converted one after the other as the page content.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')
    page_title = node.title + ": " + node.root.title

    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'),
        '<div class="glossary">\n'
        '<div class="titlepage"><h%1d class="title">\n'
        '<a name="%s"></a>%s</h%1d>\n'
        '</div>' % (node.depth, get_id(node), node.title, node.depth)
    ]
    for glossdiv in glossdivs:
        page.extend(convert_glossdiv(ctx, glossdiv))
    page.extend(generate_footer(ctx))
    page.append('</div>\n</body>\n</html>')
    return page
1373
1374
def convert_index(ctx):
    """Convert the 'index' chunk into a page.

    The 'indexdiv' elements nested in the first 'indexdiv' provide the
    shortcuts of the navigation and are converted one after the other as
    the page content.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv')
    page_title = node.title + ": " + node.root.title

    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        '<div class="index">\n'
        '<div class="titlepage"><h%1d class="title">\n'
        '<a name="%s"></a>%s</h%1d>\n'
        '</div>' % (node.depth, get_id(node), node.title, node.depth)
    ]
    for indexdiv in indexdivs:
        page.extend(convert_indexdiv(ctx, indexdiv))
    page.extend(generate_footer(ctx))
    page.append('</div>\n</body>\n</html>')
    return page
1395
1396
def convert_part(ctx):
    """Convert a 'part' chunk: a page with toc, div class 'part', h1 title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1399
1400
def convert_preface(ctx):
    """Convert the 'preface' chunk into a page.

    The page has the basic navigation, an optional h2 title and the
    converted content of the preface. There is no table of contents.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]
    if node.title:
        page.append(
            '\n<div class="titlepage">\n'
            '<h2 class="title"><a name="%s"></a>%s</h2>\n'
            '</div>' % (get_id(node), node.title))
    convert_inner(ctx, node.xml, page)
    page.extend(generate_footer(ctx))
    page.append('</div>\n</body>\n</html>')
    return page
1419
1420
def convert_reference(ctx):
    """Convert a 'reference' chunk: a page with toc, div class 'reference', h1 title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1423
1424
def convert_refentry(ctx):
    """Convert a 'refentry' chunk into a page.

    The page has a navigation with shortcuts to the refsect1 sections, a
    header with title, subtitle and an optional gallery image (taken from
    refmeta/refmiscinfo/inlinegraphic) and the converted refsect1 sections.

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Walk down refmeta -> refmiscinfo -> inlinegraphic, always taking the
    # first match; give up as soon as one level is missing.
    graphic = node.xml
    for tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        graphic = graphic.find(tag)
        if graphic is None:
            break
    if graphic is None:
        gallery = ''
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, graphic))

    page_title = node.title + ": " + node.root.title
    page = [
        HTML_HEADER % (page_title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, page)
    header = """
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
"""
    page.append(header % (
        node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for refsect1 in refsect1s:
        page.extend(convert_refsect1(ctx, refsect1))
    page.extend(generate_footer(ctx))
    page.append('</div>\n</body>\n</html>')
    return page
1464
1465
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: a page with toc, div class 'sect1', h2 title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1468
1469
# Dispatch table: name of a chunk node -> function that renders the whole
# html page for it. Each function takes the ctx dict and returns a list of
# html fragments; convert() looks up node.name here and writes the result.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
1482
1483
def generate_nav_nodes(files, node):
    """Determine the navigation targets for a node.

    Args:
      files: list of all nodes in output order; must contain node
      node: the node to get the navigation for

    Returns:
      dict with 'nav_home' (the root) and, where available, 'nav_up' (the
      parent), 'nav_prev' and 'nav_next' (the neighbours in files)
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent
    pos = files.index(node)
    if pos > 0:
        nav['nav_prev'] = files[pos - 1]
    if pos + 1 < len(files):
        nav['nav_next'] = files[pos + 1]
    return nav
1497
1498
def convert(out_dir, module, files, node, src_lang):
    """Convert one docbook chunk to a html file.

    The output file is created even if there is no converter for the chunk;
    in that case a warning is logged and the file stays empty.

    Args:
      out_dir: already created output dir
      module: the module name, stored in the context as 'module'
      files: list of nodes in the tree in pre-order
      node: current tree node
      src_lang: stored in the context as 'src-lang'
    """

    logging.info('Writing: %s', node.filename)
    out_path = os.path.join(out_dir, node.filename)
    with open(out_path, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
            'src-lang': src_lang,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
1524
1525
def create_devhelp2_toc(node):
    """Build the nested <sub> entries of the devhelp2 toc.

    Children without children become self-closing <sub/> tags, all others
    wrap the entries of their own children.

    Returns:
      list of xml fragments for all descendants of node
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
1536
1537
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into xml attributes.

    The condition holds '|' separated entries of the form 'name:text' or
    just 'name' (e.g. since, deprecated, ...). Double quotes are replaced
    by '&quot;'.

    Returns:
      ' name="text" ...' with a leading space, or '' if the node has no
      'condition' attribute
    """
    if 'condition' not in node.attrib:
        return ''

    condition = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for entry in condition.split('|'):
        # deprecated can have no description, then text is ''
        name, _, text = entry.partition(':')
        keywords.append('{}="{}"'.format(name, text))
    return ' ' + ' '.join(keywords)
1552
1553
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 <keyword> tag for a refsect2 element.

    The 'role' becomes the type, the section title the name and base_link
    plus the 'id' the link. Attributes from 'condition' are added as well.
    """
    role = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        role, name, link, conditions)
1558
1559
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 <keyword> tag for a member row of a refsect3.

    The 'role' of the node becomes the type, title the name and base_link
    plus name the link. Attributes from 'condition' are added as well.
    """
    role = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        role, title, link, conditions)
1564
1565
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into out_dir.

    The file contains the table of contents and one keyword for each
    refsect2 with a 'role' plus keywords for the enum and struct members
    listed in its refsect3 children.

    The book title and the online url are read from /book/bookinfo. If the
    title is missing an empty title is written; if there is no ulink with
    role 'online-location' the 'online' attribute is omitted.

    The content is built completely before the file is opened, so that a
    failure does not leave a truncated file behind.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the book name
      xml: root element of the document; must support xpath()
      files: list of chunk nodes; files[0].root is the root of the toc
    """
    title = ''
    online_attr = ''
    # xpath() returns a (possibly empty) list, never None
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        title_nodes = bookinfo.xpath('./title/text()')
        if title_nodes:
            title = title_nodes[0]
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_attr = ' online="%s"' % online_urls[0]
        # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append("""  </chapters>
  <functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        refsect2_nodes = refsect2(node.xml)
        for refsect2_node in refsect2_nodes:
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum members: the text of the para is the keyword name
            refsect3_nodes = refsect3_enum(refsect2_node)
            for refsect3_node in refsect3_nodes:
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
            # struct members: the id is used as keyword name and link
            refsect3_nodes = refsect3_struct(refsect2_node)
            for refsect3_node in refsect3_nodes:
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

    result.append("""  </functions>
</book>
""")

    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
1615
1616
def get_dirs(uninstalled):
    """Determine the directories with the gtk-doc data files.

    Args:
      uninstalled: if true, locate the files relative to the script (or
                   $ABS_TOP_SRCDIR), otherwise use the configured datadir

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir_xsl = os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'
        if os.path.exists(srcdir_xsl):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1630
1631
def main(module, index_file, out_dir, uninstalled, src_lang):
    """Run the whole docbook to html conversion for one module.

    The steps are: load the document, copy the data files, load the xref
    targets, chunk the document, extract the link/title/glossary tables,
    write the devhelp2 file and finally write one html file per chunk.
    The duration of each step is logged (at warning level).

    Args:
      module: the module name
      index_file: path of the main docbook xml document
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the data files
      src_lang: passed on to convert() for each chunk

    Returns:
      None (there is no return statement); run() passes this to sys.exit()
    """

    # == Loading phase ==
    # the next 3 steps could be done in parallel

    # 1) load the document
    _t = timer()
    # does not seem to be faster
    # parser = etree.XMLParser(collect_ids=False)
    # tree = etree.parse(index_file, parser)
    tree = etree.parse(index_file)
    # resolve the xml-includes in place
    tree.xinclude()
    logging.warning("1: %7.3lf: load doc", timer() - _t)

    # 2) copy datafiles
    _t = timer()
    # TODO: handle additional images
    (gtkdocdir, styledir) = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    css_file = os.path.join(styledir, 'style.css')
    for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
        shutil.copy(f, out_dir)
    # ... and append the style definitions of the formatter to the copied
    # stylesheet (the file is opened in append mode)
    css_file = os.path.join(out_dir, 'style.css')
    with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())
    logging.warning("2: %7.3lf: copy datafiles", timer() - _t)

    # 3) load xref targets
    _t = timer()
    # TODO: migrate options from fixxref
    # TODO: ideally explicitly specify the files we need, this will save us the
    # globbing and we'll load less files.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
    logging.warning("3: %7.3lf: load xrefs", timer() - _t)

    # == Processing phase ==

    # 4) recursively walk the tree and chunk it into a python tree so that we
    #    can generate navigation and link tags.
    _t = timer()
    files = chunk(tree.getroot(), module)
    # only nodes without an anchor are kept as entries of the files list
    files = [f for f in PreOrderIter(files) if f.anchor is None]
    logging.warning("4: %7.3lf: chunk doc", timer() - _t)

    # 5) extract tables:
    _t = timer()
    # TODO: can be done in parallel
    # - find all 'id' attribs and add them to the link map
    # - .. get their titles and store them into the titles map
    add_id_links_and_titles(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)
    logging.warning("5: %7.3lf: extract tables", timer() - _t)

    # == Output phase ==
    # the next two steps could be done in parallel

    # 6) create a xxx.devhelp2 file
    _t = timer()
    create_devhelp2(out_dir, module, tree.getroot(), files)
    logging.warning("6: %7.3lf: create devhelp2", timer() - _t)

    # 7) iterate the tree and output files
    _t = timer()
    # TODO: can be done in parallel, figure out why this is not faster
    # from multiprocessing.pool import Pool
    # with Pool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    # from multiprocessing.pool import ThreadPool
    # with ThreadPool(4) as p:
    #     p.apply_async(convert, args=(out_dir, module, files))
    for node in files:
        convert(out_dir, module, files, node, src_lang)
    logging.warning("7: %7.3lf: create html", timer() - _t)
1706
1707
def run(options):
    """Entry point: create the output dir and run main().

    Args:
      options: parsed command line options; 'args' holds the module name
               and the path of the main document, 'uninstalled' and
               'src_lang' are passed on to main()

    The process exits with the result of main().
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine (errno EEXIST)
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled, options.src_lang))