gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tools will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
  34 xml can be easilly supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42   - glossary/index: depending on the parents, the headings as h1/h2
  43     - maybe track depth when chunking
  44   - handle 'label' attributes on part/chapter/section-types
  45     - the titles will have a generated prefix, such as 'Part I:'
  46     - in the toc it would only be only the label: 'I.'
  47   - we need to separate the toc data from the chunking tree
  48     - since we don't chunk first 'secttion'/'sect1' those are missing from the
  49       toc
  50     - the toc also lists 'sect2' (TODO: check how deep it goes)
  51   - replace get_title with a result.extend(convert_title(ctx, title_tag))
  52     - see convert_table()
  53 - check each docbook tag if it can contain #PCDATA, if not don't check for
  54   xml.text
  55 - consider some perf-warnings flag
  56   - see 'No "id" attribute on'
  57
  58 OPTIONAL:
  59 - minify html: https://pypi.python.org/pypi/htmlmin/
  60
  61 Requirements:
  62 sudo pip3 install anytree lxml pygments
  63
  64 Example invocation:
  65 cd tests/bugs/docs/
  66 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  67 xdg-open db2html/index.html
  68 meld html db2html
  69
  70 Benchmarking:
  71 cd tests/bugs/docs/;
  72 rm html-build.stamp; time make html-build.stamp
  73 """
  74
  75 import argparse
  76 import errno
  77 import logging
  78 import os
  79 import shutil
  80 import sys
  81
  82 from anytree import Node, PreOrderIter
  83 from copy import deepcopy
  84 from glob import glob
  85 from lxml import etree
  86 from pygments import highlight
  87 from pygments.lexers import CLexer
  88 from pygments.formatters import HtmlFormatter
  89
  90 from . import config, fixxref
  91
  92 # pygments setup
  93 # lazily constructed lexer cache
  94 LEXERS = {
  95     'c': CLexer()
  96 }
  97 HTML_FORMATTER = HtmlFormatter(nowrap=True)
  98
  99
 100 class ChunkParams(object):
 101     def __init__(self, prefix, parent=None, min_idx=0):
 102         self.prefix = prefix
 103         self.parent = parent
 104         self.min_idx = min_idx
 105
 106
 107 # TODO: look up the abbrevs and hierarchy for other tags
 108 # http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
 109 # https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
 110 #
 111 # If not defined, we can just create an example without an 'id' attr and see
 112 # docbook xsl does.
 113 CHUNK_PARAMS = {
 114     'appendix': ChunkParams('app', 'book'),
 115     'book': ChunkParams('bk'),
 116     'chapter': ChunkParams('ch', 'book'),
 117     'glossary': ChunkParams('go', 'book'),
 118     'index': ChunkParams('ix', 'book'),
 119     'part': ChunkParams('pt', 'book'),
 120     'preface': ChunkParams('pr', 'book'),
 121     'refentry': ChunkParams('re', 'book'),
 122     'reference': ChunkParams('rn', 'book'),
 123     'sect1': ChunkParams('s', 'chapter', 1),
 124     'section': ChunkParams('s', 'chapter', 1),
 125 }
 126 # TAGS we don't support:
 127 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
 128
 129 TITLE_XPATHS = {
 130     '_': (etree.XPath('./title'), None),
 131     'book': (etree.XPath('./bookinfo/title'), None),
 132     'refentry': (
 133         etree.XPath('./refmeta/refentrytitle'),
 134         etree.XPath('./refnamediv/refpurpose')
 135     ),
 136 }
 137
 138 ID_XPATH = etree.XPath('//@id')
 139
 140 GLOSSENTRY_XPATH = etree.XPath('//glossentry')
 141 glossary = {}
 142
 143 footnote_idx = 1
 144
 145
 146 def gen_chunk_name(node, chunk_params, idx):
 147     """Generate a chunk file name
 148
 149     This is either based on the id or on the position in the doc. In the latter
 150     case it uses a prefix from CHUNK_PARAMS.
 151     """
 152     if 'id' in node.attrib:
 153         return node.attrib['id']
 154
 155     name = ('%s%02d' % (chunk_params.prefix, idx))
 156     # handle parents to make names of nested tags unique
 157     # TODO: we only need to prepend the parent if there are > 1 of them in the
 158     #       xml. None, the parents we have are not sufficient, e.g. 'index' can
 159     #       be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 160     #       when we chunk explicitly and on each level maintain the 'idx'
 161     # while naming.parent:
 162     #     parent = naming.parent
 163     #     if parent not in CHUNK_PARAMS:
 164     #         break;
 165     #     chunk_params = CHUNK_PARAMS[parent]
 166     #     name = ('%s%02d' % (naming.prefix, idx)) + name
 167     logging.info('Gen chunk name: "%s"', name)
 168     return name
 169
 170
 171 def get_chunk_titles(node):
 172     tag = node.tag
 173     if tag not in TITLE_XPATHS:
 174         # Use defaults
 175         (title, subtitle) = TITLE_XPATHS['_']
 176     else:
 177         (title, subtitle) = TITLE_XPATHS[tag]
 178
 179     result = {
 180         'title': None,
 181         'title_tag': None,
 182         'subtitle': None,
 183         'subtitle_tag': None
 184     }
 185     res = title(node)
 186     if res:
 187         xml = res[0]
 188         result['title'] = xml.text
 189         if xml.tag != 'title':
 190             result['title_tag'] = xml.tag
 191         else:
 192             result['title_tag'] = tag
 193
 194     if subtitle:
 195         res = subtitle(node)
 196         if res:
 197             xml = res[0]
 198             result['subtitle'] = xml.text
 199             result['subtitle_tag'] = xml.tag
 200     return result
 201
 202
 203 def chunk(xml_node, idx=0, parent=None):
 204     """Chunk the tree.
 205
 206     The first time, we're called with parent=None and in that case we return
 207     the new_node as the root of the tree
 208     """
 209     tag = xml_node.tag
 210     chunk_params = CHUNK_PARAMS.get(tag)
 211     # TODO: if this is None, we should stop traversing, right?
 212
 213     # also check idx to handle 'sect1'/'section' special casing
 214     if chunk_params and idx >= chunk_params.min_idx:
 215         logging.info('chunk tag: "%s"[%d]', tag, idx)
 216         if parent:
 217             # remove the xml-node from the parent
 218             sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
 219             xml_node.getparent().remove(xml_node)
 220             xml_node = sub_tree
 221
 222         title_args = get_chunk_titles(xml_node)
 223         chunk_name = gen_chunk_name(xml_node, chunk_params, (idx + 1))
 224         parent = Node(tag, parent=parent, xml=xml_node,
 225                       filename=chunk_name + '.html', **title_args)
 226
 227     idx = 0
 228     for child in xml_node:
 229         chunk(child, idx, parent)
 230         if child.tag in CHUNK_PARAMS:
 231             idx += 1
 232
 233     return parent
 234
 235
 236 def add_id_links(files, links):
 237     for node in files:
 238         chunk_name = node.filename[:-5]
 239         chunk_base = node.filename + '#'
 240         for attr in ID_XPATH(node.xml):
 241             if attr == chunk_name:
 242                 links[attr] = node.filename
 243             else:
 244                 links[attr] = chunk_base + attr
 245
 246
 247 def build_glossary(files):
 248     for node in files:
 249         if node.xml.tag != 'glossary':
 250             continue
 251         for term in GLOSSENTRY_XPATH(node.xml):
 252             # TODO: there can be all kind of things in a glossary. This only supports
 253             # what we commonly use
 254             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 255             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 256             glossary[key] = value
 257             # logging.debug('glosentry: %s:%s', key, value)
 258
 259
 260 # conversion helpers
 261
 262
 263 def convert_inner(ctx, xml, result):
 264     for child in xml:
 265         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 266
 267
 268 def convert_ignore(ctx, xml):
 269     result = []
 270     convert_inner(ctx, xml, result)
 271     return result
 272
 273
 274 def convert_skip(ctx, xml):
 275     return ['']
 276
 277
 278 def append_text(text, result):
 279     if text and text.strip():
 280         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 281
 282
 283 missing_tags = {}
 284
 285
 286 def convert__unknown(ctx, xml):
 287     # don't recurse on subchunks
 288     if xml.tag in CHUNK_PARAMS:
 289         return []
 290     if isinstance(xml, etree._Comment):
 291         return ['<!-- ' + xml.text + '-->\n']
 292     else:
 293         # warn only once
 294         if xml.tag not in missing_tags:
 295             logging.warning('Add tag converter for "%s"', xml.tag)
 296             missing_tags[xml.tag] = True
 297         result = ['<!-- ' + xml.tag + '-->\n']
 298         convert_inner(ctx, xml, result)
 299         result.append('<!-- /' + xml.tag + '-->\n')
 300         return result
 301
 302
 303 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 304     result = ['<div class="%s">\n' % xml.tag]
 305     title = xml.find('title')
 306     if title is not None:
 307         if 'id' in xml.attrib:
 308             result.append('<a name="%s"></a>' % xml.attrib['id'])
 309         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 310         xml.remove(title)
 311     append_text(xml.text, result)
 312     inner_func(ctx, xml, result)
 313     result.append('</div>')
 314     append_text(xml.tail, result)
 315     return result
 316
 317
 318 def xml_get_title(xml):
 319     title = xml.find('title')
 320     if title is not None:
 321         return title.text
 322     else:
 323         # TODO(ensonic): any way to get the file (inlcudes) too?
 324         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 325         return ''
 326
 327
 328 # docbook tags
 329
 330
 331 def convert_abstract(ctx, xml):
 332     result = ["""<div class="abstract">
 333     <p class="title"><b>Abstract</b></p>"""]
 334     append_text(xml.text, result)
 335     convert_inner(ctx, xml, result)
 336     result.append('</div>')
 337     append_text(xml.tail, result)
 338     return result
 339
 340
 341 def convert_acronym(ctx, xml):
 342     key = xml.text
 343     title = glossary.get(key, '')
 344     # TODO: print a sensible warning if missing
 345     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 346     if xml.tail:
 347         result.append(xml.tail)
 348     return result
 349
 350
 351 def convert_anchor(ctx, xml):
 352     return ['<a name="%s"></a>' % xml.attrib['id']]
 353
 354
 355 def convert_bookinfo(ctx, xml):
 356     result = ['<div class="titlepage">']
 357     convert_inner(ctx, xml, result)
 358     result.append("""<hr>
 359 </div>""")
 360     if xml.tail:
 361         result.append(xml.tail)
 362     return result
 363
 364
 365 def convert_blockquote(ctx, xml):
 366     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 367     append_text(xml.text, result)
 368     convert_inner(ctx, xml, result)
 369     result.append('</blockquote>\n</div>')
 370     append_text(xml.tail, result)
 371     return result
 372
 373
 374 def convert_code(ctx, xml):
 375     result = ['<code class="%s">' % xml.tag]
 376     append_text(xml.text, result)
 377     convert_inner(ctx, xml, result)
 378     result.append('</code>')
 379     append_text(xml.tail, result)
 380     return result
 381
 382
 383 def convert_colspec(ctx, xml):
 384     result = ['<col']
 385     a = xml.attrib
 386     if 'colname' in a:
 387         result.append(' class="%s"' % a['colname'])
 388     if 'colwidth' in a:
 389         result.append(' width="%s"' % a['colwidth'])
 390     result.append('>\n')
 391     # is in tgroup and there can be no 'text'
 392     return result
 393
 394
 395 def convert_command(ctx, xml):
 396     result = ['<strong class="userinput"><code>']
 397     append_text(xml.text, result)
 398     convert_inner(ctx, xml, result)
 399     result.append('</code></strong>')
 400     append_text(xml.tail, result)
 401     return result
 402
 403
 404 def convert_corpauthor(ctx, xml):
 405     result = ['<div><h3 class="corpauthor">\n']
 406     append_text(xml.text, result)
 407     convert_inner(ctx, xml, result)
 408     result.append('</h3></div>\n')
 409     append_text(xml.tail, result)
 410     return result
 411
 412
 413 def convert_div(ctx, xml):
 414     result = ['<div class="%s">\n' % xml.tag]
 415     append_text(xml.text, result)
 416     convert_inner(ctx, xml, result)
 417     result.append('</div>')
 418     append_text(xml.tail, result)
 419     return result
 420
 421
 422 def convert_em_class(ctx, xml):
 423     result = ['<em class="%s"><code>' % xml.tag]
 424     append_text(xml.text, result)
 425     convert_inner(ctx, xml, result)
 426     result.append('</code></em>')
 427     append_text(xml.tail, result)
 428     return result
 429
 430
 431 def convert_entry(ctx, xml):
 432     entry_type = ctx['table.entry']
 433     result = ['<' + entry_type]
 434     if 'role' in xml.attrib:
 435         result.append(' class="%s"' % xml.attrib['role'])
 436     if 'morerows' in xml.attrib:
 437         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 438     result.append('>')
 439     append_text(xml.text, result)
 440     convert_inner(ctx, xml, result)
 441     result.append('</' + entry_type + '>')
 442     append_text(xml.tail, result)
 443     return result
 444
 445
 446 def convert_footnote(ctx, xml):
 447     footnotes = ctx.get('footnotes', [])
 448     # footnotes idx is not per page, but per doc
 449     global footnote_idx
 450     idx = footnote_idx
 451     footnote_idx += 1
 452
 453     # need a pair of ids for each footnote (docbook generates different ids)
 454     this_id = 'footnote-%d' % idx
 455     that_id = 'ftn.' + this_id
 456
 457     inner = ['<div id="%s" class="footnote">' % that_id]
 458     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 459         this_id, idx))
 460     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 461     # get double nested paras :/.
 462     # convert_inner(ctx, xml, inner)
 463     para = xml.find('para')
 464     if para is None:
 465         para = xml.find('simpara')
 466     if para is not None:
 467         inner.append(para.text)
 468     else:
 469         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 470                         etree.tostring(xml, method="text", encoding=str).strip())
 471     inner.append('</p></div>')
 472     footnotes.append(inner)
 473     ctx['footnotes'] = footnotes
 474     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 475         that_id, this_id, idx)]
 476
 477
 478 def convert_formalpara(ctx, xml):
 479     result = None
 480     title_tag = xml.find('title')
 481     result = ['<p><b>%s</b>' % title_tag.text]
 482     para_tag = xml.find('para')
 483     append_text(para_tag.text, result)
 484     convert_inner(ctx, para_tag, result)
 485     append_text(para_tag.tail, result)
 486     result.append('</p>')
 487     append_text(xml.tail, result)
 488     return result
 489
 490
 491 def convert_glossdef(ctx, xml):
 492     result = ['<dd class="glossdef">']
 493     convert_inner(ctx, xml, result)
 494     result.append('</dd>\n')
 495     return result
 496
 497
 498 def convert_glossdiv(ctx, xml):
 499     title_tag = xml.find('title')
 500     title = title_tag.text
 501     xml.remove(title_tag)
 502     result = [
 503         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 504     ]
 505     convert_inner(ctx, xml, result)
 506     return result
 507
 508
 509 def convert_glossentry(ctx, xml):
 510     result = []
 511     convert_inner(ctx, xml, result)
 512     return result
 513
 514
 515 def convert_glossterm(ctx, xml):
 516     glossid = ''
 517     text = ''
 518     anchor = xml.find('anchor')
 519     if anchor is not None:
 520         glossid = anchor.attrib.get('id', '')
 521         text += anchor.tail or ''
 522     text += xml.text or ''
 523     if glossid == '':
 524         glossid = 'glossterm-' + text
 525     return [
 526         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 527             glossid, text)
 528     ]
 529
 530
 531 def convert_imageobject(ctx, xml):
 532     imagedata = xml.find('imagedata')
 533     if imagedata is not None:
 534         # TODO(ensonic): warn on missing fileref attr?
 535         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 536     else:
 537         return []
 538
 539
 540 def convert_indexdiv(ctx, xml):
 541     title_tag = xml.find('title')
 542     title = title_tag.text
 543     xml.remove(title_tag)
 544     result = [
 545         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 546     ]
 547     convert_inner(ctx, xml, result)
 548     return result
 549
 550
 551 def convert_informaltable(ctx, xml):
 552     result = ['<div class="informaltable"><table class="informaltable"']
 553     a = xml.attrib
 554     if 'pgwide' in a and a['pgwide'] == '1':
 555         result.append(' width="100%"')
 556     if 'frame' in a and a['frame'] == 'none':
 557         result.append(' border="0"')
 558     result.append('>\n')
 559     convert_inner(ctx, xml, result)
 560     result.append('</table></div>')
 561     if xml.tail:
 562         result.append(xml.tail)
 563     return result
 564
 565
 566 def convert_itemizedlist(ctx, xml):
 567     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 568     convert_inner(ctx, xml, result)
 569     result.append('</ul></div>')
 570     if xml.tail:
 571         result.append(xml.tail)
 572     return result
 573
 574
 575 def convert_link(ctx, xml):
 576     linkend = xml.attrib['linkend']
 577     if linkend in fixxref.NoLinks:
 578         linkend = None
 579     result = []
 580     if linkend:
 581         link_text = []
 582         append_text(xml.text, link_text)
 583         convert_inner(ctx, xml, link_text)
 584         # TODO: fixxref does some weird checks in xml.text
 585         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 586     else:
 587         append_text(xml.text, result)
 588         convert_inner(ctx, xml, result)
 589     append_text(xml.tail, result)
 590     return result
 591
 592
 593 def convert_listitem(ctx, xml):
 594     result = ['<li class="listitem">']
 595     convert_inner(ctx, xml, result)
 596     result.append('</li>')
 597     # is in itemizedlist and there can be no 'text'
 598     return result
 599
 600
 601 def convert_literallayout(ctx, xml):
 602     result = ['<div class="literallayout"><p><br>\n']
 603     append_text(xml.text, result)
 604     convert_inner(ctx, xml, result)
 605     result.append('</p></div>')
 606     append_text(xml.tail, result)
 607     return result
 608
 609
 610 def convert_orderedlist(ctx, xml):
 611     result = ['<div class="orderedlistlist"><ol class="orderedlistlist" type="1">']
 612     convert_inner(ctx, xml, result)
 613     result.append('</ol></div>')
 614     append_text(xml.tail, result)
 615     return result
 616
 617
 618 def convert_para(ctx, xml):
 619     result = []
 620     if 'id' in xml.attrib:
 621         result.append('<a name="%s"></a>' % xml.attrib['id'])
 622     if 'role' in xml.attrib:
 623         result.append('<p class="%s">' % xml.attrib['role'])
 624     else:
 625         result.append('<p>')
 626     append_text(xml.text, result)
 627     convert_inner(ctx, xml, result)
 628     result.append('</p>')
 629     append_text(xml.tail, result)
 630     return result
 631
 632
 633 def convert_para_like(ctx, xml):
 634     result = []
 635     if 'id' in xml.attrib:
 636         result.append('<a name="%s"></a>' % xml.attrib['id'])
 637     result.append('<p class="%s">' % xml.tag)
 638     append_text(xml.text, result)
 639     convert_inner(ctx, xml, result)
 640     result.append('</p>')
 641     append_text(xml.tail, result)
 642     return result
 643
 644
 645 def convert_phrase(ctx, xml):
 646     result = ['<span']
 647     if 'role' in xml.attrib:
 648         result.append(' class="%s">' % xml.attrib['role'])
 649     else:
 650         result.append('>')
 651     append_text(xml.text, result)
 652     convert_inner(ctx, xml, result)
 653     result.append('</span>')
 654     append_text(xml.tail, result)
 655     return result
 656
 657
 658 def convert_primaryie(ctx, xml):
 659     result = ['<dt>\n']
 660     convert_inner(ctx, xml, result)
 661     result.append('\n</dt>\n<dd></dd>\n')
 662     return result
 663
 664
 665 def convert_pre(ctx, xml):
 666     result = ['<pre class="%s">\n' % xml.tag]
 667     append_text(xml.text, result)
 668     convert_inner(ctx, xml, result)
 669     result.append('</pre>')
 670     append_text(xml.tail, result)
 671     return result
 672
 673
 674 def convert_programlisting(ctx, xml):
 675     result = []
 676     if xml.attrib.get('role', '') == 'example':
 677         if xml.text:
 678             lang = xml.attrib.get('language', 'c').lower()
 679             if lang not in LEXERS:
 680                 LEXERS[lang] = get_lexer_by_name(lang)
 681             lexer = LEXERS.get(lang, None)
 682             if lexer:
 683                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 684
 685                 # we do own line-numbering
 686                 line_count = highlighted.count('\n')
 687                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 688                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 689   <tbody>
 690     <tr>
 691       <td class="listing_lines" align="right"><pre>%s</pre></td>
 692       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 693     </tr>
 694   </tbody>
 695 </table>
 696 """ % (source_lines, highlighted))
 697             else:
 698                 logging.warn('No pygments lexer for language="%s"', lang)
 699                 result.append('<pre class="programlisting">')
 700                 result.append(xml.text)
 701                 result.append('</pre>')
 702     else:
 703         result.append('<pre class="programlisting">')
 704         append_text(xml.text, result)
 705         convert_inner(ctx, xml, result)
 706         result.append('</pre>')
 707     append_text(xml.tail, result)
 708     return result
 709
 710
 711 def convert_quote(ctx, xml):
 712     result = ['<span class="quote">"<span class="quote">']
 713     append_text(xml.text, result)
 714     convert_inner(ctx, xml, result)
 715     result.append('</span>"</span>')
 716     append_text(xml.tail, result)
 717     return result
 718
 719
 720 def convert_refsect1(ctx, xml):
 721     # Add a divider between two consequitive refsect2
 722     def convert_inner(ctx, xml, result):
 723         prev = None
 724         for child in xml:
 725             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 726                 result.append('<hr>\n')
 727             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 728             prev = child
 729     return convert_sect(ctx, xml, 'h2', convert_inner)
 730
 731
 732 def convert_refsect2(ctx, xml):
 733     return convert_sect(ctx, xml, 'h3')
 734
 735
 736 def convert_refsect3(ctx, xml):
 737     return convert_sect(ctx, xml, 'h4')
 738
 739
 740 def convert_row(ctx, xml):
 741     result = ['<tr>\n']
 742     convert_inner(ctx, xml, result)
 743     result.append('</tr>\n')
 744     return result
 745
 746
 747 def convert_sect1_tag(ctx, xml):
 748     return convert_sect(ctx, xml, 'h2')
 749
 750
 751 def convert_sect2(ctx, xml):
 752     return convert_sect(ctx, xml, 'h3')
 753
 754
 755 def convert_sect3(ctx, xml):
 756     return convert_sect(ctx, xml, 'h4')
 757
 758
 759 def convert_simpara(ctx, xml):
 760     result = ['<p>']
 761     append_text(xml.text, result)
 762     result.append('</p>')
 763     append_text(xml.tail, result)
 764     return result
 765
 766
 767 def convert_span(ctx, xml):
 768     result = ['<span class="%s">' % xml.tag]
 769     append_text(xml.text, result)
 770     convert_inner(ctx, xml, result)
 771     result.append('</span>')
 772     append_text(xml.tail, result)
 773     return result
 774
 775
 776 def convert_table(ctx, xml):
 777     result = ['<div class="table">']
 778     if 'id' in xml.attrib:
 779         result.append('<a name="%s"></a>' % xml.attrib['id'])
 780     title_tag = xml.find('title')
 781     if title_tag is not None:
 782         result.append('<p class="title"><b>')
 783         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 784         result.extend(convert_title(ctx, title_tag))
 785         result.append('</b></p>')
 786         xml.remove(title_tag)
 787     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 788
 789     convert_inner(ctx, xml, result)
 790
 791     result.append('</table></div></div>')
 792     append_text(xml.tail, result)
 793     return result
 794
 795
 796 def convert_tbody(ctx, xml):
 797     result = ['<tbody>']
 798     ctx['table.entry'] = 'td'
 799     convert_inner(ctx, xml, result)
 800     result.append('</tbody>')
 801     # is in tgroup and there can be no 'text'
 802     return result
 803
 804
 805 def convert_tgroup(ctx, xml):
 806     # tgroup does not expand to anything, but the nested colspecs need to
 807     # be put into a colgroup
 808     cols = xml.findall('colspec')
 809     result = []
 810     if cols:
 811         result.append('<colgroup>\n')
 812         for col in cols:
 813             result.extend(convert_colspec(ctx, col))
 814             xml.remove(col)
 815         result.append('</colgroup>\n')
 816     convert_inner(ctx, xml, result)
 817     # is in informaltable and there can be no 'text'
 818     return result
 819
 820
 821 def convert_thead(ctx, xml):
 822     result = ['<thead>']
 823     ctx['table.entry'] = 'th'
 824     convert_inner(ctx, xml, result)
 825     result.append('</thead>')
 826     # is in tgroup and there can be no 'text'
 827     return result
 828
 829
 830 def convert_title(ctx, xml):
 831     # This is always called from some context
 832     result = []
 833     append_text(xml.text, result)
 834     convert_inner(ctx, xml, result)
 835     append_text(xml.tail, result)
 836     return result
 837
 838
 839 def convert_ulink(ctx, xml):
 840     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 841     if xml.tail:
 842         result.append(xml.tail)
 843     return result
 844
 845
 846 def convert_userinput(ctx, xml):
 847     result = ['<span class="command"><strong>']
 848     append_text(xml.text, result)
 849     convert_inner(ctx, xml, result)
 850     result.append('</strong></span>')
 851     append_text(xml.tail, result)
 852     return result
 853
 854
 855 def convert_variablelist(ctx, xml):
 856     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 857 <colgroup>
 858 <col align="left" valign="top">
 859 <col>
 860 </colgroup>
 861 <tbody>"""]
 862     convert_inner(ctx, xml, result)
 863     result.append("""</tbody>
 864 </table></div>""")
 865     return result
 866
 867
 868 def convert_varlistentry(ctx, xml):
 869     result = ['<tr>']
 870
 871     result.append('<td><p>')
 872     term = xml.find('term')
 873     result.extend(convert_span(ctx, term))
 874     result.append('</p></td>')
 875
 876     result.append('<td>')
 877     listitem = xml.find('listitem')
 878     convert_inner(ctx, listitem, result)
 879     result.append('</td>')
 880
 881     result.append('<tr>')
 882     return result
 883
 884
 885 # TODO(ensonic): turn into class with converters as functions and ctx as self
 886 convert_tags = {
 887     'abstract': convert_abstract,
 888     'acronym': convert_acronym,
 889     'anchor': convert_anchor,
 890     'application': convert_span,
 891     'bookinfo': convert_bookinfo,
 892     'blockquote': convert_blockquote,
 893     'caption': convert_div,
 894     'code': convert_code,
 895     'colspec': convert_colspec,
 896     'constant': convert_code,
 897     'command': convert_command,
 898     'corpauthor': convert_corpauthor,
 899     'emphasis': convert_span,
 900     'entry': convert_entry,
 901     'envar': convert_code,
 902     'footnote': convert_footnote,
 903     'filename': convert_code,
 904     'formalpara': convert_formalpara,
 905     'function': convert_code,
 906     'glossdef': convert_glossdef,
 907     'glossdiv': convert_glossdiv,
 908     'glossentry': convert_glossentry,
 909     'glossterm': convert_glossterm,
 910     'imageobject': convert_imageobject,
 911     'indexdiv': convert_indexdiv,
 912     'indexentry': convert_ignore,
 913     'indexterm': convert_skip,
 914     'informalexample': convert_div,
 915     'informaltable': convert_informaltable,
 916     'inlinemediaobject': convert_span,
 917     'itemizedlist': convert_itemizedlist,
 918     'legalnotice': convert_div,
 919     'link': convert_link,
 920     'listitem': convert_listitem,
 921     'literal': convert_code,
 922     'literallayout': convert_literallayout,
 923     'mediaobject': convert_div,
 924     'note': convert_div,
 925     'option': convert_code,
 926     'orderedlist': convert_orderedlist,
 927     'para': convert_para,
 928     'partintro': convert_div,
 929     'parameter': convert_em_class,
 930     'phrase': convert_phrase,
 931     'primaryie': convert_primaryie,
 932     'programlisting': convert_programlisting,
 933     'quote': convert_quote,
 934     'releaseinfo': convert_para_like,
 935     'refsect1': convert_refsect1,
 936     'refsect2': convert_refsect2,
 937     'refsect3': convert_refsect3,
 938     'replaceable': convert_em_class,
 939     'returnvalue': convert_span,
 940     'row': convert_row,
 941     'screen': convert_pre,
 942     'sect1': convert_sect1_tag,
 943     'sect2': convert_sect2,
 944     'sect3': convert_sect3,
 945     'simpara': convert_simpara,
 946     'structfield': convert_em_class,
 947     'structname': convert_span,
 948     'synopsis': convert_pre,
 949     'symbol': convert_span,
 950     'table': convert_table,
 951     'tbody': convert_tbody,
 952     'term': convert_span,
 953     'tgroup': convert_tgroup,
 954     'thead': convert_thead,
 955     'type': convert_span,
 956     'ulink': convert_ulink,
 957     'userinput': convert_userinput,
 958     'varname': convert_code,
 959     'variablelist': convert_variablelist,
 960     'varlistentry': convert_varlistentry,
 961     'warning': convert_div,
 962 }
 963
 964 # conversion helpers
 965
 966 HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 967 <html>
 968 <head>
 969 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
 970 <title>%s</title>
 971 %s<link rel="stylesheet" href="style.css" type="text/css">
 972 </head>
 973 <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
 974 """
 975
 976
 977 def generate_head_links(ctx):
 978     n = ctx['nav_home']
 979     result = [
 980         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
 981     ]
 982     if 'nav_up' in ctx:
 983         n = ctx['nav_up']
 984         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
 985     if 'nav_prev' in ctx:
 986         n = ctx['nav_prev']
 987         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
 988     if 'nav_next' in ctx:
 989         n = ctx['nav_next']
 990         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
 991     return ''.join(result)
 992
 993
 994 def generate_nav_links(ctx):
 995     n = ctx['nav_home']
 996     result = [
 997         '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n.filename
 998     ]
 999     if 'nav_up' in ctx:
1000         n = ctx['nav_up']
1001         result.append(
1002             '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n.filename)
1003     else:
1004         result.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
1005     if 'nav_prev' in ctx:
1006         n = ctx['nav_prev']
1007         result.append(
1008             '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n.filename)
1009     else:
1010         result.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
1011     if 'nav_next' in ctx:
1012         n = ctx['nav_next']
1013         result.append(
1014             '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n.filename)
1015     else:
1016         result.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
1017
1018     return ''.join(result)
1019
1020
1021 def generate_toc(ctx, node):
1022     result = []
1023     for c in node.children:
1024         # TODO: urlencode the filename: urllib.parse.quote_plus()
1025         result.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
1026             c.title_tag, c.filename, c.title))
1027         if c.subtitle:
1028             result.append('<span class="%s"> — %s</span>' % (c.subtitle_tag, c.subtitle))
1029         result.append('</dt>\n')
1030         if c.children:
1031             result.append('<dd><dl>')
1032             result.extend(generate_toc(ctx, c))
1033             result.append('</dl></dd>')
1034     return result
1035
1036
1037 def generate_basic_nav(ctx):
1038     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
1039   <tr valign="middle">
1040     <td width="100%%" align="left" class="shortcuts"></td>
1041     %s
1042   </tr>
1043 </table>
1044     """ % generate_nav_links(ctx)
1045
1046
1047 def generate_alpha_nav(ctx, divs, prefix, span_id):
1048     ix_nav = []
1049     for s in divs:
1050         title = xml_get_title(s)
1051         ix_nav.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title))
1052
1053     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
1054   <tr valign="middle">
1055     <td width="100%%" align="left" class="shortcuts">
1056       <span id="nav_%s">
1057         %s
1058       </span>
1059     </td>
1060     %s
1061   </tr>
1062 </table>
1063     """ % (span_id, '\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
1064
1065
1066 def generate_refentry_nav(ctx, refsect1s, result):
1067     result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
1068   <tr valign="middle">
1069     <td width="100%" align="left" class="shortcuts">
1070       <a href="#" class="shortcut">Top</a>""")
1071
1072     for s in refsect1s:
1073         # don't list TOC sections (role="xxx_proto")
1074         if s.attrib.get('role', '').endswith("_proto"):
1075             continue
1076         # skip section without 'id' attrs
1077         if 'id' not in s.attrib:
1078             continue
1079
1080         title = xml_get_title(s)
1081         result.append("""
1082           <span class="dim">|</span>
1083           <a href="#%s" class="shortcut">%s</a>
1084           """ % (s.attrib['id'], title))
1085     result.append("""
1086     </td>
1087     %s
1088   </tr>
1089 </table>
1090 """ % generate_nav_links(ctx))
1091
1092
1093 def generate_footer(ctx):
1094     result = []
1095     if 'footnotes' in ctx:
1096         result.append("""<div class="footnotes">\n
1097 <br><hr style="width:100; text-align:left;margin-left: 0">
1098 """)
1099         for f in ctx['footnotes']:
1100             result.extend(f)
1101         result.append('</div>\n')
1102     return result
1103
1104
1105 def get_id(node):
1106     xml = node.xml
1107     node_id = xml.attrib.get('id', None)
1108     if node_id:
1109         return node_id
1110
1111     logging.info('%d: No "id" attribute on "%s", generating one',
1112                  xml.sourceline, xml.tag)
1113     ix = []
1114     # Generate the 'id'. We need to walk up the xml-tree and check the positions
1115     # for each sibling.
1116     parent = xml.getparent()
1117     while parent is not None:
1118         children = parent.getchildren()
1119         ix.insert(0, str(children.index(xml) + 1))
1120         xml = parent
1121         parent = xml.getparent()
1122     # logging.warning('%s: id indexes: %s', node.filename, str(ix))
1123     return 'id-1.' + '.'.join(ix)
1124
1125
1126 def convert_chunk_with_toc(ctx, div_class, title_tag):
1127     node = ctx['node']
1128     result = [
1129         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
1130         generate_basic_nav(ctx),
1131         '<div class="%s">' % div_class,
1132     ]
1133     title = node.xml.find('title')
1134     if title is not None:
1135         result.append("""
1136 <div class="titlepage">
1137 <%s class="title"><a name="%s"></a>%s</%s>
1138 </div>""" % (
1139             title_tag, get_id(node), title.text, title_tag))
1140         node.xml.remove(title)
1141
1142     toc = generate_toc(ctx, node)
1143     if toc:
1144         # TODO: not all docbook page types use this extra heading
1145         result.append("""<p><b>Table of Contents</b></p>
1146     <div class="toc">
1147       <dl class="toc">
1148     """)
1149         result.extend(toc)
1150         result.append("""</dl>
1151     </div>
1152     """)
1153     convert_inner(ctx, node.xml, result)
1154     result.extend(generate_footer(ctx))
1155     result.append("""</div>
1156 </body>
1157 </html>""")
1158     return result
1159
1160
1161 # docbook chunks
1162
1163
1164 def convert_book(ctx):
1165     node = ctx['node']
1166     result = [
1167         HTML_HEADER % (node.title, generate_head_links(ctx)),
1168         """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
1169     <tr><th valign="middle"><p class="title">%s</p></th></tr>
1170 </table>
1171 <div class="book">
1172 """ % node.title
1173     ]
1174     bookinfo = node.xml.findall('bookinfo')[0]
1175     # we already used the title
1176     title = bookinfo.find('title')
1177     if title is not None:
1178         bookinfo.remove(title)
1179     result.extend(convert_bookinfo(ctx, bookinfo))
1180     result.append("""<div class="toc">
1181   <dl class="toc">
1182 """)
1183     result.extend(generate_toc(ctx, node.root))
1184     result.append("""</dl>
1185 </div>
1186 """)
1187     result.extend(generate_footer(ctx))
1188     result.append("""</div>
1189 </body>
1190 </html>""")
1191     return result
1192
1193
1194 def convert_chapter(ctx):
1195     return convert_chunk_with_toc(ctx, 'chapter', 'h2')
1196
1197
1198 def convert_glossary(ctx):
1199     node = ctx['node']
1200     glossdivs = node.xml.findall('glossdiv')
1201
1202     result = [
1203         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
1204         generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary'),
1205         """<div class="glossary">
1206 <div class="titlepage"><h1 class="title">
1207 <a name="%s"></a>%s</h1>
1208 </div>""" % (get_id(node), node.title)
1209     ]
1210     for i in glossdivs:
1211         result.extend(convert_glossdiv(ctx, i))
1212     result.extend(generate_footer(ctx))
1213     result.append("""</div>
1214 </body>
1215 </html>""")
1216     return result
1217
1218
1219 def convert_index(ctx):
1220     node = ctx['node']
1221     # Get all indexdivs under indexdiv
1222     indexdivs = node.xml.find('indexdiv').findall('indexdiv')
1223
1224     result = [
1225         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
1226         generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
1227         """<div class="index">
1228 <div class="titlepage"><h2 class="title">
1229 <a name="%s"></a>%s</h2>
1230 </div>""" % (get_id(node), node.title)
1231     ]
1232     for i in indexdivs:
1233         result.extend(convert_indexdiv(ctx, i))
1234     result.extend(generate_footer(ctx))
1235     result.append("""</div>
1236 </body>
1237 </html>""")
1238     return result
1239
1240
1241 def convert_part(ctx):
1242     return convert_chunk_with_toc(ctx, 'part', 'h1')
1243
1244
1245 def convert_preface(ctx):
1246     node = ctx['node']
1247     result = [
1248         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
1249         generate_basic_nav(ctx),
1250         '<div class="preface">'
1251     ]
1252     title = node.xml.find('title')
1253     if title is not None:
1254         result.append("""
1255 <div class="titlepage">
1256 <h2 class="title"><a name="%s"></a>%s</h2>
1257 </div>""" % (get_id(node), title.text))
1258         node.xml.remove(title)
1259     convert_inner(ctx, node.xml, result)
1260     result.extend(generate_footer(ctx))
1261     result.append("""</div>
1262 </body>
1263 </html>""")
1264     return result
1265
1266
1267 def convert_reference(ctx):
1268     return convert_chunk_with_toc(ctx, 'reference', 'h1')
1269
1270
1271 def convert_refentry(ctx):
1272     node = ctx['node']
1273     node_id = get_id(node)
1274     refsect1s = node.xml.findall('refsect1')
1275
1276     result = [
1277         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
1278     ]
1279     generate_refentry_nav(ctx, refsect1s, result)
1280     result.append("""
1281 <div class="refentry">
1282 <a name="%s"></a>
1283 <div class="refnamediv">
1284   <table width="100%%"><tr>
1285     <td valign="top">
1286       <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
1287       <p>%s — module for gtk-doc unit test</p>
1288     </td>
1289     <td class="gallery_image" valign="top" align="right"></td>
1290   </tr></table>
1291 </div>
1292 """ % (node_id, node_id, node.title, node.title))
1293
1294     for s in refsect1s:
1295         result.extend(convert_refsect1(ctx, s))
1296     result.extend(generate_footer(ctx))
1297     result.append("""</div>
1298 </body>
1299 </html>""")
1300     return result
1301
1302
1303 def convert_sect1(ctx):
1304     return convert_chunk_with_toc(ctx, 'sect1', 'h2')
1305
1306
1307 # TODO(ensonic): turn into class with converters as functions and ctx as self
1308 convert_chunks = {
1309     'book': convert_book,
1310     'chapter': convert_chapter,
1311     'glossary': convert_glossary,
1312     'index': convert_index,
1313     'part': convert_part,
1314     'preface': convert_preface,
1315     'reference': convert_reference,
1316     'refentry': convert_refentry,
1317     'sect1': convert_sect1,
1318 }
1319
1320
1321 def generate_nav_nodes(files, node):
1322     nav = {
1323         'nav_home': node.root,
1324     }
1325     # nav params: up, prev, next
1326     if node.parent:
1327         nav['nav_up'] = node.parent
1328     ix = files.index(node)
1329     if ix > 0:
1330         nav['nav_prev'] = files[ix - 1]
1331     if ix < len(files) - 1:
1332         nav['nav_next'] = files[ix + 1]
1333     return nav
1334
1335
1336 def convert(out_dir, module, files, node):
1337     """Convert the docbook chunks to a html file.
1338
1339     Args:
1340       out_dir: already created output dir
1341       files: list of nodes in the tree in pre-order
1342       node: current tree node
1343     """
1344
1345     logging.info('Writing: %s', node.filename)
1346     with open(os.path.join(out_dir, node.filename), 'wt',
1347               newline='\n', encoding='utf-8') as html:
1348         ctx = {
1349             'module': module,
1350             'files': files,
1351             'node': node,
1352         }
1353         ctx.update(generate_nav_nodes(files, node))
1354
1355         if node.name in convert_chunks:
1356             for line in convert_chunks[node.name](ctx):
1357                 html.write(line)
1358         else:
1359             logging.warning('Add converter/template for "%s"', node.name)
1360
1361
1362 def create_devhelp2_toc(node):
1363     result = []
1364     for c in node.children:
1365         if c.children:
1366             result.append('<sub name="%s" link="%s">\n' % (c.title, c.filename))
1367             result.extend(create_devhelp2_toc(c))
1368             result.append('</sub>\n')
1369         else:
1370             result.append('<sub name="%s" link="%s"/>\n' % (c.title, c.filename))
1371     return result
1372
1373
1374 def create_devhelp2_condition_attribs(node):
1375     if 'condition' in node.attrib:
1376         # condition -> since, deprecated, ... (separated with '|')
1377         cond = node.attrib['condition'].replace('"', '&quot;').split('|')
1378         keywords = []
1379         for c in cond:
1380             if ':' in c:
1381                 keywords.append('{}="{}"'.format(*c.split(':', 1)))
1382             else:
1383                 # deprecated can have no description
1384                 keywords.append('{}="{}"'.format(c, ''))
1385         return ' ' + ' '.join(keywords)
1386     else:
1387         return ''
1388
1389
1390 def create_devhelp2_refsect2_keyword(node, base_link):
1391     return'    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
1392         node.attrib['role'], xml_get_title(node), base_link + node.attrib['id'],
1393         create_devhelp2_condition_attribs(node))
1394
1395
1396 def create_devhelp2_refsect3_keyword(node, base_link, title, name):
1397     return'    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
1398         node.attrib['role'], title, base_link + name,
1399         create_devhelp2_condition_attribs(node))
1400
1401
1402 def create_devhelp2(out_dir, module, xml, files):
1403     with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
1404               newline='\n', encoding='utf-8') as idx:
1405         bookinfo_nodes = xml.xpath('/book/bookinfo')
1406         title = ''
1407         if bookinfo_nodes is not None:
1408             bookinfo = bookinfo_nodes[0]
1409             title = bookinfo.xpath('./title/text()')[0]
1410             online_url = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')[0]
1411             # TODO: support author too (see devhelp2.xsl)
1412         # TODO: fixxref uses '--src-lang' to set the language
1413         result = [
1414             """<?xml version="1.0" encoding="utf-8" standalone="no"?>
1415 <book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
1416   <chapters>
1417 """ % (title, module, online_url)
1418         ]
1419         # toc
1420         result.extend(create_devhelp2_toc(files[0].root))
1421         result.append("""  </chapters>
1422   <functions>
1423 """)
1424         # keywords from all refsect2 and refsect3
1425         refsect2 = etree.XPath('//refsect2[@role]')
1426         refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
1427         refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
1428         refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
1429         refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
1430         for node in files:
1431             base_link = node.filename + '#'
1432             refsect2_nodes = refsect2(node.xml)
1433             for refsect2_node in refsect2_nodes:
1434                 result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
1435                 refsect3_nodes = refsect3_enum(refsect2_node)
1436                 for refsect3_node in refsect3_nodes:
1437                     details_node = refsect3_enum_details(refsect3_node)[0]
1438                     name = details_node.attrib['id']
1439                     result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
1440                 refsect3_nodes = refsect3_struct(refsect2_node)
1441                 for refsect3_node in refsect3_nodes:
1442                     details_node = refsect3_struct_details(refsect3_node)[0]
1443                     name = details_node.attrib['id']
1444                     result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))
1445
1446         result.append("""  </functions>
1447 </book>
1448 """)
1449         for line in result:
1450             idx.write(line)
1451
1452
1453 def get_dirs(uninstalled):
1454     if uninstalled:
1455         # this does not work from buiddir!=srcdir
1456         gtkdocdir = os.path.split(sys.argv[0])[0]
1457         if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
1458             # try 'srcdir' (set from makefiles) too
1459             if os.path.exists(os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl'):
1460                 gtkdocdir = os.environ['ABS_TOP_SRCDIR']
1461         styledir = gtkdocdir + '/style'
1462     else:
1463         gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
1464         styledir = gtkdocdir
1465     return (gtkdocdir, styledir)
1466
1467
1468 def main(module, index_file, out_dir, uninstalled):
1469     tree = etree.parse(index_file)
1470     tree.xinclude()
1471
1472     (gtkdocdir, styledir) = get_dirs(uninstalled)
1473     # copy navigation images and stylesheets to html directory ...
1474     css_file = os.path.join(styledir, 'style.css')
1475     for f in glob(os.path.join(styledir, '*.png')) + [css_file]:
1476         shutil.copy(f, out_dir)
1477     css_file = os.path.join(out_dir, 'style.css')
1478     with open(css_file, 'at', newline='\n', encoding='utf-8') as css:
1479         css.write(HTML_FORMATTER.get_style_defs())
1480
1481     # TODO: migrate options from fixxref
1482     # TODO: do in parallel with loading the xml above.
1483     fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])
1484
1485     # We do multiple passes:
1486     # 1) recursively walk the tree and chunk it into a python tree so that we
1487     #   can generate navigation and link tags.
1488     files = chunk(tree.getroot())
1489     files = list(PreOrderIter(files))
1490     # 2) extract tables:
1491     # TODO: use multiprocessing
1492     # - find all 'id' attribs and add them to the link map
1493     add_id_links(files, fixxref.Links)
1494     # - build glossary dict
1495     build_glossary(files)
1496
1497     # 3) create a xxx.devhelp2 file, do this before 3), since we modify the tree
1498     create_devhelp2(out_dir, module, tree.getroot(), files)
1499     # 4) iterate the tree and output files
1500     # TODO: use multiprocessing
1501     for node in files:
1502         convert(out_dir, module, files, node)
1503
1504
1505 def run(options):
1506     logging.info('options: %s', str(options.__dict__))
1507     module = options.args[0]
1508     document = options.args[1]
1509
1510     # TODO: rename to 'html' later on
1511     # - right now in mkhtml, the dir is created by the Makefile and mkhtml
1512     #   outputs into the working directory
1513     out_dir = os.path.join(os.path.dirname(document), 'db2html')
1514     try:
1515         os.mkdir(out_dir)
1516     except OSError as e:
1517         if e.errno != errno.EEXIST:
1518             raise
1519
1520     sys.exit(main(module, document, out_dir, options.uninstalled))