gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42   - glossary/index: depending on the parents, the headings as h1/h2
  43     - maybe track depth when chunking
  44   - handle 'label' attributes on part/chapter/section-types
  45     - the titles will have a generated prefix, such as 'Part I:'
    - in the toc it would only be the label: 'I.'
  47   - we need to separate the toc data from the chunking tree
    - since we don't chunk first 'section'/'sect1' those are missing from the
  49       toc
  50     - the toc also lists 'sect2' (TODO: check how deep it goes)
  51   - replace get_title with a result.extend(convert_title(ctx, title_tag))
  52     - see convert_table()
  53 - check each docbook tag if it can contain #PCDATA, if not don't check for
  54   xml.text
  55 - consider some perf-warnings flag
  56   - see 'No "id" attribute on'
  57
  58 OPTIONAL:
  59 - minify html: https://pypi.python.org/pypi/htmlmin/
  60
  61 Requirements:
  62 sudo pip3 install anytree lxml pygments
  63
  64 Example invocation:
  65 cd tests/bugs/docs/
  66 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  67 xdg-open db2html/index.html
  68 meld html db2html
  69
  70 Benchmarking:
  71 cd tests/bugs/docs/;
  72 rm html-build.stamp; time make html-build.stamp
  73 """
  74
  75 import argparse
  76 import errno
  77 import logging
  78 import os
  79 import shutil
  80 import sys
  81
  82 from anytree import Node, PreOrderIter
  83 from copy import deepcopy
  84 from glob import glob
  85 from lxml import etree
  86 from pygments import highlight
  87 from pygments.lexers import CLexer
  88 from pygments.formatters import HtmlFormatter
  89
  90 from . import config, fixxref
  91
# pygments setup
# lazily constructed lexer cache: maps a lower-case language name to a lexer
# instance; only the C lexer is created up front, convert_programlisting()
# adds further entries on demand
LEXERS = {
    'c': CLexer()
}
# one formatter instance shared by all highlighted listings
HTML_FORMATTER = HtmlFormatter(nowrap=True)
  98
# Tags that chunk() turns into separate nodes/files.
# http://www.sagehill.net/docbookxsl/Chunking.html
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',      # in article or book
    'index',         # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',         # except first
    'section',       # if equivalent to sect1
    'set',
    'setindex',
]
 118
 119
 120 class ChunkParams(object):
 121     def __init__(self, prefix, parent=None, min_idx=0):
 122         self.prefix = prefix
 123         self.parent = parent
 124         self.min_idx = min_idx
 125
 126
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
#
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# Maps a chunk tag to its ChunkParams(prefix, parent, min_idx).
# gen_chunk_name() adds entries for unknown tags at runtime.
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
}
 144
# Maps a chunk tag to a (title xpath, subtitle xpath or None) pair.
# The '_' entry is the default used by get_chunk_titles() for unlisted tags.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}
 153
# selects every 'id' attribute value in a document, see add_id_links()
ID_XPATH = etree.XPath('//@id')

# selects every 'glossentry' element, see build_glossary()
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text, filled by build_glossary()
glossary = {}

# number given to the next footnote, incremented by convert_footnote()
footnote_idx = 1
 160
 161
 162 def get_chunk_min_idx(tag):
 163     if tag not in CHUNK_PARAMS:
 164         return 0
 165
 166     return CHUNK_PARAMS[tag].min_idx
 167
 168
 169 def gen_chunk_name(node, idx):
 170     if 'id' in node.attrib:
 171         return node.attrib['id']
 172
 173     tag = node.tag
 174     if tag not in CHUNK_PARAMS:
 175         CHUNK_PARAMS[tag] = ChunkParams(node.tag[:2])
 176         logging.warning('Add CHUNK_PARAMS for "%s"', tag)
 177
 178     naming = CHUNK_PARAMS[tag]
 179     name = ('%s%02d' % (naming.prefix, idx))
 180     # handle parents to make names of nested tags unique
 181     # TODO: we only need to prepend the parent if there are > 1 of them in the
 182     #       xml
 183     # while naming.parent:
 184     #     parent = naming.parent
 185     #     if parent not in CHUNK_PARAMS:
 186     #         break;
 187     #     naming = CHUNK_PARAMS[parent]
 188     #     name = ('%s%02d' % (naming.prefix, idx)) + name
 189     logging.info('Gen chunk name: "%s"', name)
 190     return name
 191
 192
 193 def get_chunk_titles(node):
 194     tag = node.tag
 195     if tag not in TITLE_XPATHS:
 196         # Use defaults
 197         (title, subtitle) = TITLE_XPATHS['_']
 198     else:
 199         (title, subtitle) = TITLE_XPATHS[tag]
 200
 201     xml = title(node)[0]
 202     result = {
 203         'title': xml.text
 204     }
 205     if xml.tag != 'title':
 206         result['title_tag'] = xml.tag
 207     else:
 208         result['title_tag'] = tag
 209
 210     if subtitle:
 211         xml = subtitle(node)[0]
 212         result['subtitle'] = xml.text
 213         result['subtitle_tag'] = xml.tag
 214     else:
 215         result['subtitle'] = None
 216         result['subtitle_tag'] = None
 217     return result
 218
 219
 220 def chunk(xml_node, idx=0, parent=None):
 221     """Chunk the tree.
 222
 223     The first time, we're called with parent=None and in that case we return
 224     the new_node as the root of the tree
 225     """
 226     tag = xml_node.tag
 227     # also check idx to handle 'sect1'/'section' special casing
 228     if tag in CHUNK_TAGS and idx >= get_chunk_min_idx(tag):
 229         logging.info('chunk tag: "%s"[%d]', tag, idx)
 230         if parent:
 231             # remove the xml-node from the parent
 232             sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
 233             xml_node.getparent().remove(xml_node)
 234             xml_node = sub_tree
 235
 236         title_args = get_chunk_titles(xml_node)
 237         chunk_name = gen_chunk_name(xml_node, (idx + 1))
 238         parent = Node(tag, parent=parent, xml=xml_node,
 239                       filename=chunk_name + '.html', **title_args)
 240
 241     idx = 0
 242     for child in xml_node:
 243         new_parent = chunk(child, idx, parent)
 244         if child.tag in CHUNK_TAGS:
 245             idx += 1
 246
 247     return parent
 248
 249
 250 def add_id_links(files, links):
 251     for node in files:
 252         chunk_name = node.filename[:-5]
 253         chunk_base = node.filename + '#'
 254         for attr in ID_XPATH(node.xml):
 255             if attr == chunk_name:
 256                 links[attr] = node.filename
 257             else:
 258                 links[attr] = chunk_base + attr
 259
 260
 261 def build_glossary(files):
 262     for node in files:
 263         if node.xml.tag != 'glossary':
 264             continue
 265         for term in GLOSSENTRY_XPATH(node.xml):
 266             # TODO: there can be all kind of things in a glossary. This only supports
 267             # what we commonly use
 268             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 269             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 270             glossary[key] = value
 271             # logging.debug('glosentry: %s:%s', key, value)
 272
 273
 274 # conversion helpers
 275
 276
 277 def convert_inner(ctx, xml, result):
 278     for child in xml:
 279         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 280
 281
 282 def convert_ignore(ctx, xml):
 283     result = []
 284     convert_inner(ctx, xml, result)
 285     return result
 286
 287
def convert_skip(ctx, xml):
    """Drop *xml* and its children: always returns a single empty string."""
    return ['']
 290
 291
 292 def append_text(text, result):
 293     if text and text.strip():
 294         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 295
 296
# tags convert__unknown() has already warned about (tag -> True)
missing_tags = {}
 298
 299
 300 def convert__unknown(ctx, xml):
 301     # don't recurse on subchunks
 302     if xml.tag in CHUNK_TAGS:
 303         return []
 304     if isinstance(xml, etree._Comment):
 305         return ['<!-- ' + xml.text + '-->\n']
 306     else:
 307         # warn only once
 308         if xml.tag not in missing_tags:
 309             logging.warning('Add tag converter for "%s"', xml.tag)
 310             missing_tags[xml.tag] = True
 311         result = ['<!-- ' + xml.tag + '-->\n']
 312         convert_inner(ctx, xml, result)
 313         result.append('<!-- /' + xml.tag + '-->\n')
 314         return result
 315
 316
 317 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 318     result = ['<div class="%s">\n' % xml.tag]
 319     title = xml.find('title')
 320     if title is not None:
 321         if 'id' in xml.attrib:
 322             result.append('<a name="%s"></a>' % xml.attrib['id'])
 323         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 324         xml.remove(title)
 325     append_text(xml.text, result)
 326     inner_func(ctx, xml, result)
 327     result.append('</div>')
 328     append_text(xml.tail, result)
 329     return result
 330
 331
 332 def xml_get_title(xml):
 333     title = xml.find('title')
 334     if title is not None:
 335         return title.text
 336     else:
 337         # TODO(ensonic): any way to get the file (inlcudes) too?
 338         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 339         return ''
 340
 341
 342 # docbook tags
 343
 344
 345 def convert_abstract(ctx, xml):
 346     result = ["""<div class="abstract">
 347     <p class="title"><b>Abstract</b></p>"""]
 348     append_text(xml.text, result)
 349     convert_inner(ctx, xml, result)
 350     result.append('</div>')
 351     append_text(xml.tail, result)
 352     return result
 353
 354
 355 def convert_acronym(ctx, xml):
 356     key = xml.text
 357     title = glossary.get(key, '')
 358     # TODO: print a sensible warning if missing
 359     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 360     if xml.tail:
 361         result.append(xml.tail)
 362     return result
 363
 364
 365 def convert_anchor(ctx, xml):
 366     return ['<a name="%s"></a>' % xml.attrib['id']]
 367
 368
 369 def convert_bookinfo(ctx, xml):
 370     result = ['<div class="titlepage">']
 371     convert_inner(ctx, xml, result)
 372     result.append("""<hr>
 373 </div>""")
 374     if xml.tail:
 375         result.append(xml.tail)
 376     return result
 377
 378
 379 def convert_blockquote(ctx, xml):
 380     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 381     append_text(xml.text, result)
 382     convert_inner(ctx, xml, result)
 383     result.append('</blockquote>\n</div>')
 384     append_text(xml.tail, result)
 385     return result
 386
 387
 388 def convert_code(ctx, xml):
 389     result = ['<code class="%s">' % xml.tag]
 390     append_text(xml.text, result)
 391     convert_inner(ctx, xml, result)
 392     result.append('</code>')
 393     append_text(xml.tail, result)
 394     return result
 395
 396
 397 def convert_colspec(ctx, xml):
 398     result = ['<col']
 399     a = xml.attrib
 400     if 'colname' in a:
 401         result.append(' class="%s"' % a['colname'])
 402     if 'colwidth' in a:
 403         result.append(' width="%s"' % a['colwidth'])
 404     result.append('>\n')
 405     # is in tgroup and there can be no 'text'
 406     return result
 407
 408
 409 def convert_command(ctx, xml):
 410     result = ['<strong class="userinput"><code>']
 411     append_text(xml.text, result)
 412     convert_inner(ctx, xml, result)
 413     result.append('</code></strong>')
 414     append_text(xml.tail, result)
 415     return result
 416
 417
 418 def convert_corpauthor(ctx, xml):
 419     result = ['<div><h3 class="corpauthor">\n']
 420     append_text(xml.text, result)
 421     convert_inner(ctx, xml, result)
 422     result.append('</h3></div>\n')
 423     append_text(xml.tail, result)
 424     return result
 425
 426
 427 def convert_div(ctx, xml):
 428     result = ['<div class="%s">\n' % xml.tag]
 429     append_text(xml.text, result)
 430     convert_inner(ctx, xml, result)
 431     result.append('</div>')
 432     append_text(xml.tail, result)
 433     return result
 434
 435
 436 def convert_em_class(ctx, xml):
 437     result = ['<em class="%s"><code>' % xml.tag]
 438     append_text(xml.text, result)
 439     convert_inner(ctx, xml, result)
 440     result.append('</code></em>')
 441     append_text(xml.tail, result)
 442     return result
 443
 444
 445 def convert_entry(ctx, xml):
 446     entry_type = ctx['table.entry']
 447     result = ['<' + entry_type]
 448     if 'role' in xml.attrib:
 449         result.append(' class="%s"' % xml.attrib['role'])
 450     if 'morerows' in xml.attrib:
 451         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 452     result.append('>')
 453     append_text(xml.text, result)
 454     convert_inner(ctx, xml, result)
 455     result.append('</' + entry_type + '>')
 456     append_text(xml.tail, result)
 457     return result
 458
 459
 460 def convert_footnote(ctx, xml):
 461     footnotes = ctx.get('footnotes', [])
 462     # footnotes idx is not per page, but per doc
 463     global footnote_idx
 464     idx = footnote_idx
 465     footnote_idx += 1
 466
 467     # need a pair of ids for each footnote (docbook generates different ids)
 468     this_id = 'footnote-%d' % idx
 469     that_id = 'ftn.' + this_id
 470
 471     inner = ['<div id="%s" class="footnote">' % that_id]
 472     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 473         this_id, idx))
 474     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 475     # get double nested paras :/.
 476     # convert_inner(ctx, xml, inner)
 477     para = xml.find('para')
 478     if para is None:
 479         para = xml.find('simpara')
 480     if para is not None:
 481         inner.append(para.text)
 482     else:
 483         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 484                         etree.tostring(xml, method="text", encoding=str).strip())
 485     inner.append('</p></div>')
 486     footnotes.append(inner)
 487     ctx['footnotes'] = footnotes
 488     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 489         that_id, this_id, idx)]
 490
 491
 492 def convert_formalpara(ctx, xml):
 493     result = None
 494     title_tag = xml.find('title')
 495     result = ['<p><b>%s</b>' % title_tag.text]
 496     para_tag = xml.find('para')
 497     append_text(para_tag.text, result)
 498     convert_inner(ctx, para_tag, result)
 499     append_text(para_tag.tail, result)
 500     result.append('</p>')
 501     append_text(xml.tail, result)
 502     return result
 503
 504
 505 def convert_glossdef(ctx, xml):
 506     result = ['<dd class="glossdef">']
 507     convert_inner(ctx, xml, result)
 508     result.append('</dd>\n')
 509     return result
 510
 511
 512 def convert_glossdiv(ctx, xml):
 513     title_tag = xml.find('title')
 514     title = title_tag.text
 515     xml.remove(title_tag)
 516     result = [
 517         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 518     ]
 519     convert_inner(ctx, xml, result)
 520     return result
 521
 522
 523 def convert_glossentry(ctx, xml):
 524     result = []
 525     convert_inner(ctx, xml, result)
 526     return result
 527
 528
 529 def convert_glossterm(ctx, xml):
 530     glossid = ''
 531     text = ''
 532     anchor = xml.find('anchor')
 533     if anchor is not None:
 534         glossid = anchor.attrib.get('id', '')
 535         text += anchor.tail or ''
 536     text += xml.text or ''
 537     if glossid == '':
 538         glossid = 'glossterm-' + text
 539     return [
 540         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 541             glossid, text)
 542     ]
 543
 544
 545 def convert_imageobject(ctx, xml):
 546     imagedata = xml.find('imagedata')
 547     if imagedata is not None:
 548         # TODO(ensonic): warn on missing fileref attr?
 549         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 550     else:
 551         return []
 552
 553
 554 def convert_indexdiv(ctx, xml):
 555     title_tag = xml.find('title')
 556     title = title_tag.text
 557     xml.remove(title_tag)
 558     result = [
 559         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 560     ]
 561     convert_inner(ctx, xml, result)
 562     return result
 563
 564
 565 def convert_informaltable(ctx, xml):
 566     result = ['<div class="informaltable"><table class="informaltable"']
 567     a = xml.attrib
 568     if 'pgwide' in a and a['pgwide'] == '1':
 569         result.append(' width="100%"')
 570     if 'frame' in a and a['frame'] == 'none':
 571         result.append(' border="0"')
 572     result.append('>\n')
 573     convert_inner(ctx, xml, result)
 574     result.append('</table></div>')
 575     if xml.tail:
 576         result.append(xml.tail)
 577     return result
 578
 579
 580 def convert_itemizedlist(ctx, xml):
 581     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 582     convert_inner(ctx, xml, result)
 583     result.append('</ul></div>')
 584     if xml.tail:
 585         result.append(xml.tail)
 586     return result
 587
 588
 589 def convert_link(ctx, xml):
 590     linkend = xml.attrib['linkend']
 591     if linkend in fixxref.NoLinks:
 592         linkend = None
 593     result = []
 594     if linkend:
 595         link_text = []
 596         convert_inner(ctx, xml, link_text)
 597         append_text(xml.text, link_text)
 598         # TODO: fixxref does some weird checks in xml.text
 599         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 600     append_text(xml.tail, result)
 601     return result
 602
 603
 604 def convert_listitem(ctx, xml):
 605     result = ['<li class="listitem">']
 606     convert_inner(ctx, xml, result)
 607     result.append('</li>')
 608     # is in itemizedlist and there can be no 'text'
 609     return result
 610
 611
 612 def convert_literallayout(ctx, xml):
 613     result = ['<div class="literallayout"><p><br>\n']
 614     append_text(xml.text, result)
 615     convert_inner(ctx, xml, result)
 616     result.append('</p></div>')
 617     append_text(xml.tail, result)
 618     return result
 619
 620
 621 def convert_orderedlist(ctx, xml):
 622     result = ['<div class="orderedlistlist"><ol class="orderedlistlist" type="1">']
 623     convert_inner(ctx, xml, result)
 624     result.append('</ol></div>')
 625     append_text(xml.tail, result)
 626     return result
 627
 628
 629 def convert_para(ctx, xml):
 630     result = []
 631     if 'id' in xml.attrib:
 632         result.append('<a name="%s"></a>' % xml.attrib['id'])
 633     result.append('<p>')
 634     append_text(xml.text, result)
 635     convert_inner(ctx, xml, result)
 636     result.append('</p>')
 637     append_text(xml.tail, result)
 638     return result
 639
 640
 641 def convert_para_like(ctx, xml):
 642     result = []
 643     if 'id' in xml.attrib:
 644         result.append('<a name="%s"></a>' % xml.attrib['id'])
 645     result.append('<p class="%s">' % xml.tag)
 646     append_text(xml.text, result)
 647     convert_inner(ctx, xml, result)
 648     result.append('</p>')
 649     append_text(xml.tail, result)
 650     return result
 651
 652
 653 def convert_phrase(ctx, xml):
 654     result = ['<span']
 655     if 'role' in xml.attrib:
 656         result.append(' class="%s">' % xml.attrib['role'])
 657     else:
 658         result.append('>')
 659     append_text(xml.text, result)
 660     convert_inner(ctx, xml, result)
 661     result.append('</span>')
 662     append_text(xml.tail, result)
 663     return result
 664
 665
 666 def convert_primaryie(ctx, xml):
 667     result = ['<dt>\n']
 668     convert_inner(ctx, xml, result)
 669     result.append('\n</dt>\n<dd></dd>\n')
 670     return result
 671
 672
 673 def convert_pre(ctx, xml):
 674     result = ['<pre class="%s">\n' % xml.tag]
 675     append_text(xml.text, result)
 676     convert_inner(ctx, xml, result)
 677     result.append('</pre>')
 678     append_text(xml.tail, result)
 679     return result
 680
 681
 682 def convert_programlisting(ctx, xml):
 683     result = []
 684     if xml.attrib.get('role', '') == 'example':
 685         if xml.text:
 686             lang = xml.attrib.get('language', 'c').lower()
 687             if lang not in LEXERS:
 688                 LEXERS[lang] = get_lexer_by_name(lang)
 689             lexer = LEXERS.get(lang, None)
 690             if lexer:
 691                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 692
 693                 # we do own line-numbering
 694                 line_count = highlighted.count('\n')
 695                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 696                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 697   <tbody>
 698     <tr>
 699       <td class="listing_lines" align="right"><pre>%s</pre></td>
 700       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 701     </tr>
 702   </tbody>
 703 </table>
 704 """ % (source_lines, highlighted))
 705             else:
 706                 logging.warn('No pygments lexer for language="%s"', lang)
 707                 result.append('<pre class="programlisting">')
 708                 result.append(xml.text)
 709                 result.append('</pre>')
 710     else:
 711         result.append('<pre class="programlisting">')
 712         append_text(xml.text, result)
 713         convert_inner(ctx, xml, result)
 714         result.append('</pre>')
 715     append_text(xml.tail, result)
 716     return result
 717
 718
 719 def convert_quote(ctx, xml):
 720     result = ['<span class="quote">"<span class="quote">']
 721     append_text(xml.text, result)
 722     convert_inner(ctx, xml, result)
 723     result.append('</span>"</span>')
 724     append_text(xml.tail, result)
 725     return result
 726
 727
 728 def convert_refsect1(ctx, xml):
 729     # Add a divider between two consequitive refsect2
 730     def convert_inner(ctx, xml, result):
 731         prev = None
 732         for child in xml:
 733             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 734                 result.append('<hr>\n')
 735             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 736             prev = child
 737     return convert_sect(ctx, xml, 'h2', convert_inner)
 738
 739
 740 def convert_refsect2(ctx, xml):
 741     return convert_sect(ctx, xml, 'h3')
 742
 743
 744 def convert_refsect3(ctx, xml):
 745     return convert_sect(ctx, xml, 'h4')
 746
 747
 748 def convert_row(ctx, xml):
 749     result = ['<tr>\n']
 750     convert_inner(ctx, xml, result)
 751     result.append('</tr>\n')
 752     return result
 753
 754
 755 def convert_sect1_tag(ctx, xml):
 756     return convert_sect(ctx, xml, 'h2')
 757
 758
 759 def convert_sect2(ctx, xml):
 760     return convert_sect(ctx, xml, 'h3')
 761
 762
 763 def convert_sect3(ctx, xml):
 764     return convert_sect(ctx, xml, 'h4')
 765
 766
 767 def convert_simpara(ctx, xml):
 768     result = ['<p>']
 769     append_text(xml.text, result)
 770     result.append('</p>')
 771     append_text(xml.tail, result)
 772     return result
 773
 774
 775 def convert_span(ctx, xml):
 776     result = ['<span class="%s">' % xml.tag]
 777     append_text(xml.text, result)
 778     convert_inner(ctx, xml, result)
 779     result.append('</span>')
 780     append_text(xml.tail, result)
 781     return result
 782
 783
 784 def convert_table(ctx, xml):
 785     result = ['<div class="table">']
 786     if 'id' in xml.attrib:
 787         result.append('<a name="%s"></a>' % xml.attrib['id'])
 788     title_tag = xml.find('title')
 789     if title_tag is not None:
 790         result.append('<p class="title"><b>')
 791         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 792         result.extend(convert_title(ctx, title_tag))
 793         result.append('</b></p>')
 794         xml.remove(title_tag)
 795     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 796
 797     convert_inner(ctx, xml, result)
 798
 799     result.append('</table></div></div>')
 800     append_text(xml.tail, result)
 801     return result
 802
 803
 804 def convert_tbody(ctx, xml):
 805     result = ['<tbody>']
 806     ctx['table.entry'] = 'td'
 807     convert_inner(ctx, xml, result)
 808     result.append('</tbody>')
 809     # is in tgroup and there can be no 'text'
 810     return result
 811
 812
 813 def convert_tgroup(ctx, xml):
 814     # tgroup does not expand to anything, but the nested colspecs need to
 815     # be put into a colgroup
 816     cols = xml.findall('colspec')
 817     result = []
 818     if cols:
 819         result.append('<colgroup>\n')
 820         for col in cols:
 821             result.extend(convert_colspec(ctx, col))
 822             xml.remove(col)
 823         result.append('</colgroup>\n')
 824     convert_inner(ctx, xml, result)
 825     # is in informaltable and there can be no 'text'
 826     return result
 827
 828
 829 def convert_thead(ctx, xml):
 830     result = ['<thead>']
 831     ctx['table.entry'] = 'th'
 832     convert_inner(ctx, xml, result)
 833     result.append('</thead>')
 834     # is in tgroup and there can be no 'text'
 835     return result
 836
 837
 838 def convert_title(ctx, xml):
 839     # This is always called from some context
 840     result = []
 841     append_text(xml.text, result)
 842     convert_inner(ctx, xml, result)
 843     append_text(xml.tail, result)
 844     return result
 845
 846
 847 def convert_ulink(ctx, xml):
 848     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 849     if xml.tail:
 850         result.append(xml.tail)
 851     return result
 852
 853
 854 def convert_userinput(ctx, xml):
 855     result = ['<span class="command"><strong>']
 856     append_text(xml.text, result)
 857     convert_inner(ctx, xml, result)
 858     result.append('</strong></span>')
 859     append_text(xml.tail, result)
 860     return result
 861
 862
 863 def convert_variablelist(ctx, xml):
 864     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 865 <colgroup>
 866 <col align="left" valign="top">
 867 <col>
 868 </colgroup>
 869 <tbody>"""]
 870     convert_inner(ctx, xml, result)
 871     result.append("""</tbody>
 872 </table></div>""")
 873     return result
 874
 875
 876 def convert_varlistentry(ctx, xml):
 877     result = ['<tr>']
 878
 879     result.append('<td><p>')
 880     term = xml.find('term')
 881     result.extend(convert_span(ctx, term))
 882     result.append('</p></td>')
 883
 884     result.append('<td>')
 885     listitem = xml.find('listitem')
 886     convert_inner(ctx, listitem, result)
 887     result.append('</td>')
 888
 889     result.append('<tr>')
 890     return result
 891
 892
# Dispatch table: docbook tag name -> converter function(ctx, xml) returning
# a list of html strings. Tags without an entry are handled by
# convert__unknown(), see convert_inner().
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'abstract': convert_abstract,
    'acronym': convert_acronym,
    'anchor': convert_anchor,
    'application': convert_span,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'code': convert_code,
    'colspec': convert_colspec,
    'constant': convert_code,
    'command': convert_command,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'envar': convert_code,
    'footnote': convert_footnote,
    'filename': convert_code,
    'formalpara': convert_formalpara,
    'function': convert_code,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_div,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_code,
    'literallayout': convert_literallayout,
    'mediaobject': convert_div,
    'note': convert_div,
    'option': convert_code,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'partintro': convert_div,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'quote': convert_quote,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'sect1': convert_sect1_tag,
    'sect2': convert_sect2,
    'sect3': convert_sect3,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'structname': convert_span,
    'synopsis': convert_pre,
    'symbol': convert_span,
    'table': convert_table,
    'tbody': convert_tbody,
    'term': convert_span,
    'tgroup': convert_tgroup,
    'thead': convert_thead,
    'type': convert_span,
    'ulink': convert_ulink,
    'userinput': convert_userinput,
    'varname': convert_code,
    'variablelist': convert_variablelist,
    'varlistentry': convert_varlistentry,
    'warning': convert_div,
}
 971
 972 # conversion helpers
 973
# Common page prologue used by the chunk converters. It takes two '%s'
# arguments: the text for <title> and the already rendered '<link rel=...>'
# tags (see generate_head_links()) that go right before the stylesheet link.
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
 983
 984
 985 def generate_head_links(ctx):
 986     n = ctx['nav_home']
 987     result = [
 988         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
 989     ]
 990     if 'nav_up' in ctx:
 991         n = ctx['nav_up']
 992         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
 993     if 'nav_prev' in ctx:
 994         n = ctx['nav_prev']
 995         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
 996     if 'nav_next' in ctx:
 997         n = ctx['nav_next']
 998         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
 999     return ''.join(result)
1000
1001
def generate_nav_links(ctx):
    """Build the table cells holding the home/up/prev/next navigation icons.

    The home cell is always a link. For 'up', 'prev' and 'next' a linked icon
    is emitted when the matching 'nav_*' key is in the context, otherwise the
    '-insensitive' icon without a link is used.

    Args:
      ctx: conversion context; every 'nav_*' value provides 'filename'

    Returns:
      the four '<td>' cells concatenated into one string
    """
    # (context key, cell with a link to the target, cell for a missing target)
    buttons = (
        ('nav_up',
         '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>',
         '<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_prev',
         '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>',
         '<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_next',
         '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>',
         '<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>'),
    )
    home = ctx['nav_home']
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % home.filename
    ]
    for key, linked, insensitive in buttons:
        if key in ctx:
            cells.append(linked % ctx[key].filename)
        else:
            cells.append(insensitive)

    return ''.join(cells)
1027
1028
def generate_toc(ctx, node):
    """Build the nested table-of-contents entries for the children of a node.

    Args:
      ctx: conversion context (only passed on to the recursive calls)
      node: tree node whose children are listed; each child provides 'title',
            'title_tag', 'filename', 'subtitle', 'subtitle_tag' and 'children'

    Returns:
      list of html fragments: one '<dt>' per child, followed by a nested
      '<dd><dl>' block for children that have children themselves
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        heading = '<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, child.filename, child.title)
        entries.append(heading)
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if not child.children:
            continue
        entries.append('<dd><dl>')
        entries += generate_toc(ctx, child)
        entries.append('</dl></dd>')
    return entries
1043
1044
def generate_basic_nav(ctx):
    """Return the top navigation table with an empty shortcuts cell.

    Args:
      ctx: conversion context, handed to generate_nav_links()

    Returns:
      the html of the navigation table as one string
    """
    nav_cells = generate_nav_links(ctx)
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """
    return template % nav_cells
1053
1054
def generate_alpha_nav(ctx, divs, prefix):
    """Return the top navigation table with one shortcut per division.

    Args:
      ctx: conversion context, handed to generate_nav_links()
      divs: xml elements to link to; their titles are used as link text
      prefix: string put in front of the title to form the link anchor

    Returns:
      the html of the navigation table as one string
    """
    shortcuts = [
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, label, label)
        for label in map(xml_get_title, divs)
    ]
    shortcut_row = '\n<span class="dim">|</span>\n'.join(shortcuts)
    nav_cells = generate_nav_links(ctx)

    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_index">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """
    return template % (shortcut_row, nav_cells)
1072
1073
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the top navigation table of a refentry page to `result`.

    Besides the 'Top' shortcut, one shortcut is added for every section that
    has an 'id' attribute and whose 'role' does not end in '_proto'.

    Args:
      ctx: conversion context, handed to generate_nav_links()
      refsect1s: the 'refsect1' xml elements of the refentry
      result: list of html fragments, extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for section in refsect1s:
        attrs = section.attrib
        # TOC sections (role="xxx_proto") are not listed and sections
        # without an 'id' cannot be linked to
        is_toc_section = attrs.get('role', '').endswith("_proto")
        if is_toc_section or 'id' not in attrs:
            continue

        label = xml_get_title(section)
        result.append("""
          <span class="dim">|</span>
          <a href="#%s" class="shortcut">%s</a>
          """ % (attrs['id'], label))

    nav_cells = generate_nav_links(ctx)
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % nav_cells)
1099
1100
def generate_footer(ctx):
    """Build the footnotes block for the end of a page.

    Args:
      ctx: conversion context; 'footnotes', when present, is a list where
           every entry is itself a sequence of html fragments

    Returns:
      list of html fragments; empty when the context has no 'footnotes' key
    """
    if 'footnotes' not in ctx:
        return []

    footer = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    footer += [fragment for note in ctx['footnotes'] for fragment in note]
    footer.append('</div>\n')
    return footer
1111
1112
def get_id(node):
    """Return the 'id' of the node's xml element, generating one if missing.

    A generated id has the form 'id-1.<a>.<b>...', where the numbers are the
    1-based positions among the siblings on each level, listed from the
    topmost ancestor down to the element itself.

    Args:
      node: tree node whose 'xml' attribute is the element to look at

    Returns:
      the non-empty 'id' attribute or the generated id
    """
    element = node.xml
    explicit_id = element.attrib.get('id', None)
    if explicit_id:
        return explicit_id

    logging.info('%d: No "id" attribute on "%s", generating one',
                 element.sourceline, element.tag)
    # Walk up the xml-tree and record the position of each element among its
    # siblings; the positions are collected bottom-up and reversed afterwards.
    positions = []
    parent = element.getparent()
    while parent is not None:
        siblings = parent.getchildren()
        positions.append(str(siblings.index(element) + 1))
        element, parent = parent, parent.getparent()
    positions.reverse()
    return 'id-1.' + '.'.join(positions)
1132
1133
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Render a chunk as a page with title, table of contents and content.

    Args:
      ctx: conversion context; 'node' is the chunk to render
      div_class: css class for the '<div>' wrapping the page content
      title_tag: html tag name used for the title (e.g. 'h1')

    Returns:
      list of html fragments making up the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]

    title_elem = node.xml.find('title')
    if title_elem is not None:
        heading = """
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (title_tag, get_id(node), title_elem.text, title_tag)
        html.append(heading)
        # the title is rendered above, take it out before the remaining
        # children get converted
        node.xml.remove(title_elem)

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        html.append("""<p><b>Table of Contents</b></p>
    <div class="toc">
      <dl class="toc">
    """)
        html += toc_entries
        html.append("""</dl>
    </div>
    """)

    convert_inner(ctx, node.xml, html)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1167
1168
1169 # docbook chunks
1170
1171
def convert_book(ctx):
    """Render the 'book' chunk: title banner, bookinfo and the full toc.

    Args:
      ctx: conversion context; 'node' is the book chunk

    Returns:
      list of html fragments making up the whole page
    """
    node = ctx['node']
    banner = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
"""
    html = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        banner % node.title
    ]

    bookinfo = node.xml.findall('bookinfo')[0]
    # the title is already shown in the banner, don't repeat it
    title_elem = bookinfo.find('title')
    if title_elem is not None:
        bookinfo.remove(title_elem)
    html += convert_bookinfo(ctx, bookinfo)

    html.append("""<div class="toc">
  <dl class="toc">
""")
    html += generate_toc(ctx, node.root)
    html.append("""</dl>
</div>
""")
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1200
1201
def convert_chapter(ctx):
    """Render a 'chapter' chunk as a toc page with an 'h2' title."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1204
1205
def convert_glossary(ctx):
    """Render the 'glossary' chunk with a shortcut bar for its 'glossdiv's.

    Args:
      ctx: conversion context; 'node' is the glossary chunk

    Returns:
      list of html fragments making up the whole page
    """
    node = ctx['node']
    divisions = node.xml.findall('glossdiv')

    page_title = node.title + ": " + node.root.title
    header = HTML_HEADER % (page_title, generate_head_links(ctx))
    shortcut_bar = generate_alpha_nav(ctx, divisions, 'gls')
    titlepage = """<div class="glossary">
<div class="titlepage"><h1 class="title">
<a name="%s"></a>%s</h1>
</div>""" % (get_id(node), node.title)

    html = [header, shortcut_bar, titlepage]
    for division in divisions:
        html += convert_glossdiv(ctx, division)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1225
1226
def convert_index(ctx):
    """Render the 'index' chunk with a shortcut bar for its sections.

    The sections are the 'indexdiv' elements nested inside the first
    top-level 'indexdiv'. An index without any 'indexdiv' is rendered as a
    page that only has the title.

    Args:
      ctx: conversion context; 'node' is the index chunk

    Returns:
      list of html fragments making up the whole page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv. find() returns None if the index has
    # no outer 'indexdiv' (e.g. an index without entries); treat that as an
    # index with no sections instead of failing with an AttributeError.
    outer_div = node.xml.find('indexdiv')
    indexdivs = outer_div.findall('indexdiv') if outer_div is not None else []

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx'),
        """<div class="index">
<div class="titlepage"><h2 class="title">
<a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1247
1248
def convert_part(ctx):
    """Render a 'part' chunk as a toc page with an 'h1' title."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1251
1252
def convert_preface(ctx):
    """Render the 'preface' chunk: navigation, optional title and content.

    Args:
      ctx: conversion context; 'node' is the preface chunk

    Returns:
      list of html fragments making up the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]

    title_elem = node.xml.find('title')
    if title_elem is not None:
        heading = """
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), title_elem.text)
        html.append(heading)
        # the title is rendered above, take it out before the remaining
        # children get converted
        node.xml.remove(title_elem)

    convert_inner(ctx, node.xml, html)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1273
1274
def convert_reference(ctx):
    """Render a 'reference' chunk as a toc page with an 'h1' title."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1277
1278
def convert_refentry(ctx):
    """Render a 'refentry' chunk: navigation, name block and all 'refsect1's.

    Args:
      ctx: conversion context; 'node' is the refentry chunk

    Returns:
      list of html fragments making up the whole page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # The line below the title used to end in a hard-coded purpose text that
    # only fits the gtk-doc unit test module. Use the subtitle of the chunk
    # instead, formatted like the toc entries ("title — subtitle").
    # NOTE(review): assumes the chunker stores the <refpurpose> of a refentry
    # in node.subtitle — confirm against chunk().
    purpose = getattr(node, 'subtitle', None)
    if purpose:
        summary = '%s — %s' % (node.title, purpose)
    else:
        summary = node.title

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s</p>
    </td>
    <td class="gallery_image" valign="top" align="right"></td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, summary))

    for s in refsect1s:
        result.extend(convert_refsect1(ctx, s))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1309
1310
def convert_sect1(ctx):
    """Render a 'sect1' chunk as a toc page with an 'h2' title."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1313
1314
# Dispatch table used by convert(): it is looked up with the 'name' of a chunk
# node (the keys are docbook tag names) and yields the function that renders
# the complete html page for that chunk from the conversion context.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
    'sect1': convert_sect1,
}
1327
1328
def generate_nav_nodes(files, node):
    """Determine the navigation targets for a node.

    Args:
      files: list of all nodes in output order; must contain `node`
      node: the node to compute the navigation for

    Returns:
      dict with 'nav_home' and, where they exist, 'nav_up' (the parent),
      'nav_prev' and 'nav_next' (the neighbours in `files`)
    """
    nav = {'nav_home': node.root}
    if node.parent:
        nav['nav_up'] = node.parent

    position = files.index(node)
    has_prev = position > 0
    has_next = position + 1 < len(files)
    if has_prev:
        nav['nav_prev'] = files[position - 1]
    if has_next:
        nav['nav_next'] = files[position + 1]
    return nav
1342
1343
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The output file is named after the node. If there is no converter for the
    type of the node, a warning is logged and the file is left empty.

    Args:
      out_dir: already created output dir
      module: name of the module, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
1368
1369
def create_devhelp2_toc(node):
    """Build the nested '<sub>' entries for the children of a node.

    Args:
      node: tree node; each child provides 'title', 'filename' and 'children'

    Returns:
      list of xml fragments; children without children become self-closing
      '<sub/>' tags, the others wrap the entries of their own children
    """
    entries = []
    for child in node.children:
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        entries.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
1380
1381
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of an element into xml attributes.

    The condition holds '|' separated 'key:value' entries (since,
    deprecated, ...). Double quotes are replaced with '&quot;' and an entry
    without ':' gets an empty value.

    Args:
      node: xml element

    Returns:
      ' key="value" ...' with a leading space, or '' when the element has no
      'condition' attribute
    """
    if 'condition' not in node.attrib:
        return ''

    escaped = node.attrib['condition'].replace('"', '&quot;')
    keywords = []
    for entry in escaped.split('|'):
        # partition() yields an empty value when there is no ':', which covers
        # e.g. 'deprecated' without a description
        key, _, value = entry.partition(':')
        keywords.append('{}="{}"'.format(key, value))
    return ' ' + ' '.join(keywords)
1396
1397
def create_devhelp2_refsect2_keyword(node, base_link):
    """Return the '<keyword>' line for a 'refsect2' element.

    Args:
      node: xml element with 'role' and 'id' attributes
      base_link: link prefix that the 'id' of the element is appended to

    Returns:
      the keyword tag as a string, including the condition attributes
    """
    kind = node.attrib['role']
    name = xml_get_title(node)
    link = base_link + node.attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (kind, name, link, conditions)
1402
1403
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Return the '<keyword>' line for a member listed in a 'refsect3'.

    Args:
      node: xml element with a 'role' attribute
      base_link: link prefix that `name` is appended to
      title: value for the 'name' attribute of the keyword
      name: anchor of the member

    Returns:
      the keyword tag as a string, including the condition attributes
    """
    kind = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (kind, title, link, conditions)
1408
1409
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file into the output dir.

    The file contains the book title and online location taken from
    '/book/bookinfo', the chapter tree and one keyword for each 'refsect2'
    with a role plus the enum and struct members listed below it.

    A document without 'bookinfo', without a title or without an
    online-location link is supported: the title is then empty and the
    'online' attribute is left out.

    Args:
      out_dir: already created output dir
      module: name of the module, used for the file name and book name
      xml: root element of the document
      files: list of nodes in the tree in pre-order
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_attr = ''
        # xpath() returns a (possibly empty) list for these queries, never
        # None, so test for emptiness before indexing.
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_attr = ' online="%s"' % online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: the keyword name is the text of the 'para'
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members: the 'id' is used as both keyword name and anchor
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        for line in result:
            idx.write(line)
1459
1460
def get_dirs(uninstalled):
    """Locate the gtk-doc data dir and the dir with the style files.

    Args:
      uninstalled: if true, look next to the running script (or in
                   $ABS_TOP_SRCDIR) instead of the installed data dir

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    if not os.path.exists(gtkdocdir + '/gtk-doc.xsl'):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + '/gtk-doc.xsl'):
            gtkdocdir = srcdir
    return (gtkdocdir, gtkdocdir + '/style')
1474
1475
def main(module, index_file, out_dir, uninstalled):
    """Convert a docbook document to html chunks plus a devhelp2 index.

    Args:
      module: name of the module
      index_file: path of the main docbook xml file
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and append the style definitions of the html formatter to the copy
    copied_css = os.path.join(out_dir, 'style.css')
    with open(copied_css, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #   can generate navigation and link tags.
    root_chunk = chunk(tree.getroot())
    files = list(PreOrderIter(root_chunk))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, tree.getroot(), files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
1511
1512
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed command line; 'args' holds the module name and the
               path of the main document, 'uninstalled' is passed to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an output dir left over from a previous run is reused
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))