gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - more chunk converters
  39 - more tag converters:
  40   - footnote: maybe track those in ctx and write them out at the end of the chunk
  41   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  42     attr on the <img> tag of the 'imageobject'
  43 - check each docbook tag if it can contain #PCDATA, if not don't check for
  44   xml.text
  45 - consider some perf-warnings flag
  46   - see 'No "id" attribute on'
  47
  48 OPTIONAL:
  49 - minify html: https://pypi.python.org/pypi/htmlmin/
  50
  51 Requirements:
  52 sudo pip3 install anytree lxml pygments
  53
  54 Example invocation:
  55 cd tests/bugs/docs/
  56 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  57 xdg-open db2html/index.html
  58 meld html db2html
  59
  60 Benchmarking:
  61 cd tests/bugs/docs/;
  62 rm html-build.stamp; time make html-build.stamp
  63 """
  64
  65 import argparse
  66 import errno
  67 import logging
  68 import os
  69 import shutil
  70 import sys
  71
  72 from anytree import Node, PreOrderIter
  73 from copy import deepcopy
  74 from glob import glob
  75 from lxml import etree
  76 from pygments import highlight
  77 from pygments.lexers import CLexer
  78 from pygments.formatters import HtmlFormatter
  79
  80 from . import config, fixxref
  81
# pygments setup
# lazily constructed lexer cache, keyed by the lower-cased language name;
# only the C lexer exists up front, convert_programlisting() looks up and
# stores further lexers on first use
LEXERS = {
    'c': CLexer()
}
# shared formatter; convert_programlisting() wraps the highlighted markup in
# its own <pre> element
HTML_FORMATTER = HtmlFormatter(nowrap=True)
  88
# http://www.sagehill.net/docbookxsl/Chunking.html
# Docbook tags that start a new chunk: chunk() creates one tree node (with its
# own '.html' filename) for every element carrying one of these tags, and
# convert__unknown() refuses to recurse into them.
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',      # in article or book
    'index',         # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',         # except first
    'section',       # if equivalent to sect1
    'set',
    'setindex',
]
 108
 109
 110 class ChunkParams(object):
 111     def __init__(self, prefix, parent=None):
 112         self.prefix = prefix
 113         self.parent = None
 114         self.count = 0
 115
 116
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
#
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
#
# Maps a chunk tag to the ChunkParams (name prefix, parent tag) that
# gen_chunk_name() uses for elements without an 'id' attribute. Tags missing
# here get an entry added at runtime by gen_chunk_name().
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'sect1': ChunkParams('s', 'chapter'),
    'section': ChunkParams('s', 'chapter'),
}
 133
# Per-tag pair of (title xpath, subtitle xpath) evaluated by
# get_chunk_titles(). The '_' entry is the fallback for tags without their own
# entry; a subtitle of None means that no subtitle is looked up.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every 'id' attribute value; evaluated by add_id_links().
ID_XPATH = etree.XPath('//@id')

# Selects every glossentry element; evaluated by build_glossary().
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# glossterm text -> glossdef text; filled by build_glossary() and read by
# convert_acronym().
glossary = {}
 147
 148
 149 def gen_chunk_name(node):
 150     if 'id' in node.attrib:
 151         return node.attrib['id']
 152
 153     tag = node.tag
 154     if tag not in CHUNK_PARAMS:
 155         CHUNK_PARAMS[tag] = ChunkParams(node.tag[:2])
 156         logging.warning('Add CHUNK_PARAMS for "%s"', tag)
 157
 158     naming = CHUNK_PARAMS[tag]
 159     naming.count += 1
 160     name = ('%s%02d' % (naming.prefix, naming.count))
 161     # handle parents to make names of nested tags unique
 162     # TODO: we only need to prepend the parent if there are > 1 of them in the
 163     #       xml
 164     # while naming.parent:
 165     #     parent = naming.parent
 166     #     if parent not in CHUNK_PARAMS:
 167     #         break;
 168     #     naming = CHUNK_PARAMS[parent]
 169     #     name = ('%s%02d' % (naming.prefix, naming.count)) + name
 170     return name
 171
 172
 173 def get_chunk_titles(node):
 174     tag = node.tag
 175     if tag not in TITLE_XPATHS:
 176         # Use defaults
 177         (title, subtitle) = TITLE_XPATHS['_']
 178     else:
 179         (title, subtitle) = TITLE_XPATHS[tag]
 180
 181     xml = title(node)[0]
 182     result = {
 183         'title': xml.text
 184     }
 185     if xml.tag != 'title':
 186         result['title_tag'] = xml.tag
 187     else:
 188         result['title_tag'] = tag
 189
 190     if subtitle:
 191         xml = subtitle(node)[0]
 192         result['subtitle'] = xml.text
 193         result['subtitle_tag'] = xml.tag
 194     else:
 195         result['subtitle'] = None
 196         result['subtitle_tag'] = None
 197     return result
 198
 199
def chunk(xml_node, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree

    Walks xml_node recursively. Every element whose tag is in CHUNK_TAGS
    becomes an anytree Node that carries the element ('xml'), the output
    'filename' and the fields returned by get_chunk_titles(). A chunk element
    found below another chunk is removed from its xml parent and continues as
    a deep copy, so the xml of the outer chunk no longer contains it.
    """
    if xml_node.tag in CHUNK_TAGS:
        if parent:
            # remove the xml-node from the parent
            # (the deep copy becomes the root of its own element tree)
            sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = sub_tree

        title_args = get_chunk_titles(xml_node)
        chunk_name = gen_chunk_name(xml_node)
        # from here on the new node is the parent for all chunks found below
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=chunk_name + '.html', **title_args)

    for child in xml_node:
        chunk(child, parent)

    return parent
 222
 223
 224 def add_id_links(files, links):
 225     for node in files:
 226         chunk_name = node.filename[:-5]
 227         chunk_base = node.filename + '#'
 228         for attr in ID_XPATH(node.xml):
 229             if attr == chunk_name:
 230                 links[attr] = node.filename
 231             else:
 232                 links[attr] = chunk_base + attr
 233
 234
 235 def build_glossary(files):
 236     for node in files:
 237         if node.xml.tag != 'glossary':
 238             continue
 239         for term in GLOSSENTRY_XPATH(node.xml):
 240             # TODO: there can be all kind of things in a glossary. This only supports
 241             # what we commonly use
 242             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 243             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 244             glossary[key] = value
 245             # logging.debug('glosentry: %s:%s', key, value)
 246
 247
 248 # conversion helpers
 249
 250
 251 def convert_inner(ctx, xml, result):
 252     for child in xml:
 253         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 254
 255
 256 def convert_ignore(ctx, xml):
 257     result = []
 258     convert_inner(ctx, xml, result)
 259     return result
 260
 261
 262 def convert_skip(ctx, xml):
 263     return ['']
 264
 265
 266 missing_tags = {}
 267
 268
 269 def convert__unknown(ctx, xml):
 270     # don't recurse on subchunks
 271     if xml.tag in CHUNK_TAGS:
 272         return []
 273     if isinstance(xml, etree._Comment):
 274         return ['<!-- ' + xml.text + '-->\n']
 275     else:
 276         # warn only once
 277         if xml.tag not in missing_tags:
 278             logging.warning('Add tag converter for "%s"', xml.tag)
 279             missing_tags[xml.tag] = True
 280         result = ['<!-- ' + xml.tag + '-->\n']
 281         convert_inner(ctx, xml, result)
 282         result.append('<!-- /' + xml.tag + '-->\n')
 283         return result
 284
 285
 286 def convert_refsect(ctx, xml, h_tag, inner_func=convert_inner):
 287     result = ['<div class="%s">\n' % xml.tag]
 288     title = xml.find('title')
 289     if title is not None:
 290         if 'id' in xml.attrib:
 291             result.append('<a name="%s"></a>' % xml.attrib['id'])
 292         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 293         xml.remove(title)
 294     if xml.text:
 295         result.append(xml.text)
 296     inner_func(ctx, xml, result)
 297     result.append('</div>')
 298     if xml.tail:
 299         result.append(xml.tail)
 300     return result
 301
 302
 303 def xml_get_title(xml):
 304     title = xml.find('title')
 305     if title is not None:
 306         return title.text
 307     else:
 308         # TODO(ensonic): any way to get the file (inlcudes) too?
 309         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 310         return ''
 311
 312
 313 # docbook tags
 314
 315
 316 def convert_acronym(ctx, xml):
 317     key = xml.text
 318     title = glossary.get(key, '')
 319     # TODO: print a sensible warning if missing
 320     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 321     if xml.tail:
 322         result.append(xml.tail)
 323     return result
 324
 325
 326 def convert_bookinfo(ctx, xml):
 327     result = ['<div class="titlepage">']
 328     convert_inner(ctx, xml, result)
 329     result.append("""<hr>
 330 </div>""")
 331     if xml.tail:
 332         result.append(xml.tail)
 333     return result
 334
 335
 336 def convert_blockquote(ctx, xml):
 337     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 338     if xml.text:
 339         result.append(xml.text)
 340     convert_inner(ctx, xml, result)
 341     result.append('</blockquote>\n</div>')
 342     if xml.tail:
 343         result.append(xml.tail)
 344     return result
 345
 346
 347 def convert_colspec(ctx, xml):
 348     result = ['<col']
 349     a = xml.attrib
 350     if 'colname' in a:
 351         result.append(' class="%s"' % a['colname'])
 352     if 'colwidth' in a:
 353         result.append(' width="%s"' % a['colwidth'])
 354     result.append('>\n')
 355     # is in tgroup and there can be no 'text'
 356     return result
 357
 358
 359 def convert_corpauthor(ctx, xml):
 360     result = ['<div><h3 class="corpauthor">\n']
 361     if xml.text:
 362         result.append(xml.text)
 363     convert_inner(ctx, xml, result)
 364     result.append('</h3></div>\n')
 365     if xml.tail:
 366         result.append(xml.tail)
 367     return result
 368
 369
 370 def convert_div(ctx, xml):
 371     result = ['<div class="%s">\n' % xml.tag]
 372     if xml.text:
 373         result.append(xml.text)
 374     convert_inner(ctx, xml, result)
 375     result.append('</div>')
 376     if xml.tail:
 377         result.append(xml.tail)
 378     return result
 379
 380
 381 def convert_em_class(ctx, xml):
 382     result = ['<em class="%s"><code>' % xml.tag]
 383     if xml.text:
 384         result.append(xml.text)
 385     convert_inner(ctx, xml, result)
 386     result.append('</code></em>')
 387     if xml.tail:
 388         result.append(xml.tail)
 389     return result
 390
 391
 392 def convert_entry(ctx, xml):
 393     result = ['<td']
 394     if 'role' in xml.attrib:
 395         result.append(' class="%s">' % xml.attrib['role'])
 396     else:
 397         result.append('>')
 398     if xml.text:
 399         result.append(xml.text)
 400     convert_inner(ctx, xml, result)
 401     result.append('</td>')
 402     if xml.tail:
 403         result.append(xml.tail)
 404     return result
 405
 406
 407 def convert_glossdef(ctx, xml):
 408     result = ['<dd class="glossdef">']
 409     convert_inner(ctx, xml, result)
 410     result.append('</dd>\n')
 411     return result
 412
 413
 414 def convert_glossdiv(ctx, xml):
 415     title_tag = xml.find('title')
 416     title = title_tag.text
 417     xml.remove(title_tag)
 418     result = [
 419         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 420     ]
 421     convert_inner(ctx, xml, result)
 422     return result
 423
 424
 425 def convert_glossentry(ctx, xml):
 426     result = []
 427     convert_inner(ctx, xml, result)
 428     return result
 429
 430
 431 def convert_glossterm(ctx, xml):
 432     glossid = ''
 433     text = ''
 434     anchor = xml.find('anchor')
 435     if anchor is not None:
 436         glossid = anchor.attrib.get('id', '')
 437         text += anchor.tail or ''
 438     text += xml.text or ''
 439     if glossid == '':
 440         glossid = 'glossterm-' + text
 441     return [
 442         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 443             glossid, text)
 444     ]
 445
 446
 447 def convert_imageobject(ctx, xml):
 448     imagedata = xml.find('imagedata')
 449     if imagedata is not None:
 450         # TODO(ensonic): warn on missing fileref attr?
 451         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 452     else:
 453         return []
 454
 455
 456 def convert_indexdiv(ctx, xml):
 457     title_tag = xml.find('title')
 458     title = title_tag.text
 459     xml.remove(title_tag)
 460     result = [
 461         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 462     ]
 463     convert_inner(ctx, xml, result)
 464     return result
 465
 466
 467 def convert_informaltable(ctx, xml):
 468     result = ['<div class="informaltable"><table class="informaltable"']
 469     a = xml.attrib
 470     if 'pgwide' in a and a['pgwide'] == '1':
 471         result.append(' width="100%"')
 472     if 'frame' in a and a['frame'] == 'none':
 473         result.append(' border="0"')
 474     result.append('>\n')
 475     convert_inner(ctx, xml, result)
 476     result.append('</table></div>')
 477     if xml.tail:
 478         result.append(xml.tail)
 479     return result
 480
 481
 482 def convert_itemizedlist(ctx, xml):
 483     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 484     convert_inner(ctx, xml, result)
 485     result.append('</ul></div>')
 486     if xml.tail:
 487         result.append(xml.tail)
 488     return result
 489
 490
 491 def convert_link(ctx, xml):
 492     linkend = xml.attrib['linkend']
 493     if linkend in fixxref.NoLinks:
 494         linkend = None
 495     result = []
 496     if linkend:
 497         link_text = []
 498         convert_inner(ctx, xml, link_text)
 499         if xml.text:
 500             link_text.append(xml.text)
 501         # TODO: fixxref does some weird checks in xml.text
 502         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 503     if xml.tail:
 504         result.append(xml.tail)
 505     return result
 506
 507
 508 def convert_listitem(ctx, xml):
 509     result = ['<li class="listitem">']
 510     convert_inner(ctx, xml, result)
 511     result.append('</li>')
 512     # is in itemizedlist and there can be no 'text'
 513     return result
 514
 515
 516 def convert_literal(ctx, xml):
 517     result = ['<code class="%s">' % xml.tag]
 518     if xml.text:
 519         result.append(xml.text)
 520     convert_inner(ctx, xml, result)
 521     result.append('</code>')
 522     if xml.tail:
 523         result.append(xml.tail)
 524     return result
 525
 526
 527 def convert_orderedlist(ctx, xml):
 528     result = ['<div class="orderedlistlist"><ol class="orderedlistlist" type="1">']
 529     convert_inner(ctx, xml, result)
 530     result.append('</ol></div>')
 531     if xml.tail:
 532         result.append(xml.tail)
 533     return result
 534
 535
 536 def convert_para(ctx, xml):
 537     result = []
 538     if 'id' in xml.attrib:
 539         result.append('<a name="%s"></a>' % xml.attrib['id'])
 540     result.append('<p>')
 541     if xml.text:
 542         result.append(xml.text)
 543     convert_inner(ctx, xml, result)
 544     result.append('</p>')
 545     if xml.tail:
 546         result.append(xml.tail)
 547     return result
 548
 549
 550 def convert_para_like(ctx, xml):
 551     result = []
 552     if 'id' in xml.attrib:
 553         result.append('<a name="%s"></a>' % xml.attrib['id'])
 554     result.append('<p class="%s">' % xml.tag)
 555     if xml.text:
 556         result.append(xml.text)
 557     convert_inner(ctx, xml, result)
 558     result.append('</p>')
 559     if xml.tail:
 560         result.append(xml.tail)
 561     return result
 562
 563
 564 def convert_phrase(ctx, xml):
 565     result = ['<span']
 566     if 'role' in xml.attrib:
 567         result.append(' class="%s">' % xml.attrib['role'])
 568     else:
 569         result.append('>')
 570     if xml.text:
 571         result.append(xml.text)
 572     convert_inner(ctx, xml, result)
 573     result.append('</span>')
 574     if xml.tail:
 575         result.append(xml.tail)
 576     return result
 577
 578
 579 def convert_primaryie(ctx, xml):
 580     result = ['<dt>\n']
 581     convert_inner(ctx, xml, result)
 582     result.append('\n</dt>\n<dd></dd>\n')
 583     return result
 584
 585
 586 def convert_pre(ctx, xml):
 587     result = ['<pre class="%s">\n' % xml.tag]
 588     if xml.text:
 589         result.append(xml.text)
 590     convert_inner(ctx, xml, result)
 591     result.append('</pre>')
 592     if xml.tail:
 593         result.append(xml.tail)
 594     return result
 595
 596
 597 def convert_programlisting(ctx, xml):
 598     result = []
 599     if xml.attrib.get('role', '') == 'example':
 600         if xml.text:
 601             lang = xml.attrib.get('language', 'c').lower()
 602             if lang not in LEXERS:
 603                 LEXERS[lang] = get_lexer_by_name(lang)
 604             lexer = LEXERS.get(lang, None)
 605             if lexer:
 606                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 607
 608                 # we do own line-numbering
 609                 line_count = highlighted.count('\n')
 610                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 611                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 612   <tbody>
 613     <tr>
 614       <td class="listing_lines" align="right"><pre>%s</pre></td>
 615       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 616     </tr>
 617   </tbody>
 618 </table>
 619 """ % (source_lines, highlighted))
 620             else:
 621                 logging.warn('No pygments lexer for language="%s"', lang)
 622                 result.append('<pre class="programlisting">')
 623                 result.append(xml.text)
 624                 result.append('</pre>')
 625     else:
 626         result.append('<pre class="programlisting">')
 627         if xml.text:
 628             result.append(xml.text)
 629         convert_inner(ctx, xml, result)
 630         result.append('</pre>')
 631     if xml.tail:
 632         result.append(xml.tail)
 633     return result
 634
 635
 636 def convert_refsect1(ctx, xml):
 637     # Add a divider between two consequitive refsect2
 638     def convert_inner(ctx, xml, result):
 639         prev = None
 640         for child in xml:
 641             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 642                 result.append('<hr>\n')
 643             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 644             prev = child
 645     return convert_refsect(ctx, xml, 'h2', convert_inner)
 646
 647
 648 def convert_refsect2(ctx, xml):
 649     return convert_refsect(ctx, xml, 'h3')
 650
 651
 652 def convert_refsect3(ctx, xml):
 653     return convert_refsect(ctx, xml, 'h4')
 654
 655
 656 def convert_row(ctx, xml):
 657     result = ['<tr>\n']
 658     convert_inner(ctx, xml, result)
 659     result.append('</tr>\n')
 660     return result
 661
 662
 663 def convert_simpara(ctx, xml):
 664     result = ['<p>']
 665     if xml.text:
 666         result.append(xml.text)
 667     result.append('</p>')
 668     if xml.tail:
 669         result.append(xml.tail)
 670     return result
 671
 672
 673 def convert_span(ctx, xml):
 674     result = ['<span class="%s">' % xml.tag]
 675     if xml.text:
 676         result.append(xml.text)
 677     convert_inner(ctx, xml, result)
 678     result.append('</span>')
 679     if xml.tail:
 680         result.append(xml.tail)
 681     return result
 682
 683
 684 def convert_tbody(ctx, xml):
 685     result = ['<tbody>']
 686     convert_inner(ctx, xml, result)
 687     result.append('</tbody>')
 688     # is in tgroup and there can be no 'text'
 689     return result
 690
 691
 692 def convert_tgroup(ctx, xml):
 693     # tgroup does not expand to anything, but the nested colspecs need to
 694     # be put into a colgroup
 695     cols = xml.findall('colspec')
 696     result = []
 697     if cols:
 698         result.append('<colgroup>\n')
 699         for col in cols:
 700             result.extend(convert_colspec(ctx, col))
 701             xml.remove(col)
 702         result.append('</colgroup>\n')
 703     convert_inner(ctx, xml, result)
 704     # is in informaltable and there can be no 'text'
 705     return result
 706
 707
 708 def convert_ulink(ctx, xml):
 709     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 710     if xml.tail:
 711         result.append(xml.tail)
 712     return result
 713
 714
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Maps a docbook tag to its converter. Each converter is called as
# converter(ctx, xml) and returns a list of html strings. convert_inner()
# uses convert__unknown() for tags that are not listed here.
convert_tags = {
    'acronym': convert_acronym,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'colspec': convert_colspec,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'function': convert_span,
    'glossdef': convert_glossdef,
    'glossdiv': convert_glossdiv,
    'glossentry': convert_glossentry,
    'glossterm': convert_glossterm,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_para_like,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_literal,
    'mediaobject': convert_div,
    'note': convert_div,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'tbody': convert_tbody,
    'tgroup': convert_tgroup,
    'type': convert_span,
    'ulink': convert_ulink,
    'warning': convert_div,
}
 766
# html page generation helpers
 768
# Start of the generated html pages, up to and including the opening <body>
# tag. Takes two '%s' values: the page title and the <link> tags built by
# generate_head_links().
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
 778
 779
 780 def generate_head_links(ctx):
 781     n = ctx['nav_home']
 782     result = [
 783         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
 784     ]
 785     if 'nav_up' in ctx:
 786         n = ctx['nav_up']
 787         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
 788     if 'nav_prev' in ctx:
 789         n = ctx['nav_prev']
 790         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
 791     if 'nav_next' in ctx:
 792         n = ctx['nav_next']
 793         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
 794     return ''.join(result)
 795
 796
 797 def generate_nav_links(ctx):
 798     n = ctx['nav_home']
 799     result = [
 800         '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n.filename
 801     ]
 802     if 'nav_up' in ctx:
 803         n = ctx['nav_up']
 804         result.append(
 805             '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n.filename)
 806     else:
 807         result.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
 808     if 'nav_prev' in ctx:
 809         n = ctx['nav_prev']
 810         result.append(
 811             '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n.filename)
 812     else:
 813         result.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
 814     if 'nav_next' in ctx:
 815         n = ctx['nav_next']
 816         result.append(
 817             '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n.filename)
 818     else:
 819         result.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
 820
 821     return ''.join(result)
 822
 823
 824 def generate_toc(ctx, node):
 825     result = []
 826     for c in node.children:
 827         # TODO: urlencode the filename: urllib.parse.quote_plus()
 828         result.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
 829             c.title_tag, c.filename, c.title))
 830         if c.subtitle:
 831             result.append('<span class="%s"> — %s</span>' % (c.subtitle_tag, c.subtitle))
 832         result.append('</dt>\n')
 833         if c.children:
 834             result.append('<dd><dl>')
 835             result.extend(generate_toc(ctx, c))
 836             result.append('</dl></dd>')
 837     return result
 838
 839
 840 def generate_basic_nav(ctx):
 841     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 842   <tr valign="middle">
 843     <td width="100%%" align="left" class="shortcuts"></td>
 844     %s
 845   </tr>
 846 </table>
 847     """ % generate_nav_links(ctx)
 848
 849
 850 def generate_alpha_nav(ctx, divs, prefix):
 851     ix_nav = []
 852     for s in divs:
 853         title = xml_get_title(s)
 854         ix_nav.append('<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title))
 855
 856     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 857   <tr valign="middle">
 858     <td width="100%%" align="left" class="shortcuts">
 859       <span id="nav_index">
 860         %s
 861       </span>
 862     </td>
 863     %s
 864   </tr>
 865 </table>
 866     """ % ('\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
 867
 868
 869 def generate_refentry_nav(ctx, refsect1s, result):
 870     result.append("""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 871   <tr valign="middle">
 872     <td width="100%%" align="left" class="shortcuts">
 873       <a href="#" class="shortcut">Top</a>""")
 874
 875     for s in refsect1s:
 876         # don't list TOC sections (role="xxx_proto")
 877         if s.attrib.get('role', '').endswith("_proto"):
 878             continue
 879         # skip section without 'id' attrs
 880         if 'id' not in s.attrib:
 881             continue
 882
 883         title = xml_get_title(s)
 884         result.append("""
 885           <span id="nav_description">
 886             <span class="dim">|</span>
 887             <a href="#%s" class="shortcut">%s</a>
 888           </span>""" % (s.attrib['id'], title))
 889     result.append("""
 890     </td>
 891     %s
 892   </tr>
 893 </table>
 894 """ % generate_nav_links(ctx))
 895
 896
 897 def get_id(node):
 898     xml = node.xml
 899     node_id = xml.attrib.get('id', None)
 900     if node_id:
 901         return node_id
 902
 903     logging.info('%d: No "id" attribute on "%s", generating one',
 904                  xml.sourceline, xml.tag)
 905     ix = []
 906     # Generate the 'id'. We need to walk up the xml-tree and check the positions
 907     # for each sibling.
 908     parent = xml.getparent()
 909     while parent is not None:
 910         children = parent.getchildren()
 911         ix.insert(0, str(children.index(xml) + 1))
 912         xml = parent
 913         parent = xml.getparent()
 914     # logging.warning('%s: id indexes: %s', node.filename, str(ix))
 915     return 'id-1.' + '.'.join(ix)
 916
 917
 918 def convert_chunk_with_toc(ctx, div_class, title_tag):
 919     node = ctx['node']
 920     result = [
 921         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 922         generate_basic_nav(ctx),
 923         '<div class="%s">' % div_class,
 924     ]
 925     title = node.xml.find('title')
 926     if title is not None:
 927         result.append("""
 928 <div class="titlepage">
 929 <%s class="title"><a name="%s"></a>%s</%s>
 930 </div>""" % (
 931             title_tag, get_id(node), title.text, title_tag))
 932         node.xml.remove(title)
 933     convert_inner(ctx, node.xml, result)
 934     result.append("""<p>
 935   <b>Table of Contents</b>
 936 </p>
 937 <div class="toc">
 938   <dl class="toc">
 939 """)
 940     result.extend(generate_toc(ctx, node))
 941     result.append("""</dl>
 942 </div>
 943 </div>
 944 </body>
 945 </html>""")
 946     return result
 947
 948
 949 # docbook chunks
 950
 951
 952 def convert_book(ctx):
 953     node = ctx['node']
 954     result = [
 955         HTML_HEADER % (node.title, generate_head_links(ctx)),
 956         """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
 957     <tr><th valign="middle"><p class="title">%s</p></th></tr>
 958 </table>
 959 <div class="book">
 960 """ % node.title
 961     ]
 962     bookinfo = node.xml.findall('bookinfo')[0]
 963     # we already used the title
 964     title = bookinfo.find('title')
 965     if title is not None:
 966         bookinfo.remove(title)
 967     result.extend(convert_bookinfo(ctx, bookinfo))
 968     result.append("""<div class="toc">
 969   <dl class="toc">
 970 """)
 971     result.extend(generate_toc(ctx, node.root))
 972     result.append("""</dl>
 973 </div>
 974 </div>
 975 </body>
 976 </html>""")
 977     return result
 978
 979
 980 def convert_chapter(ctx):
 981     return convert_chunk_with_toc(ctx, 'chapter', 'h2')
 982
 983
 984 def convert_glossary(ctx):
 985     node = ctx['node']
 986     glossdivs = node.xml.findall('glossdiv')
 987
 988     result = [
 989         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 990         generate_alpha_nav(ctx, glossdivs, 'gls'),
 991         """<div class="index">
 992 <div class="titlepage"><h1 class="title">
 993 <a name="%s"></a>%s</h1>
 994 </div>""" % (get_id(node), node.title)
 995     ]
 996
 997     for i in glossdivs:
 998         result.extend(convert_glossdiv(ctx, i))
 999
1000     result.append("""</div>
1001 </body>
1002 </html>""")
1003     return result
1004
1005
def convert_index(ctx):
    """Convert an 'index' chunk into a html page.

    The page consists of the html header, the navigation returned by
    generate_alpha_nav() for the index sections, the title and one converted
    section per inner <indexdiv>.

    Args:
      ctx: conversion context; ctx['node'] is the index chunk

    Returns:
      list of html strings
    """
    node = ctx['node']
    # Get all indexdivs under the outer indexdiv. An index without the outer
    # <indexdiv> has no sections; treat it as empty instead of failing with
    # an AttributeError on None.
    outer = node.xml.find('indexdiv')
    indexdivs = outer.findall('indexdiv') if outer is not None else []

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx'),
        # The wrapping <div> is named after the converted element, like for
        # all other chunks (it used to say "glossary" here).
        """<div class="index">
<div class="titlepage"><h2 class="title">
<a name="%s"></a>%s</h2>
</div>""" % (get_id(node), node.title)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.append("""</div>
</body>
</html>""")
    return result
1025
1026
def convert_part(ctx):
    """Convert a 'part' chunk: a 'part' <div> with a <h1> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1029
1030
def convert_preface(ctx):
    """Convert a 'preface' chunk into a html page (no table of contents).

    Args:
      ctx: conversion context; ctx['node'] is the preface chunk

    Returns:
      list of html strings
    """
    node = ctx['node']
    xml = node.xml
    page_title = node.title + ": " + node.root.title
    result = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]

    heading = xml.find('title')
    if heading is not None:
        anchor = get_id(node)
        result.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (anchor, heading.text))
        # The title has been written; take it out of the tree before the
        # remaining content is handed to convert_inner().
        xml.remove(heading)
    convert_inner(ctx, xml, result)

    result.append("""</div>
</body>
</html>""")
    return result
1050
1051
def convert_reference(ctx):
    """Convert a 'reference' chunk: a 'reference' <div> with a <h1> title and a toc."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1054
1055
def convert_refentry(ctx):
    """Convert a 'refentry' chunk into a html page.

    The page consists of the html header, the refentry navigation, a title
    block with the purpose of the entry and the converted <refsect1>
    children.

    Args:
      ctx: conversion context; ctx['node'] is the refentry chunk

    Returns:
      list of html strings
    """
    from xml.sax.saxutils import escape

    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # The short description shown next to the title comes from the
    # <refpurpose> of the entry (it used to be a fixed text). It is raw text
    # from the xml tree, so escape it before putting it into the html.
    purpose = ''
    refpurpose = node.xml.find('refnamediv/refpurpose')
    if refpurpose is not None:
        purpose = escape(''.join(refpurpose.itertext()).strip())

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, result)
    result.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right"></td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, purpose))

    for s in refsect1s:
        result.extend(convert_refsect1(ctx, s))
    result.append("""</div>
</body>
</html>""")
    return result
1085
1086
# TODO(ensonic): turn into class with converters as functions and ctx as self
# Maps the name of a chunk node to the function that converts it. Each
# converter takes the conversion context and returns a list of html strings.
# convert() looks up node.name here and logs a warning for unknown names.
convert_chunks = {
    'book': convert_book,
    'chapter': convert_chapter,
    'glossary': convert_glossary,
    'index': convert_index,
    'part': convert_part,
    'preface': convert_preface,
    'reference': convert_reference,
    'refentry': convert_refentry,
}
1098
1099
def generate_nav_nodes(files, node):
    """Collect the navigation targets for a node.

    Args:
      files: list of all nodes; previous/next are the neighbours in this list
      node: the node to build the navigation for; must be in 'files'

    Returns:
      dict with 'nav_home' and, where available, 'nav_up', 'nav_prev' and
      'nav_next'
    """
    nav = {'nav_home': node.root}

    up = node.parent
    if up:
        nav['nav_up'] = up

    pos = files.index(node)
    if pos > 0:
        nav['nav_prev'] = files[pos - 1]
    if pos + 1 < len(files):
        nav['nav_next'] = files[pos + 1]
    return nav
1113
1114
def convert(out_dir, module, files, node):
    """Convert one docbook chunk and write it to its html file.

    The file is created even if there is no converter for the chunk; in that
    case it stays empty and a warning is logged.

    Args:
      out_dir: already created output dir
      module: the module name, passed to the converters as ctx['module']
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
1139
1140
def create_devhelp2_toc(node):
    """Build the nested <sub> entries for all descendants of a node.

    Args:
      node: tree node; its children need 'title', 'filename' and 'children'

    Returns:
      list of xml strings; children with children of their own get an
      open/close tag pair around their entries, the others an empty tag
    """
    entries = []
    for child in node.children:
        attrs = (child.title, child.filename)
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        entries.append('<sub name="%s" link="%s">\n' % attrs)
        entries.extend(create_devhelp2_toc(child))
        entries.append('</sub>\n')
    return entries
1151
1152
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of a node into xml attributes.

    The condition is a '|' separated list of 'name:value' items (since,
    deprecated, ...). Each item becomes a name="value" attribute; an item
    without ':' gets an empty value. Items without a name are skipped.

    Args:
      node: xml element; only node.attrib is used

    Returns:
      the attributes as one string with a leading space, or '' if there is
      nothing to add
    """
    from xml.sax.saxutils import escape

    condition = node.attrib.get('condition')
    if not condition:
        return ''

    keywords = []
    for item in condition.split('|'):
        name, _, value = item.partition(':')
        if not name:
            # e.g. from a trailing '|'; would give an attribute without name
            continue
        # deprecated can have no description -> empty value
        # The value is raw text, escape everything that is special inside a
        # double-quoted xml attribute (&, <, > and the quote itself).
        keywords.append('{}="{}"'.format(name, escape(value, {'"': '&quot;'})))
    if not keywords:
        return ''
    return ' ' + ' '.join(keywords)
1167
1168
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the <keyword> line for a refsect2 element.

    Args:
      node: the refsect2 xml element; needs 'role' and 'id' attributes
      base_link: link prefix that the 'id' of the node is appended to

    Returns:
      the xml line as a string
    """
    attrib = node.attrib
    fields = (
        attrib['role'],
        xml_get_title(node),
        base_link + attrib['id'],
        create_devhelp2_condition_attribs(node),
    )
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % fields
1173
1174
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the <keyword> line for an entry inside a refsect3.

    Args:
      node: xml element providing the 'role' and the optional 'condition'
      base_link: link prefix that 'name' is appended to
      title: value for the 'name' attribute of the keyword
      name: anchor name, appended to base_link

    Returns:
      the xml line as a string
    """
    fields = (
        node.attrib['role'],
        title,
        base_link + name,
        create_devhelp2_condition_attribs(node),
    )
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % fields
1179
1180
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' file into out_dir.

    The file contains the table of contents as <chapters> and one keyword per
    refsect2 with a 'role', plus keywords for the enum and struct members
    listed in its refsect3 tables.

    Args:
      out_dir: already created output dir
      module: the module name; used for the file name and the book 'name'
      xml: root element of the docbook document
      files: list of tree nodes; each provides 'filename' and 'xml'
    """
    title = ''
    online_attr = ''
    # xpath() returns a list, which is empty if nothing matched. Check each
    # result before taking the first item, so that a document without
    # bookinfo, title or online location does not raise an IndexError.
    bookinfo_nodes = xml.xpath('/book/bookinfo')
    if bookinfo_nodes:
        bookinfo = bookinfo_nodes[0]
        titles = bookinfo.xpath('./title/text()')
        if titles:
            title = titles[0]
        online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
        if online_urls:
            online_attr = ' online="%s"' % online_urls[0]
        # TODO: support author too (see devhelp2.xsl)
    # TODO: fixxref uses '--src-lang' to set the language
    # NOTE(review): title, module and url are inserted as-is; presumably they
    # never contain xml special characters - confirm.
    result = [
        """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
    ]
    # toc
    result.extend(create_devhelp2_toc(files[0].root))
    result.append("""  </chapters>
  <functions>
""")
    # keywords from all refsect2 and refsect3
    refsect2 = etree.XPath('//refsect2[@role]')
    refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
    refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
    refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
    refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
    for node in files:
        base_link = node.filename + '#'
        refsect2_nodes = refsect2(node.xml)
        for refsect2_node in refsect2_nodes:
            result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
            # enum members: the keyword name is the text of the <para>
            refsect3_nodes = refsect3_enum(refsect2_node)
            for refsect3_node in refsect3_nodes:
                details_node = refsect3_enum_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
            # struct members: the 'id' is used for both keyword name and link
            refsect3_nodes = refsect3_struct(refsect2_node)
            for refsect3_node in refsect3_nodes:
                details_node = refsect3_struct_details(refsect3_node)[0]
                name = details_node.attrib['id']
                result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

    result.append("""  </functions>
</book>
""")
    # Only open the file once the content is complete, so that an error
    # above does not leave a truncated file behind.
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        idx.writelines(result)
1230
1231
def get_dirs(uninstalled):
    """Locate the gtk-doc data dir and the dir with the style files.

    Args:
      uninstalled: if true, use the dir of the running script (or
        $ABS_TOP_SRCDIR) instead of the installed data dir

    Returns:
      tuple of (gtkdocdir, styledir)
    """
    if not uninstalled:
        datadir = os.path.join(config.datadir, 'gtk-doc/data')
        return (datadir, datadir)

    marker = '/gtk-doc.xsl'
    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.split(sys.argv[0])[0]
    if not os.path.exists(gtkdocdir + marker):
        # try 'srcdir' (set from makefiles) too
        srcdir = os.environ.get("ABS_TOP_SRCDIR", '')
        if os.path.exists(srcdir + marker):
            gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1245
1246
def main(module, index_file, out_dir, uninstalled):
    """Convert a docbook document into chunked html plus a devhelp2 file.

    Args:
      module: the module name
      index_file: path of the main xml document; xml-includes are resolved
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files
    """
    doc = etree.parse(index_file)
    doc.xinclude()
    root = doc.getroot()

    # copy navigation images and stylesheets to html directory ...
    _, styledir = get_dirs(uninstalled)
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and append the style defs of the html formatter to the copied css
    copied_css = os.path.join(out_dir, 'style.css')
    with open(copied_css, 'at', newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #   can generate navigation and link tags.
    chunk_tree = chunk(root)
    files = list(PreOrderIter(chunk_tree))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, root, files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
1282
1283
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed options; options.args holds the module name and the
        path of the main document, options.uninstalled is passed to main()
    """
    logging.info('options: %s', str(options.__dict__))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except OSError as err:
        # an already existing dir is fine, everything else is an error
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise

    exit_code = main(module, document, out_dir, options.uninstalled)
    sys.exit(exit_code)