gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
this tool replaces both without relying on external tools such as xsltproc
and source-highlight.
  31
  32 TODO:
  33 - more chunk converters
  34 - more tag converters:
  35   - footnote: maybe track those in ctx and write them out at the end of the chunk
  36   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  37     attr on the <img> tag of the 'imageobject'
  38 - check each docbook tag if it can contain #PCDATA, if not don't check for
  39   xml.text
  40
  41 OPTIONAL:
  42 - minify html: https://pypi.python.org/pypi/htmlmin/
  43
  44 Requirements:
  45 sudo pip3 install anytree lxml pygments
  46
  47 Example invocation:
  48 cd tests/bugs/docs/
  49 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  50 xdg-open db2html/index.html
  51 meld html db2html
  52
  53 Benchmarking:
  54 cd tests/bugs/docs/;
  55 rm html-build.stamp; time make html-build.stamp
  56 """
  57
  58 import argparse
  59 import errno
  60 import logging
  61 import os
  62 import shutil
  63 import sys
  64
  65 from anytree import Node, PreOrderIter
  66 from copy import deepcopy
  67 from glob import glob
  68 from lxml import etree
  69 from pygments import highlight
  70 from pygments.lexers import CLexer
  71 from pygments.formatters import HtmlFormatter
  72
  73 from . import config, fixxref
  74
# pygments setup
# Lazily constructed lexer cache: maps a lower-cased language name to a
# pygments lexer instance. Only the C lexer is created up front; other
# languages are looked up on demand by convert_programlisting().
LEXERS = {
    'c': CLexer()
}
# nowrap=True makes the formatter emit only the highlighted tokens; the
# surrounding <pre> element is written by convert_programlisting() itself.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
  81
# DocBook tags that start a new chunk. chunk() creates a tree node for each
# element with one of these tags, and convert__unknown() refuses to recurse
# into them.
# http://www.sagehill.net/docbookxsl/Chunking.html
CHUNK_TAGS = [
    'appendix',
    'article',
    'bibliography',  # in article or book
    'book',
    'chapter',
    'colophon',
    'glossary',      # in article or book
    'index',         # in article or book
    'part',
    'preface',
    'refentry',
    'reference',
    'sect1',         # except first
    'section',       # if equivalent to sect1
    'set',
    'setindex',
]
 101
 102
 103 class ChunkParams(object):
 104     def __init__(self, prefix, parent=None):
 105         self.prefix = prefix
 106         self.parent = None
 107         self.count = 0
 108
 109
# Maps a chunk tag to the ChunkParams used by gen_chunk_name() when the
# element has no 'id' attribute. The first argument is the file name prefix,
# the optional second one names the parent chunk tag.
#
# TODO: look up the abbrevs and hierarchy for other tags
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
#
# If not defined, we can just create an example without an 'id' attr and see
# what docbook xsl does.
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'sect1': ChunkParams('s', 'chapter'),
    'section': ChunkParams('s', 'chapter'),
}
 126
# Maps a chunk tag to a (title xpath, subtitle xpath or None) pair used by
# get_chunk_titles(). The '_' entry is the default for tags not listed here.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects every 'id' attribute; used by add_id_links().
ID_XPATH = etree.XPath('//@id')

# Selects every glossentry element; used by build_glossary().
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# Glossary term -> definition text. Filled by build_glossary() and read by
# convert_acronym().
glossary = {}
 140
 141
 142 def gen_chunk_name(node):
 143     if 'id' in node.attrib:
 144         return node.attrib['id']
 145
 146     tag = node.tag
 147     if tag not in CHUNK_PARAMS:
 148         CHUNK_PARAMS[tag] = ChunkParams(node.tag[:2])
 149         logging.warning('Add CHUNK_PARAMS for "%s"', tag)
 150
 151     naming = CHUNK_PARAMS[tag]
 152     naming.count += 1
 153     name = ('%s%02d' % (naming.prefix, naming.count))
 154     # handle parents to make names of nested tags unique
 155     # TODO: we only need to prepend the parent if there are > 1 of them in the
 156     #       xml
 157     # while naming.parent:
 158     #     parent = naming.parent
 159     #     if parent not in CHUNK_PARAMS:
 160     #         break;
 161     #     naming = CHUNK_PARAMS[parent]
 162     #     name = ('%s%02d' % (naming.prefix, naming.count)) + name
 163     return name
 164
 165
 166 def get_chunk_titles(node):
 167     tag = node.tag
 168     if tag not in TITLE_XPATHS:
 169         # Use defaults
 170         (title, subtitle) = TITLE_XPATHS['_']
 171     else:
 172         (title, subtitle) = TITLE_XPATHS[tag]
 173
 174     xml = title(node)[0]
 175     result = {
 176         'title': xml.text
 177     }
 178     if xml.tag != 'title':
 179         result['title_tag'] = xml.tag
 180     else:
 181         result['title_tag'] = tag
 182
 183     if subtitle:
 184         xml = subtitle(node)[0]
 185         result['subtitle'] = xml.text
 186         result['subtitle_tag'] = xml.tag
 187     else:
 188         result['subtitle'] = None
 189         result['subtitle_tag'] = None
 190     return result
 191
 192
def chunk(xml_node, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree

    Args:
        xml_node: the xml element to inspect; its children are visited
            recursively.
        parent: the tree Node of the enclosing chunk, or None for the first
            call.

    Returns:
        The Node created for xml_node if its tag is in CHUNK_TAGS, otherwise
        the `parent` that was passed in.
    """
    if xml_node.tag in CHUNK_TAGS:
        if parent:
            # remove the xml-node from the parent
            # The copy must be taken before the original is detached; from
            # here on we continue with the copy, which is the root of its own
            # tree.
            sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
            xml_node.getparent().remove(xml_node)
            xml_node = sub_tree

        title_args = get_chunk_titles(xml_node)
        chunk_name = gen_chunk_name(xml_node)
        # The new Node becomes the parent for all chunks found further down.
        parent = Node(xml_node.tag, parent=parent, xml=xml_node,
                      filename=chunk_name + '.html', **title_args)

    # Recurse; the return values of the nested calls are not needed.
    for child in xml_node:
        chunk(child, parent)

    return parent
 215
 216
 217 def add_id_links(files, links):
 218     for node in files:
 219         chunk_name = node.filename[:-5]
 220         chunk_base = node.filename + '#'
 221         for attr in ID_XPATH(node.xml):
 222             if attr == chunk_name:
 223                 links[attr] = node.filename
 224             else:
 225                 links[attr] = chunk_base + attr
 226
 227
 228 def build_glossary(files):
 229     for node in files:
 230         if node.xml.tag != 'glossary':
 231             continue
 232         for term in GLOSSENTRY_XPATH(node.xml):
 233             # TODO: there can be all kind of things in a glossary. This only supports
 234             # what we commonly use
 235             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 236             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 237             glossary[key] = value
 238             # logging.debug('glosentry: %s:%s', key, value)
 239
 240
 241 # conversion helpers
 242
 243
 244 def convert_inner(ctx, xml, result):
 245     for child in xml:
 246         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 247
 248
 249 def convert_ignore(ctx, xml):
 250     result = []
 251     convert_inner(ctx, xml, result)
 252     return result
 253
 254
 255 def convert_skip(ctx, xml):
 256     return ['']
 257
 258
# Tags for which convert__unknown() has already logged a warning.
missing_tags = {}
 260
 261
 262 def convert__unknown(ctx, xml):
 263     # don't recurse on subchunks
 264     if xml.tag in CHUNK_TAGS:
 265         return []
 266     # warn only once
 267     if xml.tag not in missing_tags:
 268         logging.warning('Add tag converter for "%s"', xml.tag)
 269         missing_tags[xml.tag] = True
 270     result = ['<!-- ' + xml.tag + '-->\n']
 271     convert_inner(ctx, xml, result)
 272     result.append('<!-- /' + xml.tag + '-->\n')
 273     return result
 274
 275
 276 def convert_refsect(ctx, xml, h_tag, inner_func=convert_inner):
 277     result = ['<div class="%s">\n' % xml.tag]
 278     title = xml.find('title')
 279     if title is not None:
 280         if 'id' in xml.attrib:
 281             result.append('<a name="%s"></a>' % xml.attrib['id'])
 282         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 283         xml.remove(title)
 284     if xml.text:
 285         result.append(xml.text)
 286     inner_func(ctx, xml, result)
 287     result.append('</div>')
 288     if xml.tail:
 289         result.append(xml.tail)
 290     return result
 291
 292
 293 def xml_get_title(xml):
 294     title = xml.find('title')
 295     if title is not None:
 296         return title.text
 297     else:
 298         # TODO(ensonic): any way to get the file (inlcudes) too?
 299         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 300         return ''
 301
 302
 303 # docbook tags
 304
 305
 306 def convert_acronym(ctx, xml):
 307     key = xml.text
 308     title = glossary.get(key, '')
 309     # TODO: print a sensible warning if missing
 310     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 311     if xml.tail:
 312         result.append(xml.tail)
 313     return result
 314
 315
 316 def convert_bookinfo(ctx, xml):
 317     result = ['<div class="titlepage">']
 318     convert_inner(ctx, xml, result)
 319     result.append("""<hr>
 320 </div>""")
 321     if xml.tail:
 322         result.append(xml.tail)
 323     return result
 324
 325
 326 def convert_blockquote(ctx, xml):
 327     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 328     if xml.text:
 329         result.append(xml.text)
 330     convert_inner(ctx, xml, result)
 331     result.append('</blockquote>\n</div>')
 332     if xml.tail:
 333         result.append(xml.tail)
 334     return result
 335
 336
 337 def convert_colspec(ctx, xml):
 338     result = ['<col']
 339     a = xml.attrib
 340     if 'colname' in a:
 341         result.append(' class="%s"' % a['colname'])
 342     if 'colwidth' in a:
 343         result.append(' width="%s"' % a['colwidth'])
 344     result.append('>\n')
 345     # is in tgroup and there can be no 'text'
 346     return result
 347
 348
 349 def convert_corpauthor(ctx, xml):
 350     result = ['<div><h3 class="corpauthor">\n']
 351     if xml.text:
 352         result.append(xml.text)
 353     convert_inner(ctx, xml, result)
 354     result.append('</h3></div>\n')
 355     if xml.tail:
 356         result.append(xml.tail)
 357     return result
 358
 359
 360 def convert_div(ctx, xml):
 361     result = ['<div class="%s">\n' % xml.tag]
 362     if xml.text:
 363         result.append(xml.text)
 364     convert_inner(ctx, xml, result)
 365     result.append('</div>')
 366     if xml.tail:
 367         result.append(xml.tail)
 368     return result
 369
 370
 371 def convert_em_class(ctx, xml):
 372     result = ['<em class="%s"><code>' % xml.tag]
 373     if xml.text:
 374         result.append(xml.text)
 375     convert_inner(ctx, xml, result)
 376     result.append('</code></em>')
 377     if xml.tail:
 378         result.append(xml.tail)
 379     return result
 380
 381
 382 def convert_entry(ctx, xml):
 383     result = ['<td']
 384     if 'role' in xml.attrib:
 385         result.append(' class="%s">' % xml.attrib['role'])
 386     else:
 387         result.append('>')
 388     if xml.text:
 389         result.append(xml.text)
 390     convert_inner(ctx, xml, result)
 391     result.append('</td>')
 392     if xml.tail:
 393         result.append(xml.tail)
 394     return result
 395
 396
 397 def convert_imageobject(ctx, xml):
 398     imagedata = xml.find('imagedata')
 399     if imagedata is not None:
 400         # TODO(ensonic): warn on missing fileref attr?
 401         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 402     else:
 403         return []
 404
 405
 406 def convert_indexdiv(ctx, xml):
 407     title_tag = xml.find('title')
 408     title = title_tag.text
 409     xml.remove(title_tag)
 410     result = [
 411         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 412     ]
 413     convert_inner(ctx, xml, result)
 414     return result
 415
 416
 417 def convert_informaltable(ctx, xml):
 418     result = ['<div class="informaltable"><table class="informaltable"']
 419     a = xml.attrib
 420     if 'pgwide' in a and a['pgwide'] == '1':
 421         result.append(' width="100%"')
 422     if 'frame' in a and a['frame'] == 'none':
 423         result.append(' border="0"')
 424     result.append('>\n')
 425     convert_inner(ctx, xml, result)
 426     result.append('</table></div>')
 427     if xml.tail:
 428         result.append(xml.tail)
 429     return result
 430
 431
 432 def convert_itemizedlist(ctx, xml):
 433     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 434     convert_inner(ctx, xml, result)
 435     result.append('</ul></div>')
 436     if xml.tail:
 437         result.append(xml.tail)
 438     return result
 439
 440
 441 def convert_link(ctx, xml):
 442     linkend = xml.attrib['linkend']
 443     if linkend in fixxref.NoLinks:
 444         linkend = None
 445     result = []
 446     if linkend:
 447         link_text = []
 448         convert_inner(ctx, xml, link_text)
 449         if xml.text:
 450             link_text.append(xml.text)
 451         # TODO: fixxref does some weird checks in xml.text
 452         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 453     if xml.tail:
 454         result.append(xml.tail)
 455     return result
 456
 457
 458 def convert_listitem(ctx, xml):
 459     result = ['<li class="listitem">']
 460     convert_inner(ctx, xml, result)
 461     result.append('</li>')
 462     # is in itemizedlist and there can be no 'text'
 463     return result
 464
 465
 466 def convert_literal(ctx, xml):
 467     result = ['<code class="%s">' % xml.tag]
 468     if xml.text:
 469         result.append(xml.text)
 470     convert_inner(ctx, xml, result)
 471     result.append('</code>')
 472     if xml.tail:
 473         result.append(xml.tail)
 474     return result
 475
 476
 477 def convert_orderedlist(ctx, xml):
 478     result = ['<div class="orderedlistlist"><ol class="orderedlistlist" type="1">']
 479     convert_inner(ctx, xml, result)
 480     result.append('</ol></div>')
 481     if xml.tail:
 482         result.append(xml.tail)
 483     return result
 484
 485
 486 def convert_para(ctx, xml):
 487     result = []
 488     if 'id' in xml.attrib:
 489         result.append('<a name="%s"></a>' % xml.attrib['id'])
 490     result.append('<p>')
 491     if xml.text:
 492         result.append(xml.text)
 493     convert_inner(ctx, xml, result)
 494     result.append('</p>')
 495     if xml.tail:
 496         result.append(xml.tail)
 497     return result
 498
 499
 500 def convert_para_like(ctx, xml):
 501     result = []
 502     if 'id' in xml.attrib:
 503         result.append('<a name="%s"></a>' % xml.attrib['id'])
 504     result.append('<p class="%s">' % xml.tag)
 505     if xml.text:
 506         result.append(xml.text)
 507     convert_inner(ctx, xml, result)
 508     result.append('</p>')
 509     if xml.tail:
 510         result.append(xml.tail)
 511     return result
 512
 513
 514 def convert_phrase(ctx, xml):
 515     result = ['<span']
 516     if 'role' in xml.attrib:
 517         result.append(' class="%s">' % xml.attrib['role'])
 518     else:
 519         result.append('>')
 520     if xml.text:
 521         result.append(xml.text)
 522     convert_inner(ctx, xml, result)
 523     result.append('</span>')
 524     if xml.tail:
 525         result.append(xml.tail)
 526     return result
 527
 528
 529 def convert_primaryie(ctx, xml):
 530     result = ['<dt>\n']
 531     convert_inner(ctx, xml, result)
 532     result.append('\n</dt>\n<dd></dd>\n')
 533     return result
 534
 535
 536 def convert_pre(ctx, xml):
 537     result = ['<pre class="%s">\n' % xml.tag]
 538     if xml.text:
 539         result.append(xml.text)
 540     convert_inner(ctx, xml, result)
 541     result.append('</pre>')
 542     if xml.tail:
 543         result.append(xml.tail)
 544     return result
 545
 546
 547 def convert_programlisting(ctx, xml):
 548     result = []
 549     if xml.attrib.get('role', '') == 'example':
 550         if xml.text:
 551             lang = xml.attrib.get('language', 'c').lower()
 552             if lang not in LEXERS:
 553                 LEXERS[lang] = get_lexer_by_name(lang)
 554             lexer = LEXERS.get(lang, None)
 555             if lexer:
 556                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 557
 558                 # we do own line-numbering
 559                 line_count = highlighted.count('\n')
 560                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 561                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 562   <tbody>
 563     <tr>
 564       <td class="listing_lines" align="right"><pre>%s</pre></td>
 565       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 566     </tr>
 567   </tbody>
 568 </table>
 569 """ % (source_lines, highlighted))
 570             else:
 571                 logging.warn('No pygments lexer for language="%s"', lang)
 572                 result.append('<pre class="programlisting">')
 573                 result.append(xml.text)
 574                 result.append('</pre>')
 575     else:
 576         result.append('<pre class="programlisting">')
 577         if xml.text:
 578             result.append(xml.text)
 579         convert_inner(ctx, xml, result)
 580         result.append('</pre>')
 581     if xml.tail:
 582         result.append(xml.tail)
 583     return result
 584
 585
 586 def convert_refsect1(ctx, xml):
 587     # Add a divider between two consequitive refsect2
 588     def convert_inner(ctx, xml, result):
 589         prev = None
 590         for child in xml:
 591             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 592                 result.append('<hr>\n')
 593             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 594             prev = child
 595     return convert_refsect(ctx, xml, 'h2', convert_inner)
 596
 597
 598 def convert_refsect2(ctx, xml):
 599     return convert_refsect(ctx, xml, 'h3')
 600
 601
 602 def convert_refsect3(ctx, xml):
 603     return convert_refsect(ctx, xml, 'h4')
 604
 605
 606 def convert_row(ctx, xml):
 607     result = ['<tr>\n']
 608     convert_inner(ctx, xml, result)
 609     result.append('</tr>\n')
 610     return result
 611
 612
 613 def convert_simpara(ctx, xml):
 614     result = ['<p>']
 615     if xml.text:
 616         result.append(xml.text)
 617     result.append('</p>')
 618     if xml.tail:
 619         result.append(xml.tail)
 620     return result
 621
 622
 623 def convert_span(ctx, xml):
 624     result = ['<span class="%s">' % xml.tag]
 625     if xml.text:
 626         result.append(xml.text)
 627     convert_inner(ctx, xml, result)
 628     result.append('</span>')
 629     if xml.tail:
 630         result.append(xml.tail)
 631     return result
 632
 633
 634 def convert_tbody(ctx, xml):
 635     result = ['<tbody>']
 636     convert_inner(ctx, xml, result)
 637     result.append('</tbody>')
 638     # is in tgroup and there can be no 'text'
 639     return result
 640
 641
 642 def convert_tgroup(ctx, xml):
 643     # tgroup does not expand to anything, but the nested colspecs need to
 644     # be put into a colgroup
 645     cols = xml.findall('colspec')
 646     result = []
 647     if cols:
 648         result.append('<colgroup>\n')
 649         for col in cols:
 650             result.extend(convert_colspec(ctx, col))
 651             xml.remove(col)
 652         result.append('</colgroup>\n')
 653     convert_inner(ctx, xml, result)
 654     # is in informaltable and there can be no 'text'
 655     return result
 656
 657
 658 def convert_ulink(ctx, xml):
 659     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 660     if xml.tail:
 661         result.append(xml.tail)
 662     return result
 663
 664
# Maps a docbook tag to the function that converts it to html. Each converter
# takes (ctx, xml) and returns a list of html fragments. Tags that are not
# listed here are handled by convert__unknown() (see convert_inner()).
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_tags = {
    'acronym': convert_acronym,
    'bookinfo': convert_bookinfo,
    'blockquote': convert_blockquote,
    'caption': convert_div,
    'colspec': convert_colspec,
    'corpauthor': convert_corpauthor,
    'emphasis': convert_span,
    'entry': convert_entry,
    'function': convert_span,
    'imageobject': convert_imageobject,
    'indexdiv': convert_indexdiv,
    'indexentry': convert_ignore,
    'indexterm': convert_skip,
    'informalexample': convert_div,
    'informaltable': convert_informaltable,
    'inlinemediaobject': convert_span,
    'itemizedlist': convert_itemizedlist,
    'legalnotice': convert_para_like,
    'link': convert_link,
    'listitem': convert_listitem,
    'literal': convert_literal,
    'mediaobject': convert_div,
    'note': convert_div,
    'orderedlist': convert_orderedlist,
    'para': convert_para,
    'parameter': convert_em_class,
    'phrase': convert_phrase,
    'primaryie': convert_primaryie,
    'programlisting': convert_programlisting,
    'releaseinfo': convert_para_like,
    'refsect1': convert_refsect1,
    'refsect2': convert_refsect2,
    'refsect3': convert_refsect3,
    'replaceable': convert_em_class,
    'returnvalue': convert_span,
    'row': convert_row,
    'screen': convert_pre,
    'simpara': convert_simpara,
    'structfield': convert_em_class,
    'tbody': convert_tbody,
    'tgroup': convert_tgroup,
    'type': convert_span,
    'ulink': convert_ulink,
    'warning': convert_div,
}
 712
 713 # conversion helpers
 714
# Start of every generated page. Takes two format arguments: the page title
# and the <link> tags produced by generate_head_links().
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>%s</title>
%s<link rel="stylesheet" href="style.css" type="text/css">
</head>
<body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
"""
 724
 725
 726 def generate_head_links(ctx):
 727     n = ctx['nav_home']
 728     result = [
 729         '<link rel="home" href="%s" title="%s">\n' % (n.filename, n.title)
 730     ]
 731     if 'nav_up' in ctx:
 732         n = ctx['nav_up']
 733         result.append('<link rel="up" href="%s" title="%s">\n' % (n.filename, n.title))
 734     if 'nav_prev' in ctx:
 735         n = ctx['nav_prev']
 736         result.append('<link rel="prev" href="%s" title="%s">\n' % (n.filename, n.title))
 737     if 'nav_next' in ctx:
 738         n = ctx['nav_next']
 739         result.append('<link rel="next" href="%s" title="%s">\n' % (n.filename, n.title))
 740     return ''.join(result)
 741
 742
 743 def generate_nav_links(ctx):
 744     n = ctx['nav_home']
 745     result = [
 746         '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % n.filename
 747     ]
 748     if 'nav_up' in ctx:
 749         n = ctx['nav_up']
 750         result.append(
 751             '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>' % n.filename)
 752     else:
 753         result.append('<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>')
 754     if 'nav_prev' in ctx:
 755         n = ctx['nav_prev']
 756         result.append(
 757             '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>' % n.filename)
 758     else:
 759         result.append('<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>')
 760     if 'nav_next' in ctx:
 761         n = ctx['nav_next']
 762         result.append(
 763             '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>' % n.filename)
 764     else:
 765         result.append('<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>')
 766
 767     return ''.join(result)
 768
 769
 770 def generate_toc(ctx, node):
 771     result = []
 772     for c in node.children:
 773         # TODO: urlencode the filename: urllib.parse.quote_plus()
 774         result.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
 775             c.title_tag, c.filename, c.title))
 776         if c.subtitle:
 777             result.append('<span class="%s"> — %s</span>' % (c.subtitle_tag, c.subtitle))
 778         result.append('</dt>\n')
 779         if c.children:
 780             result.append('<dd><dl>')
 781             result.extend(generate_toc(ctx, c))
 782             result.append('</dl></dd>')
 783     return result
 784
 785
 786 def generate_basic_nav(ctx):
 787     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 788   <tr valign="middle">
 789     <td width="100%%" align="left" class="shortcuts"></td>
 790     %s
 791   </tr>
 792 </table>
 793     """ % generate_nav_links(ctx)
 794
 795
 796 def generate_index_nav(ctx, indexdivs):
 797     ix_nav = []
 798     for s in indexdivs:
 799         title = xml_get_title(s)
 800         ix_nav.append('<a class="shortcut" href="#idx%s">%s</a>' % (title, title))
 801
 802     return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 803   <tr valign="middle">
 804     <td width="100%%" align="left" class="shortcuts">
 805       <span id="nav_index">
 806         %s
 807       </span>
 808     </td>
 809     %s
 810   </tr>
 811 </table>
 812     """ % ('\n<span class="dim">|</span>\n'.join(ix_nav), generate_nav_links(ctx))
 813
 814
 815 def generate_refentry_nav(ctx, refsect1s, result):
 816     result.append("""<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
 817   <tr valign="middle">
 818     <td width="100%%" align="left" class="shortcuts">
 819       <a href="#" class="shortcut">Top</a>""")
 820
 821     for s in refsect1s:
 822         # don't list TOC sections (role="xxx_proto")
 823         if s.attrib.get('role', '').endswith("_proto"):
 824             continue
 825
 826         title = xml_get_title(s)
 827         result.append("""
 828           <span id="nav_description">
 829             <span class="dim">|</span>
 830             <a href="#%s" class="shortcut">%s</a>
 831           </span>""" % (s.attrib['id'], title))
 832     result.append("""
 833     </td>
 834     %s
 835   </tr>
 836 </table>
 837 """ % generate_nav_links(ctx))
 838
 839
 840 def get_id(node):
 841     xml = node.xml
 842     node_id = xml.attrib.get('id', None)
 843     if node_id:
 844         return node_id
 845
 846     logging.warning('%d: No "id" attribute on "%s"', xml.sourceline, xml.tag)
 847     ix = []
 848     # Generate the 'id'. We need to walk up the xml-tree and check the positions
 849     # for each sibling.
 850     parent = xml.getparent()
 851     while parent is not None:
 852         children = parent.getchildren()
 853         ix.insert(0, str(children.index(xml) + 1))
 854         xml = parent
 855         parent = xml.getparent()
 856     # logging.warning('%s: id indexes: %s', node.filename, str(ix))
 857     return 'id-1.' + '.'.join(ix)
 858
 859
 860 def convert_chunk_with_toc(ctx, div_class, title_tag):
 861     node = ctx['node']
 862     result = [
 863         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 864         generate_basic_nav(ctx),
 865         '<div class="%s">' % div_class,
 866     ]
 867     title = node.xml.find('title')
 868     if title is not None:
 869         result.append("""
 870 <div class="titlepage">
 871 <%s class="title"><a name="%s"></a>%s</%s>
 872 </div>""" % (
 873             title_tag, get_id(node), title.text, title_tag))
 874         node.xml.remove(title)
 875     convert_inner(ctx, node.xml, result)
 876     result.append("""<p>
 877   <b>Table of Contents</b>
 878 </p>
 879 <div class="toc">
 880   <dl class="toc">
 881 """)
 882     result.extend(generate_toc(ctx, node))
 883     result.append("""</dl>
 884 </div>
 885 </div>
 886 </body>
 887 </html>""")
 888     return result
 889
 890
 891 # docbook chunks
 892
 893
 894 def convert_book(ctx):
 895     node = ctx['node']
 896     result = [
 897         HTML_HEADER % (node.title, generate_head_links(ctx)),
 898         """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
 899     <tr><th valign="middle"><p class="title">%s</p></th></tr>
 900 </table>
 901 <div class="book">
 902 """ % node.title
 903     ]
 904     bookinfo = node.xml.findall('bookinfo')[0]
 905     # we already used the title
 906     title = bookinfo.find('title')
 907     if title is not None:
 908         bookinfo.remove(title)
 909     result.extend(convert_bookinfo(ctx, bookinfo))
 910     result.append("""<div class="toc">
 911   <dl class="toc">
 912 """)
 913     result.extend(generate_toc(ctx, node.root))
 914     result.append("""</dl>
 915 </div>
 916 </div>
 917 </body>
 918 </html>""")
 919     return result
 920
 921
 922 def convert_chapter(ctx):
 923     return convert_chunk_with_toc(ctx, 'chapter', 'h2')
 924
 925
 926 def convert_index(ctx):
 927     node = ctx['node']
 928     node_id = get_id(node)
 929     # Get all indexdivs under indexdiv
 930     indexdivs = node.xml.find('indexdiv').findall('indexdiv')
 931
 932     result = [
 933         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 934         generate_index_nav(ctx, indexdivs),
 935         """<div class="index">
 936 <div class="titlepage"><h1 class="title">
 937 <a name="%s"></a>%s</h1>
 938 </div>""" % (node_id, node.title)
 939     ]
 940     for i in indexdivs:
 941         result.extend(convert_indexdiv(ctx, i))
 942     result.append("""</div>
 943 </body>
 944 </html>""")
 945     return result
 946
 947
 948 def convert_part(ctx):
 949     return convert_chunk_with_toc(ctx, 'part', 'h1')
 950
 951
 952 def convert_preface(ctx):
 953     node = ctx['node']
 954     result = [
 955         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
 956         generate_basic_nav(ctx),
 957         '<div class="preface">'
 958     ]
 959     title = node.xml.find('title')
 960     if title is not None:
 961         result.append("""
 962 <div class="titlepage">
 963 <h2 class="title"><a name="%s"></a>%s</h2>
 964 </div>""" % (get_id(node), title.text))
 965         node.xml.remove(title)
 966     convert_inner(ctx, node.xml, result)
 967     result.append("""</div>
 968 </body>
 969 </html>""")
 970     return result
 971
 972
 973 def convert_reference(ctx):
 974     return convert_chunk_with_toc(ctx, 'reference', 'h1')
 975
 976
 977 def convert_refentry(ctx):
 978     node = ctx['node']
 979     node_id = get_id(node)
 980     refsect1s = node.xml.findall('refsect1')
 981
 982     result = [
 983         HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
 984     ]
 985     generate_refentry_nav(ctx, refsect1s, result)
 986     result.append("""
 987 <div class="refentry">
 988 <a name="%s"></a>
 989 <div class="refnamediv">
 990   <table width="100%%"><tr>
 991     <td valign="top">
 992       <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
 993       <p>%s — module for gtk-doc unit test</p>
 994     </td>
 995     <td class="gallery_image" valign="top" align="right"></td>
 996   </tr></table>
 997 </div>
 998 """ % (node_id, node_id, node.title, node.title))
 999
1000     for s in refsect1s:
1001         result.extend(convert_refsect1(ctx, s))
1002     result.append("""</div>
1003 </body>
1004 </html>""")
1005     return result
1006
1007
# Dispatch table used by convert(): maps the name of a chunk node to the
# function that renders this kind of chunk.
# TODO(ensonic): turn into class with converters as functions and ctx as self
convert_chunks = dict(
    book=convert_book,
    chapter=convert_chapter,
    index=convert_index,
    part=convert_part,
    preface=convert_preface,
    reference=convert_reference,
    refentry=convert_refentry,
)
1018
1019
def generate_nav_nodes(files, node):
    """Collect the navigation targets for a node.

    Args:
      files: list of all chunk nodes, node must be one of them
      node: the node to compute the navigation for

    Returns:
      dict that always has 'nav_home' (the root of the tree) and, where they
      exist, 'nav_up' (the parent), 'nav_prev' and 'nav_next' (the neighbours
      of node in files).
    """
    nav = {'nav_home': node.root}
    parent = node.parent
    if parent:
        nav['nav_up'] = parent

    position = files.index(node)
    # the first entry has no predecessor, the last one no successor
    if position >= 1:
        nav['nav_prev'] = files[position - 1]
    if position + 1 < len(files):
        nav['nav_next'] = files[position + 1]
    return nav
1033
1034
def convert(out_dir, module, files, node):
    """Convert the docbook chunks to a html file.

    The file named node.filename is created in out_dir. When there is no
    converter for the kind of node, a warning is logged and the file is left
    empty.

    Args:
      out_dir: already created output dir
      module: name of the module, handed to the converters as ctx['module']
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    # The converters emit non-ASCII text (e.g. the dash in the refentry
    # template), so pin the encoding instead of depending on the locale.
    with open(os.path.join(out_dir, node.filename), 'wt', encoding='utf-8') as html:
        ctx = {
            'module': module,
            'files': files,
            'node': node,
        }
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is not None:
            html.writelines(converter(ctx))
        else:
            logging.warning('Add converter/template for "%s"', node.name)
1058
1059
def create_devhelp2_toc(node):
    """Build the nested <sub> entries of the devhelp2 toc below node.

    Args:
      node: tree node whose children (recursively) are listed

    Returns:
      list of xml lines; a child without children becomes a self-closing
      <sub/>, any other child wraps the entries of its own children.
    """
    lines = []
    for child in node.children:
        if not child.children:
            lines.append('<sub name="%s" link="%s"/>\n' % (child.title, child.filename))
            continue
        lines.append('<sub name="%s" link="%s">\n' % (child.title, child.filename))
        lines += create_devhelp2_toc(child)
        lines.append('</sub>\n')
    return lines
1070
1071
def create_devhelp2_condition_attribs(node):
    """Render the 'condition' attribute of node as devhelp2 xml attributes.

    The condition holds entries separated with '|' (since, deprecated, ...),
    each of the form 'key:value'. An entry without a ':' (e.g. a plain
    'deprecated') is emitted with an empty value and empty entries are
    skipped, instead of failing in the string formatting.

    Args:
      node: xml node, only node.attrib is used

    Returns:
      the attributes as 'key="value"' pairs with a leading space, or '' if
      there is nothing to add.
    """
    condition = node.attrib.get('condition')
    if condition is None:
        return ''

    attribs = []
    # double quotes would terminate the attribute value early
    for entry in condition.replace('"', '&quot;').split('|'):
        if not entry:
            continue
        key, _, value = entry.partition(':')
        attribs.append('%s="%s"' % (key, value))
    if not attribs:
        return ''
    return ' ' + ' '.join(attribs)
1079
1080
def create_devhelp2_refsect2_keyword(node, base_link):
    """Format the devhelp2 <keyword> line for a refsect2 node.

    Args:
      node: xml node that has 'role' and 'id' attributes
      base_link: link prefix, the 'id' of node gets appended to it

    Returns:
      one line of xml, terminated with a newline
    """
    attrib = node.attrib
    kind = attrib['role']
    name = xml_get_title(node)
    link = base_link + attrib['id']
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (kind, name, link, conditions)
1085
1086
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Format the devhelp2 <keyword> line for an entry inside a refsect3.

    Args:
      node: xml node that has a 'role' attribute
      base_link: link prefix, name gets appended to it
      title: value for the 'name' attribute of the keyword
      name: anchor the keyword links to

    Returns:
      one line of xml, terminated with a newline
    """
    kind = node.attrib['role']
    link = base_link + name
    conditions = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (kind, title, link, conditions)
1091
1092
def create_devhelp2(out_dir, module, xml, files):
    """Write the <module>.devhelp2 index file into out_dir.

    The file holds the toc of the book and one keyword for every refsect2
    that has a role, plus keywords for the enum and struct members below it.

    Args:
      out_dir: already created output dir
      module: name of the module, used for the file name and the book name
      xml: root element of the document, searched for /book/bookinfo
      files: list of chunk nodes; must not be empty, files[0].root is the
             root of the toc
    """
    # The xml declaration written below promises utf-8, so do not depend on
    # the locale for the encoding.
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt', encoding='utf-8') as idx:
        title = ''
        online_url = ''
        # xpath() returns a (possibly empty) list and never None, so test for
        # emptiness; each of the fields keeps its default when it is missing.
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_url = online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c" online="%s">
  <chapters>
""" % (title, module, online_url)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    # enum members use the text of the para as the keyword name
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    # struct members use their id as both the name and the anchor
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        idx.writelines(result)
1141
1142
def get_dirs(uninstalled):
    """Locate the gtk-doc data dir and the dir with the style files.

    Args:
      uninstalled: if true, look next to the running script (falling back to
                   the ABS_TOP_SRCDIR environment variable), otherwise use
                   the installed data dir from config.datadir

    Returns:
      tuple of (gtkdocdir, styledir)
    """
    if uninstalled:
        # this does not work from builddir!=srcdir
        # The dirname is empty when the script is started from the current
        # dir; use '.' so that the paths below stay relative instead of
        # turning into '/gtk-doc.xsl' and '/style'.
        gtkdocdir = os.path.dirname(sys.argv[0]) or os.curdir
        if not os.path.exists(os.path.join(gtkdocdir, 'gtk-doc.xsl')):
            # try 'srcdir' (set from makefiles) too
            srcdir = os.environ.get('ABS_TOP_SRCDIR')
            if srcdir and os.path.exists(os.path.join(srcdir, 'gtk-doc.xsl')):
                gtkdocdir = srcdir
        styledir = os.path.join(gtkdocdir, 'style')
    else:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        styledir = gtkdocdir
    return (gtkdocdir, styledir)
1156
1157
def main(module, index_file, out_dir, uninstalled):
    """Convert the docbook document index_file to html files in out_dir.

    Args:
      module: name of the module
      index_file: main xml document, xml-includes get resolved
      out_dir: already created output dir
      uninstalled: passed on to get_dirs() to locate the style files
    """
    doc = etree.parse(index_file)
    doc.xinclude()
    root = doc.getroot()

    # only the style dir is needed here
    styledir = get_dirs(uninstalled)[1]
    # copy navigation images and stylesheets to html directory ...
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and append the style definitions of HTML_FORMATTER to the copied css
    with open(os.path.join(out_dir, 'style.css'), 'at') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #   can generate navigation and link tags.
    files = list(PreOrderIter(chunk(root)))
    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, root, files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for chunk_node in files:
        convert(out_dir, module, files, chunk_node)
1193
1194
def run(options):
    """Entry point: create the output dir, run main() and exit.

    Args:
      options: parsed command line; options.args holds the module name and
               the path of the main document, options.uninstalled is passed
               on to main()
    """
    logging.info('options: %s', str(options.__dict__))
    args = options.args
    module = args[0]
    document = args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an output dir left over from a previous run is fine, any other
        # error still propagates
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))