gtkdoc/mkhtml2.py

   1 #!/usr/bin/env python3
   2 # -*- python; coding: utf-8 -*-
   3 #
   4 # gtk-doc - GTK DocBook documentation generator.
   5 # Copyright (C) 2018  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """Generate html from docbook
  23
  24 The tool loads the main xml document (<module>-docs.xml) and chunks it
  25 like the xsl-stylesheets would do. For that it resolves all the xml-includes.
  26 Each chunk is converted to html using python functions.
  27
  28 In contrast to our previous approach of running gtkdoc-mkhtml + gtkdoc-fixxref,
  29 this tool will replace both without relying on external tools such as xsltproc
  30 and source-highlight.
  31
  32 Please note, that we're not aiming for complete docbook-xml support. All tags
  33 used in the generated xml are of course handled. More tags used in handwritten
  34 xml can be easily supported, but for some combinations of tags we prefer
  35 simplicity.
  36
  37 TODO:
  38 - tag converters:
  39   - inside 'footnote' one can have many tags, we only handle 'para'/'simpara'
  40   - inside 'inlinemediaobject'/'mediaobject' a 'textobject' becomes the 'alt'
  41     attr on the <img> tag of the 'imageobject'
  42   - handle 'label' attributes on part/chapter/section-types
  43     - the titles will have a generated prefix, such as 'Part I:'
  44     - in the toc it would be only the label: 'I.'
  45   - 'linkend' seems to add a 'title' attr to 'a' if the target has a title.
  46 - check each docbook tag if it can contain #PCDATA, if not don't check for
  47   xml.text
  48 - consider some perf-warnings flag
  49   - see 'No "id" attribute on'
  50
  51 OPTIONAL:
  52 - minify html: https://pypi.python.org/pypi/htmlmin/
  53
  54 Requirements:
  55 sudo pip3 install anytree lxml pygments
  56
  57 Example invocation:
  58 cd tests/bugs/docs/
  59 ../../../gtkdoc-mkhtml2 tester tester-docs.xml
  60 xdg-open db2html/index.html
  61 meld html db2html
  62
  63 Benchmarking:
  64 cd tests/bugs/docs/;
  65 rm html-build.stamp; time make html-build.stamp
  66 """
  67
  68 import argparse
  69 import errno
  70 import logging
  71 import os
  72 import shutil
  73 import sys
  74
  75 from anytree import Node, PreOrderIter
  76 from copy import deepcopy
  77 from glob import glob
  78 from lxml import etree
  79 from pygments import highlight
  80 from pygments.lexers import CLexer
  81 from pygments.formatters import HtmlFormatter
  82
  83 from . import config, fixxref
  84
# pygments setup
# Lexer cache keyed by the lower-cased language name. Only the C lexer is
# created up front; convert_programlisting() adds further entries on demand.
LEXERS = {
    'c': CLexer()
}
# nowrap=True: convert_programlisting() supplies its own <pre> wrapper around
# the highlighted code.
HTML_FORMATTER = HtmlFormatter(nowrap=True)
  91
  92
  93 class ChunkParams(object):
  94     def __init__(self, prefix, parent=None, min_idx=0):
  95         self.prefix = prefix
  96         self.parent = parent
  97         self.min_idx = min_idx
  98         self.idx = 1
  99
 100
# Used as min_idx for tags that never get a file of their own: chunk() tests
# 'idx >= min_idx', which is never true for infinity.
DONT_CHUNK = float('inf')
# docbook-xsl defines the chunk tags here.
# http://www.sagehill.net/docbookxsl/Chunking.html#GeneratedFilenames
# https://github.com/oreillymedia/HTMLBook/blob/master/htmlbook-xsl/chunk.xsl#L33
# If not defined, we can just create an example without an 'id' attr and see
# what docbook-xsl does.
#
# For toc levels see http://www.sagehill.net/docbookxsl/TOCcontrol.html
# TODO: this list has also a flag that controls whether we add the
# 'Table of Contents' heading in convert_chunk_with_toc()
#
# Note that 'sect1' and 'section' share the prefix 's' but each entry has its
# own ChunkParams instance and hence its own 'idx' counter.
CHUNK_PARAMS = {
    'appendix': ChunkParams('app', 'book'),
    'book': ChunkParams('bk'),
    'chapter': ChunkParams('ch', 'book'),
    'glossary': ChunkParams('go', 'book'),
    'index': ChunkParams('ix', 'book'),
    'part': ChunkParams('pt', 'book'),
    'preface': ChunkParams('pr', 'book'),
    'refentry': ChunkParams('re', 'book'),
    'reference': ChunkParams('rn', 'book'),
    'sect1': ChunkParams('s', 'chapter', 1),
    'section': ChunkParams('s', 'chapter', 1),
    'sect2': ChunkParams('s', 'sect1', DONT_CHUNK),
    'sect3': ChunkParams('s', 'sect2', DONT_CHUNK),
    'sect4': ChunkParams('s', 'sect3', DONT_CHUNK),
    'sect5': ChunkParams('s', 'sect4', DONT_CHUNK),
}
 128 # TAGS we don't support:
 129 # 'article', 'bibliography', 'colophon', 'set', 'setindex'
 130
# Maps a tag name to a (title, subtitle) pair of compiled xpath expressions.
# The subtitle expression is None if the tag has none. The '_' entry is the
# fallback that get_chunk_titles() uses for tags that are not listed.
TITLE_XPATHS = {
    '_': (etree.XPath('./title'), None),
    'book': (etree.XPath('./bookinfo/title'), None),
    'refentry': (
        etree.XPath('./refmeta/refentrytitle'),
        etree.XPath('./refnamediv/refpurpose')
    ),
}

# Selects all 'id' attributes; used by add_id_links()
ID_XPATH = etree.XPath('//@id')

# Selects all 'glossentry' elements; used by build_glossary()
GLOSSENTRY_XPATH = etree.XPath('//glossentry')
# term -> definition (plain text), filled by build_glossary() and read by
# convert_acronym()
glossary = {}

# Number of the next footnote, incremented by convert_footnote()
footnote_idx = 1
 146
 147
 148 def gen_chunk_name(node, chunk_params):
 149     """Generate a chunk file name
 150
 151     This is either based on the id or on the position in the doc. In the latter
 152     case it uses a prefix from CHUNK_PARAMS and a sequence number for each chunk
 153     type.
 154     """
 155     if 'id' in node.attrib:
 156         return node.attrib['id']
 157
 158     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx))
 159     chunk_params.idx += 1
 160
 161     # handle parents to make names of nested tags like in docbook
 162     # - we only need to prepend the parent if there are > 1 of them in the
 163     #   xml. None, the parents we have are not sufficient, e.g. 'index' can
 164     #   be in 'book' or 'part' or ... Maybe we can track the chunk_parents
 165     #   when we chunk explicitly and on each level maintain the 'idx'
 166     # while chunk_params.parent:
 167     #     parent = chunk_params.parent
 168     #     if parent not in CHUNK_PARAMS:
 169     #         break;
 170     #     chunk_params = CHUNK_PARAMS[parent]
 171     #     name = ('%s%02d' % (chunk_params.prefix, chunk_params.idx)) + name
 172
 173     logging.info('Gen chunk name: "%s"', name)
 174     return name
 175
 176
 177 def get_chunk_titles(module, node):
 178     tag = node.tag
 179     if tag not in TITLE_XPATHS:
 180         # Use defaults
 181         (title, subtitle) = TITLE_XPATHS['_']
 182     else:
 183         (title, subtitle) = TITLE_XPATHS[tag]
 184
 185     ctx = {
 186         'module': module,
 187     }
 188     result = {
 189         'title': None,
 190         'title_tag': None,
 191         'subtitle': None,
 192         'subtitle_tag': None
 193     }
 194     res = title(node)
 195     if res:
 196         xml = res[0]
 197         result['title'] = ''.join(convert_title(ctx, xml))
 198         if xml.tag != 'title':
 199             result['title_tag'] = xml.tag
 200         else:
 201             result['title_tag'] = tag
 202
 203     if subtitle:
 204         res = subtitle(node)
 205         if res:
 206             xml = res[0]
 207             result['subtitle'] = ''.join(convert_title(ctx, xml))
 208             result['subtitle_tag'] = xml.tag
 209     return result
 210
 211
def chunk(xml_node, module, depth=0, idx=0, parent=None):
    """Chunk the tree.

    The first time, we're called with parent=None and in that case we return
    the new_node as the root of the tree. For each tree-node we generate a
    filename and process the children.

    Args:
      xml_node: the xml element to inspect
      module: the module name, passed on to get_chunk_titles()
      depth: the nesting level that is stored on the created tree node
      idx: index of xml_node among its siblings, counting only the tags listed
           in CHUNK_PARAMS
      parent: the tree node of the enclosing chunk or None for the root

    Returns:
      The tree node created for xml_node, or 'parent' unchanged if the tag of
      xml_node is not in CHUNK_PARAMS.
    """
    tag = xml_node.tag
    chunk_params = CHUNK_PARAMS.get(tag)
    if chunk_params:
        title_args = get_chunk_titles(module, xml_node)
        chunk_name = gen_chunk_name(xml_node, chunk_params)

        # check idx to handle 'sect1'/'section' special casing and title-only
        # segments
        if idx >= chunk_params.min_idx:
            logging.info('chunk tag: "%s"[%d]', tag, idx)
            if parent:
                # remove the xml-node from the parent
                # (we continue with a deep copy that is the root of its own
                # ElementTree)
                sub_tree = etree.ElementTree(deepcopy(xml_node)).getroot()
                xml_node.getparent().remove(xml_node)
                xml_node = sub_tree

            # this node gets its own html file
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          filename=chunk_name + '.html', anchor=None,
                          **title_args)
        else:
            # this node stays in the file of the parent and is addressed
            # through an anchor
            # NOTE(review): this dereferences 'parent', which would be None if
            # the root element itself ends up here - confirm that callers
            # never pass such a root
            parent = Node(tag, parent=parent, xml=xml_node, depth=depth,
                          filename=parent.filename, anchor='#' + chunk_name,
                          **title_args)

        depth += 1
        # only children with a tag from CHUNK_PARAMS advance the index
        idx = 0
        for child in xml_node:
            chunk(child, module, depth, idx, parent)
            if child.tag in CHUNK_PARAMS:
                idx += 1

    return parent
 251
 252
 253 def add_id_links(files, links):
 254     for node in files:
 255         chunk_name = node.filename[:-5]
 256         chunk_base = node.filename + '#'
 257         for attr in ID_XPATH(node.xml):
 258             if attr == chunk_name:
 259                 links[attr] = node.filename
 260             else:
 261                 links[attr] = chunk_base + attr
 262
 263
 264 def build_glossary(files):
 265     for node in files:
 266         if node.xml.tag != 'glossary':
 267             continue
 268         for term in GLOSSENTRY_XPATH(node.xml):
 269             # TODO: there can be all kind of things in a glossary. This only supports
 270             # what we commonly use
 271             key = etree.tostring(term.find('glossterm'), method="text", encoding=str).strip()
 272             value = etree.tostring(term.find('glossdef'), method="text", encoding=str).strip()
 273             glossary[key] = value
 274             # logging.debug('glosentry: %s:%s', key, value)
 275
 276
 277 # conversion helpers
 278
 279
 280 def convert_inner(ctx, xml, result):
 281     for child in xml:
 282         result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 283
 284
 285 def convert_ignore(ctx, xml):
 286     result = []
 287     convert_inner(ctx, xml, result)
 288     return result
 289
 290
 291 def convert_skip(ctx, xml):
 292     return ['']
 293
 294
 295 def append_text(text, result):
 296     if text and text.strip():
 297         result.append(text.replace('<', '&lt;').replace('>', '&gt;'))
 298
 299
 300 missing_tags = {}
 301
 302
 303 def convert__unknown(ctx, xml):
 304     # don't recurse on subchunks
 305     if xml.tag in CHUNK_PARAMS:
 306         return []
 307     if isinstance(xml, etree._Comment):
 308         return ['<!-- ' + xml.text + '-->\n']
 309     else:
 310         # warn only once
 311         if xml.tag not in missing_tags:
 312             logging.warning('Add tag converter for "%s"', xml.tag)
 313             missing_tags[xml.tag] = True
 314         result = ['<!-- ' + xml.tag + '-->\n']
 315         convert_inner(ctx, xml, result)
 316         result.append('<!-- /' + xml.tag + '-->\n')
 317         return result
 318
 319
 320 def convert_sect(ctx, xml, h_tag, inner_func=convert_inner):
 321     result = ['<div class="%s">\n' % xml.tag]
 322     title = xml.find('title')
 323     if title is not None:
 324         if 'id' in xml.attrib:
 325             result.append('<a name="%s"></a>' % xml.attrib['id'])
 326         result.append('<%s>%s</%s>' % (h_tag, title.text, h_tag))
 327         xml.remove(title)
 328     append_text(xml.text, result)
 329     inner_func(ctx, xml, result)
 330     result.append('</div>')
 331     append_text(xml.tail, result)
 332     return result
 333
 334
 335 def xml_get_title(ctx, xml):
 336     title_tag = xml.find('title')
 337     if title_tag is not None:
 338         return ''.join(convert_title(ctx, title_tag))
 339     else:
 340         # TODO(ensonic): any way to get the file (inlcudes) too?
 341         logging.warning('%s: Expected title tag under "%s %s"', xml.sourceline, xml.tag, str(xml.attrib))
 342         return ''
 343
 344
 345 # docbook tags
 346
 347
 348 def convert_abstract(ctx, xml):
 349     result = ["""<div class="abstract">
 350     <p class="title"><b>Abstract</b></p>"""]
 351     append_text(xml.text, result)
 352     convert_inner(ctx, xml, result)
 353     result.append('</div>')
 354     append_text(xml.tail, result)
 355     return result
 356
 357
 358 def convert_acronym(ctx, xml):
 359     key = xml.text
 360     title = glossary.get(key, '')
 361     # TODO: print a sensible warning if missing
 362     result = ['<acronym title="%s"><span class="acronym">%s</span></acronym>' % (title, key)]
 363     if xml.tail:
 364         result.append(xml.tail)
 365     return result
 366
 367
 368 def convert_anchor(ctx, xml):
 369     return ['<a name="%s"></a>' % xml.attrib['id']]
 370
 371
 372 def convert_bookinfo(ctx, xml):
 373     result = ['<div class="titlepage">']
 374     convert_inner(ctx, xml, result)
 375     result.append("""<hr>
 376 </div>""")
 377     if xml.tail:
 378         result.append(xml.tail)
 379     return result
 380
 381
 382 def convert_blockquote(ctx, xml):
 383     result = ['<div class="blockquote">\n<blockquote class="blockquote">']
 384     append_text(xml.text, result)
 385     convert_inner(ctx, xml, result)
 386     result.append('</blockquote>\n</div>')
 387     append_text(xml.tail, result)
 388     return result
 389
 390
 391 def convert_code(ctx, xml):
 392     result = ['<code class="%s">' % xml.tag]
 393     append_text(xml.text, result)
 394     convert_inner(ctx, xml, result)
 395     result.append('</code>')
 396     append_text(xml.tail, result)
 397     return result
 398
 399
 400 def convert_colspec(ctx, xml):
 401     result = ['<col']
 402     a = xml.attrib
 403     if 'colname' in a:
 404         result.append(' class="%s"' % a['colname'])
 405     if 'colwidth' in a:
 406         result.append(' width="%s"' % a['colwidth'])
 407     result.append('>\n')
 408     # is in tgroup and there can be no 'text'
 409     return result
 410
 411
 412 def convert_command(ctx, xml):
 413     result = ['<strong class="userinput"><code>']
 414     append_text(xml.text, result)
 415     convert_inner(ctx, xml, result)
 416     result.append('</code></strong>')
 417     append_text(xml.tail, result)
 418     return result
 419
 420
 421 def convert_corpauthor(ctx, xml):
 422     result = ['<div><h3 class="corpauthor">\n']
 423     append_text(xml.text, result)
 424     convert_inner(ctx, xml, result)
 425     result.append('</h3></div>\n')
 426     append_text(xml.tail, result)
 427     return result
 428
 429
 430 def convert_div(ctx, xml):
 431     result = ['<div class="%s">\n' % xml.tag]
 432     append_text(xml.text, result)
 433     convert_inner(ctx, xml, result)
 434     result.append('</div>')
 435     append_text(xml.tail, result)
 436     return result
 437
 438
 439 def convert_emphasis(ctx, xml):
 440     result = ['<span class="emphasis"><em>']
 441     append_text(xml.text, result)
 442     convert_inner(ctx, xml, result)
 443     result.append('</em></span>')
 444     append_text(xml.tail, result)
 445     return result
 446
 447
 448 def convert_em_class(ctx, xml):
 449     result = ['<em class="%s"><code>' % xml.tag]
 450     append_text(xml.text, result)
 451     convert_inner(ctx, xml, result)
 452     result.append('</code></em>')
 453     append_text(xml.tail, result)
 454     return result
 455
 456
 457 def convert_entry(ctx, xml):
 458     entry_type = ctx['table.entry']
 459     result = ['<' + entry_type]
 460     if 'role' in xml.attrib:
 461         result.append(' class="%s"' % xml.attrib['role'])
 462     if 'morerows' in xml.attrib:
 463         result.append(' rowspan="%s"' % (1 + int(xml.attrib['morerows'])))
 464     result.append('>')
 465     append_text(xml.text, result)
 466     convert_inner(ctx, xml, result)
 467     result.append('</' + entry_type + '>')
 468     append_text(xml.tail, result)
 469     return result
 470
 471
 472 def convert_footnote(ctx, xml):
 473     footnotes = ctx.get('footnotes', [])
 474     # footnotes idx is not per page, but per doc
 475     global footnote_idx
 476     idx = footnote_idx
 477     footnote_idx += 1
 478
 479     # need a pair of ids for each footnote (docbook generates different ids)
 480     this_id = 'footnote-%d' % idx
 481     that_id = 'ftn.' + this_id
 482
 483     inner = ['<div id="%s" class="footnote">' % that_id]
 484     inner.append('<p><a href="#%s" class="para"><sup class="para">[%d] </sup></a>' % (
 485         this_id, idx))
 486     # TODO(ensonic): this can contain all kind of tags, if we convert them we'll
 487     # get double nested paras :/.
 488     # convert_inner(ctx, xml, inner)
 489     para = xml.find('para')
 490     if para is None:
 491         para = xml.find('simpara')
 492     if para is not None:
 493         inner.append(para.text)
 494     else:
 495         logging.warning('%s: Unhandled footnote content: %s', xml.sourceline,
 496                         etree.tostring(xml, method="text", encoding=str).strip())
 497     inner.append('</p></div>')
 498     footnotes.append(inner)
 499     ctx['footnotes'] = footnotes
 500     return ['<a href="#%s" class="footnote" name="%s"><sup class="footnote">[%s]</sup></a>' % (
 501         that_id, this_id, idx)]
 502
 503
 504 def convert_formalpara(ctx, xml):
 505     result = None
 506     title_tag = xml.find('title')
 507     result = ['<p><b>%s</b>' % title_tag.text]
 508     para_tag = xml.find('para')
 509     append_text(para_tag.text, result)
 510     convert_inner(ctx, para_tag, result)
 511     append_text(para_tag.tail, result)
 512     result.append('</p>')
 513     append_text(xml.tail, result)
 514     return result
 515
 516
 517 def convert_glossdef(ctx, xml):
 518     result = ['<dd class="glossdef">']
 519     convert_inner(ctx, xml, result)
 520     result.append('</dd>\n')
 521     return result
 522
 523
 524 def convert_glossdiv(ctx, xml):
 525     title_tag = xml.find('title')
 526     title = title_tag.text
 527     xml.remove(title_tag)
 528     result = [
 529         '<a name="gls%s"></a><h3 class="title">%s</h3>' % (title, title)
 530     ]
 531     convert_inner(ctx, xml, result)
 532     return result
 533
 534
 535 def convert_glossentry(ctx, xml):
 536     result = []
 537     convert_inner(ctx, xml, result)
 538     return result
 539
 540
 541 def convert_glossterm(ctx, xml):
 542     glossid = ''
 543     text = ''
 544     anchor = xml.find('anchor')
 545     if anchor is not None:
 546         glossid = anchor.attrib.get('id', '')
 547         text += anchor.tail or ''
 548     text += xml.text or ''
 549     if glossid == '':
 550         glossid = 'glossterm-' + text
 551     return [
 552         '<dt><span class="glossterm"><a name="%s"></a>%s</span></dt>' % (
 553             glossid, text)
 554     ]
 555
 556
 557 def convert_imageobject(ctx, xml):
 558     imagedata = xml.find('imagedata')
 559     if imagedata is not None:
 560         # TODO(ensonic): warn on missing fileref attr?
 561         return ['<img src="%s">' % imagedata.attrib.get('fileref', '')]
 562     else:
 563         return []
 564
 565
 566 def convert_indexdiv(ctx, xml):
 567     title_tag = xml.find('title')
 568     title = title_tag.text
 569     xml.remove(title_tag)
 570     result = [
 571         '<a name="idx%s"></a><h3 class="title">%s</h3>' % (title, title)
 572     ]
 573     convert_inner(ctx, xml, result)
 574     return result
 575
 576
 577 def convert_informaltable(ctx, xml):
 578     result = ['<div class="informaltable"><table class="informaltable"']
 579     a = xml.attrib
 580     if 'pgwide' in a and a['pgwide'] == '1':
 581         result.append(' width="100%"')
 582     if 'frame' in a and a['frame'] == 'none':
 583         result.append(' border="0"')
 584     result.append('>\n')
 585     convert_inner(ctx, xml, result)
 586     result.append('</table></div>')
 587     if xml.tail:
 588         result.append(xml.tail)
 589     return result
 590
 591
 592 def convert_inlinegraphic(ctx, xml):
 593     # TODO(ensonic): warn on missing fileref attr?
 594     return ['<img src="%s">' % xml.attrib.get('fileref', '')]
 595
 596
 597 def convert_itemizedlist(ctx, xml):
 598     result = ['<div class="itemizedlist"><ul class="itemizedlist" style="list-style-type: disc; ">']
 599     convert_inner(ctx, xml, result)
 600     result.append('</ul></div>')
 601     if xml.tail:
 602         result.append(xml.tail)
 603     return result
 604
 605
 606 def convert_link(ctx, xml):
 607     linkend = xml.attrib['linkend']
 608     if linkend in fixxref.NoLinks:
 609         linkend = None
 610     result = []
 611     if linkend:
 612         link_text = []
 613         append_text(xml.text, link_text)
 614         convert_inner(ctx, xml, link_text)
 615         # TODO: fixxref does some weird checks in xml.text
 616         result = [fixxref.MakeXRef(ctx['module'], '', 0, linkend, ''.join(link_text))]
 617     else:
 618         append_text(xml.text, result)
 619         convert_inner(ctx, xml, result)
 620     append_text(xml.tail, result)
 621     return result
 622
 623
 624 def convert_listitem(ctx, xml):
 625     result = ['<li class="listitem">']
 626     convert_inner(ctx, xml, result)
 627     result.append('</li>')
 628     # is in itemizedlist and there can be no 'text'
 629     return result
 630
 631
 632 def convert_literallayout(ctx, xml):
 633     result = ['<div class="literallayout"><p><br>\n']
 634     append_text(xml.text, result)
 635     convert_inner(ctx, xml, result)
 636     result.append('</p></div>')
 637     append_text(xml.tail, result)
 638     return result
 639
 640
 641 def convert_orderedlist(ctx, xml):
 642     result = ['<div class="orderedlist"><ol class="orderedlist" type="1">']
 643     convert_inner(ctx, xml, result)
 644     result.append('</ol></div>')
 645     append_text(xml.tail, result)
 646     return result
 647
 648
 649 def convert_para(ctx, xml):
 650     result = []
 651     if 'id' in xml.attrib:
 652         result.append('<a name="%s"></a>' % xml.attrib['id'])
 653     if 'role' in xml.attrib:
 654         result.append('<p class="%s">' % xml.attrib['role'])
 655     else:
 656         result.append('<p>')
 657     append_text(xml.text, result)
 658     convert_inner(ctx, xml, result)
 659     result.append('</p>')
 660     append_text(xml.tail, result)
 661     return result
 662
 663
 664 def convert_para_like(ctx, xml):
 665     result = []
 666     if 'id' in xml.attrib:
 667         result.append('<a name="%s"></a>' % xml.attrib['id'])
 668     result.append('<p class="%s">' % xml.tag)
 669     append_text(xml.text, result)
 670     convert_inner(ctx, xml, result)
 671     result.append('</p>')
 672     append_text(xml.tail, result)
 673     return result
 674
 675
 676 def convert_phrase(ctx, xml):
 677     result = ['<span']
 678     if 'role' in xml.attrib:
 679         result.append(' class="%s">' % xml.attrib['role'])
 680     else:
 681         result.append('>')
 682     append_text(xml.text, result)
 683     convert_inner(ctx, xml, result)
 684     result.append('</span>')
 685     append_text(xml.tail, result)
 686     return result
 687
 688
 689 def convert_primaryie(ctx, xml):
 690     result = ['<dt>\n']
 691     convert_inner(ctx, xml, result)
 692     result.append('\n</dt>\n<dd></dd>\n')
 693     return result
 694
 695
 696 def convert_pre(ctx, xml):
 697     result = ['<pre class="%s">\n' % xml.tag]
 698     append_text(xml.text, result)
 699     convert_inner(ctx, xml, result)
 700     result.append('</pre>')
 701     append_text(xml.tail, result)
 702     return result
 703
 704
 705 def convert_programlisting(ctx, xml):
 706     result = []
 707     if xml.attrib.get('role', '') == 'example':
 708         if xml.text:
 709             lang = xml.attrib.get('language', 'c').lower()
 710             if lang not in LEXERS:
 711                 LEXERS[lang] = get_lexer_by_name(lang)
 712             lexer = LEXERS.get(lang, None)
 713             if lexer:
 714                 highlighted = highlight(xml.text, lexer, HTML_FORMATTER)
 715
 716                 # we do own line-numbering
 717                 line_count = highlighted.count('\n')
 718                 source_lines = '\n'.join([str(i) for i in range(1, line_count + 1)])
 719                 result.append("""<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">
 720   <tbody>
 721     <tr>
 722       <td class="listing_lines" align="right"><pre>%s</pre></td>
 723       <td class="listing_code"><pre class="programlisting">%s</pre></td>
 724     </tr>
 725   </tbody>
 726 </table>
 727 """ % (source_lines, highlighted))
 728             else:
 729                 logging.warn('No pygments lexer for language="%s"', lang)
 730                 result.append('<pre class="programlisting">')
 731                 result.append(xml.text)
 732                 result.append('</pre>')
 733     else:
 734         result.append('<pre class="programlisting">')
 735         append_text(xml.text, result)
 736         convert_inner(ctx, xml, result)
 737         result.append('</pre>')
 738     append_text(xml.tail, result)
 739     return result
 740
 741
 742 def convert_quote(ctx, xml):
 743     result = ['<span class="quote">"<span class="quote">']
 744     append_text(xml.text, result)
 745     convert_inner(ctx, xml, result)
 746     result.append('</span>"</span>')
 747     append_text(xml.tail, result)
 748     return result
 749
 750
 751 def convert_refsect1(ctx, xml):
 752     # Add a divider between two consequitive refsect2
 753     def convert_inner(ctx, xml, result):
 754         prev = None
 755         for child in xml:
 756             if child.tag == 'refsect2' and prev is not None and prev.tag == child.tag:
 757                 result.append('<hr>\n')
 758             result.extend(convert_tags.get(child.tag, convert__unknown)(ctx, child))
 759             prev = child
 760     return convert_sect(ctx, xml, 'h2', convert_inner)
 761
 762
 763 def convert_refsect2(ctx, xml):
 764     return convert_sect(ctx, xml, 'h3')
 765
 766
 767 def convert_refsect3(ctx, xml):
 768     return convert_sect(ctx, xml, 'h4')
 769
 770
 771 def convert_row(ctx, xml):
 772     result = ['<tr>\n']
 773     convert_inner(ctx, xml, result)
 774     result.append('</tr>\n')
 775     return result
 776
 777
 778 def convert_sect1_tag(ctx, xml):
 779     return convert_sect(ctx, xml, 'h2')
 780
 781
 782 def convert_sect2(ctx, xml):
 783     return convert_sect(ctx, xml, 'h3')
 784
 785
 786 def convert_sect3(ctx, xml):
 787     return convert_sect(ctx, xml, 'h4')
 788
 789
 790 def convert_simpara(ctx, xml):
 791     result = ['<p>']
 792     append_text(xml.text, result)
 793     result.append('</p>')
 794     append_text(xml.tail, result)
 795     return result
 796
 797
 798 def convert_span(ctx, xml):
 799     result = ['<span class="%s">' % xml.tag]
 800     append_text(xml.text, result)
 801     convert_inner(ctx, xml, result)
 802     result.append('</span>')
 803     append_text(xml.tail, result)
 804     return result
 805
 806
 807 def convert_table(ctx, xml):
 808     result = ['<div class="table">']
 809     if 'id' in xml.attrib:
 810         result.append('<a name="%s"></a>' % xml.attrib['id'])
 811     title_tag = xml.find('title')
 812     if title_tag is not None:
 813         result.append('<p class="title"><b>')
 814         # TODO(ensonic): Add a 'Table X. ' prefix, needs a table counter
 815         result.extend(convert_title(ctx, title_tag))
 816         result.append('</b></p>')
 817         xml.remove(title_tag)
 818     result.append('<div class="table-contents"><table class="table" summary="g_object_new" border="1">')
 819
 820     convert_inner(ctx, xml, result)
 821
 822     result.append('</table></div></div>')
 823     append_text(xml.tail, result)
 824     return result
 825
 826
 827 def convert_tbody(ctx, xml):
 828     result = ['<tbody>']
 829     ctx['table.entry'] = 'td'
 830     convert_inner(ctx, xml, result)
 831     result.append('</tbody>')
 832     # is in tgroup and there can be no 'text'
 833     return result
 834
 835
 836 def convert_tgroup(ctx, xml):
 837     # tgroup does not expand to anything, but the nested colspecs need to
 838     # be put into a colgroup
 839     cols = xml.findall('colspec')
 840     result = []
 841     if cols:
 842         result.append('<colgroup>\n')
 843         for col in cols:
 844             result.extend(convert_colspec(ctx, col))
 845             xml.remove(col)
 846         result.append('</colgroup>\n')
 847     convert_inner(ctx, xml, result)
 848     # is in informaltable and there can be no 'text'
 849     return result
 850
 851
 852 def convert_thead(ctx, xml):
 853     result = ['<thead>']
 854     ctx['table.entry'] = 'th'
 855     convert_inner(ctx, xml, result)
 856     result.append('</thead>')
 857     # is in tgroup and there can be no 'text'
 858     return result
 859
 860
 861 def convert_title(ctx, xml):
 862     # This is always called from some context
 863     result = []
 864     append_text(xml.text, result)
 865     convert_inner(ctx, xml, result)
 866     append_text(xml.tail, result)
 867     return result
 868
 869
 870 def convert_ulink(ctx, xml):
 871     result = ['<a class="%s" href="%s">%s</a>' % (xml.tag, xml.attrib['url'], xml.text)]
 872     if xml.tail:
 873         result.append(xml.tail)
 874     return result
 875
 876
 877 def convert_userinput(ctx, xml):
 878     result = ['<span class="command"><strong>']
 879     append_text(xml.text, result)
 880     convert_inner(ctx, xml, result)
 881     result.append('</strong></span>')
 882     append_text(xml.tail, result)
 883     return result
 884
 885
 886 def convert_variablelist(ctx, xml):
 887     result = ["""<div class="variablelist"><table border="0" class="variablelist">
 888 <colgroup>
 889 <col align="left" valign="top">
 890 <col>
 891 </colgroup>
 892 <tbody>"""]
 893     convert_inner(ctx, xml, result)
 894     result.append("""</tbody>
 895 </table></div>""")
 896     return result
 897
 898
 899 def convert_varlistentry(ctx, xml):
 900     result = ['<tr>']
 901
 902     result.append('<td><p>')
 903     term = xml.find('term')
 904     result.extend(convert_span(ctx, term))
 905     result.append('</p></td>')
 906
 907     result.append('<td>')
 908     listitem = xml.find('listitem')
 909     convert_inner(ctx, listitem, result)
 910     result.append('</td>')
 911
 912     result.append('<tr>')
 913     return result
 914
 915
 916 # TODO(ensonic): turn into class with converters as functions and ctx as self
 917 convert_tags = {
 918     'abstract': convert_abstract,
 919     'acronym': convert_acronym,
 920     'anchor': convert_anchor,
 921     'application': convert_span,
 922     'bookinfo': convert_bookinfo,
 923     'blockquote': convert_blockquote,
 924     'caption': convert_div,
 925     'code': convert_code,
 926     'colspec': convert_colspec,
 927     'constant': convert_code,
 928     'command': convert_command,
 929     'corpauthor': convert_corpauthor,
 930     'emphasis': convert_emphasis,
 931     'entry': convert_entry,
 932     'envar': convert_code,
 933     'footnote': convert_footnote,
 934     'filename': convert_code,
 935     'formalpara': convert_formalpara,
 936     'function': convert_code,
 937     'glossdef': convert_glossdef,
 938     'glossdiv': convert_glossdiv,
 939     'glossentry': convert_glossentry,
 940     'glossterm': convert_glossterm,
 941     'imageobject': convert_imageobject,
 942     'indexdiv': convert_indexdiv,
 943     'indexentry': convert_ignore,
 944     'indexterm': convert_skip,
 945     'informalexample': convert_div,
 946     'informaltable': convert_informaltable,
 947     'inlinegraphic': convert_inlinegraphic,
 948     'inlinemediaobject': convert_span,
 949     'itemizedlist': convert_itemizedlist,
 950     'legalnotice': convert_div,
 951     'link': convert_link,
 952     'listitem': convert_listitem,
 953     'literal': convert_code,
 954     'literallayout': convert_literallayout,
 955     'mediaobject': convert_div,
 956     'note': convert_div,
 957     'option': convert_code,
 958     'orderedlist': convert_orderedlist,
 959     'para': convert_para,
 960     'partintro': convert_div,
 961     'parameter': convert_em_class,
 962     'phrase': convert_phrase,
 963     'primaryie': convert_primaryie,
 964     'programlisting': convert_programlisting,
 965     'quote': convert_quote,
 966     'releaseinfo': convert_para_like,
 967     'refsect1': convert_refsect1,
 968     'refsect2': convert_refsect2,
 969     'refsect3': convert_refsect3,
 970     'replaceable': convert_em_class,
 971     'returnvalue': convert_span,
 972     'row': convert_row,
 973     'screen': convert_pre,
 974     'sect1': convert_sect1_tag,
 975     'sect2': convert_sect2,
 976     'sect3': convert_sect3,
 977     'simpara': convert_simpara,
 978     'structfield': convert_em_class,
 979     'structname': convert_span,
 980     'synopsis': convert_pre,
 981     'symbol': convert_span,
 982     'table': convert_table,
 983     'tbody': convert_tbody,
 984     'term': convert_span,
 985     'tgroup': convert_tgroup,
 986     'thead': convert_thead,
 987     'type': convert_span,
 988     'ulink': convert_ulink,
 989     'userinput': convert_userinput,
 990     'varname': convert_code,
 991     'variablelist': convert_variablelist,
 992     'varlistentry': convert_varlistentry,
 993     'warning': convert_div,
 994 }
 995
 996 # conversion helpers
 997
 998 HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
 999 <html>
1000 <head>
1001 <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
1002 <title>%s</title>
1003 %s<link rel="stylesheet" href="style.css" type="text/css">
1004 </head>
1005 <body bgcolor="white" text="black" link="#0000FF" vlink="#840084" alink="#0000FF">
1006 """
1007
1008
def generate_head_links(ctx):
    """Build the <link rel="..."> tags for the html head.

    A "home" link is always generated from ctx['nav_home']. Links for "up",
    "prev" and "next" are only added if the respective 'nav_*' key is in ctx.

    Args:
      ctx: conversion context; the nav entries need 'filename' and 'title'
           attributes

    Returns:
      the link tags as a single string, one tag per line
    """
    home = ctx['nav_home']
    links = [
        '<link rel="home" href="%s" title="%s">\n' % (home.filename, home.title)
    ]
    optional = (
        ('nav_up', '<link rel="up" href="%s" title="%s">\n'),
        ('nav_prev', '<link rel="prev" href="%s" title="%s">\n'),
        ('nav_next', '<link rel="next" href="%s" title="%s">\n'),
    )
    for key, template in optional:
        if key in ctx:
            target = ctx[key]
            links.append(template % (target.filename, target.title))
    return ''.join(links)
1024
1025
def generate_nav_links(ctx):
    """Build the table cells with the home/up/prev/next navigation icons.

    The home cell is always a link. For 'nav_up', 'nav_prev' and 'nav_next' a
    linked icon is emitted if the key is in ctx, otherwise the "insensitive"
    icon without a link.

    Args:
      ctx: conversion context; the nav entries need a 'filename' attribute

    Returns:
      the concatenated <td> cells as a single string
    """
    cells = [
        '<td><a accesskey="h" href="%s"><img src="home.png" width="16" height="16" border="0" alt="Home"></a></td>' % ctx['nav_home'].filename
    ]
    # (ctx key, cell if the target exists, cell if it does not)
    optional = (
        ('nav_up',
         '<td><a accesskey="u" href="%s"><img src="up.png" width="16" height="16" border="0" alt="Up"></a></td>',
         '<td><img src="up-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_prev',
         '<td><a accesskey="p" href="%s"><img src="left.png" width="16" height="16" border="0" alt="Prev"></a></td>',
         '<td><img src="left-insensitive.png" width="16" height="16" border="0"></td>'),
        ('nav_next',
         '<td><a accesskey="n" href="%s"><img src="right.png" width="16" height="16" border="0" alt="Next"></a></td>',
         '<td><img src="right-insensitive.png" width="16" height="16" border="0"></td>'),
    )
    for key, linked, unlinked in optional:
        if key in ctx:
            cells.append(linked % ctx[key].filename)
        else:
            cells.append(unlinked)

    return ''.join(cells)
1051
1052
def generate_toc(ctx, node):
    """Build the nested <dt>/<dd> entries for all descendants of node.

    Each child becomes a <dt> with a link to its file (plus anchor, if it has
    one) and an optional subtitle. Children that have children themselves are
    followed by a <dd><dl> block containing their entries.

    Args:
      ctx: conversion context, only passed on to the recursive calls
      node: tree node whose children are listed

    Returns:
      list of html fragments (without the outer <dl>)
    """
    entries = []
    for child in node.children:
        # TODO: urlencode the filename: urllib.parse.quote_plus()
        href = child.filename + child.anchor if child.anchor else child.filename
        entries.append('<dt><span class="%s"><a href="%s">%s</a></span>\n' % (
            child.title_tag, href, child.title))
        if child.subtitle:
            entries.append('<span class="%s"> — %s</span>' % (child.subtitle_tag, child.subtitle))
        entries.append('</dt>\n')
        if not child.children:
            continue
        entries.append('<dd><dl>')
        entries += generate_toc(ctx, child)
        entries.append('</dl></dd>')
    return entries
1070
1071
def generate_basic_nav(ctx):
    """Build the navigation table with an empty shortcuts cell.

    Returns:
      the html table as a string; the icon cells come from
      generate_nav_links()
    """
    nav_cells = generate_nav_links(ctx)
    template = """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts"></td>
    %s
  </tr>
</table>
    """
    return template % nav_cells
1080
1081
def generate_alpha_nav(ctx, divs, prefix, span_id):
    """Build the navigation table with one shortcut per division.

    Args:
      ctx: conversion context
      divs: xml elements; the title of each (from xml_get_title()) is used
            both as link text and, after the prefix, as link target
      prefix: string put in front of the title to form the '#...' target
      span_id: suffix for the id ("nav_<span_id>") of the shortcuts span

    Returns:
      the html table as a string
    """
    titles = [xml_get_title(ctx, div) for div in divs]
    shortcuts = '\n<span class="dim">|</span>\n'.join(
        '<a class="shortcut" href="#%s%s">%s</a>' % (prefix, title, title)
        for title in titles)
    nav_cells = generate_nav_links(ctx)

    return """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%%" align="left" class="shortcuts">
      <span id="nav_%s">
        %s
      </span>
    </td>
    %s
  </tr>
</table>
    """ % (span_id, shortcuts, nav_cells)
1099
1100
def generate_refentry_nav(ctx, refsect1s, result):
    """Append the navigation table of a refentry page to result.

    Besides the "Top" shortcut, one shortcut is added per listed section.
    Sections are left out if their role ends with "_proto", if they have no
    'id' attribute, or if the id contains no '.'.

    Args:
      ctx: conversion context
      refsect1s: the refsect1 xml elements of the page
      result: list of html fragments that is extended in place
    """
    result.append("""<table class="navigation" id="top" width="100%" cellpadding="2" cellspacing="5">
  <tr valign="middle">
    <td width="100%" align="left" class="shortcuts">
      <a href="#" class="shortcut">Top</a>""")

    for sect in refsect1s:
        attrs = sect.attrib
        # don't list TOC sections (role="xxx_proto")
        if attrs.get('role', '').endswith("_proto"):
            continue
        # skip sections without 'id' attrs and foreign sections (no '.')
        if 'id' not in attrs or '.' not in attrs['id']:
            continue

        ref_id = attrs['id']
        title = xml_get_title(ctx, sect)
        # the part after the first '.' names the section
        span_id = ref_id.split('.')[1].replace('-', '_')

        result.append("""
          <span id="nav_%s">
            <span class="dim">|</span>
            <a href="#%s" class="shortcut">%s</a>
          </span>
          """ % (span_id, ref_id, title))

    nav_cells = generate_nav_links(ctx)
    result.append("""
    </td>
    %s
  </tr>
</table>
""" % nav_cells)
1135
1136
def generate_footer(ctx):
    """Build the footnotes block for the end of a page.

    Args:
      ctx: conversion context; ctx['footnotes'], if present, is an iterable
           of html fragment lists

    Returns:
      list of html fragments, empty if ctx has no 'footnotes' key
    """
    if 'footnotes' not in ctx:
        return []

    html = ["""<div class="footnotes">\n
<br><hr style="width:100; text-align:left;margin-left: 0">
"""]
    html.extend(piece for footnote in ctx['footnotes'] for piece in footnote)
    html.append('</div>\n')
    return html
1147
1148
def get_id(node):
    """Return the 'id' of the xml element of a chunk, generating one if needed.

    If the element has a non-empty 'id' attribute, that is returned. Otherwise
    an id of the form 'id-1.<a>.<b>...' is built from the 1-based position of
    the element and of each of its ancestors among their siblings, outermost
    first.

    Args:
      node: tree node with an 'xml' attribute holding the xml element

    Returns:
      the id string
    """
    xml = node.xml
    node_id = xml.attrib.get('id', None)
    if node_id:
        return node_id

    logging.info('%d: No "id" attribute on "%s", generating one',
                 xml.sourceline, xml.tag)
    # Generate the 'id'. We need to walk up the xml-tree and check the positions
    # for each sibling. Positions are collected innermost first and reversed
    # at the end.
    positions = []
    parent = xml.getparent()
    while parent is not None:
        # index() on the parent replaces the deprecated getchildren() and
        # avoids building a list of all siblings for each level.
        positions.append(str(parent.index(xml) + 1))
        xml = parent
        parent = xml.getparent()
    positions.reverse()
    return 'id-1.' + '.'.join(positions)
1168
1169
def convert_chunk_with_toc(ctx, div_class, title_tag):
    """Convert a chunk to a page with title, table of contents and content.

    The 'title' child of the chunk, if any, is rendered as the page heading
    and then removed from the xml so that it is not converted again with the
    remaining content.

    Args:
      ctx: conversion context; ctx['node'] is the chunk to convert
      div_class: class attribute for the <div> wrapping the page content
      title_tag: html tag name used for the heading (e.g. 'h1')

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="%s">' % div_class,
    ]

    title_elem = node.xml.find('title')
    if title_elem is not None:
        html.append("""
<div class="titlepage">
<%s class="title"><a name="%s"></a>%s</%s>
</div>""" % (
            title_tag, get_id(node), title_elem.text, title_tag))
        node.xml.remove(title_elem)

    toc_entries = generate_toc(ctx, node)
    if toc_entries:
        # TODO: not all docbook page types use this extra heading
        html.append("""<p><b>Table of Contents</b></p>
    <div class="toc">
      <dl class="toc">
    """)
        html += toc_entries
        html.append("""</dl>
    </div>
    """)

    convert_inner(ctx, node.xml, html)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1203
1204
1205 # docbook chunks
1206
1207
def convert_book(ctx):
    """Convert the 'book' chunk to the index page.

    The page consists of the book title, the converted 'bookinfo' (if the
    book has one) and the table of contents for the whole document.

    Args:
      ctx: conversion context; ctx['node'] is the book chunk

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    result = [
        HTML_HEADER % (node.title, generate_head_links(ctx)),
        """<table class="navigation" id="top" width="100%%" cellpadding="2" cellspacing="0">
    <tr><th valign="middle"><p class="title">%s</p></th></tr>
</table>
<div class="book">
""" % node.title
    ]
    # 'bookinfo' is optional in docbook; without this check a book lacking it
    # raised an IndexError here.
    bookinfo = node.xml.find('bookinfo')
    if bookinfo is not None:
        # we already used the title
        title = bookinfo.find('title')
        if title is not None:
            bookinfo.remove(title)
        result.extend(convert_bookinfo(ctx, bookinfo))
    result.append("""<div class="toc">
  <dl class="toc">
""")
    result.extend(generate_toc(ctx, node.root))
    result.append("""</dl>
</div>
""")
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1236
1237
def convert_chapter(ctx):
    """Convert a 'chapter' chunk: page with toc, <h2> heading, class "chapter"."""
    return convert_chunk_with_toc(ctx, div_class='chapter', title_tag='h2')
1240
1241
def convert_glossary(ctx):
    """Convert a 'glossary' chunk to a page.

    The navigation bar gets one shortcut per 'glossdiv' child, then each
    'glossdiv' is converted with convert_glossdiv().

    Args:
      ctx: conversion context; ctx['node'] is the glossary chunk

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    glossdivs = node.xml.findall('glossdiv')

    header = HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx))
    nav = generate_alpha_nav(ctx, glossdivs, 'gls', 'glossary')
    # the heading level follows the depth of the node in the tree
    titlepage = """<div class="glossary">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)

    html = [header, nav, titlepage]
    for glossdiv in glossdivs:
        html += convert_glossdiv(ctx, glossdiv)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1261
1262
def convert_index(ctx):
    """Convert an 'index' chunk to a page.

    The navigation bar gets one shortcut per nested 'indexdiv' (the ones
    below the first top-level 'indexdiv'), then each of those is converted
    with convert_indexdiv(). An index without any 'indexdiv' yields a page
    with just the heading.

    Args:
      ctx: conversion context; ctx['node'] is the index chunk

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    # Get all indexdivs under indexdiv. find() returns None for an empty
    # <index/>, which used to raise an AttributeError here.
    indexdivs = []
    indexdiv = node.xml.find('indexdiv')
    if indexdiv is not None:
        indexdivs = indexdiv.findall('indexdiv')

    result = [
        HTML_HEADER % (node.title + ": " + node.root.title, generate_head_links(ctx)),
        generate_alpha_nav(ctx, indexdivs, 'idx', 'index'),
        """<div class="index">
<div class="titlepage"><h%1d class="title">
<a name="%s"></a>%s</h%1d>
</div>""" % (node.depth, get_id(node), node.title, node.depth)
    ]
    for i in indexdivs:
        result.extend(convert_indexdiv(ctx, i))
    result.extend(generate_footer(ctx))
    result.append("""</div>
</body>
</html>""")
    return result
1283
1284
def convert_part(ctx):
    """Convert a 'part' chunk: page with toc, <h1> heading, class "part"."""
    return convert_chunk_with_toc(ctx, div_class='part', title_tag='h1')
1287
1288
def convert_preface(ctx):
    """Convert a 'preface' chunk to a page.

    The 'title' child, if any, is rendered as <h2> heading and removed from
    the xml before the remaining content is converted.

    Args:
      ctx: conversion context; ctx['node'] is the preface chunk

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx)),
        generate_basic_nav(ctx),
        '<div class="preface">'
    ]

    title_elem = node.xml.find('title')
    if title_elem is not None:
        html.append("""
<div class="titlepage">
<h2 class="title"><a name="%s"></a>%s</h2>
</div>""" % (get_id(node), title_elem.text))
        node.xml.remove(title_elem)

    convert_inner(ctx, node.xml, html)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1309
1310
def convert_reference(ctx):
    """Convert a 'reference' chunk: page with toc, <h1> heading, class "reference"."""
    return convert_chunk_with_toc(ctx, div_class='reference', title_tag='h1')
1313
1314
def convert_refentry(ctx):
    """Convert a 'refentry' chunk to a page.

    The page has a navigation bar with shortcuts to the refsect1 sections, a
    title block with an optional gallery image and the converted refsect1
    sections. The gallery image is taken from
    refmeta/refmiscinfo/inlinegraphic, if that exists.

    Args:
      ctx: conversion context; ctx['node'] is the refentry chunk

    Returns:
      list of html fragments for the whole page
    """
    node = ctx['node']
    node_id = get_id(node)
    refsect1s = node.xml.findall('refsect1')

    # Descend refmeta -> refmiscinfo -> inlinegraphic; stop at the first
    # missing element and leave the gallery cell empty in that case.
    gallery = ''
    elem = node.xml
    for tag in ('refmeta', 'refmiscinfo', 'inlinegraphic'):
        elem = elem.find(tag)
        if elem is None:
            break
    else:
        gallery = ''.join(convert_inlinegraphic(ctx, elem))

    page_title = node.title + ": " + node.root.title
    html = [
        HTML_HEADER % (page_title, generate_head_links(ctx))
    ]
    generate_refentry_nav(ctx, refsect1s, html)
    html.append("""
<div class="refentry">
<a name="%s"></a>
<div class="refnamediv">
  <table width="100%%"><tr>
    <td valign="top">
      <h2><span class="refentrytitle"><a name="%s.top_of_page"></a>%s</span></h2>
      <p>%s — %s</p>
    </td>
    <td class="gallery_image" valign="top" align="right">%s</td>
  </tr></table>
</div>
""" % (node_id, node_id, node.title, node.title, node.subtitle, gallery))

    for sect in refsect1s:
        html += convert_refsect1(ctx, sect)
    html += generate_footer(ctx)
    html.append("""</div>
</body>
</html>""")
    return html
1354
1355
def convert_sect1(ctx):
    """Convert a 'sect1' chunk: page with toc, <h2> heading, class "sect1"."""
    return convert_chunk_with_toc(ctx, div_class='sect1', title_tag='h2')
1358
1359
1360 # TODO(ensonic): turn into class with converters as functions and ctx as self
# Maps the name of a chunk node to the function generating its html page.
convert_chunks = dict(
    book=convert_book,
    chapter=convert_chapter,
    glossary=convert_glossary,
    index=convert_index,
    part=convert_part,
    preface=convert_preface,
    reference=convert_reference,
    refentry=convert_refentry,
    sect1=convert_sect1,
)
1372
1373
def generate_nav_nodes(files, node):
    """Determine the navigation targets for a node.

    Args:
      files: list of nodes in output order; must contain node
      node: the node to compute the navigation for

    Returns:
      dict with 'nav_home' (the root node) and, where they exist, 'nav_up'
      (the parent), 'nav_prev' and 'nav_next' (the neighbours in files)
    """
    nav = {'nav_home': node.root}
    # nav params: up, prev, next
    if node.parent:
        nav['nav_up'] = node.parent
    pos = files.index(node)
    if pos > 0:
        nav['nav_prev'] = files[pos - 1]
    if pos + 1 < len(files):
        nav['nav_next'] = files[pos + 1]
    return nav
1387
1388
def convert(out_dir, module, files, node):
    """Convert one docbook chunk to a html file.

    The output file is opened (and thereby created) before the converter is
    looked up; if there is no converter for the node a warning is logged and
    the file stays empty.

    Args:
      out_dir: already created output dir
      module: the module name, stored in the conversion context
      files: list of nodes in the tree in pre-order
      node: current tree node
    """

    logging.info('Writing: %s', node.filename)
    target = os.path.join(out_dir, node.filename)
    with open(target, 'wt', newline='\n', encoding='utf-8') as html:
        ctx = dict(module=module, files=files, node=node)
        ctx.update(generate_nav_nodes(files, node))

        converter = convert_chunks.get(node.name)
        if converter is None:
            logging.warning('Add converter/template for "%s"', node.name)
        else:
            html.writelines(converter(ctx))
1413
1414
def create_devhelp2_toc(node):
    """Build the nested <sub> entries of the devhelp2 chapter list.

    Args:
      node: tree node whose children are listed; children need 'title',
            'filename' and 'children' attributes

    Returns:
      list of xml fragments; childless entries are self-closing <sub/> tags
    """
    entries = []
    for child in node.children:
        attrs = (child.title, child.filename)
        if not child.children:
            entries.append('<sub name="%s" link="%s"/>\n' % attrs)
            continue
        entries.append('<sub name="%s" link="%s">\n' % attrs)
        entries += create_devhelp2_toc(child)
        entries.append('</sub>\n')
    return entries
1425
1426
def create_devhelp2_condition_attribs(node):
    """Turn the 'condition' attribute of node into devhelp2 xml attributes.

    The condition is a '|' separated list of 'name:value' or plain 'name'
    entries (since, deprecated, ...). Each entry becomes a name="value"
    attribute; entries without value get an empty one, entries without a name
    are dropped.

    Args:
      node: xml element, only node.attrib is used

    Returns:
      the attributes as a string with a leading space, or '' if there is
      nothing to add
    """
    if 'condition' not in node.attrib:
        return ''

    # The values end up inside double quoted xml attributes, so all xml
    # special chars need escaping ('&' first, to not escape the escapes).
    # None of the replacements introduces a '|' or ':'.
    condition = node.attrib['condition']
    for char, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;')):
        condition = condition.replace(char, entity)

    keywords = []
    for entry in condition.split('|'):
        # deprecated can have no description, then value is ''
        name, _, value = entry.partition(':')
        if not name:
            continue
        keywords.append('{}="{}"'.format(name, value))
    if not keywords:
        return ''
    return ' ' + ' '.join(keywords)
1441
1442
def create_devhelp2_refsect2_keyword(node, base_link):
    """Build the devhelp2 <keyword> entry for a refsect2 element.

    Args:
      node: refsect2 xml element; needs 'role' and 'id' attributes
      base_link: link prefix, the 'id' of the node is appended to it

    Returns:
      the <keyword/> line as a string
    """
    keyword_type = node.attrib['role']
    name = xml_get_title({}, node)
    link = base_link + node.attrib['id']
    extra_attribs = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, name, link, extra_attribs)
1447
1448
def create_devhelp2_refsect3_keyword(node, base_link, title, name):
    """Build the devhelp2 <keyword> entry for a member row below a refsect3.

    Args:
      node: xml element; needs a 'role' attribute
      base_link: link prefix, name is appended to it
      title: value for the 'name' attribute of the keyword
      name: link target that is appended to base_link

    Returns:
      the <keyword/> line as a string
    """
    keyword_type = node.attrib['role']
    link = base_link + name
    extra_attribs = create_devhelp2_condition_attribs(node)
    return '    <keyword type="%s" name="%s" link="%s"%s/>\n' % (
        keyword_type, title, link, extra_attribs)
1453
1454
def create_devhelp2(out_dir, module, xml, files):
    """Write the '<module>.devhelp2' index file.

    The file lists the chapters (from the node tree) and one keyword for each
    refsect2 that has a 'role', plus keywords for the enum and struct members
    documented below it.

    Title and online location are taken from /book/bookinfo. If the bookinfo
    or its title is missing the title stays empty; if there is no
    online-location ulink the 'online' attribute is left out.

    Args:
      out_dir: already created output dir
      module: the module name, used for the file name and the 'name' attribute
      xml: root element of the document
      files: list of nodes in the tree in pre-order; must not be empty
    """
    with open(os.path.join(out_dir, module + '.devhelp2'), 'wt',
              newline='\n', encoding='utf-8') as idx:
        title = ''
        online_attr = ''
        # xpath() returns a (possibly empty) list, never None, so check each
        # result for content before taking the first item.
        bookinfo_nodes = xml.xpath('/book/bookinfo')
        if bookinfo_nodes:
            bookinfo = bookinfo_nodes[0]
            titles = bookinfo.xpath('./title/text()')
            if titles:
                title = titles[0]
            online_urls = bookinfo.xpath('./releaseinfo/ulink[@role="online-location"]/@url')
            if online_urls:
                online_attr = ' online="%s"' % online_urls[0]
            # TODO: support author too (see devhelp2.xsl)
        # TODO: fixxref uses '--src-lang' to set the language
        result = [
            """<?xml version="1.0" encoding="utf-8" standalone="no"?>
<book xmlns="http://www.devhelp.net/book" title="%s" link="index.html" author="" name="%s" version="2" language="c"%s>
  <chapters>
""" % (title, module, online_attr)
        ]
        # toc
        result.extend(create_devhelp2_toc(files[0].root))
        result.append("""  </chapters>
  <functions>
""")
        # keywords from all refsect2 and refsect3
        refsect2 = etree.XPath('//refsect2[@role]')
        refsect3_enum = etree.XPath('refsect3[@role="enum_members"]/informaltable/tgroup/tbody/row[@role="constant"]')
        refsect3_enum_details = etree.XPath('entry[@role="enum_member_name"]/para')
        refsect3_struct = etree.XPath('refsect3[@role="struct_members"]/informaltable/tgroup/tbody/row[@role="member"]')
        refsect3_struct_details = etree.XPath('entry[@role="struct_member_name"]/para/structfield')
        for node in files:
            base_link = node.filename + '#'
            refsect2_nodes = refsect2(node.xml)
            for refsect2_node in refsect2_nodes:
                result.append(create_devhelp2_refsect2_keyword(refsect2_node, base_link))
                # enum members: the keyword name is the text of the <para>
                refsect3_nodes = refsect3_enum(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_enum_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, details_node.text, name))
                # struct members: the 'id' serves as keyword name and link
                refsect3_nodes = refsect3_struct(refsect2_node)
                for refsect3_node in refsect3_nodes:
                    details_node = refsect3_struct_details(refsect3_node)[0]
                    name = details_node.attrib['id']
                    result.append(create_devhelp2_refsect3_keyword(refsect3_node, base_link, name, name))

        result.append("""  </functions>
</book>
""")
        idx.writelines(result)
1504
1505
def get_dirs(uninstalled):
    """Determine the gtk-doc data dir and the dir with the style files.

    Args:
      uninstalled: if true, use the dir of the running script (or
                   $ABS_TOP_SRCDIR if only that contains 'gtk-doc.xsl')
                   instead of the installed data dir

    Returns:
      tuple (gtkdocdir, styledir)
    """
    if not uninstalled:
        gtkdocdir = os.path.join(config.datadir, 'gtk-doc/data')
        return (gtkdocdir, gtkdocdir)

    # this does not work from builddir!=srcdir
    gtkdocdir = os.path.dirname(sys.argv[0])
    # try 'srcdir' (set from makefiles) too
    if (not os.path.exists(gtkdocdir + '/gtk-doc.xsl') and
            os.path.exists(os.environ.get("ABS_TOP_SRCDIR", '') + '/gtk-doc.xsl')):
        gtkdocdir = os.environ['ABS_TOP_SRCDIR']
    return (gtkdocdir, gtkdocdir + '/style')
1519
1520
def main(module, index_file, out_dir, uninstalled):
    """Generate the html files and the devhelp2 index for a document.

    Args:
      module: the module name
      index_file: the main docbook xml file
      out_dir: already created output dir
      uninstalled: passed to get_dirs() to locate the style files
    """
    tree = etree.parse(index_file)
    tree.xinclude()

    _, styledir = get_dirs(uninstalled)
    # copy navigation images and stylesheets to html directory ...
    assets = glob(os.path.join(styledir, '*.png'))
    assets.append(os.path.join(styledir, 'style.css'))
    for asset in assets:
        shutil.copy(asset, out_dir)
    # ... and append the syntax highlighting styles to the copied stylesheet
    with open(os.path.join(out_dir, 'style.css'), 'at',
              newline='\n', encoding='utf-8') as css:
        css.write(HTML_FORMATTER.get_style_defs())

    # TODO: migrate options from fixxref
    # TODO: do in parallel with loading the xml above.
    fixxref.LoadIndicies(out_dir, '/usr/share/gtk-doc/html', [])

    # We do multiple passes:
    # 1) recursively walk the tree and chunk it into a python tree so that we
    #   can generate navigation and link tags.
    root_node = chunk(tree.getroot(), module)
    # only nodes without an anchor are kept in the list of files
    files = [n for n in PreOrderIter(root_node) if n.anchor is None]

    # 2) extract tables:
    # TODO: use multiprocessing
    # - find all 'id' attribs and add them to the link map
    add_id_links(files, fixxref.Links)
    # - build glossary dict
    build_glossary(files)

    # 3) create a xxx.devhelp2 file, do this before 4), since we modify the tree
    create_devhelp2(out_dir, module, tree.getroot(), files)
    # 4) iterate the tree and output files
    # TODO: use multiprocessing
    for node in files:
        convert(out_dir, module, files, node)
1557
1558
def run(options):
    """Entry point: create the output dir, convert the document and exit.

    Args:
      options: parsed command line options; options.args holds the module
               name and the main xml document, options.uninstalled is passed
               on to main()
    """
    logging.info('options: %s', vars(options))
    module, document = options.args[0], options.args[1]

    # TODO: rename to 'html' later on
    # - right now in mkhtml, the dir is created by the Makefile and mkhtml
    #   outputs into the working directory
    out_dir = os.path.join(os.path.dirname(document), 'db2html')
    try:
        os.mkdir(out_dir)
    except FileExistsError:
        # an already existing output dir is fine
        pass

    sys.exit(main(module, document, out_dir, options.uninstalled))