gtkdoc/md_to_db.py

   1 # -*- python; coding: utf-8 -*-
   2 #
   3 # gtk-doc - GTK DocBook documentation generator.
   4 # Copyright (C) 1998  Damon Chaplin
   5 #               2007-2016  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 """
  23 Markdown to Docbook converter
  24 """
  25
  26 import logging
  27 import re
  28
# external functions
# Placeholders only: Init() rebinds these names to the implementations it
# imports from .mkdb. Until Init() has run they are None and must not be called.
ExpandAbbreviations = MakeXRef = MakeHashXRef = tagify = None

# Elements to consider non-block items in MarkDown parsing
MD_TEXT_LEVEL_ELEMENTS = {
    'emphasis', 'envar', 'filename', 'firstterm', 'footnote', 'function', 'literal',
    'manvolnum', 'option', 'replaceable', 'structfield', 'structname', 'title',
    'varname'
}
# Characters for which a preceding backslash is dropped by the span parser
# (the character itself is emitted literally).
MD_ESCAPABLE_CHARS = r'\`*_{}[]()>#+-.!'
# Additional characters handled the same way by the span parser's backslash
# branch; '@' and '%' are the sigils it otherwise expands.
MD_GTK_ESCAPABLE_CHARS = r'@%'
  40
  41
def Init():
    """Bind the module-level helper names to their implementations in .mkdb.

    Rebinds the globals ExpandAbbreviations, MakeXRef, MakeHashXRef and
    tagify (initialised to None at module level). The import is performed
    when this function is called, not when the module is imported.
    """
    # TODO(ensonic): find a better way to do this
    global ExpandAbbreviations, MakeXRef, MakeHashXRef, tagify
    from .mkdb import ExpandAbbreviations, MakeXRef, MakeHashXRef, tagify
  46
  47
  48 def MarkDownParseBlocks(lines, symbol, context):
  49     md_blocks = []
  50     md_block = {"type": ''}
  51
  52     logging.debug("parsing %s lines", len(lines))
  53     for line in lines:
  54         logging.info("type='%s', int='%s', parsing '%s'", md_block["type"], md_block.get('interrupted'), line)
  55         first_char = None
  56         if line:
  57             first_char = line[0]
  58
  59         if md_block["type"] == "markup":
  60             if 'closed' not in md_block:
  61                 if md_block["start"] in line:
  62                     md_block["depth"] += 1
  63
  64                 if md_block["end"] in line:
  65                     if md_block["depth"] > 0:
  66                         md_block["depth"] -= 1
  67                     else:
  68                         logging.info("closing tag '%s'", line)
  69                         md_block["closed"] = 1
  70                         # TODO(ensonic): reparse inner text with MarkDownParseLines?
  71
  72                 md_block["text"] += "\n" + line
  73                 logging.info("add to markup: '%s'", line)
  74                 continue
  75
  76         deindented_line = line.lstrip()
  77
  78         if md_block["type"] == "heading":
  79             # a heading is ended by any level less than or equal
  80             if md_block["level"] == 1:
  81                 heading_match = re.search(r'^[#][ \t]+(.+?)[ \t]*[#]*[ \t]*(?:{#([^}]+)})?[ \t]*$', line)
  82                 if re.search(r'^={4,}[ \t]*$', line):
  83                     text = md_block["lines"].pop()
  84                     md_block.pop("interrupted", None)
  85                     md_blocks.append(md_block)
  86                     md_block = {'type': "heading",
  87                                 'text': text,
  88                                 'lines': [],
  89                                 'level': 1,
  90                                 }
  91                     continue
  92                 elif heading_match:
  93                     md_block.pop("interrupted", None)
  94                     md_blocks.append(md_block)
  95                     md_block = {'type': "heading",
  96                                 'text': heading_match.group(1),
  97                                 'lines': [],
  98                                 'level': 1,
  99                                 }
 100                     if heading_match.group(2):
 101                         md_block['id'] = heading_match.group(2)
 102                     continue
 103                 else:
 104                     # push lines into the block until the end is reached
 105                     md_block["lines"].append(line)
 106                     continue
 107
 108             else:
 109                 heading_match = re.search(r'^([#]{1,2})[ \t]+(.+?)[ \t]*[#]*[ \t]*(?:{#([^}]+)})?[ \t]*$', line)
 110                 if re.search(r'^[=]{4,}[ \t]*$', line):
 111                     text = md_block["lines"].pop()
 112                     md_block.pop("interrupted", None)
 113                     md_blocks.append(md_block)
 114                     md_block = {'type': "heading",
 115                                 'text': text,
 116                                 'lines': [],
 117                                 'level': 1,
 118                                 }
 119                     continue
 120                 elif re.search(r'^[-]{4,}[ \t]*$', line):
 121                     text = md_block["lines"].pop()
 122                     md_block.pop("interrupted", None)
 123                     md_blocks.append(md_block)
 124                     md_block = {'type': "heading",
 125                                 'text': text,
 126                                 'lines': [],
 127                                 'level': 2,
 128                                 }
 129                     continue
 130                 elif heading_match:
 131                     md_block.pop("interrupted", None)
 132                     md_blocks.append(md_block)
 133                     md_block = {'type': "heading",
 134                                 'text': heading_match.group(2),
 135                                 'lines': [],
 136                                 'level': len(heading_match.group(1))
 137                                 }
 138                     if heading_match.group(3):
 139                         md_block['id'] = heading_match.group(3)
 140                     continue
 141                 else:
 142                     # push lines into the block until the end is reached
 143                     md_block["lines"].append(line)
 144                     continue
 145         elif md_block["type"] == "code":
 146             end_of_code_match = re.search(r'^[ \t]*\]\|(.*)', line)
 147             if end_of_code_match:
 148                 md_blocks.append(md_block)
 149                 md_block = {'type': "paragraph",
 150                             'text': end_of_code_match.group(1),
 151                             'lines': [],
 152                             }
 153             else:
 154                 md_block["lines"].append(line)
 155             continue
 156
 157         if deindented_line == '':
 158             logging.info('setting "interrupted" due to empty line')
 159             md_block["interrupted"] = 1
 160             continue
 161
 162         if md_block["type"] == "quote":
 163             if 'interrupted' not in md_block:
 164                 line = re.sub(r'^[ ]*>[ ]?', '', line)
 165                 md_block["lines"].append(line)
 166                 continue
 167
 168         elif md_block["type"] == "li":
 169             marker = md_block["marker"]
 170             marker_match = re.search(r'^([ ]{0,3})(%s)[ ](.*)' % marker, line)
 171             if marker_match:
 172                 indentation = marker_match.group(1)
 173                 if md_block["indentation"] != indentation:
 174                     md_block["lines"].append(line)
 175                 else:
 176                     ordered = md_block["ordered"]
 177                     md_block.pop('last', None)
 178                     md_blocks.append(md_block)
 179                     md_block = {'type': "li",
 180                                 'ordered': ordered,
 181                                 'indentation': indentation,
 182                                 'marker': marker,
 183                                 'last': 1,
 184                                 'lines': [re.sub(r'^[ ]{0,4}', '', marker_match.group(3))],
 185                                 }
 186                 continue
 187
 188             if 'interrupted' in md_block:
 189                 if first_char == " ":
 190                     md_block["lines"].append('')
 191                     line = re.sub(r'^[ ]{0,4}', '', line)
 192                     md_block["lines"].append(line)
 193                     md_block.pop("interrupted", None)
 194                     continue
 195             else:
 196                 line = re.sub(r'^[ ]{0,4}', '', line)
 197                 md_block["lines"].append(line)
 198                 continue
 199
 200         # indentation sensitive types
 201         heading_match = re.search(r'^([#]{1,2})[ \t]+(.+?)[ \t]*[#]*[ \t]*(?:{#([^}]+)})?[ \t]*$', line)
 202         code_match = re.search(r'^[ \t]*\|\[[ ]*(?:<!-- language="([^"]+?)" -->)?', line)
 203         if heading_match:
 204             # atx heading (#)
 205             md_blocks.append(md_block)
 206             md_block = {'type': "heading",
 207                         'text': heading_match.group(2),
 208                         'lines': [],
 209                         'level': len(heading_match.group(1)),
 210                         }
 211             if heading_match.group(3):
 212                 md_block['id'] = heading_match.group(3)
 213             continue
 214         elif re.search(r'^={4,}[ \t]*$', line):
 215             # setext heading (====)
 216
 217             if md_block["type"] == "paragraph" and "interrupted" in md_block:
 218                 md_blocks.append(md_block.copy())
 219                 md_block["type"] = "heading"
 220                 md_block["lines"] = []
 221                 md_block["level"] = 1
 222             continue
 223         elif re.search(r'^-{4,}[ \t]*$', line):
 224             # setext heading (-----)
 225
 226             if md_block["type"] == "paragraph" and "interrupted" in md_block:
 227                 md_blocks.append(md_block.copy())
 228                 md_block["type"] = "heading"
 229                 md_block["lines"] = []
 230                 md_block["level"] = 2
 231
 232             continue
 233         elif code_match:
 234             # code
 235             md_block["interrupted"] = 1
 236             md_blocks.append(md_block)
 237             md_block = {'type': "code",
 238                         'lines': [],
 239                         }
 240             if code_match.group(1):
 241                 md_block['language'] = code_match.group(1)
 242             continue
 243
 244         # indentation insensitive types
 245         markup_match = re.search(r'^[ ]*<\??(\w+)[^>]*([\/\?])?[ \t]*>', line)
 246         li_match = re.search(r'^([ ]*)[*+-][ ](.*)', line)
 247         quote_match = re.search(r'^[ ]*>[ ]?(.*)', line)
 248         if re.search(r'^[ ]*<!DOCTYPE/', line):
 249             md_blocks.append(md_block)
 250             md_block = {'type': "markup",
 251                         'text': deindented_line,
 252                         'start': '<',
 253                         'end': '>',
 254                         'depth': 0,
 255                         }
 256
 257         elif markup_match:
 258             # markup, including <?xml version="1.0"?>
 259             tag = markup_match.group(1)
 260             is_self_closing = markup_match.group(2) is not None
 261
 262             # skip link markdown
 263             # TODO(ensonic): consider adding more uri schemes (ftp, ...)
 264             if re.search(r'https?', tag):
 265                 logging.info("skipping link '%s'", tag)
 266             else:
 267                 # for TEXT_LEVEL_ELEMENTS, we want to keep them as-is in the paragraph
 268                 # instead of creation a markdown block.
 269                 scanning_for_end_of_text_level_tag = (
 270                     md_block["type"] == "paragraph" and
 271                     'start' in md_block and
 272                     'closed' not in md_block)
 273                 logging.info("markup found '%s', scanning %s ?", tag, scanning_for_end_of_text_level_tag)
 274                 if tag not in MD_TEXT_LEVEL_ELEMENTS and not scanning_for_end_of_text_level_tag:
 275                     md_blocks.append(md_block)
 276
 277                     if is_self_closing:
 278                         logging.info("self-closing docbook '%s'", tag)
 279                         md_block = {'type': "self-closing tag",
 280                                     'text': deindented_line,
 281                                     }
 282                         is_self_closing = 0
 283                         continue
 284
 285                     logging.info("new markup '%s'", tag)
 286                     md_block = {'type': "markup",
 287                                 'text': deindented_line,
 288                                 'start': '<' + tag + '>',
 289                                 'end': '</' + tag + '>',
 290                                 'depth': 0,
 291                                 }
 292                     if re.search(r'<\/%s>' % tag, deindented_line):
 293                         md_block["closed"] = 1
 294
 295                     continue
 296                 else:
 297                     if tag in MD_TEXT_LEVEL_ELEMENTS:
 298                         logging.info("text level docbook '%s' in '%s' state", tag, md_block["type"])
 299                         # TODO(ensonic): handle nesting
 300                         if not scanning_for_end_of_text_level_tag:
 301                             if not re.search(r'<\/%s>' % tag, deindented_line):
 302                                 logging.info("new text level markup '%s'", tag)
 303                                 md_block["start"] = '<' + tag + '>'
 304                                 md_block["end"] = '</' + tag + '>'
 305                                 md_block.pop("closed", None)
 306                                 logging.info("scanning for end of '%s'", tag)
 307
 308                         else:
 309                             if md_block["end"] in deindented_line:
 310                                 md_block["closed"] = 1
 311                                 logging.info("found end of '%s'", tag)
 312         elif li_match:
 313             # li
 314             md_blocks.append(md_block)
 315             indentation = li_match.group(1)
 316             md_block = {'type': "li",
 317                         'ordered': 0,
 318                         'indentation': indentation,
 319                         'marker': "[*+-]",
 320                         'first': 1,
 321                         'last': 1,
 322                         'lines': [re.sub(r'^[ ]{0,4}', '', li_match.group(2))],
 323                         }
 324             continue
 325         elif quote_match:
 326             md_blocks.append(md_block)
 327             md_block = {'type': "quote",
 328                         'lines': [quote_match.group(1)],
 329                         }
 330             continue
 331
 332         # list item
 333         list_item_match = re.search(r'^([ ]{0,4})\d+[.][ ]+(.*)', line)
 334         if list_item_match:
 335             md_blocks.append(md_block)
 336             indentation = list_item_match.group(1)
 337             md_block = {'type': "li",
 338                         'ordered': 1,
 339                         'indentation': indentation,
 340                         'marker': "\\d+[.]",
 341                         'first': 1,
 342                         'last': 1,
 343                         'lines': [re.sub(r'^[ ]{0,4}', '', list_item_match.group(2))],
 344                         }
 345             continue
 346
 347         # paragraph
 348         if md_block["type"] == "paragraph":
 349             if "interrupted" in md_block:
 350                 md_blocks.append(md_block)
 351                 md_block = {'type': "paragraph",
 352                             'text': line,
 353                             }
 354                 logging.info("new paragraph due to interrupted")
 355             else:
 356                 md_block["text"] += "\n" + line
 357                 logging.info("add to paragraph: '%s'", line)
 358
 359         else:
 360             md_blocks.append(md_block)
 361             md_block = {'type': "paragraph",
 362                         'text': line,
 363                         }
 364             logging.info("new paragraph due to different block type")
 365
 366     md_blocks.append(md_block)
 367     md_blocks.pop(0)
 368
 369     return md_blocks
 370
 371
 372 def MarkDownParseSpanElementsInner(text, markersref):
 373     markup = ''
 374     markers = {i: 1 for i in markersref}
 375
 376     while text != '':
 377         closest_marker = ''
 378         closest_marker_position = -1
 379         text_marker = ''
 380         offset = 0
 381         markers_rest = []
 382
 383         for marker, use in markers.items():
 384             if not use:
 385                 continue
 386
 387             marker_position = text.find(marker)
 388
 389             if marker_position < 0:
 390                 markers[marker] = 0
 391                 continue
 392
 393             if closest_marker == '' or marker_position < closest_marker_position:
 394                 closest_marker = marker
 395                 closest_marker_position = marker_position
 396
 397         if closest_marker_position >= 0:
 398             text_marker = text[closest_marker_position:]
 399
 400         if text_marker == '':
 401             markup += text
 402             text = ''
 403             continue
 404
 405         markup += text[:closest_marker_position]
 406         text = text[closest_marker_position:]
 407         markers_rest = {k: v for k, v in markers.items() if v and k != closest_marker}
 408
 409         if closest_marker == '![' or closest_marker == '[':
 410             # 'id-ref' : local id reference
 411             # 'title'  : link short description/alt-text/tooltip
 412             # 'a'      : linked text
 413             # 'href'   : external link
 414             # 'is-media': is link to media object
 415             element = None
 416
 417             # FIXME: '(?R)' is a recursive subpattern
 418             # match a [...] block with no ][ inside or this thing again
 419             # m = re.search(r'\[((?:[^][]|(?R))*)\]', text)
 420             m = re.search(r'\[((?:[^][])*)\]', text)
 421             if ']' in text and m:
 422                 element = {'is-media': text[0] == '!',
 423                            'a': EscapeEntities(m.group(1)),
 424                            }
 425
 426                 offset = len(m.group(0))
 427                 if element['is-media']:
 428                     offset += 1
 429                 logging.debug("Recursive md-expr match: off=%d, text='%s', match='%s'", offset, text, m.group(1))
 430
 431                 remaining_text = text[offset:]
 432                 # (link "alt-text")
 433                 m2 = re.search(r'''^\([ ]*([^)'"]*?)(?:[ ]+['"](.+?)['"])?[ ]*\)''', remaining_text)
 434                 # [id-reference]
 435                 m3 = re.search(r'^\s*\[([^\]<]*?)\]', remaining_text)
 436                 if m2:
 437                     element['href'] = m2.group(1)
 438                     if m2.group(2):
 439                         element['title'] = m2.group(2)
 440                     offset += len(m2.group(0))
 441                 elif m3:
 442                     element['id-ref'] = m3.group(1)
 443                     offset += len(m3.group(0))
 444                 else:
 445                     element = None
 446
 447             if element:
 448                 logging.debug("output link for", element)
 449
 450                 if 'href' in element:
 451                     element['href'] = EscapeEntities(element['href'])
 452
 453                 if element['is-media']:
 454                     # media link
 455                     markup += '<inlinemediaobject><imageobject><imagedata fileref="' + \
 456                         element['href'] + '"></imagedata></imageobject>'
 457
 458                     if 'a' in element:
 459                         markup += "<textobject><phrase>" + element['a'] + "</phrase></textobject>"
 460
 461                     markup += "</inlinemediaobject>"
 462                 elif 'id-ref' in element:
 463                     # internal link
 464                     element['a'] = MarkDownParseSpanElementsInner(element['a'], markers_rest)
 465                     markup += '<link linkend="' + element['id-ref'] + '"'
 466
 467                     if 'title' in element:
 468                         # title attribute not supported
 469                         pass
 470
 471                     markup += '>' + element['a'] + "</link>"
 472                 else:
 473                     # external link
 474                     element['a'] = MarkDownParseSpanElementsInner(element['a'], markers_rest)
 475                     markup += '<ulink url="' + element['href'] + '"'
 476
 477                     if 'title' in element:
 478                         # title attribute not supported
 479                         pass
 480
 481                     markup += '>' + element['a'] + "</ulink>"
 482
 483             else:
 484                 markup += closest_marker
 485                 if closest_marker == '![':
 486                     offset = 2
 487                 else:
 488                     offset = 1
 489
 490         elif closest_marker == '<':
 491             m4 = re.search(r'^<(https?:[\/]{2}[^\s]+?)>', text, flags=re.I)
 492             m5 = re.search(r'^<([A-Za-z0-9._-]+?@[A-Za-z0-9._-]+?)>', text)
 493             m6 = re.search(r'^<[^>]+?>', text)
 494             if m4:
 495                 element_url = EscapeEntities(m4.group(1))
 496
 497                 markup += '<ulink url="' + element_url + '">' + element_url + '</ulink>'
 498                 offset = len(m4.group(0))
 499             elif m5:
 500                 markup += "<ulink url=\"mailto:" + m5.group(1) + "\">" + m5.group(1) + "</ulink>"
 501                 offset = len(m5.group(0))
 502             elif m6:
 503                 markup += m6.group(0)
 504                 offset = len(m6.group(0))
 505             else:
 506                 markup += "&lt;"
 507                 offset = 1
 508
 509         elif closest_marker == "\\":
 510             special_char = ''
 511             if len(text) > 1:
 512                 special_char = text[1]
 513             if special_char in MD_ESCAPABLE_CHARS or special_char in MD_GTK_ESCAPABLE_CHARS:
 514                 markup += special_char
 515                 offset = 2
 516             else:
 517                 markup += "\\"
 518                 offset = 1
 519
 520         elif closest_marker == "`":
 521             m7 = re.search(r'^(`+)([^`]+?)\1(?!`)', text)
 522             if m7:
 523                 element_text = EscapeEntities(m7.group(2))
 524                 markup += "<literal>" + element_text + "</literal>"
 525                 offset = len(m7.group(0))
 526             else:
 527                 markup += "`"
 528                 offset = 1
 529
 530         elif closest_marker == "@":
 531             # Convert '@param()'
 532             # FIXME: we could make those also links ($symbol.$2), but that would be less
 533             # useful as the link target is a few lines up or down
 534             m7 = re.search(r'^(\A|[^\\])\@(\w+((\.|->)\w+)*)\s*\(\)', text)
 535             m8 = re.search(r'^(\A|[^\\])\@(\w+((\.|->)\w+)*)', text)
 536             m9 = re.search(r'^\\\@', text)
 537             if m7:
 538                 markup += m7.group(1) + "<parameter>" + m7.group(2) + "()</parameter>\n"
 539                 offset = len(m7.group(0))
 540             elif m8:
 541                 # Convert '@param', but not '\@param'.
 542                 markup += m8.group(1) + "<parameter>" + m8.group(2) + "</parameter>\n"
 543                 offset = len(m8.group(0))
 544             elif m9:
 545                 markup += r"\@"
 546                 offset = len(m9.group(0))
 547             else:
 548                 markup += "@"
 549                 offset = 1
 550
 551         elif closest_marker == '#':
 552             m10 = re.search(r'^(\A|[^\\])#([\w\-:\.]+[\w]+)\s*\(\)', text)
 553             m11 = re.search(r'^(\A|[^\\])#([\w\-:\.]+[\w]+)', text)
 554             m12 = re.search(r'^\\#', text)
 555             if m10:
 556                 # handle #Object.func()
 557                 markup += m10.group(1) + MakeXRef(m10.group(2), tagify(m10.group(2) + "()", "function"))
 558                 offset = len(m10.group(0))
 559             elif m11:
 560                 # Convert '#symbol', but not '\#symbol'.
 561                 markup += m11.group(1) + MakeHashXRef(m11.group(2), "type")
 562                 offset = len(m11.group(0))
 563             elif m12:
 564                 markup += '#'
 565                 offset = len(m12.group(0))
 566             else:
 567                 markup += '#'
 568                 offset = 1
 569
 570         elif closest_marker == "%":
 571             m12 = re.search(r'^(\A|[^\\])\%(-?\w+)', text)
 572             m13 = re.search(r'^\\%', text)
 573             if m12:
 574                 # Convert '%constant', but not '\%constant'.
 575                 # Also allow negative numbers, e.g. %-1.
 576                 markup += m12.group(1) + MakeXRef(m12.group(2), tagify(m12.group(2), "literal"))
 577                 offset = len(m12.group(0))
 578             elif m13:
 579                 markup += r"\%"
 580                 offset = len(m13.group(0))
 581             else:
 582                 markup += "%"
 583                 offset = 1
 584
 585         if offset > 0:
 586             text = text[offset:]
 587
 588     return markup
 589
 590
 591 def MarkDownParseSpanElements(text):
 592     markers = ["\\", '<', '![', '[', "`", '%', '#', '@']
 593
 594     text = MarkDownParseSpanElementsInner(text, markers)
 595
 596     # Convert 'function()' or 'macro()'.
 597     # if there is abc_*_def() we don't want to make a link to _def()
 598     # FIXME: also handle abc(def(....)) : but that would need to be done recursively :/
 599     def f(m):
 600         return m.group(1) + MakeXRef(m.group(2), tagify(m.group(2) + "()", "function"))
 601     text = re.sub(r'([^\*.\w])(\w+)\s*\(\)', f, text)
 602     return text
 603
 604
 605 def EscapeEntities(text):
 606     return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
 607
 608
 609 def ReplaceEntities(text):
 610     entities = [["&lt;", '<'],
 611                 ["&gt;", '>'],
 612                 ["&ast;", '*'],
 613                 ["&num;", '#'],
 614                 ["&percnt;", '%'],
 615                 ["&colon;", ':'],
 616                 ["&quot;", '"'],
 617                 ["&apos;", "'"],
 618                 ["&nbsp;", ' '],
 619                 ["&amp;", '&'],  # Do this last, or the others get messed up.
 620                 ]
 621
 622     for i in entities:
 623         text = re.sub(i[0], i[1], text)
 624     return text
 625
 626
 627 def MarkDownOutputDocBook(blocksref, symbol, context):
 628     output = ''
 629     blocks = blocksref
 630
 631     for block in blocks:
 632         # $output += "\n<!-- beg type='" . $block->{"type"} . "'-->\n"
 633
 634         if block["type"] == "paragraph":
 635             text = MarkDownParseSpanElements(block["text"])
 636             if context == "li" and output == '':
 637                 if 'interrupted' in block:
 638                     output += "\n<para>%s</para>\n" % text
 639                 else:
 640                     output += "<para>%s</para>" % text
 641                     if len(blocks) > 1:
 642                         output += "\n"
 643             else:
 644                 output += "<para>%s</para>\n" % text
 645
 646         elif block["type"] == "heading":
 647
 648             title = MarkDownParseSpanElements(block["text"])
 649
 650             if block["level"] == 1:
 651                 tag = "refsect2"
 652             else:
 653                 tag = "refsect3"
 654
 655             text = MarkDownParseLines(block["lines"], symbol, "heading")
 656             if 'id' in block:
 657                 output += "<%s id=\"%s\">" % (tag, block["id"])
 658             else:
 659                 output += "<%s>" % tag
 660
 661             output += "<title>%s</title>%s</%s>\n" % (title, text, tag)
 662         elif block["type"] == "li":
 663             tag = "itemizedlist"
 664
 665             if "first" in block:
 666                 if block["ordered"]:
 667                     tag = "orderedlist"
 668                 output += "<%s>\n" % tag
 669
 670             if "interrupted" in block:
 671                 block["lines"].append('')
 672
 673             text = MarkDownParseLines(block["lines"], symbol, "li")
 674             output += "<listitem>" + text + "</listitem>\n"
 675             if 'last' in block:
 676                 if block["ordered"]:
 677                     tag = "orderedlist"
 678                 output += "</%s>\n" % tag
 679
 680         elif block["type"] == "quote":
 681             text = MarkDownParseLines(block["lines"], symbol, "quote")
 682             output += "<blockquote>\n%s</blockquote>\n" % text
 683         elif block["type"] == "code":
 684             tag = "programlisting"
 685
 686             if "language" in block:
 687                 if block["language"] == "plain":
 688                     output += "<informalexample><screen><![CDATA[\n"
 689                     tag = "screen"
 690                 else:
 691                     output += "<informalexample><programlisting role=\"example\" language=\"%s\"><![CDATA[\n" % block['language']
 692             else:
 693                 output += "<informalexample><programlisting role=\"example\"><![CDATA[\n"
 694
 695             logging.debug('listing for %s: [%s]', symbol, '\n'.join(block['lines']))
 696             for line in block["lines"]:
 697                 output += ReplaceEntities(line) + "\n"
 698
 699             output += "]]></%s></informalexample>\n" % tag
 700         elif block["type"] == "markup":
 701             text = ExpandAbbreviations(symbol, block["text"])
 702             output += text + "\n"
 703         else:
 704             output += block["text"] + "\n"
 705
 706         # $output += "\n<!-- end type='" . $block->{"type"} . "'-->\n"
 707     return output
 708
 709
 710 def MarkDownParseLines(lines, symbol, context):
 711     logging.info('md parse: ctx=%s, [%s]', context, '\n'.join(lines))
 712     blocks = MarkDownParseBlocks(lines, symbol, context)
 713     output = MarkDownOutputDocBook(blocks, symbol, context)
 714     return output
 715
 716
 717 def MarkDownParse(text, symbol):
 718     """Converts mark down syntax to the respective docbook.
 719
 720     http://de.wikipedia.org/wiki/Markdown
 721     Inspired by the design of ParseDown
 722     http://parsedown.org/
 723     Copyright (c) 2013 Emanuil Rusev, erusev.com
 724
 725     SUPPORTED MARKDOWN
 726     ==================
 727
 728     Atx-style Headers
 729     -----------------
 730
 731     # Header 1
 732
 733     ## Header 2 ##
 734
 735     Setext-style Headers
 736     --------------------
 737
 738     Header 1
 739     ========
 740
 741     Header 2
 742     --------
 743
 744     Ordered (unnested) Lists
 745     ------------------------
 746
 747     1. item 1
 748
 749     1. item 2 with loooong
 750        description
 751
 752     3. item 3
 753
 754     Note: we require a blank line above the list items
 755     """
 756     # TODO(ensonic): it would be nice to add id parameters to the refsect2 elements
 757
 758     return MarkDownParseLines(text.splitlines(), symbol, '')