asciidoc - converts an AsciiDoc text file to DocBook, HTML or LinuxDoc

Copyright (C) 2002-2005 Stuart Rackham. Free use of this software is granted
under the terms of the GNU General Public License (GPL).
9 import sys
, os
, re
, string
, time
, traceback
, tempfile
, popen2
, codecs
VERSION = '7.1.2'   # See CHANGELOG file for version history.

#---------------------------------------------------------------------------
# Program constants.
#---------------------------------------------------------------------------
DEFAULT_BACKEND = 'xhtml11'
DEFAULT_DOCTYPE = 'article'
# Allowed substitution options for List, Paragraph and DelimitedBlock
# definition subs entry.
SUBS_OPTIONS = ('specialcharacters','quotes','specialwords','replacements',
    'attributes','macros','callouts','normal','verbatim','none','passthroughs')
# Default value for unspecified subs and presubs configuration file entries.
SUBS_NORMAL = ('specialcharacters','quotes','specialwords','replacements',
    'attributes','macros','passthroughs')
SUBS_VERBATIM = ('specialcharacters','callouts')

NAME_RE = r'(?u)[^\W\d][-\w]*'  # Valid section or attribute name.
31 #---------------------------------------------------------------------------
32 # Utility functions and classes.
33 #---------------------------------------------------------------------------
class EAsciiDoc(Exception):
    '''Application specific exception raised on fatal AsciiDoc errors.'''
    # NOTE(review): class body not visible in this chunk; assumed empty —
    # confirm against upstream source.
    pass
39 from UserDict
import UserDict
41 class OrderedDict(UserDict
):
42 '''Python Cookbook: Ordered Dictionary, Submitter: David Benjamin'''
43 def __init__(self
, d
= None):
45 UserDict
.__init
__(self
, d
)
46 def __delitem__(self
, key
):
47 UserDict
.__delitem
__(self
, key
)
48 self
._keys
.remove(key
)
49 def __setitem__(self
, key
, item
):
50 UserDict
.__setitem
__(self
, key
, item
)
51 if key
not in self
._keys
: self
._keys
.append(key
)
56 d
= UserDict
.copy(self
)
57 d
._keys
= self
._keys
[:]
60 return zip(self
._keys
, self
.values())
67 raise KeyError('dictionary is empty')
71 def setdefault(self
, key
, failobj
= None):
72 UserDict
.setdefault(self
, key
, failobj
)
73 if key
not in self
._keys
: self
._keys
.append(key
)
75 UserDict
.update(self
, d
)
77 if key
not in self
._keys
: self
._keys
.append(key
)
79 return map(self
.get
, self
._keys
)
def print_stderr(line):
    '''Write a single line to stderr terminated with the platform EOL.'''
    sys.stderr.write('%s%s' % (line, os.linesep))
84 def verbose(msg
,linenos
=True):
85 '''-v option messages.'''
87 console(msg
,linenos
=linenos
)
def warning(msg, linenos=True):
    '''Emit a console message with a WARNING: prefix.'''
    console(msg, 'WARNING: ', linenos)
def deprecated(old, new, linenos=True):
    '''Report use of deprecated construct 'old' and its replacement 'new'.'''
    deprecation = '%s: %s' % (old, new)
    console(deprecation, 'DEPRECATED: ', linenos)
def error(msg):
    '''Report fatal error but don't exit application, continue in the hope of
    reporting all fatal errors finishing with a non-zero exit code.'''
    # NOTE(review): the def line is missing from this view; the signature is
    # fixed by call sites such as error('unsafe: '+msg).
    console(msg, 'ERROR: ')
    document.has_errors = True
101 def console(msg
, prefix
='', linenos
=True):
102 '''Print message to stderr. 'offset' is added to reported line number for
103 warnings emitted when reading ahead.'''
105 if linenos
and reader
.cursor
:
106 s
= s
+ "%s: line %d: " \
107 % (os
.path
.basename(reader
.cursor
[0]),reader
.cursor
[1])
def file_in(fname, dir):
    '''Return True if file fname resides inside directory dir.

    'dir' is normalized to an absolute path and symbolic links in 'fname'
    are resolved with realpath() before the containment test.
    '''
    assert os.path.isfile(fname)
    assert os.path.isdir(dir)
    dir = os.path.abspath(dir)
    fname = realpath(fname)
    # BUG FIX: os.path.commonprefix() compares character-by-character, so a
    # file in a sibling directory with a shared name prefix (for example
    # '/tmp/abcdef/x' against dir '/tmp/abc') was wrongly reported as inside
    # dir. This function backs the safe-mode check, so that mattered.
    # Require a whole-component match instead.
    if not dir.endswith(os.sep):
        dir = dir + os.sep
    return fname.startswith(dir)
def is_safe_file(fname, dir=None):
    '''Return True when safe mode is off or fname resides in directory dir.'''
    # A safe file must reside in directory dir (defaults to the source file
    # directory).
    # NOTE(review): the 'dir is None' guard was reconstructed (its line is
    # missing from this view) — implied by the dir=None default; confirm
    # against upstream source.
    if dir is None:
        dir = os.path.dirname(document.infile)
    return not safe() or file_in(fname, dir)
# Return file name which must reside in the parent file directory.
# Return None if file is not found or not safe.
def safe_filename(fname, parentdir):
    # NOTE(review): the None/fname return lines were reconstructed from the
    # contract stated in the comments above; confirm against upstream source.
    if not os.path.isabs(fname):
        # Include files are relative to parent document.
        fname = os.path.join(parentdir, fname)
    if not os.path.isfile(fname):
        warning('include file not found: %s' % fname)
        return None
    if not is_safe_file(fname, parentdir):
        unsafe_error('include file: %s' % fname)
        return None
    return fname
def unsafe_error(msg):
    '''Report an operation disallowed by safe mode.'''
    error('unsafe: ' + msg)
148 '''Return the absolute pathname of the file fname. Follow symbolic links.
149 os.realpath() not available in Python prior to 2.2 and not portable.'''
150 # Follow symlinks to the actual executable.
153 while os
.path
.islink(fname
):
154 linkdir
= os
.path
.dirname(fname
)
155 fname
= os
.readlink(fname
)
156 if linkdir
: os
.chdir(linkdir
) # Symlinks can be relative.
157 fname
= os
.path
.abspath(fname
)
160 return os
.path
.normpath(fname
)
163 # Run shell command and return stdout.
164 child
= os
.popen(cmd
)
173 '''Assign all attributes from 'src' object to 'dst' object.'''
174 for a
,v
in src
.__dict
__.items():
178 '''Trim white space and, if necessary, quote characters from s.'''
180 # Strip quotation mark characters from quoted strings.
181 if len(s
) >= 3 and s
[0] == '"' and s
[-1] == '"':
186 '''Return True if s is a valid regular expression else return False.'''
191 def join_regexp(relist
):
192 '''Join list of regular expressions re1,re2,... to single regular
193 expression (re1)|(re2)|...'''
197 # Delete named groups to avoid ambiguity.
199 result
.append(re
.sub(r
'\?P<\S+?>','',s
))
200 result
= string
.join(result
,')|(')
201 result
= '('+result
+')'
204 def validate(value
,rule
,errmsg
):
205 '''Validate value against rule expression. Throw EAsciiDoc exception with
206 errmsg if validation fails.'''
208 if not eval(string
.replace(rule
,'$',str(value
))):
209 raise EAsciiDoc
,errmsg
211 raise EAsciiDoc
,errmsg
214 def join_lines(lines
):
215 '''Return a list in which lines terminated with the backslash line
216 continuation character are joined.'''
221 if line
and line
[-1] == '\\':
226 result
.append(s
+line
)
def dovetail(lines1, lines2):
    '''Append list or tuple of strings 'lines2' to list 'lines1'. Join the
    last string in 'lines1' with the first string in 'lines2' into a single
    string. Returns a new list.'''
    assert isinstance(lines1, (list, tuple))
    assert isinstance(lines2, (list, tuple))
    # An empty side means there is nothing to join — plain concatenation.
    if not lines1 or not lines2:
        return list(lines1) + list(lines2)
    joined = list(lines1[:-1])
    joined.append(lines1[-1] + lines2[0])
    joined.extend(lines2[1:])
    return joined
def dovetail_tags(stag, content, etag):
    '''Merge the start tag with the first content line and the last
    content line with the end tag. This ensures verbatim elements don't
    include extraneous opening and closing line breaks.'''
    return dovetail(dovetail(stag, content), etag)
254 def parse_attributes(attrs
,dict):
255 '''Update a dictionary with name/value attributes from the attrs string.
256 The attrs string is a comma separated list of values and keyword name=value
257 pairs. Values must preceed keywords and are named '1','2'... The entire
258 attributes list is named '0'. If keywords are specified string values must
265 dict: {'2': 'world', '0': 'hello,world', '1': 'hello'}
267 attrs: '"hello",planet="earth"'
268 dict: {'planet': 'earth', '0': '"hello",planet="earth"', '1': 'hello'}
270 def f(*args
,**keywords
):
271 # Name and add aguments '1','2'... to keywords.
272 for i
in range(len(args
)):
273 if not keywords
.has_key(str(i
+1)):
274 keywords
[str(i
+1)] = args
[i
]
280 # Replace line separators with spaces so line spanning works.
281 s
= re
.sub(r
'\s', ' ', attrs
)
286 # Try quoting the attrs.
287 s
= string
.replace(s
,'"',r
'\"') # Escape double-quotes.
288 s
= string
.split(s
,',')
289 s
= map(lambda x
: '"'+string
.strip(x
)+'"',s
)
290 s
= string
.join(s
,',')
294 return # If there's a syntax error leave with {0}=attrs.
295 for k
in d
.keys(): # Drop any empty positional arguments.
296 if d
[k
] == '': del d
[k
]
300 def parse_named_attributes(s
,attrs
):
301 '''Update a attrs dictionary with name="value" attributes from the s string.
302 Returns False if invalid syntax.
304 attrs: 'star="sun",planet="earth"'
305 dict: {'planet':'earth', 'star':'sun'}
307 def f(**keywords
): return keywords
317 '''Parse comma separated string of Python literals. Return a tuple of of
320 result
= eval('tuple(['+s
+'])')
322 raise EAsciiDoc
,'malformed list: '+s
325 def parse_options(options
,allowed
,errmsg
):
326 '''Parse comma separated string of unquoted option names and return as a
327 tuple of valid options. 'allowed' is a list of allowed option values.
328 If allowed=() then all legitimate names are allowed.
329 'errmsg' isan error message prefix if an illegal option error is thrown.'''
332 for s
in re
.split(r
'\s*,\s*',options
):
333 if (allowed
and s
not in allowed
) or (s
== '' or not is_name(s
)):
334 raise EAsciiDoc
,'%s: %s' % (errmsg
,s
)
339 '''Drop non-symbol characters and convert to lowercase.'''
340 return string
.lower(re
.sub(r
'(?u)[^\w\-_]', '', s
))
def is_name(s):
    '''Return True if s is a valid attribute, macro or tag name
    (starts with alpha containing alphanumeric and dashes only).'''
    # NOTE(review): the def line is missing from this view; the signature is
    # fixed by call sites such as is_name(s) in parse_options().
    return re.match(NAME_RE, s) is not None
347 def subs_quotes(text
):
348 '''Quoted text is marked up and the resulting text is
350 quotes
= config
.quotes
.keys()
351 # The quotes are iterated in reverse sort order to avoid ambiguity,
352 # for example, '' is processed before '.
356 i
= string
.find(quote
,'|')
357 if i
!= -1 and quote
!= '|' and quote
!= '||':
362 # Non-word (\W) characters are allowed at boundaries to accomodate
363 # enveloping quotes. re.S flag for line spanning.
364 reo
= re
.compile(r
'(?msu)(^|\W)(\[(?P<attrs>[^[]+?)\])?'\
365 + r
'(?:' + re
.escape(lq
) + r
')' \
366 + r
'(?P<content>.*?)(?:'+re
.escape(rq
)+r
')(?=\W|$)')
369 mo
= reo
.search(text
,pos
)
371 if text
[mo
.start()] == '\\':
375 parse_attributes(mo
.group('attrs'), attrs
)
376 stag
,etag
= config
.tag(config
.quotes
[quote
], attrs
)
377 s
= mo
.group(1) + stag
+ mo
.group('content') + etag
378 text
= text
[:mo
.start()] + s
+ text
[mo
.end():]
379 pos
= mo
.start() + len(s
)
380 # Unescape escaped quotes.
381 text
= string
.replace(text
,'\\'+lq
, lq
)
383 text
= string
.replace(text
,'\\'+rq
, rq
)
386 def subs_tag(tag
,dict={}):
387 '''Perform attribute substitution and split tag string returning start, end
388 tag tuple (c.f. Config.tag()).'''
389 s
= subs_attrs(tag
,dict)
391 warning('tag "%s" dropped: contains undefined attribute' % tag
)
393 result
= string
.split(s
,'|')
396 elif len(result
) == 2:
399 raise EAsciiDoc
,'malformed tag: %s' % tag
401 def parse_entry(entry
, dict=None, unquote
=False, unique_values
=False,
402 allow_name_only
=False):
403 '''Parse name=value entry to dictionary 'dict'. Return tuple (name,value)
404 or None if illegal entry.
405 If the syntax is name= then value is set to ''.
406 If the syntax is name and allow_name_only=True then value is set to ''.
407 If the syntax is name! and allow_name_only=True then value is set to None.
408 Leading and trailing white space is striped from 'name' and 'value'.
409 'name' can contain any printable characters. If 'name includes the equals
410 '=' character it must be escaped with a backslash.
411 If 'unquote' is True leading and trailing double-quotes are stripped from
413 If unique_values' is True then dictionary entries with the same value are
414 removed before the parsed entry is added.'''
415 mo
=re
.search(r
'[^\\](=)',entry
)
416 if mo
: # name=value entry.
417 name
= entry
[:mo
.start(1)]
418 value
= entry
[mo
.end(1):]
419 elif allow_name_only
and entry
: # name or name! entry.
429 name
= strip_quotes(name
)
430 if value
is not None:
431 value
= strip_quotes(value
)
433 name
= string
.strip(name
)
434 if value
is not None:
435 value
= string
.strip(value
)
440 for k
,v
in dict.items():
441 if v
== value
: del dict[k
]
445 def parse_entries(entries
, dict, unquote
=False, unique_values
=False,
446 allow_name_only
=False):
447 '''Parse name=value entries from from lines of text in 'entries' into
448 dictionary 'dict'. Blank lines are skipped.'''
449 for entry
in entries
:
450 if entry
and not parse_entry(entry
, dict, unquote
, unique_values
,
452 raise EAsciiDoc
,'malformed section entry: %s' % entry
454 def load_sections(sections
, fname
, dir=None, namepat
=NAME_RE
):
455 '''Loads sections dictionary with sections from file fname.
456 Existing sections are overlaid. Silently skips missing configuration
459 fname
= os
.path
.join(dir, fname
)
460 # Sliently skip missing configuration file.
461 if not os
.path
.isfile(fname
):
463 lines
= open(fname
).readlines()
464 reo
= re
.compile(r
'^\[(?P<section>'+namepat
+')\]\s*$')
465 section
,contents
= '',[]
467 if line
and line
[0] == '#': # Skip comment lines.
469 line
= string
.rstrip(line
)
470 found
= reo
.findall(line
)
472 if section
: # Store previous section.
473 sections
[section
] = contents
474 section
= found
[0].lower()
477 contents
.append(line
)
478 if section
and contents
: # Store last section.
479 sections
[section
] = contents
481 def dump_section(name
,dict,f
=sys
.stdout
):
482 '''Write parameters in 'dict' as in configuration file section format with
484 f
.write('[%s]%s' % (name
,writer
.newline
))
485 for k
,v
in dict.items():
487 # Quote if necessary.
488 if len(k
) != len(string
.strip(k
)):
490 if v
and len(v
) != len(string
.strip(v
)):
493 # Don't dump undefined attributes.
498 s
= '\\' + s
# Escape so not treated as comment lines.
499 f
.write('%s%s' % (s
,writer
.newline
))
500 f
.write(writer
.newline
)
502 def update_attrs(attrs
,dict):
503 '''Update 'attrs' dictionary with parsed attributes in dictionary 'dict'.'''
504 for k
,v
in dict.items():
506 raise EAsciiDoc
,'illegal attribute name: %s' % k
509 def readlines(fname
):
510 '''Read lines from file named 'fname' and strip trailing white space.'''
514 lines
= f
.readlines()
518 for i
in range(len(lines
)):
519 lines
[i
] = string
.rstrip(lines
[i
])
522 def filter_lines(filter,lines
,dict={}):
523 '''Run 'lines' through the 'filter' shell command and return the result. The
524 'dict' dictionary contains additional filter attributes.'''
525 # BUG: Has problems finding filters with spaces in command name.
528 if os
.name
!= 'posix':
529 warning('filters do not work in a non-posix environment')
531 # Perform attributes substitution on the filter command.
532 s
= subs_attrs(filter,dict)
534 raise EAsciiDoc
,'filter has undefined attribute: %s' % filter
536 # Search for the filter command in both user and application 'filters'
538 mo
= re
.match(r
'^(?P<cmd>\S+)(?P<tail>.*)$', filter)
540 cmd
= mo
.group('cmd')
542 if not os
.path
.dirname(cmd
):
543 # Check in asciidoc user and application directories for unqualified
546 cmd2
= os
.path
.join(USER_DIR
,'filters',cmd
)
547 if os
.path
.isfile(cmd2
):
550 cmd2
= os
.path
.join(GLOBAL_CONFIG_DIR
,'filters',cmd
)
551 if os
.path
.isfile(cmd2
):
554 cmd2
= os
.path
.join(APP_DIR
,'filters',cmd
)
555 if os
.path
.isfile(cmd2
):
558 filter = cmd2
+ mo
.group('tail')
560 if os
.uname()[0][:6] == 'CYGWIN':
561 # popen2() does not like non-drive letter path names under
563 s
= string
.strip(syseval('cygpath -m "'+cmd
+'"'))
566 if not os
.path
.isfile(cmd
):
567 warning('filter not found: %s' % cmd
)
569 # Put the real filter path into the filter command.
570 verbose('filtering: '+filter)
574 r
,w
= popen2
.popen2(filter)
575 # Polled I/O loop to alleviate full buffer deadlocks.
577 while i
< len(lines
):
579 if select
.select([],[w
.fileno()],[],0)[1]:
580 w
.write(line
+os
.linesep
) # Use platform line terminator.
582 if select
.select([r
.fileno()],[],[],0)[0]:
584 if not s
: break # Exit if filter output closes.
585 result
.append(string
.rstrip(s
))
587 for s
in r
.readlines():
588 result
.append(string
.rstrip(s
))
591 raise EAsciiDoc
,'filter error: %s' % filter
592 # There's no easy way to guage whether popen2() found and executed the
593 # filter, so guess that if it produced no output there is probably a
595 if lines
and not result
:
596 warning('no output from filter: %s' % filter)
599 def system(name
, args
, is_macro
=False):
600 '''Evaluate a system attribute ({name:args}) or system block macro
601 (name::[args]). If is_macro is True then we are processing a system
602 block macro otherwise it's a system attribute.
603 NOTE: The include1 attribute is used internally by the include1::[] macro
604 and is not for public use.'''
610 separator
= writer
.newline
611 if name
not in ('eval','sys','sys2','include','include1'):
612 msg
= 'illegal '+syntax
% (name
,args
)
614 msg
+= ': macro name'
616 msg
+= ': executable attribute name'
619 if name
!= 'include1':
620 verbose(('evaluating: '+syntax
) % (name
,args
))
621 if safe() and name
not in ('include','include1'):
622 unsafe_error(syntax
% (name
,args
))
630 elif result
is False:
632 elif result
is not None:
635 warning((syntax
+': expression evaluation error') % (name
,args
))
636 elif name
in ('sys','sys2'):
638 fd
,tmp
= tempfile
.mkstemp()
642 cmd
= cmd
+ (' > %s' % tmp
)
646 warning((syntax
+': non-zero exit status') % (name
,args
))
648 if os
.path
.isfile(tmp
):
649 lines
= readlines(tmp
)
653 raise EAsciiDoc
,(syntax
+': temp file read error') % (name
,args
)
654 result
= string
.join(lines
, separator
)
656 if os
.path
.isfile(tmp
):
658 elif name
== 'include':
659 if not os
.path
.exists(args
):
660 warning((syntax
+': file does not exist') % (name
,args
))
661 elif not is_safe_file(args
):
662 unsafe_error(syntax
% (name
,args
))
664 result
= readlines(args
)
666 result
= subs_attrs(result
)
667 result
= string
.join(result
, separator
)
668 result
= string
.expandtabs(result
, reader
.tabsize
)
671 elif name
== 'include1':
672 result
= string
.join(config
.include1
[args
], separator
)
677 def subs_attrs(lines
,dict={}):
678 '''Substitute 'lines' of text with attributes from the global
679 document.attributes dictionary and from the 'dict' dictionary ('dict'
680 entries take precedence). Return a tuple of the substituted lines. 'lines'
681 containing undefined attributes are deleted. If 'lines' is a string then
684 - Attribute references are substituted in the following order: simple,
686 - If a 'dict' value includes an attribute reference it won't be substituted
687 unless it's substitution order follows that of the source attribute
690 The moral is that any attribute references in 'dict' attribute values
691 should be substituted beforehand.'''
693 def end_brace(text
,start
):
694 '''Return index following end brace that matches brace at start in
696 assert text
[start
] == '{'
699 for c
in text
[start
:]:
700 # Skip braces that are followed by a backslash.
701 if result
== len(text
)-1 or text
[result
+1] != '\\':
702 if c
== '{': n
= n
+ 1
703 elif c
== '}': n
= n
- 1
708 if isinstance(lines
,StringType
):
712 string_result
= False
714 attrs
= document
.attributes
.copy()
716 # Substitute all attributes in all lines.
717 for i
in range(len(lines
)-1,-1,-1): # Reverse iterate lines.
719 # Make it easier for regular expressions.
720 text
= string
.replace(text
,'\\{','{\\')
721 text
= string
.replace(text
,'\\}','}\\')
722 # Expand simple attributes ({name}).
723 # Nested attributes not allowed.
724 reo
= re
.compile(r
'(?su)\{(?P<name>[^\\\W][-\w]*?)\}(?!\\)')
727 mo
= reo
.search(text
,pos
)
729 s
= attrs
.get(mo
.group('name'))
734 text
= text
[:mo
.start()] + s
+ text
[mo
.end():]
735 pos
= mo
.start() + len(s
)
736 # Expand conditional attributes.
737 reo
= re
.compile(r
'(?su)\{(?P<name>[^\\\W][-\w]*?)' \
738 r
'(?P<op>\=|\?|!|#|%|@|\$)' \
739 r
'(?P<value>.*?)\}(?!\\)')
742 mo
= reo
.search(text
,pos
)
745 name
= mo
.group('name')
746 lval
= attrs
.get(name
)
748 # mo.end() is not good enough because '{x={y}}' matches '{x={y}'.
749 end
= end_brace(text
,mo
.start())
750 rval
= text
[mo
.start('value'):end
-1]
752 if op
== '=': s
= rval
753 elif op
== '?': s
= ''
754 elif op
== '!': s
= rval
755 elif op
== '#': s
= '{'+name
+'}' # So the line is dropped.
756 elif op
== '%': s
= rval
757 elif op
in ('@','$'):
758 s
= '{'+name
+'}' # So the line is dropped.
760 assert False, 'illegal attribute: %s' % attr
762 if op
== '=': s
= lval
763 elif op
== '?': s
= rval
764 elif op
== '!': s
= ''
765 elif op
== '#': s
= rval
766 elif op
== '%': s
= '{zzzzz}' # So the line is dropped.
767 elif op
in ('@','$'):
768 v
= re
.split(r
'(?<!\\):',rval
)
769 if len(v
) not in (2,3):
770 error('illegal attribute syntax: %s' % attr
)
772 elif not is_regexp('^'+v
[0]+'$'):
773 error('illegal attribute regexp: %s' % attr
)
776 v
= [s
.replace('\\:',':') for s
in v
]
777 re_mo
= re
.match('^'+v
[0]+'$',lval
)
780 s
= v
[1] # {<name>@<re>:<v1>[:<v2>]}
782 if len(v
) == 3: # {<name>@<re>:<v1>:<v2>}
784 else: # {<name>@<re>:<v1>}
788 if len(v
) == 2: # {<name>$<re>:<v1>}
790 elif v
[1] == '': # {<name>$<re>::<v2>}
791 s
= '{zzzzz}' # So the line is dropped.
792 else: # {<name>$<re>:<v1>:<v2>}
795 if len(v
) == 2: # {<name>$<re>:<v1>}
796 s
= '{zzzzz}' # So the line is dropped.
797 else: # {<name>$<re>:<v1>:<v2>}
800 assert False, 'illegal attribute: %s' % attr
802 text
= text
[:mo
.start()] + s
+ text
[end
:]
803 pos
= mo
.start() + len(s
)
804 # Drop line if it contains unsubstituted {name} references.
805 skipped
= re
.search(r
'(?su)\{[^\\\W][-\w]*?\}(?!\\)', text
)
809 # Expand system attributes.
810 reo
= re
.compile(r
'(?su)\{(?P<action>[^\\\W][-\w]*?):(?P<expr>.*?)\}(?!\\)')
814 mo
= reo
.search(text
,pos
)
816 expr
= mo
.group('expr')
817 expr
= expr
.replace('{\\','{')
818 expr
= expr
.replace('}\\','}')
819 s
= system(mo
.group('action'),expr
)
823 text
= text
[:mo
.start()] + s
+ text
[mo
.end():]
824 pos
= mo
.start() + len(s
)
825 # Drop line if the action returns None.
829 # Remove backslash from escaped entries.
830 text
= text
.replace('{\\','{')
831 text
= text
.replace('}\\','}')
835 return string
.join(lines
,'\n')
842 encoding
= document
.attributes
.get('encoding')
845 codecs
.lookup(encoding
)
846 except LookupError,e
:
847 raise EAsciiDoc
,str(e
)
851 return len(char_decode(s
))
856 return s
.decode(char_encoding())
859 "'%s' codec can't decode \"%s\"" % (char_encoding(), s
)
865 return s
.encode(char_encoding())
870 '''Lexical analysis routines. Static methods and attributes only.'''
874 raise AssertionError,'no class instances allowed'
876 '''Returns class of next element on the input (None if EOF). The
877 reader is assumed to be at the first line following a previous element,
878 end of file or line one. Exits with the reader pointing to the first
879 line of the next element or EOF (leading blank lines are skipped).'''
880 reader
.skip_blank_lines()
881 if reader
.eof(): return None
882 # Optimization: If we've already checked for an element at this
883 # position return the element.
884 if Lex
.prev_element
and Lex
.prev_cursor
== reader
.cursor
:
885 return Lex
.prev_element
888 if not result
and Title
.isnext():
890 # Check for Block Macro.
891 if not result
and macros
.isnext():
892 result
= macros
.current
894 if not result
and lists
.isnext():
895 result
= lists
.current
896 # Check for DelimitedBlock.
897 if not result
and blocks
.isnext():
898 # Skip comment blocks.
899 if 'skip' in blocks
.current
.options
:
900 blocks
.current
.translate()
903 result
= blocks
.current
905 if not result
and tables
.isnext():
906 result
= tables
.current
907 # Check for AttributeEntry.
908 if not result
and AttributeEntry
.isnext():
909 result
= AttributeEntry
910 # Check for AttributeList.
911 if not result
and AttributeList
.isnext():
912 result
= AttributeList
913 # Check for BlockTitle.
914 if not result
and BlockTitle
.isnext():
916 # If it's none of the above then it must be an Paragraph.
918 if not paragraphs
.isnext():
919 raise EAsciiDoc
,'paragraph expected'
920 result
= paragraphs
.current
922 Lex
.prev_cursor
= reader
.cursor
923 Lex
.prev_element
= result
925 next
= staticmethod(next
)
927 # Extract the passthrough text and replace with temporary placeholders.
928 def extract_passthroughs(text
, passthroughs
):
930 lq1
= r
'(?P<lq>\+{3})'
932 reo1
= re
.compile(r
'(?msu)(^|[^\w+])(' + lq1
+ r
')' \
933 + r
'(?P<content>.+?)(' + rq1
+ r
')(?=[^\w+]|$)')
935 lq2
= r
'(\[(?P<attrs>[^[]+?)\])?(?P<lq>\${2})'
937 reo2
= re
.compile(r
'(?msu)(^|[^\w$\]])(' + lq2
+ r
')' \
938 + r
'(?P<content>.+?)(' + rq2
+ r
')(?=[^\w$]|$)')
942 mo
= reo
.search(text
,pos
)
950 if text
[mo
.start()] == '\\':
953 content
= mo
.group('content')
954 if mo
.group('lq') == '$$':
955 content
= config
.subs_specialchars(content
)
957 parse_attributes(mo
.group('attrs'), attrs
)
958 stag
,etag
= config
.tag('$$passthrough', attrs
)
960 etag
= '' # Drop end tag if start tag has been.
961 content
= stag
+ content
+ etag
962 passthroughs
.append(content
)
963 # Tabs are expanded when the source is read so using them here
964 # guarantees the placeholders are unambiguous.
965 s
= mo
.group(1) + '\t' + str(len(passthroughs
)-1) + '\t'
966 text
= text
[:mo
.start()] + s
+ text
[mo
.end():]
967 pos
= mo
.start() + len(s
)
968 # Unescape escaped passthroughs.
969 text
= string
.replace(text
,'\\+++', '+++')
970 text
= string
.replace(text
,'\\$$', '$$')
972 extract_passthroughs
= staticmethod(extract_passthroughs
)
974 # Replace passthough placeholders with the original passthrough text.
975 def restore_passthroughs(text
, passthroughs
):
976 for i
,v
in enumerate(passthroughs
):
977 text
= string
.replace(text
, '\t'+str(i
)+'\t', passthroughs
[i
], 1)
979 restore_passthroughs
= staticmethod(restore_passthroughs
)
981 def subs_1(s
,options
):
982 '''Perform substitution specified in 'options' (in 'options' order) on
983 a single line 's' of text. Returns the substituted string.'''
988 if o
== 'specialcharacters':
989 result
= config
.subs_specialchars(result
)
992 result
= subs_quotes(result
)
994 elif o
== 'specialwords':
995 result
= config
.subs_specialwords(result
)
997 elif o
== 'replacements':
998 result
= config
.subs_replacements(result
)
1001 result
= macros
.subs(result
)
1002 elif o
== 'callouts':
1003 result
= macros
.subs(result
,callouts
=True)
1004 elif o
== 'passthroughs':
1007 raise EAsciiDoc
,'illegal substitution option: %s' % o
1009 subs_1
= staticmethod(subs_1
)
1011 def subs(lines
,options
):
1012 '''Perform inline processing specified by 'options' (in 'options'
1013 order) on sequence of 'lines'.'''
1014 if len(options
) == 1:
1015 if options
[0] == 'none':
1017 elif options
[0] == 'normal':
1018 options
= SUBS_NORMAL
1019 elif options
[0] == 'verbatim':
1020 options
= SUBS_VERBATIM
1021 if not lines
or not options
:
1023 # Join lines so quoting can span multiple lines.
1024 para
= string
.join(lines
,"\n")
1025 if 'passthroughs' in options
:
1027 para
= Lex
.extract_passthroughs(para
,passthroughs
)
1029 if o
== 'attributes':
1030 # If we don't substitute attributes line-by-line then a single
1031 # undefined attribute will drop the entire paragraph.
1032 lines
= subs_attrs(para
.split("\n"))
1033 para
= string
.join(lines
,"\n")
1035 para
= Lex
.subs_1(para
,(o
,))
1036 if 'passthroughs' in options
:
1037 para
= Lex
.restore_passthroughs(para
,passthroughs
)
1038 return para
.split("\n")
1039 subs
= staticmethod(subs
)
1041 def set_margin(lines
, margin
=0):
1042 '''Utility routine that sets the left margin to 'margin' space in a
1043 block of non-blank lines.'''
1044 # Calculate width of block margin.
1046 width
= len(lines
[0])
1048 i
= re
.search(r
'\S',s
).start()
1049 if i
< width
: width
= i
1050 # Strip margin width from all lines.
1051 for i
in range(len(lines
)):
1052 lines
[i
] = ' '*margin
+ lines
[i
][width
:]
1054 set_margin
= staticmethod(set_margin
)
1056 #---------------------------------------------------------------------------
1057 # Document element classes parse AsciiDoc reader input and write DocBook writer
1059 #---------------------------------------------------------------------------
1062 self
.doctype
= None # 'article','manpage' or 'book'.
1063 self
.backend
= None # -b option argument.
1064 self
.infile
= None # Source file name.
1065 self
.outfile
= None # Output file name.
1066 self
.attributes
= {}
1067 self
.level
= 0 # 0 => front matter. 1,2,3 => sect1,2,3.
1068 self
.has_errors
= False # Set true if processing errors were flagged.
1069 self
.safe
= True # Default safe mode.
1070 def init_attrs(self
):
1071 # Set implicit attributes.
1072 d
= time
.localtime(time
.time())
1073 self
.attributes
['localdate'] = time
.strftime('%d-%b-%Y',d
)
1074 s
= time
.strftime('%H:%M:%S',d
)
1076 self
.attributes
['localtime'] = s
+ ' ' + time
.tzname
[1]
1078 self
.attributes
['localtime'] = s
+ ' ' + time
.tzname
[0]
1079 self
.attributes
['asciidoc-version'] = VERSION
1080 self
.attributes
['backend'] = document
.backend
1081 self
.attributes
['doctype'] = document
.doctype
1082 self
.attributes
['backend-'+document
.backend
] = ''
1083 self
.attributes
['doctype-'+document
.doctype
] = ''
1084 self
.attributes
[document
.backend
+'-'+document
.doctype
] = ''
1085 self
.attributes
['asciidoc-dir'] = APP_DIR
1086 self
.attributes
['user-dir'] = USER_DIR
1087 if self
.infile
!= '<stdin>':
1088 self
.attributes
['infile'] = self
.infile
1089 self
.attributes
['docdir'] = os
.path
.dirname(self
.infile
)
1090 self
.attributes
['docname'] = os
.path
.splitext(
1091 os
.path
.basename(self
.infile
))[0]
1092 # Update with configuration file attributes.
1093 self
.attributes
.update(config
.conf_attrs
)
1094 # Update with command-line attributes.
1095 self
.attributes
.update(config
.cmd_attrs
)
1096 # Filter miscellaneous configuration section entries from attributes.
1097 config
.load_miscellaneous(config
.conf_attrs
)
1098 config
.load_miscellaneous(config
.cmd_attrs
)
1099 self
.attributes
['newline'] = config
.newline
# Use raw (unescaped) value.
1101 if self
.outfile
!= '<stdout>':
1102 self
.attributes
['outfile'] = self
.outfile
1103 ext
= os
.path
.splitext(self
.outfile
)[1][1:]
1104 elif config
.outfilesuffix
:
1105 ext
= config
.outfilesuffix
[1:]
1109 self
.attributes
['filetype'] = ext
1110 self
.attributes
['filetype-'+ext
] = ''
1111 def translate(self
):
1112 assert self
.doctype
in ('article','manpage','book'), \
1113 'illegal document type'
1114 assert self
.level
== 0
1115 config
.expand_all_templates()
1116 # Process document header.
1117 AttributeEntry
.translate_all()
1118 has_header
= Lex
.next() is Title
and Title
.level
== 0
1119 if self
.doctype
== 'manpage' and not has_header
:
1120 error('manpage document title is mandatory')
1123 # Command-line entries override header derived entries.
1124 self
.attributes
.update(config
.cmd_attrs
)
1125 if config
.header_footer
:
1126 hdr
= config
.subs_section('header',{})
1128 if self
.doctype
in ('article','book'):
1129 # Translate 'preamble' (untitled elements between header
1130 # and first section title).
1131 if Lex
.next() is not Title
:
1132 stag
,etag
= config
.section2tags('preamble')
1134 Section
.translate_body()
1137 # Translate manpage SYNOPSIS.
1138 if Lex
.next() is not Title
:
1139 error('SYNOPSIS section expected')
1142 if string
.upper(Title
.dict['title']) <> 'SYNOPSIS':
1143 error('second section must be named SYNOPSIS')
1144 if Title
.level
!= 1:
1145 error('SYNOPSIS section title must be at level 1')
1147 d
.update(Title
.dict)
1148 AttributeList
.consume(d
)
1149 stag
,etag
= config
.section2tags('sect-synopsis',d
)
1151 Section
.translate_body()
1154 if config
.header_footer
:
1155 hdr
= config
.subs_section('header',{})
1157 if Lex
.next() is not Title
:
1158 Section
.translate_body()
1159 # Process remaining sections.
1160 while not reader
.eof():
1161 if Lex
.next() is not Title
:
1162 raise EAsciiDoc
,'section title expected'
1164 Section
.setlevel(0) # Write remaining unwritten section close tags.
1165 # Substitute document parameters and write document footer.
1166 if config
.header_footer
:
1167 ftr
= config
.subs_section('footer',{})
1169 def parse_author(self
,s
):
1170 attrs
= self
.attributes
# Alias for readability.
1172 if not s
: # An undefined attribute has dropped the author line.
1175 mo
= re
.match(r
'^(?P<name1>[^<>\s]+)'
1176 '(\s+(?P<name2>[^<>\s]+))?'
1177 '(\s+(?P<name3>[^<>\s]+))?'
1178 '(\s+<(?P<email>\S+)>)?$',s
)
1180 error('malformed author line')
1182 firstname
= mo
.group('name1')
1183 if mo
.group('name3'):
1184 middlename
= mo
.group('name2')
1185 lastname
= mo
.group('name3')
1188 lastname
= mo
.group('name2')
1189 email
= mo
.group('email')
1191 initials
= firstname
[0]
1193 author
+= ' '+middlename
1194 initials
+= middlename
[0]
1196 author
+= ' '+lastname
1197 initials
+= lastname
[0]
1198 initials
= string
.upper(initials
)
1200 attrs
['firstname'] = config
.subs_specialchars(firstname
)
1202 attrs
['middlename'] = config
.subs_specialchars(middlename
)
1204 attrs
['lastname'] = config
.subs_specialchars(lastname
)
1206 attrs
['author'] = config
.subs_specialchars(author
)
1208 attrs
['authorinitials'] = config
.subs_specialchars(initials
)
1210 attrs
['email'] = email
1213 '''Static methods and attributes only.'''
1215 raise AssertionError,'no class instances allowed'
1217 assert Lex
.next() is Title
and Title
.level
== 0
1219 attrs
= document
.attributes
# Alias for readability.
1220 attrs
['doctitle'] = Title
.dict['title']
1221 if document
.doctype
== 'manpage':
1222 # manpage title formatted like mantitle(manvolnum).
1223 mo
= re
.match(r
'^(?P<mantitle>.*)\((?P<manvolnum>.*)\)$',
1226 error('malformed manpage title')
1228 attrs
['mantitle'] = mo
.group('mantitle').lower().strip()
1229 attrs
['manvolnum'] = mo
.group('manvolnum').strip()
1230 AttributeEntry
.translate_all()
1231 s
= reader
.read_next()
1234 document
.parse_author(s
)
1235 AttributeEntry
.translate_all()
1236 if reader
.read_next():
1237 # Parse revision line.
1241 # Match RCS/CVS $Id$ marker format.
1242 mo
= re
.match(r
'^\$Id: \S+ (?P<revision>\S+)'
1243 ' (?P<date>\S+) \S+ \S+ \S+ \$$',s
)
1245 # Match AsciiDoc revision,date format.
1246 mo
= re
.match(r
'^\D*(?P<revision>.*?),(?P<date>.+)$',s
)
1248 revision
= mo
.group('revision').strip()
1249 date
= mo
.group('date').strip()
1254 attrs
['revision'] = config
.subs_specialchars(revision
)
1256 attrs
['date'] = config
.subs_specialchars(date
)
1257 AttributeEntry
.translate_all()
1258 if document
.backend
== 'linuxdoc' and not attrs
.has_key('author'):
1259 warning('linuxdoc requires author name')
1260 if document
.doctype
== 'manpage':
1261 # Translate mandatory NAME section.
1262 if Lex
.next() is not Title
:
1263 error('NAME section expected')
1266 if string
.upper(Title
.dict['title']) <> 'NAME':
1267 error('first section must be named NAME')
1268 if Title
.level
!= 1:
1269 error('NAME section title must be at level 1')
1270 if not isinstance(Lex
.next(),Paragraph
):
1271 error('malformed NAME section body')
1272 lines
= reader
.read_until(r
'^$')
1273 s
= string
.join(lines
)
1274 mo
= re
.match(r
'^(?P<manname>.*?)\s+-\s+(?P<manpurpose>.*)$',s
)
1276 error('malformed NAME section body')
1277 attrs
['manname'] = string
.strip(mo
.group('manname'))
1278 attrs
['manpurpose'] = string
.strip(mo
.group('manpurpose'))
1279 if attrs
.get('author',None) or attrs
.get('email',None):
1280 attrs
['authored'] = ''
1281 translate
= staticmethod(translate
)
1283 class AttributeEntry
:
1284 '''Static methods and attributes only.'''
1289 raise AssertionError,'no class instances allowed'
1291 result
= False # Assume not next.
1292 if not AttributeEntry
.pattern
:
1293 pat
= document
.attributes
.get('attributeentry-pattern')
1295 error("[attributes] missing 'attributeentry-pattern' entry")
1296 AttributeEntry
.pattern
= pat
1297 line
= reader
.read_next()
1299 mo
= re
.match(AttributeEntry
.pattern
,line
)
1301 name
= mo
.group('attrname').strip()
1302 if name
[-1] == '!': # Names like name! are None.
1306 value
= mo
.group('attrvalue').strip()
1307 # Strip white space and illegal name chars.
1308 name
= re
.sub(r
'(?u)[^\w\-_]', '', name
).lower()
1309 AttributeEntry
.name
= name
1310 AttributeEntry
.value
= value
1313 isnext
= staticmethod(isnext
)
1315 assert Lex
.next() is AttributeEntry
1316 attr
= AttributeEntry
# Alias for brevity.
1317 reader
.read() # Discard attribute from reader.
1318 # Don't override command-line attributes.
1319 if config
.cmd_attrs
.has_key(attr
.name
):
1321 # Update document.attributes from previously parsed attribute.
1323 attr
.value
= config
.subs_specialchars(attr
.value
)
1324 attr
.value
= subs_attrs(attr
.value
)
1325 if attr
.value
is not None:
1326 document
.attributes
[attr
.name
] = attr
.value
1327 # Some document Header attributes get special treatment.
1328 if attr
.name
== 'author':
1329 document
.parse_author(attr
.value
)
1330 elif document
.attributes
.has_key(attr
.name
):
1331 del document
.attributes
[attr
.name
]
1332 translate
= staticmethod(translate
)
def translate_all():
    '''Consume and translate every contiguous attribute entry line
    currently at the front of the reader.'''
    while True:
        if not AttributeEntry.isnext():
            break
        AttributeEntry.translate()
translate_all = staticmethod(translate_all)
1339 class AttributeList
:
1340 '''Static methods and attributes only.'''
1345 raise AssertionError,'no class instances allowed'
1347 result
= False # Assume not next.
1348 if not AttributeList
.pattern
:
1349 if not document
.attributes
.has_key('attributelist-pattern'):
1350 error("[miscellaneous] missing 'attributelist-pattern' entry")
1351 AttributeList
.pattern
= document
.attributes
['attributelist-pattern']
1352 line
= reader
.read_next()
1354 mo
= re
.match(AttributeList
.pattern
, line
)
1356 AttributeList
.match
= mo
1359 isnext
= staticmethod(isnext
)
1361 assert Lex
.next() is AttributeList
1362 reader
.read() # Discard attribute list from reader.
1363 d
= AttributeList
.match
.groupdict()
1364 for k
,v
in d
.items():
1369 parse_attributes(v
, AttributeList
.attrs
)
1371 AttributeList
.attrs
[k
] = v
1372 translate
= staticmethod(translate
)
1374 '''Add attribute list to the dictionary 'd' and reset the
1376 if AttributeList
.attrs
:
1377 d
.update(AttributeList
.attrs
)
1378 AttributeList
.attrs
= {}
1379 consume
= staticmethod(consume
)
1382 '''Static methods and attributes only.'''
1386 raise AssertionError,'no class instances allowed'
1388 result
= False # Assume not next.
1389 line
= reader
.read_next()
1391 mo
= re
.match(BlockTitle
.pattern
,line
)
1393 BlockTitle
.title
= mo
.group('title')
1396 isnext
= staticmethod(isnext
)
1398 assert Lex
.next() is BlockTitle
1399 reader
.read() # Discard title from reader.
1400 # Perform title substitutions.
1401 s
= Lex
.subs((BlockTitle
.title
,), Title
.subs
)
1402 s
= string
.join(s
,writer
.newline
)
1404 warning('blank block title')
1405 BlockTitle
.title
= s
1406 translate
= staticmethod(translate
)
1408 '''If there is a title add it to dictionary 'd' then reset title.'''
1409 if BlockTitle
.title
:
1410 d
['title'] = BlockTitle
.title
1411 BlockTitle
.title
= None
1412 consume
= staticmethod(consume
)
1415 '''Processes Header and Section titles. Static methods and attributes
1418 underlines
= ('==','--','~~','^^','++') # Levels 0,1,2,3,4.
1419 subs
= ('specialcharacters','quotes','replacements','attributes','macros')
1424 section_numbers
= [0]*len(underlines
)
1426 linecount
= None # Number of lines in title (1 or 2).
1428 raise AssertionError,'no class instances allowed'
1430 '''Parse the Title.dict and Title.level from the reader. The
1431 real work has already been done by parse().'''
1432 assert Lex
.next() is Title
1433 # Discard title from reader.
1434 for i
in range(Title
.linecount
):
1437 # Perform title substitutions.
1438 s
= Lex
.subs((Title
.dict['title'],), Title
.subs
)
1439 s
= string
.join(s
,writer
.newline
)
1441 warning('blank section title')
1442 Title
.dict['title'] = s
1443 translate
= staticmethod(translate
)
1445 lines
= reader
.read_ahead(2)
1446 return Title
.parse(lines
)
1447 isnext
= staticmethod(isnext
)
1449 '''Parse title at start of lines tuple.'''
1450 if len(lines
) == 0: return False
1451 if len(lines
[0]) == 0: return False # Title can't be blank.
1452 # Check for single-line titles.
1454 for level
in range(len(Title
.underlines
)):
1455 k
= 'sect%s' % level
1456 if Title
.dump_dict
.has_key(k
):
1457 mo
= re
.match(Title
.dump_dict
[k
], lines
[0])
1459 Title
.dict = mo
.groupdict()
1465 # Check for double-line titles.
1466 if not Title
.pattern
: return False # Single-line titles only.
1467 if len(lines
) < 2: return False
1468 title
,ul
= lines
[:2]
1469 title_len
= char_len(title
)
1470 ul_len
= char_len(ul
)
1471 if ul_len
< 2: return False
1472 # Fast elimination check.
1473 if ul
[:2] not in Title
.underlines
: return False
1474 # Length of underline must be within +-3 of title.
1475 if not (ul_len
-3 < title_len
< ul_len
+3): return False
1476 # Check for valid repetition of underline character pairs.
1477 s
= ul
[:2]*((ul_len
+1)/2)
1478 if ul
!= s
[:ul_len
]: return False
1479 # Don't be fooled by back-to-back delimited blocks, require at
1480 # least one alphanumeric character in title.
1481 if not re
.search(r
'(?u)\w',title
): return False
1482 mo
= re
.match(Title
.pattern
, title
)
1484 Title
.dict = mo
.groupdict()
1485 Title
.level
= list(Title
.underlines
).index(ul
[:2])
1488 # Check for expected pattern match groups.
1490 if not Title
.dict.has_key('title'):
1491 warning('[titles] entry has no <title> group')
1492 Title
.dict['title'] = lines
[0]
1493 for k
,v
in Title
.dict.items():
1494 if v
is None: del Title
.dict[k
]
1496 parse
= staticmethod(parse
)
1498 '''Load and validate [titles] section entries from dict.'''
1499 if dict.has_key('underlines'):
1500 errmsg
= 'malformed [titles] underlines entry'
1502 underlines
= parse_list(dict['underlines'])
1504 raise EAsciiDoc
,errmsg
1505 if len(underlines
) != len(Title
.underlines
):
1506 raise EAsciiDoc
,errmsg
1507 for s
in underlines
:
1509 raise EAsciiDoc
,errmsg
1510 Title
.underlines
= tuple(underlines
)
1511 Title
.dump_dict
['underlines'] = dict['underlines']
1512 if dict.has_key('subs'):
1513 Title
.subs
= parse_options(dict['subs'], SUBS_OPTIONS
,
1514 'illegal [titles] subs entry')
1515 Title
.dump_dict
['subs'] = dict['subs']
1516 if dict.has_key('sectiontitle'):
1517 pat
= dict['sectiontitle']
1518 if not pat
or not is_regexp(pat
):
1519 raise EAsciiDoc
,'malformed [titles] sectiontitle entry'
1521 Title
.dump_dict
['sectiontitle'] = pat
1522 if dict.has_key('blocktitle'):
1523 pat
= dict['blocktitle']
1524 if not pat
or not is_regexp(pat
):
1525 raise EAsciiDoc
,'malformed [titles] blocktitle entry'
1526 BlockTitle
.pattern
= pat
1527 Title
.dump_dict
['blocktitle'] = pat
1528 # Load single-line title patterns.
1529 for k
in ('sect0','sect1','sect2','sect3','sect4'):
1532 if not pat
or not is_regexp(pat
):
1533 raise EAsciiDoc
,'malformed [titles] %s entry' % k
1534 Title
.dump_dict
[k
] = pat
1535 # TODO: Check we have either a Title.pattern or at least one
1536 # single-line title pattern -- can this be done here or do we need
1537 # check routine like the other block checkers?
1538 load
= staticmethod(load
)
1540 dump_section('titles',Title
.dump_dict
)
1541 dump
= staticmethod(dump
)
1543 '''Set Title section name. First search for section title in
1544 [specialsections], if not found use default 'sect<level>' name.'''
1545 for pat
,sect
in config
.specialsections
.items():
1546 mo
= re
.match(pat
,Title
.dict['title'])
1548 title
= mo
.groupdict().get('title')
1549 if title
is not None:
1550 Title
.dict['title'] = string
.strip(title
)
1552 Title
.dict['title'] = string
.strip(mo
.group())
1553 Title
.sectname
= sect
1556 Title
.sectname
= 'sect%d' % Title
.level
1557 setsectname
= staticmethod(setsectname
)
1558 def getnumber(level
):
1559 '''Return next section number at section 'level' formatted like
1562 for l
in range(len(Title
.section_numbers
)):
1563 n
= Title
.section_numbers
[l
]
1567 number
= '%s%d.' % (number
, n
)
1569 number
= '%s%d.' % (number
, n
+ 1)
1570 Title
.section_numbers
[l
] = n
+ 1
1572 # Reset unprocessed section levels.
1573 Title
.section_numbers
[l
] = 0
1575 getnumber
= staticmethod(getnumber
)
1579 '''Static methods and attributes only.'''
1580 endtags
= [] # Stack of currently open section (level,endtag) tuples.
1582 raise AssertionError,'no class instances allowed'
def savetag(level, etag):
    '''Push an open section's (level, end tag) pair onto the endtags
    stack so the tag can be written when the section is closed.'''
    entry = (level, etag)
    Section.endtags.append(entry)
savetag = staticmethod(savetag)
def setlevel(level):
    '''Write close tags for all open sections at or below 'level',
    then make 'level' the current document level.'''
    endtags = Section.endtags
    while endtags:
        open_level, etag = endtags[-1]
        if open_level < level:
            break
        endtags.pop()
        writer.write(etag)
    document.level = level
setlevel = staticmethod(setlevel)
1594 assert Lex
.next() is Title
1595 prev_sectname
= Title
.sectname
1597 if Title
.level
== 0 and document
.doctype
!= 'book':
1598 error('only book doctypes can contain level 0 sections')
1599 if Title
.level
> document
.level \
1600 and document
.backend
== 'docbook' \
1601 and prev_sectname
in ('sect-colophon','sect-abstract', \
1602 'sect-dedication','sect-glossary','sect-bibliography'):
1603 error('%s section cannot contain sub-sections' % prev_sectname
)
1604 if Title
.level
> document
.level
+1:
1605 # Sub-sections of multi-part book level zero Preface and Appendices
1606 # are meant to be out of sequence.
1607 if document
.doctype
== 'book' \
1608 and document
.level
== 0 \
1609 and Title
.level
== 2 \
1610 and prev_sectname
in ('sect-preface','sect-appendix'):
1613 warning('section title out of sequence: '
1614 'expected level %d, got level %d'
1615 % (document
.level
+1, Title
.level
))
1616 Section
.setlevel(Title
.level
)
1617 Title
.dict['sectnum'] = Title
.getnumber(document
.level
)
1618 AttributeList
.consume(Title
.dict)
1619 stag
,etag
= config
.section2tags(Title
.sectname
,Title
.dict)
1620 Section
.savetag(Title
.level
,etag
)
1622 Section
.translate_body()
1623 translate
= staticmethod(translate
)
1624 def translate_body(terminator
=Title
):
1627 while next
and next
is not terminator
:
1628 if next
is Title
and isinstance(terminator
,DelimitedBlock
):
1629 error('title not permitted in sidebar body')
1630 if document
.backend
== 'linuxdoc' \
1631 and document
.level
== 0 \
1632 and not isinstance(next
,Paragraph
):
1633 warning('only paragraphs are permitted in linuxdoc synopsis')
1637 # The section is not empty if contains a subsection.
1638 if next
and isempty
and Title
.level
> document
.level
:
1640 # Report empty sections if invalid markup will result.
1642 if document
.backend
== 'docbook' and Title
.sectname
!= 'sect-index':
1643 error('empty section is not valid')
1644 translate_body
= staticmethod(translate_body
)
1646 class AbstractBlock
:
1648 self
.OPTIONS
= () # The set of allowed options values
1649 # Configuration parameter names common to all blocks.
1650 self
.CONF_ENTRIES
= ('options','subs','presubs','postsubs',
1651 'posattrs','style','.*-style')
1652 # Configuration parameters.
1653 self
.name
=None # Configuration file section name.
1654 self
.delimiter
=None # Regular expression matching block delimiter.
1655 self
.template
=None # template section entry.
1656 self
.options
=() # options entry list.
1657 self
.presubs
=SUBS_NORMAL
# presubs/subs entry list.
1658 self
.postsubs
=() # postsubs entry list.
1659 self
.filter=None # filter entry.
1660 self
.posattrs
=() # posattrs entry list.
1661 self
.style
=None # Default style.
1662 self
.styles
=OrderedDict() # Styles dictionary.
1663 # Before a block is processed it's attributes (from it's
1664 # attributes list) are merged with the block configuration parameters
1665 # (by self.process_attributes()) resulting in the template substitution
1666 # dictionary (self.attributes) and the block's procssing parameters
1667 # (self.parameters).
1669 # The names of block parameters.
1670 self
.PARAM_NAMES
=('template','options','presubs','postsubs','filter')
1672 # Leading delimiter match object.
1674 def is_conf_entry(self
,param
):
1675 '''Return True if param matches an allowed configuration file entry
1677 for s
in self
.CONF_ENTRIES
:
1678 if re
.match('^'+s
+'$',param
):
1681 def load(self
,name
,entries
):
1682 '''Update block definition from section 'entries' dictionary.'''
1683 for k
in entries
.keys():
1684 if not self
.is_conf_entry(k
):
1685 raise EAsciiDoc
,'illegal [%s] entry name: %s' % (name
,k
)
1687 for k
,v
in entries
.items():
1690 'malformed [%s] entry name: %s' % (name
,k
)
1691 if k
== 'delimiter':
1692 if v
and is_regexp(v
):
1695 raise EAsciiDoc
,'malformed [%s] regexp: %s' % (name
,v
)
1696 elif k
== 'template':
1699 'malformed [%s] template name: %s' % (name
,v
)
1704 'malformed [%s] style name: %s' % (name
,v
)
1706 elif k
== 'posattrs':
1707 self
.posattrs
= parse_options(v
, (),
1708 'illegal [%s] %s: %s' % (name
,k
,v
))
1709 elif k
== 'options':
1710 self
.options
= parse_options(v
,self
.OPTIONS
,
1711 'illegal [%s] %s: %s' % (name
,k
,v
))
1712 elif k
== 'presubs' or k
== 'subs':
1713 self
.presubs
= parse_options(v
,SUBS_OPTIONS
,
1714 'illegal [%s] %s: %s' % (name
,k
,v
))
1715 elif k
== 'postsubs':
1716 self
.postsubs
= parse_options(v
,SUBS_OPTIONS
,
1717 'illegal [%s] %s: %s' % (name
,k
,v
))
1721 mo
= re
.match(r
'^(?P<style>.*)-style$',k
)
1724 raise EAsciiDoc
, 'empty [%s] style: %s' % (name
,k
)
1725 style
= mo
.group('style')
1727 if not parse_named_attributes(v
,d
):
1728 raise EAsciiDoc
,'malformed [%s] style: %s' % (name
,v
)
1729 self
.styles
[style
] = d
1731 '''Write block definition to stdout.'''
1732 write
= lambda s
: sys
.stdout
.write('%s%s' % (s
,writer
.newline
))
1733 write('['+self
.name
+']')
1734 if self
.is_conf_entry('delimiter'):
1735 write('delimiter='+self
.delimiter
)
1737 write('template='+self
.template
)
1739 write('options='+string
.join(self
.options
,','))
1742 write('presubs='+string
.join(self
.presubs
,','))
1744 write('subs='+string
.join(self
.presubs
,','))
1746 write('postsubs='+string
.join(self
.postsubs
,','))
1748 write('filter='+self
.filter)
1750 write('posattrs='+string
.join(self
.posattrs
,','))
1752 write('style='+self
.style
)
1754 for style
,d
in self
.styles
.items():
1756 for k
,v
in d
.items():
1757 if isinstance(v
,tuple):
1758 v
= string
.join(v
,',')
1760 write(style
+'-style='+s
[:-1])
1762 '''Validate block after the complete configuration has been loaded.'''
1763 if self
.is_conf_entry('delimiter') and not self
.delimiter
:
1764 raise EAsciiDoc
,'[%s] missing delimiter' % self
.name
1766 if not self
.styles
.has_key(self
.style
):
1767 warning(' missing [%s] %s-style entry' % (self
.name
,self
.style
))
1768 # Check all styles for missing templates.
1769 all_styles_have_template
= True
1770 for k
,v
in self
.styles
.items():
1771 t
= v
.get('template')
1772 if t
and not config
.sections
.has_key(t
):
1773 warning('[%s] missing template section' % t
)
1775 all_styles_have_template
= False
1776 # Check we have a valid template entry or alternatively that all the
1777 # styles have templates.
1778 if self
.is_conf_entry('template') and not 'skip' in self
.options
:
1780 if not config
.sections
.has_key(self
.template
):
1781 warning('[%s] missing template section' % self
.template
)
1782 elif not all_styles_have_template
:
1783 warning('[%s] styles missing templates' % self
.name
)
1785 '''Check if this block is next in document reader.'''
1787 reader
.skip_blank_lines()
1788 if reader
.read_next():
1789 mo
= re
.match(self
.delimiter
,reader
.read_next())
1794 def translate(self
):
1795 '''Translate block from document reader.'''
1796 raise AssertionError,'no class instances allowed'
1797 def update_params(self
,src
,dst
):
1798 '''Copy block processing parameters from src to dst dictionaries.'''
1799 for k
,v
in src
.items():
1802 elif k
== 'options':
1803 dst
[k
] = parse_options(v
,self
.OPTIONS
,
1804 'illegal [%s] %s: %s' % (self
.name
,k
,v
))
1805 elif k
in ('subs','presubs','postsubs'):
1806 subs
= parse_options(v
,SUBS_OPTIONS
,
1807 'illegal [%s] %s: %s' % (self
.name
,k
,v
))
1809 dst
['presubs'] = subs
1814 def merge_attributes(self
,attrs
):
1815 '''Merge block attributes 'attrs' dictionary with the block
1816 configuration parameters setting self.attributes (template substitution
1817 attributes) and self.parameters (block processing parameters).'''
1818 self
.attributes
= {}
1819 self
.attributes
.update(attrs
)
1820 # Configure positional attributes.
1821 for i
,v
in enumerate(self
.posattrs
):
1822 if self
.attributes
.has_key(str(i
+1)):
1823 self
.attributes
[v
] = self
.attributes
[str(i
+1)]
1824 # Calculate dynamic block parameters.
1825 # Start with configuration file defaults.
1826 self
.parameters
['template'] = self
.template
1827 self
.parameters
['options'] = self
.options
1828 self
.parameters
['presubs'] = self
.presubs
1829 self
.parameters
['postsubs'] = self
.postsubs
1830 self
.parameters
['filter'] = self
.filter
1831 # Load the selected style attributes.
1832 style
= self
.attributes
.get('style',self
.style
)
1833 if style
is not None:
1834 if not self
.styles
.has_key(style
):
1835 warning('missing [%s] %s-style entry' % (self
.name
,style
))
1837 self
.attributes
['style'] = style
1838 for k
,v
in self
.styles
[style
].items():
1839 if k
in self
.PARAM_NAMES
:
1840 self
.parameters
[k
] = v
1841 elif not self
.attributes
.has_key(k
):
1842 # Style attributes don't take precedence over explicit.
1843 self
.attributes
[k
] = v
1844 # Override config and style attributes with document attributes.
1845 self
.update_params(self
.attributes
,self
.parameters
)
1846 assert isinstance(self
.parameters
['options'],tuple)
1847 assert isinstance(self
.parameters
['presubs'],tuple)
1848 assert isinstance(self
.parameters
['postsubs'],tuple)
def get_options(self):
    '''Return the block's merged 'options' tuple from its processing
    parameters (populated by merge_attributes).'''
    params = self.parameters
    return params['options']
1852 return (self
.parameters
['presubs'], self
.parameters
['postsubs'])
def get_template(self):
    '''Return the name of the configuration template section selected
    for this block (a processing parameter).'''
    params = self.parameters
    return params['template']
def get_filter(self):
    '''Return the block's active filter entry (a processing parameter),
    or None when no filter is configured.'''
    params = self.parameters
    return params['filter']
1858 class AbstractBlocks
:
1859 '''List of block definitions.'''
1860 PREFIX
= '' # Conf file section name prefix set in derived classes.
1861 BLOCK_TYPE
= None # Block type set in derived classes.
1864 self
.blocks
= [] # List of Block objects.
1865 self
.default
= None # Default Block.
1866 self
.delimiter
= None # Combined tables delimiter regular expression.
1867 def load(self
,sections
):
1868 '''Load block definition from 'sections' dictionary.'''
1869 for k
in sections
.keys():
1870 if re
.match(r
'^'+ self
.PREFIX
+ r
'.+$',k
):
1872 parse_entries(sections
.get(k
,()),d
)
1873 for b
in self
.blocks
:
1877 b
= self
.BLOCK_TYPE()
1878 self
.blocks
.append(b
)
1882 raise EAsciiDoc
,'[%s] %s' % (k
,str(e
))
1884 for b
in self
.blocks
:
1887 for b
in self
.blocks
:
1893 '''Validate the block definitions.'''
1894 # Validate delimiters and build combined lists delimiter pattern.
1895 for b
in self
.blocks
:
1896 assert b
.__class
__ is self
.BLOCK_TYPE
1898 if b
.is_conf_entry('delimiter'):
1900 for b
in self
.blocks
:
1903 delimiters
.append(b
.delimiter
)
1904 self
.delimiter
= join_regexp(delimiters
)
1906 class Paragraph(AbstractBlock
):
1908 AbstractBlock
.__init
__(self
)
1909 self
.CONF_ENTRIES
+= ('delimiter','template','filter')
1910 self
.OPTIONS
= ('listelement',)
1911 self
.text
=None # Text in first line of paragraph.
def load(self, name, entries):
    '''Update this paragraph definition from the configuration section
    'entries' dictionary; delegates entirely to AbstractBlock.load.'''
    AbstractBlock.load(self, name, entries)
1915 AbstractBlock
.dump(self
)
1916 write
= lambda s
: sys
.stdout
.write('%s%s' % (s
,writer
.newline
))
1919 result
= AbstractBlock
.isnext(self
)
1921 self
.text
= self
.mo
.groupdict().get('text')
1923 def translate(self
):
1925 attrs
.update(self
.mo
.groupdict())
1926 BlockTitle
.consume(attrs
)
1927 AttributeList
.consume(attrs
)
1928 self
.merge_attributes(attrs
)
1929 reader
.read() # Discard (already parsed item first line).
1930 body
= reader
.read_until(r
'^\+$|^$|'+blocks
.delimiter
+r
'|'+tables
.delimiter
)
1931 body
= [self
.text
] + list(body
)
1932 presubs
,postsubs
= self
.get_subs()
1933 # Don't join verbatim paragraphs.
1934 if 'verbatim' not in (presubs
+ postsubs
):
1935 body
= join_lines(body
)
1936 body
= Lex
.set_margin(body
) # Move body to left margin.
1937 body
= Lex
.subs(body
,presubs
)
1938 if self
.get_filter():
1939 body
= filter_lines(self
.get_filter(),body
,self
.attributes
)
1940 body
= Lex
.subs(body
,postsubs
)
1941 template
= self
.get_template()
1942 stag
,etag
= config
.section2tags(template
, self
.attributes
)
1943 # Write start tag, content, end tag.
1944 writer
.write(dovetail_tags(stag
,body
,etag
))
1946 class Paragraphs(AbstractBlocks
):
1947 '''List of paragraph definitions.'''
1948 BLOCK_TYPE
= Paragraph
1951 AbstractBlocks
.__init
__(self
)
def load(self, sections):
    '''Load all paragraph definitions from the 'sections' dictionary;
    delegates entirely to AbstractBlocks.load.'''
    AbstractBlocks.load(self, sections)
1955 AbstractBlocks
.validate(self
)
1956 # Check we have a default paragraph definition, put it last in list.
1957 for b
in self
.blocks
:
1958 if b
.name
== 'paradef-default':
1959 self
.blocks
.append(b
)
1961 self
.blocks
.remove(b
)
1964 raise EAsciiDoc
,'missing [paradef-default] section'
1966 class List(AbstractBlock
):
1967 TAGS
= ('listtag','itemtag','texttag','entrytag','labeltag')
1968 TYPES
= ('bulleted','numbered','labeled','callout')
1970 AbstractBlock
.__init
__(self
)
1971 self
.CONF_ENTRIES
+= ('delimiter','type') + self
.TAGS
1974 self
.texttag
=None # Tag for list item text.
1975 self
.labeltag
=None # Variable lists only.
1976 self
.entrytag
=None # Variable lists only.
1977 self
.label
=None # List item label (labeled lists).
1978 self
.text
=None # Text in first line of list item.
1979 self
.index
=None # Matched delimiter 'index' group (numbered lists).
1980 self
.type=None # List type.
1981 def load(self
,name
,entries
):
1982 AbstractBlock
.load(self
,name
,entries
)
1983 for k
,v
in entries
.items():
1988 raise EAsciiDoc
,'illegal list type: %s' % v
1989 elif k
in self
.TAGS
:
1993 raise EAsciiDoc
,'illegal list %s name: %s' % (k
,v
)
1995 AbstractBlock
.dump(self
)
1996 write
= lambda s
: sys
.stdout
.write('%s%s' % (s
,writer
.newline
))
1997 write('type='+self
.type)
1998 write('listtag='+self
.listtag
)
1999 write('itemtag='+self
.itemtag
)
2000 write('texttag='+self
.texttag
)
2001 if self
.type == 'labeled':
2002 write('entrytag='+self
.entrytag
)
2003 write('labeltag='+self
.labeltag
)
2006 result
= AbstractBlock
.isnext(self
)
2008 self
.label
= self
.mo
.groupdict().get('label')
2009 self
.text
= self
.mo
.groupdict().get('text')
2010 self
.index
= self
.mo
.groupdict().get('index')
2012 def translate_entry(self
):
2013 assert self
.type == 'labeled'
2014 stag
,etag
= config
.tag(self
.entrytag
, self
.attributes
)
2018 # Horizontal label list.
2019 reader
.read() # Discard (already parsed item first line).
2020 writer
.write_tag(self
.labeltag
, [self
.label
],
2021 self
.presubs
, self
.attributes
)
2023 # Write multiple labels (vertical label list).
2024 while Lex
.next() is self
:
2025 reader
.read() # Discard (already parsed item first line).
2026 writer
.write_tag(self
.labeltag
, [self
.label
],
2027 self
.presubs
, self
.attributes
)
2029 self
.translate_item()
2032 def iscontinued(self
):
2033 if reader
.read_next() == '+':
2034 reader
.read() # Discard.
2035 # Allow attribute list to precede continued list item element.
2036 while Lex
.next() is AttributeList
:
2037 Lex
.next().translate()
2041 def translate_item(self
,listindex
=None):
2043 self
.translate_item_2(listindex
)
2045 self
.translate_item_1(listindex
)
2046 def translate_item_1(self
,listindex
=None):
2047 '''Translation for '+' style list continuation.'''
2048 if self
.type == 'callout':
2049 self
.attributes
['coids'] = calloutmap
.calloutids(listindex
)
2050 stag
,etag
= config
.tag(self
.itemtag
, self
.attributes
)
2053 if self
.text
and self
.text
== '+':
2054 # Pathalogical case: continued Horizontal Labeled List with no
2057 elif not self
.text
and self
.iscontinued():
2058 # Pathalogical case: continued Vertical Labeled List with no
2063 text
= reader
.read_until(lists
.delimiter
+ r
'|^\+$|^$|' +
2064 blocks
.delimiter
+ r
'|' + tables
.delimiter
)
2065 if self
.text
is not None:
2066 text
= [self
.text
] + list(text
)
2067 text
= join_lines(text
)
2068 writer
.write_tag(self
.texttag
, text
, self
.presubs
, self
.attributes
)
2069 continued
= self
.iscontinued()
2072 if next
in lists
.open:
2074 elif isinstance(next
,List
):
2076 elif isinstance(next
,Paragraph
) and 'listelement' in next
.options
:
2079 if next
is Title
or next
is BlockTitle
:
2080 error('title not allowed in list item continuation')
2084 continued
= self
.iscontinued()
2087 def translate_item_2(self
,listindex
=None):
2088 '''Translation for List block style lists.'''
2089 if self
.type == 'callout':
2090 self
.attributes
['coids'] = calloutmap
.calloutids(listindex
)
2091 stag
,etag
= config
.tag(self
.itemtag
, self
.attributes
)
2094 if self
.text
or reader
.read_next():
2096 text
= reader
.read_until(lists
.delimiter
+ r
'|^$|' +
2097 blocks
.delimiter
+ r
'|' + tables
.delimiter
)
2098 if self
.text
is not None:
2099 text
= [self
.text
] + list(text
)
2100 text
= join_lines(text
)
2101 writer
.write_tag(self
.texttag
, text
, self
.presubs
, self
.attributes
)
2104 if next
in lists
.open:
2106 elif next
is lists
.listblock
:
2108 elif isinstance(next
,List
):
2110 elif isinstance(next
,Paragraph
) and 'listelement' in next
.options
:
2112 elif lists
.listblock
:
2113 if next
is Title
or next
is BlockTitle
:
2114 error('title not allowed in list item continuation')
2120 def check_index(self
,listindex
):
2121 ''' Check calculated listindex (1,2,...) against the item index in the
2122 document (self.index).'''
2123 assert self
.type in ('numbered','callout')
2126 if re
.match(r
'\d+', self
.index
):
2129 elif re
.match(r
'[a-z]', self
.index
):
2130 i
= ord(self
.index
) - ord('a') + 1
2132 if matched
and i
!= listindex
:
2133 print 'type: ',self
.type,': expected ',listindex
,' got ',i
2134 warning("list item %s out of sequence" % self
.index
)
2135 def translate(self
):
2136 lists
.open.append(self
)
2138 attrs
.update(self
.mo
.groupdict())
2139 BlockTitle
.consume(attrs
)
2140 AttributeList
.consume(attrs
)
2141 self
.merge_attributes(attrs
)
2142 stag
,etag
= config
.tag(self
.listtag
, self
.attributes
)
2146 while Lex
.next() is self
:
2147 if self
.type in ('numbered','callout'):
2149 self
.check_index(listindex
)
2150 if self
.type in ('bulleted','numbered','callout'):
2151 reader
.read() # Discard (already parsed item first line).
2152 self
.translate_item(listindex
)
2153 elif self
.type == 'labeled':
2154 self
.translate_entry()
2156 raise AssertionError,'illegal [%s] list type"' % self
.name
2159 if self
.type == 'callout':
2160 calloutmap
.validate(listindex
)
2161 calloutmap
.listclose()
2164 class Lists(AbstractBlocks
):
2165 '''List of List objects.'''
2169 AbstractBlocks
.__init
__(self
)
2170 self
.open = [] # A stack of the current and parent lists.
2171 self
.listblock
= None # Current list is in list block.
def load(self, sections):
    '''Load all list definitions from the 'sections' dictionary;
    delegates entirely to AbstractBlocks.load.'''
    AbstractBlocks.load(self, sections)
2175 AbstractBlocks
.validate(self
)
2176 for b
in self
.blocks
:
2177 # Check list has valid type.
2178 if not b
.type in b
.TYPES
:
2179 raise EAsciiDoc
,'[%s] illegal type' % b
.name
2180 # Check all list tags.
2181 if not b
.listtag
or not config
.tags
.has_key(b
.listtag
):
2182 warning('[%s] missing listtag' % b
.name
)
2183 if not b
.itemtag
or not config
.tags
.has_key(b
.itemtag
):
2184 warning('[%s] missing tag itemtag' % b
.name
)
2185 if not b
.texttag
or not config
.tags
.has_key(b
.texttag
):
2186 warning('[%s] missing tag texttag' % b
.name
)
2187 if b
.type == 'labeled':
2188 if not b
.entrytag
or not config
.tags
.has_key(b
.entrytag
):
2189 warning('[%s] missing entrytag' % b
.name
)
2190 if not b
.labeltag
or not config
.tags
.has_key(b
.labeltag
):
2191 warning('[%s] missing labeltag' % b
.name
)
2193 class DelimitedBlock(AbstractBlock
):
2195 AbstractBlock
.__init
__(self
)
2196 self
.CONF_ENTRIES
+= ('delimiter','template','filter')
2197 self
.OPTIONS
= ('skip','sectionbody','list')
2198 def load(self
,name
,entries
):
2199 AbstractBlock
.load(self
,name
,entries
)
2201 AbstractBlock
.dump(self
)
2202 write
= lambda s
: sys
.stdout
.write('%s%s' % (s
,writer
.newline
))
2205 return AbstractBlock
.isnext(self
)
2206 def translate(self
):
2207 if 'list' in self
.options
:
2208 lists
.listblock
= self
2209 reader
.read() # Discard delimiter.
2211 # Leave list block attributes for the list element.
2212 if lists
.listblock
is not self
:
2213 BlockTitle
.consume(attrs
)
2214 AttributeList
.consume(attrs
)
2215 self
.merge_attributes(attrs
)
2216 options
= self
.get_options()
2217 if safe() and self
.name
== 'blockdef-backend':
2218 unsafe_error('Backend Block')
2219 # Discard block body.
2220 reader
.read_until(self
.delimiter
,same_file
=True)
2221 elif 'skip' in options
:
2222 # Discard block body.
2223 reader
.read_until(self
.delimiter
,same_file
=True)
2225 template
= self
.get_template()
2226 stag
,etag
= config
.section2tags(template
,self
.attributes
)
2227 if 'sectionbody' in options
or 'list' in options
:
2228 # The body is treated like a SimpleSection.
2230 Section
.translate_body(self
)
2233 body
= reader
.read_until(self
.delimiter
,same_file
=True)
2234 presubs
,postsubs
= self
.get_subs()
2235 body
= Lex
.subs(body
,presubs
)
2236 if self
.get_filter():
2237 body
= filter_lines(self
.get_filter(),body
,self
.attributes
)
2238 body
= Lex
.subs(body
,postsubs
)
2239 # Write start tag, content, end tag.
2240 writer
.write(dovetail_tags(stag
,body
,etag
))
2241 if 'list' in options
:
2242 lists
.listblock
= None
2244 error('closing [%s] delimiter expected' % self
.name
)
2246 delimiter
= reader
.read() # Discard delimiter line.
2247 assert re
.match(self
.delimiter
,delimiter
)
2249 class DelimitedBlocks(AbstractBlocks
):
2250 '''List of delimited blocks.'''
2251 BLOCK_TYPE
= DelimitedBlock
2252 PREFIX
= 'blockdef-'
2254 AbstractBlocks
.__init
__(self
)
2255 def load(self
,sections
):
2256 '''Update blocks defined in 'sections' dictionary.'''
2257 AbstractBlocks
.load(self
,sections
)
2259 AbstractBlocks
.validate(self
)
2264 self
.colalign
= None # 'left','right','center'
2265 self
.rulerwidth
= None
2266 self
.colwidth
= None # Output width in page units.
2268 class Table(AbstractBlock
):
2269 COL_STOP
= r
"(`|'|\.)" # RE.
2270 ALIGNMENTS
= {'`':'left', "'":'right', '.':'center'}
2271 FORMATS
= ('fixed','csv','dsv')
2273 AbstractBlock
.__init
__(self
)
2274 self
.CONF_ENTRIES
+= ('template','fillchar','format','colspec',
2275 'headrow','footrow','bodyrow','headdata',
2276 'footdata', 'bodydata')
2277 # Configuration parameters.
2279 self
.format
=None # 'fixed','csv','dsv'
2287 # Calculated parameters.
2288 self
.underline
=None # RE matching current table underline.
2289 self
.isnumeric
=False # True if numeric ruler.
2290 self
.tablewidth
=None # Optional table width scale factor.
2291 self
.columns
=[] # List of Columns.
2293 self
.check_msg
='' # Message set by previous self.validate() call.
2294 def load(self
,name
,entries
):
2295 AbstractBlock
.load(self
,name
,entries
)
2296 '''Update table definition from section entries in 'entries'.'''
2297 for k
,v
in entries
.items():
2299 if v
and len(v
) == 1:
2302 raise EAsciiDoc
,'malformed table fillchar: %s' % v
2304 if v
in Table
.FORMATS
:
2307 raise EAsciiDoc
,'illegal table format: %s' % v
2308 elif k
== 'colspec':
2310 elif k
== 'headrow':
2312 elif k
== 'footrow':
2314 elif k
== 'bodyrow':
2316 elif k
== 'headdata':
2318 elif k
== 'footdata':
2320 elif k
== 'bodydata':
2323 AbstractBlock
.dump(self
)
2324 write
= lambda s
: sys
.stdout
.write('%s%s' % (s
,writer
.newline
))
2325 write('fillchar='+self
.fillchar
)
2326 write('format='+self
.format
)
2328 write('colspec='+self
.colspec
)
2330 write('headrow='+self
.headrow
)
2332 write('footrow='+self
.footrow
)
2333 write('bodyrow='+self
.bodyrow
)
2335 write('headdata='+self
.headdata
)
2337 write('footdata='+self
.footdata
)
2338 write('bodydata='+self
.bodydata
)
2341 AbstractBlock
.validate(self
)
2342 '''Check table definition and set self.check_msg if invalid else set
2343 self.check_msg to blank string.'''
2344 # Check global table parameters.
2345 if config
.textwidth
is None:
2346 self
.check_msg
= 'missing [miscellaneous] textwidth entry'
2347 elif config
.pagewidth
is None:
2348 self
.check_msg
= 'missing [miscellaneous] pagewidth entry'
2349 elif config
.pageunits
is None:
2350 self
.check_msg
= 'missing [miscellaneous] pageunits entry'
2351 elif self
.headrow
is None:
2352 self
.check_msg
= 'missing headrow entry'
2353 elif self
.footrow
is None:
2354 self
.check_msg
= 'missing footrow entry'
2355 elif self
.bodyrow
is None:
2356 self
.check_msg
= 'missing bodyrow entry'
2357 elif self
.headdata
is None:
2358 self
.check_msg
= 'missing headdata entry'
2359 elif self
.footdata
is None:
2360 self
.check_msg
= 'missing footdata entry'
2361 elif self
.bodydata
is None:
2362 self
.check_msg
= 'missing bodydata entry'
2367 return AbstractBlock
.isnext(self
)
2368 def parse_ruler(self
,ruler
):
2369 '''Parse ruler calculating underline and ruler column widths.'''
2370 fc
= re
.escape(self
.fillchar
)
2371 # Strip and save optional tablewidth from end of ruler.
2372 mo
= re
.match(r
'^(.*'+fc
+r
'+)([\d\.]+)$',ruler
)
2375 self
.tablewidth
= float(mo
.group(2))
2376 self
.attributes
['tablewidth'] = str(float(self
.tablewidth
))
2378 self
.tablewidth
= None
2379 self
.attributes
['tablewidth'] = '100.0'
2380 # Guess whether column widths are specified numerically or not.
2381 if ruler
[1] != self
.fillchar
:
2382 # If the first column does not start with a fillchar then numeric.
2383 self
.isnumeric
= True
2384 elif ruler
[1:] == self
.fillchar
*len(ruler
[1:]):
2385 # The case of one column followed by fillchars is numeric.
2386 self
.isnumeric
= True
2388 self
.isnumeric
= False
2389 # Underlines must be 3 or more fillchars.
2390 self
.underline
= r
'^' + fc
+ r
'{3,}$'
2391 splits
= re
.split(self
.COL_STOP
,ruler
)[1:]
2392 # Build self.columns.
2393 for i
in range(0,len(splits
),2):
2395 c
.colalign
= self
.ALIGNMENTS
[splits
[i
]]
2398 # Strip trailing fillchars.
2399 s
= re
.sub(fc
+r
'+$','',s
)
2403 c
.rulerwidth
= int(validate(s
,'int($)>0',
2404 'malformed ruler: bad width'))
2405 else: # Calculate column width from inter-fillchar intervals.
2406 if not re
.match(r
'^'+fc
+r
'+$',s
):
2407 raise EAsciiDoc
,'malformed ruler: illegal fillchars'
2408 c
.rulerwidth
= len(s
)+1
2409 self
.columns
.append(c
)
2410 # Fill in unspecified ruler widths.
2412 if self
.columns
[0].rulerwidth
is None:
2414 for c
in self
.columns
:
2415 if c
.rulerwidth
is None:
2416 c
.rulerwidth
= prevwidth
2417 prevwidth
= c
.rulerwidth
2418 def build_colspecs(self
):
2419 '''Generate colwidths and colspecs. This can only be done after the
2420 table arguments have been parsed since we use the table format.'''
2421 self
.attributes
['cols'] = len(self
.columns
)
2422 # Calculate total ruler width.
2424 for c
in self
.columns
:
2425 totalwidth
= totalwidth
+ c
.rulerwidth
2427 raise EAsciiDoc
,'zero width table'
2428 # Calculate marked up colwidths from rulerwidths.
2429 for c
in self
.columns
:
2430 # Convert ruler width to output page width.
2431 width
= float(c
.rulerwidth
)
2432 if self
.format
== 'fixed':
2433 if self
.tablewidth
is None:
2434 # Size proportional to ruler width.
2435 colfraction
= width
/config
.textwidth
2437 # Size proportional to page width.
2438 colfraction
= width
/totalwidth
2440 # Size proportional to page width.
2441 colfraction
= width
/totalwidth
2442 c
.colwidth
= colfraction
* config
.pagewidth
# To page units.
2443 if self
.tablewidth
is not None:
2444 c
.colwidth
= c
.colwidth
* self
.tablewidth
# Scale factor.
2445 if self
.tablewidth
> 1:
2446 c
.colwidth
= c
.colwidth
/100 # tablewidth is in percent.
2450 for c
in self
.columns
:
2451 self
.attributes
['colalign'] = c
.colalign
2452 self
.attributes
['colwidth'] = str(int(c
.colwidth
))
2453 s
= subs_attrs(self
.colspec
,self
.attributes
)
2455 warning('colspec dropped: contains undefined attribute')
2458 self
.attributes
['colspecs'] = string
.join(cols
,writer
.newline
)
2459 def split_rows(self
,rows
):
2460 '''Return a two item tuple containing a list of lines up to but not
2461 including the next underline (continued lines are joined ) and the
2462 tuple of all lines after the underline.'''
2463 reo
= re
.compile(self
.underline
)
2465 while not reo
.match(rows
[i
]):
2468 raise EAsciiDoc
,'missing table rows'
2470 raise EAsciiDoc
,'closing [%s] underline expected' % self
.name
2471 return (join_lines(rows
[:i
]), rows
[i
+1:])
2472 def parse_rows(self
, rows
, rtag
, dtag
):
2473 '''Parse rows list using the row and data tags. Returns a substituted
2474 list of output lines.'''
2476 # Source rows are parsed as single block, rather than line by line, to
2477 # allow the CSV reader to handle multi-line rows.
2478 if self
.format
== 'fixed':
2479 rows
= self
.parse_fixed(rows
)
2480 elif self
.format
== 'csv':
2481 rows
= self
.parse_csv(rows
)
2482 elif self
.format
== 'dsv':
2483 rows
= self
.parse_dsv(rows
)
2485 assert True,'illegal table format'
2486 # Substitute and indent all data in all rows.
2487 stag
,etag
= subs_tag(rtag
,self
.attributes
)
2489 result
.append(' '+stag
)
2490 for data
in self
.subs_row(row
,dtag
):
2491 result
.append(' '+data
)
2492 result
.append(' '+etag
)
2494 def subs_row(self
, data
, dtag
):
2495 '''Substitute the list of source row data elements using the data tag.
2496 Returns a substituted list of output table data items.'''
2498 if len(data
) < len(self
.columns
):
2499 warning('fewer row data items then table columns')
2500 if len(data
) > len(self
.columns
):
2501 warning('more row data items than table columns')
2502 for i
in range(len(self
.columns
)):
2503 if i
> len(data
) - 1:
2504 d
= '' # Fill missing column data with blanks.
2508 self
.attributes
['colalign'] = c
.colalign
2509 self
.attributes
['colwidth'] = str(int(c
.colwidth
))+config
.pageunits
2510 stag
,etag
= subs_tag(dtag
,self
.attributes
)
2511 # Insert AsciiDoc line break (' +') where row data has newlines
2512 # ('\n'). This is really only useful when the table format is csv
2513 # and the output markup is HTML. It's also a bit dubious in that it
2514 # assumes the user has not modified the shipped line break pattern.
2515 subs
= self
.get_subs()[0]
2516 if 'replacements' in subs
:
2517 # Insert line breaks in cell data.
2518 d
= re
.sub(r
'(?m)\n',r
' +\n',d
)
2519 d
= string
.split(d
,'\n') # So writer.newline is written.
2522 result
= result
+ [stag
] + Lex
.subs(d
,subs
) + [etag
]
2524 def parse_fixed(self
,rows
):
2525 '''Parse the list of source table rows. Each row item in the returned
2526 list contains a list of cell data elements.'''
2531 # build an encoded representation
2532 row
= char_decode(row
)
2533 for c
in self
.columns
:
2534 end
= start
+ c
.rulerwidth
2535 if c
is self
.columns
[-1]:
2536 # Text in last column can continue forever.
2537 # Use the encoded string to slice, but convert back
2538 # to plain string before further processing
2539 data
.append(string
.strip(char_encode(row
[start
:])))
2541 data
.append(string
.strip(char_encode(row
[start
:end
])))
2545 def parse_csv(self
,rows
):
2546 '''Parse the list of source table rows. Each row item in the returned
2547 list contains a list of cell data elements.'''
2551 rdr
= csv
.reader(StringIO
.StringIO(string
.join(rows
,'\r\n')),
2552 skipinitialspace
=True)
2557 raise EAsciiDoc
,'csv parse error: %s' % row
2559 def parse_dsv(self
,rows
):
2560 '''Parse the list of source table rows. Each row item in the returned
2561 list contains a list of cell data elements.'''
2562 separator
= self
.attributes
.get('separator',':')
2563 separator
= eval('"'+separator
+'"')
2564 if len(separator
) != 1:
2565 raise EAsciiDoc
,'malformed dsv separator: %s' % separator
2566 # TODO If separator is preceeded by an odd number of backslashes then
2567 # it is escaped and should not delimit.
2571 if row
== '': continue
2572 # Unescape escaped characters.
2573 row
= eval('"'+row
.replace('"','\\"')+'"')
2574 data
= row
.split(separator
)
2575 data
= [s
.strip() for s
in data
]
2578 def translate(self
):
2579 # Reset instance specific properties.
2580 self
.underline
= None
2583 BlockTitle
.consume(attrs
)
2584 # Add relevant globals to table substitutions.
2585 attrs
['pagewidth'] = str(config
.pagewidth
)
2586 attrs
['pageunits'] = config
.pageunits
2587 # Mix in document attribute list.
2588 AttributeList
.consume(attrs
)
2589 # Validate overridable attributes.
2590 for k
,v
in attrs
.items():
2592 if v
not in self
.FORMATS
:
2593 raise EAsciiDoc
, 'illegal [%s] %s: %s' % (self
.name
,k
,v
)
2595 elif k
== 'tablewidth':
2597 self
.tablewidth
= float(attrs
['tablewidth'])
2599 raise EAsciiDoc
, 'illegal [%s] %s: %s' % (self
.name
,k
,v
)
2600 self
.merge_attributes(attrs
)
2601 # Parse table ruler.
2602 ruler
= reader
.read()
2603 assert re
.match(self
.delimiter
,ruler
)
2604 self
.parse_ruler(ruler
)
2605 # Read the entire table.
2608 line
= reader
.read_next()
2609 # Table terminated by underline followed by a blank line or EOF.
2610 if len(table
) > 0 and re
.match(self
.underline
,table
[-1]):
2611 if line
in ('',None):
2614 raise EAsciiDoc
,'closing [%s] underline expected' % self
.name
2615 table
.append(reader
.read())
2616 #TODO: Inherited validate() doesn't set check_msg, needs checking.
2617 if self
.check_msg
: # Skip if table definition was marked invalid.
2618 warning('skipping %s table: %s' % (self
.name
,self
.check_msg
))
2620 # Generate colwidths and colspecs.
2621 self
.build_colspecs()
2622 # Generate headrows, footrows, bodyrows.
2623 # Headrow, footrow and bodyrow data replaces same named attributes in
2624 # the table markup template. In order to ensure this data does not get
2625 # a second attribute substitution (which would interfere with any
2626 # already substituted inline passthroughs) unique placeholders are used
2627 # (the tab character does not appear elsewhere since it is expanded on
2628 # input) which are replaced after template attribute substitution.
2629 headrows
= footrows
= []
2630 bodyrows
,table
= self
.split_rows(table
)
2633 bodyrows
,table
= self
.split_rows(table
)
2635 footrows
,table
= self
.split_rows(table
)
2637 headrows
= self
.parse_rows(headrows
, self
.headrow
, self
.headdata
)
2638 headrows
= string
.join(headrows
,writer
.newline
)
2639 self
.attributes
['headrows'] = '\theadrows\t'
2641 footrows
= self
.parse_rows(footrows
, self
.footrow
, self
.footdata
)
2642 footrows
= string
.join(footrows
,writer
.newline
)
2643 self
.attributes
['footrows'] = '\tfootrows\t'
2644 bodyrows
= self
.parse_rows(bodyrows
, self
.bodyrow
, self
.bodydata
)
2645 bodyrows
= string
.join(bodyrows
,writer
.newline
)
2646 self
.attributes
['bodyrows'] = '\tbodyrows\t'
2647 table
= subs_attrs(config
.sections
[self
.template
],self
.attributes
)
2648 table
= string
.join(table
, writer
.newline
)
2649 # Before we finish replace the table head, foot and body place holders
2650 # with the real data.
2652 table
= table
.replace('\theadrows\t', headrows
, 1)
2654 table
= table
.replace('\tfootrows\t', footrows
, 1)
2655 table
= table
.replace('\tbodyrows\t', bodyrows
, 1)
2658 class Tables(AbstractBlocks
):
2659 '''List of tables.'''
2661 PREFIX
= 'tabledef-'
2663 AbstractBlocks
.__init
__(self
)
2664 def load(self
,sections
):
2665 AbstractBlocks
.load(self
,sections
)
2666 '''Update tables defined in 'sections' dictionary.'''
2668 # Does not call AbstractBlocks.validate().
2669 # Check we have a default table definition,
2670 for i
in range(len(self
.blocks
)):
2671 if self
.blocks
[i
].name
== 'tabledef-default':
2672 default
= self
.blocks
[i
]
2675 raise EAsciiDoc
,'missing [table-default] section'
2676 # Set default table defaults.
2677 if default
.format
is None: default
.subs
= 'fixed'
2678 # Propagate defaults to unspecified table parameters.
2679 for b
in self
.blocks
:
2680 if b
is not default
:
2681 if b
.fillchar
is None: b
.fillchar
= default
.fillchar
2682 if b
.format
is None: b
.format
= default
.format
2683 if b
.template
is None: b
.template
= default
.template
2684 if b
.colspec
is None: b
.colspec
= default
.colspec
2685 if b
.headrow
is None: b
.headrow
= default
.headrow
2686 if b
.footrow
is None: b
.footrow
= default
.footrow
2687 if b
.bodyrow
is None: b
.bodyrow
= default
.bodyrow
2688 if b
.headdata
is None: b
.headdata
= default
.headdata
2689 if b
.footdata
is None: b
.footdata
= default
.footdata
2690 if b
.bodydata
is None: b
.bodydata
= default
.bodydata
2691 # Check all tables have valid fill character.
2692 for b
in self
.blocks
:
2693 if not b
.fillchar
or len(b
.fillchar
) != 1:
2694 raise EAsciiDoc
,'[%s] missing or illegal fillchar' % b
.name
2695 # Build combined tables delimiter patterns and assign defaults.
2697 for b
in self
.blocks
:
2699 # (ColStop,(ColWidth,FillChar+)?)+, FillChar+, TableWidth?
2700 b
.delimiter
= r
'^(' + Table
.COL_STOP \
2701 + r
'(\d*|' + re
.escape(b
.fillchar
) + r
'*)' \
2703 + re
.escape(b
.fillchar
) + r
'+' \
2705 delimiters
.append(b
.delimiter
)
2707 b
.headrow
= b
.bodyrow
2709 b
.footrow
= b
.bodyrow
2711 b
.headdata
= b
.bodydata
2713 b
.footdata
= b
.bodydata
2714 self
.delimiter
= join_regexp(delimiters
)
2715 # Check table definitions are valid.
2716 for b
in self
.blocks
:
2720 warning('[%s] table definition: %s' % (b
.name
,b
.check_msg
))
2723 # Default system macro syntax.
2724 SYS_DEFAULT
= r
'(?u)^(?P<name>\w(\w|-)*?)::(?P<target>\S*?)' + \
2725 r
'(\[(?P<attrlist>.*?)\])$'
2727 self
.macros
= [] # List of Macros.
2728 self
.current
= None # The last matched block macro.
2729 # Initialize default system macro.
2731 m
.pattern
= self
.SYS_DEFAULT
2733 m
.reo
= re
.compile(m
.pattern
)
2734 self
.macros
.append(m
)
2735 def load(self
,entries
):
2736 for entry
in entries
:
2740 # Delete undefined macro.
2741 for i
in range(len(self
.macros
)-1,-1,-1):
2742 if self
.macros
[i
].pattern
== m
.pattern
:
2745 # Check for duplicates.
2746 for m2
in self
.macros
:
2748 verbose('duplicate macro: '+entry
)
2751 self
.macros
.append(m
)
2753 write
= lambda s
: sys
.stdout
.write('%s%s' % (s
,writer
.newline
))
2755 # Dump all macros except the first (built-in system) macro.
2756 for m
in self
.macros
[1:]:
2757 write('%s=%s%s' % (m
.pattern
,m
.prefix
,m
.name
))
2760 # Check all named sections exist.
2762 for m
in self
.macros
:
2763 if m
.name
and m
.prefix
!= '+':
2765 def subs(self
,text
,prefix
='',callouts
=False):
2766 # If callouts is True then only callout macros are processed, if False
2767 # then all non-callout macros are processed.
2769 for m
in self
.macros
:
2770 if m
.prefix
== prefix
:
2771 if callouts ^
(m
.name
!= 'callout'):
2772 result
= m
.subs(result
)
2775 '''Return matching macro if block macro is next on reader.'''
2776 reader
.skip_blank_lines()
2777 line
= reader
.read_next()
2779 for m
in self
.macros
:
2781 if m
.reo
.match(line
):
2785 def match(self
,prefix
,name
,text
):
2786 '''Return re match object matching 'text' with macro type 'prefix',
2787 macro name 'name'.'''
2788 for m
in self
.macros
:
2789 if m
.prefix
== prefix
:
2790 mo
= m
.reo
.match(text
)
2794 if re
.match(name
,mo
.group('name')):
2798 # Macro set just prior to calling _subs_macro(). Ugly but there's no way
2799 # to pass optional arguments with _subs_macro().
2802 def _subs_macro(mo
):
2803 '''Function called to perform inline macro substitution. Uses matched macro
2804 regular expression object and returns string containing the substituted
2805 macro body. Called by Macros().subs().'''
2806 # Check if macro reference is escaped.
2807 if mo
.group()[0] == '\\':
2808 return mo
.group()[1:] # Strip leading backslash.
2810 # Delete groups that didn't participate in match.
2811 for k
,v
in d
.items():
2812 if v
is None: del d
[k
]
2816 if not d
.has_key('name'):
2817 warning('missing macro name group: %s' % mo
.re
.pattern
)
2820 section_name
= _macro
.section_name(name
)
2821 if not section_name
:
2823 # If we're dealing with a block macro get optional block ID and block title.
2824 if _macro
.prefix
== '#':
2825 AttributeList
.consume(d
)
2826 BlockTitle
.consume(d
)
2827 # Parse macro attributes.
2828 if d
.has_key('attrlist'):
2829 if d
['attrlist'] in (None,''):
2832 parse_attributes(d
['attrlist'],d
)
2833 if name
== 'callout':
2834 listindex
=int(d
['index'])
2835 d
['coid'] = calloutmap
.add(listindex
)
2836 # BUG: We've already done attribute substitution on the macro which means
2837 # that any escaped attribute references are now unescaped and will be
2838 # substituted by config.subs_section() below. As a partial fix have witheld
2839 # {0} from substitution but this kludge doesn't fix it for other attributes
2840 # containg unescaped references.
2843 d
['0'] = chr(0) # Replace temporarily with unused character.
2844 body
= config
.subs_section(section_name
,d
)
2847 elif len(body
) == 1:
2850 if _macro
.prefix
== '#':
2851 result
= string
.join(body
,writer
.newline
)
2853 # Internally processed inline macros use UNIX line separator.
2854 result
= string
.join(body
,'\n')
2856 result
= string
.replace(result
, chr(0), a0
)
2861 self
.pattern
= None # Matching regular expression.
2862 self
.name
= '' # Conf file macro name (None if implicit).
2863 self
.prefix
= '' # '' if inline, '+' if system, '#' if block.
2864 self
.reo
= None # Compiled pattern re object.
2865 def section_name(self
,name
=None):
2866 '''Return macro markup template section name based on macro name and
2867 prefix. Return None section not found.'''
2868 assert self
.prefix
!= '+'
2872 if self
.prefix
== '#':
2873 suffix
= '-blockmacro'
2875 suffix
= '-inlinemacro'
2876 if config
.sections
.has_key(name
+suffix
):
2879 warning('missing macro section: [%s]' % name
+suffix
)
2882 if self
.pattern
!= m
.pattern
:
2884 if self
.name
!= m
.name
:
2886 if self
.prefix
!= m
.prefix
:
2889 def load(self
,entry
):
2890 e
= parse_entry(entry
)
2892 raise EAsciiDoc
,'malformed macro entry: %s' % entry
2893 self
.pattern
, self
.name
= e
2894 if not is_regexp(self
.pattern
):
2895 raise EAsciiDoc
,'illegal regular expression in macro entry: %s' \
2897 self
.reo
= re
.compile(self
.pattern
)
2899 if self
.name
[0] in ('+','#'):
2900 self
.prefix
, self
.name
= self
.name
[0], self
.name
[1:]
2901 if self
.name
and not is_name(self
.name
):
2902 raise EAsciiDoc
,'illegal section name in macro entry: %s' % entry
2903 def subs(self
,text
):
2905 _macro
= self
# Pass the macro to _subs_macro().
2906 return self
.reo
.sub(_subs_macro
,text
)
2907 def translate(self
):
2908 ''' Block macro translation.'''
2909 assert self
.prefix
== '#'
2911 s
= subs_attrs(s
) # Substitute global attributes.
2919 self
.comap
= {} # key = list index, value = callouts list.
2920 self
.calloutindex
= 0 # Current callout index number.
2921 self
.listnumber
= 1 # Current callout list number.
2922 def listclose(self
):
2923 # Called when callout list is closed.
2924 self
.listnumber
+= 1
2925 self
.calloutindex
= 0
2927 def add(self
,listindex
):
2928 # Add next callout index to listindex map entry. Return the callout id.
2929 self
.calloutindex
+= 1
2930 # Append the coindex to a list in the comap dictionary.
2931 if not self
.comap
.has_key(listindex
):
2932 self
.comap
[listindex
] = [self
.calloutindex
]
2934 self
.comap
[listindex
].append(self
.calloutindex
)
2935 return self
.calloutid(self
.listnumber
, self
.calloutindex
)
2936 def calloutid(listnumber
,calloutindex
):
2937 return 'CO%d-%d' % (listnumber
,calloutindex
)
2938 calloutid
= staticmethod(calloutid
)
2939 def calloutids(self
,listindex
):
2940 # Retieve list of callout indexes that refer to listindex.
2941 if self
.comap
.has_key(listindex
):
2943 for coindex
in self
.comap
[listindex
]:
2944 result
+= ' ' + self
.calloutid(self
.listnumber
,coindex
)
2945 return result
.strip()
2947 error('no callouts refer to list item '+str(listindex
))
2949 def validate(self
,maxlistindex
):
2950 # Check that all list indexes referenced by callouts exist.
2951 for listindex
in self
.comap
.keys():
2952 if listindex
> maxlistindex
:
2953 warning('callout refers to non-existent list item '
2956 #---------------------------------------------------------------------------
2957 # Input stream Reader and output stream writer classes.
2958 #---------------------------------------------------------------------------
2961 '''Line oriented AsciiDoc input file reader. Processes include and
2962 conditional inclusion system macros. Tabs are expanded and lines are right
2964 # This class is not used directly, use Reader class instead.
2965 READ_BUFFER_MIN
= 10 # Read buffer low level.
2967 self
.f
= None # Input file object.
2968 self
.fname
= None # Input file name.
2969 self
.next
= [] # Read ahead buffer containing
2970 # [filename,linenumber,linetext] lists.
2971 self
.cursor
= None # Last read() [filename,linenumber,linetext].
2972 self
.tabsize
= 8 # Tab expansion number of spaces.
2973 self
.parent
= None # Included reader's parent reader.
2974 self
._lineno
= 0 # The last line read from file object f.
2975 self
.include_depth
= 0 # Current include depth.
2976 self
.include_max
= 5 # Maxiumum allowed include depth.
2977 def open(self
,fname
):
2979 verbose('reading: '+fname
)
2980 if fname
== '<stdin>':
2983 self
.f
= open(fname
,"rb")
2984 self
._lineno
= 0 # The last line read from file object f.
2986 # Prefill buffer by reading the first line and then pushing it back.
2987 if Reader1
.read(self
):
2988 self
.unread(self
.cursor
)
2990 def closefile(self
):
2991 '''Used by class methods to close nested include files.'''
2997 def read(self
,skip
=False):
2998 '''Read next line. Return None if EOF. Expand tabs. Strip trailing
2999 white space. Maintain self.next read ahead buffer. If skip=True then
3000 conditional exclusion is active (ifdef and ifndef macros).'''
3002 if len(self
.next
) <= self
.READ_BUFFER_MIN
:
3003 s
= self
.f
.readline()
3005 self
._lineno
= self
._lineno
+ 1
3007 if self
.tabsize
!= 0:
3008 s
= string
.expandtabs(s
,self
.tabsize
)
3009 s
= string
.rstrip(s
)
3010 self
.next
.append([self
.fname
,self
._lineno
,s
])
3011 if len(self
.next
) > self
.READ_BUFFER_MIN
:
3013 s
= self
.f
.readline()
3015 self
._lineno
= self
._lineno
+ 1
3016 # Return first (oldest) buffer entry.
3017 if len(self
.next
) > 0:
3018 self
.cursor
= self
.next
[0]
3020 result
= self
.cursor
[2]
3021 # Check for include macro.
3022 mo
= macros
.match('+',r
'include[1]?',result
)
3024 # Perform attribute substitution on inlcude macro file name.
3025 fname
= subs_attrs(mo
.group('target'))
3027 return Reader1
.read(self
) # Return next input line.
3028 if self
.include_depth
>= self
.include_max
:
3029 raise EAsciiDoc
,'maxiumum inlcude depth exceeded'
3030 if self
.fname
!= '<stdin>':
3031 fname
= safe_filename(fname
, os
.path
.dirname(self
.fname
))
3033 return Reader1
.read(self
) # Return next input line.
3034 if mo
.group('name') == 'include1':
3035 if not config
.dumping
:
3036 # Store the include file in memory for later
3037 # retrieval by the {include1:} system attribute.
3038 config
.include1
[fname
] = readlines(fname
)
3039 return '{include1:%s}' % fname
3041 # This is a configuration dump, just pass the macro
3044 # Parse include macro attributes.
3046 parse_attributes(mo
.group('attrlist'),attrs
)
3047 # Clone self and set as parent (self assumes the role of child).
3050 self
.parent
= parent
3051 if attrs
.has_key('tabsize'):
3052 self
.tabsize
= int(validate(attrs
['tabsize'],'int($)>=0', \
3053 'illegal include macro tabsize argument'))
3055 self
.include_depth
= self
.include_depth
+ 1
3056 result
= Reader1
.read(self
)
3058 if not Reader1
.eof(self
):
3059 result
= Reader1
.read(self
)
3064 '''Returns True if all lines have been read.'''
3065 if len(self
.next
) == 0:
3066 # End of current file.
3069 assign(self
,self
.parent
) # Restore parent reader.
3070 return Reader1
.eof(self
)
3075 def read_next(self
):
3076 '''Like read() but does not advance file pointer.'''
3077 if Reader1
.eof(self
):
3080 return self
.next
[0][2]
3081 def unread(self
,cursor
):
3082 '''Push the line (filename,linenumber,linetext) tuple back into the read
3083 buffer. Note that it's up to the caller to restore the previous
3086 self
.next
.insert(0,cursor
)
3088 class Reader(Reader1
):
3089 ''' Wraps (well, sought of) Reader1 class and implements conditional text
3092 Reader1
.__init__(self
)
3093 self
.depth
= 0 # if nesting depth.
3094 self
.skip
= False # true if we're skipping ifdef...endif.
3095 self
.skipname
= '' # Name of current endif macro target.
3096 self
.skipto
= -1 # The depth at which skipping is reenabled.
3097 def read_super(self
):
3098 result
= Reader1
.read(self
,self
.skip
)
3099 if result
is None and self
.skip
:
3100 raise EAsciiDoc
,'missing endif::%s[]' % self
.skipname
3103 result
= self
.read_super()
3107 mo
= macros
.match('+',r
'ifdef|ifndef|endif',result
)
3109 name
= mo
.group('name')
3110 target
= mo
.group('target')
3112 self
.depth
= self
.depth
-1
3114 raise EAsciiDoc
,'mismatched macro: %s' % result
3115 if self
.depth
== self
.skipto
:
3117 if target
and self
.skipname
!= target
:
3118 raise EAsciiDoc
,'mismatched macro: %s' % result
3119 else: # ifdef or ifndef.
3121 raise EAsciiDoc
,'missing macro target: %s' % result
3122 self
.depth
= self
.depth
+1
3123 result
= self
.read_super()
3126 mo
= macros
.match('+',r
'ifdef|ifndef|endif',result
)
3128 name
= mo
.group('name')
3129 target
= mo
.group('target')
3131 self
.depth
= self
.depth
-1
3132 else: # ifdef or ifndef.
3134 raise EAsciiDoc
,'missing macro target: %s' % result
3135 defined
= document
.attributes
.get(target
) is not None
3137 self
.skip
= not defined
3141 self
.skipto
= self
.depth
3142 self
.skipname
= target
3143 self
.depth
= self
.depth
+1
3144 result
= self
.read()
3146 # Expand executable block macros.
3147 mo
= macros
.match('+',r
'eval|sys|sys2',result
)
3149 action
= mo
.group('name')
3150 cmd
= mo
.group('attrlist')
3151 s
= system(action
, cmd
, is_macro
=True)
3153 self
.cursor
[2] = s
# So we don't re-evaluate.
3157 return self
.read_next() is None
3158 def read_next(self
):
3159 save_cursor
= self
.cursor
3160 result
= self
.read()
3161 if result
is not None:
3162 self
.unread(self
.cursor
)
3163 self
.cursor
= save_cursor
3165 def read_all(self
,fname
):
3166 '''Read all lines from file fname and return as list. Use like class
3167 method: Reader().read_all(fname)'''
3171 while not self
.eof():
3172 result
.append(self
.read())
3176 def read_lines(self
,count
=1):
3177 '''Return tuple containing count lines.'''
3180 while i
< count
and not self
.eof():
3181 result
.append(self
.read())
3182 return tuple(result
)
3183 def read_ahead(self
,count
=1):
3184 '''Same as read_lines() but does not advance the file pointer.'''
3187 save_cursor
= self
.cursor
3190 while i
< count
and not self
.eof():
3191 result
.append(self
.read())
3192 putback
.append(self
.cursor
)
3195 self
.unread(putback
.pop())
3197 self
.cursor
= save_cursor
3198 return tuple(result
)
3199 def skip_blank_lines(self
):
3200 reader
.read_until(r
'\s*\S+')
3201 def read_until(self
,pattern
,same_file
=False):
3202 '''Like read() but reads lines up to (but not including) the first line
3203 that matches the pattern regular expression. If same_file is True
3204 then the terminating pattern must occur in the file the was being read
3205 when the routine was called.'''
3207 fname
= self
.cursor
[0]
3209 reo
= re
.compile(pattern
)
3210 while not self
.eof():
3211 save_cursor
= self
.cursor
3213 if (not same_file
or fname
== self
.cursor
[0]) and reo
.match(s
):
3214 self
.unread(self
.cursor
)
3215 self
.cursor
= save_cursor
3218 return tuple(result
)
def read_continuation(self):
    '''Like read() but treats a trailing backslash as a line continuation
    character: continued lines are joined (backslashes stripped) and
    returned as one line.'''
    # NOTE(review): the tail of this method was reconstructed from a damaged
    # source -- confirm end-of-file handling against the original.
    result = ''
    s = self.read()
    while s is not None and len(s) > 0 and s[-1] == '\\':
        result = result + s[:-1]
        s = self.read()
    if s is not None:
        result = result + s
    elif not result:
        return None
    return result
'''Writes lines to output file.'''
newline = '\r\n'    # End of line terminator.
f = None            # Output file object.
fname = None        # Output file name.
lines_out = 0       # Number of lines written.
def open(self, fname):
    '''Open output file fname for writing ('<stdout>' selects sys.stdout).'''
    if fname == '<stdout>':
        # Fix: do not abspath '<stdout>' -- close() compares self.fname
        # against the literal '<stdout>' and would otherwise close stdout.
        self.fname = fname
        self.f = sys.stdout
    else:
        self.fname = os.path.abspath(fname)
        self.f = open(fname, "wb+")
    verbose('writing: ' + fname)
    self.lines_out = 0
def close(self):
    '''Close the output file unless it is stdout.'''
    if self.fname != '<stdout>':
        self.f.close()
def write(self, *args):
    '''Iterates arguments, writes tuple and list arguments one line per
    element, else writes argument as single line. If no arguments writes
    blank line. If argument is None nothing is written. self.newline is
    appended to each line.'''
    if len(args) == 0:
        self.f.write(self.newline)
        self.lines_out = self.lines_out + 1
    else:
        for arg in args:
            if isinstance(arg, list) or isinstance(arg, tuple):
                for s in arg:
                    self.f.write(s + self.newline)
                self.lines_out = self.lines_out + len(arg)
            elif arg is not None:
                self.f.write(arg + self.newline)
                self.lines_out = self.lines_out + 1
def write_tag(self, tagname, content, subs=SUBS_NORMAL, d=None):
    '''Write content enveloped by configuration file tag tagname.
    Substitutions specified in the 'subs' list are performed on the
    'content'.'''
    # NOTE(review): reconstructed from a damaged source -- confirm the
    # stag/etag emission order against the original.
    stag, etag = config.tag(tagname, d)
    if stag:
        self.write(stag)
    if content:
        self.write(Lex.subs(content, subs))
    if etag:
        self.write(etag)
3279 #---------------------------------------------------------------------------
3280 # Configuration file processing.
3281 #---------------------------------------------------------------------------
def _subs_specialwords(mo):
    '''Special word substitution function called by
    Config.subs_specialwords().'''
    word = mo.re.pattern                    # The special word.
    template = config.specialwords[word]    # The corresponding markup template.
    if template not in config.sections:
        raise EAsciiDoc('missing special word template [%s]' % template)
    if mo.group()[0] == '\\':
        return mo.group()[1:]               # Return escaped word.
    args = {}
    args['words'] = mo.group()      # The full match string is argument 'words'.
    args.update(mo.groupdict())     # Add other named match groups to the arguments.
    # Delete groups that didn't participate in match.
    for k, v in list(args.items()):
        if v is None:
            del args[k]
    lines = subs_attrs(config.sections[template], args)
    if len(lines) == 0:
        result = ''
    elif len(lines) == 1:
        result = lines[0]
    else:
        result = writer.newline.join(lines)
    return result
'''Methods to process configuration files.'''
# Predefined section name regexp's.
SPECIAL_SECTIONS = ('tags', 'miscellaneous', 'attributes', 'specialcharacters',
    'specialwords', 'macros', 'replacements', 'quotes', 'titles',
    r'paradef.+', r'listdef.+', r'blockdef.+', r'tabledef.*')
def __init__(self):
    self.sections = OrderedDict()   # Keyed by section name containing
                                    # lists of section lines.
    # Command-line options.
    self.verbose = False
    self.header_footer = True       # -s, --no-header-footer option.
    # [miscellaneous] section.
    self.tabsize = 8        # NOTE(review): reconstructed default -- confirm.
    self.textwidth = 70     # NOTE(review): reconstructed default -- confirm.
    self.newline = '\r\n'
    self.pagewidth = None
    self.pageunits = None
    self.outfilesuffix = ''
    self.tags = {}              # Values contain (stag,etag) tuples.
    self.specialchars = {}      # Values of special character substitutions.
    self.specialwords = {}      # Name is special word pattern, value is macro.
    self.replacements = OrderedDict()   # Key is find pattern, value is
                                        # replace pattern.
    self.specialsections = {}   # Name is special section name pattern, value
                                # is corresponding section name.
    self.quotes = {}            # Values contain corresponding tag name.
    self.fname = ''             # Most recently loaded configuration file name.
    self.conf_attrs = {}        # Glossary entries from conf files.
    self.cmd_attrs = {}         # Attributes from command-line -a options.
    self.loaded = []            # Loaded conf files.
    self.include1 = {}          # Holds include1::[] files for {include1:}.
    self.dumping = False        # True if asciidoc -c option specified.
def load(self, fname, dir=None):
    '''Loads sections dictionary with sections from file fname.
    Existing sections are overlaid. Silently skips missing configuration
    files.'''
    # NOTE(review): several interior lines reconstructed from a damaged
    # source -- confirm against the original file.
    if dir:
        fname = os.path.join(dir, fname)
    # Silently skip missing configuration file.
    if not os.path.isfile(fname):
        return
    # Don't load conf files twice (local and application conf files are the
    # same if the source file is in the application directory).
    if realpath(fname) in self.loaded:
        return
    rdr = Reader()  # Reader processes system macros.
    rdr.open(fname)
    self.fname = fname
    reo = re.compile(r'(?u)^\[(?P<section>[^\W\d][\w-]*)\]\s*$')
    sections = OrderedDict()
    section, contents = '', []
    while not rdr.eof():
        s = rdr.read()
        if s and s[0] == '#':       # Skip comment lines.
            continue
        if s[:2] == '\\#':          # Unescape lines starting with '#'.
            s = s[1:]
        s = s.rstrip()
        found = reo.findall(s)
        if found:
            if section:             # Store previous section.
                if section in sections and self.is_special_section(section):
                    # Merge line oriented special sections.
                    contents = sections[section] + contents
                sections[section] = contents
            section = found[0].lower()
            contents = []
        else:
            contents.append(s)
    if section and contents:        # Store last section.
        if section in sections and self.is_special_section(section):
            # Merge line oriented special sections.
            contents = sections[section] + contents
        sections[section] = contents
    rdr.closefile()
    # Delete blank lines from sections.
    for k in sections.keys():
        for i in range(len(sections[k]) - 1, -1, -1):
            if not sections[k][i]:
                del sections[k][i]
            elif not self.is_special_section(k):
                break   # Only trailing blanks from non-special sections.
    # Add/overwrite new sections.
    self.sections.update(sections)
    self.parse_tags()
    # Internally [miscellaneous] section entries are just attributes.
    d = {}
    parse_entries(sections.get('miscellaneous', ()), d, unquote=True,
            allow_name_only=True)
    update_attrs(self.conf_attrs, d)
    d = {}
    parse_entries(sections.get('attributes', ()), d, unquote=True,
            allow_name_only=True)
    update_attrs(self.conf_attrs, d)
    # Update document attributes so they are available immediately.
    document.init_attrs()
    d = {}
    parse_entries(sections.get('titles', ()), d)
    Title.load(d)   # NOTE(review): reconstructed call -- confirm.
    parse_entries(sections.get('specialcharacters', ()), self.specialchars)
    parse_entries(sections.get('quotes', ()), self.quotes, unique_values=True)
    self.parse_specialwords()
    self.parse_replacements()
    self.parse_specialsections()
    paragraphs.load(sections)
    lists.load(sections)
    blocks.load(sections)
    tables.load(sections)
    macros.load(sections.get('macros', ()))
    self.loaded.append(realpath(fname))
def load_all(self, dir):
    '''Load the standard configuration files from directory 'dir'.'''
    self.load('asciidoc.conf', dir)
    conf = document.backend + '.conf'
    self.load(conf, dir)
    conf = document.backend + '-' + document.doctype + '.conf'
    self.load(conf, dir)
    # Load ./filters/*.conf files if they exist.
    filters = os.path.join(dir, 'filters')
    if os.path.isdir(filters):
        for f in os.listdir(filters):
            if re.match(r'^.+\.conf$', f):
                self.load(f, filters)
def load_miscellaneous(self, d):
    '''Set miscellaneous configuration entries from dictionary 'd'.'''
    def set_misc(name, rule='True', intval=False):
        # Validate and assign a single [miscellaneous] entry if present.
        if name in d:
            errmsg = 'illegal [miscellaneous] %s entry' % name
            if intval:
                setattr(self, name, int(validate(d[name], rule, errmsg)))
            else:
                setattr(self, name, validate(d[name], rule, errmsg))
    set_misc('tabsize', 'int($)>0', intval=True)
    set_misc('textwidth', 'int($)>0', intval=True)
    set_misc('pagewidth', 'int($)>0', intval=True)
    set_misc('pageunits')
    set_misc('outfilesuffix')
    if 'newline' in d:
        # Convert escape sequences to their character values.
        # NOTE(review): eval of a configuration value -- acceptable only for
        # trusted conf files; do not feed untrusted input through here.
        self.newline = eval('"' + d['newline'] + '"')
def validate(self):
    '''Check the configuration for internal consistency. Called after all
    configuration files have been loaded.'''
    # Heuristic validate that at least one configuration file was loaded.
    if not self.specialchars or not self.tags or not lists:
        raise EAsciiDoc('incomplete configuration files')
    # Check special characters are only one character long.
    for k in self.specialchars.keys():
        if len(k) != 1:
            raise EAsciiDoc('[specialcharacters] '
                    'must be a single character: %s' % k)
    # Check all special words have a corresponding inline macro body.
    for macro in self.specialwords.values():
        if not is_name(macro):
            raise EAsciiDoc('illegal special word name: %s' % macro)
        if macro not in self.sections:
            warning('missing special word macro: [%s]' % macro)
    # Check all text quotes have a corresponding tag.
    for q in self.quotes.keys():
        tag = self.quotes[q]
        if tag not in self.tags:
            warning('[quotes] %s missing tag definition: %s' % (q, tag))
    # Check all specialsections section names exist.
    for k, v in self.specialsections.items():
        if v not in self.sections:
            warning('[%s] missing specialsections section' % v)
    paragraphs.validate()
    lists.validate()
    blocks.validate()
    tables.validate()
    macros.validate()
def is_special_section(self, section_name):
    '''True if section_name matches one of the SPECIAL_SECTIONS regexps.'''
    for name in self.SPECIAL_SECTIONS:
        if re.match(name, section_name):
            return True
    return False
def dump(self):
    '''Dump configuration to stdout.'''
    # NOTE(review): several interior lines reconstructed from a damaged
    # source -- confirm section order against the original.
    hdr = ''
    hdr = hdr + '#' + writer.newline
    hdr = hdr + '# Generated by AsciiDoc %s for %s %s.%s' % \
        (VERSION, document.backend, document.doctype, writer.newline)
    t = time.asctime(time.localtime(time.time()))
    hdr = hdr + '# %s%s' % (t, writer.newline)
    hdr = hdr + '#' + writer.newline
    sys.stdout.write(hdr)
    # Dump special sections.
    # Dump only the configuration file and command-line attributes.
    # [miscellanous] entries are dumped as part of the [attributes].
    d = {}
    d.update(self.conf_attrs)
    d.update(self.cmd_attrs)
    dump_section('attributes', d)
    dump_section('quotes', self.quotes)
    dump_section('specialcharacters', self.specialchars)
    d = {}
    for k, v in self.specialwords.items():
        if v in d:
            d[v] = '%s "%s"' % (d[v], k)    # Append word list.
        else:
            d[v] = '"%s"' % k
    dump_section('specialwords', d)
    dump_section('replacements', self.replacements)
    dump_section('specialsections', self.specialsections)
    d = {}
    for k, v in self.tags.items():
        d[k] = '%s|%s' % v
    dump_section('tags', d)
    paragraphs.dump()
    lists.dump()
    blocks.dump()
    tables.dump()
    macros.dump()
    # Dump remaining sections.
    for k in self.sections.keys():
        if not self.is_special_section(k):
            sys.stdout.write('[%s]%s' % (k, writer.newline))
            for line in self.sections[k]:
                sys.stdout.write('%s%s' % (line, writer.newline))
            sys.stdout.write(writer.newline)
def subs_section(self, section, d):
    '''Section attribute substitution using attributes from
    document.attributes and 'd'. Lines containing undefined
    attributes are deleted.'''
    if section in self.sections:
        return subs_attrs(self.sections[section], d)
    else:
        warning('missing [%s] section' % section)
        return ()
def parse_tags(self):
    '''Parse [tags] section entries into self.tags dictionary.'''
    d = {}
    parse_entries(self.sections.get('tags', ()), d)
    for k, v in d.items():
        if v is None:
            # A name-only entry undefines the tag.
            if k in self.tags:
                del self.tags[k]
        elif v == '':
            self.tags[k] = (None, None)
        else:
            mo = re.match(r'(?P<stag>.*)\|(?P<etag>.*)', v)
            if mo:
                self.tags[k] = (mo.group('stag'), mo.group('etag'))
            else:
                raise EAsciiDoc('[tag] %s value malformed' % k)
def tag(self, name, d=None):
    '''Returns (starttag,endtag) tuple named name from configuration file
    [tags] section. Raise error if not found. If a dictionary 'd' is
    passed then merge with document attributes and perform attribute
    substitution on tags.'''
    # TODO: Tags should be stored a single string, not split into start
    # and end tags since most are going to be substituted anyway (see
    # subs_tag() for how we should process them. parse_tags() (above)
    # should only validate i.e. parse_check(). This routine should be renamed
    # split_tag() and would call subs_tag(). self.tags dictionary values
    # would be strings not tuples.
    if name not in self.tags:
        raise EAsciiDoc('missing tag: %s' % name)
    stag, etag = self.tags[name]
    if d is not None:
        # TODO: Should we warn if substitution drops a tag?
        if stag:
            stag = subs_attrs(stag, d)
        if etag:
            etag = subs_attrs(etag, d)
    if stag is None: stag = ''
    if etag is None: etag = ''
    return (stag, etag)
def parse_specialsections(self):
    '''Parse specialsections section to self.specialsections dictionary.'''
    # TODO: This is virtually the same as parse_replacements() and should
    # be factored to single routine.
    d = {}
    parse_entries(self.sections.get('specialsections', ()), d, unquote=True)
    for pat, sectname in d.items():
        pat = strip_quotes(pat)
        if not is_regexp(pat):
            raise EAsciiDoc('[specialsections] entry '
                    'is not a valid regular expression: %s' % pat)
        if sectname is None:
            # A name-only entry undefines the special section.
            if pat in self.specialsections:
                del self.specialsections[pat]
        else:
            self.specialsections[pat] = sectname
def parse_replacements(self):
    '''Parse replacements section into self.replacements dictionary.'''
    d = OrderedDict()
    parse_entries(self.sections.get('replacements', ()), d, unquote=True)
    for pat, rep in d.items():
        pat = strip_quotes(pat)
        if not is_regexp(pat):
            raise EAsciiDoc('[replacements] entry in %s '
                    'is not a valid regular expression: %s' % (self.fname, pat))
        if rep is None:
            # A name-only entry undefines the replacement.
            if pat in self.replacements:
                del self.replacements[pat]
        else:
            self.replacements[pat] = strip_quotes(rep)
def subs_replacements(self, s):
    '''Substitute patterns from self.replacements in 's'.'''
    result = s
    for pat, rep in self.replacements.items():
        result = re.sub(pat, rep, result)
    return result
def parse_specialwords(self):
    '''Parse special words section into self.specialwords dictionary.'''
    reo = re.compile(r'(?:\s|^)(".+?"|[^"\s]+)(?=\s|$)')
    for line in self.sections.get('specialwords', ()):
        e = parse_entry(line)
        if not e:
            raise EAsciiDoc('[specialwords] entry in %s is malformed: %s'
                    % (self.fname, line))
        name, wordlist = e
        if not is_name(name):
            raise EAsciiDoc('[specialwords] name in %s is illegal: %s'
                    % (self.fname, name))
        if wordlist is None:
            # Undefine all words associated with 'name'.
            for k, v in list(self.specialwords.items()):
                if v == name:
                    del self.specialwords[k]
        else:
            for word in reo.findall(wordlist):
                word = strip_quotes(word)
                if not is_regexp(word):
                    raise EAsciiDoc('[specialwords] entry in %s '
                            'is not a valid regular expression: %s'
                            % (self.fname, word))
                self.specialwords[word] = name
def subs_specialchars(self, s):
    '''Perform special character substitution on string 's'.'''
    # It may seem like a good idea to escape special characters with a '\'
    # character; the reason we don't is because the escape character itself
    # then has to be escaped and this makes including code listings
    # problematic. Use the predefined {amp},{lt},{gt} attributes instead.
    return ''.join([self.specialchars.get(ch, ch) for ch in s])
def subs_specialwords(self, s):
    '''Search for word patterns from self.specialwords in 's' and
    substitute using corresponding macro.'''
    result = s
    for word in self.specialwords.keys():
        # _subs_specialwords() renders the matched word via its template.
        result = re.sub(word, _subs_specialwords, result)
    return result
def expand_templates(self, section):
    '''Return the lines of 'section' with template::[] macros replaced by
    the lines of the named template section.'''
    result = []
    for line in self.sections[section]:
        mo = macros.match('+', r'template', line)
        if mo:
            s = mo.group('attrlist')
            if s in self.sections:
                result += self.sections[s]
            else:
                warning('missing [%s] section' % s)
        else:
            result.append(line)
    return result
def expand_all_templates(self):
    '''Expand template::[] macros in every configuration section.'''
    for k in self.sections.keys():
        self.sections[k] = self.expand_templates(k)
def section2tags(self, section, d=None):
    '''Perform attribute substitution on 'section' using document
    attributes plus 'd' attributes. Return tuple (stag,etag) containing
    pre and post | placeholder tags.'''
    if d is None:
        d = {}  # Fix: avoid the shared mutable default-argument pitfall.
    assert section is not None
    if section in self.sections:
        body = self.sections[section]
    else:
        warning('missing [%s] section' % section)
        body = ()
    # Split macro body into start and end tag lists at the '|' placeholder.
    stag = []
    etag = []
    in_stag = True
    for s in body:
        if in_stag:
            mo = re.match(r'(?P<stag>.*)\|(?P<etag>.*)', s)
            if mo:
                if mo.group('stag'):
                    stag.append(mo.group('stag'))
                if mo.group('etag'):
                    etag.append(mo.group('etag'))
                in_stag = False
            else:
                stag.append(s)
        else:
            etag.append(s)
    # Do attribute substitution last so {brkbar} can be used to escape |.
    stag = subs_attrs(stag, d)
    etag = subs_attrs(etag, d)
    return (stag, etag)
3729 #---------------------------------------------------------------------------
3731 #---------------------------------------------------------------------------
APP_DIR = None                          # This file's directory.
USER_DIR = None                         # ~/.asciidoc
GLOBAL_CONFIG_DIR = "/etc/asciidoc"     # System-wide configuration directory.
HELP_FILE = 'help.conf'                 # Help topics configuration file name.
document = Document()           # The document being processed.
config = Config()               # Configuration file reader.
reader = Reader()               # Input stream line reader.
writer = Writer()               # Output stream line writer.
paragraphs = Paragraphs()       # Paragraph definitions.
lists = Lists()                 # List definitions.
blocks = DelimitedBlocks()      # DelimitedBlock definitions.
tables = Tables()               # Table definitions.
macros = Macros()               # Macro definitions.
calloutmap = CalloutMap()       # Coordinates callouts and callout list.
def asciidoc(backend, doctype, confiles, infile, outfile, options):
    '''Convert AsciiDoc document to DocBook document of type doctype
    The AsciiDoc document is read from file object src the translated
    DocBook file written to file object dst.'''
    # NOTE(review): several interior lines reconstructed from a damaged
    # source -- confirm the reader/writer setup sequence against the original.
    try:
        if doctype not in ('article', 'manpage', 'book'):
            raise EAsciiDoc('illegal document type')
        if backend == 'linuxdoc' and doctype != 'article':
            raise EAsciiDoc('%s %s documents are not supported'
                    % (backend, doctype))
        document.backend = backend
        if not os.path.exists(os.path.join(APP_DIR, backend+'.conf')) and not \
                os.path.exists(os.path.join(GLOBAL_CONFIG_DIR, backend+'.conf')):
            warning('non-standard %s backend' % backend, linenos=False)
        document.doctype = doctype
        document.infile = infile
        document.init_attrs()
        # Set processing options.
        for o in options:
            if o == '-c': config.dumping = True
            if o == '-s': config.header_footer = False
            if o == '-v': config.verbose = True
        # Check the infile exists.
        if infile != '<stdin>' and not os.path.isfile(infile):
            raise EAsciiDoc('input file %s missing' % infile)
        if '-e' not in options:
            # Load global configuration from system configuration directory.
            config.load_all(GLOBAL_CONFIG_DIR)
            # Load global configuration files from asciidoc directory.
            config.load_all(APP_DIR)
            # Load configuration files from ~/.asciidoc if it exists.
            if USER_DIR is not None:
                config.load_all(USER_DIR)
            # Load configuration files from document directory.
            config.load_all(os.path.dirname(infile))
        if infile != '<stdin>':
            # Load implicit document specific configuration files if they exist.
            config.load(os.path.splitext(infile)[0] + '.conf')
            config.load(os.path.splitext(infile)[0] + '-' + backend + '.conf')
        # If user specified configuration file(s) overlay the defaults.
        if confiles:
            for conf in confiles:
                if os.path.isfile(conf):
                    config.load(conf)
                else:
                    raise EAsciiDoc('configuration file %s missing' % conf)
        document.init_attrs()   # Add conf files.
        # Check configuration for consistency.
        config.validate()
        # Build outfile name now all conf files have been read.
        if outfile is None:
            outfile = os.path.splitext(infile)[0] + '.' + backend
            if config.outfilesuffix:
                # Change file extension.
                outfile = os.path.splitext(outfile)[0] + config.outfilesuffix
        document.outfile = outfile
        # Translate the document.
        reader.open(infile)
        reader.tabsize = config.tabsize
        try:
            writer.newline = config.newline
            writer.open(outfile)
            try:
                document.init_attrs()   # Add file name related entries.
                document.translate()
            finally:
                writer.close()
        finally:
            reader.closefile()  # Keep reader state for postmortem.
    except (KeyboardInterrupt, SystemExit):
        sys.stdout.write('\n')
    except Exception:
        # Py2.3-compatible equivalent of 'except Exception as e'.
        e = sys.exc_info()[1]
        # Cleanup: remove the (incomplete) output file.
        if outfile and outfile != '<stdout>' and os.path.isfile(outfile):
            os.unlink(outfile)
        # Build and print error description.
        msg = 'FAILED: '
        if reader.cursor:
            msg = msg + "%s: line %d: " % (reader.cursor[0], reader.cursor[1])
        if isinstance(e, EAsciiDoc):
            print_stderr(msg + str(e))
        else:
            print_stderr(msg + 'unexpected error:')
            print_stderr('-'*60)
            traceback.print_exc(file=sys.stderr)
            print_stderr('-'*60)
        sys.exit(1)
def usage(msg=''):
    '''Print an optional error message followed by command usage to stderr.'''
    if msg:
        print_stderr(msg)
    print_stderr('Usage: asciidoc [OPTIONS] FILE')
    print_stderr('Man page: asciidoc -h manpage')
    print_stderr('Syntax: asciidoc -h syntax')
    print_stderr('Help topics: asciidoc -h topics')
def show_help(topic):
    '''Print help topic to stdout.'''
    # Print [topic] section from help.conf files.
    topics = OrderedDict()
    load_sections(topics, HELP_FILE, GLOBAL_CONFIG_DIR)
    load_sections(topics, HELP_FILE, APP_DIR)
    if USER_DIR is not None:
        load_sections(topics, HELP_FILE, USER_DIR)
    if len(topics) == 0:
        print_stderr('no help topics found')
        sys.exit(1)
    lines = topics.get(topic)
    if not lines:
        print_stderr('help topic not found: %s' % topic)
        s = 'available help topics: '
        for k in topics.keys():
            s = s + k + ', '
        print_stderr(s[:-2])    # Drop the trailing ', '.
        sys.exit(1)
    for line in lines:
        sys.stdout.write(line + '\n')
3874 if float(sys
.version
[:3]) < 2.3:
3875 print_stderr('FAILED: Python 2.3 or better required.')
3877 # Locate the executable and configuration files directory.
3878 global APP_DIR
,USER_DIR
3879 APP_DIR
= os
.path
.dirname(realpath(sys
.argv
[0]))
3880 USER_DIR
= os
.environ
.get('HOME')
3881 if USER_DIR
is not None:
3882 USER_DIR
= os
.path
.join(USER_DIR
,'.asciidoc')
3883 if not os
.path
.isdir(USER_DIR
):
3885 # Process command line options.
3888 #DEPRECATED: --safe option.
3889 opts
,args
= getopt
.getopt(sys
.argv
[1:],
3890 'a:b:cd:ef:h:no:svw:',
3891 ['attribute=','backend=','conf-file=','doctype=','dump-conf',
3892 'help=','no-conf','no-header-footer','out-file=','profile',
3893 'section-numbers','verbose','version','safe','unsafe'])
3894 except getopt
.GetoptError
,e
:
3896 if re
.search(r
'^option (-h|--help) requires argument$', msg
):
3897 # It's OK not to supply help option argument.
3898 show_help('default')
3906 backend
= DEFAULT_BACKEND
3907 doctype
= DEFAULT_DOCTYPE
3913 if o
in ('--help','-h'):
3917 show_help('default')
3919 if o
== '--profile':
3922 document
.safe
= False
3923 if o
== '--version':
3924 print_stderr('asciidoc %s' % VERSION
)
3926 if o
in ('-b','--backend'):
3928 if o
in ('-c','--dump-conf'):
3929 options
.append('-c')
3930 if o
in ('-d','--doctype'):
3932 if o
in ('-e','--no-conf'):
3933 options
.append('-e')
3934 if o
in ('-f','--conf-file'):
3936 if o
in ('-n','--section-numbers'):
3939 if o
in ('-a','--attribute'):
3940 e
= parse_entry(v
, allow_name_only
=True)
3942 usage('Illegal -a option: %s' % v
)
3945 config
.cmd_attrs
[k
] = v
3946 if o
in ('-o','--out-file'):
3948 outfile
= '<stdout>'
3951 if o
in ('-s','--no-header-footer'):
3952 options
.append('-s')
3953 if o
in ('-v','--verbose'):
3954 options
.append('-v')
3955 if len(args
) == 0 and len(opts
) == 0:
3959 usage('No source file specified')
3962 usage('No --backend option specified')
3968 if infile
== '<stdin>' and not outfile
:
3969 outfile
= '<stdout>'
3970 # Convert in and out files to absolute paths.
3971 if infile
!= '<stdin>':
3972 infile
= os
.path
.abspath(infile
)
3973 if outfile
and outfile
!= '<stdout>':
3974 outfile
= os
.path
.abspath(outfile
)
3978 profile
.run("asciidoc('%s','%s',(),'%s',None,())"
3979 % (backend
,doctype
,infile
))
3981 asciidoc(backend
, doctype
, confiles
, infile
, outfile
, options
)
3982 if document
.has_errors
:
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        sys.exit(1)
    except SystemExit:
        raise
    except:
        # Last-resort handler: report the unexpected error with a traceback.
        print_stderr('%s: unexpected error: %s' %
            (os.path.basename(sys.argv[0]), sys.exc_info()[1]))
        print_stderr('-'*60)
        traceback.print_exc(file=sys.stderr)
        print_stderr('-'*60)
        sys.exit(1)