botlistprojects/botbackup/lib/pyparsing/pyparsing.py

   1 # module pyparsing.py
   2 #
   3 # Copyright (c) 2003-2007  Paul T. McGuire
   4 #
   5 # Permission is hereby granted, free of charge, to any person obtaining
   6 # a copy of this software and associated documentation files (the
   7 # "Software"), to deal in the Software without restriction, including
   8 # without limitation the rights to use, copy, modify, merge, publish,
   9 # distribute, sublicense, and/or sell copies of the Software, and to
  10 # permit persons to whom the Software is furnished to do so, subject to
  11 # the following conditions:
  12 #
  13 # The above copyright notice and this permission notice shall be
  14 # included in all copies or substantial portions of the Software.
  15 #
  16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23 #
  24 #from __future__ import generators
  25
  26 __doc__ = \
  27 """
  28 pyparsing module - Classes and methods to define and execute parsing grammars
  29
  30 The pyparsing module is an alternative approach to creating and executing simple grammars,
  31 vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
  32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
  33 provides a library of classes that you use to construct the grammar directly in Python.
  34
  35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
  36
  37     from pyparsing import Word, alphas
  38
  39     # define grammar of a greeting
  40     greet = Word( alphas ) + "," + Word( alphas ) + "!"
  41
  42     hello = "Hello, World!"
  43     print hello, "->", greet.parseString( hello )
  44
  45 The program outputs the following::
  46
  47     Hello, World! -> ['Hello', ',', 'World', '!']
  48
  49 The Python representation of the grammar is quite readable, owing to the self-explanatory
  50 class names, and the use of '+', '|' and '^' operators.
  51
  52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
  53 object with named attributes.
  54
  55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
  56  - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
  57  - quoted strings
  58  - embedded comments
  59 """
  60
# Release identification for this copy of the module.
__version__ = "1.4.8"
__versionTime__ = "7 October 2007 00:25"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  64
  65 import string
  66 from weakref import ref as wkref
  67 import copy,sys
  68 import warnings
  69 import re
  70 import sre_constants
  71 import xml.sax.saxutils
  72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
  73
  74 def _ustr(obj):
  75     """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
  76        str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
  77        then < returns the unicode object | encodes it with the default encoding | ... >.
  78     """
  79     try:
  80         # If this works, then _ustr(obj) has the same behaviour as str(obj), so
  81         # it won't break any existing code.
  82         return str(obj)
  83
  84     except UnicodeEncodeError, e:
  85         # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
  86         # state that "The return value must be a string object". However, does a
  87         # unicode object (being a subclass of basestring) count as a "string
  88         # object"?
  89         # If so, then return a unicode object:
  90         return unicode(obj)
  91         # Else encode it... but how? There are many choices... :)
  92         # Replace unprintables with escape codes?
  93         #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
  94         # Replace unprintables with question marks?
  95         #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
  96         # ...
  97
  98 def _str2dict(strg):
  99     return dict( [(c,0) for c in strg] )
 100     #~ return set( [c for c in strg] )
 101
class _Constants(object):
    """Empty class: defines no attributes or methods of its own."""
    pass
 104
# Character-set strings, built from the constants of the string module.
# alphas is the lowercase letters followed by the uppercase letters.
alphas     = string.lowercase + string.uppercase
nums       = string.digits
# hexnums is the decimal digits followed by the hex letters in both cases.
hexnums    = nums + "ABCDEFabcdef"
alphanums  = alphas + nums
 109
 110 class ParseBaseException(Exception):
 111     """base exception class for all parsing runtime exceptions"""
 112     __slots__ = ( "loc","msg","pstr","parserElement" )
 113     # Performance tuning: we construct a *lot* of these, so keep this
 114     # constructor as small and fast as possible
 115     def __init__( self, pstr, loc=0, msg=None, elem=None ):
 116         self.loc = loc
 117         if msg is None:
 118             self.msg = pstr
 119             self.pstr = ""
 120         else:
 121             self.msg = msg
 122             self.pstr = pstr
 123         self.parserElement = elem
 124
 125     def __getattr__( self, aname ):
 126         """supported attributes by name are:
 127             - lineno - returns the line number of the exception text
 128             - col - returns the column number of the exception text
 129             - line - returns the line containing the exception text
 130         """
 131         if( aname == "lineno" ):
 132             return lineno( self.loc, self.pstr )
 133         elif( aname in ("col", "column") ):
 134             return col( self.loc, self.pstr )
 135         elif( aname == "line" ):
 136             return line( self.loc, self.pstr )
 137         else:
 138             raise AttributeError, aname
 139
 140     def __str__( self ):
 141         return "%s (at char %d), (line:%d, col:%d)" % \
 142                 ( self.msg, self.loc, self.lineno, self.column )
 143     def __repr__( self ):
 144         return _ustr(self)
 145     def markInputline( self, markerString = ">!<" ):
 146         """Extracts the exception line from the input string, and marks
 147            the location of the exception with a special symbol.
 148         """
 149         line_str = self.line
 150         line_column = self.column - 1
 151         if markerString:
 152             line_str = "".join( [line_str[:line_column],
 153                                 markerString, line_str[line_column:]])
 154         return line_str.strip()
 155
class ParseException(ParseBaseException):
    """Exception thrown when parse expressions don't match;
       adds nothing to ParseBaseException.  Supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
    pass
 164
class ParseFatalException(ParseBaseException):
    """User-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately.  Adds nothing to
       ParseBaseException beyond its distinct type."""
    pass
 169
 170 #~ class ReparseException(ParseBaseException):
 171     #~ """Experimental class - parse actions can raise this exception to cause
 172        #~ pyparsing to reparse the input string:
 173         #~ - with a modified input string, and/or
 174         #~ - with a modified start location
 175        #~ Set the values of the ReparseException in the constructor, and raise the
 176        #~ exception in a parse action to cause pyparsing to use the new string/location.
 177        #~ Setting the values as None causes no change to be made.
 178        #~ """
 179     #~ def __init_( self, newstring, restartLoc ):
 180         #~ self.newParseText = newstring
 181         #~ self.reparseLoc = restartLoc
 182
 183 class RecursiveGrammarException(Exception):
 184     """exception thrown by validate() if the grammar could be improperly recursive"""
 185     def __init__( self, parseElementList ):
 186         self.parseElementTrace = parseElementList
 187
 188     def __str__( self ):
 189         return "RecursiveGrammarException: %s" % self.parseElementTrace
 190
 191 class _ParseResultsWithOffset(object):
 192     def __init__(self,p1,p2):
 193         self.tup = (p1,p2)
 194     def __getitem__(self,i):
 195         return self.tup[i]
 196     def __repr__(self):
 197         return repr(self.tup)
 198
 199 class ParseResults(object):
 200     """Structured parse results, to provide multiple means of access to the parsed data:
 201        - as a list (len(results))
 202        - by list index (results[0], results[1], etc.)
 203        - by attribute (results.<resultsName>)
 204        """
 205     __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
 206     def __new__(cls, toklist, name=None, asList=True, modal=True ):
 207         if isinstance(toklist, cls):
 208             return toklist
 209         retobj = object.__new__(cls)
 210         retobj.__doinit = True
 211         return retobj
 212
 213     # Performance tuning: we construct a *lot* of these, so keep this
 214     # constructor as small and fast as possible
 215     def __init__( self, toklist, name=None, asList=True, modal=True ):
 216         if self.__doinit:
 217             self.__doinit = False
 218             self.__name = None
 219             self.__parent = None
 220             self.__accumNames = {}
 221             if isinstance(toklist, list):
 222                 self.__toklist = toklist[:]
 223             else:
 224                 self.__toklist = [toklist]
 225             self.__tokdict = dict()
 226
 227         # this line is related to debugging the asXML bug
 228         #~ asList = False
 229
 230         if name:
 231             if not modal:
 232                 self.__accumNames[name] = 0
 233             if isinstance(name,int):
 234                 name = _ustr(name) # will always return a str, but use _ustr for consistency
 235             self.__name = name
 236             if not toklist in (None,'',[]):
 237                 if isinstance(toklist,basestring):
 238                     toklist = [ toklist ]
 239                 if asList:
 240                     if isinstance(toklist,ParseResults):
 241                         self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
 242                     else:
 243                         self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
 244                     self[name].__name = name
 245                 else:
 246                     try:
 247                         self[name] = toklist[0]
 248                     except (KeyError,TypeError):
 249                         self[name] = toklist
 250
 251     def __getitem__( self, i ):
 252         if isinstance( i, (int,slice) ):
 253             return self.__toklist[i]
 254         else:
 255             if i not in self.__accumNames:
 256                 return self.__tokdict[i][-1][0]
 257             else:
 258                 return ParseResults([ v[0] for v in self.__tokdict[i] ])
 259
 260     def __setitem__( self, k, v ):
 261         if isinstance(v,_ParseResultsWithOffset):
 262             self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
 263             sub = v[0]
 264         elif isinstance(k,int):
 265             self.__toklist[k] = v
 266             sub = v
 267         else:
 268             self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
 269             sub = v
 270         if isinstance(sub,ParseResults):
 271             sub.__parent = wkref(self)
 272
 273     def __delitem__( self, i ):
 274         if isinstance(i,(int,slice)):
 275             del self.__toklist[i]
 276         else:
 277             del self.__tokdict[i]
 278
 279     def __contains__( self, k ):
 280         return self.__tokdict.has_key(k)
 281
 282     def __len__( self ): return len( self.__toklist )
 283     def __bool__(self): return len( self.__toklist ) > 0
 284     def __nonzero__( self ): return self.__bool__()
 285     def __iter__( self ): return iter( self.__toklist )
 286     def keys( self ):
 287         """Returns all named result keys."""
 288         return self.__tokdict.keys()
 289
 290     def items( self ):
 291         """Returns all named result keys and values as a list of tuples."""
 292         return [(k,self[k]) for k in self.__tokdict.keys()]
 293
 294     def values( self ):
 295         """Returns all named result values."""
 296         return [ v[-1][0] for v in self.__tokdict.values() ]
 297
 298     def __getattr__( self, name ):
 299         if name not in self.__slots__:
 300             if self.__tokdict.has_key( name ):
 301                 if name not in self.__accumNames:
 302                     return self.__tokdict[name][-1][0]
 303                 else:
 304                     return ParseResults([ v[0] for v in self.__tokdict[name] ])
 305             else:
 306                 return ""
 307         return None
 308
 309     def __add__( self, other ):
 310         ret = self.copy()
 311         ret += other
 312         return ret
 313
 314     def __iadd__( self, other ):
 315         if other.__tokdict:
 316             offset = len(self.__toklist)
 317             addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
 318             otheritems = other.__tokdict.items()
 319             otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
 320                                 for (k,vlist) in otheritems for v in vlist]
 321             for k,v in otherdictitems:
 322                 self[k] = v
 323                 if isinstance(v[0],ParseResults):
 324                     v[0].__parent = wkref(self)
 325         self.__toklist += other.__toklist
 326         self.__accumNames.update( other.__accumNames )
 327         del other
 328         return self
 329
 330     def __repr__( self ):
 331         return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
 332
 333     def __str__( self ):
 334         out = "["
 335         sep = ""
 336         for i in self.__toklist:
 337             if isinstance(i, ParseResults):
 338                 out += sep + _ustr(i)
 339             else:
 340                 out += sep + repr(i)
 341             sep = ", "
 342         out += "]"
 343         return out
 344
 345     def _asStringList( self, sep='' ):
 346         out = []
 347         for item in self.__toklist:
 348             if out and sep:
 349                 out.append(sep)
 350             if isinstance( item, ParseResults ):
 351                 out += item._asStringList()
 352             else:
 353                 out.append( _ustr(item) )
 354         return out
 355
 356     def asList( self ):
 357         """Returns the parse results as a nested list of matching tokens, all converted to strings."""
 358         out = []
 359         for res in self.__toklist:
 360             if isinstance(res,ParseResults):
 361                 out.append( res.asList() )
 362             else:
 363                 out.append( res )
 364         return out
 365
 366     def asDict( self ):
 367         """Returns the named parse results as dictionary."""
 368         return dict( self.items() )
 369
 370     def copy( self ):
 371         """Returns a new copy of a ParseResults object."""
 372         ret = ParseResults( self.__toklist )
 373         ret.__tokdict = self.__tokdict.copy()
 374         ret.__parent = self.__parent
 375         ret.__accumNames.update( self.__accumNames )
 376         ret.__name = self.__name
 377         return ret
 378
 379     def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
 380         """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
 381         nl = "\n"
 382         out = []
 383         namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
 384                                                             for v in vlist ] )
 385         nextLevelIndent = indent + "  "
 386
 387         # collapse out indents if formatting is not desired
 388         if not formatted:
 389             indent = ""
 390             nextLevelIndent = ""
 391             nl = ""
 392
 393         selfTag = None
 394         if doctag is not None:
 395             selfTag = doctag
 396         else:
 397             if self.__name:
 398                 selfTag = self.__name
 399
 400         if not selfTag:
 401             if namedItemsOnly:
 402                 return ""
 403             else:
 404                 selfTag = "ITEM"
 405
 406         out += [ nl, indent, "<", selfTag, ">" ]
 407
 408         worklist = self.__toklist
 409         for i,res in enumerate(worklist):
 410             if isinstance(res,ParseResults):
 411                 if i in namedItems:
 412                     out += [ res.asXML(namedItems[i],
 413                                         namedItemsOnly and doctag is None,
 414                                         nextLevelIndent,
 415                                         formatted)]
 416                 else:
 417                     out += [ res.asXML(None,
 418                                         namedItemsOnly and doctag is None,
 419                                         nextLevelIndent,
 420                                         formatted)]
 421             else:
 422                 # individual token, see if there is a name for it
 423                 resTag = None
 424                 if i in namedItems:
 425                     resTag = namedItems[i]
 426                 if not resTag:
 427                     if namedItemsOnly:
 428                         continue
 429                     else:
 430                         resTag = "ITEM"
 431                 xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
 432                 out += [ nl, nextLevelIndent, "<", resTag, ">",
 433                                                 xmlBodyText,
 434                                                 "</", resTag, ">" ]
 435
 436         out += [ nl, indent, "</", selfTag, ">" ]
 437         return "".join(out)
 438
 439     def __lookup(self,sub):
 440         for k,vlist in self.__tokdict.items():
 441             for v,loc in vlist:
 442                 if sub is v:
 443                     return k
 444         return None
 445
 446     def getName(self):
 447         """Returns the results name for this token expression."""
 448         if self.__name:
 449             return self.__name
 450         elif self.__parent:
 451             par = self.__parent()
 452             if par:
 453                 return par.__lookup(self)
 454             else:
 455                 return None
 456         elif (len(self) == 1 and
 457                len(self.__tokdict) == 1 and
 458                self.__tokdict.values()[0][0][1] in (0,-1)):
 459             return self.__tokdict.keys()[0]
 460         else:
 461             return None
 462
 463     def dump(self,indent='',depth=0):
 464         """Diagnostic method for listing out the contents of a ParseResults.
 465            Accepts an optional indent argument so that this string can be embedded
 466            in a nested display of other data."""
 467         out = []
 468         out.append( indent+_ustr(self.asList()) )
 469         keys = self.items()
 470         keys.sort()
 471         for k,v in keys:
 472             if out:
 473                 out.append('\n')
 474             out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
 475             if isinstance(v,ParseResults):
 476                 if v.keys():
 477                     #~ out.append('\n')
 478                     out.append( v.dump(indent,depth+1) )
 479                     #~ out.append('\n')
 480                 else:
 481                     out.append(_ustr(v))
 482             else:
 483                 out.append(_ustr(v))
 484         #~ out.append('\n')
 485         return "".join(out)
 486
 487     # add support for pickle protocol
 488     def __getstate__(self):
 489         return ( self.__toklist,
 490                  ( self.__tokdict.copy(),
 491                    self.__parent is not None and self.__parent() or None,
 492                    self.__accumNames,
 493                    self.__name ) )
 494
 495     def __setstate__(self,state):
 496         self.__toklist = state[0]
 497         self.__tokdict, \
 498         par, \
 499         inAccumNames, \
 500         self.__name = state[1]
 501         self.__accumNames = {}
 502         self.__accumNames.update(inAccumNames)
 503         if par is not None:
 504             self.__parent = wkref(par)
 505         else:
 506             self.__parent = None
 507
 508
 509 def col (loc,strg):
 510     """Returns current column within a string, counting newlines as line separators.
 511    The first column is number 1.
 512
 513    Note: the default parsing behavior is to expand tabs in the input string
 514    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
 515    on parsing strings containing <TAB>s, and suggested methods to maintain a
 516    consistent view of the parsed string, the parse location, and line and column
 517    positions within the parsed string.
 518    """
 519     return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
 520
 521 def lineno(loc,strg):
 522     """Returns current line number within a string, counting newlines as line separators.
 523    The first line is number 1.
 524
 525    Note: the default parsing behavior is to expand tabs in the input string
 526    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
 527    on parsing strings containing <TAB>s, and suggested methods to maintain a
 528    consistent view of the parsed string, the parse location, and line and column
 529    positions within the parsed string.
 530    """
 531     return strg.count("\n",0,loc) + 1
 532
 533 def line( loc, strg ):
 534     """Returns the line of text containing loc within a string, counting newlines as line separators.
 535        """
 536     lastCR = strg.rfind("\n", 0, loc)
 537     nextCR = strg.find("\n", loc)
 538     if nextCR > 0:
 539         return strg[lastCR+1:nextCR]
 540     else:
 541         return strg[lastCR+1:]
 542
def _defaultStartDebugAction( instring, loc, expr ):
    """Print "Match <expr> at loc <loc> (<line>,<col>)" to standard output,
       with line and column computed from loc within instring.
       Presumably the fallback start-of-match debug hook; its caller is
       outside this excerpt."""
    print "Match",_ustr(expr),"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
 545
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print "Matched <expr> -> <toks.asList()>" to standard output.
       instring, startloc and endloc are accepted but not used here."""
    print "Matched",_ustr(expr),"->",toks.asList()
 548
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print "Exception raised: <exc>" to standard output.
       instring, loc and expr are accepted but not used here."""
    print "Exception raised:", _ustr(exc)
 551
 552 def nullDebugAction(*args):
 553     """'Do-nothing' debug action, to suppress debugging output during parsing."""
 554     pass
 555
 556 class ParserElement(object):
 557     """Abstract base level parser element class."""
 558     DEFAULT_WHITE_CHARS = " \n\t\r"
 559
    def setDefaultWhitespaceChars( chars ):
        """Overrides the default whitespace chars.

           Replaces the class attribute ParserElement.DEFAULT_WHITE_CHARS, which
           __init__ reads when an element is constructed and copy() reads
           again when the element's copyDefaultWhiteChars flag is set.
           Called on the class (static method); returns None.
        """
        ParserElement.DEFAULT_WHITE_CHARS = chars
    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
 565
 566     def __init__( self, savelist=False ):
 567         self.parseAction = list()
 568         self.failAction = None
 569         #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
 570         self.strRepr = None
 571         self.resultsName = None
 572         self.saveAsList = savelist
 573         self.skipWhitespace = True
 574         self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
 575         self.copyDefaultWhiteChars = True
 576         self.mayReturnEmpty = False # used when checking for left-recursion
 577         self.keepTabs = False
 578         self.ignoreExprs = list()
 579         self.debug = False
 580         self.streamlined = False
 581         self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
 582         self.errmsg = ""
 583         self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
 584         self.debugActions = ( None, None, None ) #custom debug actions
 585         self.re = None
 586         self.callPreparse = True # used to avoid redundant calls to preParse
 587         self.callDuringTry = False
 588
 589     def copy( self ):
 590         """Make a copy of this ParserElement.  Useful for defining different parse actions
 591            for the same parsing pattern, using copies of the original parse element."""
 592         cpy = copy.copy( self )
 593         cpy.parseAction = self.parseAction[:]
 594         cpy.ignoreExprs = self.ignoreExprs[:]
 595         if self.copyDefaultWhiteChars:
 596             cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
 597         return cpy
 598
 599     def setName( self, name ):
 600         """Define name for this expression, for use in debugging."""
 601         self.name = name
 602         self.errmsg = "Expected " + self.name
 603         if hasattr(self,"exception"):
 604             self.exception.msg = self.errmsg
 605         return self
 606
 607     def setResultsName( self, name, listAllMatches=False ):
 608         """Define name for referencing matching tokens as a nested attribute
 609            of the returned parse results.
 610            NOTE: this returns a *copy* of the original ParserElement object;
 611            this is so that the client can define a basic element, such as an
 612            integer, and reference it in multiple places with different names.
 613         """
 614         newself = self.copy()
 615         newself.resultsName = name
 616         newself.modalResults = not listAllMatches
 617         return newself
 618
 619     def setBreak(self,breakFlag = True):
 620         """Method to invoke the Python pdb debugger when this element is
 621            about to be parsed. Set breakFlag to True to enable, False to
 622            disable.
 623         """
 624         if breakFlag:
 625             _parseMethod = self._parse
 626             def breaker(instring, loc, doActions=True, callPreParse=True):
 627                 import pdb
 628                 pdb.set_trace()
 629                 _parseMethod( instring, loc, doActions, callPreParse )
 630             breaker._originalParseMethod = _parseMethod
 631             self._parse = breaker
 632         else:
 633             if hasattr(self._parse,"_originalParseMethod"):
 634                 self._parse = self._parse._originalParseMethod
 635         return self
 636
    def normalizeParseActionArgs( f ):
        """Internal method used to decorate parse actions that take fewer than 3 arguments,
           so that all parse actions can be called as f(s,l,t).

           Returns f unchanged when it declares *args or takes exactly 3
           arguments.  Otherwise returns a wrapper tmp(s,l,t) that calls
           f(l,t) for 2 arguments, f(t) for 1, and f() for any other count,
           copying f's __name__, __doc__ and __dict__ onto the wrapper where
           those attributes exist."""
        # bit tested in co_flags below to detect a *args parameter
        STAR_ARGS = 4

        try:
            restore = None
            if isinstance(f,type):
                # a class was passed: inspect its __init__, then switch back to
                # the class itself before building the wrapper
                restore = f
                f = f.__init__
            # NOTE(review): when f is a class whose __init__ takes *args, this
            # returns f.__init__ rather than the class -- confirm intended.
            if f.func_code.co_flags & STAR_ARGS:
                return f
            numargs = f.func_code.co_argcount
            if hasattr(f,"im_self"):
                # method object: do not count the self parameter
                numargs -= 1
            if restore:
                f = restore
        except AttributeError:
            try:
                # not a function, must be a callable object, get info from the
                # im_func binding of its bound __call__ method
                if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
                    return f
                numargs = f.__call__.im_func.func_code.co_argcount
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            except AttributeError:
                # not a bound method, get info directly from __call__ method
                if f.__call__.func_code.co_flags & STAR_ARGS:
                    return f
                numargs = f.__call__.func_code.co_argcount
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1

        #~ print "adding function %s with %d args" % (f.func_name,numargs)
        if numargs == 3:
            return f
        else:
            # build an adapter with the full (s,l,t) signature that forwards
            # only the trailing arguments f accepts
            if numargs == 2:
                def tmp(s,l,t):
                    return f(l,t)
            elif numargs == 1:
                def tmp(s,l,t):
                    return f(t)
            else: #~ numargs == 0:
                def tmp(s,l,t):
                    return f()
            # make the adapter look like f; each copy is best-effort
            try:
                tmp.__name__ = f.__name__
            except AttributeError:
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__doc__ = f.__doc__
            except AttributeError:
                # no need for special handling if attribute doesnt exist
                pass
            try:
                tmp.__dict__.update(f.__dict__)
            except AttributeError:
                # no need for special handling if attribute doesnt exist
                pass
            return tmp
    normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
 701
 702     def setParseAction( self, *fns, **kwargs ):
 703         """Define action to perform when successfully matching parse element definition.
 704            Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
 705            fn(loc,toks), fn(toks), or just fn(), where:
 706             - s   = the original string being parsed (see note below)
 707             - loc = the location of the matching substring
 708             - toks = a list of the matched tokens, packaged as a ParseResults object
 709            If the functions in fns modify the tokens, they can return them as the return
 710            value from fn, and the modified list of tokens will replace the original.
 711            Otherwise, fn does not need to return any value.
 712
 713            Note: the default parsing behavior is to expand tabs in the input string
 714            before starting the parsing process.  See L{I{parseString}<parseString>} for more information
 715            on parsing strings containing <TAB>s, and suggested methods to maintain a
 716            consistent view of the parsed string, the parse location, and line and column
 717            positions within the parsed string.
 718            """
 719         self.parseAction = map(self.normalizeParseActionArgs, list(fns))
 720         self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
 721         return self
 722
 723     def addParseAction( self, *fns, **kwargs ):
 724         """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
 725         self.parseAction += map(self.normalizeParseActionArgs, list(fns))
 726         self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
 727         return self
 728
 729     def setFailAction( self, fn ):
 730         """Define action to perform if parsing fails at this expression.
 731            Fail acton fn is a callable function that takes the arguments
 732            fn(s,loc,expr,err) where:
 733             - s = string being parsed
 734             - loc = location where expression match was attempted and failed
 735             - expr = the parse expression that failed
 736             - err = the exception thrown
 737            The function returns no value.  It may throw ParseFatalException
 738            if it is desired to stop parsing immediately."""
 739         self.failAction = fn
 740         return self
 741
 742     def skipIgnorables( self, instring, loc ):
 743         exprsFound = True
 744         while exprsFound:
 745             exprsFound = False
 746             for e in self.ignoreExprs:
 747                 try:
 748                     while 1:
 749                         loc,dummy = e._parse( instring, loc )
 750                         exprsFound = True
 751                 except ParseException:
 752                     pass
 753         return loc
 754
 755     def preParse( self, instring, loc ):
 756         if self.ignoreExprs:
 757             loc = self.skipIgnorables( instring, loc )
 758
 759         if self.skipWhitespace:
 760             wt = self.whiteChars
 761             instrlen = len(instring)
 762             while loc < instrlen and instring[loc] in wt:
 763                 loc += 1
 764
 765         return loc
 766
 767     def parseImpl( self, instring, loc, doActions=True ):
 768         return loc, []
 769
 770     def postParse( self, instring, loc, tokenlist ):
 771         return tokenlist
 772
 773     #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Core (unmemoized) parse step for this expression.

           Optionally runs preParse, calls parseImpl and postParse, wraps the
           tokens in a ParseResults, and runs the parse actions.

            - instring = the string being parsed
            - loc = location at which to attempt the match
            - doActions = when false, parse actions run only if callDuringTry is set
            - callPreParse = when true (and self.callPreparse is set), preParse is
              run on loc before matching

           Returns a (loc, retTokens) tuple: the location returned by parseImpl
           and the resulting ParseResults.  Raises ParseException on a failed
           match; an IndexError escaping parseImpl is reported as a
           ParseException located at len(instring).
           """
        debugging = ( self.debug ) #and doActions )

        # slow path: taken when debugging or when a fail action must be notified
        if debugging or self.failAction:
            #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            # NOTE: this is loc as passed in, not preloc, so the debug, fail and
            # parse actions receive the location before preParse skipping
            tokensStart = loc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseException, err:
                #~ print "Exception raised:", err
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = loc
            # only pay for the try/except when parseImpl is flagged as able to
            # raise IndexError, or loc is already at/after the end of the string
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            # each action is called as fn(instring, tokensStart, retTokens); a
            # non-None return value is re-wrapped and replaces retTokens for the
            # next action.  The two loops differ only in the exception debug hook.
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseException, err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )

        if debugging:
            #~ print "Matched",self,"->",retTokens.asList()
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
 845
 846     def tryParse( self, instring, loc ):
 847         return self._parse( instring, loc, doActions=False )[0]
 848
 849     # this method gets repeatedly called during backtracking with the same arguments -
 850     # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
 851     def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
 852         lookup = (self,instring,loc,callPreParse,doActions)
 853         if lookup in ParserElement._exprArgCache:
 854             value = ParserElement._exprArgCache[ lookup ]
 855             if isinstance(value,Exception):
 856                 if isinstance(value,ParseBaseException):
 857                     value.loc = loc
 858                 raise value
 859             return (value[0],value[1].copy())
 860         else:
 861             try:
 862                 value = self._parseNoCache( instring, loc, doActions, callPreParse )
 863                 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
 864                 return value
 865             except ParseBaseException, pe:
 866                 ParserElement._exprArgCache[ lookup ] = pe
 867                 raise
 868
 869     _parse = _parseNoCache
 870
 871     # argument cache for optimizing repeated calls when backtracking through recursive expressions
 872     _exprArgCache = {}
 873     def resetCache():
 874         ParserElement._exprArgCache.clear()
 875     resetCache = staticmethod(resetCache)
 876
 877     _packratEnabled = False
 878     def enablePackrat():
 879         """Enables "packrat" parsing, which adds memoizing to the parsing logic.
 880            Repeated parse attempts at the same string location (which happens
 881            often in many complex grammars) can immediately return a cached value,
 882            instead of re-executing parsing/validating code.  Memoizing is done of
 883            both valid results and parsing exceptions.
 884
 885            This speedup may break existing programs that use parse actions that
 886            have side-effects.  For this reason, packrat parsing is disabled when
 887            you first import pyparsing.  To activate the packrat feature, your
 888            program must call the class method ParserElement.enablePackrat().  If
 889            your program uses psyco to "compile as you go", you must call
 890            enablePackrat before calling psyco.full().  If you do not do this,
 891            Python will crash.  For best results, call enablePackrat() immediately
 892            after importing pyparsing.
 893         """
 894         if not ParserElement._packratEnabled:
 895             ParserElement._packratEnabled = True
 896             ParserElement._parse = ParserElement._parseCache
 897     enablePackrat = staticmethod(enablePackrat)
 898
 899     def parseString( self, instring ):
 900         """Execute the parse expression with the given string.
 901            This is the main interface to the client code, once the complete
 902            expression has been built.
 903
 904            Note: parseString implicitly calls expandtabs() on the input string,
 905            in order to report proper column numbers in parse actions.
 906            If the input string contains tabs and
 907            the grammar uses parse actions that use the loc argument to index into the
 908            string being parsed, you can ensure you have a consistent view of the input
 909            string by:
 910             - calling parseWithTabs on your grammar before calling parseString
 911               (see L{I{parseWithTabs}<parseWithTabs>})
 912             - define your parse action using the full (s,loc,toks) signature, and
 913               reference the input string using the parse action's s argument
 914             - explictly expand the tabs in your input string before calling
 915               parseString
 916         """
 917         ParserElement.resetCache()
 918         if not self.streamlined:
 919             self.streamline()
 920             #~ self.saveAsList = True
 921         for e in self.ignoreExprs:
 922             e.streamline()
 923         if self.keepTabs:
 924             loc, tokens = self._parse( instring, 0 )
 925         else:
 926             loc, tokens = self._parse( instring.expandtabs(), 0 )
 927         return tokens
 928
 929     def scanString( self, instring, maxMatches=sys.maxint ):
 930         """Scan the input string for expression matches.  Each match will return the
 931            matching tokens, start location, and end location.  May be called with optional
 932            maxMatches argument, to clip scanning after 'n' matches are found.
 933
 934            Note that the start and end locations are reported relative to the string
 935            being parsed.  See L{I{parseString}<parseString>} for more information on parsing
 936            strings with embedded tabs."""
 937         if not self.streamlined:
 938             self.streamline()
 939         for e in self.ignoreExprs:
 940             e.streamline()
 941
 942         if not self.keepTabs:
 943             instring = _ustr(instring).expandtabs()
 944         instrlen = len(instring)
 945         loc = 0
 946         preparseFn = self.preParse
 947         parseFn = self._parse
 948         ParserElement.resetCache()
 949         matches = 0
 950         while loc <= instrlen and matches < maxMatches:
 951             try:
 952                 preloc = preparseFn( instring, loc )
 953                 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
 954             except ParseException:
 955                 loc = preloc+1
 956             else:
 957                 matches += 1
 958                 yield tokens, preloc, nextLoc
 959                 loc = nextLoc
 960
 961     def transformString( self, instring ):
 962         """Extension to scanString, to modify matching text with modified tokens that may
 963            be returned from a parse action.  To use transformString, define a grammar and
 964            attach a parse action to it that modifies the returned token list.
 965            Invoking transformString() on a target string will then scan for matches,
 966            and replace the matched text patterns according to the logic in the parse
 967            action.  transformString() returns the resulting transformed string."""
 968         out = []
 969         lastE = 0
 970         # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
 971         # keep string locs straight between transformString and scanString
 972         self.keepTabs = True
 973         for t,s,e in self.scanString( instring ):
 974             out.append( instring[lastE:s] )
 975             if t:
 976                 if isinstance(t,ParseResults):
 977                     out += t.asList()
 978                 elif isinstance(t,list):
 979                     out += t
 980                 else:
 981                     out.append(t)
 982             lastE = e
 983         out.append(instring[lastE:])
 984         return "".join(map(_ustr,out))
 985
 986     def searchString( self, instring, maxMatches=sys.maxint ):
 987         """Another extension to scanString, simplifying the access to the tokens found
 988            to match the given parse expression.  May be called with optional
 989            maxMatches argument, to clip searching after 'n' matches are found.
 990         """
 991         return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
 992
 993     def __add__(self, other ):
 994         """Implementation of + operator - returns And"""
 995         if isinstance( other, basestring ):
 996             other = Literal( other )
 997         if not isinstance( other, ParserElement ):
 998             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
 999                     SyntaxWarning, stacklevel=2)
1000         return And( [ self, other ] )
1001
1002     def __radd__(self, other ):
1003         """Implementation of += operator"""
1004         if isinstance( other, basestring ):
1005             other = Literal( other )
1006         if not isinstance( other, ParserElement ):
1007             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
1008                     SyntaxWarning, stacklevel=2)
1009         return other + self
1010
1011     def __or__(self, other ):
1012         """Implementation of | operator - returns MatchFirst"""
1013         if isinstance( other, basestring ):
1014             other = Literal( other )
1015         if not isinstance( other, ParserElement ):
1016             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
1017                     SyntaxWarning, stacklevel=2)
1018         return MatchFirst( [ self, other ] )
1019
1020     def __ror__(self, other ):
1021         """Implementation of |= operator"""
1022         if isinstance( other, basestring ):
1023             other = Literal( other )
1024         if not isinstance( other, ParserElement ):
1025             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
1026                     SyntaxWarning, stacklevel=2)
1027         return other | self
1028
1029     def __xor__(self, other ):
1030         """Implementation of ^ operator - returns Or"""
1031         if isinstance( other, basestring ):
1032             other = Literal( other )
1033         if not isinstance( other, ParserElement ):
1034             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
1035                     SyntaxWarning, stacklevel=2)
1036         return Or( [ self, other ] )
1037
1038     def __rxor__(self, other ):
1039         """Implementation of ^= operator"""
1040         if isinstance( other, basestring ):
1041             other = Literal( other )
1042         if not isinstance( other, ParserElement ):
1043             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
1044                     SyntaxWarning, stacklevel=2)
1045         return other ^ self
1046
1047     def __and__(self, other ):
1048         """Implementation of & operator - returns Each"""
1049         if isinstance( other, basestring ):
1050             other = Literal( other )
1051         if not isinstance( other, ParserElement ):
1052             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
1053                     SyntaxWarning, stacklevel=2)
1054         return Each( [ self, other ] )
1055
1056     def __rand__(self, other ):
1057         """Implementation of right-& operator"""
1058         if isinstance( other, basestring ):
1059             other = Literal( other )
1060         if not isinstance( other, ParserElement ):
1061             warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
1062                     SyntaxWarning, stacklevel=2)
1063         return other & self
1064
1065     def __invert__( self ):
1066         """Implementation of ~ operator - returns NotAny"""
1067         return NotAny( self )
1068
1069     def __call__(self, name):
1070         """Shortcut for setResultsName, with listAllMatches=default::
1071              userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1072            could be written as::
1073              userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1074            """
1075         return self.setResultsName(name)
1076
1077     def suppress( self ):
1078         """Suppresses the output of this ParserElement; useful to keep punctuation from
1079            cluttering up returned output.
1080         """
1081         return Suppress( self )
1082
1083     def leaveWhitespace( self ):
1084         """Disables the skipping of whitespace before matching the characters in the
1085            ParserElement's defined pattern.  This is normally only used internally by
1086            the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1087         """
1088         self.skipWhitespace = False
1089         return self
1090
1091     def setWhitespaceChars( self, chars ):
1092         """Overrides the default whitespace chars
1093         """
1094         self.skipWhitespace = True
1095         self.whiteChars = chars
1096         self.copyDefaultWhiteChars = False
1097         return self
1098
1099     def parseWithTabs( self ):
1100         """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1101            Must be called before parseString when the input grammar contains elements that
1102            match <TAB> characters."""
1103         self.keepTabs = True
1104         return self
1105
1106     def ignore( self, other ):
1107         """Define expression to be ignored (e.g., comments) while doing pattern
1108            matching; may be called repeatedly, to define multiple comment or other
1109            ignorable patterns.
1110         """
1111         if isinstance( other, Suppress ):
1112             if other not in self.ignoreExprs:
1113                 self.ignoreExprs.append( other )
1114         else:
1115             self.ignoreExprs.append( Suppress( other ) )
1116         return self
1117
1118     def setDebugActions( self, startAction, successAction, exceptionAction ):
1119         """Enable display of debugging messages while doing pattern matching."""
1120         self.debugActions = (startAction or _defaultStartDebugAction,
1121                              successAction or _defaultSuccessDebugAction,
1122                              exceptionAction or _defaultExceptionDebugAction)
1123         self.debug = True
1124         return self
1125
1126     def setDebug( self, flag=True ):
1127         """Enable display of debugging messages while doing pattern matching.
1128            Set flag to True to enable, False to disable."""
1129         if flag:
1130             self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1131         else:
1132             self.debug = False
1133         return self
1134
1135     def __str__( self ):
1136         return self.name
1137
1138     def __repr__( self ):
1139         return _ustr(self)
1140
1141     def streamline( self ):
1142         self.streamlined = True
1143         self.strRepr = None
1144         return self
1145
1146     def checkRecursion( self, parseElementList ):
1147         pass
1148
1149     def validate( self, validateTrace=[] ):
1150         """Check defined expressions for valid structure, check for infinite recursive definitions."""
1151         self.checkRecursion( [] )
1152
1153     def parseFile( self, file_or_filename ):
1154         """Execute the parse expression on the given file or filename.
1155            If a filename is specified (instead of a file object),
1156            the entire file is opened, read, and closed before parsing.
1157         """
1158         try:
1159             file_contents = file_or_filename.read()
1160         except AttributeError:
1161             f = open(file_or_filename, "rb")
1162             file_contents = f.read()
1163             f.close()
1164         return self.parseString(file_contents)
1165
1166     def getException(self):
1167         return ParseException("",0,self.errmsg,self)
1168
1169     def __getattr__(self,aname):
1170         if aname == "myException":
1171             self.myException = ret = self.getException();
1172             return ret;
1173         else:
1174             raise AttributeError, "no such attribute " + aname
1175
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name through ParserElement.setName, then rebuild errmsg as
           "Expected <name>".  Returns whatever the base setName returned."""
        named = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return named
1187
1188
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1196
1197
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise this element's exception object, updated to
           refer to instring at loc."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1213
1214
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        if self.matchLen > 0:
            self.firstMatchChar = matchString[0]
        else:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # an empty literal is turned into an Empty element in place
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl( self, instring, loc, doActions=True ):
        """Match self.match at loc; on success return the location just past
           it together with the match string, otherwise raise this element's
           exception object updated for instring/loc."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith( self.match, loc ):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1246
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character (and, as implemented in
       parseImpl, not immediately preceded by a keyword character).
       Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to the current value of Keyword.DEFAULT_KEYWORD_CHARS (initially
       all alphanumerics + "_" and "$"); caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=None, caseless=False ):
        super(Keyword,self).__init__()
        if identChars is None:
            # resolve the default at call time, so that a value installed by
            # setDefaultKeywordChars() applies to keywords created afterwards
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are all done in upper case
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that the characters just before
           and just after it (when present) are not in identChars.  Returns
           (end location, match string) or raises this element's exception."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Copy this keyword; the copy's identChars is reset to the current
           Keyword.DEFAULT_KEYWORD_CHARS string."""
        c = super(Keyword,self).copy()
        # NOTE(review): this drops any custom identChars given to the
        # constructor and skips the upper-casing done for caseless keywords -
        # confirm that this is intended
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1307
1308
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base Literal holds the upper-cased form used for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased slice of instring at loc against the stored
           upper-cased literal; on success return the end location and the
           literal as originally given, otherwise raise this element's
           exception object updated for instring/loc."""
        candidate = instring[ loc:loc+self.matchLen ]
        if candidate.upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc+self.matchLen, self.returnString
1330
class CaselessKeyword(Keyword):
    """Keyword that is matched ignoring the case of letters; equivalent to
       Keyword(matchString, identChars, caseless=True).  The matched result is
       the match string as given to the constructor.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc without regard to case, requiring that the
           characters just before and just after it (when present) are not in
           identChars.  Returns (end location, match string) or raises this
           element's exception."""
        if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
             (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
             # leading boundary, as checked by Keyword.parseImpl in caseless
             # mode: do not match in the middle of an identifier
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return loc+self.matchLen, self.match
        #~ raise ParseException( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1344
1345 class Word(Token):
1346     """Token for matching words composed of allowed character sets.
1347        Defined with string containing all allowed initial characters,
1348        an optional string containing allowed body characters (if omitted,
1349        defaults to the initial character set), and an optional minimum,
1350        maximum, and/or exact length.  The default value for min is 1 (a
1351        minimum value < 1 is not valid); the default values for max and exact
1352        are 0, meaning no maximum or exact length restriction.
1353     """
1354     def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
1355         super(Word,self).__init__()
1356         self.initCharsOrig = initChars
1357         self.initChars = _str2dict(initChars)
1358         if bodyChars :
1359             self.bodyCharsOrig = bodyChars
1360             self.bodyChars = _str2dict(bodyChars)
1361         else:
1362             self.bodyCharsOrig = initChars
1363             self.bodyChars = _str2dict(initChars)
1364
1365         self.maxSpecified = max > 0
1366
1367         if min < 1:
1368             raise ValueError, "cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted"
1369
1370         self.minLen = min
1371
1372         if max > 0:
1373             self.maxLen = max
1374         else:
1375             self.maxLen = sys.maxint
1376
1377         if exact > 0:
1378             self.maxLen = exact
1379             self.minLen = exact
1380
1381         self.name = _ustr(self)
1382         self.errmsg = "Expected " + self.name
1383         #self.myException.msg = self.errmsg
1384         self.mayIndexError = False
1385         self.asKeyword = asKeyword
1386
1387         if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
1388             if self.bodyCharsOrig == self.initCharsOrig:
1389                 self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
1390             elif len(self.bodyCharsOrig) == 1:
1391                 self.reString = "%s[%s]*" % \
1392                                       (re.escape(self.initCharsOrig),
1393                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
1394             else:
1395                 self.reString = "[%s][%s]*" % \
1396                                       (_escapeRegexRangeChars(self.initCharsOrig),
1397                                       _escapeRegexRangeChars(self.bodyCharsOrig),)
1398             if self.asKeyword:
1399                 self.reString = r"\b"+self.reString+r"\b"
1400             try:
1401                 self.re = re.compile( self.reString )
1402             except:
1403                 self.re = None
1404
1405     def parseImpl( self, instring, loc, doActions=True ):
1406         if self.re:
1407             result = self.re.match(instring,loc)
1408             if not result:
1409                 exc = self.myException
1410                 exc.loc = loc
1411                 exc.pstr = instring
1412                 raise exc
1413
1414             loc = result.end()
1415             return loc,result.group()
1416
1417         if not(instring[ loc ] in self.initChars):
1418             #~ raise ParseException( instring, loc, self.errmsg )
1419             exc = self.myException
1420             exc.loc = loc
1421             exc.pstr = instring
1422             raise exc
1423         start = loc
1424         loc += 1
1425         instrlen = len(instring)
1426         bodychars = self.bodyChars
1427         maxloc = start + self.maxLen
1428         maxloc = min( maxloc, instrlen )
1429         while loc < maxloc and instring[loc] in bodychars:
1430             loc += 1
1431
1432         throwException = False
1433         if loc - start < self.minLen:
1434             throwException = True
1435         if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
1436             throwException = True
1437         if self.asKeyword:
1438             if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
1439                 throwException = True
1440
1441         if throwException:
1442             #~ raise ParseException( instring, loc, self.errmsg )
1443             exc = self.myException
1444             exc.loc = loc
1445             exc.pstr = instring
1446             raise exc
1447
1448         return loc, instring[start:loc]
1449
1450     def __str__( self ):
1451         try:
1452             return super(Word,self).__str__()
1453         except:
1454             pass
1455
1456
1457         if self.strRepr is None:
1458
1459             def charsAsStr(s):
1460                 if len(s)>4:
1461                     return s[:4]+"..."
1462                 else:
1463                     return s
1464
1465             if ( self.initCharsOrig != self.bodyCharsOrig ):
1466                 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1467             else:
1468                 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1469
1470         return self.strRepr
1471
1472
class Regex(Token):
    """Token that matches input text against a regular expression.
       The expression is given as a pattern string in the syntax of the
       standard Python re module.
    """
    def __init__( self, pattern, flags=0):
        """Compile pattern with flags; both are handed unchanged to re.compile().
           An empty pattern triggers a SyntaxWarning.  A pattern that re rejects
           triggers a SyntaxWarning and the original error is re-raised.
        """
        super(Regex,self).__init__()

        if not len(pattern):
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled pattern at loc.  Returns the end of the match and
           a ParseResults built from the matched text, with every named group
           of the pattern stored under its group name.  When there is no match
           the existing self.myException is updated and raised.
        """
        matched = self.re.match(instring,loc)
        if not matched:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        tokens = ParseResults(matched.group())
        # expose named groups as named results
        for groupName, groupValue in matched.groupdict().items():
            tokens[groupName] = groupValue
        return matched.end(), tokens

    def __str__( self ):
        """Return the base-class string form if available, else a cached
           "Re:(...)" description containing the repr of the pattern.
        """
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "Re:(%s)" % repr(self.pattern)
        return self.strRepr
1528
1529
class QuotedString(Token):
    """Token for matching text enclosed between quote delimiters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Parameters:
           - quoteChar - string of one or more characters that opens the quoted string
           - escChar - character used to escape quotes, typically backslash (default=None)
           - escQuote - special sequence standing for an embedded quote (such as SQL's "" for an embedded ") (default=None)
           - multiline - boolean, whether the quoted text may span several lines (default=False)
           - unquoteResults - boolean, whether the delimiters are removed and escapes resolved in the result (default=True)
           - endQuoteChar - string of one or more characters that closes the quoted string (default=None => same as quoteChar)

           An empty delimiter (after stripping whitespace) produces a
           SyntaxWarning followed by SyntaxError.
        """
        super(QuotedString,self).__init__()

        # whitespace around the quote characters is discarded
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # pieces shared by the single-line and multi-line forms of the pattern
        openingRe = re.escape(self.quoteChar)
        closingFirstRe = _escapeRegexRangeChars(self.endQuoteChar[0])
        if escChar is not None:
            escRangeRe = _escapeRegexRangeChars(escChar) or ''
        else:
            escRangeRe = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            bodyStart = r'%s(?:[^%s%s]'
        else:
            # single-line strings additionally exclude line-break characters
            self.flags = 0
            bodyStart = r'%s(?:[^%s\n\r%s]'
        pieces = [ bodyStart % ( openingRe, closingFirstRe, escRangeRe ) ]

        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body as long as the next character breaks the end-quote sequence
            partials = []
            for i in range(len(self.endQuoteChar)-1,0,-1):
                partials.append( "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                              _escapeRegexRangeChars(self.endQuoteChar[i])) )
            pieces.append( '|(?:' + ')|(?:'.join(partials) + ')' )
        if escQuote:
            pieces.append( r'|(?:%s)' % re.escape(escQuote) )
        if escChar:
            pieces.append( r'|(?:%s.)' % re.escape(escChar) )
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        pieces.append( r')*%s' % re.escape(self.endQuoteChar) )
        self.pattern = "".join(pieces)

        try:
            self.re = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.reString = self.pattern

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at loc and return (end location, text).
           The regex is only tried when the character at loc equals the first
           quote character.  With unquoteResults set, the delimiters are
           removed, escChar-escaped characters are replaced by the bare
           character, and escQuote sequences are replaced by endQuoteChar.
        """
        matched = None
        if instring[loc] == self.firstQuoteChar:
            # cheap first-character test before running the regex
            matched = self.re.match(instring,loc)
        if not matched:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = matched.end()
        text = matched.group()

        if not self.unquoteResults:
            return loc, text

        # strip off quotes
        text = text[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(text,basestring):
            # replace escaped characters
            if self.escChar:
                text = re.sub(self.escCharReplacePattern,"\g<1>",text)

            # replace escaped quotes
            if self.escQuote:
                text = text.replace(self.escQuote, self.endQuoteChar)

        return loc, text

    def __str__( self ):
        """Return the base-class string form if available, else a cached
           description naming the opening and closing quote strings.
        """
        try:
            return super(QuotedString,self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
        return self.strRepr
1644
1645
class CharsNotIn(Token):
    """Token matching a run of characters that are *not* members of a given
       set.  Constructed with a string of the disallowed characters plus
       optional min, max and exact lengths.  min defaults to 1 and values
       below 1 are rejected; max and exact default to 0, meaning "no limit" /
       "no exact length".
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and the length limits.  Raises
           ValueError when min is below 1.  A positive exact overrides both
           min and max.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = sys.maxint

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc until an excluded character, the
           maximum length, or the end of the input is reached.  Returns
           (end location, matched text).  The existing self.myException is
           updated and raised when the first character is excluded or fewer
           than minLen characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        stopAt = min( start+self.maxLen, len(instring) )
        loc = start + 1
        while loc < stopAt:
            if instring[loc] in excluded:
                break
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string form if available, else a cached
           "!W:(...)" description showing at most four excluded characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        if len(self.notChars) <= 4:
            self.strRepr = "!W:(%s)" % self.notChars
        else:
            self.strRepr = "!W:(%s...)" % self.notChars[:4]
        return self.strRepr
1716
class White(Token):
    """Token for matching whitespace explicitly, for grammars in which some
       whitespace is significant.  Constructed with a string of the whitespace
       characters to match; the default is space, tab, carriage return and
       newline.  Also accepts optional min, max and exact length arguments.
       Every character given must be a key of whiteStrs, which supplies the
       display name used to build the token's name."""
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws

        # the characters being matched must no longer be skipped as
        # insignificant whitespace by this element
        stillSkipped = []
        for c in self.whiteChars:
            if c not in self.matchWhite:
                stillSkipped.append(c)
        self.setWhitespaceChars( "".join(stillSkipped) )

        labels = []
        for c in self.matchWhite:
            labels.append( White.whiteStrs[c] )
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            if max > 0:
                self.maxLen = max
            else:
                self.maxLen = sys.maxint

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters starting at
           loc and return (end location, matched text).  The existing
           self.myException is updated and raised when the first character
           does not qualify or the run is shorter than minLen.
        """
        if instring[loc] not in self.matchWhite:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        stopAt = min( start + self.maxLen, len(instring) )
        loc = start + 1
        while loc < stopAt:
            if instring[loc] not in self.matchWhite:
                break
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
1773
1774
class _PositionToken(Token):
    """Common base for the position-testing tokens defined after it; names
       the element after its concrete class and marks it as able to match
       without returning text."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        # use the subclass name (e.g. "LineStart") as the display name
        self.name = type(self).__name__
1781
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; handy when
       scraping tabular reports."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Return loc moved forward over ignorable expressions and whitespace
           until col() reports the target column, a non-space character is
           met, or the input ends.
        """
        if col(loc,instring) == self.col:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self.skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (new location, skipped text) where the new location lies as
           many characters ahead of loc as the current column is short of the
           target column.  Raises ParseException if the current column is
           already beyond the target.
        """
        currentCol = col( loc, instring )
        if currentCol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - currentCol
        return newloc, instring[ loc: newloc ]
1804
class LineStart(_PositionToken):
    """Matches when the current position is at the start of a line within the
       parse string."""
    def __init__( self ):
        super(LineStart,self).__init__()
        # only blanks and tabs are skippable here; newlines stay significant
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return loc+1 when the base-class preParse lands on a newline,
           otherwise return loc unchanged.
        """
        # NOTE(review): instring[...] is indexed without a bounds check, and
        # the increment applies to loc rather than to the pre-parsed position
        # - confirm both are intended.
        skipped = super(LineStart,self).preParse(instring,loc)
        if instring[skipped] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) when loc is 0, when loc equals the result of
           pre-parsing from position 0, or when the preceding character is a
           newline; otherwise update and raise the existing self.myException.
        """
        atLineStart = ( loc == 0 or
                        loc == self.preParse( instring, 0 ) or
                        instring[loc-1] == "\n" )
        if atLineStart:
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1829
class LineEnd(_PositionToken):
    """Matches when the current position is at the end of a line within the
       parse string."""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # only blanks and tabs are skippable here; newlines stay significant
        self.setWhitespaceChars( " \t" )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc+1, "\\n") when a newline sits at loc, (loc+1, []) when
           loc is exactly the end of the input; in every other case update and
           raise the existing self.myException.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []

        # some other character at loc, or loc already past the end
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1855
class StringStart(_PositionToken):
    """Matches when the current position is at the start of the parse string."""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with (loc, []) when loc is 0 or when everything before loc
           is consumed by pre-parsing from position 0 (whitespace and
           ignorables); otherwise update and raise the existing
           self.myException.
        """
        if loc == 0 or loc == self.preParse( instring, 0 ):
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
1873
class StringEnd(_PositionToken):
    """Matches when the current position is at the end of the parse string."""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Check that no input remains at loc.

           Returns (loc+1, []) when loc is exactly the end of the input and
           (loc, []) when loc is already past the end.  When input remains,
           the existing self.myException is updated with the location and
           input string and raised.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen; the former trailing "else: raise" could never run
        # because <, == and > already cover every case, so it was removed.
        return loc, []
1897
1898
class ParseExpression(ParserElement):
    """Abstract ParserElement subclass holding a list of sub-expressions
       (self.exprs) that concrete subclasses combine."""
    def __init__( self, exprs, savelist = False ):
        """Store exprs as self.exprs.  A list is kept as-is (not copied), a
           string is wrapped in a Literal, and any other object becomes a
           one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        elif isinstance( exprs, list ):
            self.exprs = exprs
        else:
            self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i'th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions, drop the cached string
           form, and return self."""
        self.strRepr = None
        self.exprs.append( other )
        return self

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this expression and on a fresh copy
           of every contained expression; returns self."""
        self.skipWhitespace = False
        # work on copies so that shared sub-expressions are left untouched
        copies = []
        for e in self.exprs:
            copies.append( e.copy() )
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable on this expression and forward the
           newest entry of self.ignoreExprs to every contained expression.
           A Suppress that is already registered is not added again.
           Returns self.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string form if available, else a cached
           "ClassName:(exprs)" description."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this expression and its children, then flatten nested
           two-element expressions of the same class; returns self."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable( candidate ):
            # a nested expression may be absorbed only if it is of our own
            # class and carries no parse actions, results name or debug flag
            return ( isinstance( candidate, self.__class__ ) and
                     not candidate.parseAction and
                     candidate.resultsName is None and
                     not candidate.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if mergeable( first ):
                self.exprs = first.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError  |= first.mayIndexError

            last = self.exprs[-1]
            if mergeable( last ):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError  |= last.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, passing along the trace
           extended by self, then run the recursion check on self."""
        # the default list is never mutated; a new list is built here
        trace = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion( [] )
1991
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Build a sequence from exprs.

           The sequence may return empty only if every contained expression
           may.  Whitespace handling is taken over from the first contained
           expression.
        """
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Read the first element from self.exprs, the list normalised by the
        # base class, not from the raw argument: a string argument has been
        # wrapped in a Literal there, whereas indexing the raw string would
        # yield a single character without whitespace attributes.
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order, starting at loc, and
           return (end location, accumulated tokens).  Exceptions raised by a
           contained expression are not caught here.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        for e in self.exprs[1:]:
            loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that have tokens or at least named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Support 'expr += other': append other (a string becomes a Literal)
           to this sequence and return self."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on the leading expressions, stopping after
           the first one that cannot return empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return self.name if set, else a cached "{a b c}" description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
2038
2039
2040 class Or(ParseExpression):
2041     """Requires that at least one ParseExpression is found.
2042        If two expressions match, the expression that matches the longest string will be used.
2043        May be constructed using the '^' operator.
2044     """
2045     def __init__( self, exprs, savelist = False ):
2046         super(Or,self).__init__(exprs, savelist)
2047         self.mayReturnEmpty = False
2048         for e in self.exprs:
2049             if e.mayReturnEmpty:
2050                 self.mayReturnEmpty = True
2051                 break
2052
2053     def parseImpl( self, instring, loc, doActions=True ):
2054         maxExcLoc = -1
2055         maxMatchLoc = -1
2056         for e in self.exprs:
2057             try:
2058                 loc2 = e.tryParse( instring, loc )
2059             except ParseException, err:
2060                 if err.loc > maxExcLoc:
2061                     maxException = err
2062                     maxExcLoc = err.loc
2063             except IndexError, err:
2064                 if len(instring) > maxExcLoc:
2065                     maxException = ParseException(instring,len(instring),e.errmsg,self)
2066                     maxExcLoc = len(instring)
2067             else:
2068                 if loc2 > maxMatchLoc:
2069                     maxMatchLoc = loc2
2070                     maxMatchExp = e
2071
2072         if maxMatchLoc < 0:
2073             if self.exprs:
2074                 raise maxException
2075             else:
2076                 raise ParseException(instring, loc, "no defined alternatives to match", self)
2077
2078         return maxMatchExp._parse( instring, loc, doActions )
2079
2080     def __ixor__(self, other ):
2081         if isinstance( other, basestring ):
2082             other = Literal( other )
2083         return self.append( other ) #Or( [ self, other ] )
2084
2085     def __str__( self ):
2086         if hasattr(self,"name"):
2087             return self.name
2088
2089         if self.strRepr is None:
2090             self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2091
2092         return self.strRepr
2093
2094     def checkRecursion( self, parseElementList ):
2095         subRecCheckList = parseElementList[:] + [ self ]
2096         for e in self.exprs:
2097             e.checkRecursion( subRecCheckList )
2098
2099
2100 class MatchFirst(ParseExpression):
2101     """Requires that at least one ParseExpression is found.
2102        If two expressions match, the first one listed is the one that will match.
2103        May be constructed using the '|' operator.
2104     """
2105     def __init__( self, exprs, savelist = False ):
2106         super(MatchFirst,self).__init__(exprs, savelist)
2107         if exprs:
2108             self.mayReturnEmpty = False
2109             for e in self.exprs:
2110                 if e.mayReturnEmpty:
2111                     self.mayReturnEmpty = True
2112                     break
2113         else:
2114             self.mayReturnEmpty = True
2115
2116     def parseImpl( self, instring, loc, doActions=True ):
2117         maxExcLoc = -1
2118         for e in self.exprs:
2119             try:
2120                 ret = e._parse( instring, loc, doActions )
2121                 return ret
2122             except ParseException, err:
2123                 if err.loc > maxExcLoc:
2124                     maxException = err
2125                     maxExcLoc = err.loc
2126             except IndexError, err:
2127                 if len(instring) > maxExcLoc:
2128                     maxException = ParseException(instring,len(instring),e.errmsg,self)
2129                     maxExcLoc = len(instring)
2130
2131         # only got here if no expression matched, raise exception for match that made it the furthest
2132         else:
2133             if self.exprs:
2134                 raise maxException
2135             else:
2136                 raise ParseException(instring, loc, "no defined alternatives to match", self)
2137
2138     def __ior__(self, other ):
2139         if isinstance( other, basestring ):
2140             other = Literal( other )
2141         return self.append( other ) #MatchFirst( [ self, other ] )
2142
2143     def __str__( self ):
2144         if hasattr(self,"name"):
2145             return self.name
2146
2147         if self.strRepr is None:
2148             self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2149
2150         return self.strRepr
2151
2152     def checkRecursion( self, parseElementList ):
2153         subRecCheckList = parseElementList[:] + [ self ]
2154         for e in self.exprs:
2155             e.checkRecursion( subRecCheckList )
2156
2157
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Build the unordered group and sort the given expressions into
           optional, repeatable-optional, repeatable-required and plain
           required ones.  For Optional, ZeroOrMore and OneOrMore wrappers the
           wrapped expression (.expr) is stored rather than the wrapper.
        """
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True

        optionals = []
        multioptionals = []
        multirequired = []
        required = []
        for e in exprs:
            # independent tests, so each list is filled exactly as if it had
            # been built by its own pass over exprs
            if isinstance(e,Optional):
                optionals.append( e.expr )
            if isinstance(e,ZeroOrMore):
                multioptionals.append( e.expr )
            if isinstance(e,OneOrMore):
                multirequired.append( e.expr )
            if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)):
                required.append( e )
        self.optionals = optionals
        self.multioptionals = multioptionals
        self.multirequired = multirequired
        # a OneOrMore body must be seen at least once, so it is also required
        required.extend( multirequired )
        self.required = required

    def parseImpl( self, instring, loc, doActions=True ):
        """Work out by trial parsing an order in which the expressions match,
           then parse for real in that order and merge the results.  Raises
           ParseException (at the original loc) when a required expression was
           never matched.
        """
        scanLoc = loc
        pendingReqd = self.required[:]
        pendingOpt  = self.optionals[:]
        matchOrder = []

        # repeat full passes until a pass in which no candidate matches
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failures = 0
            for e in candidates:
                try:
                    scanLoc = e.tryParse( instring, scanLoc )
                except ParseException:
                    failures += 1
                else:
                    matchOrder.append(e)
                    if e in pendingReqd:
                        pendingReqd.remove(e)
                    elif e in pendingOpt:
                        pendingOpt.remove(e)
            if failures == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(e) for e in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # second phase: parse for real, in the order discovered above
        parsed = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            parsed.append(results)

        merged = ParseResults([])
        for r in parsed:
            # names present both in merged and in r get the two values
            # combined; they are written back after r has been added
            combined = {}
            for key in r.keys():
                if key in merged.keys():
                    both = ParseResults(merged[key])
                    both += ParseResults(r[key])
                    combined[key] = both
            merged += ParseResults(r)
            for key,value in combined.items():
                merged[key] = value
        return loc, merged

    def __str__( self ):
        """Return self.name if set, else a cached "{a & b & c}" description."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr

        parts = []
        for e in self.exprs:
            parts.append( _ustr(e) )
        self.strRepr = "{" + " & ".join( parts ) + "}"
        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        chain = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( chain )
2236
2237
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.

       Wraps a single contained expression, stored as self.expr.  The
       contained expression may be None; every method here checks for that.
    """
    def __init__( self, expr, savelist=False ):
        """Store expr (a plain string is wrapped in a Literal) and mirror its
           parsing attributes onto this element."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the contained expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a private copy of the
           contained expression.  Returns self."""
        self.skipWhitespace = False
        # copy only when there is something to copy: calling copy() on a
        # missing expression would raise AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on the contained
           expression.  A Suppress that is already registered is not added
           again.  Returns self."""
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the contained expression.  Returns self."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already on the trail,
           otherwise continue the check into the contained expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the contained expression, then check this element for recursion."""
        # the default list is never mutated: a copy is extended instead
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        # prefer whatever the base class produces; fall back to a generated
        # representation if it fails
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2307
2308
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a lookahead never consumes input, so it is flagged as possibly empty
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # only the success or failure of tryParse matters; any exception it
        # raises propagates to the caller and its return value is discarded
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
2321
2322
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # set the flag directly rather than calling self.leaveWhitespace(),
        # which would also change the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent: succeed without consuming anything
            return loc, []

        # the unwanted expression matched: report failure at the current position
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
2358
2359
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Collect as many consecutive matches as possible.

        A ParseException or IndexError from any attempt simply ends the
        repetition; whatever was gathered up to then is returned.
        """
        tokens = []
        try:
            # the first attempt is made with callPreParse=False
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            checkIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if checkIgnorables:
                    nextLoc = self.skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                # keep results that carry tokens or named values
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited setResultsName, but the returned element always saves its results as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2397
2398
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the expression once, then as many more times as possible.

        The first match is mandatory, so an exception from it propagates.
        A ParseException or IndexError from a later attempt ends the
        repetition.
        """
        # must be at least one
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            checkIgnorables = len(self.ignoreExprs) > 0
            while True:
                nextLoc = loc
                if checkIgnorables:
                    nextLoc = self.skipIgnorables( instring, loc )
                loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
                # keep results that carry tokens or named values
                if moreTokens or moreTokens.keys():
                    tokens += moreTokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Same as the inherited setResultsName, but the returned element always saves its results as a list."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2432
2433 class _NullToken(object):
2434     def __bool__(self):
2435         return False
2436     def __str__(self):
2437         return ""
2438
2439 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the expression's tokens if it matches; otherwise return the
           default value as the only token, or no tokens when no default was
           given.  loc is left unchanged when the expression does not match."""
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            # identity comparison against the sentinel, so that any real
            # value (including None or "") can serve as a default
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            else:
                tokens = [ self.defaultValue ]
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
2468
2469
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # attach the ignorable to a private copy of the target expression
            target = self.expr.copy()
            target.ignore(ignore)
            self.expr = target
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time until the target expression matches.

        Returns the skipped text as a single token; with includeMatch set, the
        target is parsed as well and its tokens (if any) are grouped with the
        skipped text.  Raises this element's exception if the end of the
        input is passed without a match.
        """
        startLoc = loc
        target = self.expr
        endOfInput = len(instring)
        while loc <= endOfInput:
            try:
                loc = target.skipIgnorables( instring, loc )
                # probe without parse actions; raises if there is no match here
                target._parse( instring, loc, doActions=False, callPreParse=False )
                skipped = instring[startLoc:loc]
                if not self.includeMatch:
                    return loc, [ skipped ]
                loc,matched = target._parse(instring,loc,doActions,callPreParse=False)
                if not matched:
                    return loc, [ skipped ]
                combined = ParseResults( skipped )
                combined += matched
                return loc, [ combined ]
            except (ParseException,IndexError):
                loc += 1
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
2513
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Install other (a plain string is wrapped in a Literal) as the
           contained expression and mirror its parsing attributes.  Returns self."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        # drop any cached string form; it described the previous expression
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        return self

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only; the contained
           expression is not touched.  Returns self."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; the streamlined flag is
           set first so that a recursive grammar cannot loop back here."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless self is already on the
           trace, then check this element for recursion."""
        # the default list is never mutated: a copy is extended instead
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # While the contained expression is rendered, this object poses as a
        # _ForwardNoRecurse so that a recursive reference back to it prints
        # "..." instead of recursing.  Remember the real class and restore
        # exactly that, so instances of Forward subclasses keep their type.
        originalClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = originalClass
        return "Forward: "+retString

    def copy(self):
        """Copy a defined Forward through the inherited copy(); for an
           undefined one, return a new Forward that refers to this one."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
2583
class _ForwardNoRecurse(Forward):
    """Temporary stand-in class assigned by Forward.__str__ while it renders
       its contained expression; prints as an ellipsis."""
    def __str__( self ):
        placeholder = "..."
        return placeholder
2587
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not passed on to the base class;
        # saveAsList is forced off below
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
2593
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        # stacklevel=2 attributes the warning to the code creating the Upcase
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list holding the upper-cased form of every token."""
        return [ string.upper(tok) for tok in tokenlist ]
2603
2604
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.adjacent = adjacent
        self.joinString = joinString
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True

    def ignore( self, other ):
        """Register an ignorable.  When adjacency is required it is registered
           on the Combine only, via ParserElement.ignore; otherwise the
           inherited ignore is used.  Returns self."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with a single joined string."""
        # start from a copy of the incoming results with the token list emptied out
        retToks = tokenlist.copy()
        del retToks[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        return retToks
2635
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        # TokenConverter switches saveAsList off; a Group needs it on
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the matched tokens in one enclosing list."""
        grouped = [ tokenlist ]
        return grouped
2644
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a named entry to tokenlist for every non-empty element, keyed
           by that element's first token."""
        for i,tok in enumerate(tokenlist):
            numFields = len(tok)
            if numFields == 0:
                continue

            key = tok[0]
            if isinstance(key,int):
                # integer keys are converted to their stripped string form
                key = _ustr(key).strip()

            if numFields == 1:
                # a key with nothing after it maps to the empty string
                value = ""
            elif numFields == 2 and not isinstance(tok[1],ParseResults):
                value = tok[1]
            else:
                rest = tok.copy()
                del rest[0]
                # a lone remaining token is stored bare, unless the remainder carries named results
                if len(rest) == 1 and not (isinstance(rest,ParseResults) and rest.keys()):
                    value = rest[0]
                else:
                    value = rest
            tokenlist[key] = _ParseResultsWithOffset(value,i)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
2677
2678
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        """Discard whatever was matched."""
        nothing = []
        return nothing

    def suppress( self ):
        """Already a Suppress, so return self unchanged."""
        return self
2686
2687
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = ParserElement.normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        """Run the wrapped action the first time; raise ParseException on any
           later call until reset() is used."""
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # marked as called only after the action returned without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False
2701
def traceParseAction(f):
    """Decorator for debugging parse actions.

       The wrapped action writes an "entering" line to sys.stderr before it
       runs and a "leaving" line afterwards, showing either the return value
       or the exception raised (which is then re-raised).
    """
    f = ParserElement.normalizeParseActionArgs(f)
    def z(*paArgs):
        label = f.func_name
        # the last three arguments are always (string, location, tokens)
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is taken to be the owning instance
            label = paArgs[0].__class__.__name__ + '.' + label
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (label,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (label,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (label,ret) )
        return ret
    # give the wrapper the wrapped function's name where that is possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
2723
2724 #
2725 # global helpers
2726 #
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprName = _ustr(expr)
    delimName = _ustr(delim)
    dlName = exprName+" ["+delimName+" "+exprName+"]..."
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
2740
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # once the count is known, redefine the array part to expect exactly that many items
        n = int(t[0])
        counted = n and Group(And([expr]*n))
        arrayExpr << (counted or Group(empty))
        # returning no tokens is what suppresses the count itself
        return []
    lengthField = Word(nums).setName("arrayLen")
    lengthField = lengthField.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( lengthField + arrayExpr )
2754
2755 def _flatten(L):
2756     if type(L) is not list: return [L]
2757     if L == []: return L
2758     return _flatten(L[0]) + _flatten(L[1:])
2759
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater every time the original expression matches
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # several tokens: flatten them and require each one literally, in order
        flatTokens = _flatten(t.asList())
        rep << And( [ Literal(tok) for tok in flatTokens ] )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
2785
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of the expression
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # fail the repeat unless it produced the same flattened tokens
            repeatTokens = _flatten(t.asList())
            if repeatTokens != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
2811
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters \\ ^ - ] in s and spell newline and
       tab as the two-character sequences \\n and \\t."""
    # the backslash goes first so the escapes added for the later
    # characters are not escaped a second time
    for special in ( "\\", "^", "-", "]" ):
        s = s.replace( special, "\\"+special )
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)
2819
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       An argument that is neither a string nor a list/tuple produces a
       SyntaxWarning and is then treated like an empty set of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always work on a real list: the reordering below deletes and
        # inserts items, which a tuple does not support
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with nothing to match instead of failing on an unbound name
        symbols = []

    # drop duplicates, and move any symbol that is masked by an earlier,
    # shorter one in front of it so the longer alternative is tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position, move on to the next one
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: one character class does it
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)


    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
2878
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    # one grouped key/value pair, repeated any number of times
    entry = Group ( key + value )
    return Dict( ZeroOrMore( entry ) )
2888
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# private mini-grammar for "[...]" character-range strings
# a backslash plus one punctuation character; only that character is kept
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 )
_escapedPunc = _escapedPunc.setParseAction( lambda s,l,t:t[0][1] )
# despite the name, both the backslash and ']' are left out
_printables_less_backslash = "".join( [ c for c in printables if c not in  r"\]" ] )
# backslash, "0x" and hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) )
_escapedHexChar = _escapedHexChar.setParseAction( lambda s,l,t:unichr(int(t[0],16)) )
# backslash, "0" and octal digits, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") )
_escapedOctChar = _escapedOctChar.setParseAction( lambda s,l,t:unichr(int(t[0],8)) )
_singleChar = ( _escapedPunc
              | _escapedHexChar
              | _escapedOctChar
              | Word(_printables_less_backslash,exact=1) )
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = ( Literal("[")
                 + Optional("^").setResultsName("negate")
                 + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body")
                 + "]" )

# a grouped pair is expanded to every character between its two ends
# (inclusive); a plain character is passed through unchanged
_expanded = lambda p: (
    isinstance(p,ParseResults)
    and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
    or p
)
2908
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed range yields no characters, but
        # KeyboardInterrupt and SystemExit are no longer swallowed
        return ""
2929
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().
    """
    def _replFunc(*_unused):
        # whatever arguments arrive are ignored; a fresh one-item list is built per call
        replacement = [replStr]
        return replacement
    return _replFunc
2937
def removeQuotes(s,l,t):
    """Parse action that strips the first and last character (the quotation
       marks) from the first token of a parsed quoted string.
       Attach it to a quoted string expression with::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
2944
def upcaseTokens(s,l,t):
    """Parse action that returns every token, converted to a string with _ustr,
       in upper case."""
    return [ _ustr(tok).upper() for tok in t ]
2948
def downcaseTokens(s,l,t):
    """Parse action that returns every token, converted to a string with _ustr,
       in lower case."""
    return [ _ustr(tok).lower() for tok in t ]
2952
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       The token list t is emptied in place and refilled with the slice of the
       input string s running from startLoc to the end location reported by
       getTokensEndLoc(); the same (modified) t is returned.

       Raises ParseFatalException if getTokensEndLoc() fails with a
       ParseException.
    """
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        # call form of raise: same behavior as "raise Class, msg", but valid
        # syntax on both Python 2 and Python 3
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    del t[:]
    t += ParseResults(s[startLoc:endloc])
    return t
2963
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks up the call stack, skipping this function and its immediate
       caller, until it finds a frame for a function named "_parseNoCache",
       and returns the value of that frame's local variable "loc".

       Raises ParseFatalException if no such frame is found.
    """
    import inspect
    # inspect.stack() must be called directly here: the [2:] offset below
    # assumes this function's own frame is entry 0.
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for f in fstack[2:]:
            if f[3] == "_parseNoCache":
                return f[0].f_locals["loc"]
        # call form of raise: valid syntax on both Python 2 and Python 3
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame records so this frame does not keep itself alive
        del fstack
2979
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr may be a string (turned into a Keyword, caseless unless xml is true)
       or an existing expression, whose .name is then used for the results names.
       Returns the pair (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname, tagExpr = tagStr, Keyword(tagStr, caseless=not xml)
    else:
        resname, tagExpr = tagStr.name, tagStr

    attrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute is name="value" with a double-quoted value
        attrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( attrName + Suppress("=") + attrValue )
    else:
        # HTML: names are lower-cased, values may be quoted or bare, and the
        # "=value" part is optional
        nonClosingPrintables = "".join( [ c for c in printables if c != ">" ] )
        attrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(nonClosingPrintables)
        attribute = Group( attrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + attrValue ) )

    # "empty" is True when the tag ends with "/>", False otherwise
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagExpr + Dict(ZeroOrMore(attribute)) + selfClosing + Suppress(">")
    closeTag = Combine("</" + tagExpr + ">")

    # results names are "start"/"end" + the tag name in title case, with any
    # ":" separators removed
    suffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+suffix).setName("<%s>" % tagExpr)
    closeTag = closeTag.setResultsName("end"+suffix).setName("</%s>" % tagExpr)

    return openTag, closeTag
3007
def makeHTMLTags(tagStr):
    """Return the (openTag, closeTag) pair of expressions for the HTML tag named
       by tagStr, as built by _makeTags with xml=False."""
    return _makeTags( tagStr, xml=False )
3011
def makeXMLTags(tagStr):
    """Return the (openTag, closeTag) pair of expressions for the XML tag named
       by tagStr, as built by _makeTags with xml=True."""
    return _makeTags( tagStr, xml=True )
3015
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       If any positional name-value tuples are given, keyword arguments are ignored.
       The returned parse action raises ParseException when a required attribute
       is missing from the tokens or has a different value.
       """
    # positional pairs take priority; attribute names are compared in lower case
    required = [ (name.lower(), value) for name, value in (args or attrDict.items()) ]

    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            actual = tokens[attrName]
            if actual != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, actual, attrValue))

    return pa
3042
# Associativity markers for operatorPrecedence.  Each is a distinct object();
# operatorPrecedence compares the rightLeftAssoc entry of every operator
# definition against opAssoc.LEFT and opAssoc.RIGHT.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3046
def _flattenOpPrecTokens(tokens):
    """Recursively collapse ParseResults that hold exactly one item.

       Anything that is not a ParseResults is returned unchanged.  A
       single-item ParseResults is replaced by its (recursively collapsed)
       item; a ParseResults of any other length has this function mapped over
       its items.
    """
    if not isinstance(tokens,ParseResults):
        return tokens
    if len(tokens) != 1:
        return map(_flattenOpPrecTokens,tokens)
    only = tokens[0]
    if isinstance(only,ParseResults):
        return _flattenOpPrecTokens(only)
    return only
3056
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
          expression grammar; a parenthesized sub-expression is accepted wherever
          baseExpr is
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal
           - numTerms is the number of terms for this operator (must
              be 1 or 2)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Returns a Group wrapping the complete expression grammar.

       Raises ValueError if numTerms is not 1 or 2, or if rightLeftAssoc is
       neither opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    # Each precedence level is built on top of the previous one (lastExpr).
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        thisExpr = Forward()
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr ) )
            elif arity == 2:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr + lastExpr ) )
            else:
                raise ValueError("operator must be unary (1) or binary (2)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
                matchExpr |= lastExpr
            elif arity == 2:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr + thisExpr ) )
            else:
                raise ValueError("operator must be unary (1) or binary (2)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        thisExpr << ( matchExpr )
        lastExpr = thisExpr
    ret << lastExpr
    # collapse the single-item groups produced by levels whose operator did not occur
    ret.setParseAction(_flattenOpPrecTokens)
    return Group(ret)
3111
# Quoted-string expressions.  Inside the quotes the regexes accept any character other
# than the quote itself, a newline, a carriage return or a backslash; a doubled quote
# ("" or ''); or a backslash followed by any character.  The quotation marks are part
# of the matched text.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\.))*'").setName("string enclosed in single quotes")
# Either of the two forms above, as a single regex.
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\.))*')''').setName("quotedString using single or double quotes")
3115
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.

       Raises ValueError if opener equals closer, or if no content expression
       is given and opener and closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        # default content: runs of characters that are neither delimiter
        # characters nor default whitespace
        excluded = opener+closer+ParserElement.DEFAULT_WHITE_CHARS
        content = (empty+CharsNotIn(excluded).setParseAction(lambda t:t[0].strip()))
    ret = Forward()
    # a nested list is the same grammar again, wrapped in the delimiters
    nested = Group( Suppress(opener) + ret + Suppress(closer) )
    if ignoreExpr is None:
        alternatives = content | nested
    else:
        alternatives = ignoreExpr | content | nested
    ret << ZeroOrMore( alternatives )
    return ret
3150
# Character sets built with srange from 8-bit code points:
# alphas8bit covers 0xC0-0xD6, 0xD8-0xF6 and 0xF8-0xFF (presumably the accented
# Latin-1 letters - confirm against the intended encoding);
# punc8bit covers 0xA1-0xBF plus 0xD7 and 0xF7.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
3153
# Open/close tag expressions for any HTML tag whose name starts with a letter and
# continues with letters, digits, "_" or ":".
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
# "&name;" for one of the entity names gt, lt, amp, nbsp, quot; the bare name is
# available under the results name "entity".
commonHTMLEntity = Combine("&" + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
# Replacement character for each entity name recognized by commonHTMLEntity:
# gt -> ">", lt -> "<", amp -> "&", nbsp -> " " (plain space), quot -> '"'.
# (quot was previously mapped to an apostrophe, which is the wrong character
# for &quot;.)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# Parse action: returns the replacement character for t.entity, or None when the
# entity name is not in the map.
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)
3158
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
# "/* ... */" block comment; the non-greedy repeat stops at the first closing "*/".
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# "<!-- ... -->", spanning newlines, up to the first "-->".
htmlComment = Regex(r"<!--[\s\S]*?-->")
# Everything up to the end of the current line; leading whitespace is not skipped.
restOfLine = Regex(r".*").leaveWhitespace()
# "//" to end of line, where a backslash followed by a newline is also consumed.
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# Either a "/* ... */" block comment or a "//" comment, in one regex
# (NOTE(review): the "//" branch appears intended to allow backslash-continued lines - confirm).
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# Same expression object as cppStyleComment, under another name.
javaStyleComment = cppStyleComment
# "#" to end of line.
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# All printable characters except the comma.
_noncomma = "".join( [ c for c in printables if c != "," ] )
# One unquoted list item: one or more words of non-comma characters, where a run of
# spaces/tabs is kept as part of the item only when it is not followed by a comma or
# the end of the line; everything is combined into a single token.
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Comma-delimited list whose items are quoted strings or unquoted items; a missing
# item yields the default "".
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
3174
3175
if __name__ == "__main__":
    # Self-test, run only when the module is executed as a script: builds a tiny
    # "SELECT columns FROM tables" grammar and prints the parse results (or the
    # parse error) for a series of sample statements.

    def test( teststring ):
        """Parse teststring with simpleSQL and print the results; on a
           ParseException print the offending line, a caret under the error
           column, and the exception."""
        print teststring,"->",
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print tokenlist
            print "tokens = ",        tokens
            print "tokens.columns =", tokens.columns
            print "tokens.tables =",  tokens.tables
            print tokens.asXML("SQL",True)
        except ParseException, err:
            print err.line
            # err.column is 1-based, so pad with column-1 spaces to place the caret
            print " "*(err.column-1) + "^"
            print err
        print

    # keywords are matched without regard to case
    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    # identifiers, and dotted names combined into one upper-cased token
    ident          = Word( alphas, alphanums + "_$" )
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )#.setName("tables")
    # SELECT ( * | column list ) FROM table list
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    # sample statements; several are deliberately malformed to show the error output
    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2   " )