adding all of botlist, initial add
[botlist.git] / botlistprojects / botbackup / lib / pyparsing / pyparsing.py
blob528e798bcdd0a2d3ad4da2ec9ce86e6ccd6375bc
1 # module pyparsing.py
3 # Copyright (c) 2003-2007 Paul T. McGuire
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #from __future__ import generators
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
37 from pyparsing import Word, alphas
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
45 The program outputs the following::
47 Hello, World! -> ['Hello', ',', 'World', '!']
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59 """
61 __version__ = "1.4.8"
62 __versionTime__ = "7 October 2007 00:25"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
65 import string
66 from weakref import ref as wkref
67 import copy,sys
68 import warnings
69 import re
70 import sre_constants
71 import xml.sax.saxutils
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
def _ustr(obj):
    """Unicode-tolerant stand-in for str(obj).

    Returns str(obj) whenever that call succeeds, so existing callers see
    exactly the behaviour of str().  Only when str() raises
    UnicodeEncodeError is the unicode form of the object returned instead.
    """
    try:
        text = str(obj)
    except UnicodeEncodeError:
        # str() could not encode the value with the default codec.  Hand the
        # unicode object back unchanged rather than choosing a lossy encoding
        # ('replace', 'backslashreplace', ...) on the caller's behalf.
        text = unicode(obj)
    return text
def _str2dict(strg):
    """Return a dict with one key per distinct character of strg, every value 0.

    The dict is only useful for its keys (fast membership tests).
    """
    return dict.fromkeys(strg, 0)
class _Constants(object):
    """Empty class; instances serve as plain attribute containers."""
105 alphas = string.lowercase + string.uppercase
106 nums = string.digits
107 hexnums = nums + "ABCDEFabcdef"
108 alphanums = alphas + nums
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    __slots__ = ( "loc","msg","pstr","parserElement" )

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Record where and why a parse failed.

        Called as (pstr, loc, msg, elem) the arguments are stored as given.
        Called with a single positional argument, that argument is taken to
        be the message and the parsed string is stored as "".
        """
        if msg is None:
            # one-argument form: the lone argument is really the message
            pstr, msg = "", pstr
        self.loc = loc
        self.msg = msg
        self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text

        "column" is accepted as a synonym for "col".  Any other name raises
        AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname == "col" or aname == "column":
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        position = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % position

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           With an empty markerString the line is returned unmarked.  The
           result is stripped of leading and trailing whitespace.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text

       Adds nothing to ParseBaseException; it exists so that callers can
       catch this exception type specifically.
    """
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately

       Adds nothing to ParseBaseException beyond a distinct type.
    """
170 #~ class ReparseException(ParseBaseException):
171 #~ """Experimental class - parse actions can raise this exception to cause
172 #~ pyparsing to reparse the input string:
173 #~ - with a modified input string, and/or
174 #~ - with a modified start location
175 #~ Set the values of the ReparseException in the constructor, and raise the
176 #~ exception in a parse action to cause pyparsing to use the new string/location.
177 #~ Setting the values as None causes no change to be made.
178 #~ """
179 #~ def __init_( self, newstring, restartLoc ):
180 #~ self.newParseText = newstring
181 #~ self.reparseLoc = restartLoc
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""

    def __init__( self, parseElementList ):
        """Remember the chain of parse elements that was being validated."""
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of '%' would
        # be unpacked as the format arguments (TypeError for more than one
        # element, wrong text for exactly one).
        return "RecursiveGrammarException: %s" % ( self.parseElementTrace, )
class _ParseResultsWithOffset(object):
    """Two-element record pairing a value with an integer offset.

    Index 0 gives the value and index 1 the offset; any other index raises
    IndexError, which also lets instances be unpacked as ``value, offset``.
    """
    def __init__(self,p1,p2):
        self.tup = p1, p2

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return "%r" % ( self.tup, )
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

       Internally the tokens live in a list, and the named results in a dict
       that maps each name to a list of (value, offset) records.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # Wrapping an existing ParseResults returns that same object; the
        # __doinit flag tells __init__ whether the fields still need creating.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        """Build results from toklist, optionally registering them under name.

        toklist - a list (copied), or a single token (wrapped in a list)
        name    - results name to store the tokens under, if any
        asList  - when true the named value is stored as a nested ParseResults
        modal   - when false, name is marked as accumulating, so lookups by
                  that name return all stored values instead of the last one
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        # this line is related to debugging the asXML bug
        #~ asList = False

        if name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),-1)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),-1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer/slice: index the token list.  Anything else: look up a
        results name (last value, or all values for an accumulating name)."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        if i not in self.__accumNames:
            return self.__tokdict[i][-1][0]
        return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            del self.__toklist[i]
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        """True if k is a defined results name (tokens are not searched)."""
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    def __nonzero__( self ): return self.__bool__()
    def __iter__( self ): return iter( self.__toklist )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict.keys()]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        """Attribute-style access to named results; an undefined results
        name gives "" instead of raising AttributeError."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Append other's tokens and merge its named results into self."""
        if other.__tokdict:
            offset = len(self.__toklist)
            # Shift other's offsets past the tokens already held here; a
            # negative offset becomes exactly `offset`.  Written as an
            # explicit branch because `a<0 and offset or a+offset` yields -1
            # instead of 0 whenever offset is 0.
            def addoffset( a ):
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)
        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append( _ustr(i) )
            else:
                parts.append( repr(i) )
        return "[" + ", ".join(parts) + "]"

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
        Nested ParseResults become nested lists; the tokens themselves are
        returned as stored (they are not converted)."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                        for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            # results name registered for this position, or None
            resTag = namedItems.get(i)
            if isinstance(res,ParseResults):
                out += [ res.asXML(resTag,
                                    namedItemsOnly and doctag is None,
                                    nextLevelIndent,
                                    formatted)]
            else:
                # individual token, see if there is a name for it
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = xml.sax.saxutils.escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # reverse lookup: the name under which the object `sub` is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               self.__tokdict.values()[0][0][1] in (0,-1)):
            return self.__tokdict.keys()[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            # `out` always holds the token-list line, so every named entry
            # starts on a fresh line
            out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults) and v.keys():
                out.append( v.dump(indent,depth+1) )
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the weakly referenced parent is stored as the object itself
        # (None if it is unset, dead, or tests false)
        par = None
        if self.__parent is not None:
            par = self.__parent() or None
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   par,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A location that sits exactly on a newline character reports column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when no newline precedes loc, which makes the
    # subtraction yield loc+1, i.e. a 1-based column on the first line
    return loc - strg.rfind("\n", 0, loc)
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    The returned text excludes the terminating newline.  When loc sits on a
    newline character, the line that newline terminates is returned.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "not found" as -1, so index 0 is a valid hit: a string
    # that starts with a newline has an empty first line.
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    return strg[lastCR+1:]
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the
    expression, the location, and that location as (line,column)."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print("Match %s at loc %s %s" % ( _ustr(expr), loc, position ))
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
    expression and the matched tokens as a list."""
    print("Matched %s -> %s" % ( _ustr(expr), toks.asList() ))
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the
    exception."""
    print("Exception raised: %s" % ( _ustr(exc), ))
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and ignores them.
    """
    return None
556 class ParserElement(object):
557 """Abstract base level parser element class."""
558 DEFAULT_WHITE_CHARS = " \n\t\r"
560 def setDefaultWhitespaceChars( chars ):
561 """Overrides the default whitespace chars
563 ParserElement.DEFAULT_WHITE_CHARS = chars
564 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
566 def __init__( self, savelist=False ):
567 self.parseAction = list()
568 self.failAction = None
569 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
570 self.strRepr = None
571 self.resultsName = None
572 self.saveAsList = savelist
573 self.skipWhitespace = True
574 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
575 self.copyDefaultWhiteChars = True
576 self.mayReturnEmpty = False # used when checking for left-recursion
577 self.keepTabs = False
578 self.ignoreExprs = list()
579 self.debug = False
580 self.streamlined = False
581 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
582 self.errmsg = ""
583 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
584 self.debugActions = ( None, None, None ) #custom debug actions
585 self.re = None
586 self.callPreparse = True # used to avoid redundant calls to preParse
587 self.callDuringTry = False
589 def copy( self ):
590 """Make a copy of this ParserElement. Useful for defining different parse actions
591 for the same parsing pattern, using copies of the original parse element."""
592 cpy = copy.copy( self )
593 cpy.parseAction = self.parseAction[:]
594 cpy.ignoreExprs = self.ignoreExprs[:]
595 if self.copyDefaultWhiteChars:
596 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
597 return cpy
599 def setName( self, name ):
600 """Define name for this expression, for use in debugging."""
601 self.name = name
602 self.errmsg = "Expected " + self.name
603 if hasattr(self,"exception"):
604 self.exception.msg = self.errmsg
605 return self
607 def setResultsName( self, name, listAllMatches=False ):
608 """Define name for referencing matching tokens as a nested attribute
609 of the returned parse results.
610 NOTE: this returns a *copy* of the original ParserElement object;
611 this is so that the client can define a basic element, such as an
612 integer, and reference it in multiple places with different names.
614 newself = self.copy()
615 newself.resultsName = name
616 newself.modalResults = not listAllMatches
617 return newself
619 def setBreak(self,breakFlag = True):
620 """Method to invoke the Python pdb debugger when this element is
621 about to be parsed. Set breakFlag to True to enable, False to
622 disable.
624 if breakFlag:
625 _parseMethod = self._parse
626 def breaker(instring, loc, doActions=True, callPreParse=True):
627 import pdb
628 pdb.set_trace()
629 _parseMethod( instring, loc, doActions, callPreParse )
630 breaker._originalParseMethod = _parseMethod
631 self._parse = breaker
632 else:
633 if hasattr(self._parse,"_originalParseMethod"):
634 self._parse = self._parse._originalParseMethod
635 return self
637 def normalizeParseActionArgs( f ):
638 """Internal method used to decorate parse actions that take fewer than 3 arguments,
639 so that all parse actions can be called as f(s,l,t)."""
640 STAR_ARGS = 4
642 try:
643 restore = None
644 if isinstance(f,type):
645 restore = f
646 f = f.__init__
647 if f.func_code.co_flags & STAR_ARGS:
648 return f
649 numargs = f.func_code.co_argcount
650 if hasattr(f,"im_self"):
651 numargs -= 1
652 if restore:
653 f = restore
654 except AttributeError:
655 try:
656 # not a function, must be a callable object, get info from the
657 # im_func binding of its bound __call__ method
658 if f.__call__.im_func.func_code.co_flags & STAR_ARGS:
659 return f
660 numargs = f.__call__.im_func.func_code.co_argcount
661 if hasattr(f.__call__,"im_self"):
662 numargs -= 1
663 except AttributeError:
664 # not a bound method, get info directly from __call__ method
665 if f.__call__.func_code.co_flags & STAR_ARGS:
666 return f
667 numargs = f.__call__.func_code.co_argcount
668 if hasattr(f.__call__,"im_self"):
669 numargs -= 1
671 #~ print "adding function %s with %d args" % (f.func_name,numargs)
672 if numargs == 3:
673 return f
674 else:
675 if numargs == 2:
676 def tmp(s,l,t):
677 return f(l,t)
678 elif numargs == 1:
679 def tmp(s,l,t):
680 return f(t)
681 else: #~ numargs == 0:
682 def tmp(s,l,t):
683 return f()
684 try:
685 tmp.__name__ = f.__name__
686 except AttributeError:
687 # no need for special handling if attribute doesnt exist
688 pass
689 try:
690 tmp.__doc__ = f.__doc__
691 except AttributeError:
692 # no need for special handling if attribute doesnt exist
693 pass
694 try:
695 tmp.__dict__.update(f.__dict__)
696 except AttributeError:
697 # no need for special handling if attribute doesnt exist
698 pass
699 return tmp
700 normalizeParseActionArgs = staticmethod(normalizeParseActionArgs)
702 def setParseAction( self, *fns, **kwargs ):
703 """Define action to perform when successfully matching parse element definition.
704 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
705 fn(loc,toks), fn(toks), or just fn(), where:
706 - s = the original string being parsed (see note below)
707 - loc = the location of the matching substring
708 - toks = a list of the matched tokens, packaged as a ParseResults object
709 If the functions in fns modify the tokens, they can return them as the return
710 value from fn, and the modified list of tokens will replace the original.
711 Otherwise, fn does not need to return any value.
713 Note: the default parsing behavior is to expand tabs in the input string
714 before starting the parsing process. See L{I{parseString}<parseString>} for more information
715 on parsing strings containing <TAB>s, and suggested methods to maintain a
716 consistent view of the parsed string, the parse location, and line and column
717 positions within the parsed string.
719 self.parseAction = map(self.normalizeParseActionArgs, list(fns))
720 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
721 return self
723 def addParseAction( self, *fns, **kwargs ):
724 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
725 self.parseAction += map(self.normalizeParseActionArgs, list(fns))
726 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
727 return self
729 def setFailAction( self, fn ):
730 """Define action to perform if parsing fails at this expression.
731 Fail acton fn is a callable function that takes the arguments
732 fn(s,loc,expr,err) where:
733 - s = string being parsed
734 - loc = location where expression match was attempted and failed
735 - expr = the parse expression that failed
736 - err = the exception thrown
737 The function returns no value. It may throw ParseFatalException
738 if it is desired to stop parsing immediately."""
739 self.failAction = fn
740 return self
742 def skipIgnorables( self, instring, loc ):
743 exprsFound = True
744 while exprsFound:
745 exprsFound = False
746 for e in self.ignoreExprs:
747 try:
748 while 1:
749 loc,dummy = e._parse( instring, loc )
750 exprsFound = True
751 except ParseException:
752 pass
753 return loc
755 def preParse( self, instring, loc ):
756 if self.ignoreExprs:
757 loc = self.skipIgnorables( instring, loc )
759 if self.skipWhitespace:
760 wt = self.whiteChars
761 instrlen = len(instring)
762 while loc < instrlen and instring[loc] in wt:
763 loc += 1
765 return loc
767 def parseImpl( self, instring, loc, doActions=True ):
768 return loc, []
770 def postParse( self, instring, loc, tokenlist ):
771 return tokenlist
773 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at loc, without consulting the packrat cache.

    Runs preParse (unless suppressed), parseImpl and postParse, wraps the
    tokens in a ParseResults and applies the parse actions when doActions
    or callDuringTry is set.  Debug and fail actions are invoked along the
    way if configured.

    Returns (new location, ParseResults).  An IndexError raised by
    parseImpl is reported as a ParseException located at the end of the
    input."""
    debugging = ( self.debug ) #and doActions )
    instrumented = debugging or self.failAction

    #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    if instrumented and self.debugActions[0]:
        self.debugActions[0]( instring, loc, self )

    if callPreParse and self.callPreparse:
        preloc = self.preParse( instring, loc )
    else:
        preloc = loc
    tokensStart = loc

    if instrumented:
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseException:
            # fetch the active exception in a way that parses under both
            # the old and the new 'except' syntax
            err = sys.exc_info()[1]
            #~ print "Exception raised:", err
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    elif self.mayIndexError or loc >= len(instring):
        try:
            loc,tokens = self.parseImpl( instring, preloc, doActions )
        except IndexError:
            raise ParseException( instring, len(instring), self.errmsg, self )
    else:
        # fast path: no IndexError handler needed
        loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        # the action returned replacement tokens
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseException:
                err = sys.exc_info()[1]
                #~ print "Exception raised in user parse action:", err
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    #~ print "Matched",self,"->",retTokens.asList()
    if debugging and self.debugActions[1]:
        self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
846 def tryParse( self, instring, loc ):
847 return self._parse( instring, loc, doActions=False )[0]
849 # this method gets repeatedly called during backtracking with the same arguments -
850 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
851 def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
852 lookup = (self,instring,loc,callPreParse,doActions)
853 if lookup in ParserElement._exprArgCache:
854 value = ParserElement._exprArgCache[ lookup ]
855 if isinstance(value,Exception):
856 if isinstance(value,ParseBaseException):
857 value.loc = loc
858 raise value
859 return (value[0],value[1].copy())
860 else:
861 try:
862 value = self._parseNoCache( instring, loc, doActions, callPreParse )
863 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
864 return value
865 except ParseBaseException, pe:
866 ParserElement._exprArgCache[ lookup ] = pe
867 raise
869 _parse = _parseNoCache
871 # argument cache for optimizing repeated calls when backtracking through recursive expressions
872 _exprArgCache = {}
873 def resetCache():
874 ParserElement._exprArgCache.clear()
875 resetCache = staticmethod(resetCache)
877 _packratEnabled = False
878 def enablePackrat():
879 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
880 Repeated parse attempts at the same string location (which happens
881 often in many complex grammars) can immediately return a cached value,
882 instead of re-executing parsing/validating code. Memoizing is done of
883 both valid results and parsing exceptions.
885 This speedup may break existing programs that use parse actions that
886 have side-effects. For this reason, packrat parsing is disabled when
887 you first import pyparsing. To activate the packrat feature, your
888 program must call the class method ParserElement.enablePackrat(). If
889 your program uses psyco to "compile as you go", you must call
890 enablePackrat before calling psyco.full(). If you do not do this,
891 Python will crash. For best results, call enablePackrat() immediately
892 after importing pyparsing.
894 if not ParserElement._packratEnabled:
895 ParserElement._packratEnabled = True
896 ParserElement._parse = ParserElement._parseCache
897 enablePackrat = staticmethod(enablePackrat)
def parseString(self, instring):
    """Parse instring from its first character and return the matched tokens.

    This is the main entry point for client code once the complete
    expression has been built.

    Note: unless keepTabs is set, the input is run through expandtabs()
    before parsing, so that parse actions see proper column numbers.  If
    the input contains tabs and a parse action uses its loc argument to
    index into the string being parsed, keep a consistent view of the
    input by doing one of the following:
     - call parseWithTabs() on the grammar before calling parseString
     - define the parse action with the full (s,loc,toks) signature and
       index into the parse action's s argument
     - explicitly expand the tabs in the input before calling parseString
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    # only the tokens are handed back; the end location is discarded
    endLoc, tokens = self._parse(instring, 0)
    return tokens
def scanString(self, instring, maxMatches=sys.maxint):
    """Scan instring for matches of this expression (generator).

    Yields a (tokens, start, end) tuple for every match found, stopping
    after maxMatches matches if that argument is given.

    Unless keepTabs is set, the input is converted with _ustr() and
    tab-expanded first, so start and end are relative to the string that
    is actually parsed.

    A match that consumes no input is still yielded, but scanning then
    resumes one character further on, so an expression that can match the
    empty string cannot stall the scan at a single location.
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods once; they are called on every loop iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    while loc <= instrlen and matches < maxMatches:
        try:
            preloc = preparseFn(instring, loc)
            nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
        except ParseException:
            # no match here: retry one character past the pre-parsed position
            loc = preloc + 1
        else:
            matches += 1
            yield tokens, preloc, nextLoc
            if nextLoc > loc:
                loc = nextLoc
            else:
                # zero-width match: without this step the very same match
                # would be produced again and again at the same location
                loc = preloc + 1
def transformString(self, instring):
    """Return a copy of instring with every match replaced by its tokens.

    Built on scanString: attach a parse action that modifies the returned
    token list, then call transformString() on the target text.  Text
    between matches is copied through unchanged; each matched span is
    replaced by the _ustr() form of its tokens joined together.  A match
    whose tokens are empty/false contributes nothing.

    Side effect: sets self.keepTabs to True.
    """
    pieces = []
    prevEnd = 0
    # keep <TAB>s so that scanString locations line up with instring and
    # the untouched text is not altered by tab expansion
    self.keepTabs = True
    for tokens, start, end in self.scanString(instring):
        pieces.append(instring[prevEnd:start])
        if tokens:
            if isinstance(tokens, ParseResults):
                pieces.extend(tokens.asList())
            elif isinstance(tokens, list):
                pieces.extend(tokens)
            else:
                pieces.append(tokens)
        prevEnd = end
    pieces.append(instring[prevEnd:])
    return "".join([_ustr(piece) for piece in pieces])
def searchString(self, instring, maxMatches=sys.maxint):
    """Collect the tokens of every match found by scanString.

    Returns a ParseResults built from the list of per-match tokens; the
    match locations reported by scanString are dropped.  The optional
    maxMatches argument is passed through to scanString to stop searching
    after that many matches.
    """
    found = []
    for tokens, start, end in self.scanString(instring, maxMatches):
        found.append(tokens)
    return ParseResults(found)
def __add__(self, other):
    """Implementation of + operator - returns And

    A plain string operand is wrapped in a Literal.  Any other operand
    that is not a ParserElement triggers a SyntaxWarning.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    elif not isinstance(other, ParserElement):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
    return And([self, other])
def __radd__(self, other):
    """Implementation of the reflected + operator (other + self).

    A plain string left operand is wrapped in a Literal before the
    operands are combined with +.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if not isinstance(other, ParserElement):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
        # evaluating `other + self` here would dispatch right back to this
        # method and recurse without bound, so give up instead
        return None
    return other + self
def __or__(self, other):
    """Implementation of | operator - returns MatchFirst

    A plain string operand is wrapped in a Literal.  Any other operand
    that is not a ParserElement triggers a SyntaxWarning.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    elif not isinstance(other, ParserElement):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
    return MatchFirst([self, other])
def __ror__(self, other):
    """Implementation of the reflected | operator (other | self).

    A plain string left operand is wrapped in a Literal before the
    operands are combined with |.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if not isinstance(other, ParserElement):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
        # evaluating `other | self` here would dispatch right back to this
        # method and recurse without bound, so give up instead
        return None
    return other | self
def __xor__(self, other):
    """Implementation of ^ operator - returns Or

    A plain string operand is wrapped in a Literal.  Any other operand
    that is not a ParserElement triggers a SyntaxWarning.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    elif not isinstance(other, ParserElement):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
    return Or([self, other])
def __rxor__(self, other):
    """Implementation of the reflected ^ operator (other ^ self).

    A plain string left operand is wrapped in a Literal before the
    operands are combined with ^.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if not isinstance(other, ParserElement):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
        # evaluating `other ^ self` here would dispatch right back to this
        # method and recurse without bound, so give up instead
        return None
    return other ^ self
def __and__(self, other):
    """Implementation of & operator - returns Each

    A plain string operand is wrapped in a Literal.  Any other operand
    that is not a ParserElement triggers a SyntaxWarning.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    elif not isinstance(other, ParserElement):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
    return Each([self, other])
def __rand__(self, other):
    """Implementation of the reflected & operator (other & self).

    A plain string left operand is wrapped in a Literal before the
    operands are combined with &.  For any other non-ParserElement
    operand a SyntaxWarning is issued and None is returned.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if not isinstance(other, ParserElement):
        warnings.warn("Cannot add element of type %s to ParserElement" % type(other),
                      SyntaxWarning, stacklevel=2)
        # evaluating `other & self` here would dispatch right back to this
        # method and recurse without bound, so give up instead
        return None
    return other & self
def __invert__(self):
    """Implementation of ~ operator - wraps this expression in a NotAny."""
    negated = NotAny(self)
    return negated
def __call__(self, name):
    """Shortcut for setResultsName(name), leaving listAllMatches at its default::
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
    can be written more compactly as::
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    named = self.setResultsName(name)
    return named
def suppress(self):
    """Return this ParserElement wrapped in a Suppress, so that its output is
    dropped; useful to keep punctuation from cluttering up returned output.
    """
    wrapped = Suppress(self)
    return wrapped
def leaveWhitespace(self):
    """Turn off the skipping of whitespace before this element's pattern is
    matched.  Normally only used internally by the pyparsing module, but
    may be needed in some whitespace-sensitive grammars.

    Returns self, so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
def setWhitespaceChars(self, chars):
    """Replace the default whitespace characters for this element with chars.

    Also switches whitespace skipping on and clears copyDefaultWhiteChars.
    Returns self, so calls can be chained.
    """
    self.copyDefaultWhiteChars = False
    self.whiteChars = chars
    self.skipWhitespace = True
    return self
def parseWithTabs(self):
    """Override the default behavior of expanding <TAB>s to spaces before
    the input string is parsed, by setting keepTabs.  Must be called before
    parseString when the input grammar contains elements that match <TAB>
    characters.

    Returns self, so calls can be chained.
    """
    setattr(self, "keepTabs", True)
    return self
def ignore(self, other):
    """Register an expression to be ignored (e.g., comments) while doing
    pattern matching; may be called repeatedly to define multiple comment
    or other ignorable patterns.

    An expression that is not already a Suppress is wrapped in one and
    appended; a Suppress is appended as-is unless it is already present in
    ignoreExprs.  Returns self.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Install the three debugging callbacks and switch debugging on.

    Any action passed as a false value (such as None) is replaced by the
    corresponding module default action.  Returns self.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
def setDebug(self, flag=True):
    """Enable or disable display of debugging messages while doing pattern
    matching.  A true flag installs the default debug actions through
    setDebugActions; a false flag just sets debug to False.  Returns self.
    """
    if not flag:
        self.debug = False
    else:
        self.setDebugActions(_defaultStartDebugAction,
                             _defaultSuccessDebugAction,
                             _defaultExceptionDebugAction)
    return self
def __str__(self):
    """Return the name attribute of this element."""
    label = self.name
    return label
def __repr__(self):
    """Return the same text as the _ustr() string form of this element."""
    text = _ustr(self)
    return text
def streamline(self):
    """Flag this element as streamlined and discard its cached string
    representation (strRepr).  Returns self.
    """
    self.strRepr = None
    self.streamlined = True
    return self
def checkRecursion(self, parseElementList):
    """Recursion check hook; this implementation does nothing."""
    return None
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure, check for infinite
    recursive definitions.

    This implementation runs checkRecursion with a fresh, empty list; the
    validateTrace argument is accepted but not used here.
    """
    visited = []
    self.checkRecursion(visited)
def parseFile(self, file_or_filename):
    """Execute the parse expression on the given file or filename.

    file_or_filename may be any object with a read() method, in which case
    it is read but not closed here, or a filename.  A filename is opened
    in binary mode, read in full and closed again (even if reading fails)
    before parsing starts.

    Returns whatever parseString returns for the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a filename
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # release the handle even when read() raises
            f.close()
    return self.parseString(file_contents)
def getException(self):
    """Build a ParseException for this element, carrying its errmsg, with an
    empty parse string and location 0.
    """
    exc = ParseException("", 0, self.errmsg, self)
    return exc
def __getattr__(self, aname):
    """Create the myException attribute lazily, on first access.

    The exception is built by getException() and stored on the instance.
    Every other missing attribute raises AttributeError.
    """
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    ret = self.getException()
    self.myException = ret
    return ret
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)

    def setName(self, name):
        """Set the name through the base class, then rebuild errmsg from the
        new name.  Returns whatever the base class setName returns.
        """
        renamed = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail, raising this element's exception at loc."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            # an empty literal is re-classed as Empty on the spot
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at loc; returns (new location, matched string).

        Performance tuning: this routine gets called a *lot*.  The first
        character is compared before anything else, and for a
        single-character literal that comparison alone decides the match,
        so startswith is never called.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        # reuse the per-element exception object rather than building one
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it
    must not be directly preceded or followed by a keyword character.
    Compare with Literal::
      Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
      Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
    Accepts two optional constructor arguments in addition to the keyword string:
    identChars is a string of characters that would be valid identifier
    characters, defaulting to all alphanumerics + "_" and "$"; caseless
    allows case-insensitive matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done on upper-cased text throughout
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc, requiring that the characters directly
        before and after it (if any) are not in identChars.

        Returns (new location, keyword string as given to the constructor).
        """
        end = loc + self.matchLen
        identChars = self.identChars
        if self.caseless:
            if (instring[loc:end].upper() == self.caselessmatch and
                (end >= len(instring) or instring[end].upper() not in identChars) and
                (loc == 0 or instring[loc-1].upper() not in identChars)):
                return end, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                (end >= len(instring) or instring[end] not in identChars) and
                (loc == 0 or instring[loc-1] not in identChars)):
                return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy through the base class; the copy's identChars is set to
        Keyword.DEFAULT_KEYWORD_CHARS.
        """
        dup = super(Keyword, self).copy()
        dup.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return dup

    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # the base class holds the upper-cased form used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Compare the upper-cased input slice at loc with the stored match;
        returns (new location, the defining literal).
        """
        end = loc + self.matchLen
        if instring[loc:end].upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class CaselessKeyword(Keyword):
    """Keyword that is always constructed with caseless=True."""

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc ignoring case, requiring that the
        character directly after it (if any) is not in identChars.

        Returns (new location, keyword string as given to the constructor).
        """
        end = loc + self.matchLen
        if (instring[loc:end].upper() == self.caselessmatch and
            (end >= len(instring) or instring[end].upper() not in self.identChars)):
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class Word(Token):
    """Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for min is 1 (a
    minimum value < 1 is not valid); the default values for max and exact
    are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        """Set up the character sets and length limits.

        Raises ValueError if min < 1.  When the character sets contain no
        space and no length restriction was given, an equivalent regular
        expression is compiled and used by parseImpl as a fast path; if
        that compilation fails, the character-by-character matcher is
        used instead.
        """
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = sys.maxint

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # A single initial character can be emitted as an escaped
                # literal.  The test has to be on initCharsOrig: with several
                # initial characters the escaped literal would demand that
                # whole sequence instead of any one of them.
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort only: fall back to the manual matcher
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc; returns (new location, matched text).

        Uses the precompiled regular expression when there is one, and
        otherwise scans character by character while enforcing the length
        limits and the asKeyword restriction.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, the word must end where scanning stopped
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # no body character may touch the word on either side
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base class string form if that succeeds, otherwise a
        generated "W:(...)" description, which is cached in strRepr.
        """
        try:
            return super(Word,self).__str__()
        except Exception:
            # presumably no name is available yet; build a description below
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
class Regex(Token):
    """Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form
    recognized by the inbuilt Python re module.
    """

    def __init__(self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile()
        function as-is.  See the Python re module for an explanation of the
        acceptable patterns and flags.

        An empty pattern only produces a SyntaxWarning; a pattern that
        fails to compile produces a SyntaxWarning and the re error is
        re-raised.
        """
        super(Regex, self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                          SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Apply the compiled pattern at loc.

        Returns (end of match, ParseResults holding the matched text); every
        named group of the match is also stored in the results under its
        group name.
        """
        found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        named = found.groupdict()
        for key in named:
            tokens[key] = named[key]
        return found.end(), tokens

    def __str__(self):
        """Return the base class string form if that succeeds, otherwise a
        generated "Re:(...)" description, which is cached in strRepr.
        """
        try:
            return super(Regex, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """

    def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
           - quoteChar - string of one or more characters defining the quote delimiting string
           - escChar - character to escape quotes, typically backslash (default=None)
           - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
           - multiline - boolean indicating whether quotes can span multiple lines (default=False)
           - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
           - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) when quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString, self).__init__()

        # whitespace around the quote characters is dropped - it would not work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is not None:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()
        else:
            endQuoteChar = quoteChar

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # pieces shared by the single-line and multi-line patterns
        openQuote = re.escape(self.quoteChar)
        endFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
        escInSet = (escChar is not None and _escapeRegexRangeChars(escChar) or '')

        # opening quote, then the start of the body group: any character
        # other than the first end-quote character and the escape character
        # (and, unless multiline, other than a line break)
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            pattern = r'%s(?:[^%s%s]' % (openQuote, endFirst, escInSet)
        else:
            self.flags = 0
            pattern = r'%s(?:[^%s\n\r%s]' % (openQuote, endFirst, escInSet)

        if len(self.endQuoteChar) > 1:
            # a proper prefix of a multi-character end quote that is not
            # continued by the next end-quote character is still body text
            partials = []
            for i in range(len(self.endQuoteChar)-1, 0, -1):
                partials.append("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                             _escapeRegexRangeChars(self.endQuoteChar[i])))
            pattern += '|(?:' + ')|(?:'.join(partials) + ')'

        if escQuote:
            pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        pattern += (r')*%s' % re.escape(self.endQuoteChar))
        self.pattern = pattern

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                          SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Match a quoted string at loc; returns (end of match, text).

        With unquoteResults set, the delimiters are stripped, escChar
        escapes are replaced by the escaped character, and escQuote
        sequences are replaced by endQuoteChar.
        """
        found = None
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            found = self.re.match(instring, loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        text = found.group()

        if self.unquoteResults:
            # strip off quotes
            text = text[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(text, basestring):
                # replace escaped characters
                if self.escChar:
                    text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

                # replace escaped quotes
                if self.escQuote:
                    text = text.replace(self.escQuote, self.endQuoteChar)

        return found.end(), text

    def __str__(self):
        """Return the base class string form if that succeeds, otherwise a
        generated description, which is cached in strRepr.
        """
        try:
            return super(QuotedString, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for min is 1 (a
    minimum value < 1 is not valid); the default values for max and exact
    are 0, meaning no maximum or exact length restriction.
    """

    def __init__(self, notChars, min=1, max=0, exact=0):
        """Store the excluded characters and length limits; whitespace
        skipping is switched off.  Raises ValueError if min < 1.
        """
        super(CharsNotIn, self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = sys.maxint
        if max > 0:
            self.maxLen = max

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from loc up to the first excluded character or
        the maximum length; returns (new location, matched text).
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__(self):
        """Return the base class string form if that succeeds, otherwise a
        generated "!W:(...)" description, which is cached in strRepr.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            # long sets are abbreviated to their first four characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace
    is ignored by pyparsing grammars.  This class is included when some
    whitespace structures are significant.  Define with a string containing
    the whitespace characters to be matched; default is " \\t\\r\\n".  Also
    takes optional min, max, and exact arguments, as defined for the Word
    class."""
    # display names used to build the element name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # the characters to be matched must not be skipped as leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars("".join(skippable))
        self.name = "".join([White.whiteStrs[c] for c in self.matchWhite])
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = sys.maxint
        if max > 0:
            self.maxLen = max

        # an exact length overrides both limits
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters starting at
        loc; returns (new location, matched text).
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        start = loc
        loc += 1
        limit = min(start + self.maxLen, len(instring))
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
class _PositionToken(Token):
    """Base class of the position-matching tokens defined below it; the
    element name defaults to the name of the concrete class.
    """

    def __init__(self):
        super(_PositionToken, self).__init__()
        self.name = self.__class__.__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""

    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and then whitespace, stopping as soon
        as the target column is reached; returns the new location.
        """
        if col(loc, instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self.skipIgnorables(instring, loc)
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return (location of the target column, text skipped to get there).

        Raises ParseException if loc is already past the target column.
        """
        thiscol = col(loc, instring)
        if thiscol > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        newloc = loc + self.col - thiscol
        return newloc, instring[loc:newloc]
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""

    def __init__(self):
        super(LineStart, self).__init__()
        self.setWhitespaceChars(" \t")
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Return loc + 1 if the base class pre-parse ends on a newline,
        otherwise return loc unchanged.
        """
        skipped = super(LineStart, self).preParse(instring, loc)
        if instring[skipped] == "\n":
            return loc + 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list when loc is 0, equals the
        pre-parsed start of the string, or directly follows a newline.
        """
        if loc != 0 and loc != self.preParse(instring, 0) and instring[loc-1] != "\n":
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""

    def __init__(self):
        super(LineEnd, self).__init__()
        self.setWhitespaceChars(" \t")
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at loc (returned as the token) or the exact end
        of the string (empty token list); in both cases the returned
        location is loc + 1.  Anything else fails.
        """
        strlen = len(instring)
        if loc == strlen:
            return loc + 1, []
        if loc < strlen and instring[loc] == "\n":
            return loc + 1, "\n"
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""

    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list when loc is 0, or when the entire
        string up to loc is just whitespace and ignorables.
        """
        if loc != 0 and loc != self.preParse(instring, 0):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
class StringEnd(_PositionToken):
    """Position token that matches only when the parse position is at (or
    beyond) the end of the parse string."""

    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Match at or past the end of instring.

        Returns (loc + 1, []) when loc is exactly len(instring) and
        (loc, []) when loc is already past the end.  Raises this element's
        stored exception when unparsed text remains at loc.
        """
        size = len(instring)
        if loc < size:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        if loc == size:
            return loc + 1, []
        # loc > size is the only remaining case; the original trailing
        # "else: raise" branch could never be reached and has been removed
        return loc, []
class ParseExpression(ParserElement):
    """Abstract base class for parser elements that are built from a list of
    sub-expressions, held in self.exprs."""

    def __init__(self, exprs, savelist=False):
        super(ParseExpression, self).__init__(savelist)
        # normalise the argument: a list is used as-is, a string becomes a
        # single Literal, anything else is wrapped in a one-element list
        if isinstance(exprs, list):
            children = exprs
        elif isinstance(exprs, basestring):
            children = [Literal(exprs)]
        else:
            children = [exprs]
        self.exprs = children
        self.callPreparse = False

    def __getitem__(self, i):
        return self.exprs[i]

    def append(self, other):
        """Add other to the end of the sub-expression list; returns self."""
        self.exprs.append(other)
        # drop the cached string form, it no longer describes self.exprs
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Disable whitespace skipping on this expression, replace every
        contained expression with a copy, and invoke leaveWhitespace on each
        copy.  Returns self."""
        self.skipWhitespace = False
        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for e in copies:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable on this expression and pass the newly
        registered entry on to every contained expression.  A Suppress that
        is already in self.ignoreExprs is not registered again."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        for e in self.exprs:
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        # prefer the base class's string form; fall back to a generated one
        # whenever the base class raises
        try:
            return super(ParseExpression, self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """Streamline all contained expressions, then flatten one level of
        nesting: And(And(a, b), c) -> And(a, b, c), and likewise for the
        other subclasses.  A nested expression is only absorbed when it is an
        instance of this expression's class and has no parse action, no
        results name and no debug flag.  Returns self."""
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        def absorbable(candidate):
            return (isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug)

        if len(self.exprs) == 2:
            head = self.exprs[0]
            if absorbable(head):
                self.exprs = head.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= head.mayReturnEmpty
                self.mayIndexError |= head.mayIndexError

            # looked up after the step above, so it sees any updated list
            tail = self.exprs[-1]
            if absorbable(tail):
                self.exprs = self.exprs[:-1] + tail.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= tail.mayReturnEmpty
                self.mayIndexError |= tail.mayIndexError

        return self

    def setResultsName(self, name, listAllMatches=False):
        return super(ParseExpression, self).setResultsName(name, listAllMatches)

    def validate(self, validateTrace=[]):
        """Validate every contained expression, passing along a copy of the
        trace extended with this expression, then run checkRecursion."""
        trace = validateTrace[:] + [self]
        for e in self.exprs:
            e.validate(trace)
        self.checkRecursion([])
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the '+' operator.
    """

    def __init__(self, exprs, savelist=True):
        super(And, self).__init__(exprs, savelist)
        # the sequence can only match empty if every member can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Take the whitespace settings from the first *normalised*
        # sub-expression.  The raw argument may be a string or a single
        # element, for which exprs[0] is not the first sub-expression; for a
        # list argument self.exprs is that same list, so nothing changes.
        first = self.exprs[0]
        self.setWhitespaceChars(first.whiteChars)
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each sub-expression in turn, starting at loc.

        Returns (new location, accumulated tokens of all sub-expressions).
        Any exception raised by a sub-expression propagates to the caller.
        """
        # the first element is parsed with callPreParse=False because the
        # pre-parse was already done as part of this And's own pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        for e in self.exprs[1:]:
            loc, exprtokens = e._parse(instring, loc, doActions)
            # keep results that carry either tokens or named keys
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append other (a string is converted to a Literal); returns self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            # stop at the first member that cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the '^' operator.
    """

    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # one alternative that can match empty is enough
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative with tryParse, then parse for real with the
        one that advanced furthest.  If none matched, raise the failure that
        got furthest into the string."""
        maxExcLoc = -1
        maxMatchLoc = -1
        for e in self.exprs:
            try:
                endLoc = e.tryParse(instring, loc)
            except ParseException:
                # fetched through sys.exc_info() so the clause parses on
                # every Python version
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)
            else:
                if endLoc > maxMatchLoc:
                    maxMatchLoc = endLoc
                    maxMatchExp = e

        if maxMatchLoc < 0:
            if not self.exprs:
                raise ParseException(instring, loc, "no defined alternatives to match", self)
            raise maxException

        return maxMatchExp._parse(instring, loc, doActions)

    def __ixor__(self, other):
        """Append other (a string is converted to a Literal); returns self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the '|' operator.
    """

    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        if exprs:
            # one alternative that can match empty is enough
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at loc.
        If none does, raise the failure that got furthest into the string."""
        maxExcLoc = -1
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseException:
                # fetched through sys.exc_info() so the clause parses on
                # every Python version
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring, len(instring), e.errmsg, self)
                    maxExcLoc = len(instring)

        # only reached when no alternative matched
        if not self.exprs:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise maxException

    def __ior__(self, other):
        """Append other (a string is converted to a Literal); returns self."""
        if isinstance(other, basestring):
            other = Literal(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the '&' operator.
    """

    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        # the group can only match empty if every member can
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # Classify the *normalised* sub-expressions.  The raw argument may be
        # a string or a single element, which must not be iterated directly;
        # for a list argument self.exprs is that same list.
        members = self.exprs
        self.optionals = [e.expr for e in members if isinstance(e, Optional)]
        self.multioptionals = [e.expr for e in members if isinstance(e, ZeroOrMore)]
        self.multirequired = [e.expr for e in members if isinstance(e, OneOrMore)]
        self.required = [e for e in members
                         if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
        self.required += self.multirequired

    def parseImpl(self, instring, loc, doActions=True):
        """Match the sub-expressions in whatever order they occur.

        A first pass uses tryParse to discover the order in which the
        expressions match; a second pass parses them for real in that order
        and merges their results.  Raises ParseException if a required
        expression was never matched.
        """
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a sweep matches nothing
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse(instring, tmpLoc)
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # single-use expressions are dropped once matched
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join([_ustr(e) for e in tmpReqd])
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        resultlist = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # for keys present on both sides, build the combined value first
            # and store it after the "+=" merge below
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k, v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join([_ustr(e) for e in self.exprs]) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
class ParseElementEnhance(ParserElement):
    """Abstract base class for parser elements that wrap a single contained
    expression (self.expr, which may be None) and post-process its tokens."""

    def __init__(self, expr, savelist=False):
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the wrapped expression's parsing attributes
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the wrapped expression; raise ParseException when no
        expression has been set."""
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping here and on a copy of the wrapped
        expression, which replaces the original.  Returns self."""
        self.skipWhitespace = False
        # the copy used to be taken before the None check, which raised
        # AttributeError when no expression was set
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register other as ignorable on this element and pass the newly
        registered entry on to the wrapped expression.  A Suppress that is
        already in self.ignoreExprs is not registered again."""
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                super(ParseElementEnhance, self).ignore(other)
                if self.expr is not None:
                    self.expr.ignore(self.ignoreExprs[-1])
        else:
            super(ParseElementEnhance, self).ignore(other)
            if self.expr is not None:
                self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise RecursiveGrammarException if this element already appears in
        parseElementList; otherwise continue the check in the wrapped
        expression."""
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=[]):
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        # prefer the base class's string form; fall back to a generated one
        # whenever the base class raises
        try:
            return super(ParseElementEnhance, self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""

    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # the outcome of tryParse is discarded; a failure propagates as its
        # exception and a success leaves loc untouched
        self.expr.tryParse(instring, loc)
        return loc, []
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace.  NotAny
    always returns a null token list.  May be constructed using the '~' operator."""

    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # set the flag directly instead of calling self.leaveWhitespace(),
        # which would also propagate to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Return (loc, []) when the wrapped expression fails at loc; raise
        the stored exception when it matches."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__(self, expr):
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression as many times as possible and return
        the location after the last match with the collected tokens (an empty
        list when there was no match at all)."""
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            skipFirst = len(self.ignoreExprs) > 0
            while True:
                if skipFirst:
                    nextLoc = self.skipIgnorables(instring, loc)
                else:
                    nextLoc = loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                # keep results that carry either tokens or named keys
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed attempt ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """As the base class method, additionally setting saveAsList on the
        returned element."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression once (a failure here propagates), then
        as many further times as possible.  Returns the location after the
        last match with the collected tokens."""
        # the mandatory first match is outside the try block
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            skipFirst = len(self.ignoreExprs) > 0
            while True:
                if skipFirst:
                    nextLoc = self.skipIgnorables(instring, loc)
                else:
                    nextLoc = loc
                loc, more = self.expr._parse(instring, nextLoc, doActions)
                # keep results that carry either tokens or named keys
                if more or more.keys():
                    tokens += more
        except (ParseException, IndexError):
            # the first failed attempt ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """As the base class method, additionally setting saveAsList on the
        returned element."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2433 class _NullToken(object):
2434 def __bool__(self):
2435 return False
2436 def __str__(self):
2437 return ""
2439 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
    A default return string can also be specified, if the optional expression
    is not found.
    """

    def __init__(self, exprs, default=_optionalNotMatched):
        super(Optional, self).__init__(exprs, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the wrapped expression's result when it matches.  When it
        does not, return loc unchanged with [defaultValue] if a default was
        given, else with an empty list."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            # identity comparison against the sentinel, so any explicitly
            # supplied default (including None) is returned
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
    If include is set to true, the matched expression is also consumed.  The ignore
    argument is used to define grammars (typically quoted strings and comments) that
    might contain false matches.
    """

    def __init__(self, other, include=False, ignore=None):
        super(SkipTo, self).__init__(other)
        if ignore is not None:
            # the ignorable is attached to a copy of the target expression
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Advance one position at a time until the target expression matches.

        Returns the skipped text as a single token; with includeMatch set,
        the target is consumed as well and its tokens (if any) are combined
        with the skipped text.  Raises the stored exception when the target
        is not found before the end of the string.
        """
        startLoc = loc
        instrlen = len(instring)
        target = self.expr
        while loc <= instrlen:
            try:
                loc = target.skipIgnorables(instring, loc)
                # probe without parse actions first
                target._parse(instring, loc, doActions=False, callPreParse=False)
                if not self.includeMatch:
                    return loc, [instring[startLoc:loc]]
                skipText = instring[startLoc:loc]
                loc, mat = target._parse(instring, loc, doActions, callPreParse=False)
                if not mat:
                    return loc, [skipText]
                skipRes = ParseResults(skipText)
                skipRes += mat
                return loc, [skipRes]
            except (ParseException, IndexError):
                loc += 1
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the Forward variable using the '<<' operator.

    Note: take care when assigning to Forward not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the Forward::
        fwdExpr << (a | b | c)
    """

    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Set the wrapped expression to other (a string is converted to a
        Literal) and copy its parsing attributes; returns self."""
        if isinstance(other, basestring):
            other = Literal(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        self.mayIndexError = other.mayIndexError
        self.setWhitespaceChars(other.whiteChars)
        self.skipWhitespace = other.skipWhitespace
        self.saveAsList = other.saveAsList
        return self

    def leaveWhitespace(self):
        # only the flag on this element is changed; the wrapped expression is
        # not touched
        self.skipWhitespace = False
        return self

    def streamline(self):
        # the flag is set before descending, so a second call on the same
        # Forward returns immediately
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        # the wrapped expression is only validated when this Forward is not
        # already part of the trace
        if self not in validateTrace:
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        # While the wrapped expression is rendered this instance's class is
        # switched to _ForwardNoRecurse, whose __str__ returns "...", so a
        # nested reference to this Forward is not expanded again.
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is None:
                retString = "None"
            else:
                retString = _ustr(self.expr)
        finally:
            self.__class__ = Forward
        return "Forward: " + retString

    def copy(self):
        """Return the base class copy when an expression is set; otherwise a
        new Forward whose expression is this (still undefined) Forward."""
        if self.expr is not None:
            return super(Forward, self).copy()
        proxy = Forward()
        proxy << self
        return proxy
class _ForwardNoRecurse(Forward):
    """Forward variant whose string form is a fixed "..." placeholder."""

    def __str__(self):
        return "..."
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens (deprecated; constructing
    one issues a DeprecationWarning)."""

    def __init__(self, *args):
        super(Upcase, self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                      DeprecationWarning, stacklevel=2)

    def postParse(self, instring, loc, tokenlist):
        """Return a list with string.upper applied to every token."""
        return [string.upper(tok) for tok in tokenlist]
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying 'adjacent=False' in the constructor.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore(self, other):
        # an adjacent Combine calls ParserElement.ignore directly, bypassing
        # the intermediate classes' overrides
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super(Combine, self).ignore(other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the token list's items with one concatenated string.  The
        result is wrapped in a list when this element has a results name and
        the combined result carries named keys."""
        combined = tokenlist.copy()
        del combined[:]
        pieces = tokenlist._asStringList(self.joinString)
        combined += ParseResults(["".join(pieces)], modal=self.modalResults)

        if self.resultsName and len(combined.keys()) > 0:
            return [combined]
        return combined
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning
    tokens of ZeroOrMore and OneOrMore expressions."""

    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        # nest the whole token list as the single element of a new list
        grouped = [tokenlist]
        return grouped
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.
    """

    def __init__(self, exprs):
        super(Dict, self).__init__(exprs)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a keyed entry to tokenlist for every non-empty element, using
        the element's first token as the key and the rest as the value.
        Returns tokenlist, wrapped in a list when a results name is set."""
        for i, tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey, int):
                # integer keys are converted to their stripped string form
                ikey = _ustr(tok[0]).strip()
            if len(tok) == 1:
                # a key with nothing after it maps to the empty string
                entry = _ParseResultsWithOffset("", i)
            elif len(tok) == 2 and not isinstance(tok[1], ParseResults):
                entry = _ParseResultsWithOffset(tok[1], i)
            else:
                dictvalue = tok.copy()
                del dictvalue[0]
                # a single remaining token is stored bare, unless the
                # remainder has named keys of its own
                if len(dictvalue) != 1 or (isinstance(dictvalue, ParseResults) and dictvalue.keys()):
                    entry = _ParseResultsWithOffset(dictvalue, i)
                else:
                    entry = _ParseResultsWithOffset(dictvalue[0], i)
            tokenlist[ikey] = entry

        if self.resultsName:
            return [tokenlist]
        return tokenlist
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        # every matched token is discarded
        nothing = []
        return nothing

    def suppress(self):
        # already a Suppress: return this same object
        return self
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        self.callable = ParserElement.normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action and return its result; once a call has
        completed, later calls raise ParseException until reset()."""
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # only marked as called after the action returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called again."""
        self.called = False
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to sys.stderr before the
    action runs and a "leaving" line afterwards, showing the return value or
    the exception (which is re-raised).
    """
    f = ParserElement.normalizeParseActionArgs(f)

    def z(*paArgs):
        # the last three arguments are always (s, l, t)
        s, l, t = paArgs[-3:]
        thisFunc = f.func_name
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write(">>entering %s(line: '%s', %d, %s)\n" % (thisFunc, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception:
            # fetched through sys.exc_info() so the clause parses on every
            # Python version
            exc = sys.exc_info()[1]
            sys.stderr.write("<<leaving %s (exception: %s)\n" % (thisFunc, exc))
            raise
        sys.stderr.write("<<leaving %s (ret: %s)\n" % (thisFunc, ret))
        return ret

    # give the wrapper the action's name where that is possible
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
2725 # global helpers
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing 'combine=True' in the constructor.
    If combine is set to True, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText + " [" + _ustr(delim) + " " + exprText + "]..."
    if combine:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    return listExpr.setName(dlName)
def countedArray(expr):
    """Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # redefine the Forward according to the count that was just parsed
        n = int(t[0])
        if n:
            arrayExpr << Group(And([expr] * n))
        else:
            arrayExpr << Group(empty)
        # the count itself contributes no tokens
        return []

    counter = Word(nums).setName("arrayLen")
    counter = counter.setParseAction(countFieldParseAction, callDuringTry=True)
    return (counter + arrayExpr)
2755 def _flatten(L):
2756 if type(L) is not list: return [L]
2757 if L == []: return L
2758 return _flatten(L[0]) + _flatten(L[1:])
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches a
    previous literal, will also match the leading "1:1" in "1:10".
    If this is not desired, use matchPreviousExpr.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: require them in sequence
            flatTokens = _flatten(t.asList())
            rep << And([Literal(tok) for tok in flatTokens])

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches by
    expressions, will *not* match the leading "1:1" in "1:10";
    the expressions are evaluated first, and then compared, so
    "1" is compared with "10".
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of expr
    rep << expr.copy()

    def copyTokenToRepeater(s, l, t):
        # remember what expr matched; the repeater's parse action then
        # rejects any match whose flattened tokens differ
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            if _flatten(t.asList()) != expected:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Return s, converted with _ustr, with the characters \\ ^ - ] prefixed
    by a backslash and newline / tab replaced by the two-character sequences
    \\n and \\t."""
    # the backslash is handled first so the backslashes added for the other
    # characters are not escaped again
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, "\\" + special)
    for raw, shown in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, shown)
    return _ustr(s)
def oneOf(strs, caseless=False, useRegex=True):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list (or tuple) of
       string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if caseless=True, or
       if creating a Regex raises an exception)

    Any other type for strs issues a SyntaxWarning and is treated as an empty
    set of alternatives (previously this ended in a NameError).
    """
    if caseless:
        isequal = (lambda a, b: a.upper() == b.upper())
        masks = (lambda a, b: b.upper().startswith(a.upper()))
        parseElementClass = CaselessLiteral
    else:
        isequal = (lambda a, b: a == b)
        masks = (lambda a, b: b.startswith(a))
        parseElementClass = Literal

    symbols = []
    if isinstance(strs, (list, tuple)):
        # always a fresh list: the loop below deletes and inserts in place,
        # which a tuple does not support and which must not touch the
        # caller's own list
        symbols = list(strs)
    elif isinstance(strs, basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                      SyntaxWarning, stacklevel=2)

    # Drop duplicates and move any symbol that starts with an earlier symbol
    # in front of it, so the longer alternative is tested first.
    i = 0
    while i < len(symbols) - 1:
        cur = symbols[i]
        for j, other in enumerate(symbols[i + 1:]):
            if isequal(other, cur):
                del symbols[i + j + 1]
                break
            elif masks(cur, other):
                del symbols[i + j + 1]
                symbols.insert(i, other)
                cur = other
                break
        else:
            # nothing changed for position i, move on to the next one
            i += 1

    # with no symbols there is no pattern to build
    if symbols and not caseless and useRegex:
        try:
            if len(symbols) == len("".join(symbols)):
                # only single-character symbols: one character class
                return Regex("[%s]" % "".join([_escapeRegexRangeChars(sym) for sym in symbols]))
            else:
                return Regex("|".join([re.escape(sym) for sym in symbols]))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst([parseElementClass(sym) for sym in symbols])
def dictOf( key, value ):
    """Helper to define a dictionary from a key pattern and a value pattern.
       Builds the Dict, ZeroOrMore, and Group wrappers in the proper order.
       The key pattern can include delimiting markers or punctuation, as long
       as they are suppressed, leaving only the significant key text.  The
       value pattern can include named results, so that the Dict results can
       include named token fields.
    """
    entry = Group ( key + value )
    return Dict( ZeroOrMore( entry ) )
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# grammar for the "[...]" range strings understood by srange()
# backslash + punctuation character -> the punctuation character itself
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# "\0x" + hex digits -> the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# "\0" + octal digits -> the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = ( Literal("[") +
                   Optional("^").setResultsName("negate") +
                   Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") +
                   "]" )

def _expanded(p):
    """Expand a parsed (first,last) character range into the string of all
       characters from first through last; any other part is returned as is."""
    if isinstance(p,ParseResults):
        chars = ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ])
        if chars:
            return chars
    # not a range (or a range that expands to nothing): hand the part back
    return p
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       Returns an empty string if the input cannot be parsed or expanded.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort: a malformed range yields "", but KeyboardInterrupt and
        # SystemExit are no longer swallowed along with it
        return ""
def replaceWith(replStr):
    """Helper that builds a parse action which ignores its arguments and
       returns replStr as the only token.  Especially useful when used with
       transformString().
    """
    def _replFunc(*_unused):
        # a new one-element list is built on every call
        return [replStr]
    return _replFunc
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       Returns the first token without its first and last characters.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    # each token goes through _ustr first, then is upper-cased
    return [ _ustr(tok).upper() for tok in t ]
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    # each token goes through _ustr first, then is lower-cased
    return [ _ustr(tok).lower() for tok in t ]
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       Replaces the contents of t, in place, with the slice of s that runs
       from startLoc to the end location reported by getTokensEndLoc(), and
       returns t.  Raises ParseFatalException if the end location lookup
       fails with a ParseException.
    """
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        # NOTE(review): getTokensEndLoc reports misuse with ParseFatalException;
        # confirm that ParseException is a base class of it, otherwise this
        # handler is never reached.
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    # empty the token list in place, then refill it with the original text
    del t[:]
    t += ParseResults(s[startLoc:endloc])
    return t
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Returns the value of the local variable "loc" in the nearest calling
       frame whose function name is "_parseNoCache".  Raises
       ParseFatalException if there is no such frame.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for
        # correct calling routine; entries 0 and 1 are this function and its
        # direct caller
        for f in fstack[2:]:
            if f[3] == "_parseNoCache":
                return f[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame records explicitly so they are not kept alive
        del fstack
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr is either a tag name string or an expression matching the tag
       name; xml selects the XML form of the grammar over the HTML form.
       Returns the pair (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        # tag names are matched caselessly for HTML only
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    attrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML form: every attribute is name="value" with a double-quoted value
        attrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrEntry = attrName + Suppress("=") + attrValue
    else:
        # HTML form: names are lower-cased, the value is optional and may be
        # quoted or a bare run of printables other than ">"
        nonRAbrack = printables.replace(">", "")
        attrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(nonRAbrack)
        attrEntry = attrName.setParseAction(downcaseTokens) + Optional( Suppress("=") + attrValue )
    attrs = Dict(ZeroOrMore(Group( attrEntry )))
    # "empty" result: True when the tag closes itself with "/", else False
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')

    openTag = Suppress("<") + tagStr + attrs + emptyFlag + Suppress(">")
    closeTag = Combine("</" + tagStr + ">")

    # results-name suffix: tag name with ":" removed and each piece title-cased
    suffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+suffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+suffix).setName("</%s>" % tagStr)

    return openTag, closeTag
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns whatever _makeTags returns for the non-XML form."""
    return _makeTags( tagStr, xml=False )
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns whatever _makeTags returns for the XML form."""
    return _makeTags( tagStr, xml=True )
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Specify the required attribute names and values as either:
        - keyword arguments, as in (class="Customer",align="right"), or
        - positional name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.
       Positional tuples take precedence: keyword arguments are only used when no
       positional arguments are given.  Attribute names are lower-cased before
       they are looked up in the parsed tokens.
    """
    pairs = args or attrDict.items()
    required = [(name.lower(),value) for name,value in pairs]

    def pa(s,l,tokens):
        for name,expected in required:
            if name not in tokens:
                raise ParseException(s,l,"no matching attribute " + name)
            actual = tokens[name]
            if actual != expected:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (name, actual, expected))
    return pa
# associativity markers for operatorPrecedence; each is a distinct object
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
def _flattenOpPrecTokens(tokens):
    """Collapse single-element ParseResults nesting.  A ParseResults holding
       exactly one item is replaced by that item (recursively); any other
       ParseResults becomes a list of its flattened items; anything else is
       returned unchanged."""
    if not isinstance(tokens,ParseResults):
        return tokens
    if len(tokens)==1:
        only = tokens[0]
        if isinstance(only,ParseResults):
            return _flattenOpPrecTokens(only)
        return only
    return [ _flattenOpPrecTokens(tok) for tok in tokens ]
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the
          nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal
           - numTerms is the number of terms for this operator (must
              be 1 or 2)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError if numTerms is not 1 or 2, or if rightLeftAssoc is
       neither opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    # innermost level: a base element, or a complete expression in parentheses
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        thisExpr = Forward()
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr ) )
            elif arity == 2:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr + lastExpr ) )
            else:
                raise ValueError("operator must be unary (1) or binary (2)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
                matchExpr |= lastExpr
            elif arity == 2:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr + thisExpr ) )
            else:
                raise ValueError("operator must be unary (1) or binary (2)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        thisExpr << ( matchExpr )
        # this level becomes the operand of the next level in opList
        lastExpr = thisExpr
    ret << lastExpr
    ret.setParseAction(_flattenOpPrecTokens)
    return Group(ret)
# Quoted-string patterns.  Between the quotes the text may not contain a bare
# newline, carriage return or backslash; a doubled quote character or a
# backslash followed by any character is accepted as part of the string.
_dblQuotedPattern = r'"(?:[^"\n\r\\]|(?:"")|(?:\\.))*"'
_sglQuotedPattern = r"'(?:[^'\n\r\\]|(?:'')|(?:\\.))*'"

dblQuotedString = Regex(_dblQuotedPattern).setName("string enclosed in double quotes")
sglQuotedString = Regex(_sglQuotedPattern).setName("string enclosed in single quotes")
# either form: the two patterns above as alternatives, each in its own group
quotedString = Regex("(?:%s)|(?:%s)" % (_dblQuotedPattern, _sglQuotedPattern)).setName("quotedString using single or double quotes")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If no content expression is given, opener and closer must both be
       strings, and the nested expression captures all whitespace-delimited
       content between the delimiters as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       Pass None for this argument if no expressions are to be ignored.

       Raises ValueError if opener equals closer, or if content is omitted
       and opener or closer is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        # default content: a run of characters that are neither delimiter
        # characters nor default whitespace, with surrounding blanks stripped
        content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
    ret = Forward()
    # a delimited sub-list; it refers back to ret, which makes the nesting recursive
    nested = Group( Suppress(opener) + ret + Suppress(closer) )
    if ignoreExpr is not None:
        ret << ZeroOrMore( ignoreExpr | content | nested )
    else:
        ret << ZeroOrMore( content | nested )
    return ret
# character sets for the 8-bit code range, given as srange specifications
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine("&" + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")
# entity name -> replacement character; nbsp becomes a plain space and quot
# is the double-quote character (it was previously mapped to an apostrophe)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action for commonHTMLEntity: the replacement character for the
# matched entity name, or None if the name is not in the map
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
# everything up to the end of the current line, leading whitespace included
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# Java comments use the same expression object as C++ comments
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# one item of a comma-separated list: words of non-comma printables, with
# blanks or tabs between them kept only when another word follows on the line
_noncomma = printables.replace(",", "")
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
# a missing item between two commas is reported as an empty string
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
3176 if __name__ == "__main__":
3178 def test( teststring ):
3179 print teststring,"->",
3180 try:
3181 tokens = simpleSQL.parseString( teststring )
3182 tokenlist = tokens.asList()
3183 print tokenlist
3184 print "tokens = ", tokens
3185 print "tokens.columns =", tokens.columns
3186 print "tokens.tables =", tokens.tables
3187 print tokens.asXML("SQL",True)
3188 except ParseException, err:
3189 print err.line
3190 print " "*(err.column-1) + "^"
3191 print err
3192 print
3194 selectToken = CaselessLiteral( "select" )
3195 fromToken = CaselessLiteral( "from" )
3197 ident = Word( alphas, alphanums + "_$" )
3198 columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3199 columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
3200 tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3201 tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
3202 simpleSQL = ( selectToken + \
3203 ( '*' | columnNameList ).setResultsName( "columns" ) + \
3204 fromToken + \
3205 tableNameList.setResultsName( "tables" ) )
3207 test( "SELECT * from XYZZY, ABC" )
3208 test( "select * from SYS.XYZZY" )
3209 test( "Select A from Sys.dual" )
3210 test( "Select AA,BB,CC from Sys.dual" )
3211 test( "Select A, B, C from Sys.dual" )
3212 test( "Select A, B, C from Sys.dual" )
3213 test( "Xelect A, B, C from Sys.dual" )
3214 test( "Select A, B, C frox Sys.dual" )
3215 test( "Select" )
3216 test( "Select ^^^ frox Sys.dual" )
3217 test( "Select A, B, C from Sys.dual, Table2 " )