3 # Copyright (c) 2003-2007 Paul T. McGuire
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #from __future__ import generators
28 pyparsing module - Classes and methods to define and execute parsing grammars
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
37 from pyparsing import Word, alphas
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
45 The program outputs the following::
47 Hello, World! -> ['Hello', ',', 'World', '!']
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
62 __versionTime__
= "7 October 2007 00:25"
63 __author__
= "Paul McGuire <ptmcg@users.sourceforge.net>"
66 from weakref
import ref
as wkref
71 import xml
.sax
.saxutils
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
75 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
76 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
77 then < returns the unicode object | encodes it with the default encoding | ... >.
80 # If this works, then _ustr(obj) has the same behaviour as str(obj), so
81 # it won't break any existing code.
84 except UnicodeEncodeError, e
:
85 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
86 # state that "The return value must be a string object". However, does a
87 # unicode object (being a subclass of basestring) count as a "string
89 # If so, then return a unicode object:
91 # Else encode it... but how? There are many choices... :)
92 # Replace unprintables with escape codes?
93 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
94 # Replace unprintables with question marks?
95 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace')
99 return dict( [(c
,0) for c
in strg
] )
100 #~ return set( [c for c in strg] )
102 class _Constants(object):
105 alphas
= string
.lowercase
+ string
.uppercase
107 hexnums
= nums
+ "ABCDEFabcdef"
108 alphanums
= alphas
+ nums
110 class ParseBaseException(Exception):
111 """base exception class for all parsing runtime exceptions"""
112 __slots__
= ( "loc","msg","pstr","parserElement" )
113 # Performance tuning: we construct a *lot* of these, so keep this
114 # constructor as small and fast as possible
115 def __init__( self
, pstr
, loc
=0, msg
=None, elem
=None ):
123 self
.parserElement
= elem
125 def __getattr__( self
, aname
):
126 """supported attributes by name are:
127 - lineno - returns the line number of the exception text
128 - col - returns the column number of the exception text
129 - line - returns the line containing the exception text
131 if( aname
== "lineno" ):
132 return lineno( self
.loc
, self
.pstr
)
133 elif( aname
in ("col", "column") ):
134 return col( self
.loc
, self
.pstr
)
135 elif( aname
== "line" ):
136 return line( self
.loc
, self
.pstr
)
138 raise AttributeError, aname
141 return "%s (at char %d), (line:%d, col:%d)" % \
142 ( self
.msg
, self
.loc
, self
.lineno
, self
.column
)
143 def __repr__( self
):
145 def markInputline( self
, markerString
= ">!<" ):
146 """Extracts the exception line from the input string, and marks
147 the location of the exception with a special symbol.
150 line_column
= self
.column
- 1
152 line_str
= "".join( [line_str
[:line_column
],
153 markerString
, line_str
[line_column
:]])
154 return line_str
.strip()
class ParseException(ParseBaseException):
    """Exception raised when a parse expression fails to match the input.

       Adds nothing to ParseBaseException; the attributes available by name
       are those computed by the base class:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
class ParseFatalException(ParseBaseException):
    """Exception for user code to raise when inconsistent parse content is
       found; intended to stop all parsing immediately.

       Adds nothing to ParseBaseException beyond a distinct type to catch.
    """
170 #~ class ReparseException(ParseBaseException):
171 #~ """Experimental class - parse actions can raise this exception to cause
172 #~ pyparsing to reparse the input string:
173 #~ - with a modified input string, and/or
174 #~ - with a modified start location
175 #~ Set the values of the ReparseException in the constructor, and raise the
176 #~ exception in a parse action to cause pyparsing to use the new string/location.
177 #~ Setting the values as None causes no change to be made.
179 #~ def __init_( self, newstring, restartLoc ):
180 #~ self.newParseText = newstring
181 #~ self.reparseLoc = restartLoc
183 class RecursiveGrammarException(Exception):
184 """exception thrown by validate() if the grammar could be improperly recursive"""
185 def __init__( self
, parseElementList
):
186 self
.parseElementTrace
= parseElementList
189 return "RecursiveGrammarException: %s" % self
.parseElementTrace
191 class _ParseResultsWithOffset(object):
192 def __init__(self
,p1
,p2
):
194 def __getitem__(self
,i
):
197 return repr(self
.tup
)
199 class ParseResults(object):
200 """Structured parse results, to provide multiple means of access to the parsed data:
201 - as a list (len(results))
202 - by list index (results[0], results[1], etc.)
203 - by attribute (results.<resultsName>)
205 __slots__
= ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
206 def __new__(cls
, toklist
, name
=None, asList
=True, modal
=True ):
207 if isinstance(toklist
, cls
):
209 retobj
= object.__new
__(cls
)
210 retobj
.__doinit
= True
213 # Performance tuning: we construct a *lot* of these, so keep this
214 # constructor as small and fast as possible
215 def __init__( self
, toklist
, name
=None, asList
=True, modal
=True ):
217 self
.__doinit
= False
220 self
.__accumNames
= {}
221 if isinstance(toklist
, list):
222 self
.__toklist
= toklist
[:]
224 self
.__toklist
= [toklist
]
225 self
.__tokdict
= dict()
227 # this line is related to debugging the asXML bug
232 self
.__accumNames
[name
] = 0
233 if isinstance(name
,int):
234 name
= _ustr(name
) # will always return a str, but use _ustr for consistency
236 if not toklist
in (None,'',[]):
237 if isinstance(toklist
,basestring
):
238 toklist
= [ toklist
]
240 if isinstance(toklist
,ParseResults
):
241 self
[name
] = _ParseResultsWithOffset(toklist
.copy(),-1)
243 self
[name
] = _ParseResultsWithOffset(ParseResults(toklist
[0]),-1)
244 self
[name
].__name
= name
247 self
[name
] = toklist
[0]
248 except (KeyError,TypeError):
251 def __getitem__( self
, i
):
252 if isinstance( i
, (int,slice) ):
253 return self
.__toklist
[i
]
255 if i
not in self
.__accumNames
:
256 return self
.__tokdict
[i
][-1][0]
258 return ParseResults([ v
[0] for v
in self
.__tokdict
[i
] ])
260 def __setitem__( self
, k
, v
):
261 if isinstance(v
,_ParseResultsWithOffset
):
262 self
.__tokdict
[k
] = self
.__tokdict
.get(k
,list()) + [v
]
264 elif isinstance(k
,int):
265 self
.__toklist
[k
] = v
268 self
.__tokdict
[k
] = self
.__tokdict
.get(k
,list()) + [(v
,0)]
270 if isinstance(sub
,ParseResults
):
271 sub
.__parent
= wkref(self
)
273 def __delitem__( self
, i
):
274 if isinstance(i
,(int,slice)):
275 del self
.__toklist
[i
]
277 del self
.__tokdict
[i
]
279 def __contains__( self
, k
):
280 return self
.__tokdict
.has_key(k
)
282 def __len__( self
): return len( self
.__toklist
)
283 def __bool__(self
): return len( self
.__toklist
) > 0
284 def __nonzero__( self
): return self
.__bool
__()
285 def __iter__( self
): return iter( self
.__toklist
)
287 """Returns all named result keys."""
288 return self
.__tokdict
.keys()
291 """Returns all named result keys and values as a list of tuples."""
292 return [(k
,self
[k
]) for k
in self
.__tokdict
.keys()]
295 """Returns all named result values."""
296 return [ v
[-1][0] for v
in self
.__tokdict
.values() ]
298 def __getattr__( self
, name
):
299 if name
not in self
.__slots
__:
300 if self
.__tokdict
.has_key( name
):
301 if name
not in self
.__accumNames
:
302 return self
.__tokdict
[name
][-1][0]
304 return ParseResults([ v
[0] for v
in self
.__tokdict
[name
] ])
309 def __add__( self
, other
):
314 def __iadd__( self
, other
):
316 offset
= len(self
.__toklist
)
317 addoffset
= ( lambda a
: (a
<0 and offset
) or (a
+offset
) )
318 otheritems
= other
.__tokdict
.items()
319 otherdictitems
= [(k
, _ParseResultsWithOffset(v
[0],addoffset(v
[1])) )
320 for (k
,vlist
) in otheritems
for v
in vlist
]
321 for k
,v
in otherdictitems
:
323 if isinstance(v
[0],ParseResults
):
324 v
[0].__parent
= wkref(self
)
325 self
.__toklist
+= other
.__toklist
326 self
.__accumNames
.update( other
.__accumNames
)
330 def __repr__( self
):
331 return "(%s, %s)" % ( repr( self
.__toklist
), repr( self
.__tokdict
) )
336 for i
in self
.__toklist
:
337 if isinstance(i
, ParseResults
):
338 out
+= sep
+ _ustr(i
)
345 def _asStringList( self
, sep
='' ):
347 for item
in self
.__toklist
:
350 if isinstance( item
, ParseResults
):
351 out
+= item
._asStringList
()
353 out
.append( _ustr(item
) )
357 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
359 for res
in self
.__toklist
:
360 if isinstance(res
,ParseResults
):
361 out
.append( res
.asList() )
367 """Returns the named parse results as dictionary."""
368 return dict( self
.items() )
371 """Returns a new copy of a ParseResults object."""
372 ret
= ParseResults( self
.__toklist
)
373 ret
.__tokdict
= self
.__tokdict
.copy()
374 ret
.__parent
= self
.__parent
375 ret
.__accumNames
.update( self
.__accumNames
)
376 ret
.__name
= self
.__name
379 def asXML( self
, doctag
=None, namedItemsOnly
=False, indent
="", formatted
=True ):
380 """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
383 namedItems
= dict( [ (v
[1],k
) for (k
,vlist
) in self
.__tokdict
.items()
385 nextLevelIndent
= indent
+ " "
387 # collapse out indents if formatting is not desired
394 if doctag
is not None:
398 selfTag
= self
.__name
406 out
+= [ nl
, indent
, "<", selfTag
, ">" ]
408 worklist
= self
.__toklist
409 for i
,res
in enumerate(worklist
):
410 if isinstance(res
,ParseResults
):
412 out
+= [ res
.asXML(namedItems
[i
],
413 namedItemsOnly
and doctag
is None,
417 out
+= [ res
.asXML(None,
418 namedItemsOnly
and doctag
is None,
422 # individual token, see if there is a name for it
425 resTag
= namedItems
[i
]
431 xmlBodyText
= xml
.sax
.saxutils
.escape(_ustr(res
))
432 out
+= [ nl
, nextLevelIndent
, "<", resTag
, ">",
436 out
+= [ nl
, indent
, "</", selfTag
, ">" ]
439 def __lookup(self
,sub
):
440 for k
,vlist
in self
.__tokdict
.items():
447 """Returns the results name for this token expression."""
451 par
= self
.__parent
()
453 return par
.__lookup
(self
)
456 elif (len(self
) == 1 and
457 len(self
.__tokdict
) == 1 and
458 self
.__tokdict
.values()[0][0][1] in (0,-1)):
459 return self
.__tokdict
.keys()[0]
463 def dump(self
,indent
='',depth
=0):
464 """Diagnostic method for listing out the contents of a ParseResults.
465 Accepts an optional indent argument so that this string can be embedded
466 in a nested display of other data."""
468 out
.append( indent
+_ustr(self
.asList()) )
474 out
.append( "%s%s- %s: " % (indent
,(' '*depth
), k
) )
475 if isinstance(v
,ParseResults
):
478 out
.append( v
.dump(indent
,depth
+1) )
487 # add support for pickle protocol
488 def __getstate__(self
):
489 return ( self
.__toklist
,
490 ( self
.__tokdict
.copy(),
491 self
.__parent
is not None and self
.__parent
() or None,
495 def __setstate__(self
,state
):
496 self
.__toklist
= state
[0]
500 self
.__name
= state
[1]
501 self
.__accumNames
= {}
502 self
.__accumNames
.update(inAccumNames
)
504 self
.__parent
= wkref(par
)
510 """Returns current column within a string, counting newlines as line separators.
511 The first column is number 1.
513 Note: the default parsing behavior is to expand tabs in the input string
514 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
515 on parsing strings containing <TAB>s, and suggested methods to maintain a
516 consistent view of the parsed string, the parse location, and line and column
517 positions within the parsed string.
519 return (loc
<len(strg
) and strg
[loc
] == '\n') and 1 or loc
- strg
.rfind("\n", 0, loc
)
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
       The first line is number 1.

       The result is one more than the number of newline characters found
       before position loc.

       Note: the default parsing behavior is to expand tabs in the input string
       before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
       on parsing strings containing <TAB>s, and suggested methods to maintain a
       consistent view of the parsed string, the parse location, and line and column
       positions within the parsed string.
    """
    newlines_before = strg.count("\n",0,loc)
    return newlines_before + 1
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

       The returned text runs from just after the last newline before loc up
       to (not including) the first newline at or after loc; when no newline
       follows, it runs to the end of the string.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "no newline" with -1; index 0 is a genuine match
    # (string starting with a newline) and must take the bounded slice
    if nextCR == -1:
        return strg[lastCR+1:]
    return strg[lastCR+1:nextCR]
def _defaultStartDebugAction( instring, loc, expr ):
    """Print a one-line trace naming the expression and the location given.

       The line reads "Match <expr> at loc <loc> (<lineno>,<col>)", where the
       line and column are computed from loc within instring.
    """
    # a single pre-formatted string keeps the parenthesized print valid on
    # both Python 2 (print statement) and Python 3 (print function)
    print("Match %s at loc %s (%d,%d)" % ( _ustr(expr), loc, lineno(loc,instring), col(loc,instring) ))
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print a one-line trace of the expression and the tokens it produced.

       The line reads "Matched <expr> -> <toks.asList()>"; instring, startloc
       and endloc are accepted but not used.
    """
    # a single pre-formatted string keeps the parenthesized print valid on
    # both Python 2 (print statement) and Python 3 (print function)
    print("Matched %s -> %s" % ( _ustr(expr), toks.asList() ))
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print a one-line trace of an exception.

       The line reads "Exception raised: <exc>"; instring, loc and expr are
       accepted but not used.
    """
    # a single pre-formatted string keeps the parenthesized print valid on
    # both Python 2 (print statement) and Python 3 (print function)
    print("Exception raised: %s" % ( _ustr(exc), ))
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

       Accepts any positional arguments, ignores them, and returns None.
    """
    return None
556 class ParserElement(object):
557 """Abstract base level parser element class."""
558 DEFAULT_WHITE_CHARS
= " \n\t\r"
560 def setDefaultWhitespaceChars( chars
):
561 """Overrides the default whitespace chars
563 ParserElement
.DEFAULT_WHITE_CHARS
= chars
564 setDefaultWhitespaceChars
= staticmethod(setDefaultWhitespaceChars
)
566 def __init__( self
, savelist
=False ):
567 self
.parseAction
= list()
568 self
.failAction
= None
569 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
571 self
.resultsName
= None
572 self
.saveAsList
= savelist
573 self
.skipWhitespace
= True
574 self
.whiteChars
= ParserElement
.DEFAULT_WHITE_CHARS
575 self
.copyDefaultWhiteChars
= True
576 self
.mayReturnEmpty
= False # used when checking for left-recursion
577 self
.keepTabs
= False
578 self
.ignoreExprs
= list()
580 self
.streamlined
= False
581 self
.mayIndexError
= True # used to optimize exception handling for subclasses that don't advance parse index
583 self
.modalResults
= True # used to mark results names as modal (report only last) or cumulative (list all)
584 self
.debugActions
= ( None, None, None ) #custom debug actions
586 self
.callPreparse
= True # used to avoid redundant calls to preParse
587 self
.callDuringTry
= False
590 """Make a copy of this ParserElement. Useful for defining different parse actions
591 for the same parsing pattern, using copies of the original parse element."""
592 cpy
= copy
.copy( self
)
593 cpy
.parseAction
= self
.parseAction
[:]
594 cpy
.ignoreExprs
= self
.ignoreExprs
[:]
595 if self
.copyDefaultWhiteChars
:
596 cpy
.whiteChars
= ParserElement
.DEFAULT_WHITE_CHARS
599 def setName( self
, name
):
600 """Define name for this expression, for use in debugging."""
602 self
.errmsg
= "Expected " + self
.name
603 if hasattr(self
,"exception"):
604 self
.exception
.msg
= self
.errmsg
607 def setResultsName( self
, name
, listAllMatches
=False ):
608 """Define name for referencing matching tokens as a nested attribute
609 of the returned parse results.
610 NOTE: this returns a *copy* of the original ParserElement object;
611 this is so that the client can define a basic element, such as an
612 integer, and reference it in multiple places with different names.
614 newself
= self
.copy()
615 newself
.resultsName
= name
616 newself
.modalResults
= not listAllMatches
619 def setBreak(self
,breakFlag
= True):
620 """Method to invoke the Python pdb debugger when this element is
621 about to be parsed. Set breakFlag to True to enable, False to
625 _parseMethod
= self
._parse
626 def breaker(instring
, loc
, doActions
=True, callPreParse
=True):
629 _parseMethod( instring
, loc
, doActions
, callPreParse
)
630 breaker
._originalParseMethod
= _parseMethod
631 self
._parse
= breaker
633 if hasattr(self
._parse
,"_originalParseMethod"):
634 self
._parse
= self
._parse
._originalParseMethod
637 def normalizeParseActionArgs( f
):
638 """Internal method used to decorate parse actions that take fewer than 3 arguments,
639 so that all parse actions can be called as f(s,l,t)."""
644 if isinstance(f
,type):
647 if f
.func_code
.co_flags
& STAR_ARGS
:
649 numargs
= f
.func_code
.co_argcount
650 if hasattr(f
,"im_self"):
654 except AttributeError:
656 # not a function, must be a callable object, get info from the
657 # im_func binding of its bound __call__ method
658 if f
.__call
__.im_func
.func_code
.co_flags
& STAR_ARGS
:
660 numargs
= f
.__call
__.im_func
.func_code
.co_argcount
661 if hasattr(f
.__call
__,"im_self"):
663 except AttributeError:
664 # not a bound method, get info directly from __call__ method
665 if f
.__call
__.func_code
.co_flags
& STAR_ARGS
:
667 numargs
= f
.__call
__.func_code
.co_argcount
668 if hasattr(f
.__call
__,"im_self"):
671 #~ print "adding function %s with %d args" % (f.func_name,numargs)
681 else: #~ numargs == 0:
685 tmp
.__name
__ = f
.__name
__
686 except AttributeError:
687 # no need for special handling if attribute doesnt exist
690 tmp
.__doc
__ = f
.__doc
__
691 except AttributeError:
692 # no need for special handling if attribute doesnt exist
695 tmp
.__dict
__.update(f
.__dict
__)
696 except AttributeError:
697 # no need for special handling if attribute doesnt exist
700 normalizeParseActionArgs
= staticmethod(normalizeParseActionArgs
)
702 def setParseAction( self
, *fns
, **kwargs
):
703 """Define action to perform when successfully matching parse element definition.
704 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
705 fn(loc,toks), fn(toks), or just fn(), where:
706 - s = the original string being parsed (see note below)
707 - loc = the location of the matching substring
708 - toks = a list of the matched tokens, packaged as a ParseResults object
709 If the functions in fns modify the tokens, they can return them as the return
710 value from fn, and the modified list of tokens will replace the original.
711 Otherwise, fn does not need to return any value.
713 Note: the default parsing behavior is to expand tabs in the input string
714 before starting the parsing process. See L{I{parseString}<parseString>} for more information
715 on parsing strings containing <TAB>s, and suggested methods to maintain a
716 consistent view of the parsed string, the parse location, and line and column
717 positions within the parsed string.
719 self
.parseAction
= map(self
.normalizeParseActionArgs
, list(fns
))
720 self
.callDuringTry
= ("callDuringTry" in kwargs
and kwargs
["callDuringTry"])
723 def addParseAction( self
, *fns
, **kwargs
):
724 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
725 self
.parseAction
+= map(self
.normalizeParseActionArgs
, list(fns
))
726 self
.callDuringTry
= self
.callDuringTry
or ("callDuringTry" in kwargs
and kwargs
["callDuringTry"])
729 def setFailAction( self
, fn
):
730 """Define action to perform if parsing fails at this expression.
731 Fail action fn is a callable function that takes the arguments
732 fn(s,loc,expr,err) where:
733 - s = string being parsed
734 - loc = location where expression match was attempted and failed
735 - expr = the parse expression that failed
736 - err = the exception thrown
737 The function returns no value. It may throw ParseFatalException
738 if it is desired to stop parsing immediately."""
742 def skipIgnorables( self
, instring
, loc
):
746 for e
in self
.ignoreExprs
:
749 loc
,dummy
= e
._parse
( instring
, loc
)
751 except ParseException
:
755 def preParse( self
, instring
, loc
):
757 loc
= self
.skipIgnorables( instring
, loc
)
759 if self
.skipWhitespace
:
761 instrlen
= len(instring
)
762 while loc
< instrlen
and instring
[loc
] in wt
:
767 def parseImpl( self
, instring
, loc
, doActions
=True ):
770 def postParse( self
, instring
, loc
, tokenlist
):
774 def _parseNoCache( self
, instring
, loc
, doActions
=True, callPreParse
=True ):
775 debugging
= ( self
.debug
) #and doActions )
777 if debugging
or self
.failAction
:
778 #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
779 if (self
.debugActions
[0] ):
780 self
.debugActions
[0]( instring
, loc
, self
)
781 if callPreParse
and self
.callPreparse
:
782 preloc
= self
.preParse( instring
, loc
)
788 loc
,tokens
= self
.parseImpl( instring
, preloc
, doActions
)
790 raise ParseException( instring
, len(instring
), self
.errmsg
, self
)
791 except ParseException
, err
:
792 #~ print "Exception raised:", err
793 if self
.debugActions
[2]:
794 self
.debugActions
[2]( instring
, tokensStart
, self
, err
)
796 self
.failAction( instring
, tokensStart
, self
, err
)
799 if callPreParse
and self
.callPreparse
:
800 preloc
= self
.preParse( instring
, loc
)
804 if self
.mayIndexError
or loc
>= len(instring
):
806 loc
,tokens
= self
.parseImpl( instring
, preloc
, doActions
)
808 raise ParseException( instring
, len(instring
), self
.errmsg
, self
)
810 loc
,tokens
= self
.parseImpl( instring
, preloc
, doActions
)
812 tokens
= self
.postParse( instring
, loc
, tokens
)
814 retTokens
= ParseResults( tokens
, self
.resultsName
, asList
=self
.saveAsList
, modal
=self
.modalResults
)
815 if self
.parseAction
and (doActions
or self
.callDuringTry
):
818 for fn
in self
.parseAction
:
819 tokens
= fn( instring
, tokensStart
, retTokens
)
820 if tokens
is not None:
821 retTokens
= ParseResults( tokens
,
823 asList
=self
.saveAsList
and isinstance(tokens
,(ParseResults
,list)),
824 modal
=self
.modalResults
)
825 except ParseException
, err
:
826 #~ print "Exception raised in user parse action:", err
827 if (self
.debugActions
[2] ):
828 self
.debugActions
[2]( instring
, tokensStart
, self
, err
)
831 for fn
in self
.parseAction
:
832 tokens
= fn( instring
, tokensStart
, retTokens
)
833 if tokens
is not None:
834 retTokens
= ParseResults( tokens
,
836 asList
=self
.saveAsList
and isinstance(tokens
,(ParseResults
,list)),
837 modal
=self
.modalResults
)
840 #~ print "Matched",self,"->",retTokens.asList()
841 if (self
.debugActions
[1] ):
842 self
.debugActions
[1]( instring
, tokensStart
, loc
, self
, retTokens
)
844 return loc
, retTokens
def tryParse( self, instring, loc ):
    """Attempt a match at loc with doActions disabled and return only the
       end location of the match.

       The tokens produced by the attempt are discarded; any exception
       raised by the underlying _parse call propagates to the caller.
    """
    result = self._parse( instring, loc, doActions=False )
    return result[0]
849 # this method gets repeatedly called during backtracking with the same arguments -
850 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
851 def _parseCache( self
, instring
, loc
, doActions
=True, callPreParse
=True ):
852 lookup
= (self
,instring
,loc
,callPreParse
,doActions
)
853 if lookup
in ParserElement
._exprArgCache
:
854 value
= ParserElement
._exprArgCache
[ lookup
]
855 if isinstance(value
,Exception):
856 if isinstance(value
,ParseBaseException
):
859 return (value
[0],value
[1].copy())
862 value
= self
._parseNoCache
( instring
, loc
, doActions
, callPreParse
)
863 ParserElement
._exprArgCache
[ lookup
] = (value
[0],value
[1].copy())
865 except ParseBaseException
, pe
:
866 ParserElement
._exprArgCache
[ lookup
] = pe
869 _parse
= _parseNoCache
871 # argument cache for optimizing repeated calls when backtracking through recursive expressions
874 ParserElement
._exprArgCache
.clear()
875 resetCache
= staticmethod(resetCache
)
877 _packratEnabled
= False
879 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
880 Repeated parse attempts at the same string location (which happens
881 often in many complex grammars) can immediately return a cached value,
882 instead of re-executing parsing/validating code. Memoizing is done of
883 both valid results and parsing exceptions.
885 This speedup may break existing programs that use parse actions that
886 have side-effects. For this reason, packrat parsing is disabled when
887 you first import pyparsing. To activate the packrat feature, your
888 program must call the class method ParserElement.enablePackrat(). If
889 your program uses psyco to "compile as you go", you must call
890 enablePackrat before calling psyco.full(). If you do not do this,
891 Python will crash. For best results, call enablePackrat() immediately
892 after importing pyparsing.
894 if not ParserElement
._packratEnabled
:
895 ParserElement
._packratEnabled
= True
896 ParserElement
._parse
= ParserElement
._parseCache
897 enablePackrat
= staticmethod(enablePackrat
)
899 def parseString( self
, instring
):
900 """Execute the parse expression with the given string.
901 This is the main interface to the client code, once the complete
902 expression has been built.
904 Note: parseString implicitly calls expandtabs() on the input string,
905 in order to report proper column numbers in parse actions.
906 If the input string contains tabs and
907 the grammar uses parse actions that use the loc argument to index into the
908 string being parsed, you can ensure you have a consistent view of the input
910 - calling parseWithTabs on your grammar before calling parseString
911 (see L{I{parseWithTabs}<parseWithTabs>})
912 - define your parse action using the full (s,loc,toks) signature, and
913 reference the input string using the parse action's s argument
914 - explicitly expand the tabs in your input string before calling
917 ParserElement
.resetCache()
918 if not self
.streamlined
:
920 #~ self.saveAsList = True
921 for e
in self
.ignoreExprs
:
924 loc
, tokens
= self
._parse
( instring
, 0 )
926 loc
, tokens
= self
._parse
( instring
.expandtabs(), 0 )
929 def scanString( self
, instring
, maxMatches
=sys
.maxint
):
930 """Scan the input string for expression matches. Each match will return the
931 matching tokens, start location, and end location. May be called with optional
932 maxMatches argument, to clip scanning after 'n' matches are found.
934 Note that the start and end locations are reported relative to the string
935 being parsed. See L{I{parseString}<parseString>} for more information on parsing
936 strings with embedded tabs."""
937 if not self
.streamlined
:
939 for e
in self
.ignoreExprs
:
942 if not self
.keepTabs
:
943 instring
= _ustr(instring
).expandtabs()
944 instrlen
= len(instring
)
946 preparseFn
= self
.preParse
947 parseFn
= self
._parse
948 ParserElement
.resetCache()
950 while loc
<= instrlen
and matches
< maxMatches
:
952 preloc
= preparseFn( instring
, loc
)
953 nextLoc
,tokens
= parseFn( instring
, preloc
, callPreParse
=False )
954 except ParseException
:
958 yield tokens
, preloc
, nextLoc
961 def transformString( self
, instring
):
962 """Extension to scanString, to modify matching text with modified tokens that may
963 be returned from a parse action. To use transformString, define a grammar and
964 attach a parse action to it that modifies the returned token list.
965 Invoking transformString() on a target string will then scan for matches,
966 and replace the matched text patterns according to the logic in the parse
967 action. transformString() returns the resulting transformed string."""
970 # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
971 # keep string locs straight between transformString and scanString
973 for t
,s
,e
in self
.scanString( instring
):
974 out
.append( instring
[lastE
:s
] )
976 if isinstance(t
,ParseResults
):
978 elif isinstance(t
,list):
983 out
.append(instring
[lastE
:])
984 return "".join(map(_ustr
,out
))
def searchString( self, instring, maxMatches=sys.maxint ):
    """Another extension to scanString, simplifying the access to the tokens found
       to match the given parse expression.  May be called with optional
       maxMatches argument, to clip searching after 'n' matches are found.

       Returns a ParseResults built from the tokens of each match that
       scanString yields; the start and end locations are dropped.
    """
    found = []
    for tokens, _start, _end in self.scanString( instring, maxMatches ):
        found.append( tokens )
    return ParseResults( found )
def __add__(self, other ):
    """Implementation of + operator - returns And

       A string operand is first wrapped in a Literal.  An operand that is
       still not a ParserElement triggers a SyntaxWarning attributed to the
       caller, after which the And is constructed regardless.
    """
    operand = other
    if isinstance( operand, basestring ):
        operand = Literal( operand )
    if not isinstance( operand, ParserElement ):
        message = "Cannot add element of type %s to ParserElement" % type(operand)
        warnings.warn( message, SyntaxWarning, stacklevel=2 )
    return And( [ self, operand ] )
1002 def __radd__(self
, other
):
1003 """Implementation of + operator when left operand is not a ParserElement"""
1004 if isinstance( other
, basestring
):
1005 other
= Literal( other
)
1006 if not isinstance( other
, ParserElement
):
1007 warnings
.warn("Cannot add element of type %s to ParserElement" % type(other
),
1008 SyntaxWarning, stacklevel
=2)
def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst

       A string operand is first wrapped in a Literal.  An operand that is
       still not a ParserElement triggers a SyntaxWarning attributed to the
       caller, after which the MatchFirst is constructed regardless.
    """
    operand = other
    if isinstance( operand, basestring ):
        operand = Literal( operand )
    if not isinstance( operand, ParserElement ):
        message = "Cannot add element of type %s to ParserElement" % type(operand)
        warnings.warn( message, SyntaxWarning, stacklevel=2 )
    return MatchFirst( [ self, operand ] )
1020 def __ror__(self
, other
):
1021 """Implementation of | operator when left operand is not a ParserElement"""
1022 if isinstance( other
, basestring
):
1023 other
= Literal( other
)
1024 if not isinstance( other
, ParserElement
):
1025 warnings
.warn("Cannot add element of type %s to ParserElement" % type(other
),
1026 SyntaxWarning, stacklevel
=2)
def __xor__(self, other ):
    """Implementation of ^ operator - returns Or

       A string operand is first wrapped in a Literal.  An operand that is
       still not a ParserElement triggers a SyntaxWarning attributed to the
       caller, after which the Or is constructed regardless.
    """
    operand = other
    if isinstance( operand, basestring ):
        operand = Literal( operand )
    if not isinstance( operand, ParserElement ):
        message = "Cannot add element of type %s to ParserElement" % type(operand)
        warnings.warn( message, SyntaxWarning, stacklevel=2 )
    return Or( [ self, operand ] )
1038 def __rxor__(self
, other
):
1039 """Implementation of ^= operator"""
1040 if isinstance( other
, basestring
):
1041 other
= Literal( other
)
1042 if not isinstance( other
, ParserElement
):
1043 warnings
.warn("Cannot add element of type %s to ParserElement" % type(other
),
1044 SyntaxWarning, stacklevel
=2)
def __and__(self, other):
    """Implementation of & operator - returns Each.

    A plain string operand is promoted to a Literal first.  If the operand
    is still not a ParserElement, a SyntaxWarning is issued and
    NotImplemented is returned so that Python can try the reflected
    operation or raise TypeError.
    """
    if isinstance(other, basestring):
        other = Literal(other)
    if not isinstance(other, ParserElement):
        # The message used to say "add", which is wrong for the & operator.
        warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                SyntaxWarning, stacklevel=2)
        # Do not fall through: Each() must only ever be given parser elements.
        return NotImplemented
    return Each([self, other])
1056 def __rand__(self
, other
):
1057 """Implementation of right-& operator"""
1058 if isinstance( other
, basestring
):
1059 other
= Literal( other
)
1060 if not isinstance( other
, ParserElement
):
1061 warnings
.warn("Cannot add element of type %s to ParserElement" % type(other
),
1062 SyntaxWarning, stacklevel
=2)
def __invert__(self):
    """Implementation of the unary ~ operator - wraps this element in a NotAny."""
    negated = NotAny(self)
    return negated
1069 def __call__(self
, name
):
1070 """Shortcut for setResultsName, with listAllMatches=default::
1071 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1072 could be written as::
1073 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1075 return self
.setResultsName(name
)
def suppress(self):
    """Return this element wrapped in a Suppress, so that what it matches is
       left out of the returned output (handy for punctuation).
    """
    wrapped = Suppress(self)
    return wrapped
1083 def leaveWhitespace( self
):
1084 """Disables the skipping of whitespace before matching the characters in the
1085 ParserElement's defined pattern. This is normally only used internally by
1086 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1088 self
.skipWhitespace
= False
1091 def setWhitespaceChars( self
, chars
):
1092 """Overrides the default whitespace chars
1094 self
.skipWhitespace
= True
1095 self
.whiteChars
= chars
1096 self
.copyDefaultWhiteChars
= False
1099 def parseWithTabs( self
):
1100 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1101 Must be called before parseString when the input grammar contains elements that
1102 match <TAB> characters."""
1103 self
.keepTabs
= True
1106 def ignore( self
, other
):
1107 """Define expression to be ignored (e.g., comments) while doing pattern
1108 matching; may be called repeatedly, to define multiple comment or other
1111 if isinstance( other
, Suppress
):
1112 if other
not in self
.ignoreExprs
:
1113 self
.ignoreExprs
.append( other
)
1115 self
.ignoreExprs
.append( Suppress( other
) )
1118 def setDebugActions( self
, startAction
, successAction
, exceptionAction
):
1119 """Enable display of debugging messages while doing pattern matching."""
1120 self
.debugActions
= (startAction
or _defaultStartDebugAction
,
1121 successAction
or _defaultSuccessDebugAction
,
1122 exceptionAction
or _defaultExceptionDebugAction
)
1126 def setDebug( self
, flag
=True ):
1127 """Enable display of debugging messages while doing pattern matching.
1128 Set flag to True to enable, False to disable."""
1130 self
.setDebugActions( _defaultStartDebugAction
, _defaultSuccessDebugAction
, _defaultExceptionDebugAction
)
1135 def __str__( self
):
1138 def __repr__( self
):
1141 def streamline( self
):
1142 self
.streamlined
= True
1146 def checkRecursion( self
, parseElementList
):
def validate(self, validateTrace=[]):
    """Validate this expression by running the recursion check from an empty
       element list.  validateTrace is accepted but not used (or modified)
       by this base implementation.
    """
    fresh_trail = []
    self.checkRecursion(fresh_trail)
1153 def parseFile( self
, file_or_filename
):
1154 """Execute the parse expression on the given file or filename.
1155 If a filename is specified (instead of a file object),
1156 the entire file is opened, read, and closed before parsing.
1159 file_contents
= file_or_filename
.read()
1160 except AttributeError:
1161 f
= open(file_or_filename
, "rb")
1162 file_contents
= f
.read()
1164 return self
.parseString(file_contents
)
def getException(self):
    """Build a ParseException for this element carrying self.errmsg, with an
       empty input string and location 0.
    """
    blank_input, start_loc = "", 0
    return ParseException(blank_input, start_loc, self.errmsg, self)
1169 def __getattr__(self
,aname
):
1170 if aname
== "myException":
1171 self
.myException
= ret
= self
.getException();
1174 raise AttributeError, "no such attribute " + aname
1176 class Token(ParserElement
):
1177 """Abstract ParserElement subclass, for defining atomic matching patterns."""
def __init__(self):
    """Initialise the base class with savelist turned off."""
    parent_init = super(Token, self).__init__
    parent_init(savelist=False)
1182 def setName(self
, name
):
1183 s
= super(Token
,self
).setName(name
)
1184 self
.errmsg
= "Expected " + self
.name
1185 #s.myException.msg = self.errmsg
1190 """An empty token, will always match."""
1191 def __init__( self
):
1192 super(Empty
,self
).__init
__()
1194 self
.mayReturnEmpty
= True
1195 self
.mayIndexError
= False
1198 class NoMatch(Token
):
1199 """A token that will never match."""
def __init__(self):
    """Set up the fixed name, error message and matching flags of NoMatch."""
    super(NoMatch, self).__init__()
    # identification and diagnostics
    self.name = "NoMatch"
    self.errmsg = "Unmatchable token"
    # matching flags
    self.mayReturnEmpty = True
    self.mayIndexError = False
1208 def parseImpl( self
, instring
, loc
, doActions
=True ):
1209 exc
= self
.myException
1215 class Literal(Token
):
1216 """Token to exactly match a specified string."""
1217 def __init__( self
, matchString
):
1218 super(Literal
,self
).__init
__()
1219 self
.match
= matchString
1220 self
.matchLen
= len(matchString
)
1222 self
.firstMatchChar
= matchString
[0]
1224 warnings
.warn("null string passed to Literal; use Empty() instead",
1225 SyntaxWarning, stacklevel
=2)
1226 self
.__class
__ = Empty
1227 self
.name
= '"%s"' % _ustr(self
.match
)
1228 self
.errmsg
= "Expected " + self
.name
1229 self
.mayReturnEmpty
= False
1230 #self.myException.msg = self.errmsg
1231 self
.mayIndexError
= False
1233 # Performance tuning: this routine gets called a *lot*
1234 # if this is a single character match string and the first character matches,
1235 # short-circuit as quickly as possible, and avoid calling startswith
1237 def parseImpl( self
, instring
, loc
, doActions
=True ):
1238 if (instring
[loc
] == self
.firstMatchChar
and
1239 (self
.matchLen
==1 or instring
.startswith(self
.match
,loc
)) ):
1240 return loc
+self
.matchLen
, self
.match
1241 #~ raise ParseException( instring, loc, self.errmsg )
1242 exc
= self
.myException
1247 class Keyword(Token
):
1248 """Token to exactly match a specified string as a keyword, that is, it must be
1249 immediately followed by a non-keyword character. Compare with Literal::
1250 Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
1251 Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
1252 Accepts two optional constructor arguments in addition to the keyword string:
1253 identChars is a string of characters that would be valid identifier characters,
1254 defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
1255 matching, default is False.
1257 DEFAULT_KEYWORD_CHARS
= alphanums
+"_$"
1259 def __init__( self
, matchString
, identChars
=DEFAULT_KEYWORD_CHARS
, caseless
=False ):
1260 super(Keyword
,self
).__init
__()
1261 self
.match
= matchString
1262 self
.matchLen
= len(matchString
)
1264 self
.firstMatchChar
= matchString
[0]
1266 warnings
.warn("null string passed to Keyword; use Empty() instead",
1267 SyntaxWarning, stacklevel
=2)
1268 self
.name
= '"%s"' % self
.match
1269 self
.errmsg
= "Expected " + self
.name
1270 self
.mayReturnEmpty
= False
1271 #self.myException.msg = self.errmsg
1272 self
.mayIndexError
= False
1273 self
.caseless
= caseless
1275 self
.caselessmatch
= matchString
.upper()
1276 identChars
= identChars
.upper()
1277 self
.identChars
= _str2dict(identChars
)
1279 def parseImpl( self
, instring
, loc
, doActions
=True ):
1281 if ( (instring
[ loc
:loc
+self
.matchLen
].upper() == self
.caselessmatch
) and
1282 (loc
>= len(instring
)-self
.matchLen
or instring
[loc
+self
.matchLen
].upper() not in self
.identChars
) and
1283 (loc
== 0 or instring
[loc
-1].upper() not in self
.identChars
) ):
1284 return loc
+self
.matchLen
, self
.match
1286 if (instring
[loc
] == self
.firstMatchChar
and
1287 (self
.matchLen
==1 or instring
.startswith(self
.match
,loc
)) and
1288 (loc
>= len(instring
)-self
.matchLen
or instring
[loc
+self
.matchLen
] not in self
.identChars
) and
1289 (loc
== 0 or instring
[loc
-1] not in self
.identChars
) ):
1290 return loc
+self
.matchLen
, self
.match
1291 #~ raise ParseException( instring, loc, self.errmsg )
1292 exc
= self
.myException
1298 c
= super(Keyword
,self
).copy()
1299 c
.identChars
= Keyword
.DEFAULT_KEYWORD_CHARS
def setDefaultKeywordChars(chars):
    """Replace the class-wide Keyword.DEFAULT_KEYWORD_CHARS with chars.

    NOTE(review): the identChars default in Keyword.__init__ is bound when
    the class body is executed, so this presumably only affects code that
    reads Keyword.DEFAULT_KEYWORD_CHARS afterwards - confirm.
    """
    new_default = chars
    Keyword.DEFAULT_KEYWORD_CHARS = new_default
# registered as a static method with the pre-decorator idiom
setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1309 class CaselessLiteral(Literal
):
1310 """Token to match a specified string, ignoring case of letters.
1311 Note: the matched results will always be in the case of the given
1312 match string, NOT the case of the input text.
def __init__(self, matchString):
    """Initialise the underlying Literal with the upper-cased matchString,
       keeping the original spelling in self.returnString and using it for
       the element's name and error message.
    """
    uppercased = matchString.upper()
    super(CaselessLiteral, self).__init__(uppercased)
    # Keep the literal exactly as the caller defined it.
    self.returnString = matchString
    quoted = "'%s'" % self.returnString
    self.name = quoted
    self.errmsg = "Expected " + self.name
1322 def parseImpl( self
, instring
, loc
, doActions
=True ):
1323 if instring
[ loc
:loc
+self
.matchLen
].upper() == self
.match
:
1324 return loc
+self
.matchLen
, self
.returnString
1325 #~ raise ParseException( instring, loc, self.errmsg )
1326 exc
= self
.myException
1331 class CaselessKeyword(Keyword
):
def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
    """Initialise as a Keyword with caseless matching switched on."""
    keyword_init = super(CaselessKeyword, self).__init__
    keyword_init(matchString, identChars, caseless=True)
1335 def parseImpl( self
, instring
, loc
, doActions
=True ):
1336 if ( (instring
[ loc
:loc
+self
.matchLen
].upper() == self
.caselessmatch
) and
1337 (loc
>= len(instring
)-self
.matchLen
or instring
[loc
+self
.matchLen
].upper() not in self
.identChars
) ):
1338 return loc
+self
.matchLen
, self
.match
1339 #~ raise ParseException( instring, loc, self.errmsg )
1340 exc
= self
.myException
1346 """Token for matching words composed of allowed character sets.
1347 Defined with string containing all allowed initial characters,
1348 an optional string containing allowed body characters (if omitted,
1349 defaults to the initial character set), and an optional minimum,
1350 maximum, and/or exact length. The default value for min is 1 (a
1351 minimum value < 1 is not valid); the default values for max and exact
1352 are 0, meaning no maximum or exact length restriction.
1354 def __init__( self
, initChars
, bodyChars
=None, min=1, max=0, exact
=0, asKeyword
=False ):
1355 super(Word
,self
).__init
__()
1356 self
.initCharsOrig
= initChars
1357 self
.initChars
= _str2dict(initChars
)
1359 self
.bodyCharsOrig
= bodyChars
1360 self
.bodyChars
= _str2dict(bodyChars
)
1362 self
.bodyCharsOrig
= initChars
1363 self
.bodyChars
= _str2dict(initChars
)
1365 self
.maxSpecified
= max > 0
1368 raise ValueError, "cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted"
1375 self
.maxLen
= sys
.maxint
1381 self
.name
= _ustr(self
)
1382 self
.errmsg
= "Expected " + self
.name
1383 #self.myException.msg = self.errmsg
1384 self
.mayIndexError
= False
1385 self
.asKeyword
= asKeyword
1387 if ' ' not in self
.initCharsOrig
+self
.bodyCharsOrig
and (min==1 and max==0 and exact
==0):
1388 if self
.bodyCharsOrig
== self
.initCharsOrig
:
1389 self
.reString
= "[%s]+" % _escapeRegexRangeChars(self
.initCharsOrig
)
1390 elif len(self
.bodyCharsOrig
) == 1:
1391 self
.reString
= "%s[%s]*" % \
1392 (re
.escape(self
.initCharsOrig
),
1393 _escapeRegexRangeChars(self
.bodyCharsOrig
),)
1395 self
.reString
= "[%s][%s]*" % \
1396 (_escapeRegexRangeChars(self
.initCharsOrig
),
1397 _escapeRegexRangeChars(self
.bodyCharsOrig
),)
1399 self
.reString
= r
"\b"+self
.reString
+r
"\b"
1401 self
.re
= re
.compile( self
.reString
)
1405 def parseImpl( self
, instring
, loc
, doActions
=True ):
1407 result
= self
.re
.match(instring
,loc
)
1409 exc
= self
.myException
1415 return loc
,result
.group()
1417 if not(instring
[ loc
] in self
.initChars
):
1418 #~ raise ParseException( instring, loc, self.errmsg )
1419 exc
= self
.myException
1425 instrlen
= len(instring
)
1426 bodychars
= self
.bodyChars
1427 maxloc
= start
+ self
.maxLen
1428 maxloc
= min( maxloc
, instrlen
)
1429 while loc
< maxloc
and instring
[loc
] in bodychars
:
1432 throwException
= False
1433 if loc
- start
< self
.minLen
:
1434 throwException
= True
1435 if self
.maxSpecified
and loc
< instrlen
and instring
[loc
] in bodychars
:
1436 throwException
= True
1438 if (start
>0 and instring
[start
-1] in bodychars
) or (loc
<instrlen
and instring
[loc
] in bodychars
):
1439 throwException
= True
1442 #~ raise ParseException( instring, loc, self.errmsg )
1443 exc
= self
.myException
1448 return loc
, instring
[start
:loc
]
1450 def __str__( self
):
1452 return super(Word
,self
).__str
__()
1457 if self
.strRepr
is None:
1465 if ( self
.initCharsOrig
!= self
.bodyCharsOrig
):
1466 self
.strRepr
= "W:(%s,%s)" % ( charsAsStr(self
.initCharsOrig
), charsAsStr(self
.bodyCharsOrig
) )
1468 self
.strRepr
= "W:(%s)" % charsAsStr(self
.initCharsOrig
)
1474 """Token for matching strings that match a given regular expression.
1475 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1477 def __init__( self
, pattern
, flags
=0):
1478 """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
1479 super(Regex
,self
).__init
__()
1481 if len(pattern
) == 0:
1482 warnings
.warn("null string passed to Regex; use Empty() instead",
1483 SyntaxWarning, stacklevel
=2)
1485 self
.pattern
= pattern
1489 self
.re
= re
.compile(self
.pattern
, self
.flags
)
1490 self
.reString
= self
.pattern
1491 except sre_constants
.error
,e
:
1492 warnings
.warn("invalid pattern (%s) passed to Regex" % pattern
,
1493 SyntaxWarning, stacklevel
=2)
1496 self
.name
= _ustr(self
)
1497 self
.errmsg
= "Expected " + self
.name
1498 #self.myException.msg = self.errmsg
1499 self
.mayIndexError
= False
1500 self
.mayReturnEmpty
= True
1502 def parseImpl( self
, instring
, loc
, doActions
=True ):
1503 result
= self
.re
.match(instring
,loc
)
1505 exc
= self
.myException
1511 d
= result
.groupdict()
1512 ret
= ParseResults(result
.group())
1518 def __str__( self
):
1520 return super(Regex
,self
).__str
__()
1524 if self
.strRepr
is None:
1525 self
.strRepr
= "Re:(%s)" % repr(self
.pattern
)
1530 class QuotedString(Token
):
1531 """Token for matching strings that are delimited by quoting characters.
1533 def __init__( self
, quoteChar
, escChar
=None, escQuote
=None, multiline
=False, unquoteResults
=True, endQuoteChar
=None):
1535 Defined with the following parameters:
1536 - quoteChar - string of one or more characters defining the quote delimiting string
1537 - escChar - character to escape quotes, typically backslash (default=None)
1538 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
1539 - multiline - boolean indicating whether quotes can span multiple lines (default=False)
1540 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
1541 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
1543 super(QuotedString
,self
).__init
__()
1545 # remove white space from quote chars - wont work anyway
1546 quoteChar
= quoteChar
.strip()
1547 if len(quoteChar
) == 0:
1548 warnings
.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel
=2)
1551 if endQuoteChar
is None:
1552 endQuoteChar
= quoteChar
1554 endQuoteChar
= endQuoteChar
.strip()
1555 if len(endQuoteChar
) == 0:
1556 warnings
.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel
=2)
1559 self
.quoteChar
= quoteChar
1560 self
.quoteCharLen
= len(quoteChar
)
1561 self
.firstQuoteChar
= quoteChar
[0]
1562 self
.endQuoteChar
= endQuoteChar
1563 self
.endQuoteCharLen
= len(endQuoteChar
)
1564 self
.escChar
= escChar
1565 self
.escQuote
= escQuote
1566 self
.unquoteResults
= unquoteResults
1569 self
.flags
= re
.MULTILINE | re
.DOTALL
1570 self
.pattern
= r
'%s(?:[^%s%s]' % \
1571 ( re
.escape(self
.quoteChar
),
1572 _escapeRegexRangeChars(self
.endQuoteChar
[0]),
1573 (escChar
is not None and _escapeRegexRangeChars(escChar
) or '') )
1576 self
.pattern
= r
'%s(?:[^%s\n\r%s]' % \
1577 ( re
.escape(self
.quoteChar
),
1578 _escapeRegexRangeChars(self
.endQuoteChar
[0]),
1579 (escChar
is not None and _escapeRegexRangeChars(escChar
) or '') )
1580 if len(self
.endQuoteChar
) > 1:
1582 '|(?:' + ')|(?:'.join(["%s[^%s]" % (re
.escape(self
.endQuoteChar
[:i
]),
1583 _escapeRegexRangeChars(self
.endQuoteChar
[i
]))
1584 for i
in range(len(self
.endQuoteChar
)-1,0,-1)]) + ')'
1587 self
.pattern
+= (r
'|(?:%s)' % re
.escape(escQuote
))
1589 self
.pattern
+= (r
'|(?:%s.)' % re
.escape(escChar
))
1590 self
.escCharReplacePattern
= re
.escape(self
.escChar
)+"(.)"
1591 self
.pattern
+= (r
')*%s' % re
.escape(self
.endQuoteChar
))
1594 self
.re
= re
.compile(self
.pattern
, self
.flags
)
1595 self
.reString
= self
.pattern
1596 except sre_constants
.error
,e
:
1597 warnings
.warn("invalid pattern (%s) passed to Regex" % self
.pattern
,
1598 SyntaxWarning, stacklevel
=2)
1601 self
.name
= _ustr(self
)
1602 self
.errmsg
= "Expected " + self
.name
1603 #self.myException.msg = self.errmsg
1604 self
.mayIndexError
= False
1605 self
.mayReturnEmpty
= True
1607 def parseImpl( self
, instring
, loc
, doActions
=True ):
1608 result
= instring
[loc
] == self
.firstQuoteChar
and self
.re
.match(instring
,loc
) or None
1610 exc
= self
.myException
1616 ret
= result
.group()
1618 if self
.unquoteResults
:
1621 ret
= ret
[self
.quoteCharLen
:-self
.endQuoteCharLen
]
1623 if isinstance(ret
,basestring
):
1624 # replace escaped characters
1626 ret
= re
.sub(self
.escCharReplacePattern
,"\g<1>",ret
)
1628 # replace escaped quotes
1630 ret
= ret
.replace(self
.escQuote
, self
.endQuoteChar
)
1634 def __str__( self
):
1636 return super(QuotedString
,self
).__str
__()
1640 if self
.strRepr
is None:
1641 self
.strRepr
= "quoted string, starting with %s ending with %s" % (self
.quoteChar
, self
.endQuoteChar
)
1646 class CharsNotIn(Token
):
1647 """Token for matching words composed of characters *not* in a given set.
1648 Defined with string containing all disallowed characters, and an optional
1649 minimum, maximum, and/or exact length. The default value for min is 1 (a
1650 minimum value < 1 is not valid); the default values for max and exact
1651 are 0, meaning no maximum or exact length restriction.
1653 def __init__( self
, notChars
, min=1, max=0, exact
=0 ):
1654 super(CharsNotIn
,self
).__init
__()
1655 self
.skipWhitespace
= False
1656 self
.notChars
= notChars
1659 raise ValueError, "cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted"
1666 self
.maxLen
= sys
.maxint
1672 self
.name
= _ustr(self
)
1673 self
.errmsg
= "Expected " + self
.name
1674 self
.mayReturnEmpty
= ( self
.minLen
== 0 )
1675 #self.myException.msg = self.errmsg
1676 self
.mayIndexError
= False
1678 def parseImpl( self
, instring
, loc
, doActions
=True ):
1679 if instring
[loc
] in self
.notChars
:
1680 #~ raise ParseException( instring, loc, self.errmsg )
1681 exc
= self
.myException
1688 notchars
= self
.notChars
1689 maxlen
= min( start
+self
.maxLen
, len(instring
) )
1690 while loc
< maxlen
and \
1691 (instring
[loc
] not in notchars
):
1694 if loc
- start
< self
.minLen
:
1695 #~ raise ParseException( instring, loc, self.errmsg )
1696 exc
= self
.myException
1701 return loc
, instring
[start
:loc
]
1703 def __str__( self
):
1705 return super(CharsNotIn
, self
).__str
__()
1709 if self
.strRepr
is None:
1710 if len(self
.notChars
) > 4:
1711 self
.strRepr
= "!W:(%s...)" % self
.notChars
[:4]
1713 self
.strRepr
= "!W:(%s)" % self
.notChars
1718 """Special matching class for matching whitespace. Normally, whitespace is ignored
1719 by pyparsing grammars. This class is included when some whitespace structures
1720 are significant. Define with a string containing the whitespace characters to be
1721 matched; default is " \\t\\n". Also takes optional min, max, and exact arguments,
1722 as defined for the Word class."""
1730 def __init__(self
, ws
=" \t\r\n", min=1, max=0, exact
=0):
1731 super(White
,self
).__init
__()
1732 self
.matchWhite
= ws
1733 self
.setWhitespaceChars( "".join([c
for c
in self
.whiteChars
if c
not in self
.matchWhite
]) )
1734 #~ self.leaveWhitespace()
1735 self
.name
= ("".join([White
.whiteStrs
[c
] for c
in self
.matchWhite
]))
1736 self
.mayReturnEmpty
= True
1737 self
.errmsg
= "Expected " + self
.name
1738 #self.myException.msg = self.errmsg
1745 self
.maxLen
= sys
.maxint
1751 def parseImpl( self
, instring
, loc
, doActions
=True ):
1752 if not(instring
[ loc
] in self
.matchWhite
):
1753 #~ raise ParseException( instring, loc, self.errmsg )
1754 exc
= self
.myException
1760 maxloc
= start
+ self
.maxLen
1761 maxloc
= min( maxloc
, len(instring
) )
1762 while loc
< maxloc
and instring
[loc
] in self
.matchWhite
:
1765 if loc
- start
< self
.minLen
:
1766 #~ raise ParseException( instring, loc, self.errmsg )
1767 exc
= self
.myException
1772 return loc
, instring
[start
:loc
]
class _PositionToken(Token):
    """Shared base for the position-matching tokens (GoToColumn, LineStart,
       LineEnd, StringStart, StringEnd)."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        # each concrete subclass is named after its own class
        self.name = self.__class__.__name__
1782 class GoToColumn(_PositionToken
):
1783 """Token to advance to a specific column of input text; useful for tabular report scraping."""
1784 def __init__( self
, colno
):
1785 super(GoToColumn
,self
).__init
__()
1788 def preParse( self
, instring
, loc
):
1789 if col(loc
,instring
) != self
.col
:
1790 instrlen
= len(instring
)
1791 if self
.ignoreExprs
:
1792 loc
= self
.skipIgnorables( instring
, loc
)
1793 while loc
< instrlen
and instring
[loc
].isspace() and col( loc
, instring
) != self
.col
:
1797 def parseImpl( self
, instring
, loc
, doActions
=True ):
1798 thiscol
= col( loc
, instring
)
1799 if thiscol
> self
.col
:
1800 raise ParseException( instring
, loc
, "Text not in expected column", self
)
1801 newloc
= loc
+ self
.col
- thiscol
1802 ret
= instring
[ loc
: newloc
]
1805 class LineStart(_PositionToken
):
1806 """Matches if current position is at the beginning of a line within the parse string"""
def __init__(self):
    """Restrict the skipped whitespace to space and tab, and set the
       start-of-line error message.
    """
    super(LineStart, self).__init__()
    self.errmsg = "Expected start of line"
    # newlines are left out of the whitespace set for this element
    self.setWhitespaceChars(" \t")
1813 def preParse( self
, instring
, loc
):
1814 preloc
= super(LineStart
,self
).preParse(instring
,loc
)
1815 if instring
[preloc
] == "\n":
1819 def parseImpl( self
, instring
, loc
, doActions
=True ):
1821 (loc
== self
.preParse( instring
, 0 )) or
1822 (instring
[loc
-1] == "\n") ): #col(loc, instring) != 1:
1823 #~ raise ParseException( instring, loc, "Expected start of line" )
1824 exc
= self
.myException
1830 class LineEnd(_PositionToken
):
1831 """Matches if current position is at the end of a line within the parse string"""
def __init__(self):
    """Restrict the skipped whitespace to space and tab, and set the
       end-of-line error message.
    """
    super(LineEnd, self).__init__()
    self.errmsg = "Expected end of line"
    # newlines are left out of the whitespace set for this element
    self.setWhitespaceChars(" \t")
1838 def parseImpl( self
, instring
, loc
, doActions
=True ):
1839 if loc
<len(instring
):
1840 if instring
[loc
] == "\n":
1843 #~ raise ParseException( instring, loc, "Expected end of line" )
1844 exc
= self
.myException
1848 elif loc
== len(instring
):
1851 exc
= self
.myException
1856 class StringStart(_PositionToken
):
1857 """Matches if current position is at the beginning of the parse string"""
def __init__(self):
    """Initialise the position token and set the start-of-text error message."""
    super(StringStart, self).__init__()
    message = "Expected start of text"
    self.errmsg = message
1863 def parseImpl( self
, instring
, loc
, doActions
=True ):
1865 # see if entire string up to here is just whitespace and ignoreables
1866 if loc
!= self
.preParse( instring
, 0 ):
1867 #~ raise ParseException( instring, loc, "Expected start of text" )
1868 exc
= self
.myException
1874 class StringEnd(_PositionToken
):
1875 """Matches if current position is at the end of the parse string"""
def __init__(self):
    """Initialise the position token and set the end-of-text error message."""
    super(StringEnd, self).__init__()
    message = "Expected end of text"
    self.errmsg = message
1881 def parseImpl( self
, instring
, loc
, doActions
=True ):
1882 if loc
< len(instring
):
1883 #~ raise ParseException( instring, loc, "Expected end of text" )
1884 exc
= self
.myException
1888 elif loc
== len(instring
):
1890 elif loc
> len(instring
):
1893 exc
= self
.myException
1899 class ParseExpression(ParserElement
):
1900 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
1901 def __init__( self
, exprs
, savelist
= False ):
1902 super(ParseExpression
,self
).__init
__(savelist
)
1903 if isinstance( exprs
, list ):
1905 elif isinstance( exprs
, basestring
):
1906 self
.exprs
= [ Literal( exprs
) ]
1908 self
.exprs
= [ exprs
]
1909 self
.callPreparse
= False
1911 def __getitem__( self
, i
):
1912 return self
.exprs
[i
]
1914 def append( self
, other
):
1915 self
.exprs
.append( other
)
1919 def leaveWhitespace( self
):
1920 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
1921 all contained expressions."""
1922 self
.skipWhitespace
= False
1923 self
.exprs
= [ e
.copy() for e
in self
.exprs
]
1924 for e
in self
.exprs
:
1928 def ignore( self
, other
):
1929 if isinstance( other
, Suppress
):
1930 if other
not in self
.ignoreExprs
:
1931 super( ParseExpression
, self
).ignore( other
)
1932 for e
in self
.exprs
:
1933 e
.ignore( self
.ignoreExprs
[-1] )
1935 super( ParseExpression
, self
).ignore( other
)
1936 for e
in self
.exprs
:
1937 e
.ignore( self
.ignoreExprs
[-1] )
1940 def __str__( self
):
1942 return super(ParseExpression
,self
).__str
__()
1946 if self
.strRepr
is None:
1947 self
.strRepr
= "%s:(%s)" % ( self
.__class
__.__name
__, _ustr(self
.exprs
) )
1950 def streamline( self
):
1951 super(ParseExpression
,self
).streamline()
1953 for e
in self
.exprs
:
1956 # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
1957 # but only if there are no parse actions or resultsNames on the nested And's
1958 # (likewise for Or's and MatchFirst's)
1959 if ( len(self
.exprs
) == 2 ):
1960 other
= self
.exprs
[0]
1961 if ( isinstance( other
, self
.__class
__ ) and
1962 not(other
.parseAction
) and
1963 other
.resultsName
is None and
1965 self
.exprs
= other
.exprs
[:] + [ self
.exprs
[1] ]
1967 self
.mayReturnEmpty |
= other
.mayReturnEmpty
1968 self
.mayIndexError |
= other
.mayIndexError
1970 other
= self
.exprs
[-1]
1971 if ( isinstance( other
, self
.__class
__ ) and
1972 not(other
.parseAction
) and
1973 other
.resultsName
is None and
1975 self
.exprs
= self
.exprs
[:-1] + other
.exprs
[:]
1977 self
.mayReturnEmpty |
= other
.mayReturnEmpty
1978 self
.mayIndexError |
= other
.mayIndexError
1982 def setResultsName( self
, name
, listAllMatches
=False ):
1983 ret
= super(ParseExpression
,self
).setResultsName(name
,listAllMatches
)
1986 def validate( self
, validateTrace
=[] ):
1987 tmp
= validateTrace
[:]+[self
]
1988 for e
in self
.exprs
:
1990 self
.checkRecursion( [] )
1992 class And(ParseExpression
):
1993 """Requires all given ParseExpressions to be found in the given order.
1994 Expressions may be separated by whitespace.
1995 May be constructed using the '+' operator.
1997 def __init__( self
, exprs
, savelist
= True ):
1998 super(And
,self
).__init
__(exprs
, savelist
)
1999 self
.mayReturnEmpty
= True
2000 for e
in self
.exprs
:
2001 if not e
.mayReturnEmpty
:
2002 self
.mayReturnEmpty
= False
2004 self
.setWhitespaceChars( exprs
[0].whiteChars
)
2005 self
.skipWhitespace
= exprs
[0].skipWhitespace
2006 self
.callPreparse
= True
def parseImpl(self, instring, loc, doActions=True):
    """Parse every contained expression in order, starting at loc.

    Returns the location after the last expression together with the
    accumulated tokens of all expressions.
    """
    first = self.exprs[0]
    # The And's own pre-parse has already been applied to the string, so the
    # first element is told not to pre-parse again.
    loc, resultlist = first._parse(instring, loc, doActions, callPreParse=False)
    for expr in self.exprs[1:]:
        loc, exprtokens = expr._parse(instring, loc, doActions)
        # nothing to merge when there are neither tokens nor named results
        if not exprtokens and not exprtokens.keys():
            continue
        resultlist += exprtokens
    return loc, resultlist
def __iadd__(self, other):
    """Implementation of += for And: appends other (a plain string is first
       promoted to a Literal) and returns the result of self.append.
    """
    addend = other
    if isinstance(addend, basestring):
        addend = Literal(addend)
    return self.append(addend)
2023 def checkRecursion( self
, parseElementList
):
2024 subRecCheckList
= parseElementList
[:] + [ self
]
2025 for e
in self
.exprs
:
2026 e
.checkRecursion( subRecCheckList
)
2027 if not e
.mayReturnEmpty
:
2030 def __str__( self
):
2031 if hasattr(self
,"name"):
2034 if self
.strRepr
is None:
2035 self
.strRepr
= "{" + " ".join( [ _ustr(e
) for e
in self
.exprs
] ) + "}"
2040 class Or(ParseExpression
):
2041 """Requires that at least one ParseExpression is found.
2042 If two expressions match, the expression that matches the longest string will be used.
2043 May be constructed using the '^' operator.
2045 def __init__( self
, exprs
, savelist
= False ):
2046 super(Or
,self
).__init
__(exprs
, savelist
)
2047 self
.mayReturnEmpty
= False
2048 for e
in self
.exprs
:
2049 if e
.mayReturnEmpty
:
2050 self
.mayReturnEmpty
= True
2053 def parseImpl( self
, instring
, loc
, doActions
=True ):
2056 for e
in self
.exprs
:
2058 loc2
= e
.tryParse( instring
, loc
)
2059 except ParseException
, err
:
2060 if err
.loc
> maxExcLoc
:
2063 except IndexError, err
:
2064 if len(instring
) > maxExcLoc
:
2065 maxException
= ParseException(instring
,len(instring
),e
.errmsg
,self
)
2066 maxExcLoc
= len(instring
)
2068 if loc2
> maxMatchLoc
:
2076 raise ParseException(instring
, loc
, "no defined alternatives to match", self
)
2078 return maxMatchExp
._parse
( instring
, loc
, doActions
)
def __ixor__(self, other):
    """Implementation of ^= for Or: appends other (a plain string is first
       promoted to a Literal) and returns the result of self.append.
    """
    alternative = other
    if isinstance(alternative, basestring):
        alternative = Literal(alternative)
    return self.append(alternative)
2085 def __str__( self
):
2086 if hasattr(self
,"name"):
2089 if self
.strRepr
is None:
2090 self
.strRepr
= "{" + " ^ ".join( [ _ustr(e
) for e
in self
.exprs
] ) + "}"
def checkRecursion(self, parseElementList):
    """Run the recursion check on every alternative, passing each one a copy
       of parseElementList extended with this element.
    """
    trail = parseElementList[:]
    trail += [self]
    for alternative in self.exprs:
        alternative.checkRecursion(trail)
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """Store the alternatives and derive mayReturnEmpty from them."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            # the whole expression may match empty as soon as one alternative can
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at loc.

        If every alternative fails, re-raise the failure that was reported
        at the greatest location; an IndexError from an alternative is
        treated as a ParseException located at the end of instring.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e._parse( instring, loc, doActions )
            except ParseException:
                # sys.exc_info() keeps this valid on old and new interpreters
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Implement '|=': append another alternative (strings become Literals)."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a | b}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Propagate the recursion check to every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """Classify the sub-expressions by repetition wrapper.

        Optional / ZeroOrMore / OneOrMore wrappers are unwrapped into
        self.optionals / self.multioptionals / self.multirequired; all other
        expressions, plus the OneOrMore bodies, form self.required.
        """
        super(Each,self).__init__(exprs, savelist)
        # empty only if every sub-expression may be empty
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        self.optionals = [ e.expr for e in exprs if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the sub-expressions in whatever order the input presents them.

        First pass: repeatedly try every still-eligible expression with
        tryParse, recording the order of successes, until a full round fails.
        Second pass: re-parse in that order with _parse to build the results.
        Raises ParseException if a required expression never matched.
        """
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # single-shot expressions are consumed once matched
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ _ustr(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # keys already present are merged rather than overwritten
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a & b}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Propagate the recursion check to every sub-expression."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr (a string becomes a Literal) and copy its parse settings."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a private copy of the wrapped expression."""
        self.skipWhitespace = False
        # copy only when there is something to copy; the previous order
        # dereferenced self.expr before testing it for None
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on the wrapped expression.

        A Suppress instance that is already registered is not added again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if self is already on the given path."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=None ):
        """Validate the wrapped expression, then run the recursion check."""
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Prefer the base-class string; fall back to "ClassName:(expr)"."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # base __str__ failed; build the representation from the wrapped expression
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try expr at loc (failure propagates); on success return loc unchanged and no tokens."""
        self.expr.tryParse( instring, loc )
        return loc, []
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        #~ self.leaveWhitespace()
        self.skipWhitespace = False  # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed (with no tokens, loc unchanged) only if expr fails at loc."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            pass
        else:
            # expr matched, which is the failure case for a negative lookahead
            #~ raise ParseException(instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []

    def __str__( self ):
        """Return the assigned name if any, else a cached "~{expr}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse expr repeatedly until it fails; return the accumulated tokens.

        A ParseException or IndexError from expr ends the repetition and is
        swallowed, including on the very first attempt.
        """
        tokens = []
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
            hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
            while 1:
                if hasIgnoreExprs:
                    preloc = self.skipIgnorables( instring, loc )
                else:
                    preloc = loc
                loc, tmptokens = self.expr._parse( instring, preloc, doActions )
                # keep results that carry tokens or only named values
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if any, else a cached "[expr]..." form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """As the inherited method, but the returned element always saves as a list."""
        ret = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        ret.saveAsList = True
        return ret
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Parse expr once (failure propagates), then repeat until it fails."""
        # must be at least one
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnoreExprs = ( len(self.ignoreExprs) > 0 )
            while 1:
                if hasIgnoreExprs:
                    preloc = self.skipIgnorables( instring, loc )
                else:
                    preloc = loc
                loc, tmptokens = self.expr._parse( instring, preloc, doActions )
                # keep results that carry tokens or only named values
                if tmptokens or tmptokens.keys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            # a failed repetition simply ends the loop
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name if any, else a cached "{expr}..." form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """As the inherited method, but the returned element always saves as a list."""
        ret = super(OneOrMore,self).setResultsName(name,listAllMatches)
        ret.saveAsList = True
        return ret
2433 class _NullToken(object):
2439 _optionalNotMatched
= _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse expr if possible; otherwise yield the default (if one was given) or nothing."""
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            # identity test against the sentinel, so any value (even None) can be a default
            if self.defaultValue is not _optionalNotMatched:
                tokens = [ self.defaultValue ]
            else:
                tokens = []
        return loc, tokens

    def __str__( self ):
        """Return the assigned name if any, else a cached "[expr]" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # work on a copy so the caller's expression is not modified
            self.expr = self.expr.copy()
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.errmsg = "No match found for "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time until expr matches.

        Returns the skipped text as a single token; when includeMatch is set
        the target is parsed as well and its tokens (if any) are appended.
        Raises self.myException if the end of instring is passed without a match.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        while loc <= instrlen:
            try:
                loc = expr.skipIgnorables( instring, loc )
                # probe without parse actions first
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                if self.includeMatch:
                    skipText = instring[startLoc:loc]
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ instring[startLoc:loc] ]
            except (ParseException,IndexError):
                loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Bind the deferred expression and adopt its parse settings; returns None."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        return None

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only (not on the bound expression)."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline once; the flag is set first so a recursive grammar terminates."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=None ):
        """Validate the bound expression unless self is already on the trace."""
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name if any, else "Forward: " + the bound expression."""
        if hasattr(self,"name"):
            return self.name

        # temporarily swap the class so a nested reference to self renders
        # through _ForwardNoRecurse.__str__ instead of recursing
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = Forward
        return "Forward: "+retString

    def copy(self):
        """Copy a bound Forward normally; for an unbound one return a new Forward that refers to self."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret
class _ForwardNoRecurse(Forward):
    """Stand-in class whose __str__ is a fixed placeholder.

    Forward.__str__ assigns this class to an instance while rendering it,
    so a self-reference prints as "..." instead of recursing.
    """
    def __str__( self ):
        return "..."
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted for signature compatibility but not forwarded
        super(TokenConverter,self).__init__( expr )#, savelist )
        self.saveAsList = False
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list with each token upper-cased."""
        # str.upper method instead of the string-module function, which
        # newer interpreters no longer provide
        return [ tok.upper() for tok in tokenlist ]
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore( self, other ):
        """Register an ignorable; when adjacent it is not pushed down to the wrapped expression."""
        if self.adjacent:
            ParserElement.ignore(self, other)
        else:
            super( Combine, self).ignore( other )
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the token list with one joined string, keeping named results."""
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([ "".join(tokenlist._asStringList(self.joinString)) ], modal=self.modalResults)

        if self.resultsName and len(retToks.keys())>0:
            return [ retToks ]
        else:
            return retToks
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the token list in an outer one-element list."""
        return [ tokenlist ]
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to tokenlist for every non-empty sub-group.

        The key is the group's first token (an int key is converted to a
        stripped string).  The value is "" for a one-token group, the second
        token for a plain two-token group, and otherwise the remainder of the
        group (unwrapped when it is a single unnamed token).
        """
        for i,tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey,int):
                ikey = _ustr(tok[0]).strip()
            if len(tok)==1:
                tokenlist[ikey] = _ParseResultsWithOffset("",i)
            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
                tokenlist[ikey] = _ParseResultsWithOffset(tok[1],i)
            else:
                dictvalue = tok.copy() #ParseResults(i)
                del dictvalue[0]
                if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue,i)
                else:
                    tokenlist[ikey] = _ParseResultsWithOffset(dictvalue[0],i)

        if self.resultsName:
            return [ tokenlist ]
        else:
            return tokenlist
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        """Discard all tokens."""
        return []

    def suppress( self ):
        """Already suppressed; return self unchanged."""
        return self
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = ParserElement.normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        """Run the wrapped action on the first call; raise ParseException afterwards."""
        if not self.called:
            results = self.callable(s,l,t)
            # flag is set only after the action returned normally
            self.called = True
            return results
        raise ParseException(s,l,"")

    def reset(self):
        """Allow the wrapped action to run once more."""
        self.called = False
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an ">>entering" line to stderr before calling
    f and a "<<leaving" line afterwards, showing either the return value or
    the exception (which is then re-raised).
    """
    f = ParserElement.normalizeParseActionArgs(f)
    def z(*paArgs):
        thisFunc = f.func_name
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # extra leading argument: prefix the name with its class
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() keeps this valid on old and new interpreters
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    # display name of the form "expr [delim expr]..."
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if combine:
        return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
    else:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # rebind the Forward to exactly n repetitions once the count is known
        n = int(t[0])
        if n:
            arrayExpr << Group(And([expr]*n))
        else:
            arrayExpr << Group(empty)
        # returning no tokens suppresses the count itself
        return []
    return ( Word(nums).setName("arrayLen").setParseAction(countFieldParseAction, callDuringTry=True) + arrayExpr )
2756 if type(L
) is not list: return [L
]
2757 if L
== []: return L
2758 return _flatten(L
[0]) + _flatten(L
[1:])
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # rebind the Forward to literal(s) built from what expr just matched
        if t:
            if len(t) == 1:
                rep << t[0]
            else:
                # flatten t tokens
                tflat = _flatten(t.asList())
                rep << And( [ Literal(tt) for tt in tflat ] )
        else:
            rep << Empty()
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    e2 = expr.copy()
    rep << e2
    def copyTokenToRepeater(s,l,t):
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            # the repeat parses with a copy of expr, then must equal the first match
            theseTokens = _flatten(t.asList())
            if  theseTokens != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters \\ ^ - ] in s and spell newline/tab as \\n / \\t."""
    #~  escape these chars: ^-]
    for c in r"\^-]":
        s = s.replace(c,"\\"+c)
    s = s.replace("\n",r"\n")
    s = s.replace("\t",r"\t")
    return _ustr(s)
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        # always a fresh list: the reordering below deletes and inserts in
        # place, which a tuple does not support
        symbols = list(strs)
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # keep going with no alternatives instead of failing on an unbound name
        symbols = []

    # drop duplicates, and move any symbol ahead of an earlier symbol that is
    # a prefix of it, so that the longer alternative is tested first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        #~ print strs,"->", "|".join( [ _escapeRegexChars(sym) for sym in symbols] )
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character class suffices
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    return Dict( ZeroOrMore( Group ( key + value ) ) )
_bslash = "\\"
# every printable character that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the "[...]" range strings accepted by srange()
# backslash + punctuation character -> the punctuation character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# backslash + "0x" + hex digits -> the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0" + octal digits -> the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# a parsed range (ParseResults pair) expands to every character from p[0]
# through p[1]; anything else is returned unchanged
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       An input that cannot be parsed or expanded yields the empty string.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best effort by design: a malformed range string maps to ""
        return ""
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().

       Returns a function that accepts any arguments and returns [replStr].
    """
    def _replFunc(*args):
        return [replStr]
    return _replFunc
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )

       Returns the first token without its first and last character.
    """
    return t[0][1:-1]
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    # each token is passed through _ustr before upper-casing
    return [ tt.upper() for tt in map(_ustr,t) ]
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    # each token is passed through _ustr before lower-casing
    return [ tt.lower() for tt in map(_ustr,t) ]
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       Replaces the contents of t, in place, with the slice of s from
       startLoc to the end location reported by getTokensEndLoc().
    """
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    del t[:]
    t += ParseResults(s[startLoc:endloc])
    return t
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Walks the call stack, starting two frames above this function, for a
       frame whose function is named "_parseNoCache" and returns that frame's
       local variable "loc".  Raises ParseFatalException if there is none.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for f in fstack[2:]:
            if f[3] == "_parseNoCache":
                return f[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the frame references promptly
        del fstack
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name

       tagStr may be a string (converted to a Keyword, caseless unless xml is
       true) or an expression that has a name attribute.  Returns the pair
       (openTag, closeTag).
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        # XML form: every attribute is name="double-quoted value"
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        # HTML form: values may be quoted or bare, the value is optional,
        # and attribute names are lower-cased
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + tagStr + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine("</" + tagStr + ">")

    # results-name suffix: tag name title-cased, with ':' and spaces removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    return _makeTags( tagStr, False )
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    return _makeTags( tagStr, True )
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (align="right",width="100"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, or names that are Python keywords
       (such as class), you must use the second form.  Attribute names are matched
       insensitive to upper/lower case.

       If any positional tuples are given, keyword arguments are ignored.
       The returned parse action raises ParseException when an attribute is
       absent or has a different value, and returns None otherwise.
    """
    if args:
        attrs = args[:]
    else:
        attrs = attrDict.items()
    attrs = [(k.lower(),v) for k,v in attrs]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# associativity markers for operatorPrecedence; each is a unique object
# that is only ever compared with ==
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def _flattenOpPrecTokens(tokens):
    """Collapse single-element ParseResults nesting, recursively.

    A one-element ParseResults is replaced by its (recursively collapsed)
    element; a longer ParseResults becomes a plain list of collapsed items;
    any other value is returned unchanged.
    """
    if isinstance(tokens,ParseResults):
        if len(tokens)==1:
            if isinstance(tokens[0],ParseResults):
                return _flattenOpPrecTokens(tokens[0])
            else:
                return tokens[0]
        # list comprehension: a real list on every interpreter version
        return [ _flattenOpPrecTokens(tok) for tok in tokens ]
    return tokens
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic element for the nested
          expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal
           - numTerms is the number of terms for this operator (must
              be 1 or 2)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for an arity other than 1 or 2, or an associativity
       that is neither opAssoc.LEFT nor opAssoc.RIGHT.
    """
    ret = Forward()
    # innermost level: a base element or a parenthesised full expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        thisExpr = Forward()#.setName("expr%d" % i)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr ) )
            elif arity == 2:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr + lastExpr ) )
            else:
                raise ValueError("operator must be unary (1) or binary (2)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
                matchExpr |= lastExpr
            elif arity == 2:
                matchExpr = Group( lastExpr + ZeroOrMore( opExpr + thisExpr ) )
            else:
                raise ValueError("operator must be unary (1) or binary (2)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        thisExpr << ( matchExpr )
        # each level is built on top of the previous one
        lastExpr = thisExpr
    ret << lastExpr
    ret.setParseAction(_flattenOpPrecTokens)
    return Group(ret)
# Quoted-string expressions.  Inside the quotes each pattern accepts any
# character other than the quote itself, a newline, a carriage return or a
# backslash; a doubled quote character; or a backslash followed by any
# character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\.))*"'
    ).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\.))*'"
    ).setName("string enclosed in single quotes")
# Either of the two forms above, combined into a single regular expression.
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\.))*')'''
    ).setName("quotedString using single or double quotes")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.

       Raises ValueError if opener and closer are equal, or if no content
       expression is given and opener/closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # A default content expression can only be derived from plain string
        # delimiters: a run of characters that are neither a delimiter
        # character nor default whitespace.
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
        content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS).setParseAction(lambda t:t[0].strip()))
    ret = Forward()
    # A nested list is the whole expression again, wrapped in the delimiters
    # (which are suppressed from the results) and grouped.
    nestedGroup = Group( Suppress(opener) + ret + Suppress(closer) )
    if ignoreExpr is not None:
        ret << ZeroOrMore( ignoreExpr | content | nestedGroup )
    else:
        ret << ZeroOrMore( content | nestedGroup )
    return ret
# 8-bit character sets, written in srange's own "\0x.." hex notation.
# alphas8bit covers codes 0xc0-0xff except 0xd7 and 0xf7; those two codes,
# together with 0xa1-0xbf, make up punc8bit.
# NOTE(review): presumably Latin-1 letters and punctuation -- confirm encoding.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
# Generic HTML tag expressions: match an opening/closing tag with any tag name
# made of a leading letter followed by letters, digits, "_" or ":".
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))

# Matches one of the common named entities (e.g. "&amp;") as a single token;
# the bare entity name is available under the results name "entity".
commonHTMLEntity = Combine("&" + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";")

# Entity name -> replacement character.  "quot" is the double-quote character
# (it was previously mapped to an apostrophe by mistake).
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(), '><& "'))

# Parse action for commonHTMLEntity: returns the replacement character for the
# matched entity name, or None when the name is not in the map.
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# "/*" through the first following "*/"
cStyleComment = Regex(
    r"/\*(?:[^*]*\*+)+?/"
    ).setName("C style comment")

# "<!--" through the first following "-->", newlines included
htmlComment = Regex(r"<!--[\s\S]*?-->")

# everything up to the end of the current line; leading whitespace is kept
restOfLine = Regex(r".*").leaveWhitespace()

# "//" to end of line, continuing past a backslash-escaped newline
dblSlashComment = Regex(
    r"\/\/(\\\n|.)*"
    ).setName("// comment")

# either a "/* ... */" block or a "//" line comment
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
    ).setName("C++ style comment")

# the Java form is an alias for the very same expression object
javaStyleComment = cppStyleComment

# "#" to end of line
pythonStyleComment = Regex(
    r"#.*"
    ).setName("Python style comment")
# Every printable character except the comma itself.
_noncomma = "".join( [ ch for ch in printables if ch != "," ] )

# One item of a comma-separated list: words of non-comma characters, where a
# run of blanks/tabs is kept only if it is not immediately followed by a comma
# or the end of the line.  Combine joins the pieces into a single token.
_itemWord = Word(_noncomma)
_innerBlanks = Optional( Word(" \t") +
                         ~Literal(",") + ~LineEnd() )
_commasepitem = Combine(OneOrMore(_itemWord + _innerBlanks)).streamline().setName("commaItem")

# A comma-delimited list whose items are quoted strings or plain items; an
# empty position yields the default "".
_listItem = Optional( quotedString | _commasepitem, default="")
commaSeparatedList = delimitedList( _listItem ).setName("commaSeparatedList")
3176 if __name__
== "__main__":
3178 def test( teststring
):
3179 print teststring
,"->",
3181 tokens
= simpleSQL
.parseString( teststring
)
3182 tokenlist
= tokens
.asList()
3184 print "tokens = ", tokens
3185 print "tokens.columns =", tokens
.columns
3186 print "tokens.tables =", tokens
.tables
3187 print tokens
.asXML("SQL",True)
3188 except ParseException
, err
:
3190 print " "*(err
.column
-1) + "^"
3194 selectToken
= CaselessLiteral( "select" )
3195 fromToken
= CaselessLiteral( "from" )
3197 ident
= Word( alphas
, alphanums
+ "_$" )
3198 columnName
= delimitedList( ident
, ".", combine
=True ).setParseAction( upcaseTokens
)
3199 columnNameList
= Group( delimitedList( columnName
) )#.setName("columns")
3200 tableName
= delimitedList( ident
, ".", combine
=True ).setParseAction( upcaseTokens
)
3201 tableNameList
= Group( delimitedList( tableName
) )#.setName("tables")
3202 simpleSQL
= ( selectToken
+ \
3203 ( '*' | columnNameList
).setResultsName( "columns" ) + \
3205 tableNameList
.setResultsName( "tables" ) )
3207 test( "SELECT * from XYZZY, ABC" )
3208 test( "select * from SYS.XYZZY" )
3209 test( "Select A from Sys.dual" )
3210 test( "Select AA,BB,CC from Sys.dual" )
3211 test( "Select A, B, C from Sys.dual" )
3212 test( "Select A, B, C from Sys.dual" )
3213 test( "Xelect A, B, C from Sys.dual" )
3214 test( "Select A, B, C frox Sys.dual" )
3216 test( "Select ^^^ frox Sys.dual" )
3217 test( "Select A, B, C from Sys.dual, Table2 " )