# -----------------------------------------------------------------------------
# Copyright (C) 2001-2011,
# David M. Beazley (Dabeaz LLC)
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice,
#   this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# * Neither the name of David Beazley or Dabeaz LLC may be used to
#   endorse or promote products derived from this software without
#   specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
# This implements an LR parser that is constructed from grammar rules defined
# as Python functions.  The grammar is specified by supplying the BNF inside
# Python documentation strings.  The inspiration for this technique was borrowed
# from John Aycock's Spark parsing system.  PLY might be viewed as a cross between
# Spark and the GNU bison utility.
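#
# For instance, a grammar rule is a function whose docstring holds the BNF
# (a minimal illustrative sketch; it assumes a 'tokens' list and a lexer
# already built with ply.lex):
#
#     import ply.yacc as yacc
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]
#
#     parser = yacc.yacc()
#     result = parser.parse("1 + 2")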
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
# parsers to co-exist).  However, most of the variables used during table
# construction are defined in terms of global variables.  Users shouldn't
# notice unless they are trying to define multiple parsers at the same
# time using threads (in which case they should have their head examined).
# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
# by the more efficient DeRemer and Pennello algorithm.
# :::::::: WARNING :::::::
#
# Construction of LR parsing tables is fairly complicated and expensive.
# To make this module run fast, a *LOT* of work has been put into
# optimization---often at the expense of readability and what one might
# consider to be good Python "coding style."  Modify the code at your
# own risk!
# ----------------------------------------------------------------------------
__tabversion__ = "3.2"       # Table version
#-----------------------------------------------------------------------------
#                     === User configurable parameters ===
#
# Change these to modify the default behavior of yacc (if you wish)
#-----------------------------------------------------------------------------
yaccdebug   = 1                # Debugging mode.  If set, yacc generates a
                               # 'parser.out' file in the current directory

debug_file  = 'parser.out'     # Default name of the debugging file
tab_module  = 'parsetab'       # Default name of the table module
default_lr  = 'LALR'           # Default LR table generation method

error_count = 3                # Number of symbols that must be shifted to leave recovery mode

yaccdevel   = 0                # Set to True if developing yacc.  This turns off optimized
                               # implementations of certain functions.

resultlimit = 40               # Size limit of results when running in debug mode.

pickle_protocol = 0            # Protocol to use when writing pickle files
import re, types, sys, os.path
# Compatibility function for python 2.6/3.0
if sys.version_info[0] < 3:
    def func_code(f):
        return f.func_code
else:
    def func_code(f):
        return f.__code__

# Compatibility
try:
    MAXINT = sys.maxint
except AttributeError:
    MAXINT = sys.maxsize

# Python 2.x/3.0 compatibility.
def load_ply_lex():
    if sys.version_info[0] < 3:
        import lex
    else:
        import ply.lex as lex
    return lex
# This object is a stand-in for a logging object created by the
# logging module.  PLY will use this by default to create things
# such as the parser.out file.  If a user wants more detailed
# information, they can create their own logging object and pass
# it into PLY.
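#
# For example (an illustrative sketch; it assumes the standard logging module
# and the errorlog/debuglog keyword arguments of yacc.yacc()):
#
#     import logging
#     logging.basicConfig(level=logging.DEBUG, filename='parselog.txt')
#     log = logging.getLogger()
#     parser = yacc.yacc(debuglog=log, errorlog=log)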
class PlyLogger(object):
    def __init__(self,f):
        self.f = f
    def debug(self,msg,*args,**kwargs):
        self.f.write((msg % args) + "\n")
    info = debug

    def warning(self,msg,*args,**kwargs):
        self.f.write("WARNING: " + (msg % args) + "\n")

    def error(self,msg,*args,**kwargs):
        self.f.write("ERROR: " + (msg % args) + "\n")

    critical = debug
# Null logger is used when no output is generated. Does nothing.
class NullLogger(object):
    def __getattribute__(self,name):
        return self

    def __call__(self,*args,**kwargs):
        return self
# Exception raised for yacc-related errors
class YaccError(Exception): pass
# Format the result message that the parser produces when running in debug mode.
def format_result(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) > resultlimit:
        repr_str = repr_str[:resultlimit]+" ..."
    result = "<%s @ 0x%x> (%s)" % (type(r).__name__,id(r),repr_str)
    return result
# Format stack entries when the parser is running in debug mode
def format_stack_entry(r):
    repr_str = repr(r)
    if '\n' in repr_str: repr_str = repr(repr_str)
    if len(repr_str) < 16:
        return repr_str
    else:
        return "<%s @ 0x%x>" % (type(r).__name__,id(r))
#-----------------------------------------------------------------------------
#                        ===  LR Parsing Engine ===
#
# The following classes are used for the LR parser itself.  These are not
# used during table construction and are independent of the actual LR
# table generation algorithm
#-----------------------------------------------------------------------------
# This class is used to hold non-terminal grammar symbols during parsing.
# It normally has the following attributes set:
#        .type       = Grammar symbol type
#        .value      = Symbol value
#        .lineno     = Starting line number
#        .endlineno  = Ending line number (optional, set automatically)
#        .lexpos     = Starting lex position
#        .endlexpos  = Ending lex position (optional, set automatically)

class YaccSymbol:
    def __str__(self):    return self.type
    def __repr__(self):   return str(self)
# This class is a wrapper around the objects actually passed to each
# grammar rule.  Index lookup and assignment actually assign the
# .value attribute of the underlying YaccSymbol object.
# The lineno() method returns the line number of a given
# item (or 0 if not defined).  The linespan() method returns
# a tuple of (startline,endline) representing the range of lines
# for a symbol.  The lexspan() method returns a tuple (lexpos,endlexpos)
# representing the range of positional information for a symbol.
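#
# For example, inside a grammar rule (an illustrative sketch):
#
#     def p_expr_plus(p):
#         'expr : expr PLUS term'
#         p[0] = p[1] + p[3]           # .value of each right-hand symbol
#         startline = p.lineno(1)      # line number of the first symbol
#         span = p.lexspan(3)          # (lexpos, endlexpos) of the third symbol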
class YaccProduction:
    def __init__(self,s,stack=None):
        self.slice = s
        self.stack = stack
        self.lexer = None
        self.parser = None

    def __getitem__(self,n):
        if n >= 0: return self.slice[n].value
        else: return self.stack[n].value

    def __setitem__(self,n,v):
        self.slice[n].value = v

    def __getslice__(self,i,j):
        return [s.value for s in self.slice[i:j]]

    def __len__(self):
        return len(self.slice)

    def lineno(self,n):
        return getattr(self.slice[n],"lineno",0)

    def set_lineno(self,n,lineno):
        self.slice[n].lineno = lineno

    def linespan(self,n):
        startline = getattr(self.slice[n],"lineno",0)
        endline = getattr(self.slice[n],"endlineno",startline)
        return startline,endline

    def lexpos(self,n):
        return getattr(self.slice[n],"lexpos",0)

    def lexspan(self,n):
        startpos = getattr(self.slice[n],"lexpos",0)
        endpos = getattr(self.slice[n],"endlexpos",startpos)
        return startpos,endpos
# -----------------------------------------------------------------------------
#                               == LRParser ==
#
# The LR Parsing engine.
# -----------------------------------------------------------------------------
class LRParser:
    def __init__(self,lrtab,errorf):
        self.productions = lrtab.lr_productions
        self.action      = lrtab.lr_action
        self.goto        = lrtab.lr_goto
        self.errorfunc   = errorf

    def errok(self):
        self.errorok = 1

    def restart(self):
        del self.statestack[:]
        del self.symstack[:]
        sym = YaccSymbol()
        sym.type = '$end'
        self.symstack.append(sym)
        self.statestack.append(0)
    def parse(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        if debug or yaccdevel:
            if isinstance(debug,int):
                debug = PlyLogger(sys.stderr)
            return self.parsedebug(input,lexer,debug,tracking,tokenfunc)
        elif tracking:
            return self.parseopt(input,lexer,debug,tracking,tokenfunc)
        else:
            return self.parseopt_notrack(input,lexer,debug,tracking,tokenfunc)
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parsedebug().
    #
    # This is the debugging enabled version of parse().  All changes made to the
    # parsing engine should be made here.  For the non-debugging version,
    # copy this code to a method parseopt() and delete all of the sections
    # enclosed in "# --! DEBUG" markers.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parsedebug(self,input=None,lexer=None,debug=None,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery

        # --! DEBUG
        debug.info("PLY: PARSE DEBUG START")
        # --! DEBUG
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = "$end"
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            # --! DEBUG
            debug.debug('State  : %s', state)
            # --! DEBUG

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = "$end"

            # --! DEBUG
            debug.debug('Stack  : %s',
                        ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
            # --! DEBUG
            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)
            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    # --! DEBUG
                    debug.debug("Action : Shift and goto state %s", t)
                    # --! DEBUG

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    # --! DEBUG
                    if plen:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, "["+",".join([format_stack_entry(_v.value) for _v in symstack[-plen:]])+"]",-t)
                    else:
                        debug.info("Action : Reduce rule [%s] with %s and goto state %d", p.str, [],-t)
                    # --! DEBUG

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
                        # --! TRACKING
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            # --! DEBUG
                            debug.info("Result : %s", format_result(pslice[0]))
                            # --! DEBUG
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                if t == 0:
                    n = symstack[-1]
                    result = getattr(n,"value",None)
                    # --! DEBUG
                    debug.info("Done   : Returning %s", format_result(result))
                    debug.info("PLY: PARSE DEBUG END")
                    # --! DEBUG
                    return result
            if t == None:

                # --! DEBUG
                debug.error('Error  : %s',
                            ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip())
                # --! DEBUG
                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == "$end":
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count
                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != "$end":
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue
                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == "$end":
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt().
    #
    # Optimized version of parse() method.  DO NOT EDIT THIS CODE DIRECTLY.
    # Edit the debug version above, then copy any modifications to the method
    # below while removing #--! DEBUG sections.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parseopt(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'
            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)
            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym

                        # --! TRACKING
                        if tracking:
                            t1 = targ[1]
                            sym.lineno = t1.lineno
                            sym.lexpos = t1.lexpos
                            t1 = targ[-1]
                            sym.endlineno = getattr(t1,"endlineno",t1.lineno)
                            sym.endlexpos = getattr(t1,"endlexpos",t1.lexpos)
                        # --! TRACKING
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        # --! TRACKING
                        if tracking:
                            sym.lineno = lexer.lineno
                            sym.lexpos = lexer.lexpos
                        # --! TRACKING

                        targ = [ sym ]
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)
            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count
                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue
                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # parseopt_notrack().
    #
    # Optimized version of parseopt() with line number tracking removed.
    # DO NOT EDIT THIS CODE DIRECTLY.  Copy the optimized version and remove
    # code in the #--! TRACKING sections
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    def parseopt_notrack(self,input=None,lexer=None,debug=0,tracking=0,tokenfunc=None):
        lookahead = None                 # Current lookahead symbol
        lookaheadstack = [ ]             # Stack of lookahead symbols
        actions = self.action            # Local reference to action table (to avoid lookup on self.)
        goto    = self.goto              # Local reference to goto table (to avoid lookup on self.)
        prod    = self.productions       # Local reference to production list (to avoid lookup on self.)
        pslice  = YaccProduction(None)   # Production object passed to grammar rules
        errorcount = 0                   # Used during error recovery
        # If no lexer was given, we will try to use the lex module
        if not lexer:
            lex = load_ply_lex()
            lexer = lex.lexer

        # Set up the lexer and parser objects on pslice
        pslice.lexer = lexer
        pslice.parser = self

        # If input was supplied, pass to lexer
        if input is not None:
            lexer.input(input)

        if tokenfunc is None:
            # Tokenize function
            get_token = lexer.token
        else:
            get_token = tokenfunc

        # Set up the state and symbol stacks

        statestack = [ ]                # Stack of parsing states
        self.statestack = statestack
        symstack   = [ ]                # Stack of grammar symbols
        self.symstack = symstack

        pslice.stack = symstack         # Put in the production
        errtoken   = None               # Err token

        # The start state is assumed to be (0,$end)

        statestack.append(0)
        sym = YaccSymbol()
        sym.type = '$end'
        symstack.append(sym)
        state = 0
        while 1:
            # Get the next symbol on the input.  If a lookahead symbol
            # is already set, we just use that. Otherwise, we'll pull
            # the next token off of the lookaheadstack or from the lexer

            if not lookahead:
                if not lookaheadstack:
                    lookahead = get_token()     # Get the next token
                else:
                    lookahead = lookaheadstack.pop()
                if not lookahead:
                    lookahead = YaccSymbol()
                    lookahead.type = '$end'
            # Check the action table
            ltype = lookahead.type
            t = actions[state].get(ltype)
            if t is not None:
                if t > 0:
                    # shift a symbol on the stack
                    statestack.append(t)
                    state = t

                    symstack.append(lookahead)
                    lookahead = None

                    # Decrease error count on successful shift
                    if errorcount: errorcount -= 1
                    continue
                if t < 0:
                    # reduce a symbol on the stack, emit a production
                    p = prod[-t]
                    pname = p.name
                    plen  = p.len

                    # Get production function
                    sym = YaccSymbol()
                    sym.type = pname       # Production name
                    sym.value = None

                    if plen:
                        targ = symstack[-plen-1:]
                        targ[0] = sym
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # below as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            del symstack[-plen:]
                            del statestack[-plen:]
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                    else:

                        targ = [ sym ]

                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                        # The code enclosed in this section is duplicated
                        # above as a performance optimization.  Make sure
                        # changes get made in both locations.

                        pslice.slice = targ

                        try:
                            # Call the grammar rule with our special slice object
                            p.callable(pslice)
                            symstack.append(sym)
                            state = goto[statestack[-1]][pname]
                            statestack.append(state)
                        except SyntaxError:
                            # If an error was set. Enter error recovery state
                            lookaheadstack.append(lookahead)
                            symstack.pop()
                            statestack.pop()
                            state = statestack[-1]
                            sym.type = 'error'
                            lookahead = sym
                            errorcount = error_count
                            self.errorok = 0
                        continue
                        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
                if t == 0:
                    n = symstack[-1]
                    return getattr(n,"value",None)
            if t == None:

                # We have some kind of parsing error here.  To handle
                # this, we are going to push the current token onto
                # the tokenstack and replace it with an 'error' token.
                # If there are any synchronization rules, they may
                # catch it.
                #
                # In addition to pushing the error token, we call
                # the user defined p_error() function if this is the
                # first syntax error.  This function is only called if
                # errorcount == 0.
                if errorcount == 0 or self.errorok:
                    errorcount = error_count
                    self.errorok = 0
                    errtoken = lookahead
                    if errtoken.type == '$end':
                        errtoken = None               # End of file!
                    if self.errorfunc:
                        global errok,token,restart
                        errok = self.errok        # Set some special functions available in error recovery
                        token = get_token
                        restart = self.restart
                        if errtoken and not hasattr(errtoken,'lexer'):
                            errtoken.lexer = lexer
                        tok = self.errorfunc(errtoken)
                        del errok, token, restart   # Delete special functions

                        if self.errorok:
                            # User must have done some kind of panic
                            # mode recovery on their own.  The
                            # returned token is the next lookahead
                            lookahead = tok
                            errtoken = None
                            continue
                    else:
                        if errtoken:
                            if hasattr(errtoken,"lineno"): lineno = lookahead.lineno
                            else: lineno = 0
                            if lineno:
                                sys.stderr.write("yacc: Syntax error at line %d, token=%s\n" % (lineno, errtoken.type))
                            else:
                                sys.stderr.write("yacc: Syntax error, token=%s\n" % errtoken.type)
                        else:
                            sys.stderr.write("yacc: Parse error in input. EOF\n")
                        return

                else:
                    errorcount = error_count
                # case 1:  the statestack only has 1 entry on it.  If we're in this state, the
                # entire parse has been rolled back and we're completely hosed.  The token is
                # discarded and we just keep going.

                if len(statestack) <= 1 and lookahead.type != '$end':
                    lookahead = None
                    errtoken = None
                    state = 0
                    # Nuke the pushback stack
                    del lookaheadstack[:]
                    continue
                # case 2: the statestack has a couple of entries on it, but we're
                # at the end of the file. nuke the top entry and generate an error token

                # Start nuking entries on the stack
                if lookahead.type == '$end':
                    # Whoa. We're really hosed here. Bail out
                    return

                if lookahead.type != 'error':
                    sym = symstack[-1]
                    if sym.type == 'error':
                        # Hmmm. Error is on top of stack, we'll just nuke input
                        # symbol and continue
                        lookahead = None
                        continue
                    t = YaccSymbol()
                    t.type = 'error'
                    if hasattr(lookahead,"lineno"):
                        t.lineno = lookahead.lineno
                    t.value = lookahead
                    lookaheadstack.append(lookahead)
                    lookahead = t
                else:
                    symstack.pop()
                    statestack.pop()
                    state = statestack[-1]       # Potential bug fix

                continue

        # Call an error function here
        raise RuntimeError("yacc: internal parser error!!!\n")
# -----------------------------------------------------------------------------
#                          === Grammar Representation ===
#
# The following functions, classes, and variables are used to represent and
# manipulate the rules that make up a grammar.
# -----------------------------------------------------------------------------
# regex matching identifiers
_is_identifier = re.compile(r'^[a-zA-Z0-9_-]+$')
# -----------------------------------------------------------------------------
#                               == Production ==
#
# This class stores the raw information about a single production or grammar rule.
# A grammar rule refers to a specification such as this:
#
#       expr : expr PLUS term
#
# Here are the basic attributes defined on all productions
#
#       name     - Name of the production.  For example 'expr'
#       prod     - A list of symbols on the right side ['expr','PLUS','term']
#       prec     - Production precedence level
#       number   - Production number.
#       func     - Function that executes on reduce
#       file     - File where production function is defined
#       lineno   - Line number where production function is defined
#
# The following attributes are also defined (some optional):
#
#       len      - Length of the production (number of symbols on right hand side)
#       usyms    - Set of unique symbols found in the production
# -----------------------------------------------------------------------------
class Production(object):
    def __init__(self,number,name,prod,precedence=('right',0),func=None,file='',line=0):
        self.name     = name
        self.prod     = tuple(prod)
        self.number   = number
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.prec     = precedence

        # Internal settings used during table construction

        self.len  = len(self.prod)   # Length of the production

        # Create a list of unique production symbols used in the production
        self.usyms = [ ]
        for s in self.prod:
            if s not in self.usyms:
                self.usyms.append(s)

        # List of all LR items for the production
        self.lr_items = []
        self.lr_next = None

        # Create a string representation
        if self.prod:
            self.str = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            self.str = "%s -> <empty>" % self.name

    def __str__(self):
        return self.str

    def __repr__(self):
        return "Production("+str(self)+")"

    def __len__(self):
        return len(self.prod)

    def __nonzero__(self):
        return 1

    def __getitem__(self,index):
        return self.prod[index]

    # Return the nth lr_item from the production (or None if at the end)
    def lr_item(self,n):
        if n > len(self.prod): return None
        p = LRItem(self,n)

        # Precompute the list of productions immediately following.  Hack. Remove later
        try:
            p.lr_after = Prodnames[p.prod[n+1]]
        except (IndexError,KeyError):
            p.lr_after = []
        try:
            p.lr_before = p.prod[n-1]
        except IndexError:
            p.lr_before = None

        return p

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]
# This class serves as a minimal standin for Production objects when
# reading table data from files.  It only contains information
# actually used by the LR parsing engine, plus some additional
# debugging information.
class MiniProduction(object):
    def __init__(self,str,name,len,func,file,line):
        self.name     = name
        self.len      = len
        self.func     = func
        self.callable = None
        self.file     = file
        self.line     = line
        self.str      = str

    def __str__(self):
        return self.str

    def __repr__(self):
        return "MiniProduction(%s)" % self.str

    # Bind the production function name to a callable
    def bind(self,pdict):
        if self.func:
            self.callable = pdict[self.func]
# -----------------------------------------------------------------------------
#                                == LRItem ==
#
# This class represents a specific stage of parsing a production rule.  For
# example:
#
#       expr : expr . PLUS term
#
# In the above, the "." represents the current location of the parse.  Here
# are the basic attributes:
#
#       name       - Name of the production.  For example 'expr'
#       prod       - A list of symbols on the right side ['expr','.', 'PLUS','term']
#       number     - Production number.
#
#       lr_next    - Next LR item. Example, if we are 'expr -> expr . PLUS term'
#                    then lr_next refers to 'expr -> expr PLUS . term'
#       lr_index   - LR item index (location of the ".") in the prod list.
#       lookaheads - LALR lookahead symbols for this item
#       len        - Length of the production (number of symbols on right hand side)
#       lr_after   - List of all productions that immediately follow
#       lr_before  - Grammar symbol immediately before
# -----------------------------------------------------------------------------
class LRItem(object):
    def __init__(self,p,n):
        self.name       = p.name
        self.prod       = list(p.prod)
        self.number     = p.number
        self.lr_index   = n
        self.lookaheads = { }
        self.prod.insert(n,".")
        self.prod       = tuple(self.prod)
        self.len        = len(self.prod)
        self.usyms      = p.usyms

    def __str__(self):
        if self.prod:
            s = "%s -> %s" % (self.name," ".join(self.prod))
        else:
            s = "%s -> <empty>" % self.name
        return s

    def __repr__(self):
        return "LRItem("+str(self)+")"
# -----------------------------------------------------------------------------
# rightmost_terminal()
#
# Return the rightmost terminal from a list of symbols.  Used in add_production()
# -----------------------------------------------------------------------------
def rightmost_terminal(symbols, terminals):
    i = len(symbols) - 1
    while i >= 0:
        if symbols[i] in terminals:
            return symbols[i]
        i -= 1
    return None
# -----------------------------------------------------------------------------
#                           === GRAMMAR CLASS ===
#
# The following class represents the contents of the specified grammar along
# with various computed properties such as first sets, follow sets, LR items, etc.
# This data is used for critical parts of the table generation process later.
# -----------------------------------------------------------------------------

class GrammarError(YaccError): pass
class Grammar(object):
    def __init__(self,terminals):
        self.Productions  = [None]  # A list of all of the productions.  The first
                                    # entry is always reserved for the purpose of
                                    # building an augmented grammar

        self.Prodnames    = { }     # A dictionary mapping the names of nonterminals to a list of all
                                    # productions of that nonterminal.

        self.Prodmap      = { }     # A dictionary that is only used to detect duplicate
                                    # productions.

        self.Terminals    = { }     # A dictionary mapping the names of terminal symbols to a
                                    # list of the rules where they are used.

        for term in terminals:
            self.Terminals[term] = []

        self.Terminals['error'] = []

        self.Nonterminals = { }     # A dictionary mapping names of nonterminals to a list
                                    # of rule numbers where they are used.

        self.First        = { }     # A dictionary of precomputed FIRST(x) symbols

        self.Follow       = { }     # A dictionary of precomputed FOLLOW(x) symbols

        self.Precedence   = { }     # Precedence rules for each terminal. Contains tuples of the
                                    # form ('right',level) or ('nonassoc', level) or ('left',level)

        self.UsedPrecedence = { }   # Precedence rules that were actually used by the grammar.
                                    # This is only used to provide error checking and to generate
                                    # a warning about unused precedence rules.

        self.Start = None           # Starting symbol for the grammar

    def __len__(self):
        return len(self.Productions)

    def __getitem__(self,index):
        return self.Productions[index]
    # -----------------------------------------------------------------------------
    # set_precedence()
    #
    # Sets the precedence for a given terminal. assoc is the associativity such as
    # 'left','right', or 'nonassoc'.  level is a numeric level.
    # -----------------------------------------------------------------------------

    def set_precedence(self,term,assoc,level):
        assert self.Productions == [None],"Must call set_precedence() before add_production()"
        if term in self.Precedence:
            raise GrammarError("Precedence already specified for terminal '%s'" % term)
        if assoc not in ['left','right','nonassoc']:
            raise GrammarError("Associativity must be one of 'left','right', or 'nonassoc'")
        self.Precedence[term] = (assoc,level)
    # -----------------------------------------------------------------------------
    # add_production()
    #
    # Given an action function, this function assembles a production rule and
    # computes its precedence level.
    #
    # The production rule is supplied as a list of symbols.  For example,
    # a rule such as 'expr : expr PLUS term' has a production name of 'expr' and
    # symbols ['expr','PLUS','term'].
    #
    # Precedence is determined by the precedence of the right-most terminal symbol
    # or the precedence of a terminal specified by %prec.
    #
    # A variety of error checks are performed to make sure production symbols
    # are valid and that %prec is used correctly.
    # -----------------------------------------------------------------------------
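    # For example (an illustrative sketch of how %prec reaches this method
    # from a user's grammar rule):
    #
    #     def p_expr_uminus(p):
    #         'expr : MINUS expr %prec UMINUS'
    #         p[0] = -p[2]
    #
    # Here the rule's precedence is taken from the 'UMINUS' entry of the
    # precedence table rather than from the rightmost terminal 'MINUS'.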
    def add_production(self,prodname,syms,func=None,file='',line=0):

        if prodname in self.Terminals:
            raise GrammarError("%s:%d: Illegal rule name '%s'. Already defined as a token" % (file,line,prodname))
        if prodname == 'error':
            raise GrammarError("%s:%d: Illegal rule name '%s'. error is a reserved word" % (file,line,prodname))
        if not _is_identifier.match(prodname):
            raise GrammarError("%s:%d: Illegal rule name '%s'" % (file,line,prodname))

        # Look for literal tokens
        for n,s in enumerate(syms):
            if s[0] in "'\"":
                try:
                    c = eval(s)
                    if (len(c) > 1):
                        raise GrammarError("%s:%d: Literal token %s in rule '%s' may only be a single character" % (file,line,s, prodname))
                    if not c in self.Terminals:
                        self.Terminals[c] = []
                    syms[n] = c
                    continue
                except SyntaxError:
                    pass
            if not _is_identifier.match(s) and s != '%prec':
                raise GrammarError("%s:%d: Illegal name '%s' in rule '%s'" % (file,line,s, prodname))

        # Determine the precedence level
        if '%prec' in syms:
            if syms[-1] == '%prec':
                raise GrammarError("%s:%d: Syntax error. Nothing follows %%prec" % (file,line))
            if syms[-2] != '%prec':
                raise GrammarError("%s:%d: Syntax error. %%prec can only appear at the end of a grammar rule" % (file,line))
            precname = syms[-1]

            prodprec = self.Precedence.get(precname,None)
            if prodprec is None:
                raise GrammarError("%s:%d: Nothing known about the precedence of '%s'" % (file,line,precname))
            else:
                self.UsedPrecedence[precname] = 1
            del syms[-2:]     # Drop %prec from the rule
        else:
            # If no %prec, precedence is determined by the rightmost terminal symbol
            precname = rightmost_terminal(syms,self.Terminals)
            prodprec = self.Precedence.get(precname,('right',0))

        # See if the rule is already in the rulemap
        map = "%s -> %s" % (prodname,syms)
        if map in self.Prodmap:
            m = self.Prodmap[map]
            raise GrammarError("%s:%d: Duplicate rule %s. " % (file,line, m) +
                               "Previous definition at %s:%d" % (m.file, m.line))

        # From this point on, everything is valid.  Create a new Production instance
        pnumber  = len(self.Productions)
        if not prodname in self.Nonterminals:
            self.Nonterminals[prodname] = [ ]

        # Add the production number to Terminals and Nonterminals
        for t in syms:
            if t in self.Terminals:
                self.Terminals[t].append(pnumber)
            else:
                if not t in self.Nonterminals:
                    self.Nonterminals[t] = [ ]
                self.Nonterminals[t].append(pnumber)

        # Create a production and add it to the list of productions
        p = Production(pnumber,prodname,syms,prodprec,func,file,line)
        self.Productions.append(p)
        self.Prodmap[map] = p

        # Add to the global productions list
        try:
            self.Prodnames[prodname].append(p)
        except KeyError:
            self.Prodnames[prodname] = [ p ]
    # -----------------------------------------------------------------------------
    # set_start()
    #
    # Sets the starting symbol and creates the augmented grammar.  Production
    # rule 0 is S' -> start where start is the start symbol.
    # -----------------------------------------------------------------------------

    def set_start(self,start=None):
        if not start:
            start = self.Productions[1].name
        if start not in self.Nonterminals:
            raise GrammarError("start symbol %s undefined" % start)
        self.Productions[0] = Production(0,"S'",[start])
        self.Nonterminals[start].append(0)
        self.Start = start
    # -----------------------------------------------------------------------------
    # find_unreachable()
    #
    # Find all of the nonterminal symbols that can't be reached from the starting
    # symbol.  Returns a list of nonterminals that can't be reached.
    # -----------------------------------------------------------------------------

    def find_unreachable(self):

        # Mark all symbols that are reachable from a symbol s
        def mark_reachable_from(s):
            if reachable[s]:
                # We've already reached symbol s.
                return
            reachable[s] = 1
            for p in self.Prodnames.get(s,[]):
                for r in p.prod:
                    mark_reachable_from(r)

        reachable = { }
        for s in list(self.Terminals) + list(self.Nonterminals):
            reachable[s] = 0

        mark_reachable_from( self.Productions[0].prod[0] )

        return [s for s in list(self.Nonterminals)
                  if not reachable[s]]
    # -----------------------------------------------------------------------------
    # infinite_cycles()
    #
    # This function looks at the various parsing rules and tries to detect
    # infinite recursion cycles (grammar rules where there is no possible way
    # to derive a string of only terminals).
    # -----------------------------------------------------------------------------
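    # For example (an illustrative sketch), the rule below has no alternative
    # that bottoms out in terminals, so every derivation of 'a' still contains
    # 'a' and the symbol would be reported as non-terminating:
    #
    #     a : a PLUS a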
    def infinite_cycles(self):
        terminates = {}

        # Terminals:
        for t in self.Terminals:
            terminates[t] = 1

        terminates['$end'] = 1

        # Nonterminals:

        # Initialize to false:
        for n in self.Nonterminals:
            terminates[n] = 0

        # Then propagate termination until no change:
        while 1:
            some_change = 0
            for (n,pl) in self.Prodnames.items():
                # Nonterminal n terminates iff any of its productions terminates.
                for p in pl:
                    # Production p terminates iff all of its rhs symbols terminate.
                    for s in p.prod:
                        if not terminates[s]:
                            # The symbol s does not terminate,
                            # so production p does not terminate.
                            p_terminates = 0
                            break
                    else:
                        # didn't break from the loop,
                        # so every symbol s terminates
                        # so production p terminates.
                        p_terminates = 1

                    if p_terminates:
                        # symbol n terminates!
                        if not terminates[n]:
                            terminates[n] = 1
                            some_change = 1
                        # Don't need to consider any more productions for this n.
                        break

            if not some_change:
                break

        infinite = []
        for (s,term) in terminates.items():
            if not term:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    # s is used-but-not-defined, and we've already warned of that,
                    # so it would be overkill to say that it's also non-terminating.
                    pass
                else:
                    infinite.append(s)

        return infinite
    # -----------------------------------------------------------------------------
    # undefined_symbols()
    #
    # Find all symbols that were used in the grammar, but not defined as tokens or
    # grammar rules.  Returns a list of tuples (sym, prod) where sym is the symbol
    # and prod is the production where the symbol was used.
    # -----------------------------------------------------------------------------
    def undefined_symbols(self):
        result = []
        for p in self.Productions:
            if not p: continue

            for s in p.prod:
                if not s in self.Prodnames and not s in self.Terminals and s != 'error':
                    result.append((s,p))
        return result
1594 # unused_terminals()
1596 # Find all terminals that were defined, but not used by the grammar. Returns
1597 # a list of all symbols.
1598 # -----------------------------------------------------------------------------
1599 def unused_terminals(self
):
1601 for s
,v
in self
.Terminals
.items():
1602 if s
!= 'error' and not v
:
1603 unused_tok
.append(s
)
    # ------------------------------------------------------------------------------
    # unused_rules()
    #
    # Find all grammar rules that were defined, but not used (maybe not reachable)
    # Returns a list of productions.
    # ------------------------------------------------------------------------------
    def unused_rules(self):
        unused_prod = []
        for s,v in self.Nonterminals.items():
            if not v:
                p = self.Prodnames[s][0]
                unused_prod.append(p)
        return unused_prod
    # -----------------------------------------------------------------------------
    # unused_precedence()
    #
    # Returns a list of tuples (term,precedence) corresponding to precedence
    # rules that were never used by the grammar.  term is the name of the terminal
    # on which precedence was applied and precedence is a string such as 'left' or
    # 'right' corresponding to the type of precedence.
    # -----------------------------------------------------------------------------

    def unused_precedence(self):
        unused = []
        for termname in self.Precedence:
            if not (termname in self.Terminals or termname in self.UsedPrecedence):
                unused.append((termname,self.Precedence[termname][0]))

        return unused
    # -------------------------------------------------------------------------
    # _first()
    #
    # Compute the value of FIRST1(beta) where beta is a tuple of symbols.
    #
    # During execution of compute_first1, the result may be incomplete.
    # Afterward (e.g., when called from compute_follow()), it will be complete.
    # -------------------------------------------------------------------------
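    # For example (an illustrative sketch), given the rules
    #
    #     expr : expr PLUS term
    #          | term
    #     term : NUMBER
    #
    # FIRST(term) = ['NUMBER'], and FIRST(expr) = ['NUMBER'] as well, since
    # every derivation of expr eventually begins with a NUMBER token.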
    def _first(self,beta):

        # We are computing First(x1,x2,x3,...,xn)
        result = [ ]
        for x in beta:
            x_produces_empty = 0

            # Add all the non-<empty> symbols of First[x] to the result.
            for f in self.First[x]:
                if f == '<empty>':
                    x_produces_empty = 1
                else:
                    if f not in result: result.append(f)

            if x_produces_empty:
                # We have to consider the next x in beta,
                # i.e. stay in the loop.
                pass
            else:
                # We don't have to consider any further symbols in beta.
                break
        else:
            # There was no 'break' from the loop,
            # so x_produces_empty was true for all x in beta,
            # so beta produces empty as well.
            result.append('<empty>')

        return result
    # -------------------------------------------------------------------------
    # compute_first()
    #
    # Compute the value of FIRST1(X) for all symbols
    # -------------------------------------------------------------------------
    def compute_first(self):
        if self.First:
            return self.First

        # Terminals:
        for t in self.Terminals:
            self.First[t] = [t]

        self.First['$end'] = ['$end']

        # Nonterminals:

        # Initialize to the empty set:
        for n in self.Nonterminals:
            self.First[n] = []

        # Then propagate symbols until no change:
        while 1:
            some_change = 0
            for n in self.Nonterminals:
                for p in self.Prodnames[n]:
                    for f in self._first(p.prod):
                        if f not in self.First[n]:
                            self.First[n].append( f )
                            some_change = 1
            if not some_change:
                break

        return self.First
    # ---------------------------------------------------------------------
    # compute_follow()
    #
    # Computes all of the follow sets for every non-terminal symbol.  The
    # follow set is the set of all symbols that might follow a given
    # non-terminal.  See the Dragon book, 2nd Ed. p. 189.
    # ---------------------------------------------------------------------
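    # For example (an illustrative sketch), given
    #
    #     expr : expr PLUS term
    #          | term
    #
    # FOLLOW(expr) contains 'PLUS' (it appears right after expr) plus '$end'
    # if expr is the start symbol, and FOLLOW(term) absorbs all of
    # FOLLOW(expr) because term appears at the end of a production for expr.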
    def compute_follow(self,start=None):
        # If already computed, return the result
        if self.Follow:
            return self.Follow

        # If first sets not computed yet, do that first.
        if not self.First:
            self.compute_first()

        # Add '$end' to the follow list of the start symbol
        for k in self.Nonterminals:
            self.Follow[k] = [ ]

        if not start:
            start = self.Productions[1].name

        self.Follow[start] = [ '$end' ]

        while 1:
            didadd = 0
            for p in self.Productions[1:]:
                # Here is the production set
                for i in range(len(p.prod)):
                    B = p.prod[i]
                    if B in self.Nonterminals:
                        # Okay. We got a non-terminal in a production
                        fst = self._first(p.prod[i+1:])
                        hasempty = 0
                        for f in fst:
                            if f != '<empty>' and f not in self.Follow[B]:
                                self.Follow[B].append(f)
                                didadd = 1
                            if f == '<empty>':
                                hasempty = 1
                        if hasempty or i == (len(p.prod)-1):
                            # Add elements of follow(a) to follow(b)
                            for f in self.Follow[p.name]:
                                if f not in self.Follow[B]:
                                    self.Follow[B].append(f)
                                    didadd = 1
            if not didadd: break
        return self.Follow
    # -----------------------------------------------------------------------------
    # build_lritems()
    #
    # This function walks the list of productions and builds a complete set of the
    # LR items.  The LR items are stored in two ways:  First, they are uniquely
    # numbered and placed in the list _lritems.  Second, a linked list of LR items
    # is built for each production.  For example:
    #
    #     E -> E PLUS E
    #
    # creates the list
    #
    #     [E -> . E PLUS E, E -> E . PLUS E, E -> E PLUS . E, E -> E PLUS E . ]
    # -----------------------------------------------------------------------------
    def build_lritems(self):
        for p in self.Productions:
            lastlri = p
            i = 0
            lr_items = []
            while 1:
                if i > len(p):
                    lri = None
                else:
                    lri = LRItem(p,i)
                    # Precompute the list of productions immediately following
                    try:
                        lri.lr_after = self.Prodnames[lri.prod[i+1]]
                    except (IndexError,KeyError):
                        lri.lr_after = []
                    try:
                        lri.lr_before = lri.prod[i-1]
                    except IndexError:
                        lri.lr_before = None

                lastlri.lr_next = lri
                if not lri: break
                lr_items.append(lri)
                lastlri = lri
                i += 1
            p.lr_items = lr_items
# -----------------------------------------------------------------------------
#                            == Class LRTable ==
#
# This class represents a basic table of LR parsing information.
# Methods for generating the tables are not defined here.  They are defined
# in the derived class LRGeneratedTable.
# -----------------------------------------------------------------------------

class VersionError(YaccError): pass

class LRTable(object):
    def __init__(self):
        self.lr_action = None
        self.lr_goto = None
        self.lr_productions = None
        self.lr_method = None
    def read_table(self,module):
        if isinstance(module,types.ModuleType):
            parsetab = module
        else:
            if sys.version_info[0] < 3:
                exec("import %s as parsetab" % module)
            else:
                env = { }
                exec("import %s as parsetab" % module, env, env)
                parsetab = env['parsetab']

        if parsetab._tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")

        self.lr_action = parsetab._lr_action
        self.lr_goto = parsetab._lr_goto

        self.lr_productions = []
        for p in parsetab._lr_productions:
            self.lr_productions.append(MiniProduction(*p))

        self.lr_method = parsetab._lr_method
        return parsetab._lr_signature
    def read_pickle(self,filename):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle

        in_f = open(filename,"rb")

        tabversion = pickle.load(in_f)
        if tabversion != __tabversion__:
            raise VersionError("yacc table file version is out of date")
        self.lr_method = pickle.load(in_f)
        signature = pickle.load(in_f)
        self.lr_action = pickle.load(in_f)
        self.lr_goto = pickle.load(in_f)
        productions = pickle.load(in_f)

        self.lr_productions = []
        for p in productions:
            self.lr_productions.append(MiniProduction(*p))

        in_f.close()
        return signature
    # Bind all production function names to callable objects in pdict
    def bind_callables(self,pdict):
        for p in self.lr_productions:
            p.bind(pdict)
# -----------------------------------------------------------------------------
#                           === LR Generator ===
#
# The following classes and functions are used to generate LR parsing tables on
# a grammar.
# -----------------------------------------------------------------------------

# -----------------------------------------------------------------------------
#                            == digraph() ==
#
# The following two functions are used to compute set valued functions
# of the form:
#
#     F(x) = F'(x) U U{F(y) | x R y}
#
# This is used to compute the values of Read() sets as well as FOLLOW sets
# in LALR(1) generation.
#
# Inputs:  X    - An input set
#          R    - A relation
#          FP   - Set-valued function
# ------------------------------------------------------------------------------
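# For example (an illustrative sketch), with X = ['a','b'], a relation R
# where 'a' R 'b' (so R('a') returns ['b'] and R('b') returns []), and
# FP returning [1] for 'a' and [2] for 'b', digraph() yields
# F = {'a': [1, 2], 'b': [2]}, since F('a') must include everything in F('b').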
def digraph(X,R,FP):
    N = { }
    for x in X:
        N[x] = 0
    stack = []
    F = { }
    for x in X:
        if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
    return F
def traverse(x,N,stack,F,X,R,FP):
    stack.append(x)
    d = len(stack)
    N[x] = d
    F[x] = FP(x)             # F(X) <- F'(x)

    rel = R(x)               # Get y's related to x
    for y in rel:
        if N[y] == 0:
            traverse(y,N,stack,F,X,R,FP)
        N[x] = min(N[x],N[y])
        for a in F.get(y,[]):
            if a not in F[x]: F[x].append(a)
    if N[x] == d:
        N[stack[-1]] = MAXINT
        F[stack[-1]] = F[x]
        element = stack.pop()
        while element != x:
            N[stack[-1]] = MAXINT
            F[stack[-1]] = F[x]
            element = stack.pop()
class LALRError(YaccError): pass
# -----------------------------------------------------------------------------
#                             == LRGeneratedTable ==
#
# This class implements the LR table generation algorithm.  There are no
# public methods except for write()
# -----------------------------------------------------------------------------

class LRGeneratedTable(LRTable):
    def __init__(self,grammar,method='LALR',log=None):
        if method not in ['SLR','LALR']:
            raise LALRError("Unsupported method %s" % method)

        self.grammar = grammar
        self.lr_method = method

        # Set up the logger
        if not log:
            log = NullLogger()
        self.log = log

        # Internal attributes
        self.lr_action     = {}        # Action table
        self.lr_goto       = {}        # Goto table
        self.lr_productions  = grammar.Productions    # Copy of grammar Production array
        self.lr_goto_cache = {}        # Cache of computed gotos
        self.lr0_cidhash   = {}        # Cache of closures

        self._add_count    = 0         # Internal counter used to detect cycles

        # Diagnostic information filled in by the table generator
        self.sr_conflict   = 0
        self.rr_conflict   = 0
        self.conflicts     = []        # List of conflicts

        self.sr_conflicts  = []
        self.rr_conflicts  = []

        # Build the tables
        self.grammar.build_lritems()
        self.grammar.compute_first()
        self.grammar.compute_follow()
        self.lr_parse_table()
    # Compute the LR(0) closure operation on I, where I is a set of LR(0) items.

    def lr0_closure(self,I):
        self._add_count += 1

        # Add everything in I to J
        J = I[:]
        didadd = 1
        while didadd:
            didadd = 0
            for j in J:
                for x in j.lr_after:
                    if getattr(x,"lr0_added",0) == self._add_count: continue
                    # Add B --> .G to J
                    J.append(x.lr_next)
                    x.lr0_added = self._add_count
                    didadd = 1

        return J
    # Compute the LR(0) goto function goto(I,X) where I is a set
    # of LR(0) items and X is a grammar symbol.  This function is written
    # in a way that guarantees uniqueness of the generated goto sets
    # (i.e. the same goto set will never be returned as two different Python
    # objects).  With uniqueness, we can later do fast set comparisons using
    # id(obj) instead of element-wise comparison.

    def lr0_goto(self,I,x):
        # First we look for a previously cached entry
        g = self.lr_goto_cache.get((id(I),x),None)
        if g: return g

        # Now we generate the goto set in a way that guarantees uniqueness
        # of the result

        s = self.lr_goto_cache.get(x,None)
        if not s:
            s = { }
            self.lr_goto_cache[x] = s

        gs = [ ]
        for p in I:
            n = p.lr_next
            if n and n.lr_before == x:
                s1 = s.get(id(n),None)
                if not s1:
                    s1 = { }
                    s[id(n)] = s1
                gs.append(n)
                s = s1
        g = s.get('$end',None)
        if not g:
            if gs:
                g = self.lr0_closure(gs)
                s['$end'] = g
            else:
                s['$end'] = gs
        self.lr_goto_cache[(id(I),x)] = g
        return g
    # Compute the LR(0) sets of item function
    def lr0_items(self):

        C = [ self.lr0_closure([self.grammar.Productions[0].lr_next]) ]
        i = 0
        for I in C:
            self.lr0_cidhash[id(I)] = i
            i += 1

        # Loop over the items in C and each grammar symbols
        i = 0
        while i < len(C):
            I = C[i]
            i += 1

            # Collect all of the symbols that could possibly be in the goto(I,X) sets
            asyms = { }
            for ii in I:
                for s in ii.usyms:
                    asyms[s] = None

            for x in asyms:
                g = self.lr0_goto(I,x)
                if not g: continue
                if id(g) in self.lr0_cidhash: continue
                self.lr0_cidhash[id(g)] = len(C)
                C.append(g)

        return C
    # -----------------------------------------------------------------------------
    #                       ==== LALR(1) Parsing ====
    #
    # LALR(1) parsing is almost exactly the same as SLR except that instead of
    # relying upon Follow() sets when performing reductions, a more selective
    # lookahead set that incorporates the state of the LR(0) machine is utilized.
    # Thus, we mainly just have to focus on calculating the lookahead sets.
    #
    # The method used here is due to DeRemer and Pennello (1982).
    #
    # DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
    #     Lookahead Sets", ACM Transactions on Programming Languages and Systems,
    #     Vol. 4, No. 4, Oct. 1982, pp. 615-649
    #
    # Further details can also be found in:
    #
    # J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
    #     McGraw-Hill Book Company, (1985).
    # -----------------------------------------------------------------------------
    # -----------------------------------------------------------------------------
    # compute_nullable_nonterminals()
    #
    # Creates a dictionary containing all of the non-terminals that might produce
    # an empty production.
    # -----------------------------------------------------------------------------
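    # For example (an illustrative sketch), with the rules
    #
    #     opt_semi : SEMI
    #              |                (an empty production)
    #
    # 'opt_semi' would appear in the returned dictionary, since one of its
    # productions derives the empty string.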
    def compute_nullable_nonterminals(self):
        nullable = {}
        num_nullable = 0
        while 1:
            for p in self.grammar.Productions[1:]:
                if p.len == 0:
                    nullable[p.name] = 1
                    continue
                for t in p.prod:
                    if not t in nullable: break
                else:
                    nullable[p.name] = 1
            if len(nullable) == num_nullable: break
            num_nullable = len(nullable)
        return nullable
    # -----------------------------------------------------------------------------
    # find_nonterminal_trans(C)
    #
    # Given a set of LR(0) items, this function finds all of the non-terminal
    # transitions.  These are transitions in which a dot appears immediately before
    # a non-terminal.  Returns a list of tuples of the form (state,N) where state
    # is the state number and N is the nonterminal symbol.
    #
    # The input C is the set of LR(0) items.
    # -----------------------------------------------------------------------------
    def find_nonterminal_transitions(self,C):
        trans = []
        for state in range(len(C)):
            for p in C[state]:
                if p.lr_index < p.len - 1:
                    t = (state,p.prod[p.lr_index+1])
                    if t[1] in self.grammar.Nonterminals:
                        if t not in trans: trans.append(t)
        return trans
    # -----------------------------------------------------------------------------
    # dr_relation()
    #
    # Computes the DR(p,A) relationships for non-terminal transitions.  The input
    # is a tuple (state,N) where state is a number and N is a nonterminal symbol.
    #
    # Returns a list of terminals.
    # -----------------------------------------------------------------------------

    def dr_relation(self,C,trans,nullable):
        state,N = trans
        terms = []

        g = self.lr0_goto(C[state],N)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index+1]
                if a in self.grammar.Terminals:
                    if a not in terms: terms.append(a)

        # This extra bit is to handle the start state
        if state == 0 and N == self.grammar.Productions[0].prod[0]:
            terms.append('$end')

        return terms
    # -----------------------------------------------------------------------------
    # reads_relation()
    #
    # Computes the READS() relation (p,A) READS (t,C).
    # -----------------------------------------------------------------------------
    def reads_relation(self,C, trans, empty):
        # Look for empty transitions
        rel = []
        state, N = trans

        g = self.lr0_goto(C[state],N)
        j = self.lr0_cidhash.get(id(g),-1)
        for p in g:
            if p.lr_index < p.len - 1:
                a = p.prod[p.lr_index + 1]
                if a in empty:
                    rel.append((j,a))

        return rel
    # -----------------------------------------------------------------------------
    # compute_lookback_includes()
    #
    # Determines the lookback and includes relations.
    #
    # LOOKBACK:
    #
    # This relation is determined by running the LR(0) state machine forward.
    # For example, starting with a production "N : . A B C", we run it forward
    # to obtain "N : A B C ."  We then build a relationship between this final
    # state and the starting state.  These relationships are stored in a
    # dictionary lookdict.
    #
    # INCLUDES:
    #
    # Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
    #
    # This relation is used to determine non-terminal transitions that occur
    # inside of other non-terminal transition states.  (p,A) INCLUDES (p',B)
    # if the following holds:
    #
    #       B -> LAT, where T -> epsilon and p' -L-> p
    #
    # L is essentially a prefix (which may be empty), T is a suffix that must be
    # able to derive an empty string.  State p' must lead to state p with the string L.
    # -----------------------------------------------------------------------------
    def compute_lookback_includes(self,C,trans,nullable):
        lookdict = {}          # Dictionary of lookback relations
        includedict = {}       # Dictionary of include relations

        # Make a dictionary of non-terminal transitions
        dtrans = {}
        for t in trans:
            dtrans[t] = 1

        # Loop over all transitions and compute lookbacks and includes
        for state,N in trans:
            lookb = []
            includes = []
            for p in C[state]:
                if p.name != N: continue

                # Okay, we have a name match.  We now follow the production all the way
                # through the state machine until we get the . on the right hand side
                lr_index = p.lr_index
                j = state
                while lr_index < p.len - 1:
                    lr_index = lr_index + 1
                    t = p.prod[lr_index]

                    # Check to see if this symbol and state are a non-terminal transition
                    if (j,t) in dtrans:
                        # Yes.  Okay, there is some chance that this is an includes relation;
                        # the only way to know for certain is whether the rest of the
                        # production derives empty
                        li = lr_index + 1
                        while li < p.len:
                            if p.prod[li] in self.grammar.Terminals: break      # No, forget it
                            if not p.prod[li] in nullable: break
                            li = li + 1
                        else:
                            # Appears to be a relation between (j,t) and (state,N)
                            includes.append((j,t))

                    g = self.lr0_goto(C[j],t)                # Go to next set
                    j = self.lr0_cidhash.get(id(g),-1)       # Go to next state

                # When we get here, j is the final state, now we have to locate the production
                for r in C[j]:
                    if r.name != p.name: continue
                    if r.len != p.len: continue
                    i = 0
                    # This loop compares a production ". A B C" with "A B C ."
                    while i < r.lr_index:
                        if r.prod[i] != p.prod[i+1]: break
                        i = i + 1
                    else:
                        lookb.append((j,r))

            for i in includes:
                if not i in includedict: includedict[i] = []
                includedict[i].append((state,N))
            lookdict[(state,N)] = lookb

        return lookdict,includedict
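
    # Illustrative sketch (not part of PLY): the nullable-suffix test at the
    # heart of the INCLUDES check above, isolated.  Symbols are hypothetical;
    # 'c' is a terminal and 'T' is a nullable non-terminal.
    @staticmethod
    def _demo_suffix_derives_empty(rest=('T','T'), terminals=('c',), nullable=('T',)):
        for sym in rest:
            if sym in terminals:       # a terminal can never derive empty
                return False
            if sym not in nullable:    # nor can a non-nullable non-terminal
                return False
        return True                    # every suffix symbol can vanish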
    # -----------------------------------------------------------------------------
    # compute_read_sets()
    #
    # Given a set of LR(0) items, this function computes the read sets.
    #
    # Inputs:  C        = Set of LR(0) items
    #          ntrans   = Set of nonterminal transitions
    #          nullable = Set of empty transitions
    #
    # Returns a set containing the read sets
    # -----------------------------------------------------------------------------
    def compute_read_sets(self,C, ntrans, nullable):
        FP = lambda x: self.dr_relation(C,x,nullable)
        R  = lambda x: self.reads_relation(C,x,nullable)
        F  = digraph(ntrans,R,FP)
        return F
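
    # Both compute_read_sets() and compute_follow_sets() delegate to digraph()
    # (defined elsewhere in this module) to close a function over a relation:
    # F(x) = FP(x) U union{ F(y) | x R y }.  A minimal sketch of that closure
    # follows (not PLY's digraph(), which uses an SCC-based traversal rather
    # than the naive iteration shown here).  It assumes R(x) only yields
    # members of X.
    @staticmethod
    def _demo_relation_closure(X, R, FP):
        F = dict((x, set(FP(x))) for x in X)
        changed = True
        while changed:
            changed = False
            for x in X:
                for y in R(x):
                    if not F[y] <= F[x]:     # propagate F(y) into F(x)
                        F[x] |= F[y]
                        changed = True
        return F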
    # -----------------------------------------------------------------------------
    # compute_follow_sets()
    #
    # Given a set of LR(0) items, a set of non-terminal transitions, a readset,
    # and an include set, this function computes the follow sets
    #
    #     Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
    #
    # Inputs:
    #          ntrans   = Set of nonterminal transitions
    #          readsets = Readset (previously computed)
    #          inclsets = Include sets (previously computed)
    #
    # Returns a set containing the follow sets
    # -----------------------------------------------------------------------------
    def compute_follow_sets(self,ntrans,readsets,inclsets):
        FP = lambda x: readsets[x]
        R  = lambda x: inclsets.get(x,[])
        F  = digraph(ntrans,R,FP)
        return F
    # -----------------------------------------------------------------------------
    # add_lookaheads()
    #
    # Attaches the lookahead symbols to grammar rules.
    #
    # Inputs:    lookbacks  -  Set of lookback relations
    #            followset  -  Computed follow set
    #
    # This function directly attaches the lookaheads to productions contained
    # in the lookbacks set
    # -----------------------------------------------------------------------------
    def add_lookaheads(self,lookbacks,followset):
        for trans,lb in lookbacks.items():
            # Loop over productions in lookback
            for state,p in lb:
                if not state in p.lookaheads:
                    p.lookaheads[state] = []
                f = followset.get(trans,[])
                for a in f:
                    if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
    # -----------------------------------------------------------------------------
    # add_lalr_lookaheads()
    #
    # This function does all of the work of adding lookahead information for use
    # with LALR parsing
    # -----------------------------------------------------------------------------
    def add_lalr_lookaheads(self,C):
        # Determine all of the nullable nonterminals
        nullable = self.compute_nullable_nonterminals()

        # Find all non-terminal transitions
        trans = self.find_nonterminal_transitions(C)

        # Compute read sets
        readsets = self.compute_read_sets(C,trans,nullable)

        # Compute lookback/includes relations
        lookd, included = self.compute_lookback_includes(C,trans,nullable)

        # Compute LALR FOLLOW sets
        followsets = self.compute_follow_sets(trans,readsets,included)

        # Add all of the lookaheads
        self.add_lookaheads(lookd,followsets)
    # -----------------------------------------------------------------------------
    # lr_parse_table()
    #
    # This function constructs the parse tables for SLR or LALR
    # -----------------------------------------------------------------------------
    def lr_parse_table(self):
        Productions = self.grammar.Productions
        Precedence  = self.grammar.Precedence
        goto   = self.lr_goto         # Goto array
        action = self.lr_action      # Action array
        log    = self.log            # Logger for output

        actionp = { }                # Action production array (temporary)

        log.info("Parsing method: %s", self.lr_method)

        # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
        # This determines the number of states

        C = self.lr0_items()

        if self.lr_method == 'LALR':
            self.add_lalr_lookaheads(C)

        # Build the parser table, state by state
        st = 0
        for I in C:
            # Loop over each production in I
            actlist = [ ]              # List of actions
            st_action  = { }
            st_actionp = { }
            st_goto    = { }
            log.info("state %d", st)
            for p in I:
                log.info("    (%d) %s", p.number, str(p))

            for p in I:
                if p.len == p.lr_index + 1:
                    if p.name == "S'":
                        # Start symbol. Accept!
                        st_action["$end"] = 0
                        st_actionp["$end"] = p
                    else:
                        # We are at the end of a production.  Reduce!
                        if self.lr_method == 'LALR':
                            laheads = p.lookaheads[st]
                        else:
                            laheads = self.grammar.Follow[p.name]
                        for a in laheads:
                            actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa. Have a shift/reduce or reduce/reduce conflict
                                if r > 0:
                                    # Need to decide on shift or reduce here
                                    # By default we favor shifting. Need to add
                                    # some precedence rules here.
                                    sprec,slevel = Productions[st_actionp[a].number].prec
                                    rprec,rlevel = Precedence.get(a,('right',0))
                                    if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                        # We really need to reduce here.
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))
                                        Productions[p.number].reduced += 1
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the shift
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                elif r < 0:
                                    # Reduce/reduce conflict.  In this case, we favor the rule
                                    # that was defined first in the grammar file
                                    oldp = Productions[-r]
                                    pp = Productions[p.number]
                                    if oldp.line > pp.line:
                                        st_action[a] = -p.number
                                        st_actionp[a] = p
                                        chosenp,rejectp = pp,oldp
                                        Productions[p.number].reduced += 1
                                        Productions[oldp.number].reduced -= 1
                                    else:
                                        chosenp,rejectp = oldp,pp
                                    self.rr_conflicts.append((st,chosenp,rejectp))
                                    log.info("  ! reduce/reduce conflict for %s resolved using rule %d (%s)", a,st_actionp[a].number, st_actionp[a])
                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = -p.number
                                st_actionp[a] = p
                                Productions[p.number].reduced += 1
                else:
                    i = p.lr_index
                    a = p.prod[i+1]       # Get symbol right after the "."
                    if a in self.grammar.Terminals:
                        g = self.lr0_goto(I,a)
                        j = self.lr0_cidhash.get(id(g),-1)
                        if j >= 0:
                            # We are in a shift state
                            actlist.append((a,p,"shift and go to state %d" % j))
                            r = st_action.get(a,None)
                            if r is not None:
                                # Whoa, we have a shift/reduce or shift/shift conflict
                                if r > 0:
                                    if r != j:
                                        raise LALRError("Shift/shift conflict in state %d" % st)
                                elif r < 0:
                                    # Do a precedence check.
                                    #   - if precedence of reduce rule is higher, we reduce.
                                    #   - if precedence of reduce is same and left assoc, we reduce.
                                    #   - otherwise we shift
                                    rprec,rlevel = Productions[st_actionp[a].number].prec
                                    sprec,slevel = Precedence.get(a,('right',0))
                                    if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                        # We decide to shift here... highest precedence to shift
                                        Productions[st_actionp[a].number].reduced -= 1
                                        st_action[a] = j
                                        st_actionp[a] = p
                                        if not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as shift",a)
                                            self.sr_conflicts.append((st,a,'shift'))
                                    elif (slevel == rlevel) and (rprec == 'nonassoc'):
                                        st_action[a] = None
                                    else:
                                        # Hmmm. Guess we'll keep the reduce
                                        if not slevel and not rlevel:
                                            log.info("  ! shift/reduce conflict for %s resolved as reduce",a)
                                            self.sr_conflicts.append((st,a,'reduce'))
                                else:
                                    raise LALRError("Unknown conflict in state %d" % st)
                            else:
                                st_action[a] = j
                                st_actionp[a] = p

            # Print the actions associated with each terminal
            _actprint = { }
            for a,p,m in actlist:
                if a in st_action:
                    if p is st_actionp[a]:
                        log.info("    %-15s %s",a,m)
                        _actprint[(a,m)] = 1

            # Print the actions that were not used. (debugging)
            for a,p,m in actlist:
                if a in st_action:
                    if p is not st_actionp[a]:
                        if not (a,m) in _actprint:
                            log.debug("  ! %-15s [ %s ]",a,m)
                            _actprint[(a,m)] = 1

            # Construct the goto table for this state
            nkeys = { }
            for ii in I:
                for s in ii.usyms:
                    if s in self.grammar.Nonterminals:
                        nkeys[s] = None
            for n in nkeys:
                g = self.lr0_goto(I,n)
                j = self.lr0_cidhash.get(id(g),-1)
                if j >= 0:
                    st_goto[n] = j
                    log.info("    %-30s shift and go to state %d",n,j)

            action[st] = st_action
            actionp[st] = st_actionp
            goto[st] = st_goto
            st += 1
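
    # Illustrative sketch (not part of PLY): the precedence test used twice
    # above, isolated.  Given the shift side's level, the reduce side's level,
    # and the associativity attached at that level, it returns the action the
    # policy above would choose.  Unspecified tokens default to ('right',0),
    # so _demo_resolve_shift_reduce(0, 0, 'right') yields 'shift'.
    @staticmethod
    def _demo_resolve_shift_reduce(slevel, rlevel, assoc):
        if slevel > rlevel or (slevel == rlevel and assoc == 'right'):
            return 'shift'
        if slevel < rlevel or (slevel == rlevel and assoc == 'left'):
            return 'reduce'
        return 'error'        # equal levels and 'nonassoc': neither action is legal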
    # -----------------------------------------------------------------------------
    # write_table()
    #
    # This function writes the LR parsing tables to a file
    # -----------------------------------------------------------------------------
    def write_table(self,modulename,outputdir='',signature=""):
        basemodulename = modulename.split(".")[-1]
        filename = os.path.join(outputdir,basemodulename) + ".py"
        try:
            f = open(filename,"w")

            f.write("""
# %s
# This file is automatically generated. Do not edit.
_tabversion = %r

_lr_method = %r

_lr_signature = %r
    """ % (filename, __tabversion__, self.lr_method, signature))

            # Change smaller to 0 to go back to original tables
            smaller = 1

            # Factor out names to try and make smaller
            if smaller:
                items = { }

                for s,nd in self.lr_action.items():
                    for name,v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([],[])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_action_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)
                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_action = { }
for _k, _v in _lr_action_items.items():
   for _x,_y in zip(_v[0],_v[1]):
      if not _x in _lr_action: _lr_action[_x] = { }
      _lr_action[_x][_k] = _y
del _lr_action_items
""")
            else:
                f.write("\n_lr_action = { ");
                for k,v in self.lr_action.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n")

            if smaller:
                # Factor out names to try and make smaller
                items = { }

                for s,nd in self.lr_goto.items():
                    for name,v in nd.items():
                        i = items.get(name)
                        if not i:
                            i = ([],[])
                            items[name] = i
                        i[0].append(s)
                        i[1].append(v)

                f.write("\n_lr_goto_items = {")
                for k,v in items.items():
                    f.write("%r:([" % k)
                    for i in v[0]:
                        f.write("%r," % i)
                    f.write("],[")
                    for i in v[1]:
                        f.write("%r," % i)
                    f.write("]),")
                f.write("}\n")

                f.write("""
_lr_goto = { }
for _k, _v in _lr_goto_items.items():
   for _x,_y in zip(_v[0],_v[1]):
       if not _x in _lr_goto: _lr_goto[_x] = { }
       _lr_goto[_x][_k] = _y
del _lr_goto_items
""")
            else:
                f.write("\n_lr_goto = { ");
                for k,v in self.lr_goto.items():
                    f.write("(%r,%r):%r," % (k[0],k[1],v))
                f.write("}\n")

            # Write production table
            f.write("_lr_productions = [\n")
            for p in self.lr_productions:
                if p.func:
                    f.write("  (%r,%r,%d,%r,%r,%d),\n" % (p.str,p.name, p.len, p.func,p.file,p.line))
                else:
                    f.write("  (%r,%r,%d,None,None,None),\n" % (str(p),p.name, p.len))
            f.write("]\n")
            f.close()

        except IOError:
            e = sys.exc_info()[1]
            sys.stderr.write("Unable to create '%s'\n" % filename)
            sys.stderr.write(str(e)+"\n")
    # -----------------------------------------------------------------------------
    # pickle_table()
    #
    # This function pickles the LR parsing tables to a supplied file object
    # -----------------------------------------------------------------------------
    def pickle_table(self,filename,signature=""):
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        outf = open(filename,"wb")
        pickle.dump(__tabversion__,outf,pickle_protocol)
        pickle.dump(self.lr_method,outf,pickle_protocol)
        pickle.dump(signature,outf,pickle_protocol)
        pickle.dump(self.lr_action,outf,pickle_protocol)
        pickle.dump(self.lr_goto,outf,pickle_protocol)

        outp = []
        for p in self.lr_productions:
            if p.func:
                outp.append((p.str,p.name, p.len, p.func,p.file,p.line))
            else:
                outp.append((str(p),p.name,p.len,None,None,None))
        pickle.dump(outp,outf,pickle_protocol)
        outf.close()
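
# A minimal sketch (not PLY's actual table reader) showing how the pickle
# written above can be consumed: values come back in the exact order they
# were dumped.
def _demo_read_pickled_tables(filename):
    try:
        import cPickle as pickle
    except ImportError:
        import pickle
    f = open(filename, 'rb')
    try:
        tabversion  = pickle.load(f)      # __tabversion__
        lr_method   = pickle.load(f)      # 'LALR' or 'SLR'
        signature   = pickle.load(f)      # grammar signature
        lr_action   = pickle.load(f)      # action table
        lr_goto     = pickle.load(f)      # goto table
        productions = pickle.load(f)      # production tuples
    finally:
        f.close()
    return tabversion, lr_method, signature, lr_action, lr_goto, productions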
# -----------------------------------------------------------------------------
#                           === INTROSPECTION ===
#
# The following functions and classes are used to implement the PLY
# introspection features followed by the yacc() function itself.
# -----------------------------------------------------------------------------
# -----------------------------------------------------------------------------
# get_caller_module_dict()
#
# This function returns a dictionary containing all of the symbols defined within
# a caller further down the call stack.  This is used to get the environment
# associated with the yacc() call if none was provided.
# -----------------------------------------------------------------------------
def get_caller_module_dict(levels):
    try:
        raise RuntimeError
    except RuntimeError:
        e,b,t = sys.exc_info()
        f = t.tb_frame
        while levels > 0:
            f = f.f_back
            levels -= 1
        ldict = f.f_globals.copy()
        if f.f_globals != f.f_locals:
            ldict.update(f.f_locals)
        return ldict
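
# Illustrative sketch (not part of PLY): the same caller-environment grab in
# isolation.  sys._getframe(levels) is a more direct spelling of the
# raise/except trick used above.
def _demo_caller_dict(levels=1):
    f = sys._getframe(levels)
    d = f.f_globals.copy()
    if f.f_globals is not f.f_locals:
        d.update(f.f_locals)
    return d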
# -----------------------------------------------------------------------------
# parse_grammar()
#
# This takes a raw grammar rule string and parses it into production data
# -----------------------------------------------------------------------------
def parse_grammar(doc,file,line):
    grammar = []
    # Split the doc string into lines
    pstrings = doc.splitlines()
    lastp = None
    dline = line
    for ps in pstrings:
        dline += 1
        p = ps.split()
        if not p: continue
        try:
            if p[0] == '|':
                # This is a continuation of a previous rule
                if not lastp:
                    raise SyntaxError("%s:%d: Misplaced '|'" % (file,dline))
                prodname = lastp
                syms = p[1:]
            else:
                prodname = p[0]
                lastp = prodname
                syms = p[2:]
                assign = p[1]
                if assign != ':' and assign != '::=':
                    raise SyntaxError("%s:%d: Syntax error. Expected ':'" % (file,dline))

            grammar.append((file,dline,prodname,syms))
        except SyntaxError:
            raise
        except Exception:
            raise SyntaxError("%s:%d: Syntax error in rule '%s'" % (file,dline,ps.strip()))

    return grammar
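
# For reference, the docstring format that parse_grammar() consumes.  A line
# beginning with '|' continues the previous rule.  The grammar below is a
# hypothetical example:
_demo_doc = """expression : expression PLUS term
               | term"""
# parse_grammar(_demo_doc, '<demo>', 0) would return one entry per
# alternative:
#     [('<demo>', 1, 'expression', ['expression','PLUS','term']),
#      ('<demo>', 2, 'expression', ['term'])]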
# -----------------------------------------------------------------------------
# ParserReflect()
#
# This class represents information extracted for building a parser including
# start symbol, error function, tokens, precedence list, action functions, etc.
# -----------------------------------------------------------------------------
class ParserReflect(object):
    def __init__(self,pdict,log=None):
        self.pdict      = pdict
        self.start      = None
        self.error_func = None
        self.tokens     = None
        self.files      = {}
        self.grammar    = []
        self.error      = 0

        if log is None:
            self.log = PlyLogger(sys.stderr)
        else:
            self.log = log

    # Get all of the basic information
    def get_all(self):
        self.get_start()
        self.get_error_func()
        self.get_tokens()
        self.get_precedence()
        self.get_pfunctions()
    # Validate all of the information
    def validate_all(self):
        self.validate_start()
        self.validate_error_func()
        self.validate_tokens()
        self.validate_precedence()
        self.validate_pfunctions()
        self.validate_files()
        return self.error
    # Compute a signature over the grammar
    def signature(self):
        try:
            from hashlib import md5
        except ImportError:
            from md5 import md5
        try:
            sig = md5()
            if self.start:
                sig.update(self.start.encode('latin-1'))
            if self.prec:
                sig.update("".join(["".join(p) for p in self.prec]).encode('latin-1'))
            if self.tokens:
                sig.update(" ".join(self.tokens).encode('latin-1'))
            for f in self.pfuncs:
                if f[3]:
                    sig.update(f[3].encode('latin-1'))
        except (TypeError,ValueError):
            pass
        return sig.digest()
    # -----------------------------------------------------------------------------
    # validate_files()
    #
    # This method checks to see if there are duplicated p_rulename() functions
    # in the parser module file.  Without this function, it is really easy for
    # users to make mistakes by cutting and pasting code fragments (and it's a real
    # bugger to try and figure out why the resulting parser doesn't work).  Therefore,
    # we just do a little regular expression pattern matching of def statements
    # to try and detect duplicates.
    # -----------------------------------------------------------------------------
    def validate_files(self):
        # Match def p_funcname(
        fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')

        for filename in self.files.keys():
            base,ext = os.path.splitext(filename)
            if ext != '.py': return 1          # No idea. Assume it's okay.

            try:
                f = open(filename)
                lines = f.readlines()
                f.close()
            except IOError:
                continue

            counthash = { }
            for linen,l in enumerate(lines):
                linen += 1
                m = fre.match(l)
                if m:
                    name = m.group(1)
                    prev = counthash.get(name)
                    if not prev:
                        counthash[name] = linen
                    else:
                        self.log.warning("%s:%d: Function %s redefined. Previously defined on line %d", filename,linen,name,prev)
    # Get the start symbol
    def get_start(self):
        self.start = self.pdict.get('start')

    # Validate the start symbol
    def validate_start(self):
        if self.start is not None:
            if not isinstance(self.start,str):
                self.log.error("'start' must be a string")
    # Look for error handler
    def get_error_func(self):
        self.error_func = self.pdict.get('p_error')

    # Validate the error function
    def validate_error_func(self):
        if self.error_func:
            if isinstance(self.error_func,types.FunctionType):
                ismethod = 0
            elif isinstance(self.error_func, types.MethodType):
                ismethod = 1
            else:
                self.log.error("'p_error' defined, but is not a function or method")
                self.error = 1
                return

            eline = func_code(self.error_func).co_firstlineno
            efile = func_code(self.error_func).co_filename
            self.files[efile] = 1

            if (func_code(self.error_func).co_argcount != 1+ismethod):
                self.log.error("%s:%d: p_error() requires 1 argument",efile,eline)
                self.error = 1
    # Get the tokens map
    def get_tokens(self):
        tokens = self.pdict.get("tokens",None)
        if not tokens:
            self.log.error("No token list is defined")
            self.error = 1
            return

        if not isinstance(tokens,(list, tuple)):
            self.log.error("tokens must be a list or tuple")
            self.error = 1
            return

        if not tokens:
            self.log.error("tokens is empty")
            self.error = 1
            return

        self.tokens = tokens
    # Validate the tokens
    def validate_tokens(self):
        if 'error' in self.tokens:
            self.log.error("Illegal token name 'error'. Is a reserved word")
            self.error = 1
            return

        terminals = {}
        for n in self.tokens:
            if n in terminals:
                self.log.warning("Token '%s' multiply defined", n)
            terminals[n] = 1
    # Get the precedence map (if any)
    def get_precedence(self):
        self.prec = self.pdict.get("precedence",None)
    # Validate and parse the precedence map
    def validate_precedence(self):
        preclist = []
        if self.prec:
            if not isinstance(self.prec,(list,tuple)):
                self.log.error("precedence must be a list or tuple")
                self.error = 1
                return
            for level,p in enumerate(self.prec):
                if not isinstance(p,(list,tuple)):
                    self.log.error("Bad precedence table")
                    self.error = 1
                    return

                if len(p) < 2:
                    self.log.error("Malformed precedence entry %s. Must be (assoc, term, ..., term)",p)
                    self.error = 1
                    return
                assoc = p[0]
                if not isinstance(assoc,str):
                    self.log.error("precedence associativity must be a string")
                    self.error = 1
                    return
                for term in p[1:]:
                    if not isinstance(term,str):
                        self.log.error("precedence items must be strings")
                        self.error = 1
                        return
                    preclist.append((term,assoc,level+1))
        self.preclist = preclist
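
    # Illustrative sketch (not part of PLY's public API): the shape of the
    # data handled above.  A user-supplied table flattens to (term, assoc,
    # level) entries, with levels starting at 1.  The tokens shown are
    # hypothetical.
    @staticmethod
    def _demo_flatten_precedence(prec=(('left','PLUS','MINUS'),
                                       ('left','TIMES','DIVIDE'))):
        preclist = []
        for level, p in enumerate(prec):
            assoc = p[0]
            for term in p[1:]:
                preclist.append((term, assoc, level+1))
        # Result here: [('PLUS','left',1), ('MINUS','left',1),
        #               ('TIMES','left',2), ('DIVIDE','left',2)]
        return preclist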
    # Get all p_functions from the grammar
    def get_pfunctions(self):
        p_functions = []
        for name, item in self.pdict.items():
            if name[:2] != 'p_': continue
            if name == 'p_error': continue
            if isinstance(item,(types.FunctionType,types.MethodType)):
                line = func_code(item).co_firstlineno
                file = func_code(item).co_filename
                p_functions.append((line,file,name,item.__doc__))

        # Sort all of the actions by line number
        p_functions.sort()
        self.pfuncs = p_functions
    # Validate all of the p_functions
    def validate_pfunctions(self):
        grammar = []
        # Check for non-empty symbols
        if len(self.pfuncs) == 0:
            self.log.error("no rules of the form p_rulename are defined")
            self.error = 1
            return

        for line, file, name, doc in self.pfuncs:
            func = self.pdict[name]
            if isinstance(func, types.MethodType):
                reqargs = 2
            else:
                reqargs = 1
            if func_code(func).co_argcount > reqargs:
                self.log.error("%s:%d: Rule '%s' has too many arguments",file,line,func.__name__)
                self.error = 1
            elif func_code(func).co_argcount < reqargs:
                self.log.error("%s:%d: Rule '%s' requires an argument",file,line,func.__name__)
                self.error = 1
            elif not func.__doc__:
                self.log.warning("%s:%d: No documentation string specified in function '%s' (ignored)",file,line,func.__name__)
            else:
                try:
                    parsed_g = parse_grammar(doc,file,line)
                    for g in parsed_g:
                        grammar.append((name, g))
                except SyntaxError:
                    e = sys.exc_info()[1]
                    self.log.error(str(e))
                    self.error = 1

                # Looks like a valid grammar rule
                # Mark the file in which defined.
                self.files[file] = 1

        # Secondary validation step that looks for p_ definitions that are not functions
        # or functions that look like they might be grammar rules.
        for n,v in self.pdict.items():
            if n[0:2] == 'p_' and isinstance(v, (types.FunctionType, types.MethodType)): continue
            if n[0:2] == 't_': continue
            if n[0:2] == 'p_' and n != 'p_error':
                self.log.warning("'%s' not defined as a function", n)
            if ((isinstance(v,types.FunctionType) and func_code(v).co_argcount == 1) or
                (isinstance(v,types.MethodType) and func_code(v).co_argcount == 2)):
                try:
                    doc = v.__doc__.split(" ")
                    if doc[1] == ':':
                        self.log.warning("%s:%d: Possible grammar rule '%s' defined without p_ prefix",
                                         func_code(v).co_filename, func_code(v).co_firstlineno,n)
                except Exception:
                    pass

        self.grammar = grammar
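
# For reference, the kind of function that get_pfunctions() collects and
# validate_pfunctions() checks: a hypothetical rule following PLY's documented
# convention, where the grammar lives in the docstring and the function takes
# the single argument p.
def _demo_p_expression_plus(p):
    'expression : expression PLUS term'
    p[0] = p[1] + p[3]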
# -----------------------------------------------------------------------------
# yacc()
#
# Build a parser
# -----------------------------------------------------------------------------
def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, start=None,
         check_recursion=1, optimize=0, write_tables=1, debugfile=debug_file, outputdir='',
         debuglog=None, errorlog=None, picklefile=None):

    global parse                 # Reference to the parsing method of the last built parser

    # If pickling is enabled, table files are not created
    if picklefile:
        write_tables = 0

    if errorlog is None:
        errorlog = PlyLogger(sys.stderr)

    # Get the module dictionary used for the parser
    if module:
        _items = [(k,getattr(module,k)) for k in dir(module)]
        pdict = dict(_items)
    else:
        pdict = get_caller_module_dict(2)

    # Collect parser information from the dictionary
    pinfo = ParserReflect(pdict,log=errorlog)
    pinfo.get_all()

    if pinfo.error:
        raise YaccError("Unable to build parser")

    # Check signature against table files (if any)
    signature = pinfo.signature()

    # Read the tables
    try:
        lr = LRTable()
        if picklefile:
            read_signature = lr.read_pickle(picklefile)
        else:
            read_signature = lr.read_table(tabmodule)
        if optimize or (read_signature == signature):
            try:
                lr.bind_callables(pinfo.pdict)
                parser = LRParser(lr,pinfo.error_func)
                parse = parser.parse
                return parser
            except Exception:
                e = sys.exc_info()[1]
                errorlog.warning("There was a problem loading the table file: %s", repr(e))
    except VersionError:
        e = sys.exc_info()[1]
        errorlog.warning(str(e))
    except Exception:
        pass

    if debuglog is None:
        if debug:
            debuglog = PlyLogger(open(debugfile,"w"))
        else:
            debuglog = NullLogger()

    debuglog.info("Created by PLY version %s (http://www.dabeaz.com/ply)", __version__)

    errors = 0

    # Validate the parser information
    if pinfo.validate_all():
        raise YaccError("Unable to build parser")

    if not pinfo.error_func:
        errorlog.warning("no p_error() function is defined")

    # Create a grammar object
    grammar = Grammar(pinfo.tokens)

    # Set precedence level for terminals
    for term, assoc, level in pinfo.preclist:
        try:
            grammar.set_precedence(term,assoc,level)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.warning("%s",str(e))

    # Add productions to the grammar
    for funcname, gram in pinfo.grammar:
        file, line, prodname, syms = gram
        try:
            grammar.add_production(prodname,syms,funcname,file,line)
        except GrammarError:
            e = sys.exc_info()[1]
            errorlog.error("%s",str(e))
            errors = 1

    # Set the grammar start symbols
    try:
        if start is None:
            grammar.set_start(pinfo.start)
        else:
            grammar.set_start(start)
    except GrammarError:
        e = sys.exc_info()[1]
        errorlog.error(str(e))
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Verify the grammar structure
    undefined_symbols = grammar.undefined_symbols()
    for sym, prod in undefined_symbols:
        errorlog.error("%s:%d: Symbol '%s' used, but not defined as a token or a rule",prod.file,prod.line,sym)
        errors = 1

    unused_terminals = grammar.unused_terminals()
    if unused_terminals:
        debuglog.info("Unused terminals:")
        for term in unused_terminals:
            errorlog.warning("Token '%s' defined, but not used", term)
            debuglog.info("    %s", term)

    # Print out all productions to the debug log
    if debug:
        debuglog.info("Grammar")
        for n,p in enumerate(grammar.Productions):
            debuglog.info("Rule %-5d %s", n, p)

    # Find unused non-terminals
    unused_rules = grammar.unused_rules()
    for prod in unused_rules:
        errorlog.warning("%s:%d: Rule '%s' defined, but not used", prod.file, prod.line, prod.name)

    if len(unused_terminals) == 1:
        errorlog.warning("There is 1 unused token")
    if len(unused_terminals) > 1:
        errorlog.warning("There are %d unused tokens", len(unused_terminals))

    if len(unused_rules) == 1:
        errorlog.warning("There is 1 unused rule")
    if len(unused_rules) > 1:
        errorlog.warning("There are %d unused rules", len(unused_rules))

    if debug:
        debuglog.info("Terminals, with rules where they appear")
        terms = list(grammar.Terminals)
        terms.sort()
        for term in terms:
            debuglog.info("%-20s : %s", term, " ".join([str(s) for s in grammar.Terminals[term]]))

        debuglog.info("Nonterminals, with rules where they appear")
        nonterms = list(grammar.Nonterminals)
        nonterms.sort()
        for nonterm in nonterms:
            debuglog.info("%-20s : %s", nonterm, " ".join([str(s) for s in grammar.Nonterminals[nonterm]]))

    if check_recursion:
        unreachable = grammar.find_unreachable()
        for u in unreachable:
            errorlog.warning("Symbol '%s' is unreachable",u)

        infinite = grammar.infinite_cycles()
        for inf in infinite:
            errorlog.error("Infinite recursion detected for symbol '%s'", inf)
            errors = 1

    unused_prec = grammar.unused_precedence()
    for term, assoc in unused_prec:
        errorlog.error("Precedence rule '%s' defined for unknown symbol '%s'", assoc, term)
        errors = 1

    if errors:
        raise YaccError("Unable to build parser")

    # Run the LRGeneratedTable on the grammar
    if debug:
        errorlog.debug("Generating %s tables", method)

    lr = LRGeneratedTable(grammar,method,debuglog)

    if debug:
        num_sr = len(lr.sr_conflicts)

        # Report shift/reduce and reduce/reduce conflicts
        if num_sr == 1:
            errorlog.warning("1 shift/reduce conflict")
        elif num_sr > 1:
            errorlog.warning("%d shift/reduce conflicts", num_sr)

        num_rr = len(lr.rr_conflicts)
        if num_rr == 1:
            errorlog.warning("1 reduce/reduce conflict")
        elif num_rr > 1:
            errorlog.warning("%d reduce/reduce conflicts", num_rr)

    # Write out conflicts to the output file
    if debug and (lr.sr_conflicts or lr.rr_conflicts):
        debuglog.warning("")
        debuglog.warning("Conflicts:")
        debuglog.warning("")

        for state, tok, resolution in lr.sr_conflicts:
            debuglog.warning("shift/reduce conflict for %s in state %d resolved as %s", tok, state, resolution)

        already_reported = {}
        for state, rule, rejected in lr.rr_conflicts:
            if (state,id(rule),id(rejected)) in already_reported:
                continue
            debuglog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            debuglog.warning("rejected rule (%s) in state %d", rejected,state)
            errorlog.warning("reduce/reduce conflict in state %d resolved using rule (%s)", state, rule)
            errorlog.warning("rejected rule (%s) in state %d", rejected, state)
            already_reported[state,id(rule),id(rejected)] = 1

        warned_never = []
        for state, rule, rejected in lr.rr_conflicts:
            if not rejected.reduced and (rejected not in warned_never):
                debuglog.warning("Rule (%s) is never reduced", rejected)
                errorlog.warning("Rule (%s) is never reduced", rejected)
                warned_never.append(rejected)

    # Write the table file if requested
    if write_tables:
        lr.write_table(tabmodule,outputdir,signature)

    # Write a pickled version of the tables
    if picklefile:
        lr.pickle_table(picklefile,signature)

    # Build the parser
    lr.bind_callables(pinfo.pdict)
    parser = LRParser(lr,pinfo.error_func)

    parse = parser.parse
    return parser
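
# -----------------------------------------------------------------------------
# A minimal end-to-end sketch of using yacc() (hypothetical grammar; it needs
# a matching lexer providing the tokens NUMBER and PLUS, e.g. one built with
# ply.lex):
#
#     tokens = ('NUMBER','PLUS')
#
#     def p_expr_plus(p):
#         'expr : expr PLUS NUMBER'
#         p[0] = p[1] + p[3]
#
#     def p_expr_number(p):
#         'expr : NUMBER'
#         p[0] = p[1]
#
#     def p_error(p):
#         print("Syntax error at %r" % (p,))
#
#     parser = yacc()                  # builds (or reloads) the tables
#     result = parser.parse('1+2+3', lexer=calclexer)   # calclexer: hypothetical
# -----------------------------------------------------------------------------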