1 ;;; python.wy -- LALR grammar for Python
3 ;; Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
4 ;; 2011, 2012 Free Software Foundation, Inc.
5 ;; Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
6 ;; 2009, 2010 Python Software Foundation; All Rights Reserved
8 ;; Author: Richard Kim <ryk@dspwiz.com>
9 ;; Maintainer: Richard Kim <ryk@dspwiz.com>
13 ;; This file is part of GNU Emacs.
15 ;; GNU Emacs is free software: you can redistribute it and/or modify
16 ;; it under the terms of the GNU General Public License as published by
17 ;; the Free Software Foundation, either version 3 of the License, or
18 ;; (at your option) any later version.
20 ;; GNU Emacs is distributed in the hope that it will be useful,
21 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
22 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 ;; GNU General Public License for more details.
25 ;; You should have received a copy of the GNU General Public License
26 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
30 ;; This is an LALR Python parser that follows the official Python
31 ;; grammar closely, with very few exceptions. The Python grammar is
32 ;; used and reproduced under the following license:
34 ;; PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
35 ;; --------------------------------------------
36 ;; 1. This LICENSE AGREEMENT is between the Python Software Foundation
37 ;; ("PSF"), and the Individual or Organization ("Licensee") accessing
38 ;; and otherwise using this software ("Python") in source or binary
39 ;; form and its associated documentation.
41 ;; 2. Subject to the terms and conditions of this License Agreement,
42 ;; PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide
43 ;; license to reproduce, analyze, test, perform and/or display
44 ;; publicly, prepare derivative works, distribute, and otherwise use
45 ;; Python alone or in any derivative version, provided, however, that
46 ;; PSF's License Agreement and PSF's notice of copyright, i.e.,
47 ;; "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
48 ;; 2009, 2010 Python Software Foundation; All Rights Reserved" are
49 ;; retained in Python alone or in any derivative version prepared by
52 ;; 3. In the event Licensee prepares a derivative work that is based
53 ;; on or incorporates Python or any part thereof, and wants to make
54 ;; the derivative work available to others as provided herein, then
55 ;; Licensee hereby agrees to include in any such work a brief summary
56 ;; of the changes made to Python.
58 ;; 4. PSF is making Python available to Licensee on an "AS IS"
59 ;; basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
60 ;; IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
61 ;; DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
62 ;; FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
63 ;; INFRINGE ANY THIRD PARTY RIGHTS.
65 ;; 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
66 ;; FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
67 ;; RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR
68 ;; ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
70 ;; 6. This License Agreement will automatically terminate upon a
71 ;; material breach of its terms and conditions.
73 ;; 7. Nothing in this License Agreement shall be deemed to create any
74 ;; relationship of agency, partnership, or joint venture between PSF
75 ;; and Licensee. This License Agreement does not grant permission to
76 ;; use PSF trademarks or trade name in a trademark sense to endorse or
77 ;; promote products or services of Licensee, or any third party.
79 ;; 8. By copying, installing or otherwise using Python, Licensee
80 ;; agrees to be bound by the terms and conditions of this License
85 ;; * Verify that semantic-lex-python-number regexp is correct.
91 %package wisent-python-wy
93 %languagemode python-mode
95 ;; The default start symbol
97 ;; Alternate entry points
98 ;; - Needed by partial re-parse
99 %start function_parameter
101 %start indented_block
102 ;; - Needed by EXPANDFULL clauses
103 %start function_parameters
105 %start indented_block_body
107 ;; -------------------------------
108 ;; Misc. Python specific terminals
109 ;; -------------------------------
110 ;; The values of these tokens are for documentation only; they are not
111 ;; used by the lexer.
112 %token <charquote> BACKSLASH "\\"
113 %token <newline> NEWLINE "\n"
114 %token <indentation> INDENT "^\\s-+"
115 %token <indentation> DEDENT "[^:INDENT:]"
116 %token <indentation> INDENT_BLOCK "(INDENT DEDENT)"
118 ;; -----------------------------
119 ;; Block & Parenthesis terminals
120 ;; -----------------------------
121 %type <block> ;;syntax "\\s(\\|\\s)" matchdatatype block
123 %token <block> PAREN_BLOCK "(LPAREN RPAREN)"
124 %token <block> BRACE_BLOCK "(LBRACE RBRACE)"
125 %token <block> BRACK_BLOCK "(LBRACK RBRACK)"
127 %token <open-paren> LPAREN "("
128 %token <close-paren> RPAREN ")"
129 %token <open-paren> LBRACE "{"
130 %token <close-paren> RBRACE "}"
131 %token <open-paren> LBRACK "["
132 %token <close-paren> RBRACK "]"
134 ;; ------------------
135 ;; Operator terminals
136 ;; ------------------
137 %type <punctuation> ;;syntax "\\(\\s.\\|\\s$\\|\\s'\\)+" matchdatatype string
139 %token <punctuation> LTLTEQ "<<="
140 %token <punctuation> GTGTEQ ">>="
141 %token <punctuation> EXPEQ "**="
142 %token <punctuation> DIVDIVEQ "//="
143 %token <punctuation> DIVDIV "//"
144 %token <punctuation> LTLT "<<"
145 %token <punctuation> GTGT ">>"
146 %token <punctuation> EXPONENT "**"
147 %token <punctuation> EQ "=="
148 %token <punctuation> GE ">="
149 %token <punctuation> LE "<="
150 %token <punctuation> PLUSEQ "+="
151 %token <punctuation> MINUSEQ "-="
152 %token <punctuation> MULTEQ "*="
153 %token <punctuation> DIVEQ "/="
154 %token <punctuation> MODEQ "%="
155 %token <punctuation> AMPEQ "&="
156 %token <punctuation> OREQ "|="
157 %token <punctuation> HATEQ "^="
158 %token <punctuation> LTGT "<>"
159 %token <punctuation> NE "!="
160 %token <punctuation> HAT "^"
161 %token <punctuation> LT "<"
162 %token <punctuation> GT ">"
163 %token <punctuation> AMP "&"
164 %token <punctuation> MULT "*"
165 %token <punctuation> DIV "/"
166 %token <punctuation> MOD "%"
167 %token <punctuation> PLUS "+"
168 %token <punctuation> MINUS "-"
169 %token <punctuation> PERIOD "."
170 %token <punctuation> TILDE "~"
171 %token <punctuation> BAR "|"
172 %token <punctuation> COLON ":"
173 %token <punctuation> SEMICOLON ";"
174 %token <punctuation> COMMA ","
175 %token <punctuation> ASSIGN "="
176 %token <punctuation> BACKQUOTE "`"
182 %token <string> STRING_LITERAL
184 %type <number> ;;syntax semantic-lex-number-expression
185 %token <number> NUMBER_LITERAL
187 %type <symbol> ;;syntax "\\(\\sw\\|\\s_\\)+"
193 %type <keyword> ;;syntax "\\(\\sw\\|\\s_\\)+" matchdatatype keyword
197 "Logical AND binary operator ... "
201 "EXPR as NAME makes value of EXPR available as variable NAME"
203 %keyword ASSERT "assert"
205 "Raise AssertionError exception if <expr> is false"
207 %keyword BREAK "break"
209 "Terminate 'for' or 'while' loop"
211 %keyword CLASS "class"
215 %keyword CONTINUE "continue"
216 %put CONTINUE summary
217 "Skip to the next iteration of enclosing 'for' or 'while' loop"
221 "Define a new function"
225 "Delete specified objects, i.e., undo what assignment did"
229 "Shorthand for 'else if' following an 'if' statement"
233 "Start the 'else' clause following an 'if' statement"
235 %keyword EXCEPT "except"
237 "Specify exception handlers along with 'try' keyword"
241 "Dynamically execute Python code"
243 %keyword FINALLY "finally"
245 "Specify code to be executed after 'try' statements whether or not an exception occurred"
253 "Modify behavior of 'import' statement"
255 %keyword GLOBAL "global"
257 "Declare one or more symbols as global symbols"
261 "Start 'if' conditional statement"
263 %keyword IMPORT "import"
265 "Load specified modules"
269 "Part of 'for' statement "
273 "Binary operator that tests for object identity"
275 %keyword LAMBDA "lambda"
277 "Create anonymous function"
281 "Unary boolean negation operator"
285 "Binary logical 'or' operator"
289 "Statement that does nothing"
291 %keyword PRINT "print"
293 "Print each argument to standard output"
295 %keyword RAISE "raise"
299 %keyword RETURN "return"
301 "Return from a function"
305 "Start of statements protected by exception handlers"
307 %keyword WHILE "while"
309 "Start a 'while' loop"
311 %keyword YIELD "yield"
313 "Create a generator function"
317 ;;;****************************************************************************
319 ;;;****************************************************************************
321 ;; simple_stmt are statements that do not involve INDENT tokens
322 ;; compound_stmt are statements that involve INDENT tokens
329 ;;;****************************************************************************
331 ;;;****************************************************************************
333 ;; simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
335 : small_stmt_list semicolon_opt NEWLINE
338 ;; small_stmt (';' small_stmt)*
341 | small_stmt_list SEMICOLON small_stmt
356 ;;;============================================================================
358 ;;;============================================================================
360 ;; print_stmt: 'print' [ test (',' test)* [','] ]
361 ;; | '>>' test [ (',' test)+ [','] ]
363 : PRINT print_stmt_trailer
367 ;; [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ]
371 | GTGT test trailing_test_list_with_opt_comma_opt
375 ;; [ (',' test)+ [','] ]
376 trailing_test_list_with_opt_comma_opt
378 | trailing_test_list comma_opt
386 | trailing_test_list COMMA test
390 ;;;============================================================================
392 ;;;============================================================================
394 ;; expr_stmt: testlist (augassign testlist | ('=' testlist)*)
396 : testlist expr_stmt_trailer
397 (if (and $2 (stringp $1) (string-match "^\\(\\sw\\|\\s_\\)+$" $1))
398 ;; If this is an assignment statement and left side is a symbol,
399 ;; then generate a 'variable token, else return 'code token.
400 (VARIABLE-TAG $1 nil nil)
404 ;; Could be EMPTY because of eq_testlist_zom.
405 ;; (augassign testlist | ('=' testlist)*)
415 | eq_testlist_zom ASSIGN testlist
419 ;; augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
420 ;; | '<<=' | '>>=' | '**=' | '//='
422 : PLUSEQ | MINUSEQ | MULTEQ | DIVEQ | MODEQ
423 | AMPEQ | OREQ | HATEQ | LTLTEQ
424 | GTGTEQ | EXPEQ | DIVDIVEQ
427 ;;;============================================================================
429 ;;;============================================================================
431 ;; del_stmt: 'del' exprlist
437 ;; exprlist: expr (',' expr)* [',']
439 : expr_list comma_opt
447 | expr_list COMMA expr
451 ;;;============================================================================
453 ;;;============================================================================
461 ;;;============================================================================
463 ;;;============================================================================
473 ;; break_stmt: 'break'
479 ;; continue_stmt: 'continue'
485 ;; return_stmt: 'return' [testlist]
487 : RETURN testlist_opt
498 ;; yield_stmt: 'yield' testlist
506 ;; raise_stmt: 'raise' [test [',' test [',' test]]]
508 : RAISE zero_one_two_or_three_tests
512 ;; [test [',' test [',' test]]]
513 zero_one_two_or_three_tests
515 | test zero_one_or_two_tests
519 ;; [',' test [',' test]]
520 zero_one_or_two_tests
522 | COMMA test zero_or_one_comma_test
527 zero_or_one_comma_test
533 ;;;============================================================================
535 ;;;============================================================================
537 ;; import_stmt : 'import' dotted_as_name (',' dotted_as_name)*
538 ;; | 'from' dotted_name 'import'
539 ;; ('*' | import_as_name (',' import_as_name)*)
541 : IMPORT dotted_as_name_list
543 | FROM dotted_name IMPORT star_or_import_as_name_list
547 ;; dotted_as_name (',' dotted_as_name)*
550 | dotted_as_name_list COMMA dotted_as_name
553 ;; ('*' | import_as_name (',' import_as_name)*)
554 star_or_import_as_name_list
557 | import_as_name_list
561 ;; import_as_name (',' import_as_name)*
565 | import_as_name_list COMMA import_as_name
569 ;; import_as_name: NAME [NAME NAME]
575 ;; dotted_as_name: dotted_name [AS NAME]
577 : dotted_name as_name_opt
587 ;; dotted_name: NAME ('.' NAME)*
590 | dotted_name PERIOD NAME
591 (format "%s.%s" $1 $3)
594 ;;;============================================================================
596 ;;;============================================================================
598 ;; global_stmt: 'global' NAME (',' NAME)*
600 : GLOBAL comma_sep_name_list
607 | comma_sep_name_list COMMA NAME
610 ;;;============================================================================
612 ;;;============================================================================
614 ;; exec_stmt: 'exec' expr ['in' test [',' test]]
616 : EXEC expr exec_trailer
620 ;; ['in' test [',' test]]
623 | IN test comma_test_opt
634 ;;;============================================================================
636 ;;;============================================================================
638 ;; assert_stmt: 'assert' test [',' test]
640 : ASSERT test comma_test_opt
644 ;;;****************************************************************************
646 ;;;****************************************************************************
657 ;;;============================================================================
659 ;;;============================================================================
661 ;; if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
663 : IF test COLON suite elif_suite_pair_list else_suite_pair_opt
667 ;; ('elif' test ':' suite)*
670 | elif_suite_pair_list ELIF test COLON suite
674 ;; ['else' ':' suite]
681 ;; This NT follows the COLON token for most compound statements.
682 ;; suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
686 | NEWLINE indented_block
692 (EXPANDFULL $1 indented_block_body)
704 ;;;============================================================================
706 ;;;============================================================================
708 ;; while_stmt: 'while' test ':' suite ['else' ':' suite]
710 : WHILE test COLON suite else_suite_pair_opt
714 ;;;============================================================================
716 ;;;============================================================================
718 ;; for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
720 : FOR exprlist IN testlist COLON suite else_suite_pair_opt
724 ;;;============================================================================
726 ;;;============================================================================
728 ;; try_stmt: ('try' ':' suite (except_clause ':' suite)+ #diagram:break
729 ;; ['else' ':' suite] | 'try' ':' suite 'finally' ':' suite)
731 : TRY COLON suite except_clause_suite_pair_list else_suite_pair_opt
733 | TRY COLON suite FINALLY COLON suite
737 ;; (except_clause ':' suite)+
738 except_clause_suite_pair_list
739 : except_clause COLON suite
741 | except_clause_suite_pair_list except_clause COLON suite
745 ;; # NB compile.c makes sure that the default except clause is last
746 ;; except_clause: 'except' [test [',' test]]
748 : EXCEPT zero_one_or_two_test
755 | test zero_or_one_comma_test
759 ;;;============================================================================
761 ;;;============================================================================
763 ;; funcdef: 'def' NAME parameters ':' suite
765 : DEF NAME function_parameter_list COLON suite
766 (FUNCTION-TAG $2 nil $3)
769 function_parameter_list
771 (let ((wisent-python-EXPANDING-block t))
772 (EXPANDFULL $1 function_parameters))
775 ;; parameters: '(' [varargslist] ')'
781 | function_parameter COMMA
782 | function_parameter RPAREN
788 ;; (VARIABLE-TAG $1 nil nil)
790 (VARIABLE-TAG $2 nil nil)
792 (VARIABLE-TAG $2 nil nil)
795 ;;;============================================================================
796 ;;;@@ class_declaration
797 ;;;============================================================================
799 ;; classdef: 'class' NAME ['(' testlist ')'] ':' suite
801 : CLASS NAME paren_class_list_opt COLON suite
802 (TYPE-TAG $2 $1 ;; Name "class"
804 (cons $3 nil) ;; (SUPERCLASSES . INTERFACES)
808 ;; ['(' testlist ')']
816 (let ((wisent-python-EXPANDING-block t))
817 (mapcar 'semantic-tag-name (EXPANDFULL $1 paren_classes)))
820 ;; parameters: '(' [varargslist] ')'
827 (VARIABLE-TAG $1 nil nil)
829 (VARIABLE-TAG $1 nil nil)
832 ;; In general, the base class can be specified by a general expression
833 ;; which evaluates to a class object, i.e., base classes are not just names!
834 ;; However base classes are names in most cases. Thus the
835 ;; non-terminals below work only with simple names. Even if the
836 ;; parser can parse general expressions, I don't see much benefit in
837 ;; generating a string of expression as base class "name".
842 ;;;****************************************************************************
844 ;;;****************************************************************************
846 ;; test: and_test ('or' and_test)* | lambdef
852 ;; and_test ('or' and_test)*
855 | test_test OR and_test
859 ;; and_test: not_test ('and' not_test)*
862 | and_test AND not_test
866 ;; not_test: 'not' not_test | comparison
873 ;; comparison: expr (comp_op expr)*
876 | comparison comp_op expr
880 ;; comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
882 : LT | GT | EQ | GE | LE | LTGT | NE | IN | NOT IN | IS | IS NOT
885 ;; expr: xor_expr ('|' xor_expr)*
892 ;; xor_expr: and_expr ('^' and_expr)*
895 | xor_expr HAT and_expr
899 ;; and_expr: shift_expr ('&' shift_expr)*
902 | and_expr AMP shift_expr
906 ;; shift_expr: arith_expr (('<<'|'>>') arith_expr)*
909 | shift_expr shift_expr_operators arith_expr
919 ;; arith_expr: term (('+'|'-') term)*
922 | arith_expr plus_or_minus term
932 ;; term: factor (('*'|'/'|'%'|'//') factor)*
935 | term term_operator factor
946 ;; factor: ('+'|'-'|'~') factor | power
948 : prefix_operators factor
960 ;; power: atom trailer* ('**' factor)*
962 : atom trailer_zom exponent_zom
964 (if $2 (concat " " $2 " ") "")
965 (if $3 (concat " " $3) "")
971 | trailer_zom trailer
977 | exponent_zom EXPONENT factor
981 ;; trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
991 ;; atom: '(' [testlist] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}'
992 ;; | '`' testlist '`' | NAME | NUMBER | STRING+
1000 | BACKQUOTE testlist BACKQUOTE
1004 | one_or_more_string
1013 ;; testlist: test (',' test)* [',']
1015 : comma_sep_test_list comma_opt
1021 | comma_sep_test_list COMMA test
1022 (format "%s, %s" $1 $3)
1025 ;; (read $1) and (read $2) were done before to peel away the double quotes.
1026 ;; However that does not work for single quotes, so it was taken out.
1029 | one_or_more_string STRING_LITERAL
1033 ;;;****************************************************************************
1035 ;;;****************************************************************************
1037 ;; lambdef: 'lambda' [varargslist] ':' test
1039 : LAMBDA varargslist_opt COLON test
1040 (format "%s %s" $1 (or $2 ""))
1049 ;; varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME)
1050 ;; | fpdef ['=' test] (',' fpdef ['=' test])* [',']
1052 : fpdef_opt_test_list_comma_zom rest_args
1054 | fpdef_opt_test_list comma_opt
1057 ;; ('*' NAME [',' '**' NAME] | '**' NAME)
1059 : MULT NAME multmult_name_opt
1060 () ;;(VARIABLE-TAG $2 nil nil)
1062 () ;;(VARIABLE-TAG $2 nil nil)
1068 | COMMA EXPONENT NAME
1069 (VARIABLE-TAG $3 nil nil)
1072 fpdef_opt_test_list_comma_zom
1074 | fpdef_opt_test_list_comma_zom fpdef_opt_test COMMA
1078 ;; fpdef ['=' test] (',' fpdef ['=' test])*
1081 | fpdef_opt_test_list COMMA fpdef_opt_test
1090 ;; fpdef: NAME | '(' fplist ')'
1093 (VARIABLE-TAG $1 nil nil)
1094 ;; The alternative below breaks the parser. I don't know why, but my guess
1095 ;; is that LPAREN/RPAREN clash with the ones in function_parameters.
1096 ;; | LPAREN fplist RPAREN
1100 ;; fplist: fpdef (',' fpdef)* [',']
1102 : fpdef_list comma_opt
1105 ;; fpdef (',' fpdef)*
1108 | fpdef_list COMMA fpdef
1118 ;;;****************************************************************************
1120 ;;;****************************************************************************
1134 ;;; python.wy ends here