1 ;;; python.wy -- LALR grammar for Python
3 ;; Copyright (C) 2002-2015 Free Software Foundation, Inc.
4 ;; Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
5 ;; 2009, 2010 Python Software Foundation; All Rights Reserved
7 ;; Author: Richard Kim <ryk@dspwiz.com>
8 ;; Maintainer: Richard Kim <ryk@dspwiz.com>
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software: you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation, either version 3 of the License, or
17 ;; (at your option) any later version.
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
29 ;; This is an LALR Python parser that follows the official Python
30 ;; grammar closely, with very few exceptions. The Python grammar is
31 ;; used and reproduced under the following license:
33 ;; PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2
34 ;; --------------------------------------------
35 ;; 1. This LICENSE AGREEMENT is between the Python Software Foundation
36 ;; ("PSF"), and the Individual or Organization ("Licensee") accessing
37 ;; and otherwise using this software ("Python") in source or binary
38 ;; form and its associated documentation.
40 ;; 2. Subject to the terms and conditions of this License Agreement,
41 ;; PSF hereby grants Licensee a nonexclusive, royalty-free, world-wide
42 ;; license to reproduce, analyze, test, perform and/or display
43 ;; publicly, prepare derivative works, distribute, and otherwise use
44 ;; Python alone or in any derivative version, provided, however, that
45 ;; PSF's License Agreement and PSF's notice of copyright, i.e.,
46 ;; "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
47 ;; 2009, 2010 Python Software Foundation; All Rights Reserved" are
48 ;; retained in Python alone or in any derivative version prepared by
51 ;; 3. In the event Licensee prepares a derivative work that is based
52 ;; on or incorporates Python or any part thereof, and wants to make
53 ;; the derivative work available to others as provided herein, then
54 ;; Licensee hereby agrees to include in any such work a brief summary
55 ;; of the changes made to Python.
57 ;; 4. PSF is making Python available to Licensee on an "AS IS"
58 ;; basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR
59 ;; IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND
60 ;; DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS
61 ;; FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT
62 ;; INFRINGE ANY THIRD PARTY RIGHTS.
64 ;; 5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON
65 ;; FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS A
66 ;; RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, OR
67 ;; ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF.
69 ;; 6. This License Agreement will automatically terminate upon a
70 ;; material breach of its terms and conditions.
72 ;; 7. Nothing in this License Agreement shall be deemed to create any
73 ;; relationship of agency, partnership, or joint venture between PSF
74 ;; and Licensee. This License Agreement does not grant permission to
75 ;; use PSF trademarks or trade name in a trademark sense to endorse or
76 ;; promote products or services of Licensee, or any third party.
78 ;; 8. By copying, installing or otherwise using Python, Licensee
79 ;; agrees to be bound by the terms and conditions of this License
84 ;; * Verify that semantic-lex-python-number regexp is correct.
90 %package wisent-python-wy
91 %provide semantic/wisent/python-wy
94 (declare-function wisent-python-reconstitute-function-tag
95 "semantic/wisent/python" (tag suite))
96 (declare-function wisent-python-reconstitute-class-tag "semantic/wisent/python"
98 (declare-function semantic-parse-region "semantic"
99 (start end &optional nonterminal depth returnonerror))
102 %languagemode python-mode
104 ;; The default start symbol
106 ;; Alternate entry points
107 ;; - Needed by partial re-parse
108 %start function_parameter
110 %start indented_block
111 ;; - Needed by EXPANDFULL clauses
112 %start function_parameters
114 %start indented_block_body
116 ;; -------------------------------
117 ;; Misc. Python specific terminals
118 ;; -------------------------------
119 ;; The values of these tokens are for documentation only; they are not
120 ;; used by the lexer.
121 %token <charquote> BACKSLASH "\\"
122 %token <newline> NEWLINE "\n"
123 %token <indentation> INDENT "^\\s-+"
124 %token <indentation> DEDENT "[^:INDENT:]"
125 %token <indentation> INDENT_BLOCK "(INDENT DEDENT)"
127 ;; -----------------------------
128 ;; Block & Parenthesis terminals
129 ;; -----------------------------
130 %type <block> ;;syntax "\\s(\\|\\s)" matchdatatype block
132 %token <block> PAREN_BLOCK "(LPAREN RPAREN)"
133 %token <block> BRACE_BLOCK "(LBRACE RBRACE)"
134 %token <block> BRACK_BLOCK "(LBRACK RBRACK)"
136 %token <open-paren> LPAREN "("
137 %token <close-paren> RPAREN ")"
138 %token <open-paren> LBRACE "{"
139 %token <close-paren> RBRACE "}"
140 %token <open-paren> LBRACK "["
141 %token <close-paren> RBRACK "]"
143 ;; ------------------
144 ;; Operator terminals
145 ;; ------------------
146 %type <punctuation> ;;syntax "\\(\\s.\\|\\s$\\|\\s'\\)+" matchdatatype string
148 %token <punctuation> LTLTEQ "<<="
149 %token <punctuation> GTGTEQ ">>="
150 %token <punctuation> EXPEQ "**="
151 %token <punctuation> DIVDIVEQ "//="
152 %token <punctuation> DIVDIV "//"
153 %token <punctuation> LTLT "<<"
154 %token <punctuation> GTGT ">>"
155 %token <punctuation> EXPONENT "**"
156 %token <punctuation> EQ "=="
157 %token <punctuation> GE ">="
158 %token <punctuation> LE "<="
159 %token <punctuation> PLUSEQ "+="
160 %token <punctuation> MINUSEQ "-="
161 %token <punctuation> MULTEQ "*="
162 %token <punctuation> DIVEQ "/="
163 %token <punctuation> MODEQ "%="
164 %token <punctuation> AMPEQ "&="
165 %token <punctuation> OREQ "|="
166 %token <punctuation> HATEQ "^="
167 %token <punctuation> LTGT "<>"
168 %token <punctuation> NE "!="
169 %token <punctuation> HAT "^"
170 %token <punctuation> LT "<"
171 %token <punctuation> GT ">"
172 %token <punctuation> AMP "&"
173 %token <punctuation> MULT "*"
174 %token <punctuation> DIV "/"
175 %token <punctuation> MOD "%"
176 %token <punctuation> PLUS "+"
177 %token <punctuation> MINUS "-"
178 %token <punctuation> PERIOD "."
179 %token <punctuation> TILDE "~"
180 %token <punctuation> BAR "|"
181 %token <punctuation> COLON ":"
182 %token <punctuation> SEMICOLON ";"
183 %token <punctuation> COMMA ","
184 %token <punctuation> ASSIGN "="
185 %token <punctuation> BACKQUOTE "`"
186 %token <punctuation> AT "@"
192 %token <string> STRING_LITERAL
194 %type <number> ;;syntax semantic-lex-number-expression
195 %token <number> NUMBER_LITERAL
197 %type <symbol> ;;syntax "\\(\\sw\\|\\s_\\)+"
203 %type <keyword> ;;syntax "\\(\\sw\\|\\s_\\)+" matchdatatype keyword
207 "Logical AND binary operator ... "
211 "EXPR as NAME makes value of EXPR available as variable NAME"
213 %keyword ASSERT "assert"
215 "Raise AssertionError exception if <expr> is false"
217 %keyword BREAK "break"
219 "Terminate 'for' or 'while' loop"
221 %keyword CLASS "class"
225 %keyword CONTINUE "continue"
226 %put CONTINUE summary
227 "Skip to the next iteration of enclosing 'for' or 'while' loop"
231 "Define a new function"
235 "Delete specified objects, i.e., undo what assignment did"
239 "Shorthand for 'else if' following an 'if' statement"
243 "Start the 'else' clause following an 'if' statement"
245 %keyword EXCEPT "except"
247 "Specify exception handlers along with 'try' keyword"
251 "Dynamically execute Python code"
253 %keyword FINALLY "finally"
255 "Specify code to be executed after 'try' statements whether or not an exception occurred"
263 "Modify behavior of 'import' statement"
265 %keyword GLOBAL "global"
267 "Declare one or more symbols as global symbols"
271 "Start 'if' conditional statement"
273 %keyword IMPORT "import"
275 "Load specified modules"
279 "Part of 'for' statement "
283 "Binary operator that tests for object equality"
285 %keyword LAMBDA "lambda"
287 "Create anonymous function"
291 "Unary boolean negation operator"
295 "Binary logical 'or' operator"
299 "Statement that does nothing"
301 %keyword PRINT "print"
303 "Print each argument to standard output"
305 %keyword RAISE "raise"
309 %keyword RETURN "return"
311 "Return from a function"
315 "Start of statements protected by exception handlers"
317 %keyword WHILE "while"
319 "Start a 'while' loop"
323 "Start statement with an associated context object"
325 %keyword YIELD "yield"
327 "Create a generator function"
331 ;;;****************************************************************************
333 ;;;****************************************************************************
335 ;; A simple_stmt is a statement that does not involve INDENT tokens.
336 ;; A compound_stmt is a statement that does involve INDENT tokens.
343 ;;;****************************************************************************
345 ;;;****************************************************************************
347 ;; simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
349 : small_stmt_list semicolon_opt NEWLINE
352 ;; small_stmt (';' small_stmt)*
355 | small_stmt_list SEMICOLON small_stmt
370 ;;;============================================================================
372 ;;;============================================================================
374 ;; print_stmt: 'print' [ test (',' test)* [','] ]
375 ;; | '>>' test [ (',' test)+ [','] ]
377 : PRINT print_stmt_trailer
381 ;; [ test (',' test)* [','] ] | '>>' test [ (',' test)+ [','] ]
385 | GTGT test trailing_test_list_with_opt_comma_opt
389 ;; [ (',' test)+ [','] ]
390 trailing_test_list_with_opt_comma_opt
392 | trailing_test_list comma_opt
400 | trailing_test_list COMMA test
404 ;;;============================================================================
406 ;;;============================================================================
408 ;; expr_stmt: testlist (augassign testlist | ('=' testlist)*)
410 : testlist expr_stmt_trailer
411 (if (and $2 (stringp $1) (string-match "^\\(\\sw\\|\\s_\\)+$" $1))
412 ;; If this is an assignment statement and left side is a symbol,
413 ;; then generate a 'variable token, else return 'code token.
414 (VARIABLE-TAG $1 nil nil)
418 ;; Could be EMPTY because of eq_testlist_zom.
419 ;; (augassign testlist | ('=' testlist)*)
429 | eq_testlist_zom ASSIGN testlist
433 ;; augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
434 ;; | '<<=' | '>>=' | '**=' | '//='
436 : PLUSEQ | MINUSEQ | MULTEQ | DIVEQ | MODEQ
437 | AMPEQ | OREQ | HATEQ | LTLTEQ
438 | GTGTEQ | EXPEQ | DIVDIVEQ
441 ;;;============================================================================
443 ;;;============================================================================
445 ;; del_stmt: 'del' exprlist
451 ;; exprlist: expr (',' expr)* [',']
453 : expr_list comma_opt
461 | expr_list COMMA expr
465 ;;;============================================================================
467 ;;;============================================================================
475 ;;;============================================================================
477 ;;;============================================================================
487 ;; break_stmt: 'break'
493 ;; continue_stmt: 'continue'
499 ;; return_stmt: 'return' [testlist]
501 : RETURN testlist_opt
512 ;; yield_stmt: 'yield' testlist
520 ;; raise_stmt: 'raise' [test [',' test [',' test]]]
522 : RAISE zero_one_two_or_three_tests
526 ;; [test [',' test [',' test]]]
527 zero_one_two_or_three_tests
529 | test zero_one_or_two_tests
533 ;; [',' test [',' test]]
534 zero_one_or_two_tests
536 | COMMA test zero_or_one_comma_test
541 zero_or_one_comma_test
547 ;;;============================================================================
549 ;;;============================================================================
551 ;; import_stmt : 'import' dotted_as_name (',' dotted_as_name)*
552 ;; | 'from' dotted_name 'import'
553 ;; ('*' | import_as_name (',' import_as_name)*)
555 : IMPORT dotted_as_name_list
557 | FROM dotted_name IMPORT star_or_import_as_name_list
561 ;; dotted_as_name (',' dotted_as_name)*
563 : dotted_as_name_list COMMA dotted_as_name
569 ;; ('*' | import_as_name (',' import_as_name)*)
570 star_or_import_as_name_list
573 | import_as_name_list
577 ;; import_as_name (',' import_as_name)*
581 | import_as_name_list COMMA import_as_name
585 ;; import_as_name: NAME [NAME NAME]
591 ;; dotted_as_name: dotted_name [AS NAME]
593 : dotted_name as_name_opt
603 ;; dotted_name: NAME ('.' NAME)*
606 | dotted_name PERIOD NAME
607 (format "%s.%s" $1 $3)
610 ;;;============================================================================
612 ;;;============================================================================
614 ;; global_stmt: 'global' NAME (',' NAME)*
616 : GLOBAL comma_sep_name_list
623 | comma_sep_name_list COMMA NAME
626 ;;;============================================================================
628 ;;;============================================================================
630 ;; exec_stmt: 'exec' expr ['in' test [',' test]]
632 : EXEC expr exec_trailer
636 ;; ['in' test [',' test]]
639 | IN test comma_test_opt
650 ;;;============================================================================
652 ;;;============================================================================
654 ;; assert_stmt: 'assert' test [',' test]
656 : ASSERT test comma_test_opt
660 ;;;****************************************************************************
662 ;;;****************************************************************************
674 ;;;============================================================================
676 ;;;============================================================================
678 ;; if_stmt: 'if' test ':' suite ('elif' test ':' suite)* ['else' ':' suite]
680 : IF test COLON suite elif_suite_pair_list else_suite_pair_opt
684 ;; ('elif' test ':' suite)*
687 | elif_suite_pair_list ELIF test COLON suite
691 ;; ['else' ':' suite]
698 ;; This non-terminal follows the COLON token for most compound statements.
699 ;; suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
703 | NEWLINE indented_block
709 (EXPANDFULL $1 indented_block_body)
721 ;;;============================================================================
723 ;;;============================================================================
725 ;; while_stmt: 'while' test ':' suite ['else' ':' suite]
727 : WHILE test COLON suite else_suite_pair_opt
731 ;;;============================================================================
733 ;;;============================================================================
735 ;; for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite]
737 : FOR exprlist IN testlist COLON suite else_suite_pair_opt
741 ;;;============================================================================
743 ;;;============================================================================
745 ;; try_stmt: ('try' ':' suite (except_clause ':' suite)+ #diagram:break
746 ;; ['else' ':' suite] | 'try' ':' suite 'finally' ':' suite)
748 : TRY COLON suite except_clause_suite_pair_list else_suite_pair_opt
750 | TRY COLON suite FINALLY COLON suite
754 ;; (except_clause ':' suite)+
755 except_clause_suite_pair_list
756 : except_clause COLON suite
758 | except_clause_suite_pair_list except_clause COLON suite
762 ;; # NB compile.c makes sure that the default except clause is last
763 ;; except_clause: 'except' [test [',' test]]
765 : EXCEPT zero_one_or_two_test
772 | test zero_or_one_comma_test
776 ;;;============================================================================
778 ;;;============================================================================
780 ;; with_stmt: 'with' test [ with_var ] ':' suite
782 : WITH test COLON suite
784 | WITH test with_var COLON suite
785 (CODE-TAG $1 nil) ;; TODO capture variable
793 ;;;============================================================================
795 ;;;============================================================================
798 : AT dotted_name varargslist_opt NEWLINE
799 (FUNCTION-TAG $2 "decorator" $3)
805 | decorator decorators
809 ;; funcdef: [decorators] 'def' NAME parameters ':' suite
811 : DEF NAME function_parameter_list COLON suite
812 (wisent-python-reconstitute-function-tag
813 (FUNCTION-TAG $2 nil $3) $5)
814 | decorators DEF NAME function_parameter_list COLON suite
815 (wisent-python-reconstitute-function-tag
816 (FUNCTION-TAG $3 nil $4 :decorators $1) $6)
819 function_parameter_list
821 (let ((wisent-python-EXPANDING-block t))
822 (EXPANDFULL $1 function_parameters))
825 ;; parameters: '(' [varargslist] ')'
831 | function_parameter COMMA
832 | function_parameter RPAREN
838 ;; (VARIABLE-TAG $1 nil nil)
840 (VARIABLE-TAG $2 nil nil)
842 (VARIABLE-TAG $2 nil nil)
845 ;;;============================================================================
846 ;;;@@ class_declaration
847 ;;;============================================================================
849 ;; classdef: 'class' NAME ['(' testlist ')'] ':' suite
851 : CLASS NAME paren_class_list_opt COLON suite
852 (wisent-python-reconstitute-class-tag
853 (TYPE-TAG $2 $1 ;; Name "class"
855 (cons $3 nil) ;; (SUPERCLASSES . INTERFACES)
859 ;; ['(' testlist ')']
867 (let ((wisent-python-EXPANDING-block t))
868 (mapcar 'semantic-tag-name (EXPANDFULL $1 paren_classes)))
871 ;; Contents of the parenthesized base class list: '(' [testlist] ')'
878 (VARIABLE-TAG $1 nil nil)
880 (VARIABLE-TAG $1 nil nil)
883 ;; In general, a base class can be specified by any expression that
884 ;; evaluates to a class object, i.e., base classes are not just names!
885 ;; However, base classes are simple names in most cases. Thus the
886 ;; non-terminals below work only with simple names. Even if the
887 ;; parser could parse general expressions, I don't see much benefit in
888 ;; generating the text of an expression as the base class "name".
893 ;;;****************************************************************************
895 ;;;****************************************************************************
897 ;; test: and_test ('or' and_test)* | lambdef
903 ;; and_test ('or' and_test)*
906 | test_test OR and_test
910 ;; and_test: not_test ('and' not_test)*
913 | and_test AND not_test
917 ;; not_test: 'not' not_test | comparison
924 ;; comparison: expr (comp_op expr)*
927 | comparison comp_op expr
931 ;; comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not'
933 : LT | GT | EQ | GE | LE | LTGT | NE | IN | NOT IN | IS | IS NOT
936 ;; expr: xor_expr ('|' xor_expr)*
943 ;; xor_expr: and_expr ('^' and_expr)*
946 | xor_expr HAT and_expr
950 ;; and_expr: shift_expr ('&' shift_expr)*
953 | and_expr AMP shift_expr
957 ;; shift_expr: arith_expr (('<<'|'>>') arith_expr)*
960 | shift_expr shift_expr_operators arith_expr
970 ;; arith_expr: term (('+'|'-') term)*
973 | arith_expr plus_or_minus term
983 ;; term: factor (('*'|'/'|'%'|'//') factor)*
986 | term term_operator factor
997 ;; factor: ('+'|'-'|'~') factor | power
999 : prefix_operators factor
1011 ;; power: atom trailer* ('**' factor)*
1013 : atom trailer_zom exponent_zom
1015 (if $2 (concat " " $2 " ") "")
1016 (if $3 (concat " " $3) "")
1022 | trailer_zom trailer
1028 | exponent_zom EXPONENT factor
1032 ;; trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1042 ;; atom: '(' [testlist] ')' | '[' [listmaker] ']' | '{' [dictmaker] '}'
1043 ;; | '`' testlist '`' | NAME | NUMBER | STRING+
1051 | BACKQUOTE testlist BACKQUOTE
1055 | one_or_more_string
1064 ;; testlist: test (',' test)* [',']
1066 : comma_sep_test_list comma_opt
1072 | comma_sep_test_list COMMA test
1073 (format "%s, %s" $1 $3)
1076 ;; Previously (read $1) and (read $2) were used to strip the double quotes.
1077 ;; However, that does not work for single-quoted strings, so it was removed.
1080 | one_or_more_string STRING_LITERAL
1084 ;;;****************************************************************************
1086 ;;;****************************************************************************
1088 ;; lambdef: 'lambda' [varargslist] ':' test
1090 : LAMBDA varargslist_opt COLON test
1091 (format "%s %s" $1 (or $2 ""))
1100 ;; varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME] | '**' NAME)
1101 ;; | fpdef ['=' test] (',' fpdef ['=' test])* [',']
1103 : fpdef_opt_test_list_comma_zom rest_args
1105 | fpdef_opt_test_list comma_opt
1108 ;; ('*' NAME [',' '**' NAME] | '**' NAME)
1110 : MULT NAME multmult_name_opt
1111 () ;;(VARIABLE-TAG $2 nil nil)
1113 () ;;(VARIABLE-TAG $2 nil nil)
1119 | COMMA EXPONENT NAME
1120 (VARIABLE-TAG $3 nil nil)
1123 fpdef_opt_test_list_comma_zom
1125 | fpdef_opt_test_list_comma_zom fpdef_opt_test COMMA
1129 ;; fpdef ['=' test] (',' fpdef ['=' test])*
1132 | fpdef_opt_test_list COMMA fpdef_opt_test
1141 ;; fpdef: NAME | '(' fplist ')'
1144 (VARIABLE-TAG $1 nil nil)
1145 ;; The alternative below breaks the parser. The cause is unknown, but my
1146 ;; guess is that its LPAREN/RPAREN clash with the ones in function_parameters.
1147 ;; | LPAREN fplist RPAREN
1151 ;; fplist: fpdef (',' fpdef)* [',']
1153 : fpdef_list comma_opt
1156 ;; fpdef (',' fpdef)*
1159 | fpdef_list COMMA fpdef
1169 ;;;****************************************************************************
1171 ;;;****************************************************************************
1185 ;;; python.wy ends here