merge 2to3 improvements
[python/dscho.git] / Python / ast.c
blob83572ee882150e97e1190a14ce4f26847ce04708
1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
16 #include <assert.h>
18 /* Data structure used internally */
19 struct compiling {
20 char *c_encoding; /* source encoding */
21 PyArena *c_arena; /* arena for allocating memory */
22 const char *c_filename; /* filename */
25 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
26 static expr_ty ast_for_expr(struct compiling *, const node *);
27 static stmt_ty ast_for_stmt(struct compiling *, const node *);
28 static asdl_seq *ast_for_suite(struct compiling *, const node *);
29 static asdl_seq *ast_for_exprlist(struct compiling *, const node *,
30 expr_context_ty);
31 static expr_ty ast_for_testlist(struct compiling *, const node *);
32 static stmt_ty ast_for_classdef(struct compiling *, const node *, asdl_seq *);
34 /* Note different signature for ast_for_call */
35 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
37 static PyObject *parsenumber(struct compiling *, const char *);
38 static PyObject *parsestr(struct compiling *, const node *n, int *bytesmode);
39 static PyObject *parsestrplus(struct compiling *, const node *n,
40 int *bytesmode);
42 #ifndef LINENO
43 #define LINENO(n) ((n)->n_lineno)
44 #endif
46 #define COMP_GENEXP 0
47 #define COMP_LISTCOMP 1
48 #define COMP_SETCOMP 2
50 static identifier
51 new_identifier(const char* n, PyArena *arena)
53 PyObject* id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
54 Py_UNICODE *u;
55 if (!id)
56 return NULL;
57 u = PyUnicode_AS_UNICODE(id);
58 /* Check whether there are non-ASCII characters in the
59 identifier; if so, normalize to NFKC. */
60 for (; *u; u++) {
61 if (*u >= 128) {
62 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
63 PyObject *id2;
64 if (!m)
65 return NULL;
66 id2 = PyObject_CallMethod(m, "normalize", "sO", "NFKC", id);
67 Py_DECREF(m);
68 if (!id2)
69 return NULL;
70 Py_DECREF(id);
71 id = id2;
72 break;
75 PyUnicode_InternInPlace(&id);
76 PyArena_AddPyObject(arena, id);
77 return id;
80 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
82 /* This routine provides an invalid object for the syntax error.
83 The outermost routine must unpack this error and create the
84 proper object. We do this so that we don't have to pass
85 the filename to everything function.
87 XXX Maybe we should just pass the filename...
90 static int
91 ast_error(const node *n, const char *errstr)
93 PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
94 if (!u)
95 return 0;
96 PyErr_SetObject(PyExc_SyntaxError, u);
97 Py_DECREF(u);
98 return 0;
101 static void
102 ast_error_finish(const char *filename)
104 PyObject *type, *value, *tback, *errstr, *loc, *tmp;
105 long lineno;
107 assert(PyErr_Occurred());
108 if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
109 return;
111 PyErr_Fetch(&type, &value, &tback);
112 errstr = PyTuple_GetItem(value, 0);
113 if (!errstr)
114 return;
115 Py_INCREF(errstr);
116 lineno = PyLong_AsLong(PyTuple_GetItem(value, 1));
117 if (lineno == -1) {
118 Py_DECREF(errstr);
119 return;
121 Py_DECREF(value);
123 loc = PyErr_ProgramText(filename, lineno);
124 if (!loc) {
125 Py_INCREF(Py_None);
126 loc = Py_None;
128 tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
129 Py_DECREF(loc);
130 if (!tmp) {
131 Py_DECREF(errstr);
132 return;
134 value = PyTuple_Pack(2, errstr, tmp);
135 Py_DECREF(errstr);
136 Py_DECREF(tmp);
137 if (!value)
138 return;
139 PyErr_Restore(type, value, tback);
142 /* num_stmts() returns number of contained statements.
144 Use this routine to determine how big a sequence is needed for
145 the statements in a parse tree. Its raison d'etre is this bit of
146 grammar:
148 stmt: simple_stmt | compound_stmt
149 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
151 A simple_stmt can contain multiple small_stmt elements joined
152 by semicolons. If the arg is a simple_stmt, the number of
153 small_stmt elements is returned.
156 static int
157 num_stmts(const node *n)
159 int i, l;
160 node *ch;
162 switch (TYPE(n)) {
163 case single_input:
164 if (TYPE(CHILD(n, 0)) == NEWLINE)
165 return 0;
166 else
167 return num_stmts(CHILD(n, 0));
168 case file_input:
169 l = 0;
170 for (i = 0; i < NCH(n); i++) {
171 ch = CHILD(n, i);
172 if (TYPE(ch) == stmt)
173 l += num_stmts(ch);
175 return l;
176 case stmt:
177 return num_stmts(CHILD(n, 0));
178 case compound_stmt:
179 return 1;
180 case simple_stmt:
181 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
182 case suite:
183 if (NCH(n) == 1)
184 return num_stmts(CHILD(n, 0));
185 else {
186 l = 0;
187 for (i = 2; i < (NCH(n) - 1); i++)
188 l += num_stmts(CHILD(n, i));
189 return l;
191 default: {
192 char buf[128];
194 sprintf(buf, "Non-statement found: %d %d",
195 TYPE(n), NCH(n));
196 Py_FatalError(buf);
199 assert(0);
200 return 0;
203 /* Transform the CST rooted at node * to the appropriate AST
206 mod_ty
207 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
208 PyArena *arena)
210 int i, j, k, num;
211 asdl_seq *stmts = NULL;
212 stmt_ty s;
213 node *ch;
214 struct compiling c;
216 if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
217 c.c_encoding = "utf-8";
218 if (TYPE(n) == encoding_decl) {
219 #if 0
220 ast_error(n, "encoding declaration in Unicode string");
221 goto error;
222 #endif
223 n = CHILD(n, 0);
225 } else if (TYPE(n) == encoding_decl) {
226 c.c_encoding = STR(n);
227 n = CHILD(n, 0);
228 } else {
229 /* PEP 3120 */
230 c.c_encoding = "utf-8";
232 c.c_arena = arena;
233 c.c_filename = filename;
235 k = 0;
236 switch (TYPE(n)) {
237 case file_input:
238 stmts = asdl_seq_new(num_stmts(n), arena);
239 if (!stmts)
240 return NULL;
241 for (i = 0; i < NCH(n) - 1; i++) {
242 ch = CHILD(n, i);
243 if (TYPE(ch) == NEWLINE)
244 continue;
245 REQ(ch, stmt);
246 num = num_stmts(ch);
247 if (num == 1) {
248 s = ast_for_stmt(&c, ch);
249 if (!s)
250 goto error;
251 asdl_seq_SET(stmts, k++, s);
253 else {
254 ch = CHILD(ch, 0);
255 REQ(ch, simple_stmt);
256 for (j = 0; j < num; j++) {
257 s = ast_for_stmt(&c, CHILD(ch, j * 2));
258 if (!s)
259 goto error;
260 asdl_seq_SET(stmts, k++, s);
264 return Module(stmts, arena);
265 case eval_input: {
266 expr_ty testlist_ast;
268 /* XXX Why not comp_for here? */
269 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
270 if (!testlist_ast)
271 goto error;
272 return Expression(testlist_ast, arena);
274 case single_input:
275 if (TYPE(CHILD(n, 0)) == NEWLINE) {
276 stmts = asdl_seq_new(1, arena);
277 if (!stmts)
278 goto error;
279 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
280 arena));
281 if (!asdl_seq_GET(stmts, 0))
282 goto error;
283 return Interactive(stmts, arena);
285 else {
286 n = CHILD(n, 0);
287 num = num_stmts(n);
288 stmts = asdl_seq_new(num, arena);
289 if (!stmts)
290 goto error;
291 if (num == 1) {
292 s = ast_for_stmt(&c, n);
293 if (!s)
294 goto error;
295 asdl_seq_SET(stmts, 0, s);
297 else {
298 /* Only a simple_stmt can contain multiple statements. */
299 REQ(n, simple_stmt);
300 for (i = 0; i < NCH(n); i += 2) {
301 if (TYPE(CHILD(n, i)) == NEWLINE)
302 break;
303 s = ast_for_stmt(&c, CHILD(n, i));
304 if (!s)
305 goto error;
306 asdl_seq_SET(stmts, i / 2, s);
310 return Interactive(stmts, arena);
312 default:
313 PyErr_Format(PyExc_SystemError,
314 "invalid node %d for PyAST_FromNode", TYPE(n));
315 goto error;
317 error:
318 ast_error_finish(filename);
319 return NULL;
322 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
325 static operator_ty
326 get_operator(const node *n)
328 switch (TYPE(n)) {
329 case VBAR:
330 return BitOr;
331 case CIRCUMFLEX:
332 return BitXor;
333 case AMPER:
334 return BitAnd;
335 case LEFTSHIFT:
336 return LShift;
337 case RIGHTSHIFT:
338 return RShift;
339 case PLUS:
340 return Add;
341 case MINUS:
342 return Sub;
343 case STAR:
344 return Mult;
345 case SLASH:
346 return Div;
347 case DOUBLESLASH:
348 return FloorDiv;
349 case PERCENT:
350 return Mod;
351 default:
352 return (operator_ty)0;
356 static const char* FORBIDDEN[] = {
357 "None",
358 "True",
359 "False",
360 "__debug__",
361 NULL,
364 static int
365 forbidden_name(expr_ty e, const node *n)
367 const char **p;
368 assert(PyUnicode_Check(e->v.Name.id));
369 for (p = FORBIDDEN; *p; p++) {
370 if (PyUnicode_CompareWithASCIIString(e->v.Name.id, *p) == 0) {
371 ast_error(n, "assignment to keyword");
372 return 1;
375 return 0;
378 /* Set the context ctx for expr_ty e, recursively traversing e.
380 Only sets context for expr kinds that "can appear in assignment context"
381 (according to ../Parser/Python.asdl). For other expr kinds, it sets
382 an appropriate syntax error and returns false.
385 static int
386 set_context(struct compiling *c, expr_ty e, expr_context_ty ctx, const node *n)
388 asdl_seq *s = NULL;
389 /* If a particular expression type can't be used for assign / delete,
390 set expr_name to its name and an error message will be generated.
392 const char* expr_name = NULL;
394 /* The ast defines augmented store and load contexts, but the
395 implementation here doesn't actually use them. The code may be
396 a little more complex than necessary as a result. It also means
397 that expressions in an augmented assignment have a Store context.
398 Consider restructuring so that augmented assignment uses
399 set_context(), too.
401 assert(ctx != AugStore && ctx != AugLoad);
403 switch (e->kind) {
404 case Attribute_kind:
405 e->v.Attribute.ctx = ctx;
406 break;
407 case Subscript_kind:
408 e->v.Subscript.ctx = ctx;
409 break;
410 case Starred_kind:
411 e->v.Starred.ctx = ctx;
412 if (!set_context(c, e->v.Starred.value, ctx, n))
413 return 0;
414 break;
415 case Name_kind:
416 if (ctx == Store) {
417 if (forbidden_name(e, n))
418 return 0; /* forbidden_name() calls ast_error() */
420 e->v.Name.ctx = ctx;
421 break;
422 case List_kind:
423 e->v.List.ctx = ctx;
424 s = e->v.List.elts;
425 break;
426 case Tuple_kind:
427 if (asdl_seq_LEN(e->v.Tuple.elts) == 0)
428 return ast_error(n, "can't assign to ()");
429 e->v.Tuple.ctx = ctx;
430 s = e->v.Tuple.elts;
431 break;
432 case Lambda_kind:
433 expr_name = "lambda";
434 break;
435 case Call_kind:
436 expr_name = "function call";
437 break;
438 case BoolOp_kind:
439 case BinOp_kind:
440 case UnaryOp_kind:
441 expr_name = "operator";
442 break;
443 case GeneratorExp_kind:
444 expr_name = "generator expression";
445 break;
446 case Yield_kind:
447 expr_name = "yield expression";
448 break;
449 case ListComp_kind:
450 expr_name = "list comprehension";
451 break;
452 case SetComp_kind:
453 expr_name = "set comprehension";
454 break;
455 case DictComp_kind:
456 expr_name = "dict comprehension";
457 break;
458 case Dict_kind:
459 case Set_kind:
460 case Num_kind:
461 case Str_kind:
462 expr_name = "literal";
463 break;
464 case Ellipsis_kind:
465 expr_name = "Ellipsis";
466 break;
467 case Compare_kind:
468 expr_name = "comparison";
469 break;
470 case IfExp_kind:
471 expr_name = "conditional expression";
472 break;
473 default:
474 PyErr_Format(PyExc_SystemError,
475 "unexpected expression in assignment %d (line %d)",
476 e->kind, e->lineno);
477 return 0;
479 /* Check for error string set by switch */
480 if (expr_name) {
481 char buf[300];
482 PyOS_snprintf(buf, sizeof(buf),
483 "can't %s %s",
484 ctx == Store ? "assign to" : "delete",
485 expr_name);
486 return ast_error(n, buf);
489 /* If the LHS is a list or tuple, we need to set the assignment
490 context for all the contained elements.
492 if (s) {
493 int i;
495 for (i = 0; i < asdl_seq_LEN(s); i++) {
496 if (!set_context(c, (expr_ty)asdl_seq_GET(s, i), ctx, n))
497 return 0;
500 return 1;
503 static operator_ty
504 ast_for_augassign(struct compiling *c, const node *n)
506 REQ(n, augassign);
507 n = CHILD(n, 0);
508 switch (STR(n)[0]) {
509 case '+':
510 return Add;
511 case '-':
512 return Sub;
513 case '/':
514 if (STR(n)[1] == '/')
515 return FloorDiv;
516 else
517 return Div;
518 case '%':
519 return Mod;
520 case '<':
521 return LShift;
522 case '>':
523 return RShift;
524 case '&':
525 return BitAnd;
526 case '^':
527 return BitXor;
528 case '|':
529 return BitOr;
530 case '*':
531 if (STR(n)[1] == '*')
532 return Pow;
533 else
534 return Mult;
535 default:
536 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
537 return (operator_ty)0;
541 static cmpop_ty
542 ast_for_comp_op(struct compiling *c, const node *n)
544 /* comp_op: '<'|'>'|'=='|'>='|'<='|'!='|'in'|'not' 'in'|'is'
545 |'is' 'not'
547 REQ(n, comp_op);
548 if (NCH(n) == 1) {
549 n = CHILD(n, 0);
550 switch (TYPE(n)) {
551 case LESS:
552 return Lt;
553 case GREATER:
554 return Gt;
555 case EQEQUAL: /* == */
556 return Eq;
557 case LESSEQUAL:
558 return LtE;
559 case GREATEREQUAL:
560 return GtE;
561 case NOTEQUAL:
562 return NotEq;
563 case NAME:
564 if (strcmp(STR(n), "in") == 0)
565 return In;
566 if (strcmp(STR(n), "is") == 0)
567 return Is;
568 default:
569 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
570 STR(n));
571 return (cmpop_ty)0;
574 else if (NCH(n) == 2) {
575 /* handle "not in" and "is not" */
576 switch (TYPE(CHILD(n, 0))) {
577 case NAME:
578 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
579 return NotIn;
580 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
581 return IsNot;
582 default:
583 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
584 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
585 return (cmpop_ty)0;
588 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
589 NCH(n));
590 return (cmpop_ty)0;
593 static asdl_seq *
594 seq_for_testlist(struct compiling *c, const node *n)
596 /* testlist: test (',' test)* [','] */
597 asdl_seq *seq;
598 expr_ty expression;
599 int i;
600 assert(TYPE(n) == testlist || TYPE(n) == testlist_comp);
602 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
603 if (!seq)
604 return NULL;
606 for (i = 0; i < NCH(n); i += 2) {
607 assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == test_nocond);
609 expression = ast_for_expr(c, CHILD(n, i));
610 if (!expression)
611 return NULL;
613 assert(i / 2 < seq->size);
614 asdl_seq_SET(seq, i / 2, expression);
616 return seq;
619 static arg_ty
620 compiler_arg(struct compiling *c, const node *n)
622 identifier name;
623 expr_ty annotation = NULL;
624 node *ch;
626 assert(TYPE(n) == tfpdef || TYPE(n) == vfpdef);
627 ch = CHILD(n, 0);
628 name = NEW_IDENTIFIER(ch);
629 if (!name)
630 return NULL;
632 if (NCH(n) == 3 && TYPE(CHILD(n, 1)) == COLON) {
633 annotation = ast_for_expr(c, CHILD(n, 2));
634 if (!annotation)
635 return NULL;
638 return arg(name, annotation, c->c_arena);
639 #if 0
640 result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
641 if (!set_context(c, result, Store, n))
642 return NULL;
643 return result;
644 #endif
647 /* returns -1 if failed to handle keyword only arguments
648 returns new position to keep processing if successful
649 (',' tfpdef ['=' test])*
651 start pointing here
653 static int
654 handle_keywordonly_args(struct compiling *c, const node *n, int start,
655 asdl_seq *kwonlyargs, asdl_seq *kwdefaults)
657 PyObject *argname;
658 node *ch;
659 expr_ty expression, annotation;
660 arg_ty arg;
661 int i = start;
662 int j = 0; /* index for kwdefaults and kwonlyargs */
664 if (kwonlyargs == NULL) {
665 ast_error(CHILD(n, start), "named arguments must follow bare *");
666 return -1;
668 assert(kwdefaults != NULL);
669 while (i < NCH(n)) {
670 ch = CHILD(n, i);
671 switch (TYPE(ch)) {
672 case vfpdef:
673 case tfpdef:
674 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
675 expression = ast_for_expr(c, CHILD(n, i + 2));
676 asdl_seq_SET(kwdefaults, j, expression);
677 i += 2; /* '=' and test */
679 else { /* setting NULL if no default value exists */
680 asdl_seq_SET(kwdefaults, j, NULL);
682 if (NCH(ch) == 3) {
683 /* ch is NAME ':' test */
684 annotation = ast_for_expr(c, CHILD(ch, 2));
685 if (!annotation) {
686 ast_error(ch, "expected expression");
687 goto error;
690 else {
691 annotation = NULL;
693 ch = CHILD(ch, 0);
694 argname = NEW_IDENTIFIER(ch);
695 if (!argname)
696 goto error;
697 arg = arg(argname, annotation, c->c_arena);
698 if (!arg)
699 goto error;
700 asdl_seq_SET(kwonlyargs, j++, arg);
701 i += 2; /* the name and the comma */
702 break;
703 case DOUBLESTAR:
704 return i;
705 default:
706 ast_error(ch, "unexpected node");
707 goto error;
710 return i;
711 error:
712 return -1;
715 /* Create AST for argument list. */
717 static arguments_ty
718 ast_for_arguments(struct compiling *c, const node *n)
720 /* This function handles both typedargslist (function definition)
721 and varargslist (lambda definition).
723 parameters: '(' [typedargslist] ')'
724 typedargslist: ((tfpdef ['=' test] ',')*
725 ('*' [tfpdef] (',' tfpdef ['=' test])* [',' '**' tfpdef]
726 | '**' tfpdef)
727 | tfpdef ['=' test] (',' tfpdef ['=' test])* [','])
728 tfpdef: NAME [':' test]
729 varargslist: ((vfpdef ['=' test] ',')*
730 ('*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef]
731 | '**' vfpdef)
732 | vfpdef ['=' test] (',' vfpdef ['=' test])* [','])
733 vfpdef: NAME
735 int i, j, k, nposargs = 0, nkwonlyargs = 0;
736 int nposdefaults = 0, found_default = 0;
737 asdl_seq *posargs, *posdefaults, *kwonlyargs, *kwdefaults;
738 identifier vararg = NULL, kwarg = NULL;
739 arg_ty arg;
740 expr_ty varargannotation = NULL, kwargannotation = NULL;
741 node *ch;
743 if (TYPE(n) == parameters) {
744 if (NCH(n) == 2) /* () as argument list */
745 return arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL,
746 NULL, c->c_arena);
747 n = CHILD(n, 1);
749 assert(TYPE(n) == typedargslist || TYPE(n) == varargslist);
751 /* First count the number of positional args & defaults. The
752 variable i is the loop index for this for loop and the next.
753 The next loop picks up where the first leaves off.
755 for (i = 0; i < NCH(n); i++) {
756 ch = CHILD(n, i);
757 if (TYPE(ch) == STAR) {
758 /* skip star */
759 i++;
760 if (i < NCH(n) && /* skip argument following star */
761 (TYPE(CHILD(n, i)) == tfpdef ||
762 TYPE(CHILD(n, i)) == vfpdef)) {
763 i++;
765 break;
767 if (TYPE(ch) == DOUBLESTAR) break;
768 if (TYPE(ch) == vfpdef || TYPE(ch) == tfpdef) nposargs++;
769 if (TYPE(ch) == EQUAL) nposdefaults++;
771 /* count the number of keyword only args &
772 defaults for keyword only args */
773 for ( ; i < NCH(n); ++i) {
774 ch = CHILD(n, i);
775 if (TYPE(ch) == DOUBLESTAR) break;
776 if (TYPE(ch) == tfpdef || TYPE(ch) == vfpdef) nkwonlyargs++;
778 posargs = (nposargs ? asdl_seq_new(nposargs, c->c_arena) : NULL);
779 if (!posargs && nposargs)
780 goto error;
781 kwonlyargs = (nkwonlyargs ?
782 asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
783 if (!kwonlyargs && nkwonlyargs)
784 goto error;
785 posdefaults = (nposdefaults ?
786 asdl_seq_new(nposdefaults, c->c_arena) : NULL);
787 if (!posdefaults && nposdefaults)
788 goto error;
789 /* The length of kwonlyargs and kwdefaults are same
790 since we set NULL as default for keyword only argument w/o default
791 - we have sequence data structure, but no dictionary */
792 kwdefaults = (nkwonlyargs ?
793 asdl_seq_new(nkwonlyargs, c->c_arena) : NULL);
794 if (!kwdefaults && nkwonlyargs)
795 goto error;
797 if (nposargs + nkwonlyargs > 255) {
798 ast_error(n, "more than 255 arguments");
799 return NULL;
802 /* tfpdef: NAME [':' test]
803 vfpdef: NAME
805 i = 0;
806 j = 0; /* index for defaults */
807 k = 0; /* index for args */
808 while (i < NCH(n)) {
809 ch = CHILD(n, i);
810 switch (TYPE(ch)) {
811 case tfpdef:
812 case vfpdef:
813 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
814 anything other than EQUAL or a comma? */
815 /* XXX Should NCH(n) check be made a separate check? */
816 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
817 expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
818 if (!expression)
819 goto error;
820 assert(posdefaults != NULL);
821 asdl_seq_SET(posdefaults, j++, expression);
822 i += 2;
823 found_default = 1;
825 else if (found_default) {
826 ast_error(n,
827 "non-default argument follows default argument");
828 goto error;
830 arg = compiler_arg(c, ch);
831 if (!arg)
832 goto error;
833 asdl_seq_SET(posargs, k++, arg);
834 i += 2; /* the name and the comma */
835 break;
836 case STAR:
837 if (i+1 >= NCH(n)) {
838 ast_error(CHILD(n, i),
839 "named arguments must follow bare *");
840 goto error;
842 ch = CHILD(n, i+1); /* tfpdef or COMMA */
843 if (TYPE(ch) == COMMA) {
844 int res = 0;
845 i += 2; /* now follows keyword only arguments */
846 res = handle_keywordonly_args(c, n, i,
847 kwonlyargs, kwdefaults);
848 if (res == -1) goto error;
849 i = res; /* res has new position to process */
851 else {
852 vararg = NEW_IDENTIFIER(CHILD(ch, 0));
853 if (!vararg)
854 return NULL;
855 if (NCH(ch) > 1) {
856 /* there is an annotation on the vararg */
857 varargannotation = ast_for_expr(c, CHILD(ch, 2));
859 i += 3;
860 if (i < NCH(n) && (TYPE(CHILD(n, i)) == tfpdef
861 || TYPE(CHILD(n, i)) == vfpdef)) {
862 int res = 0;
863 res = handle_keywordonly_args(c, n, i,
864 kwonlyargs, kwdefaults);
865 if (res == -1) goto error;
866 i = res; /* res has new position to process */
869 break;
870 case DOUBLESTAR:
871 ch = CHILD(n, i+1); /* tfpdef */
872 assert(TYPE(ch) == tfpdef || TYPE(ch) == vfpdef);
873 kwarg = NEW_IDENTIFIER(CHILD(ch, 0));
874 if (NCH(ch) > 1) {
875 /* there is an annotation on the kwarg */
876 kwargannotation = ast_for_expr(c, CHILD(ch, 2));
878 if (!kwarg)
879 goto error;
880 i += 3;
881 break;
882 default:
883 PyErr_Format(PyExc_SystemError,
884 "unexpected node in varargslist: %d @ %d",
885 TYPE(ch), i);
886 goto error;
889 return arguments(posargs, vararg, varargannotation, kwonlyargs, kwarg,
890 kwargannotation, posdefaults, kwdefaults, c->c_arena);
891 error:
892 Py_XDECREF(vararg);
893 Py_XDECREF(kwarg);
894 return NULL;
897 static expr_ty
898 ast_for_dotted_name(struct compiling *c, const node *n)
900 expr_ty e;
901 identifier id;
902 int lineno, col_offset;
903 int i;
905 REQ(n, dotted_name);
907 lineno = LINENO(n);
908 col_offset = n->n_col_offset;
910 id = NEW_IDENTIFIER(CHILD(n, 0));
911 if (!id)
912 return NULL;
913 e = Name(id, Load, lineno, col_offset, c->c_arena);
914 if (!e)
915 return NULL;
917 for (i = 2; i < NCH(n); i+=2) {
918 id = NEW_IDENTIFIER(CHILD(n, i));
919 if (!id)
920 return NULL;
921 e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
922 if (!e)
923 return NULL;
926 return e;
929 static expr_ty
930 ast_for_decorator(struct compiling *c, const node *n)
932 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
933 expr_ty d = NULL;
934 expr_ty name_expr;
936 REQ(n, decorator);
937 REQ(CHILD(n, 0), AT);
938 REQ(RCHILD(n, -1), NEWLINE);
940 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
941 if (!name_expr)
942 return NULL;
944 if (NCH(n) == 3) { /* No arguments */
945 d = name_expr;
946 name_expr = NULL;
948 else if (NCH(n) == 5) { /* Call with no arguments */
949 d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n),
950 n->n_col_offset, c->c_arena);
951 if (!d)
952 return NULL;
953 name_expr = NULL;
955 else {
956 d = ast_for_call(c, CHILD(n, 3), name_expr);
957 if (!d)
958 return NULL;
959 name_expr = NULL;
962 return d;
965 static asdl_seq*
966 ast_for_decorators(struct compiling *c, const node *n)
968 asdl_seq* decorator_seq;
969 expr_ty d;
970 int i;
972 REQ(n, decorators);
973 decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
974 if (!decorator_seq)
975 return NULL;
977 for (i = 0; i < NCH(n); i++) {
978 d = ast_for_decorator(c, CHILD(n, i));
979 if (!d)
980 return NULL;
981 asdl_seq_SET(decorator_seq, i, d);
983 return decorator_seq;
986 static stmt_ty
987 ast_for_funcdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
989 /* funcdef: 'def' NAME parameters ['->' test] ':' suite */
990 identifier name;
991 arguments_ty args;
992 asdl_seq *body;
993 expr_ty returns = NULL;
994 int name_i = 1;
996 REQ(n, funcdef);
998 name = NEW_IDENTIFIER(CHILD(n, name_i));
999 if (!name)
1000 return NULL;
1001 args = ast_for_arguments(c, CHILD(n, name_i + 1));
1002 if (!args)
1003 return NULL;
1004 if (TYPE(CHILD(n, name_i+2)) == RARROW) {
1005 returns = ast_for_expr(c, CHILD(n, name_i + 3));
1006 if (!returns)
1007 return NULL;
1008 name_i += 2;
1010 body = ast_for_suite(c, CHILD(n, name_i + 3));
1011 if (!body)
1012 return NULL;
1014 return FunctionDef(name, args, body, decorator_seq, returns, LINENO(n),
1015 n->n_col_offset, c->c_arena);
1018 static stmt_ty
1019 ast_for_decorated(struct compiling *c, const node *n)
1021 /* decorated: decorators (classdef | funcdef) */
1022 stmt_ty thing = NULL;
1023 asdl_seq *decorator_seq = NULL;
1025 REQ(n, decorated);
1027 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
1028 if (!decorator_seq)
1029 return NULL;
1031 assert(TYPE(CHILD(n, 1)) == funcdef ||
1032 TYPE(CHILD(n, 1)) == classdef);
1034 if (TYPE(CHILD(n, 1)) == funcdef) {
1035 thing = ast_for_funcdef(c, CHILD(n, 1), decorator_seq);
1036 } else if (TYPE(CHILD(n, 1)) == classdef) {
1037 thing = ast_for_classdef(c, CHILD(n, 1), decorator_seq);
1039 /* we count the decorators in when talking about the class' or
1040 * function's line number */
1041 if (thing) {
1042 thing->lineno = LINENO(n);
1043 thing->col_offset = n->n_col_offset;
1045 return thing;
1048 static expr_ty
1049 ast_for_lambdef(struct compiling *c, const node *n)
1051 /* lambdef: 'lambda' [varargslist] ':' test
1052 lambdef_nocond: 'lambda' [varargslist] ':' test_nocond */
1053 arguments_ty args;
1054 expr_ty expression;
1056 if (NCH(n) == 3) {
1057 args = arguments(NULL, NULL, NULL, NULL, NULL, NULL, NULL,
1058 NULL, c->c_arena);
1059 if (!args)
1060 return NULL;
1061 expression = ast_for_expr(c, CHILD(n, 2));
1062 if (!expression)
1063 return NULL;
1065 else {
1066 args = ast_for_arguments(c, CHILD(n, 1));
1067 if (!args)
1068 return NULL;
1069 expression = ast_for_expr(c, CHILD(n, 3));
1070 if (!expression)
1071 return NULL;
1074 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
1077 static expr_ty
1078 ast_for_ifexpr(struct compiling *c, const node *n)
1080 /* test: or_test 'if' or_test 'else' test */
1081 expr_ty expression, body, orelse;
1083 assert(NCH(n) == 5);
1084 body = ast_for_expr(c, CHILD(n, 0));
1085 if (!body)
1086 return NULL;
1087 expression = ast_for_expr(c, CHILD(n, 2));
1088 if (!expression)
1089 return NULL;
1090 orelse = ast_for_expr(c, CHILD(n, 4));
1091 if (!orelse)
1092 return NULL;
1093 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
1094 c->c_arena);
1098 Count the number of 'for' loops in a comprehension.
1100 Helper for ast_for_comprehension().
1103 static int
1104 count_comp_fors(struct compiling *c, const node *n)
1106 int n_fors = 0;
1108 count_comp_for:
1109 n_fors++;
1110 REQ(n, comp_for);
1111 if (NCH(n) == 5)
1112 n = CHILD(n, 4);
1113 else
1114 return n_fors;
1115 count_comp_iter:
1116 REQ(n, comp_iter);
1117 n = CHILD(n, 0);
1118 if (TYPE(n) == comp_for)
1119 goto count_comp_for;
1120 else if (TYPE(n) == comp_if) {
1121 if (NCH(n) == 3) {
1122 n = CHILD(n, 2);
1123 goto count_comp_iter;
1125 else
1126 return n_fors;
1129 /* Should never be reached */
1130 PyErr_SetString(PyExc_SystemError,
1131 "logic error in count_comp_fors");
1132 return -1;
1135 /* Count the number of 'if' statements in a comprehension.
1137 Helper for ast_for_comprehension().
1140 static int
1141 count_comp_ifs(struct compiling *c, const node *n)
1143 int n_ifs = 0;
1145 while (1) {
1146 REQ(n, comp_iter);
1147 if (TYPE(CHILD(n, 0)) == comp_for)
1148 return n_ifs;
1149 n = CHILD(n, 0);
1150 REQ(n, comp_if);
1151 n_ifs++;
1152 if (NCH(n) == 2)
1153 return n_ifs;
1154 n = CHILD(n, 2);
1158 static asdl_seq *
1159 ast_for_comprehension(struct compiling *c, const node *n)
1161 int i, n_fors;
1162 asdl_seq *comps;
1164 n_fors = count_comp_fors(c, n);
1165 if (n_fors == -1)
1166 return NULL;
1168 comps = asdl_seq_new(n_fors, c->c_arena);
1169 if (!comps)
1170 return NULL;
1172 for (i = 0; i < n_fors; i++) {
1173 comprehension_ty comp;
1174 asdl_seq *t;
1175 expr_ty expression;
1176 node *for_ch;
1178 REQ(n, comp_for);
1180 for_ch = CHILD(n, 1);
1181 t = ast_for_exprlist(c, for_ch, Store);
1182 if (!t)
1183 return NULL;
1184 expression = ast_for_expr(c, CHILD(n, 3));
1185 if (!expression)
1186 return NULL;
1188 /* Check the # of children rather than the length of t, since
1189 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1190 if (NCH(for_ch) == 1)
1191 comp = comprehension((expr_ty)asdl_seq_GET(t, 0), expression,
1192 NULL, c->c_arena);
1193 else
1194 comp = comprehension(Tuple(t, Store, LINENO(n), n->n_col_offset,
1195 c->c_arena),
1196 expression, NULL, c->c_arena);
1198 if (!comp)
1199 return NULL;
1201 if (NCH(n) == 5) {
1202 int j, n_ifs;
1203 asdl_seq *ifs;
1205 n = CHILD(n, 4);
1206 n_ifs = count_comp_ifs(c, n);
1207 if (n_ifs == -1)
1208 return NULL;
1210 ifs = asdl_seq_new(n_ifs, c->c_arena);
1211 if (!ifs)
1212 return NULL;
1214 for (j = 0; j < n_ifs; j++) {
1215 REQ(n, comp_iter);
1216 n = CHILD(n, 0);
1217 REQ(n, comp_if);
1219 expression = ast_for_expr(c, CHILD(n, 1));
1220 if (!expression)
1221 return NULL;
1222 asdl_seq_SET(ifs, j, expression);
1223 if (NCH(n) == 3)
1224 n = CHILD(n, 2);
1226 /* on exit, must guarantee that n is a comp_for */
1227 if (TYPE(n) == comp_iter)
1228 n = CHILD(n, 0);
1229 comp->ifs = ifs;
1231 asdl_seq_SET(comps, i, comp);
1233 return comps;
1236 static expr_ty
1237 ast_for_itercomp(struct compiling *c, const node *n, int type)
1239 /* testlist_comp: test ( comp_for | (',' test)* [','] )
1240 argument: [test '='] test [comp_for] # Really [keyword '='] test */
1241 expr_ty elt;
1242 asdl_seq *comps;
1244 assert(NCH(n) > 1);
1246 elt = ast_for_expr(c, CHILD(n, 0));
1247 if (!elt)
1248 return NULL;
1250 comps = ast_for_comprehension(c, CHILD(n, 1));
1251 if (!comps)
1252 return NULL;
1254 if (type == COMP_GENEXP)
1255 return GeneratorExp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1256 else if (type == COMP_LISTCOMP)
1257 return ListComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1258 else if (type == COMP_SETCOMP)
1259 return SetComp(elt, comps, LINENO(n), n->n_col_offset, c->c_arena);
1260 else
1261 /* Should never happen */
1262 return NULL;
1265 static expr_ty
1266 ast_for_dictcomp(struct compiling *c, const node *n)
1268 expr_ty key, value;
1269 asdl_seq *comps;
1271 assert(NCH(n) > 3);
1272 REQ(CHILD(n, 1), COLON);
1274 key = ast_for_expr(c, CHILD(n, 0));
1275 if (!key)
1276 return NULL;
1278 value = ast_for_expr(c, CHILD(n, 2));
1279 if (!value)
1280 return NULL;
1282 comps = ast_for_comprehension(c, CHILD(n, 3));
1283 if (!comps)
1284 return NULL;
1286 return DictComp(key, value, comps, LINENO(n), n->n_col_offset, c->c_arena);
1289 static expr_ty
1290 ast_for_genexp(struct compiling *c, const node *n)
1292 assert(TYPE(n) == (testlist_comp) || TYPE(n) == (argument));
1293 return ast_for_itercomp(c, n, COMP_GENEXP);
1296 static expr_ty
1297 ast_for_listcomp(struct compiling *c, const node *n)
1299 assert(TYPE(n) == (testlist_comp));
1300 return ast_for_itercomp(c, n, COMP_LISTCOMP);
1303 static expr_ty
1304 ast_for_setcomp(struct compiling *c, const node *n)
1306 assert(TYPE(n) == (dictorsetmaker));
1307 return ast_for_itercomp(c, n, COMP_SETCOMP);
1311 static expr_ty
1312 ast_for_atom(struct compiling *c, const node *n)
1314 /* atom: '(' [yield_expr|testlist_comp] ')' | '[' [testlist_comp] ']'
1315 | '{' [dictmaker|testlist_comp] '}' | NAME | NUMBER | STRING+
1316 | '...' | 'None' | 'True' | 'False'
1318 node *ch = CHILD(n, 0);
1319 int bytesmode = 0;
1321 switch (TYPE(ch)) {
1322 case NAME: {
1323 /* All names start in Load context, but may later be
1324 changed. */
1325 PyObject *name = NEW_IDENTIFIER(ch);
1326 if (!name)
1327 return NULL;
1328 return Name(name, Load, LINENO(n), n->n_col_offset, c->c_arena);
1330 case STRING: {
1331 PyObject *str = parsestrplus(c, n, &bytesmode);
1332 if (!str) {
1333 if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
1334 PyObject *type, *value, *tback, *errstr;
1335 PyErr_Fetch(&type, &value, &tback);
1336 errstr = PyObject_Str(value);
1337 if (errstr) {
1338 char *s = "";
1339 char buf[128];
1340 s = _PyUnicode_AsString(errstr);
1341 PyOS_snprintf(buf, sizeof(buf), "(unicode error) %s", s);
1342 ast_error(n, buf);
1343 Py_DECREF(errstr);
1344 } else {
1345 ast_error(n, "(unicode error) unknown error");
1347 Py_DECREF(type);
1348 Py_DECREF(value);
1349 Py_XDECREF(tback);
1351 return NULL;
1353 PyArena_AddPyObject(c->c_arena, str);
1354 if (bytesmode)
1355 return Bytes(str, LINENO(n), n->n_col_offset, c->c_arena);
1356 else
1357 return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1359 case NUMBER: {
1360 PyObject *pynum = parsenumber(c, STR(ch));
1361 if (!pynum)
1362 return NULL;
1364 PyArena_AddPyObject(c->c_arena, pynum);
1365 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1367 case ELLIPSIS: /* Ellipsis */
1368 return Ellipsis(LINENO(n), n->n_col_offset, c->c_arena);
1369 case LPAR: /* some parenthesized expressions */
1370 ch = CHILD(n, 1);
1372 if (TYPE(ch) == RPAR)
1373 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1375 if (TYPE(ch) == yield_expr)
1376 return ast_for_expr(c, ch);
1378 /* testlist_comp: test ( comp_for | (',' test)* [','] ) */
1379 if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == comp_for))
1380 return ast_for_genexp(c, ch);
1382 return ast_for_testlist(c, ch);
1383 case LSQB: /* list (or list comprehension) */
1384 ch = CHILD(n, 1);
1386 if (TYPE(ch) == RSQB)
1387 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1389 REQ(ch, testlist_comp);
1390 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1391 asdl_seq *elts = seq_for_testlist(c, ch);
1392 if (!elts)
1393 return NULL;
1395 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1397 else
1398 return ast_for_listcomp(c, ch);
1399 case LBRACE: {
1400 /* dictorsetmaker: test ':' test (',' test ':' test)* [','] |
1401 * test (gen_for | (',' test)* [',']) */
1402 int i, size;
1403 asdl_seq *keys, *values;
1405 ch = CHILD(n, 1);
1406 if (TYPE(ch) == RBRACE) {
1407 /* it's an empty dict */
1408 return Dict(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1409 } else if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1410 /* it's a simple set */
1411 asdl_seq *elts;
1412 size = (NCH(ch) + 1) / 2; /* +1 in case no trailing comma */
1413 elts = asdl_seq_new(size, c->c_arena);
1414 if (!elts)
1415 return NULL;
1416 for (i = 0; i < NCH(ch); i += 2) {
1417 expr_ty expression;
1418 expression = ast_for_expr(c, CHILD(ch, i));
1419 if (!expression)
1420 return NULL;
1421 asdl_seq_SET(elts, i / 2, expression);
1423 return Set(elts, LINENO(n), n->n_col_offset, c->c_arena);
1424 } else if (TYPE(CHILD(ch, 1)) == comp_for) {
1425 /* it's a set comprehension */
1426 return ast_for_setcomp(c, ch);
1427 } else if (NCH(ch) > 3 && TYPE(CHILD(ch, 3)) == comp_for) {
1428 return ast_for_dictcomp(c, ch);
1429 } else {
1430 /* it's a dict */
1431 size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1432 keys = asdl_seq_new(size, c->c_arena);
1433 if (!keys)
1434 return NULL;
1436 values = asdl_seq_new(size, c->c_arena);
1437 if (!values)
1438 return NULL;
1440 for (i = 0; i < NCH(ch); i += 4) {
1441 expr_ty expression;
1443 expression = ast_for_expr(c, CHILD(ch, i));
1444 if (!expression)
1445 return NULL;
1447 asdl_seq_SET(keys, i / 4, expression);
1449 expression = ast_for_expr(c, CHILD(ch, i + 2));
1450 if (!expression)
1451 return NULL;
1453 asdl_seq_SET(values, i / 4, expression);
1455 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1458 default:
1459 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1460 return NULL;
1464 static slice_ty
1465 ast_for_slice(struct compiling *c, const node *n)
1467 node *ch;
1468 expr_ty lower = NULL, upper = NULL, step = NULL;
1470 REQ(n, subscript);
1473 subscript: test | [test] ':' [test] [sliceop]
1474 sliceop: ':' [test]
1476 ch = CHILD(n, 0);
1477 if (NCH(n) == 1 && TYPE(ch) == test) {
1478 /* 'step' variable hold no significance in terms of being used over
1479 other vars */
1480 step = ast_for_expr(c, ch);
1481 if (!step)
1482 return NULL;
1484 return Index(step, c->c_arena);
1487 if (TYPE(ch) == test) {
1488 lower = ast_for_expr(c, ch);
1489 if (!lower)
1490 return NULL;
1493 /* If there's an upper bound it's in the second or third position. */
1494 if (TYPE(ch) == COLON) {
1495 if (NCH(n) > 1) {
1496 node *n2 = CHILD(n, 1);
1498 if (TYPE(n2) == test) {
1499 upper = ast_for_expr(c, n2);
1500 if (!upper)
1501 return NULL;
1504 } else if (NCH(n) > 2) {
1505 node *n2 = CHILD(n, 2);
1507 if (TYPE(n2) == test) {
1508 upper = ast_for_expr(c, n2);
1509 if (!upper)
1510 return NULL;
1514 ch = CHILD(n, NCH(n) - 1);
1515 if (TYPE(ch) == sliceop) {
1516 if (NCH(ch) == 1) {
1517 /* No expression, so step is None */
1518 ch = CHILD(ch, 0);
1519 step = Name(new_identifier("None", c->c_arena), Load,
1520 LINENO(ch), ch->n_col_offset, c->c_arena);
1521 if (!step)
1522 return NULL;
1523 } else {
1524 ch = CHILD(ch, 1);
1525 if (TYPE(ch) == test) {
1526 step = ast_for_expr(c, ch);
1527 if (!step)
1528 return NULL;
1533 return Slice(lower, upper, step, c->c_arena);
1536 static expr_ty
1537 ast_for_binop(struct compiling *c, const node *n)
1539 /* Must account for a sequence of expressions.
1540 How should A op B op C by represented?
1541 BinOp(BinOp(A, op, B), op, C).
1544 int i, nops;
1545 expr_ty expr1, expr2, result;
1546 operator_ty newoperator;
1548 expr1 = ast_for_expr(c, CHILD(n, 0));
1549 if (!expr1)
1550 return NULL;
1552 expr2 = ast_for_expr(c, CHILD(n, 2));
1553 if (!expr2)
1554 return NULL;
1556 newoperator = get_operator(CHILD(n, 1));
1557 if (!newoperator)
1558 return NULL;
1560 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1561 c->c_arena);
1562 if (!result)
1563 return NULL;
1565 nops = (NCH(n) - 1) / 2;
1566 for (i = 1; i < nops; i++) {
1567 expr_ty tmp_result, tmp;
1568 const node* next_oper = CHILD(n, i * 2 + 1);
1570 newoperator = get_operator(next_oper);
1571 if (!newoperator)
1572 return NULL;
1574 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1575 if (!tmp)
1576 return NULL;
1578 tmp_result = BinOp(result, newoperator, tmp,
1579 LINENO(next_oper), next_oper->n_col_offset,
1580 c->c_arena);
1581 if (!tmp_result)
1582 return NULL;
1583 result = tmp_result;
1585 return result;
1588 static expr_ty
1589 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1591 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1592 subscriptlist: subscript (',' subscript)* [',']
1593 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1595 REQ(n, trailer);
1596 if (TYPE(CHILD(n, 0)) == LPAR) {
1597 if (NCH(n) == 2)
1598 return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1599 n->n_col_offset, c->c_arena);
1600 else
1601 return ast_for_call(c, CHILD(n, 1), left_expr);
1603 else if (TYPE(CHILD(n, 0)) == DOT ) {
1604 PyObject *attr_id = NEW_IDENTIFIER(CHILD(n, 1));
1605 if (!attr_id)
1606 return NULL;
1607 return Attribute(left_expr, attr_id, Load,
1608 LINENO(n), n->n_col_offset, c->c_arena);
1610 else {
1611 REQ(CHILD(n, 0), LSQB);
1612 REQ(CHILD(n, 2), RSQB);
1613 n = CHILD(n, 1);
1614 if (NCH(n) == 1) {
1615 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1616 if (!slc)
1617 return NULL;
1618 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1619 c->c_arena);
1621 else {
1622 /* The grammar is ambiguous here. The ambiguity is resolved
1623 by treating the sequence as a tuple literal if there are
1624 no slice features.
1626 int j;
1627 slice_ty slc;
1628 expr_ty e;
1629 int simple = 1;
1630 asdl_seq *slices, *elts;
1631 slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1632 if (!slices)
1633 return NULL;
1634 for (j = 0; j < NCH(n); j += 2) {
1635 slc = ast_for_slice(c, CHILD(n, j));
1636 if (!slc)
1637 return NULL;
1638 if (slc->kind != Index_kind)
1639 simple = 0;
1640 asdl_seq_SET(slices, j / 2, slc);
1642 if (!simple) {
1643 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1644 Load, LINENO(n), n->n_col_offset, c->c_arena);
1646 /* extract Index values and put them in a Tuple */
1647 elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1648 if (!elts)
1649 return NULL;
1650 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1651 slc = (slice_ty)asdl_seq_GET(slices, j);
1652 assert(slc->kind == Index_kind && slc->v.Index.value);
1653 asdl_seq_SET(elts, j, slc->v.Index.value);
1655 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1656 if (!e)
1657 return NULL;
1658 return Subscript(left_expr, Index(e, c->c_arena),
1659 Load, LINENO(n), n->n_col_offset, c->c_arena);
1664 static expr_ty
1665 ast_for_factor(struct compiling *c, const node *n)
1667 node *pfactor, *ppower, *patom, *pnum;
1668 expr_ty expression;
1670 /* If the unary - operator is applied to a constant, don't generate
1671 a UNARY_NEGATIVE opcode. Just store the approriate value as a
1672 constant. The peephole optimizer already does something like
1673 this but it doesn't handle the case where the constant is
1674 (sys.maxint - 1). In that case, we want a PyIntObject, not a
1675 PyLongObject.
1677 if (TYPE(CHILD(n, 0)) == MINUS
1678 && NCH(n) == 2
1679 && TYPE((pfactor = CHILD(n, 1))) == factor
1680 && NCH(pfactor) == 1
1681 && TYPE((ppower = CHILD(pfactor, 0))) == power
1682 && NCH(ppower) == 1
1683 && TYPE((patom = CHILD(ppower, 0))) == atom
1684 && TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1685 char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1686 if (s == NULL)
1687 return NULL;
1688 s[0] = '-';
1689 strcpy(s + 1, STR(pnum));
1690 PyObject_FREE(STR(pnum));
1691 STR(pnum) = s;
1692 return ast_for_atom(c, patom);
1695 expression = ast_for_expr(c, CHILD(n, 1));
1696 if (!expression)
1697 return NULL;
1699 switch (TYPE(CHILD(n, 0))) {
1700 case PLUS:
1701 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1702 c->c_arena);
1703 case MINUS:
1704 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1705 c->c_arena);
1706 case TILDE:
1707 return UnaryOp(Invert, expression, LINENO(n),
1708 n->n_col_offset, c->c_arena);
1710 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1711 TYPE(CHILD(n, 0)));
1712 return NULL;
1715 static expr_ty
1716 ast_for_power(struct compiling *c, const node *n)
1718 /* power: atom trailer* ('**' factor)*
1720 int i;
1721 expr_ty e, tmp;
1722 REQ(n, power);
1723 e = ast_for_atom(c, CHILD(n, 0));
1724 if (!e)
1725 return NULL;
1726 if (NCH(n) == 1)
1727 return e;
1728 for (i = 1; i < NCH(n); i++) {
1729 node *ch = CHILD(n, i);
1730 if (TYPE(ch) != trailer)
1731 break;
1732 tmp = ast_for_trailer(c, ch, e);
1733 if (!tmp)
1734 return NULL;
1735 tmp->lineno = e->lineno;
1736 tmp->col_offset = e->col_offset;
1737 e = tmp;
1739 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1740 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1741 if (!f)
1742 return NULL;
1743 tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1744 if (!tmp)
1745 return NULL;
1746 e = tmp;
1748 return e;
1751 static expr_ty
1752 ast_for_starred(struct compiling *c, const node *n)
1754 expr_ty tmp;
1755 REQ(n, star_expr);
1757 tmp = ast_for_expr(c, CHILD(n, 1));
1758 if (!tmp)
1759 return NULL;
1761 /* The Load context is changed later. */
1762 return Starred(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
/* Do not name a variable 'expr'!  Will cause a compile error. */
1769 static expr_ty
1770 ast_for_expr(struct compiling *c, const node *n)
1772 /* handle the full range of simple expressions
1773 test: or_test ['if' or_test 'else' test] | lambdef
1774 test_nocond: or_test | lambdef_nocond
1775 or_test: and_test ('or' and_test)*
1776 and_test: not_test ('and' not_test)*
1777 not_test: 'not' not_test | comparison
1778 comparison: expr (comp_op expr)*
1779 expr: xor_expr ('|' xor_expr)*
1780 xor_expr: and_expr ('^' and_expr)*
1781 and_expr: shift_expr ('&' shift_expr)*
1782 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1783 arith_expr: term (('+'|'-') term)*
1784 term: factor (('*'|'/'|'%'|'//') factor)*
1785 factor: ('+'|'-'|'~') factor | power
1786 power: atom trailer* ('**' factor)*
1789 asdl_seq *seq;
1790 int i;
1792 loop:
1793 switch (TYPE(n)) {
1794 case test:
1795 case test_nocond:
1796 if (TYPE(CHILD(n, 0)) == lambdef ||
1797 TYPE(CHILD(n, 0)) == lambdef_nocond)
1798 return ast_for_lambdef(c, CHILD(n, 0));
1799 else if (NCH(n) > 1)
1800 return ast_for_ifexpr(c, n);
1801 /* Fallthrough */
1802 case or_test:
1803 case and_test:
1804 if (NCH(n) == 1) {
1805 n = CHILD(n, 0);
1806 goto loop;
1808 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1809 if (!seq)
1810 return NULL;
1811 for (i = 0; i < NCH(n); i += 2) {
1812 expr_ty e = ast_for_expr(c, CHILD(n, i));
1813 if (!e)
1814 return NULL;
1815 asdl_seq_SET(seq, i / 2, e);
1817 if (!strcmp(STR(CHILD(n, 1)), "and"))
1818 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
1819 c->c_arena);
1820 assert(!strcmp(STR(CHILD(n, 1)), "or"));
1821 return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1822 case not_test:
1823 if (NCH(n) == 1) {
1824 n = CHILD(n, 0);
1825 goto loop;
1827 else {
1828 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1829 if (!expression)
1830 return NULL;
1832 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
1833 c->c_arena);
1835 case comparison:
1836 if (NCH(n) == 1) {
1837 n = CHILD(n, 0);
1838 goto loop;
1840 else {
1841 expr_ty expression;
1842 asdl_int_seq *ops;
1843 asdl_seq *cmps;
1844 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
1845 if (!ops)
1846 return NULL;
1847 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1848 if (!cmps) {
1849 return NULL;
1851 for (i = 1; i < NCH(n); i += 2) {
1852 cmpop_ty newoperator;
1854 newoperator = ast_for_comp_op(c, CHILD(n, i));
1855 if (!newoperator) {
1856 return NULL;
1859 expression = ast_for_expr(c, CHILD(n, i + 1));
1860 if (!expression) {
1861 return NULL;
1864 asdl_seq_SET(ops, i / 2, newoperator);
1865 asdl_seq_SET(cmps, i / 2, expression);
1867 expression = ast_for_expr(c, CHILD(n, 0));
1868 if (!expression) {
1869 return NULL;
1872 return Compare(expression, ops, cmps, LINENO(n),
1873 n->n_col_offset, c->c_arena);
1875 break;
1877 case star_expr:
1878 if (TYPE(CHILD(n, 0)) == STAR) {
1879 return ast_for_starred(c, n);
1881 /* Fallthrough */
1882 /* The next five cases all handle BinOps. The main body of code
1883 is the same in each case, but the switch turned inside out to
1884 reuse the code for each type of operator.
1886 case expr:
1887 case xor_expr:
1888 case and_expr:
1889 case shift_expr:
1890 case arith_expr:
1891 case term:
1892 if (NCH(n) == 1) {
1893 n = CHILD(n, 0);
1894 goto loop;
1896 return ast_for_binop(c, n);
1897 case yield_expr: {
1898 expr_ty exp = NULL;
1899 if (NCH(n) == 2) {
1900 exp = ast_for_testlist(c, CHILD(n, 1));
1901 if (!exp)
1902 return NULL;
1904 return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1906 case factor:
1907 if (NCH(n) == 1) {
1908 n = CHILD(n, 0);
1909 goto loop;
1911 return ast_for_factor(c, n);
1912 case power:
1913 return ast_for_power(c, n);
1914 default:
1915 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1916 return NULL;
1918 /* should never get here unless if error is set */
1919 return NULL;
1922 static expr_ty
1923 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1926 arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1927 | '**' test)
1928 argument: [test '='] test [comp_for] # Really [keyword '='] test
1931 int i, nargs, nkeywords, ngens;
1932 asdl_seq *args;
1933 asdl_seq *keywords;
1934 expr_ty vararg = NULL, kwarg = NULL;
1936 REQ(n, arglist);
1938 nargs = 0;
1939 nkeywords = 0;
1940 ngens = 0;
1941 for (i = 0; i < NCH(n); i++) {
1942 node *ch = CHILD(n, i);
1943 if (TYPE(ch) == argument) {
1944 if (NCH(ch) == 1)
1945 nargs++;
1946 else if (TYPE(CHILD(ch, 1)) == comp_for)
1947 ngens++;
1948 else
1949 nkeywords++;
1952 if (ngens > 1 || (ngens && (nargs || nkeywords))) {
1953 ast_error(n, "Generator expression must be parenthesized "
1954 "if not sole argument");
1955 return NULL;
1958 if (nargs + nkeywords + ngens > 255) {
1959 ast_error(n, "more than 255 arguments");
1960 return NULL;
1963 args = asdl_seq_new(nargs + ngens, c->c_arena);
1964 if (!args)
1965 return NULL;
1966 keywords = asdl_seq_new(nkeywords, c->c_arena);
1967 if (!keywords)
1968 return NULL;
1969 nargs = 0;
1970 nkeywords = 0;
1971 for (i = 0; i < NCH(n); i++) {
1972 node *ch = CHILD(n, i);
1973 if (TYPE(ch) == argument) {
1974 expr_ty e;
1975 if (NCH(ch) == 1) {
1976 if (nkeywords) {
1977 ast_error(CHILD(ch, 0),
1978 "non-keyword arg after keyword arg");
1979 return NULL;
1981 if (vararg) {
1982 ast_error(CHILD(ch, 0),
1983 "only named arguments may follow *expression");
1984 return NULL;
1986 e = ast_for_expr(c, CHILD(ch, 0));
1987 if (!e)
1988 return NULL;
1989 asdl_seq_SET(args, nargs++, e);
1991 else if (TYPE(CHILD(ch, 1)) == comp_for) {
1992 e = ast_for_genexp(c, ch);
1993 if (!e)
1994 return NULL;
1995 asdl_seq_SET(args, nargs++, e);
1997 else {
1998 keyword_ty kw;
1999 identifier key, tmp;
2000 int k;
2002 /* CHILD(ch, 0) is test, but must be an identifier? */
2003 e = ast_for_expr(c, CHILD(ch, 0));
2004 if (!e)
2005 return NULL;
2006 /* f(lambda x: x[0] = 3) ends up getting parsed with
2007 * LHS test = lambda x: x[0], and RHS test = 3.
2008 * SF bug 132313 points out that complaining about a keyword
2009 * then is very confusing.
2011 if (e->kind == Lambda_kind) {
2012 ast_error(CHILD(ch, 0), "lambda cannot contain assignment");
2013 return NULL;
2014 } else if (e->kind != Name_kind) {
2015 ast_error(CHILD(ch, 0), "keyword can't be an expression");
2016 return NULL;
2017 } else if (forbidden_name(e, ch)) {
2018 return NULL;
2020 key = e->v.Name.id;
2021 for (k = 0; k < nkeywords; k++) {
2022 tmp = ((keyword_ty)asdl_seq_GET(keywords, k))->arg;
2023 if (!PyUnicode_Compare(tmp, key)) {
2024 ast_error(CHILD(ch, 0), "keyword argument repeated");
2025 return NULL;
2028 e = ast_for_expr(c, CHILD(ch, 2));
2029 if (!e)
2030 return NULL;
2031 kw = keyword(key, e, c->c_arena);
2032 if (!kw)
2033 return NULL;
2034 asdl_seq_SET(keywords, nkeywords++, kw);
2037 else if (TYPE(ch) == STAR) {
2038 vararg = ast_for_expr(c, CHILD(n, i+1));
2039 if (!vararg)
2040 return NULL;
2041 i++;
2043 else if (TYPE(ch) == DOUBLESTAR) {
2044 kwarg = ast_for_expr(c, CHILD(n, i+1));
2045 if (!kwarg)
2046 return NULL;
2047 i++;
2051 return Call(func, args, keywords, vararg, kwarg, func->lineno, func->col_offset, c->c_arena);
2054 static expr_ty
2055 ast_for_testlist(struct compiling *c, const node* n)
2057 /* testlist_comp: test (comp_for | (',' test)* [',']) */
2058 /* testlist: test (',' test)* [','] */
2059 /* testlist1: test (',' test)* */
2060 assert(NCH(n) > 0);
2061 if (TYPE(n) == testlist_comp) {
2062 if (NCH(n) > 1)
2063 assert(TYPE(CHILD(n, 1)) != comp_for);
2065 else {
2066 assert(TYPE(n) == testlist ||
2067 TYPE(n) == testlist1);
2069 if (NCH(n) == 1)
2070 return ast_for_expr(c, CHILD(n, 0));
2071 else {
2072 asdl_seq *tmp = seq_for_testlist(c, n);
2073 if (!tmp)
2074 return NULL;
2075 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
2079 static stmt_ty
2080 ast_for_expr_stmt(struct compiling *c, const node *n)
2082 REQ(n, expr_stmt);
2083 /* expr_stmt: testlist (augassign (yield_expr|testlist)
2084 | ('=' (yield_expr|testlist))*)
2085 testlist: test (',' test)* [',']
2086 augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
2087 | '<<=' | '>>=' | '**=' | '//='
2088 test: ... here starts the operator precendence dance
2091 if (NCH(n) == 1) {
2092 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
2093 if (!e)
2094 return NULL;
2096 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
2098 else if (TYPE(CHILD(n, 1)) == augassign) {
2099 expr_ty expr1, expr2;
2100 operator_ty newoperator;
2101 node *ch = CHILD(n, 0);
2103 expr1 = ast_for_testlist(c, ch);
2104 if (!expr1)
2105 return NULL;
2106 if(!set_context(c, expr1, Store, ch))
2107 return NULL;
2109 ch = CHILD(n, 2);
2110 if (TYPE(ch) == testlist)
2111 expr2 = ast_for_testlist(c, ch);
2112 else
2113 expr2 = ast_for_expr(c, ch);
2114 if (!expr2)
2115 return NULL;
2117 newoperator = ast_for_augassign(c, CHILD(n, 1));
2118 if (!newoperator)
2119 return NULL;
2121 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2123 else {
2124 int i;
2125 asdl_seq *targets;
2126 node *value;
2127 expr_ty expression;
2129 /* a normal assignment */
2130 REQ(CHILD(n, 1), EQUAL);
2131 targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
2132 if (!targets)
2133 return NULL;
2134 for (i = 0; i < NCH(n) - 2; i += 2) {
2135 expr_ty e;
2136 node *ch = CHILD(n, i);
2137 if (TYPE(ch) == yield_expr) {
2138 ast_error(ch, "assignment to yield expression not possible");
2139 return NULL;
2141 e = ast_for_testlist(c, ch);
2143 /* set context to assign */
2144 if (!e)
2145 return NULL;
2147 if (!set_context(c, e, Store, CHILD(n, i)))
2148 return NULL;
2150 asdl_seq_SET(targets, i / 2, e);
2152 value = CHILD(n, NCH(n) - 1);
2153 if (TYPE(value) == testlist)
2154 expression = ast_for_testlist(c, value);
2155 else
2156 expression = ast_for_expr(c, value);
2157 if (!expression)
2158 return NULL;
2159 return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena);
2163 static asdl_seq *
2164 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2166 asdl_seq *seq;
2167 int i;
2168 expr_ty e;
2170 REQ(n, exprlist);
2172 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2173 if (!seq)
2174 return NULL;
2175 for (i = 0; i < NCH(n); i += 2) {
2176 e = ast_for_expr(c, CHILD(n, i));
2177 if (!e)
2178 return NULL;
2179 asdl_seq_SET(seq, i / 2, e);
2180 if (context && !set_context(c, e, context, CHILD(n, i)))
2181 return NULL;
2183 return seq;
2186 static stmt_ty
2187 ast_for_del_stmt(struct compiling *c, const node *n)
2189 asdl_seq *expr_list;
2191 /* del_stmt: 'del' exprlist */
2192 REQ(n, del_stmt);
2194 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2195 if (!expr_list)
2196 return NULL;
2197 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2200 static stmt_ty
2201 ast_for_flow_stmt(struct compiling *c, const node *n)
2204 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2205 | yield_stmt
2206 break_stmt: 'break'
2207 continue_stmt: 'continue'
2208 return_stmt: 'return' [testlist]
2209 yield_stmt: yield_expr
2210 yield_expr: 'yield' testlist
2211 raise_stmt: 'raise' [test [',' test [',' test]]]
2213 node *ch;
2215 REQ(n, flow_stmt);
2216 ch = CHILD(n, 0);
2217 switch (TYPE(ch)) {
2218 case break_stmt:
2219 return Break(LINENO(n), n->n_col_offset, c->c_arena);
2220 case continue_stmt:
2221 return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2222 case yield_stmt: { /* will reduce to yield_expr */
2223 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2224 if (!exp)
2225 return NULL;
2226 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2228 case return_stmt:
2229 if (NCH(ch) == 1)
2230 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2231 else {
2232 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2233 if (!expression)
2234 return NULL;
2235 return Return(expression, LINENO(n), n->n_col_offset, c->c_arena);
2237 case raise_stmt:
2238 if (NCH(ch) == 1)
2239 return Raise(NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2240 else if (NCH(ch) >= 2) {
2241 expr_ty cause = NULL;
2242 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2243 if (!expression)
2244 return NULL;
2245 if (NCH(ch) == 4) {
2246 cause = ast_for_expr(c, CHILD(ch, 3));
2247 if (!cause)
2248 return NULL;
2250 return Raise(expression, cause, LINENO(n), n->n_col_offset, c->c_arena);
2252 default:
2253 PyErr_Format(PyExc_SystemError,
2254 "unexpected flow_stmt: %d", TYPE(ch));
2255 return NULL;
2258 PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2259 return NULL;
2262 static alias_ty
2263 alias_for_import_name(struct compiling *c, const node *n)
2266 import_as_name: NAME ['as' NAME]
2267 dotted_as_name: dotted_name ['as' NAME]
2268 dotted_name: NAME ('.' NAME)*
2270 PyObject *str, *name;
2272 loop:
2273 switch (TYPE(n)) {
2274 case import_as_name:
2275 str = NULL;
2276 if (NCH(n) == 3) {
2277 str = NEW_IDENTIFIER(CHILD(n, 2));
2278 if (!str)
2279 return NULL;
2281 name = NEW_IDENTIFIER(CHILD(n, 0));
2282 if (!name)
2283 return NULL;
2284 return alias(name, str, c->c_arena);
2285 case dotted_as_name:
2286 if (NCH(n) == 1) {
2287 n = CHILD(n, 0);
2288 goto loop;
2290 else {
2291 alias_ty a = alias_for_import_name(c, CHILD(n, 0));
2292 if (!a)
2293 return NULL;
2294 assert(!a->asname);
2295 a->asname = NEW_IDENTIFIER(CHILD(n, 2));
2296 if (!a->asname)
2297 return NULL;
2298 return a;
2300 break;
2301 case dotted_name:
2302 if (NCH(n) == 1) {
2303 name = NEW_IDENTIFIER(CHILD(n, 0));
2304 if (!name)
2305 return NULL;
2306 return alias(name, NULL, c->c_arena);
2308 else {
2309 /* Create a string of the form "a.b.c" */
2310 int i;
2311 size_t len;
2312 char *s;
2313 PyObject *uni;
2315 len = 0;
2316 for (i = 0; i < NCH(n); i += 2)
2317 /* length of string plus one for the dot */
2318 len += strlen(STR(CHILD(n, i))) + 1;
2319 len--; /* the last name doesn't have a dot */
2320 str = PyBytes_FromStringAndSize(NULL, len);
2321 if (!str)
2322 return NULL;
2323 s = PyBytes_AS_STRING(str);
2324 if (!s)
2325 return NULL;
2326 for (i = 0; i < NCH(n); i += 2) {
2327 char *sch = STR(CHILD(n, i));
2328 strcpy(s, STR(CHILD(n, i)));
2329 s += strlen(sch);
2330 *s++ = '.';
2332 --s;
2333 *s = '\0';
2334 uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str),
2335 PyBytes_GET_SIZE(str),
2336 NULL);
2337 Py_DECREF(str);
2338 if (!uni)
2339 return NULL;
2340 str = uni;
2341 PyUnicode_InternInPlace(&str);
2342 PyArena_AddPyObject(c->c_arena, str);
2343 return alias(str, NULL, c->c_arena);
2345 break;
2346 case STAR:
2347 str = PyUnicode_InternFromString("*");
2348 PyArena_AddPyObject(c->c_arena, str);
2349 return alias(str, NULL, c->c_arena);
2350 default:
2351 PyErr_Format(PyExc_SystemError,
2352 "unexpected import name: %d", TYPE(n));
2353 return NULL;
2356 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2357 return NULL;
2360 static stmt_ty
2361 ast_for_import_stmt(struct compiling *c, const node *n)
2364 import_stmt: import_name | import_from
2365 import_name: 'import' dotted_as_names
2366 import_from: 'from' (('.' | '...')* dotted_name | ('.' | '...')+)
2367 'import' ('*' | '(' import_as_names ')' | import_as_names)
2369 int lineno;
2370 int col_offset;
2371 int i;
2372 asdl_seq *aliases;
2374 REQ(n, import_stmt);
2375 lineno = LINENO(n);
2376 col_offset = n->n_col_offset;
2377 n = CHILD(n, 0);
2378 if (TYPE(n) == import_name) {
2379 n = CHILD(n, 1);
2380 REQ(n, dotted_as_names);
2381 aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2382 if (!aliases)
2383 return NULL;
2384 for (i = 0; i < NCH(n); i += 2) {
2385 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i));
2386 if (!import_alias)
2387 return NULL;
2388 asdl_seq_SET(aliases, i / 2, import_alias);
2390 return Import(aliases, lineno, col_offset, c->c_arena);
2392 else if (TYPE(n) == import_from) {
2393 int n_children;
2394 int idx, ndots = 0;
2395 alias_ty mod = NULL;
2396 identifier modname;
2398 /* Count the number of dots (for relative imports) and check for the
2399 optional module name */
2400 for (idx = 1; idx < NCH(n); idx++) {
2401 if (TYPE(CHILD(n, idx)) == dotted_name) {
2402 mod = alias_for_import_name(c, CHILD(n, idx));
2403 idx++;
2404 break;
2405 } else if (TYPE(CHILD(n, idx)) == ELLIPSIS) {
2406 /* three consecutive dots are tokenized as one ELLIPSIS */
2407 ndots += 3;
2408 continue;
2409 } else if (TYPE(CHILD(n, idx)) != DOT) {
2410 break;
2412 ndots++;
2414 idx++; /* skip over the 'import' keyword */
2415 switch (TYPE(CHILD(n, idx))) {
2416 case STAR:
2417 /* from ... import * */
2418 n = CHILD(n, idx);
2419 n_children = 1;
2420 break;
2421 case LPAR:
2422 /* from ... import (x, y, z) */
2423 n = CHILD(n, idx + 1);
2424 n_children = NCH(n);
2425 break;
2426 case import_as_names:
2427 /* from ... import x, y, z */
2428 n = CHILD(n, idx);
2429 n_children = NCH(n);
2430 if (n_children % 2 == 0) {
2431 ast_error(n, "trailing comma not allowed without"
2432 " surrounding parentheses");
2433 return NULL;
2435 break;
2436 default:
2437 ast_error(n, "Unexpected node-type in from-import");
2438 return NULL;
2441 aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2442 if (!aliases)
2443 return NULL;
2445 /* handle "from ... import *" special b/c there's no children */
2446 if (TYPE(n) == STAR) {
2447 alias_ty import_alias = alias_for_import_name(c, n);
2448 if (!import_alias)
2449 return NULL;
2450 asdl_seq_SET(aliases, 0, import_alias);
2452 else {
2453 for (i = 0; i < NCH(n); i += 2) {
2454 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i));
2455 if (!import_alias)
2456 return NULL;
2457 asdl_seq_SET(aliases, i / 2, import_alias);
2460 if (mod != NULL)
2461 modname = mod->name;
2462 else
2463 modname = new_identifier("", c->c_arena);
2464 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2465 c->c_arena);
2467 PyErr_Format(PyExc_SystemError,
2468 "unknown import statement: starts with command '%s'",
2469 STR(CHILD(n, 0)));
2470 return NULL;
2473 static stmt_ty
2474 ast_for_global_stmt(struct compiling *c, const node *n)
2476 /* global_stmt: 'global' NAME (',' NAME)* */
2477 identifier name;
2478 asdl_seq *s;
2479 int i;
2481 REQ(n, global_stmt);
2482 s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2483 if (!s)
2484 return NULL;
2485 for (i = 1; i < NCH(n); i += 2) {
2486 name = NEW_IDENTIFIER(CHILD(n, i));
2487 if (!name)
2488 return NULL;
2489 asdl_seq_SET(s, i / 2, name);
2491 return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2494 static stmt_ty
2495 ast_for_nonlocal_stmt(struct compiling *c, const node *n)
2497 /* nonlocal_stmt: 'nonlocal' NAME (',' NAME)* */
2498 identifier name;
2499 asdl_seq *s;
2500 int i;
2502 REQ(n, nonlocal_stmt);
2503 s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2504 if (!s)
2505 return NULL;
2506 for (i = 1; i < NCH(n); i += 2) {
2507 name = NEW_IDENTIFIER(CHILD(n, i));
2508 if (!name)
2509 return NULL;
2510 asdl_seq_SET(s, i / 2, name);
2512 return Nonlocal(s, LINENO(n), n->n_col_offset, c->c_arena);
2515 static stmt_ty
2516 ast_for_assert_stmt(struct compiling *c, const node *n)
2518 /* assert_stmt: 'assert' test [',' test] */
2519 REQ(n, assert_stmt);
2520 if (NCH(n) == 2) {
2521 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2522 if (!expression)
2523 return NULL;
2524 return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2526 else if (NCH(n) == 4) {
2527 expr_ty expr1, expr2;
2529 expr1 = ast_for_expr(c, CHILD(n, 1));
2530 if (!expr1)
2531 return NULL;
2532 expr2 = ast_for_expr(c, CHILD(n, 3));
2533 if (!expr2)
2534 return NULL;
2536 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2538 PyErr_Format(PyExc_SystemError,
2539 "improper number of parts to 'assert' statement: %d",
2540 NCH(n));
2541 return NULL;
2544 static asdl_seq *
2545 ast_for_suite(struct compiling *c, const node *n)
2547 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2548 asdl_seq *seq;
2549 stmt_ty s;
2550 int i, total, num, end, pos = 0;
2551 node *ch;
2553 REQ(n, suite);
2555 total = num_stmts(n);
2556 seq = asdl_seq_new(total, c->c_arena);
2557 if (!seq)
2558 return NULL;
2559 if (TYPE(CHILD(n, 0)) == simple_stmt) {
2560 n = CHILD(n, 0);
2561 /* simple_stmt always ends with a NEWLINE,
2562 and may have a trailing SEMI
2564 end = NCH(n) - 1;
2565 if (TYPE(CHILD(n, end - 1)) == SEMI)
2566 end--;
2567 /* loop by 2 to skip semi-colons */
2568 for (i = 0; i < end; i += 2) {
2569 ch = CHILD(n, i);
2570 s = ast_for_stmt(c, ch);
2571 if (!s)
2572 return NULL;
2573 asdl_seq_SET(seq, pos++, s);
2576 else {
2577 for (i = 2; i < (NCH(n) - 1); i++) {
2578 ch = CHILD(n, i);
2579 REQ(ch, stmt);
2580 num = num_stmts(ch);
2581 if (num == 1) {
2582 /* small_stmt or compound_stmt with only one child */
2583 s = ast_for_stmt(c, ch);
2584 if (!s)
2585 return NULL;
2586 asdl_seq_SET(seq, pos++, s);
2588 else {
2589 int j;
2590 ch = CHILD(ch, 0);
2591 REQ(ch, simple_stmt);
2592 for (j = 0; j < NCH(ch); j += 2) {
2593 /* statement terminates with a semi-colon ';' */
2594 if (NCH(CHILD(ch, j)) == 0) {
2595 assert((j + 1) == NCH(ch));
2596 break;
2598 s = ast_for_stmt(c, CHILD(ch, j));
2599 if (!s)
2600 return NULL;
2601 asdl_seq_SET(seq, pos++, s);
2606 assert(pos == seq->size);
2607 return seq;
2610 static stmt_ty
2611 ast_for_if_stmt(struct compiling *c, const node *n)
2613 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2614 ['else' ':' suite]
2616 char *s;
2618 REQ(n, if_stmt);
2620 if (NCH(n) == 4) {
2621 expr_ty expression;
2622 asdl_seq *suite_seq;
2624 expression = ast_for_expr(c, CHILD(n, 1));
2625 if (!expression)
2626 return NULL;
2627 suite_seq = ast_for_suite(c, CHILD(n, 3));
2628 if (!suite_seq)
2629 return NULL;
2631 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset,
2632 c->c_arena);
2635 s = STR(CHILD(n, 4));
2636 /* s[2], the third character in the string, will be
2637 's' for el_s_e, or
2638 'i' for el_i_f
2640 if (s[2] == 's') {
2641 expr_ty expression;
2642 asdl_seq *seq1, *seq2;
2644 expression = ast_for_expr(c, CHILD(n, 1));
2645 if (!expression)
2646 return NULL;
2647 seq1 = ast_for_suite(c, CHILD(n, 3));
2648 if (!seq1)
2649 return NULL;
2650 seq2 = ast_for_suite(c, CHILD(n, 6));
2651 if (!seq2)
2652 return NULL;
2654 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset,
2655 c->c_arena);
2657 else if (s[2] == 'i') {
2658 int i, n_elif, has_else = 0;
2659 expr_ty expression;
2660 asdl_seq *suite_seq;
2661 asdl_seq *orelse = NULL;
2662 n_elif = NCH(n) - 4;
2663 /* must reference the child n_elif+1 since 'else' token is third,
2664 not fourth, child from the end. */
2665 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2666 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2667 has_else = 1;
2668 n_elif -= 3;
2670 n_elif /= 4;
2672 if (has_else) {
2673 asdl_seq *suite_seq2;
2675 orelse = asdl_seq_new(1, c->c_arena);
2676 if (!orelse)
2677 return NULL;
2678 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2679 if (!expression)
2680 return NULL;
2681 suite_seq = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2682 if (!suite_seq)
2683 return NULL;
2684 suite_seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2685 if (!suite_seq2)
2686 return NULL;
2688 asdl_seq_SET(orelse, 0,
2689 If(expression, suite_seq, suite_seq2,
2690 LINENO(CHILD(n, NCH(n) - 6)),
2691 CHILD(n, NCH(n) - 6)->n_col_offset,
2692 c->c_arena));
2693 /* the just-created orelse handled the last elif */
2694 n_elif--;
2697 for (i = 0; i < n_elif; i++) {
2698 int off = 5 + (n_elif - i - 1) * 4;
2699 asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
2700 if (!newobj)
2701 return NULL;
2702 expression = ast_for_expr(c, CHILD(n, off));
2703 if (!expression)
2704 return NULL;
2705 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2706 if (!suite_seq)
2707 return NULL;
2709 asdl_seq_SET(newobj, 0,
2710 If(expression, suite_seq, orelse,
2711 LINENO(CHILD(n, off)),
2712 CHILD(n, off)->n_col_offset, c->c_arena));
2713 orelse = newobj;
2715 expression = ast_for_expr(c, CHILD(n, 1));
2716 if (!expression)
2717 return NULL;
2718 suite_seq = ast_for_suite(c, CHILD(n, 3));
2719 if (!suite_seq)
2720 return NULL;
2721 return If(expression, suite_seq, orelse,
2722 LINENO(n), n->n_col_offset, c->c_arena);
2725 PyErr_Format(PyExc_SystemError,
2726 "unexpected token in 'if' statement: %s", s);
2727 return NULL;
2730 static stmt_ty
2731 ast_for_while_stmt(struct compiling *c, const node *n)
2733 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2734 REQ(n, while_stmt);
2736 if (NCH(n) == 4) {
2737 expr_ty expression;
2738 asdl_seq *suite_seq;
2740 expression = ast_for_expr(c, CHILD(n, 1));
2741 if (!expression)
2742 return NULL;
2743 suite_seq = ast_for_suite(c, CHILD(n, 3));
2744 if (!suite_seq)
2745 return NULL;
2746 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2748 else if (NCH(n) == 7) {
2749 expr_ty expression;
2750 asdl_seq *seq1, *seq2;
2752 expression = ast_for_expr(c, CHILD(n, 1));
2753 if (!expression)
2754 return NULL;
2755 seq1 = ast_for_suite(c, CHILD(n, 3));
2756 if (!seq1)
2757 return NULL;
2758 seq2 = ast_for_suite(c, CHILD(n, 6));
2759 if (!seq2)
2760 return NULL;
2762 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
2765 PyErr_Format(PyExc_SystemError,
2766 "wrong number of tokens for 'while' statement: %d",
2767 NCH(n));
2768 return NULL;
2771 static stmt_ty
2772 ast_for_for_stmt(struct compiling *c, const node *n)
2774 asdl_seq *_target, *seq = NULL, *suite_seq;
2775 expr_ty expression;
2776 expr_ty target;
2777 const node *node_target;
2778 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2779 REQ(n, for_stmt);
2781 if (NCH(n) == 9) {
2782 seq = ast_for_suite(c, CHILD(n, 8));
2783 if (!seq)
2784 return NULL;
2787 node_target = CHILD(n, 1);
2788 _target = ast_for_exprlist(c, node_target, Store);
2789 if (!_target)
2790 return NULL;
2791 /* Check the # of children rather than the length of _target, since
2792 for x, in ... has 1 element in _target, but still requires a Tuple. */
2793 if (NCH(node_target) == 1)
2794 target = (expr_ty)asdl_seq_GET(_target, 0);
2795 else
2796 target = Tuple(_target, Store, LINENO(n), n->n_col_offset, c->c_arena);
2798 expression = ast_for_testlist(c, CHILD(n, 3));
2799 if (!expression)
2800 return NULL;
2801 suite_seq = ast_for_suite(c, CHILD(n, 5));
2802 if (!suite_seq)
2803 return NULL;
2805 return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
2806 c->c_arena);
2809 static excepthandler_ty
2810 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
2812 /* except_clause: 'except' [test ['as' test]] */
2813 REQ(exc, except_clause);
2814 REQ(body, suite);
2816 if (NCH(exc) == 1) {
2817 asdl_seq *suite_seq = ast_for_suite(c, body);
2818 if (!suite_seq)
2819 return NULL;
2821 return ExceptHandler(NULL, NULL, suite_seq, LINENO(exc),
2822 exc->n_col_offset, c->c_arena);
2824 else if (NCH(exc) == 2) {
2825 expr_ty expression;
2826 asdl_seq *suite_seq;
2828 expression = ast_for_expr(c, CHILD(exc, 1));
2829 if (!expression)
2830 return NULL;
2831 suite_seq = ast_for_suite(c, body);
2832 if (!suite_seq)
2833 return NULL;
2835 return ExceptHandler(expression, NULL, suite_seq, LINENO(exc),
2836 exc->n_col_offset, c->c_arena);
2838 else if (NCH(exc) == 4) {
2839 asdl_seq *suite_seq;
2840 expr_ty expression;
2841 identifier e = NEW_IDENTIFIER(CHILD(exc, 3));
2842 if (!e)
2843 return NULL;
2844 expression = ast_for_expr(c, CHILD(exc, 1));
2845 if (!expression)
2846 return NULL;
2847 suite_seq = ast_for_suite(c, body);
2848 if (!suite_seq)
2849 return NULL;
2851 return ExceptHandler(expression, e, suite_seq, LINENO(exc),
2852 exc->n_col_offset, c->c_arena);
2855 PyErr_Format(PyExc_SystemError,
2856 "wrong number of children for 'except' clause: %d",
2857 NCH(exc));
2858 return NULL;
2861 static stmt_ty
2862 ast_for_try_stmt(struct compiling *c, const node *n)
2864 const int nch = NCH(n);
2865 int n_except = (nch - 3)/3;
2866 asdl_seq *body, *orelse = NULL, *finally = NULL;
2868 REQ(n, try_stmt);
2870 body = ast_for_suite(c, CHILD(n, 2));
2871 if (body == NULL)
2872 return NULL;
2874 if (TYPE(CHILD(n, nch - 3)) == NAME) {
2875 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
2876 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
2877 /* we can assume it's an "else",
2878 because nch >= 9 for try-else-finally and
2879 it would otherwise have a type of except_clause */
2880 orelse = ast_for_suite(c, CHILD(n, nch - 4));
2881 if (orelse == NULL)
2882 return NULL;
2883 n_except--;
2886 finally = ast_for_suite(c, CHILD(n, nch - 1));
2887 if (finally == NULL)
2888 return NULL;
2889 n_except--;
2891 else {
2892 /* we can assume it's an "else",
2893 otherwise it would have a type of except_clause */
2894 orelse = ast_for_suite(c, CHILD(n, nch - 1));
2895 if (orelse == NULL)
2896 return NULL;
2897 n_except--;
2900 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
2901 ast_error(n, "malformed 'try' statement");
2902 return NULL;
2905 if (n_except > 0) {
2906 int i;
2907 stmt_ty except_st;
2908 /* process except statements to create a try ... except */
2909 asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
2910 if (handlers == NULL)
2911 return NULL;
2913 for (i = 0; i < n_except; i++) {
2914 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
2915 CHILD(n, 5 + i * 3));
2916 if (!e)
2917 return NULL;
2918 asdl_seq_SET(handlers, i, e);
2921 except_st = TryExcept(body, handlers, orelse, LINENO(n),
2922 n->n_col_offset, c->c_arena);
2923 if (!finally)
2924 return except_st;
2926 /* if a 'finally' is present too, we nest the TryExcept within a
2927 TryFinally to emulate try ... except ... finally */
2928 body = asdl_seq_new(1, c->c_arena);
2929 if (body == NULL)
2930 return NULL;
2931 asdl_seq_SET(body, 0, except_st);
2934 /* must be a try ... finally (except clauses are in body, if any exist) */
2935 assert(finally != NULL);
2936 return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
2939 /* with_item: test ['as' expr] */
2940 static stmt_ty
2941 ast_for_with_item(struct compiling *c, const node *n, asdl_seq *content)
2943 expr_ty context_expr, optional_vars = NULL;
2945 REQ(n, with_item);
2946 context_expr = ast_for_expr(c, CHILD(n, 0));
2947 if (NCH(n) == 3) {
2948 optional_vars = ast_for_expr(c, CHILD(n, 2));
2950 if (!optional_vars) {
2951 return NULL;
2953 if (!set_context(c, optional_vars, Store, n)) {
2954 return NULL;
2958 return With(context_expr, optional_vars, content, LINENO(n),
2959 n->n_col_offset, c->c_arena);
2962 /* with_stmt: 'with' with_item (',' with_item)* ':' suite */
2963 static stmt_ty
2964 ast_for_with_stmt(struct compiling *c, const node *n)
2966 int i;
2967 stmt_ty ret;
2968 asdl_seq *inner;
2970 REQ(n, with_stmt);
2972 /* process the with items inside-out */
2973 i = NCH(n) - 1;
2974 /* the suite of the innermost with item is the suite of the with stmt */
2975 inner = ast_for_suite(c, CHILD(n, i));
2976 if (!inner)
2977 return NULL;
2979 for (;;) {
2980 i -= 2;
2981 ret = ast_for_with_item(c, CHILD(n, i), inner);
2982 if (!ret)
2983 return NULL;
2984 /* was this the last item? */
2985 if (i == 1)
2986 break;
2987 /* if not, wrap the result so far in a new sequence */
2988 inner = asdl_seq_new(1, c->c_arena);
2989 if (!inner)
2990 return NULL;
2991 asdl_seq_SET(inner, 0, ret);
2994 return ret;
2997 static stmt_ty
2998 ast_for_classdef(struct compiling *c, const node *n, asdl_seq *decorator_seq)
3000 /* classdef: 'class' NAME ['(' arglist ')'] ':' suite */
3001 PyObject *classname;
3002 asdl_seq *s;
3003 expr_ty call;
3005 REQ(n, classdef);
3007 if (NCH(n) == 4) { /* class NAME ':' suite */
3008 s = ast_for_suite(c, CHILD(n, 3));
3009 if (!s)
3010 return NULL;
3011 classname = NEW_IDENTIFIER(CHILD(n, 1));
3012 if (!classname)
3013 return NULL;
3014 return ClassDef(classname, NULL, NULL, NULL, NULL, s, decorator_seq,
3015 LINENO(n), n->n_col_offset, c->c_arena);
3018 if (TYPE(CHILD(n, 3)) == RPAR) { /* class NAME '(' ')' ':' suite */
3019 s = ast_for_suite(c, CHILD(n,5));
3020 if (!s)
3021 return NULL;
3022 classname = NEW_IDENTIFIER(CHILD(n, 1));
3023 if (!classname)
3024 return NULL;
3025 return ClassDef(classname, NULL, NULL, NULL, NULL, s, decorator_seq,
3026 LINENO(n), n->n_col_offset, c->c_arena);
3029 /* class NAME '(' arglist ')' ':' suite */
3030 /* build up a fake Call node so we can extract its pieces */
3032 PyObject *dummy_name;
3033 expr_ty dummy;
3034 dummy_name = NEW_IDENTIFIER(CHILD(n, 1));
3035 if (!dummy_name)
3036 return NULL;
3037 dummy = Name(dummy_name, Load, LINENO(n), n->n_col_offset, c->c_arena);
3038 call = ast_for_call(c, CHILD(n, 3), dummy);
3039 if (!call)
3040 return NULL;
3042 s = ast_for_suite(c, CHILD(n, 6));
3043 if (!s)
3044 return NULL;
3045 classname = NEW_IDENTIFIER(CHILD(n, 1));
3046 if (!classname)
3047 return NULL;
3049 return ClassDef(classname, call->v.Call.args, call->v.Call.keywords,
3050 call->v.Call.starargs, call->v.Call.kwargs, s,
3051 decorator_seq, LINENO(n), n->n_col_offset, c->c_arena);
3054 static stmt_ty
3055 ast_for_stmt(struct compiling *c, const node *n)
3057 if (TYPE(n) == stmt) {
3058 assert(NCH(n) == 1);
3059 n = CHILD(n, 0);
3061 if (TYPE(n) == simple_stmt) {
3062 assert(num_stmts(n) == 1);
3063 n = CHILD(n, 0);
3065 if (TYPE(n) == small_stmt) {
3066 n = CHILD(n, 0);
3067 /* small_stmt: expr_stmt | del_stmt | pass_stmt | flow_stmt
3068 | import_stmt | global_stmt | nonlocal_stmt | assert_stmt
3070 switch (TYPE(n)) {
3071 case expr_stmt:
3072 return ast_for_expr_stmt(c, n);
3073 case del_stmt:
3074 return ast_for_del_stmt(c, n);
3075 case pass_stmt:
3076 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
3077 case flow_stmt:
3078 return ast_for_flow_stmt(c, n);
3079 case import_stmt:
3080 return ast_for_import_stmt(c, n);
3081 case global_stmt:
3082 return ast_for_global_stmt(c, n);
3083 case nonlocal_stmt:
3084 return ast_for_nonlocal_stmt(c, n);
3085 case assert_stmt:
3086 return ast_for_assert_stmt(c, n);
3087 default:
3088 PyErr_Format(PyExc_SystemError,
3089 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3090 TYPE(n), NCH(n));
3091 return NULL;
3094 else {
3095 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
3096 | funcdef | classdef | decorated
3098 node *ch = CHILD(n, 0);
3099 REQ(n, compound_stmt);
3100 switch (TYPE(ch)) {
3101 case if_stmt:
3102 return ast_for_if_stmt(c, ch);
3103 case while_stmt:
3104 return ast_for_while_stmt(c, ch);
3105 case for_stmt:
3106 return ast_for_for_stmt(c, ch);
3107 case try_stmt:
3108 return ast_for_try_stmt(c, ch);
3109 case with_stmt:
3110 return ast_for_with_stmt(c, ch);
3111 case funcdef:
3112 return ast_for_funcdef(c, ch, NULL);
3113 case classdef:
3114 return ast_for_classdef(c, ch, NULL);
3115 case decorated:
3116 return ast_for_decorated(c, ch);
3117 default:
3118 PyErr_Format(PyExc_SystemError,
3119 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3120 TYPE(n), NCH(n));
3121 return NULL;
3126 static PyObject *
3127 parsenumber(struct compiling *c, const char *s)
3129 const char *end;
3130 long x;
3131 double dx;
3132 #ifndef WITHOUT_COMPLEX
3133 Py_complex compl;
3134 int imflag;
3135 #endif
3137 assert(s != NULL);
3138 errno = 0;
3139 end = s + strlen(s) - 1;
3140 #ifndef WITHOUT_COMPLEX
3141 imflag = *end == 'j' || *end == 'J';
3142 #endif
3143 if (s[0] == '0') {
3144 x = (long) PyOS_strtoul((char *)s, (char **)&end, 0);
3145 if (x < 0 && errno == 0) {
3146 return PyLong_FromString((char *)s,
3147 (char **)0,
3151 else
3152 x = PyOS_strtol((char *)s, (char **)&end, 0);
3153 if (*end == '\0') {
3154 if (errno != 0)
3155 return PyLong_FromString((char *)s, (char **)0, 0);
3156 return PyLong_FromLong(x);
3158 /* XXX Huge floats may silently fail */
3159 #ifndef WITHOUT_COMPLEX
3160 if (imflag) {
3161 compl.real = 0.;
3162 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
3163 if (compl.imag == -1.0 && PyErr_Occurred())
3164 return NULL;
3165 return PyComplex_FromCComplex(compl);
3167 else
3168 #endif
3170 dx = PyOS_string_to_double(s, NULL, NULL);
3171 if (dx == -1.0 && PyErr_Occurred())
3172 return NULL;
3173 return PyFloat_FromDouble(dx);
3177 static PyObject *
3178 decode_utf8(struct compiling *c, const char **sPtr, const char *end, char* encoding)
3180 PyObject *u, *v;
3181 char *s, *t;
3182 t = s = (char *)*sPtr;
3183 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3184 while (s < end && (*s & 0x80)) s++;
3185 *sPtr = s;
3186 u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3187 if (u == NULL)
3188 return NULL;
3189 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3190 Py_DECREF(u);
3191 return v;
3194 static PyObject *
3195 decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, const char *encoding)
3197 PyObject *v, *u;
3198 char *buf;
3199 char *p;
3200 const char *end;
3202 if (encoding == NULL) {
3203 buf = (char *)s;
3204 u = NULL;
3205 } else {
3206 /* check for integer overflow */
3207 if (len > PY_SIZE_MAX / 4)
3208 return NULL;
3209 /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
3210 u = PyBytes_FromStringAndSize((char *)NULL, len * 4);
3211 if (u == NULL)
3212 return NULL;
3213 p = buf = PyBytes_AsString(u);
3214 end = s + len;
3215 while (s < end) {
3216 if (*s == '\\') {
3217 *p++ = *s++;
3218 if (*s & 0x80) {
3219 strcpy(p, "u005c");
3220 p += 5;
3223 if (*s & 0x80) { /* XXX inefficient */
3224 PyObject *w;
3225 char *r;
3226 Py_ssize_t rn, i;
3227 w = decode_utf8(c, &s, end, "utf-16-be");
3228 if (w == NULL) {
3229 Py_DECREF(u);
3230 return NULL;
3232 r = PyBytes_AS_STRING(w);
3233 rn = Py_SIZE(w);
3234 assert(rn % 2 == 0);
3235 for (i = 0; i < rn; i += 2) {
3236 sprintf(p, "\\u%02x%02x",
3237 r[i + 0] & 0xFF,
3238 r[i + 1] & 0xFF);
3239 p += 6;
3241 Py_DECREF(w);
3242 } else {
3243 *p++ = *s++;
3246 len = p - buf;
3247 s = buf;
3249 if (rawmode)
3250 v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
3251 else
3252 v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
3253 Py_XDECREF(u);
3254 return v;
3257 /* s is a Python string literal, including the bracketing quote characters,
3258 * and r &/or b prefixes (if any), and embedded escape sequences (if any).
3259 * parsestr parses it, and returns the decoded Python string object.
3261 static PyObject *
3262 parsestr(struct compiling *c, const node *n, int *bytesmode)
3264 size_t len;
3265 const char *s = STR(n);
3266 int quote = Py_CHARMASK(*s);
3267 int rawmode = 0;
3268 int need_encoding;
3269 if (isalpha(quote)) {
3270 if (quote == 'b' || quote == 'B') {
3271 quote = *++s;
3272 *bytesmode = 1;
3274 if (quote == 'r' || quote == 'R') {
3275 quote = *++s;
3276 rawmode = 1;
3279 if (quote != '\'' && quote != '\"') {
3280 PyErr_BadInternalCall();
3281 return NULL;
3283 s++;
3284 len = strlen(s);
3285 if (len > INT_MAX) {
3286 PyErr_SetString(PyExc_OverflowError,
3287 "string to parse is too long");
3288 return NULL;
3290 if (s[--len] != quote) {
3291 PyErr_BadInternalCall();
3292 return NULL;
3294 if (len >= 4 && s[0] == quote && s[1] == quote) {
3295 s += 2;
3296 len -= 2;
3297 if (s[--len] != quote || s[--len] != quote) {
3298 PyErr_BadInternalCall();
3299 return NULL;
3302 if (!*bytesmode && !rawmode) {
3303 return decode_unicode(c, s, len, rawmode, c->c_encoding);
3305 if (*bytesmode) {
3306 /* Disallow non-ascii characters (but not escapes) */
3307 const char *c;
3308 for (c = s; *c; c++) {
3309 if (Py_CHARMASK(*c) >= 0x80) {
3310 ast_error(n, "bytes can only contain ASCII "
3311 "literal characters.");
3312 return NULL;
3316 need_encoding = (!*bytesmode && c->c_encoding != NULL &&
3317 strcmp(c->c_encoding, "utf-8") != 0);
3318 if (rawmode || strchr(s, '\\') == NULL) {
3319 if (need_encoding) {
3320 PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3321 if (u == NULL || !*bytesmode)
3322 return u;
3323 v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
3324 Py_DECREF(u);
3325 return v;
3326 } else if (*bytesmode) {
3327 return PyBytes_FromStringAndSize(s, len);
3328 } else if (strcmp(c->c_encoding, "utf-8") == 0) {
3329 return PyUnicode_FromStringAndSize(s, len);
3330 } else {
3331 return PyUnicode_DecodeLatin1(s, len, NULL);
3334 return PyBytes_DecodeEscape(s, len, NULL, 1,
3335 need_encoding ? c->c_encoding : NULL);
3338 /* Build a Python string object out of a STRING+ atom. This takes care of
3339 * compile-time literal catenation, calling parsestr() on each piece, and
3340 * pasting the intermediate results together.
3342 static PyObject *
3343 parsestrplus(struct compiling *c, const node *n, int *bytesmode)
3345 PyObject *v;
3346 int i;
3347 REQ(CHILD(n, 0), STRING);
3348 v = parsestr(c, CHILD(n, 0), bytesmode);
3349 if (v != NULL) {
3350 /* String literal concatenation */
3351 for (i = 1; i < NCH(n); i++) {
3352 PyObject *s;
3353 int subbm = 0;
3354 s = parsestr(c, CHILD(n, i), &subbm);
3355 if (s == NULL)
3356 goto onError;
3357 if (*bytesmode != subbm) {
3358 ast_error(n, "cannot mix bytes and nonbytes literals");
3359 goto onError;
3361 if (PyBytes_Check(v) && PyBytes_Check(s)) {
3362 PyBytes_ConcatAndDel(&v, s);
3363 if (v == NULL)
3364 goto onError;
3366 else {
3367 PyObject *temp = PyUnicode_Concat(v, s);
3368 Py_DECREF(s);
3369 Py_DECREF(v);
3370 v = temp;
3371 if (v == NULL)
3372 goto onError;
3376 return v;
3378 onError:
3379 Py_XDECREF(v);
3380 return NULL;