Patch by Jeremy Katz (SF #1609407)
[python.git] / Python / ast.c
blobdcf7f2b3ab08f85600cf711bd3af3010a0571354
1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
16 #include <assert.h>
/* Data structure used internally: the state threaded through every
   CST-to-AST conversion routine in this file. */
struct compiling {
    char *c_encoding; /* source encoding */
    PyArena *c_arena; /* arena for allocating memory */
};
24 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
25 static expr_ty ast_for_expr(struct compiling *, const node *);
26 static stmt_ty ast_for_stmt(struct compiling *, const node *);
27 static asdl_seq *ast_for_suite(struct compiling *, const node *);
28 static asdl_seq *ast_for_exprlist(struct compiling *, const node *, expr_context_ty);
29 static expr_ty ast_for_testlist(struct compiling *, const node *);
30 static expr_ty ast_for_testlist_gexp(struct compiling *, const node *);
32 /* Note different signature for ast_for_call */
33 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
35 static PyObject *parsenumber(const char *);
36 static PyObject *parsestr(const char *s, const char *encoding);
37 static PyObject *parsestrplus(struct compiling *, const node *n);
39 #ifndef LINENO
40 #define LINENO(n) ((n)->n_lineno)
41 #endif
43 static identifier
44 new_identifier(const char* n, PyArena *arena) {
45 PyObject* id = PyString_InternFromString(n);
46 PyArena_AddPyObject(arena, id);
47 return id;
50 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
52 /* This routine provides an invalid object for the syntax error.
53 The outermost routine must unpack this error and create the
54 proper object. We do this so that we don't have to pass
55 the filename to everything function.
57 XXX Maybe we should just pass the filename...
60 static int
61 ast_error(const node *n, const char *errstr)
63 PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
64 if (!u)
65 return 0;
66 PyErr_SetObject(PyExc_SyntaxError, u);
67 Py_DECREF(u);
68 return 0;
71 static void
72 ast_error_finish(const char *filename)
74 PyObject *type, *value, *tback, *errstr, *loc, *tmp;
75 long lineno;
77 assert(PyErr_Occurred());
78 if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
79 return;
81 PyErr_Fetch(&type, &value, &tback);
82 errstr = PyTuple_GetItem(value, 0);
83 if (!errstr)
84 return;
85 Py_INCREF(errstr);
86 lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
87 if (lineno == -1) {
88 Py_DECREF(errstr);
89 return;
91 Py_DECREF(value);
93 loc = PyErr_ProgramText(filename, lineno);
94 if (!loc) {
95 Py_INCREF(Py_None);
96 loc = Py_None;
98 tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
99 Py_DECREF(loc);
100 if (!tmp) {
101 Py_DECREF(errstr);
102 return;
104 value = PyTuple_Pack(2, errstr, tmp);
105 Py_DECREF(errstr);
106 Py_DECREF(tmp);
107 if (!value)
108 return;
109 PyErr_Restore(type, value, tback);
112 /* num_stmts() returns number of contained statements.
114 Use this routine to determine how big a sequence is needed for
115 the statements in a parse tree. Its raison d'etre is this bit of
116 grammar:
118 stmt: simple_stmt | compound_stmt
119 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
121 A simple_stmt can contain multiple small_stmt elements joined
122 by semicolons. If the arg is a simple_stmt, the number of
123 small_stmt elements is returned.
126 static int
127 num_stmts(const node *n)
129 int i, l;
130 node *ch;
132 switch (TYPE(n)) {
133 case single_input:
134 if (TYPE(CHILD(n, 0)) == NEWLINE)
135 return 0;
136 else
137 return num_stmts(CHILD(n, 0));
138 case file_input:
139 l = 0;
140 for (i = 0; i < NCH(n); i++) {
141 ch = CHILD(n, i);
142 if (TYPE(ch) == stmt)
143 l += num_stmts(ch);
145 return l;
146 case stmt:
147 return num_stmts(CHILD(n, 0));
148 case compound_stmt:
149 return 1;
150 case simple_stmt:
151 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
152 case suite:
153 if (NCH(n) == 1)
154 return num_stmts(CHILD(n, 0));
155 else {
156 l = 0;
157 for (i = 2; i < (NCH(n) - 1); i++)
158 l += num_stmts(CHILD(n, i));
159 return l;
161 default: {
162 char buf[128];
164 sprintf(buf, "Non-statement found: %d %d\n",
165 TYPE(n), NCH(n));
166 Py_FatalError(buf);
169 assert(0);
170 return 0;
173 /* Transform the CST rooted at node * to the appropriate AST
176 mod_ty
177 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
178 PyArena *arena)
180 int i, j, k, num;
181 asdl_seq *stmts = NULL;
182 stmt_ty s;
183 node *ch;
184 struct compiling c;
186 if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
187 c.c_encoding = "utf-8";
188 if (TYPE(n) == encoding_decl) {
189 ast_error(n, "encoding declaration in Unicode string");
190 goto error;
192 } else if (TYPE(n) == encoding_decl) {
193 c.c_encoding = STR(n);
194 n = CHILD(n, 0);
195 } else {
196 c.c_encoding = NULL;
198 c.c_arena = arena;
200 k = 0;
201 switch (TYPE(n)) {
202 case file_input:
203 stmts = asdl_seq_new(num_stmts(n), arena);
204 if (!stmts)
205 return NULL;
206 for (i = 0; i < NCH(n) - 1; i++) {
207 ch = CHILD(n, i);
208 if (TYPE(ch) == NEWLINE)
209 continue;
210 REQ(ch, stmt);
211 num = num_stmts(ch);
212 if (num == 1) {
213 s = ast_for_stmt(&c, ch);
214 if (!s)
215 goto error;
216 asdl_seq_SET(stmts, k++, s);
218 else {
219 ch = CHILD(ch, 0);
220 REQ(ch, simple_stmt);
221 for (j = 0; j < num; j++) {
222 s = ast_for_stmt(&c, CHILD(ch, j * 2));
223 if (!s)
224 goto error;
225 asdl_seq_SET(stmts, k++, s);
229 return Module(stmts, arena);
230 case eval_input: {
231 expr_ty testlist_ast;
233 /* XXX Why not gen_for here? */
234 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
235 if (!testlist_ast)
236 goto error;
237 return Expression(testlist_ast, arena);
239 case single_input:
240 if (TYPE(CHILD(n, 0)) == NEWLINE) {
241 stmts = asdl_seq_new(1, arena);
242 if (!stmts)
243 goto error;
244 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
245 arena));
246 return Interactive(stmts, arena);
248 else {
249 n = CHILD(n, 0);
250 num = num_stmts(n);
251 stmts = asdl_seq_new(num, arena);
252 if (!stmts)
253 goto error;
254 if (num == 1) {
255 s = ast_for_stmt(&c, n);
256 if (!s)
257 goto error;
258 asdl_seq_SET(stmts, 0, s);
260 else {
261 /* Only a simple_stmt can contain multiple statements. */
262 REQ(n, simple_stmt);
263 for (i = 0; i < NCH(n); i += 2) {
264 if (TYPE(CHILD(n, i)) == NEWLINE)
265 break;
266 s = ast_for_stmt(&c, CHILD(n, i));
267 if (!s)
268 goto error;
269 asdl_seq_SET(stmts, i / 2, s);
273 return Interactive(stmts, arena);
275 default:
276 goto error;
278 error:
279 ast_error_finish(filename);
280 return NULL;
283 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
286 static operator_ty
287 get_operator(const node *n)
289 switch (TYPE(n)) {
290 case VBAR:
291 return BitOr;
292 case CIRCUMFLEX:
293 return BitXor;
294 case AMPER:
295 return BitAnd;
296 case LEFTSHIFT:
297 return LShift;
298 case RIGHTSHIFT:
299 return RShift;
300 case PLUS:
301 return Add;
302 case MINUS:
303 return Sub;
304 case STAR:
305 return Mult;
306 case SLASH:
307 return Div;
308 case DOUBLESLASH:
309 return FloorDiv;
310 case PERCENT:
311 return Mod;
312 default:
313 return (operator_ty)0;
317 /* Set the context ctx for expr_ty e, recursively traversing e.
319 Only sets context for expr kinds that "can appear in assignment context"
320 (according to ../Parser/Python.asdl). For other expr kinds, it sets
321 an appropriate syntax error and returns false.
324 static int
325 set_context(expr_ty e, expr_context_ty ctx, const node *n)
327 asdl_seq *s = NULL;
328 /* If a particular expression type can't be used for assign / delete,
329 set expr_name to its name and an error message will be generated.
331 const char* expr_name = NULL;
333 /* The ast defines augmented store and load contexts, but the
334 implementation here doesn't actually use them. The code may be
335 a little more complex than necessary as a result. It also means
336 that expressions in an augmented assignment have a Store context.
337 Consider restructuring so that augmented assignment uses
338 set_context(), too.
340 assert(ctx != AugStore && ctx != AugLoad);
342 switch (e->kind) {
343 case Attribute_kind:
344 if (ctx == Store &&
345 !strcmp(PyString_AS_STRING(e->v.Attribute.attr), "None")) {
346 return ast_error(n, "assignment to None");
348 e->v.Attribute.ctx = ctx;
349 break;
350 case Subscript_kind:
351 e->v.Subscript.ctx = ctx;
352 break;
353 case Name_kind:
354 if (ctx == Store &&
355 !strcmp(PyString_AS_STRING(e->v.Name.id), "None")) {
356 return ast_error(n, "assignment to None");
358 e->v.Name.ctx = ctx;
359 break;
360 case List_kind:
361 e->v.List.ctx = ctx;
362 s = e->v.List.elts;
363 break;
364 case Tuple_kind:
365 if (asdl_seq_LEN(e->v.Tuple.elts) == 0)
366 return ast_error(n, "can't assign to ()");
367 e->v.Tuple.ctx = ctx;
368 s = e->v.Tuple.elts;
369 break;
370 case Lambda_kind:
371 expr_name = "lambda";
372 break;
373 case Call_kind:
374 expr_name = "function call";
375 break;
376 case BoolOp_kind:
377 case BinOp_kind:
378 case UnaryOp_kind:
379 expr_name = "operator";
380 break;
381 case GeneratorExp_kind:
382 expr_name = "generator expression";
383 break;
384 case Yield_kind:
385 expr_name = "yield expression";
386 break;
387 case ListComp_kind:
388 expr_name = "list comprehension";
389 break;
390 case Dict_kind:
391 case Num_kind:
392 case Str_kind:
393 expr_name = "literal";
394 break;
395 case Compare_kind:
396 expr_name = "comparison";
397 break;
398 case Repr_kind:
399 expr_name = "repr";
400 break;
401 case IfExp_kind:
402 expr_name = "conditional expression";
403 break;
404 default:
405 PyErr_Format(PyExc_SystemError,
406 "unexpected expression in assignment %d (line %d)",
407 e->kind, e->lineno);
408 return 0;
410 /* Check for error string set by switch */
411 if (expr_name) {
412 char buf[300];
413 PyOS_snprintf(buf, sizeof(buf),
414 "can't %s %s",
415 ctx == Store ? "assign to" : "delete",
416 expr_name);
417 return ast_error(n, buf);
420 /* If the LHS is a list or tuple, we need to set the assignment
421 context for all the contained elements.
423 if (s) {
424 int i;
426 for (i = 0; i < asdl_seq_LEN(s); i++) {
427 if (!set_context((expr_ty)asdl_seq_GET(s, i), ctx, n))
428 return 0;
431 return 1;
434 static operator_ty
435 ast_for_augassign(const node *n)
437 REQ(n, augassign);
438 n = CHILD(n, 0);
439 switch (STR(n)[0]) {
440 case '+':
441 return Add;
442 case '-':
443 return Sub;
444 case '/':
445 if (STR(n)[1] == '/')
446 return FloorDiv;
447 else
448 return Div;
449 case '%':
450 return Mod;
451 case '<':
452 return LShift;
453 case '>':
454 return RShift;
455 case '&':
456 return BitAnd;
457 case '^':
458 return BitXor;
459 case '|':
460 return BitOr;
461 case '*':
462 if (STR(n)[1] == '*')
463 return Pow;
464 else
465 return Mult;
466 default:
467 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
468 return (operator_ty)0;
472 static cmpop_ty
473 ast_for_comp_op(const node *n)
475 /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
476 |'is' 'not'
478 REQ(n, comp_op);
479 if (NCH(n) == 1) {
480 n = CHILD(n, 0);
481 switch (TYPE(n)) {
482 case LESS:
483 return Lt;
484 case GREATER:
485 return Gt;
486 case EQEQUAL: /* == */
487 return Eq;
488 case LESSEQUAL:
489 return LtE;
490 case GREATEREQUAL:
491 return GtE;
492 case NOTEQUAL:
493 return NotEq;
494 case NAME:
495 if (strcmp(STR(n), "in") == 0)
496 return In;
497 if (strcmp(STR(n), "is") == 0)
498 return Is;
499 default:
500 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
501 STR(n));
502 return (cmpop_ty)0;
505 else if (NCH(n) == 2) {
506 /* handle "not in" and "is not" */
507 switch (TYPE(CHILD(n, 0))) {
508 case NAME:
509 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
510 return NotIn;
511 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
512 return IsNot;
513 default:
514 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
515 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
516 return (cmpop_ty)0;
519 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
520 NCH(n));
521 return (cmpop_ty)0;
524 static asdl_seq *
525 seq_for_testlist(struct compiling *c, const node *n)
527 /* testlist: test (',' test)* [','] */
528 asdl_seq *seq;
529 expr_ty expression;
530 int i;
531 assert(TYPE(n) == testlist
532 || TYPE(n) == listmaker
533 || TYPE(n) == testlist_gexp
534 || TYPE(n) == testlist_safe
535 || TYPE(n) == testlist1
538 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
539 if (!seq)
540 return NULL;
542 for (i = 0; i < NCH(n); i += 2) {
543 assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
545 expression = ast_for_expr(c, CHILD(n, i));
546 if (!expression)
547 return NULL;
549 assert(i / 2 < seq->size);
550 asdl_seq_SET(seq, i / 2, expression);
552 return seq;
555 static expr_ty
556 compiler_complex_args(struct compiling *c, const node *n)
558 int i, len = (NCH(n) + 1) / 2;
559 expr_ty result;
560 asdl_seq *args = asdl_seq_new(len, c->c_arena);
561 if (!args)
562 return NULL;
564 /* fpdef: NAME | '(' fplist ')'
565 fplist: fpdef (',' fpdef)* [',']
567 REQ(n, fplist);
568 for (i = 0; i < len; i++) {
569 const node *fpdef_node = CHILD(n, 2*i);
570 const node *child;
571 expr_ty arg;
572 set_name:
573 /* fpdef_node is either a NAME or an fplist */
574 child = CHILD(fpdef_node, 0);
575 if (TYPE(child) == NAME) {
576 if (!strcmp(STR(child), "None")) {
577 ast_error(child, "assignment to None");
578 return NULL;
580 arg = Name(NEW_IDENTIFIER(child), Store, LINENO(child),
581 child->n_col_offset, c->c_arena);
583 else {
584 assert(TYPE(fpdef_node) == fpdef);
585 /* fpdef_node[0] is not a name, so it must be a '(', get CHILD[1] */
586 child = CHILD(fpdef_node, 1);
587 assert(TYPE(child) == fplist);
588 /* NCH == 1 means we have (x), we need to elide the extra parens */
589 if (NCH(child) == 1) {
590 fpdef_node = CHILD(child, 0);
591 assert(TYPE(fpdef_node) == fpdef);
592 goto set_name;
594 arg = compiler_complex_args(c, child);
596 asdl_seq_SET(args, i, arg);
599 result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
600 if (!set_context(result, Store, n))
601 return NULL;
602 return result;
606 /* Create AST for argument list. */
608 static arguments_ty
609 ast_for_arguments(struct compiling *c, const node *n)
611 /* parameters: '(' [varargslist] ')'
612 varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
613 | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
615 int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
616 asdl_seq *args, *defaults;
617 identifier vararg = NULL, kwarg = NULL;
618 node *ch;
620 if (TYPE(n) == parameters) {
621 if (NCH(n) == 2) /* () as argument list */
622 return arguments(NULL, NULL, NULL, NULL, c->c_arena);
623 n = CHILD(n, 1);
625 REQ(n, varargslist);
627 /* first count the number of normal args & defaults */
628 for (i = 0; i < NCH(n); i++) {
629 ch = CHILD(n, i);
630 if (TYPE(ch) == fpdef)
631 n_args++;
632 if (TYPE(ch) == EQUAL)
633 n_defaults++;
635 args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
636 if (!args && n_args)
637 return NULL; /* Don't need to goto error; no objects allocated */
638 defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
639 if (!defaults && n_defaults)
640 return NULL; /* Don't need to goto error; no objects allocated */
642 /* fpdef: NAME | '(' fplist ')'
643 fplist: fpdef (',' fpdef)* [',']
645 i = 0;
646 j = 0; /* index for defaults */
647 k = 0; /* index for args */
648 while (i < NCH(n)) {
649 ch = CHILD(n, i);
650 switch (TYPE(ch)) {
651 case fpdef:
652 handle_fpdef:
653 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
654 anything other than EQUAL or a comma? */
655 /* XXX Should NCH(n) check be made a separate check? */
656 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
657 expr_ty expression = ast_for_expr(c, CHILD(n, i + 2));
658 if (!expression)
659 goto error;
660 assert(defaults != NULL);
661 asdl_seq_SET(defaults, j++, expression);
662 i += 2;
663 found_default = 1;
665 else if (found_default) {
666 ast_error(n,
667 "non-default argument follows default argument");
668 goto error;
670 if (NCH(ch) == 3) {
671 ch = CHILD(ch, 1);
672 /* def foo((x)): is not complex, special case. */
673 if (NCH(ch) != 1) {
674 /* We have complex arguments, setup for unpacking. */
675 asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
676 } else {
677 /* def foo((x)): setup for checking NAME below. */
678 /* Loop because there can be many parens and tuple
679 unpacking mixed in. */
680 ch = CHILD(ch, 0);
681 assert(TYPE(ch) == fpdef);
682 goto handle_fpdef;
685 if (TYPE(CHILD(ch, 0)) == NAME) {
686 expr_ty name;
687 if (!strcmp(STR(CHILD(ch, 0)), "None")) {
688 ast_error(CHILD(ch, 0), "assignment to None");
689 goto error;
691 name = Name(NEW_IDENTIFIER(CHILD(ch, 0)),
692 Param, LINENO(ch), ch->n_col_offset,
693 c->c_arena);
694 if (!name)
695 goto error;
696 asdl_seq_SET(args, k++, name);
699 i += 2; /* the name and the comma */
700 break;
701 case STAR:
702 if (!strcmp(STR(CHILD(n, i+1)), "None")) {
703 ast_error(CHILD(n, i+1), "assignment to None");
704 goto error;
706 vararg = NEW_IDENTIFIER(CHILD(n, i+1));
707 i += 3;
708 break;
709 case DOUBLESTAR:
710 if (!strcmp(STR(CHILD(n, i+1)), "None")) {
711 ast_error(CHILD(n, i+1), "assignment to None");
712 goto error;
714 kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
715 i += 3;
716 break;
717 default:
718 PyErr_Format(PyExc_SystemError,
719 "unexpected node in varargslist: %d @ %d",
720 TYPE(ch), i);
721 goto error;
725 return arguments(args, vararg, kwarg, defaults, c->c_arena);
727 error:
728 Py_XDECREF(vararg);
729 Py_XDECREF(kwarg);
730 return NULL;
733 static expr_ty
734 ast_for_dotted_name(struct compiling *c, const node *n)
736 expr_ty e;
737 identifier id;
738 int lineno, col_offset;
739 int i;
741 REQ(n, dotted_name);
743 lineno = LINENO(n);
744 col_offset = n->n_col_offset;
746 id = NEW_IDENTIFIER(CHILD(n, 0));
747 if (!id)
748 return NULL;
749 e = Name(id, Load, lineno, col_offset, c->c_arena);
750 if (!e)
751 return NULL;
753 for (i = 2; i < NCH(n); i+=2) {
754 id = NEW_IDENTIFIER(CHILD(n, i));
755 if (!id)
756 return NULL;
757 e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
758 if (!e)
759 return NULL;
762 return e;
765 static expr_ty
766 ast_for_decorator(struct compiling *c, const node *n)
768 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
769 expr_ty d = NULL;
770 expr_ty name_expr;
772 REQ(n, decorator);
773 REQ(CHILD(n, 0), AT);
774 REQ(RCHILD(n, -1), NEWLINE);
776 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
777 if (!name_expr)
778 return NULL;
780 if (NCH(n) == 3) { /* No arguments */
781 d = name_expr;
782 name_expr = NULL;
784 else if (NCH(n) == 5) { /* Call with no arguments */
785 d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n),
786 n->n_col_offset, c->c_arena);
787 if (!d)
788 return NULL;
789 name_expr = NULL;
791 else {
792 d = ast_for_call(c, CHILD(n, 3), name_expr);
793 if (!d)
794 return NULL;
795 name_expr = NULL;
798 return d;
801 static asdl_seq*
802 ast_for_decorators(struct compiling *c, const node *n)
804 asdl_seq* decorator_seq;
805 expr_ty d;
806 int i;
808 REQ(n, decorators);
809 decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
810 if (!decorator_seq)
811 return NULL;
813 for (i = 0; i < NCH(n); i++) {
814 d = ast_for_decorator(c, CHILD(n, i));
815 if (!d)
816 return NULL;
817 asdl_seq_SET(decorator_seq, i, d);
819 return decorator_seq;
822 static stmt_ty
823 ast_for_funcdef(struct compiling *c, const node *n)
825 /* funcdef: 'def' [decorators] NAME parameters ':' suite */
826 identifier name;
827 arguments_ty args;
828 asdl_seq *body;
829 asdl_seq *decorator_seq = NULL;
830 int name_i;
832 REQ(n, funcdef);
834 if (NCH(n) == 6) { /* decorators are present */
835 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
836 if (!decorator_seq)
837 return NULL;
838 name_i = 2;
840 else {
841 name_i = 1;
844 name = NEW_IDENTIFIER(CHILD(n, name_i));
845 if (!name)
846 return NULL;
847 else if (!strcmp(STR(CHILD(n, name_i)), "None")) {
848 ast_error(CHILD(n, name_i), "assignment to None");
849 return NULL;
851 args = ast_for_arguments(c, CHILD(n, name_i + 1));
852 if (!args)
853 return NULL;
854 body = ast_for_suite(c, CHILD(n, name_i + 3));
855 if (!body)
856 return NULL;
858 return FunctionDef(name, args, body, decorator_seq, LINENO(n),
859 n->n_col_offset, c->c_arena);
862 static expr_ty
863 ast_for_lambdef(struct compiling *c, const node *n)
865 /* lambdef: 'lambda' [varargslist] ':' test */
866 arguments_ty args;
867 expr_ty expression;
869 if (NCH(n) == 3) {
870 args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
871 if (!args)
872 return NULL;
873 expression = ast_for_expr(c, CHILD(n, 2));
874 if (!expression)
875 return NULL;
877 else {
878 args = ast_for_arguments(c, CHILD(n, 1));
879 if (!args)
880 return NULL;
881 expression = ast_for_expr(c, CHILD(n, 3));
882 if (!expression)
883 return NULL;
886 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
889 static expr_ty
890 ast_for_ifexpr(struct compiling *c, const node *n)
892 /* test: or_test 'if' or_test 'else' test */
893 expr_ty expression, body, orelse;
895 assert(NCH(n) == 5);
896 body = ast_for_expr(c, CHILD(n, 0));
897 if (!body)
898 return NULL;
899 expression = ast_for_expr(c, CHILD(n, 2));
900 if (!expression)
901 return NULL;
902 orelse = ast_for_expr(c, CHILD(n, 4));
903 if (!orelse)
904 return NULL;
905 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
906 c->c_arena);
909 /* XXX(nnorwitz): the listcomp and genexpr code should be refactored
910 so there is only a single version. Possibly for loops can also re-use
911 the code.
914 /* Count the number of 'for' loop in a list comprehension.
916 Helper for ast_for_listcomp().
919 static int
920 count_list_fors(const node *n)
922 int n_fors = 0;
923 node *ch = CHILD(n, 1);
925 count_list_for:
926 n_fors++;
927 REQ(ch, list_for);
928 if (NCH(ch) == 5)
929 ch = CHILD(ch, 4);
930 else
931 return n_fors;
932 count_list_iter:
933 REQ(ch, list_iter);
934 ch = CHILD(ch, 0);
935 if (TYPE(ch) == list_for)
936 goto count_list_for;
937 else if (TYPE(ch) == list_if) {
938 if (NCH(ch) == 3) {
939 ch = CHILD(ch, 2);
940 goto count_list_iter;
942 else
943 return n_fors;
946 /* Should never be reached */
947 PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
948 return -1;
951 /* Count the number of 'if' statements in a list comprehension.
953 Helper for ast_for_listcomp().
956 static int
957 count_list_ifs(const node *n)
959 int n_ifs = 0;
961 count_list_iter:
962 REQ(n, list_iter);
963 if (TYPE(CHILD(n, 0)) == list_for)
964 return n_ifs;
965 n = CHILD(n, 0);
966 REQ(n, list_if);
967 n_ifs++;
968 if (NCH(n) == 2)
969 return n_ifs;
970 n = CHILD(n, 2);
971 goto count_list_iter;
974 static expr_ty
975 ast_for_listcomp(struct compiling *c, const node *n)
977 /* listmaker: test ( list_for | (',' test)* [','] )
978 list_for: 'for' exprlist 'in' testlist_safe [list_iter]
979 list_iter: list_for | list_if
980 list_if: 'if' test [list_iter]
981 testlist_safe: test [(',' test)+ [',']]
983 expr_ty elt;
984 asdl_seq *listcomps;
985 int i, n_fors;
986 node *ch;
988 REQ(n, listmaker);
989 assert(NCH(n) > 1);
991 elt = ast_for_expr(c, CHILD(n, 0));
992 if (!elt)
993 return NULL;
995 n_fors = count_list_fors(n);
996 if (n_fors == -1)
997 return NULL;
999 listcomps = asdl_seq_new(n_fors, c->c_arena);
1000 if (!listcomps)
1001 return NULL;
1003 ch = CHILD(n, 1);
1004 for (i = 0; i < n_fors; i++) {
1005 comprehension_ty lc;
1006 asdl_seq *t;
1007 expr_ty expression;
1008 node *for_ch;
1010 REQ(ch, list_for);
1012 for_ch = CHILD(ch, 1);
1013 t = ast_for_exprlist(c, for_ch, Store);
1014 if (!t)
1015 return NULL;
1016 expression = ast_for_testlist(c, CHILD(ch, 3));
1017 if (!expression)
1018 return NULL;
1020 /* Check the # of children rather than the length of t, since
1021 [x for x, in ... ] has 1 element in t, but still requires a Tuple. */
1022 if (NCH(for_ch) == 1)
1023 lc = comprehension((expr_ty)asdl_seq_GET(t, 0), expression, NULL,
1024 c->c_arena);
1025 else
1026 lc = comprehension(Tuple(t, Store, LINENO(ch), ch->n_col_offset,
1027 c->c_arena),
1028 expression, NULL, c->c_arena);
1029 if (!lc)
1030 return NULL;
1032 if (NCH(ch) == 5) {
1033 int j, n_ifs;
1034 asdl_seq *ifs;
1036 ch = CHILD(ch, 4);
1037 n_ifs = count_list_ifs(ch);
1038 if (n_ifs == -1)
1039 return NULL;
1041 ifs = asdl_seq_new(n_ifs, c->c_arena);
1042 if (!ifs)
1043 return NULL;
1045 for (j = 0; j < n_ifs; j++) {
1046 REQ(ch, list_iter);
1047 ch = CHILD(ch, 0);
1048 REQ(ch, list_if);
1050 asdl_seq_SET(ifs, j, ast_for_expr(c, CHILD(ch, 1)));
1051 if (NCH(ch) == 3)
1052 ch = CHILD(ch, 2);
1054 /* on exit, must guarantee that ch is a list_for */
1055 if (TYPE(ch) == list_iter)
1056 ch = CHILD(ch, 0);
1057 lc->ifs = ifs;
1059 asdl_seq_SET(listcomps, i, lc);
1062 return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1066 Count the number of 'for' loops in a generator expression.
1068 Helper for ast_for_genexp().
1071 static int
1072 count_gen_fors(const node *n)
1074 int n_fors = 0;
1075 node *ch = CHILD(n, 1);
1077 count_gen_for:
1078 n_fors++;
1079 REQ(ch, gen_for);
1080 if (NCH(ch) == 5)
1081 ch = CHILD(ch, 4);
1082 else
1083 return n_fors;
1084 count_gen_iter:
1085 REQ(ch, gen_iter);
1086 ch = CHILD(ch, 0);
1087 if (TYPE(ch) == gen_for)
1088 goto count_gen_for;
1089 else if (TYPE(ch) == gen_if) {
1090 if (NCH(ch) == 3) {
1091 ch = CHILD(ch, 2);
1092 goto count_gen_iter;
1094 else
1095 return n_fors;
1098 /* Should never be reached */
1099 PyErr_SetString(PyExc_SystemError,
1100 "logic error in count_gen_fors");
1101 return -1;
1104 /* Count the number of 'if' statements in a generator expression.
1106 Helper for ast_for_genexp().
1109 static int
1110 count_gen_ifs(const node *n)
1112 int n_ifs = 0;
1114 while (1) {
1115 REQ(n, gen_iter);
1116 if (TYPE(CHILD(n, 0)) == gen_for)
1117 return n_ifs;
1118 n = CHILD(n, 0);
1119 REQ(n, gen_if);
1120 n_ifs++;
1121 if (NCH(n) == 2)
1122 return n_ifs;
1123 n = CHILD(n, 2);
1127 /* TODO(jhylton): Combine with list comprehension code? */
1128 static expr_ty
1129 ast_for_genexp(struct compiling *c, const node *n)
1131 /* testlist_gexp: test ( gen_for | (',' test)* [','] )
1132 argument: [test '='] test [gen_for] # Really [keyword '='] test */
1133 expr_ty elt;
1134 asdl_seq *genexps;
1135 int i, n_fors;
1136 node *ch;
1138 assert(TYPE(n) == (testlist_gexp) || TYPE(n) == (argument));
1139 assert(NCH(n) > 1);
1141 elt = ast_for_expr(c, CHILD(n, 0));
1142 if (!elt)
1143 return NULL;
1145 n_fors = count_gen_fors(n);
1146 if (n_fors == -1)
1147 return NULL;
1149 genexps = asdl_seq_new(n_fors, c->c_arena);
1150 if (!genexps)
1151 return NULL;
1153 ch = CHILD(n, 1);
1154 for (i = 0; i < n_fors; i++) {
1155 comprehension_ty ge;
1156 asdl_seq *t;
1157 expr_ty expression;
1158 node *for_ch;
1160 REQ(ch, gen_for);
1162 for_ch = CHILD(ch, 1);
1163 t = ast_for_exprlist(c, for_ch, Store);
1164 if (!t)
1165 return NULL;
1166 expression = ast_for_expr(c, CHILD(ch, 3));
1167 if (!expression)
1168 return NULL;
1170 /* Check the # of children rather than the length of t, since
1171 (x for x, in ...) has 1 element in t, but still requires a Tuple. */
1172 if (NCH(for_ch) == 1)
1173 ge = comprehension((expr_ty)asdl_seq_GET(t, 0), expression,
1174 NULL, c->c_arena);
1175 else
1176 ge = comprehension(Tuple(t, Store, LINENO(ch), ch->n_col_offset,
1177 c->c_arena),
1178 expression, NULL, c->c_arena);
1180 if (!ge)
1181 return NULL;
1183 if (NCH(ch) == 5) {
1184 int j, n_ifs;
1185 asdl_seq *ifs;
1187 ch = CHILD(ch, 4);
1188 n_ifs = count_gen_ifs(ch);
1189 if (n_ifs == -1)
1190 return NULL;
1192 ifs = asdl_seq_new(n_ifs, c->c_arena);
1193 if (!ifs)
1194 return NULL;
1196 for (j = 0; j < n_ifs; j++) {
1197 REQ(ch, gen_iter);
1198 ch = CHILD(ch, 0);
1199 REQ(ch, gen_if);
1201 expression = ast_for_expr(c, CHILD(ch, 1));
1202 if (!expression)
1203 return NULL;
1204 asdl_seq_SET(ifs, j, expression);
1205 if (NCH(ch) == 3)
1206 ch = CHILD(ch, 2);
1208 /* on exit, must guarantee that ch is a gen_for */
1209 if (TYPE(ch) == gen_iter)
1210 ch = CHILD(ch, 0);
1211 ge->ifs = ifs;
1213 asdl_seq_SET(genexps, i, ge);
1216 return GeneratorExp(elt, genexps, LINENO(n), n->n_col_offset, c->c_arena);
1219 static expr_ty
1220 ast_for_atom(struct compiling *c, const node *n)
1222 /* atom: '(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']'
1223 | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1225 node *ch = CHILD(n, 0);
1227 switch (TYPE(ch)) {
1228 case NAME:
1229 /* All names start in Load context, but may later be
1230 changed. */
1231 return Name(NEW_IDENTIFIER(ch), Load, LINENO(n), n->n_col_offset, c->c_arena);
1232 case STRING: {
1233 PyObject *str = parsestrplus(c, n);
1234 if (!str)
1235 return NULL;
1237 PyArena_AddPyObject(c->c_arena, str);
1238 return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1240 case NUMBER: {
1241 PyObject *pynum = parsenumber(STR(ch));
1242 if (!pynum)
1243 return NULL;
1245 PyArena_AddPyObject(c->c_arena, pynum);
1246 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1248 case LPAR: /* some parenthesized expressions */
1249 ch = CHILD(n, 1);
1251 if (TYPE(ch) == RPAR)
1252 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1254 if (TYPE(ch) == yield_expr)
1255 return ast_for_expr(c, ch);
1257 if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == gen_for))
1258 return ast_for_genexp(c, ch);
1260 return ast_for_testlist_gexp(c, ch);
1261 case LSQB: /* list (or list comprehension) */
1262 ch = CHILD(n, 1);
1264 if (TYPE(ch) == RSQB)
1265 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1267 REQ(ch, listmaker);
1268 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1269 asdl_seq *elts = seq_for_testlist(c, ch);
1270 if (!elts)
1271 return NULL;
1273 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1275 else
1276 return ast_for_listcomp(c, ch);
1277 case LBRACE: {
1278 /* dictmaker: test ':' test (',' test ':' test)* [','] */
1279 int i, size;
1280 asdl_seq *keys, *values;
1282 ch = CHILD(n, 1);
1283 size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1284 keys = asdl_seq_new(size, c->c_arena);
1285 if (!keys)
1286 return NULL;
1288 values = asdl_seq_new(size, c->c_arena);
1289 if (!values)
1290 return NULL;
1292 for (i = 0; i < NCH(ch); i += 4) {
1293 expr_ty expression;
1295 expression = ast_for_expr(c, CHILD(ch, i));
1296 if (!expression)
1297 return NULL;
1299 asdl_seq_SET(keys, i / 4, expression);
1301 expression = ast_for_expr(c, CHILD(ch, i + 2));
1302 if (!expression)
1303 return NULL;
1305 asdl_seq_SET(values, i / 4, expression);
1307 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1309 case BACKQUOTE: { /* repr */
1310 expr_ty expression = ast_for_testlist(c, CHILD(n, 1));
1311 if (!expression)
1312 return NULL;
1314 return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1316 default:
1317 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1318 return NULL;
1322 static slice_ty
1323 ast_for_slice(struct compiling *c, const node *n)
1325 node *ch;
1326 expr_ty lower = NULL, upper = NULL, step = NULL;
1328 REQ(n, subscript);
1331 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1332 sliceop: ':' [test]
1334 ch = CHILD(n, 0);
1335 if (TYPE(ch) == DOT)
1336 return Ellipsis(c->c_arena);
1338 if (NCH(n) == 1 && TYPE(ch) == test) {
1339 /* 'step' variable hold no significance in terms of being used over
1340 other vars */
1341 step = ast_for_expr(c, ch);
1342 if (!step)
1343 return NULL;
1345 return Index(step, c->c_arena);
1348 if (TYPE(ch) == test) {
1349 lower = ast_for_expr(c, ch);
1350 if (!lower)
1351 return NULL;
1354 /* If there's an upper bound it's in the second or third position. */
1355 if (TYPE(ch) == COLON) {
1356 if (NCH(n) > 1) {
1357 node *n2 = CHILD(n, 1);
1359 if (TYPE(n2) == test) {
1360 upper = ast_for_expr(c, n2);
1361 if (!upper)
1362 return NULL;
1365 } else if (NCH(n) > 2) {
1366 node *n2 = CHILD(n, 2);
1368 if (TYPE(n2) == test) {
1369 upper = ast_for_expr(c, n2);
1370 if (!upper)
1371 return NULL;
1375 ch = CHILD(n, NCH(n) - 1);
1376 if (TYPE(ch) == sliceop) {
1377 if (NCH(ch) == 1) {
1378 /* No expression, so step is None */
1379 ch = CHILD(ch, 0);
1380 step = Name(new_identifier("None", c->c_arena), Load,
1381 LINENO(ch), ch->n_col_offset, c->c_arena);
1382 if (!step)
1383 return NULL;
1384 } else {
1385 ch = CHILD(ch, 1);
1386 if (TYPE(ch) == test) {
1387 step = ast_for_expr(c, ch);
1388 if (!step)
1389 return NULL;
1394 return Slice(lower, upper, step, c->c_arena);
1397 static expr_ty
1398 ast_for_binop(struct compiling *c, const node *n)
1400 /* Must account for a sequence of expressions.
1401 How should A op B op C by represented?
1402 BinOp(BinOp(A, op, B), op, C).
1405 int i, nops;
1406 expr_ty expr1, expr2, result;
1407 operator_ty newoperator;
1409 expr1 = ast_for_expr(c, CHILD(n, 0));
1410 if (!expr1)
1411 return NULL;
1413 expr2 = ast_for_expr(c, CHILD(n, 2));
1414 if (!expr2)
1415 return NULL;
1417 newoperator = get_operator(CHILD(n, 1));
1418 if (!newoperator)
1419 return NULL;
1421 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1422 c->c_arena);
1423 if (!result)
1424 return NULL;
1426 nops = (NCH(n) - 1) / 2;
1427 for (i = 1; i < nops; i++) {
1428 expr_ty tmp_result, tmp;
1429 const node* next_oper = CHILD(n, i * 2 + 1);
1431 newoperator = get_operator(next_oper);
1432 if (!newoperator)
1433 return NULL;
1435 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1436 if (!tmp)
1437 return NULL;
1439 tmp_result = BinOp(result, newoperator, tmp,
1440 LINENO(next_oper), next_oper->n_col_offset,
1441 c->c_arena);
1442 if (!tmp)
1443 return NULL;
1444 result = tmp_result;
1446 return result;
1449 static expr_ty
1450 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1452 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1453 subscriptlist: subscript (',' subscript)* [',']
1454 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1456 REQ(n, trailer);
1457 if (TYPE(CHILD(n, 0)) == LPAR) {
1458 if (NCH(n) == 2)
1459 return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1460 n->n_col_offset, c->c_arena);
1461 else
1462 return ast_for_call(c, CHILD(n, 1), left_expr);
1464 else if (TYPE(CHILD(n, 0)) == DOT ) {
1465 return Attribute(left_expr, NEW_IDENTIFIER(CHILD(n, 1)), Load,
1466 LINENO(n), n->n_col_offset, c->c_arena);
1468 else {
1469 REQ(CHILD(n, 0), LSQB);
1470 REQ(CHILD(n, 2), RSQB);
1471 n = CHILD(n, 1);
1472 if (NCH(n) == 1) {
1473 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1474 if (!slc)
1475 return NULL;
1476 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1477 c->c_arena);
1479 else {
1480 /* The grammar is ambiguous here. The ambiguity is resolved
1481 by treating the sequence as a tuple literal if there are
1482 no slice features.
1484 int j;
1485 slice_ty slc;
1486 expr_ty e;
1487 bool simple = true;
1488 asdl_seq *slices, *elts;
1489 slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1490 if (!slices)
1491 return NULL;
1492 for (j = 0; j < NCH(n); j += 2) {
1493 slc = ast_for_slice(c, CHILD(n, j));
1494 if (!slc)
1495 return NULL;
1496 if (slc->kind != Index_kind)
1497 simple = false;
1498 asdl_seq_SET(slices, j / 2, slc);
1500 if (!simple) {
1501 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1502 Load, LINENO(n), n->n_col_offset, c->c_arena);
1504 /* extract Index values and put them in a Tuple */
1505 elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1506 if (!elts)
1507 return NULL;
1508 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1509 slc = (slice_ty)asdl_seq_GET(slices, j);
1510 assert(slc->kind == Index_kind && slc->v.Index.value);
1511 asdl_seq_SET(elts, j, slc->v.Index.value);
1513 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1514 if (!e)
1515 return NULL;
1516 return Subscript(left_expr, Index(e, c->c_arena),
1517 Load, LINENO(n), n->n_col_offset, c->c_arena);
1522 static expr_ty
1523 ast_for_factor(struct compiling *c, const node *n)
1525 node *pfactor, *ppower, *patom, *pnum;
1526 expr_ty expression;
1528 /* If the unary - operator is applied to a constant, don't generate
1529 a UNARY_NEGATIVE opcode. Just store the approriate value as a
1530 constant. The peephole optimizer already does something like
1531 this but it doesn't handle the case where the constant is
1532 (sys.maxint - 1). In that case, we want a PyIntObject, not a
1533 PyLongObject.
1535 if (TYPE(CHILD(n, 0)) == MINUS
1536 && NCH(n) == 2
1537 && TYPE((pfactor = CHILD(n, 1))) == factor
1538 && NCH(pfactor) == 1
1539 && TYPE((ppower = CHILD(pfactor, 0))) == power
1540 && NCH(ppower) == 1
1541 && TYPE((patom = CHILD(ppower, 0))) == atom
1542 && TYPE((pnum = CHILD(patom, 0))) == NUMBER) {
1543 char *s = PyObject_MALLOC(strlen(STR(pnum)) + 2);
1544 if (s == NULL)
1545 return NULL;
1546 s[0] = '-';
1547 strcpy(s + 1, STR(pnum));
1548 PyObject_FREE(STR(pnum));
1549 STR(pnum) = s;
1550 return ast_for_atom(c, patom);
1553 expression = ast_for_expr(c, CHILD(n, 1));
1554 if (!expression)
1555 return NULL;
1557 switch (TYPE(CHILD(n, 0))) {
1558 case PLUS:
1559 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset,
1560 c->c_arena);
1561 case MINUS:
1562 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset,
1563 c->c_arena);
1564 case TILDE:
1565 return UnaryOp(Invert, expression, LINENO(n),
1566 n->n_col_offset, c->c_arena);
1568 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1569 TYPE(CHILD(n, 0)));
1570 return NULL;
1573 static expr_ty
1574 ast_for_power(struct compiling *c, const node *n)
1576 /* power: atom trailer* ('**' factor)*
1578 int i;
1579 expr_ty e, tmp;
1580 REQ(n, power);
1581 e = ast_for_atom(c, CHILD(n, 0));
1582 if (!e)
1583 return NULL;
1584 if (NCH(n) == 1)
1585 return e;
1586 for (i = 1; i < NCH(n); i++) {
1587 node *ch = CHILD(n, i);
1588 if (TYPE(ch) != trailer)
1589 break;
1590 tmp = ast_for_trailer(c, ch, e);
1591 if (!tmp)
1592 return NULL;
1593 tmp->lineno = e->lineno;
1594 tmp->col_offset = e->col_offset;
1595 e = tmp;
1597 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1598 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1599 if (!f)
1600 return NULL;
1601 tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1602 if (!tmp)
1603 return NULL;
1604 e = tmp;
1606 return e;
1609 /* Do not name a variable 'expr'! Will cause a compile error.
1612 static expr_ty
1613 ast_for_expr(struct compiling *c, const node *n)
1615 /* handle the full range of simple expressions
1616 test: or_test ['if' or_test 'else' test] | lambdef
1617 or_test: and_test ('or' and_test)*
1618 and_test: not_test ('and' not_test)*
1619 not_test: 'not' not_test | comparison
1620 comparison: expr (comp_op expr)*
1621 expr: xor_expr ('|' xor_expr)*
1622 xor_expr: and_expr ('^' and_expr)*
1623 and_expr: shift_expr ('&' shift_expr)*
1624 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1625 arith_expr: term (('+'|'-') term)*
1626 term: factor (('*'|'/'|'%'|'//') factor)*
1627 factor: ('+'|'-'|'~') factor | power
1628 power: atom trailer* ('**' factor)*
1630 As well as modified versions that exist for backward compatibility,
1631 to explicitly allow:
1632 [ x for x in lambda: 0, lambda: 1 ]
1633 (which would be ambiguous without these extra rules)
1635 old_test: or_test | old_lambdef
1636 old_lambdef: 'lambda' [vararglist] ':' old_test
1640 asdl_seq *seq;
1641 int i;
1643 loop:
1644 switch (TYPE(n)) {
1645 case test:
1646 case old_test:
1647 if (TYPE(CHILD(n, 0)) == lambdef ||
1648 TYPE(CHILD(n, 0)) == old_lambdef)
1649 return ast_for_lambdef(c, CHILD(n, 0));
1650 else if (NCH(n) > 1)
1651 return ast_for_ifexpr(c, n);
1652 /* Fallthrough */
1653 case or_test:
1654 case and_test:
1655 if (NCH(n) == 1) {
1656 n = CHILD(n, 0);
1657 goto loop;
1659 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1660 if (!seq)
1661 return NULL;
1662 for (i = 0; i < NCH(n); i += 2) {
1663 expr_ty e = ast_for_expr(c, CHILD(n, i));
1664 if (!e)
1665 return NULL;
1666 asdl_seq_SET(seq, i / 2, e);
1668 if (!strcmp(STR(CHILD(n, 1)), "and"))
1669 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
1670 c->c_arena);
1671 assert(!strcmp(STR(CHILD(n, 1)), "or"));
1672 return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1673 case not_test:
1674 if (NCH(n) == 1) {
1675 n = CHILD(n, 0);
1676 goto loop;
1678 else {
1679 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1680 if (!expression)
1681 return NULL;
1683 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
1684 c->c_arena);
1686 case comparison:
1687 if (NCH(n) == 1) {
1688 n = CHILD(n, 0);
1689 goto loop;
1691 else {
1692 expr_ty expression;
1693 asdl_int_seq *ops;
1694 asdl_seq *cmps;
1695 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
1696 if (!ops)
1697 return NULL;
1698 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1699 if (!cmps) {
1700 return NULL;
1702 for (i = 1; i < NCH(n); i += 2) {
1703 cmpop_ty newoperator;
1705 newoperator = ast_for_comp_op(CHILD(n, i));
1706 if (!newoperator) {
1707 return NULL;
1710 expression = ast_for_expr(c, CHILD(n, i + 1));
1711 if (!expression) {
1712 return NULL;
1715 asdl_seq_SET(ops, i / 2, newoperator);
1716 asdl_seq_SET(cmps, i / 2, expression);
1718 expression = ast_for_expr(c, CHILD(n, 0));
1719 if (!expression) {
1720 return NULL;
1723 return Compare(expression, ops, cmps, LINENO(n),
1724 n->n_col_offset, c->c_arena);
1726 break;
1728 /* The next five cases all handle BinOps. The main body of code
1729 is the same in each case, but the switch turned inside out to
1730 reuse the code for each type of operator.
1732 case expr:
1733 case xor_expr:
1734 case and_expr:
1735 case shift_expr:
1736 case arith_expr:
1737 case term:
1738 if (NCH(n) == 1) {
1739 n = CHILD(n, 0);
1740 goto loop;
1742 return ast_for_binop(c, n);
1743 case yield_expr: {
1744 expr_ty exp = NULL;
1745 if (NCH(n) == 2) {
1746 exp = ast_for_testlist(c, CHILD(n, 1));
1747 if (!exp)
1748 return NULL;
1750 return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1752 case factor:
1753 if (NCH(n) == 1) {
1754 n = CHILD(n, 0);
1755 goto loop;
1757 return ast_for_factor(c, n);
1758 case power:
1759 return ast_for_power(c, n);
1760 default:
1761 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1762 return NULL;
1764 /* should never get here unless if error is set */
1765 return NULL;
1768 static expr_ty
1769 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1772 arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1773 | '**' test)
1774 argument: [test '='] test [gen_for] # Really [keyword '='] test
1777 int i, nargs, nkeywords, ngens;
1778 asdl_seq *args;
1779 asdl_seq *keywords;
1780 expr_ty vararg = NULL, kwarg = NULL;
1782 REQ(n, arglist);
1784 nargs = 0;
1785 nkeywords = 0;
1786 ngens = 0;
1787 for (i = 0; i < NCH(n); i++) {
1788 node *ch = CHILD(n, i);
1789 if (TYPE(ch) == argument) {
1790 if (NCH(ch) == 1)
1791 nargs++;
1792 else if (TYPE(CHILD(ch, 1)) == gen_for)
1793 ngens++;
1794 else
1795 nkeywords++;
1798 if (ngens > 1 || (ngens && (nargs || nkeywords))) {
1799 ast_error(n, "Generator expression must be parenthesized "
1800 "if not sole argument");
1801 return NULL;
1804 if (nargs + nkeywords + ngens > 255) {
1805 ast_error(n, "more than 255 arguments");
1806 return NULL;
1809 args = asdl_seq_new(nargs + ngens, c->c_arena);
1810 if (!args)
1811 return NULL;
1812 keywords = asdl_seq_new(nkeywords, c->c_arena);
1813 if (!keywords)
1814 return NULL;
1815 nargs = 0;
1816 nkeywords = 0;
1817 for (i = 0; i < NCH(n); i++) {
1818 node *ch = CHILD(n, i);
1819 if (TYPE(ch) == argument) {
1820 expr_ty e;
1821 if (NCH(ch) == 1) {
1822 if (nkeywords) {
1823 ast_error(CHILD(ch, 0),
1824 "non-keyword arg after keyword arg");
1825 return NULL;
1827 e = ast_for_expr(c, CHILD(ch, 0));
1828 if (!e)
1829 return NULL;
1830 asdl_seq_SET(args, nargs++, e);
1832 else if (TYPE(CHILD(ch, 1)) == gen_for) {
1833 e = ast_for_genexp(c, ch);
1834 if (!e)
1835 return NULL;
1836 asdl_seq_SET(args, nargs++, e);
1838 else {
1839 keyword_ty kw;
1840 identifier key;
1842 /* CHILD(ch, 0) is test, but must be an identifier? */
1843 e = ast_for_expr(c, CHILD(ch, 0));
1844 if (!e)
1845 return NULL;
1846 /* f(lambda x: x[0] = 3) ends up getting parsed with
1847 * LHS test = lambda x: x[0], and RHS test = 3.
1848 * SF bug 132313 points out that complaining about a keyword
1849 * then is very confusing.
1851 if (e->kind == Lambda_kind) {
1852 ast_error(CHILD(ch, 0), "lambda cannot contain assignment");
1853 return NULL;
1854 } else if (e->kind != Name_kind) {
1855 ast_error(CHILD(ch, 0), "keyword can't be an expression");
1856 return NULL;
1858 key = e->v.Name.id;
1859 e = ast_for_expr(c, CHILD(ch, 2));
1860 if (!e)
1861 return NULL;
1862 kw = keyword(key, e, c->c_arena);
1863 if (!kw)
1864 return NULL;
1865 asdl_seq_SET(keywords, nkeywords++, kw);
1868 else if (TYPE(ch) == STAR) {
1869 vararg = ast_for_expr(c, CHILD(n, i+1));
1870 i++;
1872 else if (TYPE(ch) == DOUBLESTAR) {
1873 kwarg = ast_for_expr(c, CHILD(n, i+1));
1874 i++;
1878 return Call(func, args, keywords, vararg, kwarg, func->lineno, func->col_offset, c->c_arena);
1881 static expr_ty
1882 ast_for_testlist(struct compiling *c, const node* n)
1884 /* testlist_gexp: test (',' test)* [','] */
1885 /* testlist: test (',' test)* [','] */
1886 /* testlist_safe: test (',' test)+ [','] */
1887 /* testlist1: test (',' test)* */
1888 assert(NCH(n) > 0);
1889 if (TYPE(n) == testlist_gexp) {
1890 if (NCH(n) > 1)
1891 assert(TYPE(CHILD(n, 1)) != gen_for);
1893 else {
1894 assert(TYPE(n) == testlist ||
1895 TYPE(n) == testlist_safe ||
1896 TYPE(n) == testlist1);
1898 if (NCH(n) == 1)
1899 return ast_for_expr(c, CHILD(n, 0));
1900 else {
1901 asdl_seq *tmp = seq_for_testlist(c, n);
1902 if (!tmp)
1903 return NULL;
1904 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
1908 static expr_ty
1909 ast_for_testlist_gexp(struct compiling *c, const node* n)
1911 /* testlist_gexp: test ( gen_for | (',' test)* [','] ) */
1912 /* argument: test [ gen_for ] */
1913 assert(TYPE(n) == testlist_gexp || TYPE(n) == argument);
1914 if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for)
1915 return ast_for_genexp(c, n);
1916 return ast_for_testlist(c, n);
1919 /* like ast_for_testlist() but returns a sequence */
1920 static asdl_seq*
1921 ast_for_class_bases(struct compiling *c, const node* n)
1923 /* testlist: test (',' test)* [','] */
1924 assert(NCH(n) > 0);
1925 REQ(n, testlist);
1926 if (NCH(n) == 1) {
1927 expr_ty base;
1928 asdl_seq *bases = asdl_seq_new(1, c->c_arena);
1929 if (!bases)
1930 return NULL;
1931 base = ast_for_expr(c, CHILD(n, 0));
1932 if (!base)
1933 return NULL;
1934 asdl_seq_SET(bases, 0, base);
1935 return bases;
1938 return seq_for_testlist(c, n);
1941 static stmt_ty
1942 ast_for_expr_stmt(struct compiling *c, const node *n)
1944 REQ(n, expr_stmt);
1945 /* expr_stmt: testlist (augassign (yield_expr|testlist)
1946 | ('=' (yield_expr|testlist))*)
1947 testlist: test (',' test)* [',']
1948 augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
1949 | '<<=' | '>>=' | '**=' | '//='
1950 test: ... here starts the operator precendence dance
1953 if (NCH(n) == 1) {
1954 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
1955 if (!e)
1956 return NULL;
1958 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
1960 else if (TYPE(CHILD(n, 1)) == augassign) {
1961 expr_ty expr1, expr2;
1962 operator_ty newoperator;
1963 node *ch = CHILD(n, 0);
1965 expr1 = ast_for_testlist(c, ch);
1966 if (!expr1)
1967 return NULL;
1968 /* TODO(nas): Remove duplicated error checks (set_context does it) */
1969 switch (expr1->kind) {
1970 case GeneratorExp_kind:
1971 ast_error(ch, "augmented assignment to generator "
1972 "expression not possible");
1973 return NULL;
1974 case Yield_kind:
1975 ast_error(ch, "augmented assignment to yield "
1976 "expression not possible");
1977 return NULL;
1978 case Name_kind: {
1979 const char *var_name = PyString_AS_STRING(expr1->v.Name.id);
1980 if (var_name[0] == 'N' && !strcmp(var_name, "None")) {
1981 ast_error(ch, "assignment to None");
1982 return NULL;
1984 break;
1986 case Attribute_kind:
1987 case Subscript_kind:
1988 break;
1989 default:
1990 ast_error(ch, "illegal expression for augmented "
1991 "assignment");
1992 return NULL;
1994 set_context(expr1, Store, ch);
1996 ch = CHILD(n, 2);
1997 if (TYPE(ch) == testlist)
1998 expr2 = ast_for_testlist(c, ch);
1999 else
2000 expr2 = ast_for_expr(c, ch);
2001 if (!expr2)
2002 return NULL;
2004 newoperator = ast_for_augassign(CHILD(n, 1));
2005 if (!newoperator)
2006 return NULL;
2008 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2010 else {
2011 int i;
2012 asdl_seq *targets;
2013 node *value;
2014 expr_ty expression;
2016 /* a normal assignment */
2017 REQ(CHILD(n, 1), EQUAL);
2018 targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
2019 if (!targets)
2020 return NULL;
2021 for (i = 0; i < NCH(n) - 2; i += 2) {
2022 expr_ty e;
2023 node *ch = CHILD(n, i);
2024 if (TYPE(ch) == yield_expr) {
2025 ast_error(ch, "assignment to yield expression not possible");
2026 return NULL;
2028 e = ast_for_testlist(c, ch);
2030 /* set context to assign */
2031 if (!e)
2032 return NULL;
2034 if (!set_context(e, Store, CHILD(n, i)))
2035 return NULL;
2037 asdl_seq_SET(targets, i / 2, e);
2039 value = CHILD(n, NCH(n) - 1);
2040 if (TYPE(value) == testlist)
2041 expression = ast_for_testlist(c, value);
2042 else
2043 expression = ast_for_expr(c, value);
2044 if (!expression)
2045 return NULL;
2046 return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena);
2050 static stmt_ty
2051 ast_for_print_stmt(struct compiling *c, const node *n)
2053 /* print_stmt: 'print' ( [ test (',' test)* [','] ]
2054 | '>>' test [ (',' test)+ [','] ] )
2056 expr_ty dest = NULL, expression;
2057 asdl_seq *seq;
2058 bool nl;
2059 int i, j, start = 1;
2061 REQ(n, print_stmt);
2062 if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
2063 dest = ast_for_expr(c, CHILD(n, 2));
2064 if (!dest)
2065 return NULL;
2066 start = 4;
2068 seq = asdl_seq_new((NCH(n) + 1 - start) / 2, c->c_arena);
2069 if (!seq)
2070 return NULL;
2071 for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2072 expression = ast_for_expr(c, CHILD(n, i));
2073 if (!expression)
2074 return NULL;
2075 asdl_seq_SET(seq, j, expression);
2077 nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2078 return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2081 static asdl_seq *
2082 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2084 asdl_seq *seq;
2085 int i;
2086 expr_ty e;
2088 REQ(n, exprlist);
2090 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2091 if (!seq)
2092 return NULL;
2093 for (i = 0; i < NCH(n); i += 2) {
2094 e = ast_for_expr(c, CHILD(n, i));
2095 if (!e)
2096 return NULL;
2097 asdl_seq_SET(seq, i / 2, e);
2098 if (context && !set_context(e, context, CHILD(n, i)))
2099 return NULL;
2101 return seq;
2104 static stmt_ty
2105 ast_for_del_stmt(struct compiling *c, const node *n)
2107 asdl_seq *expr_list;
2109 /* del_stmt: 'del' exprlist */
2110 REQ(n, del_stmt);
2112 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2113 if (!expr_list)
2114 return NULL;
2115 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2118 static stmt_ty
2119 ast_for_flow_stmt(struct compiling *c, const node *n)
2122 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2123 | yield_stmt
2124 break_stmt: 'break'
2125 continue_stmt: 'continue'
2126 return_stmt: 'return' [testlist]
2127 yield_stmt: yield_expr
2128 yield_expr: 'yield' testlist
2129 raise_stmt: 'raise' [test [',' test [',' test]]]
2131 node *ch;
2133 REQ(n, flow_stmt);
2134 ch = CHILD(n, 0);
2135 switch (TYPE(ch)) {
2136 case break_stmt:
2137 return Break(LINENO(n), n->n_col_offset, c->c_arena);
2138 case continue_stmt:
2139 return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2140 case yield_stmt: { /* will reduce to yield_expr */
2141 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2142 if (!exp)
2143 return NULL;
2144 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2146 case return_stmt:
2147 if (NCH(ch) == 1)
2148 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2149 else {
2150 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2151 if (!expression)
2152 return NULL;
2153 return Return(expression, LINENO(n), n->n_col_offset, c->c_arena);
2155 case raise_stmt:
2156 if (NCH(ch) == 1)
2157 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2158 else if (NCH(ch) == 2) {
2159 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2160 if (!expression)
2161 return NULL;
2162 return Raise(expression, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2164 else if (NCH(ch) == 4) {
2165 expr_ty expr1, expr2;
2167 expr1 = ast_for_expr(c, CHILD(ch, 1));
2168 if (!expr1)
2169 return NULL;
2170 expr2 = ast_for_expr(c, CHILD(ch, 3));
2171 if (!expr2)
2172 return NULL;
2174 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2176 else if (NCH(ch) == 6) {
2177 expr_ty expr1, expr2, expr3;
2179 expr1 = ast_for_expr(c, CHILD(ch, 1));
2180 if (!expr1)
2181 return NULL;
2182 expr2 = ast_for_expr(c, CHILD(ch, 3));
2183 if (!expr2)
2184 return NULL;
2185 expr3 = ast_for_expr(c, CHILD(ch, 5));
2186 if (!expr3)
2187 return NULL;
2189 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset, c->c_arena);
2191 default:
2192 PyErr_Format(PyExc_SystemError,
2193 "unexpected flow_stmt: %d", TYPE(ch));
2194 return NULL;
2197 PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2198 return NULL;
2201 static alias_ty
2202 alias_for_import_name(struct compiling *c, const node *n)
2205 import_as_name: NAME ['as' NAME]
2206 dotted_as_name: dotted_name ['as' NAME]
2207 dotted_name: NAME ('.' NAME)*
2209 PyObject *str;
2211 loop:
2212 switch (TYPE(n)) {
2213 case import_as_name:
2214 str = NULL;
2215 if (NCH(n) == 3) {
2216 str = NEW_IDENTIFIER(CHILD(n, 2));
2218 return alias(NEW_IDENTIFIER(CHILD(n, 0)), str, c->c_arena);
2219 case dotted_as_name:
2220 if (NCH(n) == 1) {
2221 n = CHILD(n, 0);
2222 goto loop;
2224 else {
2225 alias_ty a = alias_for_import_name(c, CHILD(n, 0));
2226 if (!a)
2227 return NULL;
2228 assert(!a->asname);
2229 a->asname = NEW_IDENTIFIER(CHILD(n, 2));
2230 return a;
2232 break;
2233 case dotted_name:
2234 if (NCH(n) == 1)
2235 return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL, c->c_arena);
2236 else {
2237 /* Create a string of the form "a.b.c" */
2238 int i;
2239 size_t len;
2240 char *s;
2242 len = 0;
2243 for (i = 0; i < NCH(n); i += 2)
2244 /* length of string plus one for the dot */
2245 len += strlen(STR(CHILD(n, i))) + 1;
2246 len--; /* the last name doesn't have a dot */
2247 str = PyString_FromStringAndSize(NULL, len);
2248 if (!str)
2249 return NULL;
2250 s = PyString_AS_STRING(str);
2251 if (!s)
2252 return NULL;
2253 for (i = 0; i < NCH(n); i += 2) {
2254 char *sch = STR(CHILD(n, i));
2255 strcpy(s, STR(CHILD(n, i)));
2256 s += strlen(sch);
2257 *s++ = '.';
2259 --s;
2260 *s = '\0';
2261 PyString_InternInPlace(&str);
2262 PyArena_AddPyObject(c->c_arena, str);
2263 return alias(str, NULL, c->c_arena);
2265 break;
2266 case STAR:
2267 str = PyString_InternFromString("*");
2268 PyArena_AddPyObject(c->c_arena, str);
2269 return alias(str, NULL, c->c_arena);
2270 default:
2271 PyErr_Format(PyExc_SystemError,
2272 "unexpected import name: %d", TYPE(n));
2273 return NULL;
2276 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2277 return NULL;
2280 static stmt_ty
2281 ast_for_import_stmt(struct compiling *c, const node *n)
2284 import_stmt: import_name | import_from
2285 import_name: 'import' dotted_as_names
2286 import_from: 'from' ('.'* dotted_name | '.') 'import'
2287 ('*' | '(' import_as_names ')' | import_as_names)
2289 int lineno;
2290 int col_offset;
2291 int i;
2292 asdl_seq *aliases;
2294 REQ(n, import_stmt);
2295 lineno = LINENO(n);
2296 col_offset = n->n_col_offset;
2297 n = CHILD(n, 0);
2298 if (TYPE(n) == import_name) {
2299 n = CHILD(n, 1);
2300 REQ(n, dotted_as_names);
2301 aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2302 if (!aliases)
2303 return NULL;
2304 for (i = 0; i < NCH(n); i += 2) {
2305 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i));
2306 if (!import_alias)
2307 return NULL;
2308 asdl_seq_SET(aliases, i / 2, import_alias);
2310 return Import(aliases, lineno, col_offset, c->c_arena);
2312 else if (TYPE(n) == import_from) {
2313 int n_children;
2314 int idx, ndots = 0;
2315 alias_ty mod = NULL;
2316 identifier modname;
2318 /* Count the number of dots (for relative imports) and check for the
2319 optional module name */
2320 for (idx = 1; idx < NCH(n); idx++) {
2321 if (TYPE(CHILD(n, idx)) == dotted_name) {
2322 mod = alias_for_import_name(c, CHILD(n, idx));
2323 idx++;
2324 break;
2325 } else if (TYPE(CHILD(n, idx)) != DOT) {
2326 break;
2328 ndots++;
2330 idx++; /* skip over the 'import' keyword */
2331 switch (TYPE(CHILD(n, idx))) {
2332 case STAR:
2333 /* from ... import * */
2334 n = CHILD(n, idx);
2335 n_children = 1;
2336 if (ndots) {
2337 ast_error(n, "'import *' not allowed with 'from .'");
2338 return NULL;
2340 break;
2341 case LPAR:
2342 /* from ... import (x, y, z) */
2343 n = CHILD(n, idx + 1);
2344 n_children = NCH(n);
2345 break;
2346 case import_as_names:
2347 /* from ... import x, y, z */
2348 n = CHILD(n, idx);
2349 n_children = NCH(n);
2350 if (n_children % 2 == 0) {
2351 ast_error(n, "trailing comma not allowed without"
2352 " surrounding parentheses");
2353 return NULL;
2355 break;
2356 default:
2357 ast_error(n, "Unexpected node-type in from-import");
2358 return NULL;
2361 aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2362 if (!aliases)
2363 return NULL;
2365 /* handle "from ... import *" special b/c there's no children */
2366 if (TYPE(n) == STAR) {
2367 alias_ty import_alias = alias_for_import_name(c, n);
2368 if (!import_alias)
2369 return NULL;
2370 asdl_seq_SET(aliases, 0, import_alias);
2372 else {
2373 for (i = 0; i < NCH(n); i += 2) {
2374 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i));
2375 if (!import_alias)
2376 return NULL;
2377 asdl_seq_SET(aliases, i / 2, import_alias);
2380 if (mod != NULL)
2381 modname = mod->name;
2382 else
2383 modname = new_identifier("", c->c_arena);
2384 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2385 c->c_arena);
2387 PyErr_Format(PyExc_SystemError,
2388 "unknown import statement: starts with command '%s'",
2389 STR(CHILD(n, 0)));
2390 return NULL;
2393 static stmt_ty
2394 ast_for_global_stmt(struct compiling *c, const node *n)
2396 /* global_stmt: 'global' NAME (',' NAME)* */
2397 identifier name;
2398 asdl_seq *s;
2399 int i;
2401 REQ(n, global_stmt);
2402 s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2403 if (!s)
2404 return NULL;
2405 for (i = 1; i < NCH(n); i += 2) {
2406 name = NEW_IDENTIFIER(CHILD(n, i));
2407 if (!name)
2408 return NULL;
2409 asdl_seq_SET(s, i / 2, name);
2411 return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2414 static stmt_ty
2415 ast_for_exec_stmt(struct compiling *c, const node *n)
2417 expr_ty expr1, globals = NULL, locals = NULL;
2418 int n_children = NCH(n);
2419 if (n_children != 2 && n_children != 4 && n_children != 6) {
2420 PyErr_Format(PyExc_SystemError,
2421 "poorly formed 'exec' statement: %d parts to statement",
2422 n_children);
2423 return NULL;
2426 /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2427 REQ(n, exec_stmt);
2428 expr1 = ast_for_expr(c, CHILD(n, 1));
2429 if (!expr1)
2430 return NULL;
2431 if (n_children >= 4) {
2432 globals = ast_for_expr(c, CHILD(n, 3));
2433 if (!globals)
2434 return NULL;
2436 if (n_children == 6) {
2437 locals = ast_for_expr(c, CHILD(n, 5));
2438 if (!locals)
2439 return NULL;
2442 return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset, c->c_arena);
2445 static stmt_ty
2446 ast_for_assert_stmt(struct compiling *c, const node *n)
2448 /* assert_stmt: 'assert' test [',' test] */
2449 REQ(n, assert_stmt);
2450 if (NCH(n) == 2) {
2451 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2452 if (!expression)
2453 return NULL;
2454 return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2456 else if (NCH(n) == 4) {
2457 expr_ty expr1, expr2;
2459 expr1 = ast_for_expr(c, CHILD(n, 1));
2460 if (!expr1)
2461 return NULL;
2462 expr2 = ast_for_expr(c, CHILD(n, 3));
2463 if (!expr2)
2464 return NULL;
2466 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2468 PyErr_Format(PyExc_SystemError,
2469 "improper number of parts to 'assert' statement: %d",
2470 NCH(n));
2471 return NULL;
2474 static asdl_seq *
2475 ast_for_suite(struct compiling *c, const node *n)
2477 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2478 asdl_seq *seq;
2479 stmt_ty s;
2480 int i, total, num, end, pos = 0;
2481 node *ch;
2483 REQ(n, suite);
2485 total = num_stmts(n);
2486 seq = asdl_seq_new(total, c->c_arena);
2487 if (!seq)
2488 return NULL;
2489 if (TYPE(CHILD(n, 0)) == simple_stmt) {
2490 n = CHILD(n, 0);
2491 /* simple_stmt always ends with a NEWLINE,
2492 and may have a trailing SEMI
2494 end = NCH(n) - 1;
2495 if (TYPE(CHILD(n, end - 1)) == SEMI)
2496 end--;
2497 /* loop by 2 to skip semi-colons */
2498 for (i = 0; i < end; i += 2) {
2499 ch = CHILD(n, i);
2500 s = ast_for_stmt(c, ch);
2501 if (!s)
2502 return NULL;
2503 asdl_seq_SET(seq, pos++, s);
2506 else {
2507 for (i = 2; i < (NCH(n) - 1); i++) {
2508 ch = CHILD(n, i);
2509 REQ(ch, stmt);
2510 num = num_stmts(ch);
2511 if (num == 1) {
2512 /* small_stmt or compound_stmt with only one child */
2513 s = ast_for_stmt(c, ch);
2514 if (!s)
2515 return NULL;
2516 asdl_seq_SET(seq, pos++, s);
2518 else {
2519 int j;
2520 ch = CHILD(ch, 0);
2521 REQ(ch, simple_stmt);
2522 for (j = 0; j < NCH(ch); j += 2) {
2523 /* statement terminates with a semi-colon ';' */
2524 if (NCH(CHILD(ch, j)) == 0) {
2525 assert((j + 1) == NCH(ch));
2526 break;
2528 s = ast_for_stmt(c, CHILD(ch, j));
2529 if (!s)
2530 return NULL;
2531 asdl_seq_SET(seq, pos++, s);
2536 assert(pos == seq->size);
2537 return seq;
2540 static stmt_ty
2541 ast_for_if_stmt(struct compiling *c, const node *n)
2543 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2544 ['else' ':' suite]
2546 char *s;
2548 REQ(n, if_stmt);
2550 if (NCH(n) == 4) {
2551 expr_ty expression;
2552 asdl_seq *suite_seq;
2554 expression = ast_for_expr(c, CHILD(n, 1));
2555 if (!expression)
2556 return NULL;
2557 suite_seq = ast_for_suite(c, CHILD(n, 3));
2558 if (!suite_seq)
2559 return NULL;
2561 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2564 s = STR(CHILD(n, 4));
2565 /* s[2], the third character in the string, will be
2566 's' for el_s_e, or
2567 'i' for el_i_f
2569 if (s[2] == 's') {
2570 expr_ty expression;
2571 asdl_seq *seq1, *seq2;
2573 expression = ast_for_expr(c, CHILD(n, 1));
2574 if (!expression)
2575 return NULL;
2576 seq1 = ast_for_suite(c, CHILD(n, 3));
2577 if (!seq1)
2578 return NULL;
2579 seq2 = ast_for_suite(c, CHILD(n, 6));
2580 if (!seq2)
2581 return NULL;
2583 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
2585 else if (s[2] == 'i') {
2586 int i, n_elif, has_else = 0;
2587 asdl_seq *orelse = NULL;
2588 n_elif = NCH(n) - 4;
2589 /* must reference the child n_elif+1 since 'else' token is third,
2590 not fourth, child from the end. */
2591 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2592 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2593 has_else = 1;
2594 n_elif -= 3;
2596 n_elif /= 4;
2598 if (has_else) {
2599 expr_ty expression;
2600 asdl_seq *seq1, *seq2;
2602 orelse = asdl_seq_new(1, c->c_arena);
2603 if (!orelse)
2604 return NULL;
2605 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2606 if (!expression)
2607 return NULL;
2608 seq1 = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2609 if (!seq1)
2610 return NULL;
2611 seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2612 if (!seq2)
2613 return NULL;
2615 asdl_seq_SET(orelse, 0, If(expression, seq1, seq2,
2616 LINENO(CHILD(n, NCH(n) - 6)), CHILD(n, NCH(n) - 6)->n_col_offset,
2617 c->c_arena));
2618 /* the just-created orelse handled the last elif */
2619 n_elif--;
2622 for (i = 0; i < n_elif; i++) {
2623 int off = 5 + (n_elif - i - 1) * 4;
2624 expr_ty expression;
2625 asdl_seq *suite_seq;
2626 asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
2627 if (!newobj)
2628 return NULL;
2629 expression = ast_for_expr(c, CHILD(n, off));
2630 if (!expression)
2631 return NULL;
2632 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2633 if (!suite_seq)
2634 return NULL;
2636 asdl_seq_SET(newobj, 0,
2637 If(expression, suite_seq, orelse,
2638 LINENO(CHILD(n, off)), CHILD(n, off)->n_col_offset, c->c_arena));
2639 orelse = newobj;
2641 return If(ast_for_expr(c, CHILD(n, 1)),
2642 ast_for_suite(c, CHILD(n, 3)),
2643 orelse, LINENO(n), n->n_col_offset, c->c_arena);
2646 PyErr_Format(PyExc_SystemError,
2647 "unexpected token in 'if' statement: %s", s);
2648 return NULL;
2651 static stmt_ty
2652 ast_for_while_stmt(struct compiling *c, const node *n)
2654 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2655 REQ(n, while_stmt);
2657 if (NCH(n) == 4) {
2658 expr_ty expression;
2659 asdl_seq *suite_seq;
2661 expression = ast_for_expr(c, CHILD(n, 1));
2662 if (!expression)
2663 return NULL;
2664 suite_seq = ast_for_suite(c, CHILD(n, 3));
2665 if (!suite_seq)
2666 return NULL;
2667 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2669 else if (NCH(n) == 7) {
2670 expr_ty expression;
2671 asdl_seq *seq1, *seq2;
2673 expression = ast_for_expr(c, CHILD(n, 1));
2674 if (!expression)
2675 return NULL;
2676 seq1 = ast_for_suite(c, CHILD(n, 3));
2677 if (!seq1)
2678 return NULL;
2679 seq2 = ast_for_suite(c, CHILD(n, 6));
2680 if (!seq2)
2681 return NULL;
2683 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
2686 PyErr_Format(PyExc_SystemError,
2687 "wrong number of tokens for 'while' statement: %d",
2688 NCH(n));
2689 return NULL;
2692 static stmt_ty
2693 ast_for_for_stmt(struct compiling *c, const node *n)
2695 asdl_seq *_target, *seq = NULL, *suite_seq;
2696 expr_ty expression;
2697 expr_ty target;
2698 const node *node_target;
2699 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2700 REQ(n, for_stmt);
2702 if (NCH(n) == 9) {
2703 seq = ast_for_suite(c, CHILD(n, 8));
2704 if (!seq)
2705 return NULL;
2708 node_target = CHILD(n, 1);
2709 _target = ast_for_exprlist(c, node_target, Store);
2710 if (!_target)
2711 return NULL;
2712 /* Check the # of children rather than the length of _target, since
2713 for x, in ... has 1 element in _target, but still requires a Tuple. */
2714 if (NCH(node_target) == 1)
2715 target = (expr_ty)asdl_seq_GET(_target, 0);
2716 else
2717 target = Tuple(_target, Store, LINENO(n), n->n_col_offset, c->c_arena);
2719 expression = ast_for_testlist(c, CHILD(n, 3));
2720 if (!expression)
2721 return NULL;
2722 suite_seq = ast_for_suite(c, CHILD(n, 5));
2723 if (!suite_seq)
2724 return NULL;
2726 return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
2727 c->c_arena);
2730 static excepthandler_ty
2731 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
2733 /* except_clause: 'except' [test [',' test]] */
2734 REQ(exc, except_clause);
2735 REQ(body, suite);
2737 if (NCH(exc) == 1) {
2738 asdl_seq *suite_seq = ast_for_suite(c, body);
2739 if (!suite_seq)
2740 return NULL;
2742 return excepthandler(NULL, NULL, suite_seq, LINENO(exc),
2743 exc->n_col_offset, c->c_arena);
2745 else if (NCH(exc) == 2) {
2746 expr_ty expression;
2747 asdl_seq *suite_seq;
2749 expression = ast_for_expr(c, CHILD(exc, 1));
2750 if (!expression)
2751 return NULL;
2752 suite_seq = ast_for_suite(c, body);
2753 if (!suite_seq)
2754 return NULL;
2756 return excepthandler(expression, NULL, suite_seq, LINENO(exc),
2757 exc->n_col_offset, c->c_arena);
2759 else if (NCH(exc) == 4) {
2760 asdl_seq *suite_seq;
2761 expr_ty expression;
2762 expr_ty e = ast_for_expr(c, CHILD(exc, 3));
2763 if (!e)
2764 return NULL;
2765 if (!set_context(e, Store, CHILD(exc, 3)))
2766 return NULL;
2767 expression = ast_for_expr(c, CHILD(exc, 1));
2768 if (!expression)
2769 return NULL;
2770 suite_seq = ast_for_suite(c, body);
2771 if (!suite_seq)
2772 return NULL;
2774 return excepthandler(expression, e, suite_seq, LINENO(exc),
2775 exc->n_col_offset, c->c_arena);
2778 PyErr_Format(PyExc_SystemError,
2779 "wrong number of children for 'except' clause: %d",
2780 NCH(exc));
2781 return NULL;
2784 static stmt_ty
2785 ast_for_try_stmt(struct compiling *c, const node *n)
2787 const int nch = NCH(n);
2788 int n_except = (nch - 3)/3;
2789 asdl_seq *body, *orelse = NULL, *finally = NULL;
2791 REQ(n, try_stmt);
2793 body = ast_for_suite(c, CHILD(n, 2));
2794 if (body == NULL)
2795 return NULL;
2797 if (TYPE(CHILD(n, nch - 3)) == NAME) {
2798 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
2799 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
2800 /* we can assume it's an "else",
2801 because nch >= 9 for try-else-finally and
2802 it would otherwise have a type of except_clause */
2803 orelse = ast_for_suite(c, CHILD(n, nch - 4));
2804 if (orelse == NULL)
2805 return NULL;
2806 n_except--;
2809 finally = ast_for_suite(c, CHILD(n, nch - 1));
2810 if (finally == NULL)
2811 return NULL;
2812 n_except--;
2814 else {
2815 /* we can assume it's an "else",
2816 otherwise it would have a type of except_clause */
2817 orelse = ast_for_suite(c, CHILD(n, nch - 1));
2818 if (orelse == NULL)
2819 return NULL;
2820 n_except--;
2823 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
2824 ast_error(n, "malformed 'try' statement");
2825 return NULL;
2828 if (n_except > 0) {
2829 int i;
2830 stmt_ty except_st;
2831 /* process except statements to create a try ... except */
2832 asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
2833 if (handlers == NULL)
2834 return NULL;
2836 for (i = 0; i < n_except; i++) {
2837 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
2838 CHILD(n, 5 + i * 3));
2839 if (!e)
2840 return NULL;
2841 asdl_seq_SET(handlers, i, e);
2844 except_st = TryExcept(body, handlers, orelse, LINENO(n),
2845 n->n_col_offset, c->c_arena);
2846 if (!finally)
2847 return except_st;
2849 /* if a 'finally' is present too, we nest the TryExcept within a
2850 TryFinally to emulate try ... except ... finally */
2851 body = asdl_seq_new(1, c->c_arena);
2852 if (body == NULL)
2853 return NULL;
2854 asdl_seq_SET(body, 0, except_st);
2857 /* must be a try ... finally (except clauses are in body, if any exist) */
2858 assert(finally != NULL);
2859 return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
2862 static expr_ty
2863 ast_for_with_var(struct compiling *c, const node *n)
2865 REQ(n, with_var);
2866 return ast_for_expr(c, CHILD(n, 1));
2869 /* with_stmt: 'with' test [ with_var ] ':' suite */
2870 static stmt_ty
2871 ast_for_with_stmt(struct compiling *c, const node *n)
2873 expr_ty context_expr, optional_vars = NULL;
2874 int suite_index = 3; /* skip 'with', test, and ':' */
2875 asdl_seq *suite_seq;
2877 assert(TYPE(n) == with_stmt);
2878 context_expr = ast_for_expr(c, CHILD(n, 1));
2879 if (TYPE(CHILD(n, 2)) == with_var) {
2880 optional_vars = ast_for_with_var(c, CHILD(n, 2));
2882 if (!optional_vars) {
2883 return NULL;
2885 if (!set_context(optional_vars, Store, n)) {
2886 return NULL;
2888 suite_index = 4;
2891 suite_seq = ast_for_suite(c, CHILD(n, suite_index));
2892 if (!suite_seq) {
2893 return NULL;
2895 return With(context_expr, optional_vars, suite_seq, LINENO(n),
2896 n->n_col_offset, c->c_arena);
2899 static stmt_ty
2900 ast_for_classdef(struct compiling *c, const node *n)
2902 /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
2903 asdl_seq *bases, *s;
2905 REQ(n, classdef);
2907 if (!strcmp(STR(CHILD(n, 1)), "None")) {
2908 ast_error(n, "assignment to None");
2909 return NULL;
2912 if (NCH(n) == 4) {
2913 s = ast_for_suite(c, CHILD(n, 3));
2914 if (!s)
2915 return NULL;
2916 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n),
2917 n->n_col_offset, c->c_arena);
2919 /* check for empty base list */
2920 if (TYPE(CHILD(n,3)) == RPAR) {
2921 s = ast_for_suite(c, CHILD(n,5));
2922 if (!s)
2923 return NULL;
2924 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n),
2925 n->n_col_offset, c->c_arena);
2928 /* else handle the base class list */
2929 bases = ast_for_class_bases(c, CHILD(n, 3));
2930 if (!bases)
2931 return NULL;
2933 s = ast_for_suite(c, CHILD(n, 6));
2934 if (!s)
2935 return NULL;
2936 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), bases, s, LINENO(n),
2937 n->n_col_offset, c->c_arena);
2940 static stmt_ty
2941 ast_for_stmt(struct compiling *c, const node *n)
2943 if (TYPE(n) == stmt) {
2944 assert(NCH(n) == 1);
2945 n = CHILD(n, 0);
2947 if (TYPE(n) == simple_stmt) {
2948 assert(num_stmts(n) == 1);
2949 n = CHILD(n, 0);
2951 if (TYPE(n) == small_stmt) {
2952 REQ(n, small_stmt);
2953 n = CHILD(n, 0);
2954 /* small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt
2955 | flow_stmt | import_stmt | global_stmt | exec_stmt
2956 | assert_stmt
2958 switch (TYPE(n)) {
2959 case expr_stmt:
2960 return ast_for_expr_stmt(c, n);
2961 case print_stmt:
2962 return ast_for_print_stmt(c, n);
2963 case del_stmt:
2964 return ast_for_del_stmt(c, n);
2965 case pass_stmt:
2966 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
2967 case flow_stmt:
2968 return ast_for_flow_stmt(c, n);
2969 case import_stmt:
2970 return ast_for_import_stmt(c, n);
2971 case global_stmt:
2972 return ast_for_global_stmt(c, n);
2973 case exec_stmt:
2974 return ast_for_exec_stmt(c, n);
2975 case assert_stmt:
2976 return ast_for_assert_stmt(c, n);
2977 default:
2978 PyErr_Format(PyExc_SystemError,
2979 "unhandled small_stmt: TYPE=%d NCH=%d\n",
2980 TYPE(n), NCH(n));
2981 return NULL;
2984 else {
2985 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
2986 | funcdef | classdef
2988 node *ch = CHILD(n, 0);
2989 REQ(n, compound_stmt);
2990 switch (TYPE(ch)) {
2991 case if_stmt:
2992 return ast_for_if_stmt(c, ch);
2993 case while_stmt:
2994 return ast_for_while_stmt(c, ch);
2995 case for_stmt:
2996 return ast_for_for_stmt(c, ch);
2997 case try_stmt:
2998 return ast_for_try_stmt(c, ch);
2999 case with_stmt:
3000 return ast_for_with_stmt(c, ch);
3001 case funcdef:
3002 return ast_for_funcdef(c, ch);
3003 case classdef:
3004 return ast_for_classdef(c, ch);
3005 default:
3006 PyErr_Format(PyExc_SystemError,
3007 "unhandled small_stmt: TYPE=%d NCH=%d\n",
3008 TYPE(n), NCH(n));
3009 return NULL;
3014 static PyObject *
3015 parsenumber(const char *s)
3017 const char *end;
3018 long x;
3019 double dx;
3020 #ifndef WITHOUT_COMPLEX
3021 Py_complex c;
3022 int imflag;
3023 #endif
3025 errno = 0;
3026 end = s + strlen(s) - 1;
3027 #ifndef WITHOUT_COMPLEX
3028 imflag = *end == 'j' || *end == 'J';
3029 #endif
3030 if (*end == 'l' || *end == 'L')
3031 return PyLong_FromString((char *)s, (char **)0, 0);
3032 if (s[0] == '0') {
3033 x = (long) PyOS_strtoul((char *)s, (char **)&end, 0);
3034 if (x < 0 && errno == 0) {
3035 return PyLong_FromString((char *)s,
3036 (char **)0,
3040 else
3041 x = PyOS_strtol((char *)s, (char **)&end, 0);
3042 if (*end == '\0') {
3043 if (errno != 0)
3044 return PyLong_FromString((char *)s, (char **)0, 0);
3045 return PyInt_FromLong(x);
3047 /* XXX Huge floats may silently fail */
3048 #ifndef WITHOUT_COMPLEX
3049 if (imflag) {
3050 c.real = 0.;
3051 PyFPE_START_PROTECT("atof", return 0)
3052 c.imag = PyOS_ascii_atof(s);
3053 PyFPE_END_PROTECT(c)
3054 return PyComplex_FromCComplex(c);
3056 else
3057 #endif
3059 PyFPE_START_PROTECT("atof", return 0)
3060 dx = PyOS_ascii_atof(s);
3061 PyFPE_END_PROTECT(dx)
3062 return PyFloat_FromDouble(dx);
3066 static PyObject *
3067 decode_utf8(const char **sPtr, const char *end, char* encoding)
3069 #ifndef Py_USING_UNICODE
3070 Py_FatalError("decode_utf8 should not be called in this build.");
3071 return NULL;
3072 #else
3073 PyObject *u, *v;
3074 char *s, *t;
3075 t = s = (char *)*sPtr;
3076 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3077 while (s < end && (*s & 0x80)) s++;
3078 *sPtr = s;
3079 u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3080 if (u == NULL)
3081 return NULL;
3082 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3083 Py_DECREF(u);
3084 return v;
3085 #endif
3088 static PyObject *
3089 decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
3091 PyObject *v, *u;
3092 char *buf;
3093 char *p;
3094 const char *end;
3095 if (encoding == NULL) {
3096 buf = (char *)s;
3097 u = NULL;
3098 } else if (strcmp(encoding, "iso-8859-1") == 0) {
3099 buf = (char *)s;
3100 u = NULL;
3101 } else {
3102 /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
3103 u = PyString_FromStringAndSize((char *)NULL, len * 4);
3104 if (u == NULL)
3105 return NULL;
3106 p = buf = PyString_AsString(u);
3107 end = s + len;
3108 while (s < end) {
3109 if (*s == '\\') {
3110 *p++ = *s++;
3111 if (*s & 0x80) {
3112 strcpy(p, "u005c");
3113 p += 5;
3116 if (*s & 0x80) { /* XXX inefficient */
3117 PyObject *w;
3118 char *r;
3119 Py_ssize_t rn, i;
3120 w = decode_utf8(&s, end, "utf-16-be");
3121 if (w == NULL) {
3122 Py_DECREF(u);
3123 return NULL;
3125 r = PyString_AsString(w);
3126 rn = PyString_Size(w);
3127 assert(rn % 2 == 0);
3128 for (i = 0; i < rn; i += 2) {
3129 sprintf(p, "\\u%02x%02x",
3130 r[i + 0] & 0xFF,
3131 r[i + 1] & 0xFF);
3132 p += 6;
3134 Py_DECREF(w);
3135 } else {
3136 *p++ = *s++;
3139 len = p - buf;
3140 s = buf;
3142 if (rawmode)
3143 v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
3144 else
3145 v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
3146 Py_XDECREF(u);
3147 return v;
3150 /* s is a Python string literal, including the bracketing quote characters,
3151 * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3152 * parsestr parses it, and returns the decoded Python string object.
3154 static PyObject *
3155 parsestr(const char *s, const char *encoding)
3157 size_t len;
3158 int quote = Py_CHARMASK(*s);
3159 int rawmode = 0;
3160 int need_encoding;
3161 int unicode = 0;
3163 if (isalpha(quote) || quote == '_') {
3164 if (quote == 'u' || quote == 'U') {
3165 quote = *++s;
3166 unicode = 1;
3168 if (quote == 'r' || quote == 'R') {
3169 quote = *++s;
3170 rawmode = 1;
3173 if (quote != '\'' && quote != '\"') {
3174 PyErr_BadInternalCall();
3175 return NULL;
3177 s++;
3178 len = strlen(s);
3179 if (len > INT_MAX) {
3180 PyErr_SetString(PyExc_OverflowError,
3181 "string to parse is too long");
3182 return NULL;
3184 if (s[--len] != quote) {
3185 PyErr_BadInternalCall();
3186 return NULL;
3188 if (len >= 4 && s[0] == quote && s[1] == quote) {
3189 s += 2;
3190 len -= 2;
3191 if (s[--len] != quote || s[--len] != quote) {
3192 PyErr_BadInternalCall();
3193 return NULL;
3196 #ifdef Py_USING_UNICODE
3197 if (unicode || Py_UnicodeFlag) {
3198 return decode_unicode(s, len, rawmode, encoding);
3200 #endif
3201 need_encoding = (encoding != NULL &&
3202 strcmp(encoding, "utf-8") != 0 &&
3203 strcmp(encoding, "iso-8859-1") != 0);
3204 if (rawmode || strchr(s, '\\') == NULL) {
3205 if (need_encoding) {
3206 #ifndef Py_USING_UNICODE
3207 /* This should not happen - we never see any other
3208 encoding. */
3209 Py_FatalError(
3210 "cannot deal with encodings in this build.");
3211 #else
3212 PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3213 if (u == NULL)
3214 return NULL;
3215 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3216 Py_DECREF(u);
3217 return v;
3218 #endif
3219 } else {
3220 return PyString_FromStringAndSize(s, len);
3224 return PyString_DecodeEscape(s, len, NULL, unicode,
3225 need_encoding ? encoding : NULL);
3228 /* Build a Python string object out of a STRING atom. This takes care of
3229 * compile-time literal catenation, calling parsestr() on each piece, and
3230 * pasting the intermediate results together.
3232 static PyObject *
3233 parsestrplus(struct compiling *c, const node *n)
3235 PyObject *v;
3236 int i;
3237 REQ(CHILD(n, 0), STRING);
3238 if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
3239 /* String literal concatenation */
3240 for (i = 1; i < NCH(n); i++) {
3241 PyObject *s;
3242 s = parsestr(STR(CHILD(n, i)), c->c_encoding);
3243 if (s == NULL)
3244 goto onError;
3245 if (PyString_Check(v) && PyString_Check(s)) {
3246 PyString_ConcatAndDel(&v, s);
3247 if (v == NULL)
3248 goto onError;
3250 #ifdef Py_USING_UNICODE
3251 else {
3252 PyObject *temp = PyUnicode_Concat(v, s);
3253 Py_DECREF(s);
3254 Py_DECREF(v);
3255 v = temp;
3256 if (v == NULL)
3257 goto onError;
3259 #endif
3262 return v;
3264 onError:
3265 Py_XDECREF(v);
3266 return NULL;