Catch situations where currentframe() returns None. See SF patch #1447410, this is...
[python.git] / Python / ast.c
blobbb1774bb976d2f0310ef3a32adaec0fd65930ebd
1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
16 #include <assert.h>
18 /* XXX TO DO
19 - re-indent this file (should be done)
20 - internal error checking (freeing memory, etc.)
21 - syntax errors
24 /* Data structure used internally: per-conversion state that is passed to the ast_for_* helpers in this file */
25 struct compiling {
26 char *c_encoding; /* source encoding; NULL when PyAST_FromNode() found none */
27 PyArena *c_arena; /* arena for allocating memory */
30 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
31 static expr_ty ast_for_expr(struct compiling *, const node *);
32 static stmt_ty ast_for_stmt(struct compiling *, const node *);
33 static asdl_seq *ast_for_suite(struct compiling *, const node *);
34 static asdl_seq *ast_for_exprlist(struct compiling *, const node *, int);
35 static expr_ty ast_for_testlist(struct compiling *, const node *);
36 static expr_ty ast_for_testlist_gexp(struct compiling *, const node *);
38 /* Note different signature for ast_for_call */
39 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
41 static PyObject *parsenumber(const char *);
42 static PyObject *parsestr(const char *s, const char *encoding);
43 static PyObject *parsestrplus(struct compiling *, const node *n);
45 #ifndef LINENO
46 #define LINENO(n) ((n)->n_lineno)
47 #endif
49 static identifier
50 new_identifier(const char* n, PyArena *arena) {
51 PyObject* id = PyString_InternFromString(n);
52 PyArena_AddPyObject(arena, id);
53 return id;
56 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
58 /* This routine provides an invalid object for the syntax error.
59 The outermost routine must unpack this error and create the
60 proper object. We do this so that we don't have to pass
61 the filename to everything function.
63 XXX Maybe we should just pass the filename...
66 static int
67 ast_error(const node *n, const char *errstr)
69 PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
70 if (!u)
71 return 0;
72 PyErr_SetObject(PyExc_SyntaxError, u);
73 Py_DECREF(u);
74 return 0;
77 static void
78 ast_error_finish(const char *filename)
80 PyObject *type, *value, *tback, *errstr, *loc, *tmp;
81 long lineno;
83 assert(PyErr_Occurred());
84 if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
85 return;
87 PyErr_Fetch(&type, &value, &tback);
88 errstr = PyTuple_GetItem(value, 0);
89 if (!errstr)
90 return;
91 Py_INCREF(errstr);
92 lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
93 if (lineno == -1) {
94 Py_DECREF(errstr);
95 return;
97 Py_DECREF(value);
99 loc = PyErr_ProgramText(filename, lineno);
100 if (!loc) {
101 Py_INCREF(Py_None);
102 loc = Py_None;
104 tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
105 Py_DECREF(loc);
106 if (!tmp) {
107 Py_DECREF(errstr);
108 return;
110 value = Py_BuildValue("(OO)", errstr, tmp);
111 Py_DECREF(errstr);
112 Py_DECREF(tmp);
113 if (!value)
114 return;
115 PyErr_Restore(type, value, tback);
118 /* num_stmts() returns number of contained statements.
120 Use this routine to determine how big a sequence is needed for
121 the statements in a parse tree. Its raison d'etre is this bit of
122 grammar:
124 stmt: simple_stmt | compound_stmt
125 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
127 A simple_stmt can contain multiple small_stmt elements joined
128 by semicolons. If the arg is a simple_stmt, the number of
129 small_stmt elements is returned.
132 static int
133 num_stmts(const node *n)
135 int i, l;
136 node *ch;
138 switch (TYPE(n)) {
139 case single_input:
140 if (TYPE(CHILD(n, 0)) == NEWLINE)
141 return 0;
142 else
143 return num_stmts(CHILD(n, 0));
144 case file_input:
145 l = 0;
146 for (i = 0; i < NCH(n); i++) {
147 ch = CHILD(n, i);
148 if (TYPE(ch) == stmt)
149 l += num_stmts(ch);
151 return l;
152 case stmt:
153 return num_stmts(CHILD(n, 0));
154 case compound_stmt:
155 return 1;
156 case simple_stmt:
157 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
158 case suite:
159 if (NCH(n) == 1)
160 return num_stmts(CHILD(n, 0));
161 else {
162 l = 0;
163 for (i = 2; i < (NCH(n) - 1); i++)
164 l += num_stmts(CHILD(n, i));
165 return l;
167 default: {
168 char buf[128];
170 sprintf(buf, "Non-statement found: %d %d\n",
171 TYPE(n), NCH(n));
172 Py_FatalError(buf);
175 assert(0);
176 return 0;
179 /* Transform the CST rooted at node * to the appropriate AST
182 mod_ty
183 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
184 PyArena *arena)
186 int i, j, k, num;
187 asdl_seq *stmts = NULL;
188 stmt_ty s;
189 node *ch;
190 struct compiling c;
192 if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
193 c.c_encoding = "utf-8";
194 } else if (TYPE(n) == encoding_decl) {
195 c.c_encoding = STR(n);
196 n = CHILD(n, 0);
197 } else {
198 c.c_encoding = NULL;
200 c.c_arena = arena;
202 k = 0;
203 switch (TYPE(n)) {
204 case file_input:
205 stmts = asdl_seq_new(num_stmts(n), arena);
206 if (!stmts)
207 return NULL;
208 for (i = 0; i < NCH(n) - 1; i++) {
209 ch = CHILD(n, i);
210 if (TYPE(ch) == NEWLINE)
211 continue;
212 REQ(ch, stmt);
213 num = num_stmts(ch);
214 if (num == 1) {
215 s = ast_for_stmt(&c, ch);
216 if (!s)
217 goto error;
218 asdl_seq_SET(stmts, k++, s);
220 else {
221 ch = CHILD(ch, 0);
222 REQ(ch, simple_stmt);
223 for (j = 0; j < num; j++) {
224 s = ast_for_stmt(&c, CHILD(ch, j * 2));
225 if (!s)
226 goto error;
227 asdl_seq_SET(stmts, k++, s);
231 return Module(stmts, arena);
232 case eval_input: {
233 expr_ty testlist_ast;
235 /* XXX Why not gen_for here? */
236 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
237 if (!testlist_ast)
238 goto error;
239 return Expression(testlist_ast, arena);
241 case single_input:
242 if (TYPE(CHILD(n, 0)) == NEWLINE) {
243 stmts = asdl_seq_new(1, arena);
244 if (!stmts)
245 goto error;
246 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset, arena));
247 return Interactive(stmts, arena);
249 else {
250 n = CHILD(n, 0);
251 num = num_stmts(n);
252 stmts = asdl_seq_new(num, arena);
253 if (!stmts)
254 goto error;
255 if (num == 1) {
256 s = ast_for_stmt(&c, n);
257 if (!s)
258 goto error;
259 asdl_seq_SET(stmts, 0, s);
261 else {
262 /* Only a simple_stmt can contain multiple statements. */
263 REQ(n, simple_stmt);
264 for (i = 0; i < NCH(n); i += 2) {
265 if (TYPE(CHILD(n, i)) == NEWLINE)
266 break;
267 s = ast_for_stmt(&c, CHILD(n, i));
268 if (!s)
269 goto error;
270 asdl_seq_SET(stmts, i / 2, s);
274 return Interactive(stmts, arena);
276 default:
277 goto error;
279 error:
280 ast_error_finish(filename);
281 return NULL;
284 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
287 static operator_ty
288 get_operator(const node *n)
290 switch (TYPE(n)) {
291 case VBAR:
292 return BitOr;
293 case CIRCUMFLEX:
294 return BitXor;
295 case AMPER:
296 return BitAnd;
297 case LEFTSHIFT:
298 return LShift;
299 case RIGHTSHIFT:
300 return RShift;
301 case PLUS:
302 return Add;
303 case MINUS:
304 return Sub;
305 case STAR:
306 return Mult;
307 case SLASH:
308 return Div;
309 case DOUBLESLASH:
310 return FloorDiv;
311 case PERCENT:
312 return Mod;
313 default:
314 return 0;
318 /* Set the context ctx for expr_ty e, recursively traversing e.
320 Only sets context for expr kinds that "can appear in assignment context"
321 (according to ../Parser/Python.asdl). For other expr kinds, it sets
322 an appropriate syntax error and returns false.
325 static int
326 set_context(expr_ty e, expr_context_ty ctx, const node *n)
328 asdl_seq *s = NULL;
329 /* If a particular expression type can't be used for assign / delete,
330 set expr_name to its name and an error message will be generated.
332 const char* expr_name = NULL;
334 /* The ast defines augmented store and load contexts, but the
335 implementation here doesn't actually use them. The code may be
336 a little more complex than necessary as a result. It also means
337 that expressions in an augmented assignment have no context.
338 Consider restructuring so that augmented assignment uses
339 set_context(), too.
341 assert(ctx != AugStore && ctx != AugLoad);
343 switch (e->kind) {
344 case Attribute_kind:
345 if (ctx == Store &&
346 !strcmp(PyString_AS_STRING(e->v.Attribute.attr), "None")) {
347 return ast_error(n, "assignment to None");
349 e->v.Attribute.ctx = ctx;
350 break;
351 case Subscript_kind:
352 e->v.Subscript.ctx = ctx;
353 break;
354 case Name_kind:
355 if (ctx == Store &&
356 !strcmp(PyString_AS_STRING(e->v.Name.id), "None")) {
357 return ast_error(n, "assignment to None");
359 e->v.Name.ctx = ctx;
360 break;
361 case List_kind:
362 e->v.List.ctx = ctx;
363 s = e->v.List.elts;
364 break;
365 case Tuple_kind:
366 if (asdl_seq_LEN(e->v.Tuple.elts) == 0)
367 return ast_error(n, "can't assign to ()");
368 e->v.Tuple.ctx = ctx;
369 s = e->v.Tuple.elts;
370 break;
371 case Lambda_kind:
372 expr_name = "lambda";
373 break;
374 case Call_kind:
375 expr_name = "function call";
376 break;
377 case BoolOp_kind:
378 case BinOp_kind:
379 case UnaryOp_kind:
380 expr_name = "operator";
381 break;
382 case GeneratorExp_kind:
383 expr_name = "generator expression";
384 break;
385 case ListComp_kind:
386 expr_name = "list comprehension";
387 break;
388 case Dict_kind:
389 case Num_kind:
390 case Str_kind:
391 expr_name = "literal";
392 break;
393 case Compare_kind:
394 expr_name = "comparison";
395 break;
396 case Repr_kind:
397 expr_name = "repr";
398 break;
399 default:
400 PyErr_Format(PyExc_SystemError,
401 "unexpected expression in assignment %d (line %d)",
402 e->kind, e->lineno);
403 return 0;
405 /* Check for error string set by switch */
406 if (expr_name) {
407 char buf[300];
408 PyOS_snprintf(buf, sizeof(buf),
409 "can't %s %s",
410 ctx == Store ? "assign to" : "delete",
411 expr_name);
412 return ast_error(n, buf);
415 /* If the LHS is a list or tuple, we need to set the assignment
416 context for all the contained elements.
418 if (s) {
419 int i;
421 for (i = 0; i < asdl_seq_LEN(s); i++) {
422 if (!set_context(asdl_seq_GET(s, i), ctx, n))
423 return 0;
426 return 1;
429 static operator_ty
430 ast_for_augassign(const node *n)
432 REQ(n, augassign);
433 n = CHILD(n, 0);
434 switch (STR(n)[0]) {
435 case '+':
436 return Add;
437 case '-':
438 return Sub;
439 case '/':
440 if (STR(n)[1] == '/')
441 return FloorDiv;
442 else
443 return Div;
444 case '%':
445 return Mod;
446 case '<':
447 return LShift;
448 case '>':
449 return RShift;
450 case '&':
451 return BitAnd;
452 case '^':
453 return BitXor;
454 case '|':
455 return BitOr;
456 case '*':
457 if (STR(n)[1] == '*')
458 return Pow;
459 else
460 return Mult;
461 default:
462 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
463 return 0;
467 static cmpop_ty
468 ast_for_comp_op(const node *n)
470 /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
471 |'is' 'not'
473 REQ(n, comp_op);
474 if (NCH(n) == 1) {
475 n = CHILD(n, 0);
476 switch (TYPE(n)) {
477 case LESS:
478 return Lt;
479 case GREATER:
480 return Gt;
481 case EQEQUAL: /* == */
482 return Eq;
483 case LESSEQUAL:
484 return LtE;
485 case GREATEREQUAL:
486 return GtE;
487 case NOTEQUAL:
488 return NotEq;
489 case NAME:
490 if (strcmp(STR(n), "in") == 0)
491 return In;
492 if (strcmp(STR(n), "is") == 0)
493 return Is;
494 default:
495 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
496 STR(n));
497 return 0;
500 else if (NCH(n) == 2) {
501 /* handle "not in" and "is not" */
502 switch (TYPE(CHILD(n, 0))) {
503 case NAME:
504 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
505 return NotIn;
506 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
507 return IsNot;
508 default:
509 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
510 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
511 return 0;
514 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
515 NCH(n));
516 return 0;
519 static asdl_seq *
520 seq_for_testlist(struct compiling *c, const node *n)
522 /* testlist: test (',' test)* [','] */
523 asdl_seq *seq;
524 expr_ty expression;
525 int i;
526 assert(TYPE(n) == testlist
527 || TYPE(n) == listmaker
528 || TYPE(n) == testlist_gexp
529 || TYPE(n) == testlist_safe
532 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
533 if (!seq)
534 return NULL;
536 for (i = 0; i < NCH(n); i += 2) {
537 assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
539 expression = ast_for_expr(c, CHILD(n, i));
540 if (!expression)
541 return NULL;
543 assert(i / 2 < seq->size);
544 asdl_seq_SET(seq, i / 2, expression);
546 return seq;
549 static expr_ty
550 compiler_complex_args(struct compiling *c, const node *n)
552 int i, len = (NCH(n) + 1) / 2;
553 expr_ty result;
554 asdl_seq *args = asdl_seq_new(len, c->c_arena);
555 if (!args)
556 return NULL;
558 REQ(n, fplist);
559 for (i = 0; i < len; i++) {
560 const node *child = CHILD(CHILD(n, 2*i), 0);
561 expr_ty arg;
562 if (TYPE(child) == NAME) {
563 if (!strcmp(STR(child), "None")) {
564 ast_error(child, "assignment to None");
565 return NULL;
567 arg = Name(NEW_IDENTIFIER(child), Store, LINENO(child), child->n_col_offset,
568 c->c_arena);
570 else {
571 arg = compiler_complex_args(c, CHILD(CHILD(n, 2*i), 1));
573 asdl_seq_SET(args, i, arg);
576 result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
577 if (!set_context(result, Store, n))
578 return NULL;
579 return result;
583 /* Create AST for argument list. */
585 static arguments_ty
586 ast_for_arguments(struct compiling *c, const node *n)
588 /* parameters: '(' [varargslist] ')'
589 varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
590 | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
592 int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
593 asdl_seq *args, *defaults;
594 identifier vararg = NULL, kwarg = NULL;
595 node *ch;
597 if (TYPE(n) == parameters) {
598 if (NCH(n) == 2) /* () as argument list */
599 return arguments(NULL, NULL, NULL, NULL, c->c_arena);
600 n = CHILD(n, 1);
602 REQ(n, varargslist);
604 /* first count the number of normal args & defaults */
605 for (i = 0; i < NCH(n); i++) {
606 ch = CHILD(n, i);
607 if (TYPE(ch) == fpdef)
608 n_args++;
609 if (TYPE(ch) == EQUAL)
610 n_defaults++;
612 args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
613 if (!args && n_args)
614 return NULL; /* Don't need to go to NULL; nothing allocated */
615 defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
616 if (!defaults && n_defaults)
617 goto error;
619 /* fpdef: NAME | '(' fplist ')'
620 fplist: fpdef (',' fpdef)* [',']
622 i = 0;
623 j = 0; /* index for defaults */
624 k = 0; /* index for args */
625 while (i < NCH(n)) {
626 ch = CHILD(n, i);
627 switch (TYPE(ch)) {
628 case fpdef:
629 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
630 anything other than EQUAL or a comma? */
631 /* XXX Should NCH(n) check be made a separate check? */
632 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
633 asdl_seq_SET(defaults, j++,
634 ast_for_expr(c, CHILD(n, i + 2)));
635 i += 2;
636 found_default = 1;
638 else if (found_default) {
639 ast_error(n,
640 "non-default argument follows default argument");
641 goto error;
643 if (NCH(ch) == 3) {
644 asdl_seq_SET(args, k++,
645 compiler_complex_args(c, CHILD(ch, 1)));
647 else if (TYPE(CHILD(ch, 0)) == NAME) {
648 expr_ty name;
649 if (!strcmp(STR(CHILD(ch, 0)), "None")) {
650 ast_error(CHILD(ch, 0), "assignment to None");
651 goto error;
653 name = Name(NEW_IDENTIFIER(CHILD(ch, 0)),
654 Param, LINENO(ch), ch->n_col_offset, c->c_arena);
655 if (!name)
656 goto error;
657 asdl_seq_SET(args, k++, name);
660 i += 2; /* the name and the comma */
661 break;
662 case STAR:
663 if (!strcmp(STR(CHILD(n, i+1)), "None")) {
664 ast_error(CHILD(n, i+1), "assignment to None");
665 goto error;
667 vararg = NEW_IDENTIFIER(CHILD(n, i+1));
668 i += 3;
669 break;
670 case DOUBLESTAR:
671 if (!strcmp(STR(CHILD(n, i+1)), "None")) {
672 ast_error(CHILD(n, i+1), "assignment to None");
673 goto error;
675 kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
676 i += 3;
677 break;
678 default:
679 PyErr_Format(PyExc_SystemError,
680 "unexpected node in varargslist: %d @ %d",
681 TYPE(ch), i);
682 goto error;
686 return arguments(args, vararg, kwarg, defaults, c->c_arena);
688 error:
689 Py_XDECREF(vararg);
690 Py_XDECREF(kwarg);
691 return NULL;
694 static expr_ty
695 ast_for_dotted_name(struct compiling *c, const node *n)
697 expr_ty e;
698 identifier id;
699 int lineno, col_offset;
700 int i;
702 REQ(n, dotted_name);
704 lineno = LINENO(n);
705 col_offset = n->n_col_offset;
707 id = NEW_IDENTIFIER(CHILD(n, 0));
708 if (!id)
709 return NULL;
710 e = Name(id, Load, lineno, col_offset, c->c_arena);
711 if (!e)
712 return NULL;
714 for (i = 2; i < NCH(n); i+=2) {
715 id = NEW_IDENTIFIER(CHILD(n, i));
716 if (!id)
717 return NULL;
718 e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
719 if (!e)
720 return NULL;
723 return e;
726 static expr_ty
727 ast_for_decorator(struct compiling *c, const node *n)
729 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
730 expr_ty d = NULL;
731 expr_ty name_expr;
733 REQ(n, decorator);
734 REQ(CHILD(n, 0), AT);
735 REQ(RCHILD(n, -1), NEWLINE);
737 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
738 if (!name_expr)
739 return NULL;
741 if (NCH(n) == 3) { /* No arguments */
742 d = name_expr;
743 name_expr = NULL;
745 else if (NCH(n) == 5) { /* Call with no arguments */
746 d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
747 if (!d)
748 return NULL;
749 name_expr = NULL;
751 else {
752 d = ast_for_call(c, CHILD(n, 3), name_expr);
753 if (!d)
754 return NULL;
755 name_expr = NULL;
758 return d;
761 static asdl_seq*
762 ast_for_decorators(struct compiling *c, const node *n)
764 asdl_seq* decorator_seq;
765 expr_ty d;
766 int i;
768 REQ(n, decorators);
769 decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
770 if (!decorator_seq)
771 return NULL;
773 for (i = 0; i < NCH(n); i++) {
774 d = ast_for_decorator(c, CHILD(n, i));
775 if (!d)
776 return NULL;
777 asdl_seq_SET(decorator_seq, i, d);
779 return decorator_seq;
782 static stmt_ty
783 ast_for_funcdef(struct compiling *c, const node *n)
785 /* funcdef: 'def' [decorators] NAME parameters ':' suite */
786 identifier name;
787 arguments_ty args;
788 asdl_seq *body;
789 asdl_seq *decorator_seq = NULL;
790 int name_i;
792 REQ(n, funcdef);
794 if (NCH(n) == 6) { /* decorators are present */
795 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
796 if (!decorator_seq)
797 return NULL;
798 name_i = 2;
800 else {
801 name_i = 1;
804 name = NEW_IDENTIFIER(CHILD(n, name_i));
805 if (!name)
806 return NULL;
807 else if (!strcmp(STR(CHILD(n, name_i)), "None")) {
808 ast_error(CHILD(n, name_i), "assignment to None");
809 return NULL;
811 args = ast_for_arguments(c, CHILD(n, name_i + 1));
812 if (!args)
813 return NULL;
814 body = ast_for_suite(c, CHILD(n, name_i + 3));
815 if (!body)
816 return NULL;
818 return FunctionDef(name, args, body, decorator_seq, LINENO(n), n->n_col_offset, c->c_arena);
821 static expr_ty
822 ast_for_lambdef(struct compiling *c, const node *n)
824 /* lambdef: 'lambda' [varargslist] ':' test */
825 arguments_ty args;
826 expr_ty expression;
828 if (NCH(n) == 3) {
829 args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
830 if (!args)
831 return NULL;
832 expression = ast_for_expr(c, CHILD(n, 2));
833 if (!expression)
834 return NULL;
836 else {
837 args = ast_for_arguments(c, CHILD(n, 1));
838 if (!args)
839 return NULL;
840 expression = ast_for_expr(c, CHILD(n, 3));
841 if (!expression)
842 return NULL;
845 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
848 static expr_ty
849 ast_for_ifexpr(struct compiling *c, const node *n)
851 /* test: or_test 'if' or_test 'else' test */
852 expr_ty expression, body, orelse;
854 assert(NCH(n) == 5);
855 body = ast_for_expr(c, CHILD(n, 0));
856 if (!body)
857 return NULL;
858 expression = ast_for_expr(c, CHILD(n, 2));
859 if (!expression)
860 return NULL;
861 orelse = ast_for_expr(c, CHILD(n, 4));
862 if (!orelse)
863 return NULL;
864 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset, c->c_arena);
867 /* Count the number of 'for' loop in a list comprehension.
869 Helper for ast_for_listcomp().
872 static int
873 count_list_fors(const node *n)
875 int n_fors = 0;
876 node *ch = CHILD(n, 1);
878 count_list_for:
879 n_fors++;
880 REQ(ch, list_for);
881 if (NCH(ch) == 5)
882 ch = CHILD(ch, 4);
883 else
884 return n_fors;
885 count_list_iter:
886 REQ(ch, list_iter);
887 ch = CHILD(ch, 0);
888 if (TYPE(ch) == list_for)
889 goto count_list_for;
890 else if (TYPE(ch) == list_if) {
891 if (NCH(ch) == 3) {
892 ch = CHILD(ch, 2);
893 goto count_list_iter;
895 else
896 return n_fors;
899 /* Should never be reached */
900 PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
901 return -1;
904 /* Count the number of 'if' statements in a list comprehension.
906 Helper for ast_for_listcomp().
909 static int
910 count_list_ifs(const node *n)
912 int n_ifs = 0;
914 count_list_iter:
915 REQ(n, list_iter);
916 if (TYPE(CHILD(n, 0)) == list_for)
917 return n_ifs;
918 n = CHILD(n, 0);
919 REQ(n, list_if);
920 n_ifs++;
921 if (NCH(n) == 2)
922 return n_ifs;
923 n = CHILD(n, 2);
924 goto count_list_iter;
927 static expr_ty
928 ast_for_listcomp(struct compiling *c, const node *n)
930 /* listmaker: test ( list_for | (',' test)* [','] )
931 list_for: 'for' exprlist 'in' testlist_safe [list_iter]
932 list_iter: list_for | list_if
933 list_if: 'if' test [list_iter]
934 testlist_safe: test [(',' test)+ [',']]
936 expr_ty elt;
937 asdl_seq *listcomps;
938 int i, n_fors;
939 node *ch;
941 REQ(n, listmaker);
942 assert(NCH(n) > 1);
944 elt = ast_for_expr(c, CHILD(n, 0));
945 if (!elt)
946 return NULL;
948 n_fors = count_list_fors(n);
949 if (n_fors == -1)
950 return NULL;
952 listcomps = asdl_seq_new(n_fors, c->c_arena);
953 if (!listcomps)
954 return NULL;
956 ch = CHILD(n, 1);
957 for (i = 0; i < n_fors; i++) {
958 comprehension_ty lc;
959 asdl_seq *t;
960 expr_ty expression;
962 REQ(ch, list_for);
964 t = ast_for_exprlist(c, CHILD(ch, 1), Store);
965 if (!t)
966 return NULL;
967 expression = ast_for_testlist(c, CHILD(ch, 3));
968 if (!expression)
969 return NULL;
971 if (asdl_seq_LEN(t) == 1)
972 lc = comprehension(asdl_seq_GET(t, 0), expression, NULL,
973 c->c_arena);
974 else
975 lc = comprehension(Tuple(t, Store, LINENO(ch), ch->n_col_offset, c->c_arena),
976 expression, NULL, c->c_arena);
977 if (!lc)
978 return NULL;
980 if (NCH(ch) == 5) {
981 int j, n_ifs;
982 asdl_seq *ifs;
984 ch = CHILD(ch, 4);
985 n_ifs = count_list_ifs(ch);
986 if (n_ifs == -1)
987 return NULL;
989 ifs = asdl_seq_new(n_ifs, c->c_arena);
990 if (!ifs)
991 return NULL;
993 for (j = 0; j < n_ifs; j++) {
994 REQ(ch, list_iter);
995 ch = CHILD(ch, 0);
996 REQ(ch, list_if);
998 asdl_seq_SET(ifs, j, ast_for_expr(c, CHILD(ch, 1)));
999 if (NCH(ch) == 3)
1000 ch = CHILD(ch, 2);
1002 /* on exit, must guarantee that ch is a list_for */
1003 if (TYPE(ch) == list_iter)
1004 ch = CHILD(ch, 0);
1005 lc->ifs = ifs;
1007 asdl_seq_SET(listcomps, i, lc);
1010 return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1014 Count the number of 'for' loops in a generator expression.
1016 Helper for ast_for_genexp().
1019 static int
1020 count_gen_fors(const node *n)
1022 int n_fors = 0;
1023 node *ch = CHILD(n, 1);
1025 count_gen_for:
1026 n_fors++;
1027 REQ(ch, gen_for);
1028 if (NCH(ch) == 5)
1029 ch = CHILD(ch, 4);
1030 else
1031 return n_fors;
1032 count_gen_iter:
1033 REQ(ch, gen_iter);
1034 ch = CHILD(ch, 0);
1035 if (TYPE(ch) == gen_for)
1036 goto count_gen_for;
1037 else if (TYPE(ch) == gen_if) {
1038 if (NCH(ch) == 3) {
1039 ch = CHILD(ch, 2);
1040 goto count_gen_iter;
1042 else
1043 return n_fors;
1046 /* Should never be reached */
1047 PyErr_SetString(PyExc_SystemError,
1048 "logic error in count_gen_fors");
1049 return -1;
1052 /* Count the number of 'if' statements in a generator expression.
1054 Helper for ast_for_genexp().
1057 static int
1058 count_gen_ifs(const node *n)
1060 int n_ifs = 0;
1062 while (1) {
1063 REQ(n, gen_iter);
1064 if (TYPE(CHILD(n, 0)) == gen_for)
1065 return n_ifs;
1066 n = CHILD(n, 0);
1067 REQ(n, gen_if);
1068 n_ifs++;
1069 if (NCH(n) == 2)
1070 return n_ifs;
1071 n = CHILD(n, 2);
1075 /* TODO(jhylton): Combine with list comprehension code? */
1076 static expr_ty
1077 ast_for_genexp(struct compiling *c, const node *n)
1079 /* testlist_gexp: test ( gen_for | (',' test)* [','] )
1080 argument: [test '='] test [gen_for] # Really [keyword '='] test */
1081 expr_ty elt;
1082 asdl_seq *genexps;
1083 int i, n_fors;
1084 node *ch;
1086 assert(TYPE(n) == (testlist_gexp) || TYPE(n) == (argument));
1087 assert(NCH(n) > 1);
1089 elt = ast_for_expr(c, CHILD(n, 0));
1090 if (!elt)
1091 return NULL;
1093 n_fors = count_gen_fors(n);
1094 if (n_fors == -1)
1095 return NULL;
1097 genexps = asdl_seq_new(n_fors, c->c_arena);
1098 if (!genexps)
1099 return NULL;
1101 ch = CHILD(n, 1);
1102 for (i = 0; i < n_fors; i++) {
1103 comprehension_ty ge;
1104 asdl_seq *t;
1105 expr_ty expression;
1107 REQ(ch, gen_for);
1109 t = ast_for_exprlist(c, CHILD(ch, 1), Store);
1110 if (!t)
1111 return NULL;
1112 expression = ast_for_expr(c, CHILD(ch, 3));
1113 if (!expression)
1114 return NULL;
1116 if (asdl_seq_LEN(t) == 1)
1117 ge = comprehension(asdl_seq_GET(t, 0), expression,
1118 NULL, c->c_arena);
1119 else
1120 ge = comprehension(Tuple(t, Store, LINENO(ch), ch->n_col_offset, c->c_arena),
1121 expression, NULL, c->c_arena);
1123 if (!ge)
1124 return NULL;
1126 if (NCH(ch) == 5) {
1127 int j, n_ifs;
1128 asdl_seq *ifs;
1130 ch = CHILD(ch, 4);
1131 n_ifs = count_gen_ifs(ch);
1132 if (n_ifs == -1)
1133 return NULL;
1135 ifs = asdl_seq_new(n_ifs, c->c_arena);
1136 if (!ifs)
1137 return NULL;
1139 for (j = 0; j < n_ifs; j++) {
1140 REQ(ch, gen_iter);
1141 ch = CHILD(ch, 0);
1142 REQ(ch, gen_if);
1144 expression = ast_for_expr(c, CHILD(ch, 1));
1145 if (!expression)
1146 return NULL;
1147 asdl_seq_SET(ifs, j, expression);
1148 if (NCH(ch) == 3)
1149 ch = CHILD(ch, 2);
1151 /* on exit, must guarantee that ch is a gen_for */
1152 if (TYPE(ch) == gen_iter)
1153 ch = CHILD(ch, 0);
1154 ge->ifs = ifs;
1156 asdl_seq_SET(genexps, i, ge);
1159 return GeneratorExp(elt, genexps, LINENO(n), n->n_col_offset, c->c_arena);
1162 static expr_ty
1163 ast_for_atom(struct compiling *c, const node *n)
1165 /* atom: '(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']'
1166 | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1168 node *ch = CHILD(n, 0);
1170 switch (TYPE(ch)) {
1171 case NAME:
1172 /* All names start in Load context, but may later be
1173 changed. */
1174 return Name(NEW_IDENTIFIER(ch), Load, LINENO(n), n->n_col_offset, c->c_arena);
1175 case STRING: {
1176 PyObject *str = parsestrplus(c, n);
1177 if (!str)
1178 return NULL;
1180 PyArena_AddPyObject(c->c_arena, str);
1181 return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1183 case NUMBER: {
1184 PyObject *pynum = parsenumber(STR(ch));
1185 if (!pynum)
1186 return NULL;
1188 PyArena_AddPyObject(c->c_arena, pynum);
1189 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1191 case LPAR: /* some parenthesized expressions */
1192 ch = CHILD(n, 1);
1194 if (TYPE(ch) == RPAR)
1195 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1197 if (TYPE(ch) == yield_expr)
1198 return ast_for_expr(c, ch);
1200 if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == gen_for))
1201 return ast_for_genexp(c, ch);
1203 return ast_for_testlist_gexp(c, ch);
1204 case LSQB: /* list (or list comprehension) */
1205 ch = CHILD(n, 1);
1207 if (TYPE(ch) == RSQB)
1208 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1210 REQ(ch, listmaker);
1211 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1212 asdl_seq *elts = seq_for_testlist(c, ch);
1213 if (!elts)
1214 return NULL;
1216 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1218 else
1219 return ast_for_listcomp(c, ch);
1220 case LBRACE: {
1221 /* dictmaker: test ':' test (',' test ':' test)* [','] */
1222 int i, size;
1223 asdl_seq *keys, *values;
1225 ch = CHILD(n, 1);
1226 size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1227 keys = asdl_seq_new(size, c->c_arena);
1228 if (!keys)
1229 return NULL;
1231 values = asdl_seq_new(size, c->c_arena);
1232 if (!values)
1233 return NULL;
1235 for (i = 0; i < NCH(ch); i += 4) {
1236 expr_ty expression;
1238 expression = ast_for_expr(c, CHILD(ch, i));
1239 if (!expression)
1240 return NULL;
1242 asdl_seq_SET(keys, i / 4, expression);
1244 expression = ast_for_expr(c, CHILD(ch, i + 2));
1245 if (!expression)
1246 return NULL;
1248 asdl_seq_SET(values, i / 4, expression);
1250 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1252 case BACKQUOTE: { /* repr */
1253 expr_ty expression = ast_for_testlist(c, CHILD(n, 1));
1254 if (!expression)
1255 return NULL;
1257 return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1259 default:
1260 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1261 return NULL;
1265 static slice_ty
1266 ast_for_slice(struct compiling *c, const node *n)
1268 node *ch;
1269 expr_ty lower = NULL, upper = NULL, step = NULL;
1271 REQ(n, subscript);
1274 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1275 sliceop: ':' [test]
1277 ch = CHILD(n, 0);
1278 if (TYPE(ch) == DOT)
1279 return Ellipsis(c->c_arena);
1281 if (NCH(n) == 1 && TYPE(ch) == test) {
1282 /* 'step' variable hold no significance in terms of being used over
1283 other vars */
1284 step = ast_for_expr(c, ch);
1285 if (!step)
1286 return NULL;
1288 return Index(step, c->c_arena);
1291 if (TYPE(ch) == test) {
1292 lower = ast_for_expr(c, ch);
1293 if (!lower)
1294 return NULL;
1297 /* If there's an upper bound it's in the second or third position. */
1298 if (TYPE(ch) == COLON) {
1299 if (NCH(n) > 1) {
1300 node *n2 = CHILD(n, 1);
1302 if (TYPE(n2) == test) {
1303 upper = ast_for_expr(c, n2);
1304 if (!upper)
1305 return NULL;
1308 } else if (NCH(n) > 2) {
1309 node *n2 = CHILD(n, 2);
1311 if (TYPE(n2) == test) {
1312 upper = ast_for_expr(c, n2);
1313 if (!upper)
1314 return NULL;
1318 ch = CHILD(n, NCH(n) - 1);
1319 if (TYPE(ch) == sliceop) {
1320 if (NCH(ch) == 1)
1321 /* XXX: If only 1 child, then should just be a colon. Should we
1322 just skip assigning and just get to the return? */
1323 ch = CHILD(ch, 0);
1324 else
1325 ch = CHILD(ch, 1);
1326 if (TYPE(ch) == test) {
1327 step = ast_for_expr(c, ch);
1328 if (!step)
1329 return NULL;
1333 return Slice(lower, upper, step, c->c_arena);
1336 static expr_ty
1337 ast_for_binop(struct compiling *c, const node *n)
1339 /* Must account for a sequence of expressions.
1340 How should A op B op C by represented?
1341 BinOp(BinOp(A, op, B), op, C).
1344 int i, nops;
1345 expr_ty expr1, expr2, result;
1346 operator_ty operator;
1348 expr1 = ast_for_expr(c, CHILD(n, 0));
1349 if (!expr1)
1350 return NULL;
1352 expr2 = ast_for_expr(c, CHILD(n, 2));
1353 if (!expr2)
1354 return NULL;
1356 operator = get_operator(CHILD(n, 1));
1357 if (!operator)
1358 return NULL;
1360 result = BinOp(expr1, operator, expr2, LINENO(n), n->n_col_offset, c->c_arena);
1361 if (!result)
1362 return NULL;
1364 nops = (NCH(n) - 1) / 2;
1365 for (i = 1; i < nops; i++) {
1366 expr_ty tmp_result, tmp;
1367 const node* next_oper = CHILD(n, i * 2 + 1);
1369 operator = get_operator(next_oper);
1370 if (!operator)
1371 return NULL;
1373 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1374 if (!tmp)
1375 return NULL;
1377 tmp_result = BinOp(result, operator, tmp,
1378 LINENO(next_oper), next_oper->n_col_offset, c->c_arena);
1379 if (!tmp)
1380 return NULL;
1381 result = tmp_result;
1383 return result;
1386 static expr_ty
1387 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1389 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1390 subscriptlist: subscript (',' subscript)* [',']
1391 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1393 REQ(n, trailer);
1394 if (TYPE(CHILD(n, 0)) == LPAR) {
1395 if (NCH(n) == 2)
1396 return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
1397 else
1398 return ast_for_call(c, CHILD(n, 1), left_expr);
1400 else if (TYPE(CHILD(n, 0)) == DOT ) {
1401 return Attribute(left_expr, NEW_IDENTIFIER(CHILD(n, 1)), Load,
1402 LINENO(n), n->n_col_offset, c->c_arena);
1404 else {
1405 REQ(CHILD(n, 0), LSQB);
1406 REQ(CHILD(n, 2), RSQB);
1407 n = CHILD(n, 1);
1408 if (NCH(n) == 1) {
1409 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1410 if (!slc)
1411 return NULL;
1412 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset, c->c_arena);
1414 else {
1415 /* The grammar is ambiguous here. The ambiguity is resolved
1416 by treating the sequence as a tuple literal if there are
1417 no slice features.
1419 int j;
1420 slice_ty slc;
1421 expr_ty e;
1422 bool simple = true;
1423 asdl_seq *slices, *elts;
1424 slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1425 if (!slices)
1426 return NULL;
1427 for (j = 0; j < NCH(n); j += 2) {
1428 slc = ast_for_slice(c, CHILD(n, j));
1429 if (!slc)
1430 return NULL;
1431 if (slc->kind != Index_kind)
1432 simple = false;
1433 asdl_seq_SET(slices, j / 2, slc);
1435 if (!simple) {
1436 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1437 Load, LINENO(n), n->n_col_offset, c->c_arena);
1439 /* extract Index values and put them in a Tuple */
1440 elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1441 if (!elts)
1442 return NULL;
1443 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1444 slc = (slice_ty)asdl_seq_GET(slices, j);
1445 assert(slc->kind == Index_kind && slc->v.Index.value);
1446 asdl_seq_SET(elts, j, slc->v.Index.value);
1448 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1449 if (!e)
1450 return NULL;
1451 return Subscript(left_expr, Index(e, c->c_arena),
1452 Load, LINENO(n), n->n_col_offset, c->c_arena);
1457 static expr_ty
1458 ast_for_power(struct compiling *c, const node *n)
1460 /* power: atom trailer* ('**' factor)*
1462 int i;
1463 expr_ty e, tmp;
1464 REQ(n, power);
1465 e = ast_for_atom(c, CHILD(n, 0));
1466 if (!e)
1467 return NULL;
1468 if (NCH(n) == 1)
1469 return e;
1470 for (i = 1; i < NCH(n); i++) {
1471 node *ch = CHILD(n, i);
1472 if (TYPE(ch) != trailer)
1473 break;
1474 tmp = ast_for_trailer(c, ch, e);
1475 if (!tmp)
1476 return NULL;
1477 tmp->lineno = e->lineno;
1478 tmp->col_offset = e->col_offset;
1479 e = tmp;
1481 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1482 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1483 if (!f)
1484 return NULL;
1485 tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1486 if (!tmp)
1487 return NULL;
1488 e = tmp;
1490 return e;
1493 /* Do not name a variable 'expr'! Will cause a compile error.
1496 static expr_ty
1497 ast_for_expr(struct compiling *c, const node *n)
1499 /* handle the full range of simple expressions
1500 test: or_test ['if' or_test 'else' test] | lambdef
1501 or_test: and_test ('or' and_test)*
1502 and_test: not_test ('and' not_test)*
1503 not_test: 'not' not_test | comparison
1504 comparison: expr (comp_op expr)*
1505 expr: xor_expr ('|' xor_expr)*
1506 xor_expr: and_expr ('^' and_expr)*
1507 and_expr: shift_expr ('&' shift_expr)*
1508 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1509 arith_expr: term (('+'|'-') term)*
1510 term: factor (('*'|'/'|'%'|'//') factor)*
1511 factor: ('+'|'-'|'~') factor | power
1512 power: atom trailer* ('**' factor)*
1514 As well as modified versions that exist for backward compatibility,
1515 to explicitly allow:
1516 [ x for x in lambda: 0, lambda: 1 ]
1517 (which would be ambiguous without these extra rules)
1519 old_test: or_test | old_lambdef
1520 old_lambdef: 'lambda' [vararglist] ':' old_test
1524 asdl_seq *seq;
1525 int i;
1527 loop:
1528 switch (TYPE(n)) {
1529 case test:
1530 case old_test:
1531 if (TYPE(CHILD(n, 0)) == lambdef ||
1532 TYPE(CHILD(n, 0)) == old_lambdef)
1533 return ast_for_lambdef(c, CHILD(n, 0));
1534 else if (NCH(n) > 1)
1535 return ast_for_ifexpr(c, n);
1536 /* Fallthrough */
1537 case or_test:
1538 case and_test:
1539 if (NCH(n) == 1) {
1540 n = CHILD(n, 0);
1541 goto loop;
1543 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1544 if (!seq)
1545 return NULL;
1546 for (i = 0; i < NCH(n); i += 2) {
1547 expr_ty e = ast_for_expr(c, CHILD(n, i));
1548 if (!e)
1549 return NULL;
1550 asdl_seq_SET(seq, i / 2, e);
1552 if (!strcmp(STR(CHILD(n, 1)), "and"))
1553 return BoolOp(And, seq, LINENO(n), n->n_col_offset, c->c_arena);
1554 assert(!strcmp(STR(CHILD(n, 1)), "or"));
1555 return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1556 case not_test:
1557 if (NCH(n) == 1) {
1558 n = CHILD(n, 0);
1559 goto loop;
1561 else {
1562 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1563 if (!expression)
1564 return NULL;
1566 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset, c->c_arena);
1568 case comparison:
1569 if (NCH(n) == 1) {
1570 n = CHILD(n, 0);
1571 goto loop;
1573 else {
1574 expr_ty expression;
1575 asdl_seq *ops, *cmps;
1576 ops = asdl_seq_new(NCH(n) / 2, c->c_arena);
1577 if (!ops)
1578 return NULL;
1579 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1580 if (!cmps) {
1581 return NULL;
1583 for (i = 1; i < NCH(n); i += 2) {
1584 /* XXX cmpop_ty is just an enum */
1585 cmpop_ty operator;
1587 operator = ast_for_comp_op(CHILD(n, i));
1588 if (!operator) {
1589 return NULL;
1592 expression = ast_for_expr(c, CHILD(n, i + 1));
1593 if (!expression) {
1594 return NULL;
1597 asdl_seq_SET(ops, i / 2, (void *)(Py_uintptr_t)operator);
1598 asdl_seq_SET(cmps, i / 2, expression);
1600 expression = ast_for_expr(c, CHILD(n, 0));
1601 if (!expression) {
1602 return NULL;
1605 return Compare(expression, ops, cmps, LINENO(n), n->n_col_offset, c->c_arena);
1607 break;
1609 /* The next five cases all handle BinOps. The main body of code
1610 is the same in each case, but the switch turned inside out to
1611 reuse the code for each type of operator.
1613 case expr:
1614 case xor_expr:
1615 case and_expr:
1616 case shift_expr:
1617 case arith_expr:
1618 case term:
1619 if (NCH(n) == 1) {
1620 n = CHILD(n, 0);
1621 goto loop;
1623 return ast_for_binop(c, n);
1624 case yield_expr: {
1625 expr_ty exp = NULL;
1626 if (NCH(n) == 2) {
1627 exp = ast_for_testlist(c, CHILD(n, 1));
1628 if (!exp)
1629 return NULL;
1631 return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1633 case factor: {
1634 expr_ty expression;
1636 if (NCH(n) == 1) {
1637 n = CHILD(n, 0);
1638 goto loop;
1641 expression = ast_for_expr(c, CHILD(n, 1));
1642 if (!expression)
1643 return NULL;
1645 switch (TYPE(CHILD(n, 0))) {
1646 case PLUS:
1647 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset, c->c_arena);
1648 case MINUS:
1649 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset, c->c_arena);
1650 case TILDE:
1651 return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset, c->c_arena);
1653 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1654 TYPE(CHILD(n, 0)));
1655 break;
1657 case power:
1658 return ast_for_power(c, n);
1659 default:
1660 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1661 return NULL;
1663 /* should never get here unless if error is set */
1664 return NULL;
1667 static expr_ty
1668 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1671 arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1672 | '**' test)
1673 argument: [test '='] test [gen_for] # Really [keyword '='] test
1676 int i, nargs, nkeywords, ngens;
1677 asdl_seq *args;
1678 asdl_seq *keywords;
1679 expr_ty vararg = NULL, kwarg = NULL;
1681 REQ(n, arglist);
1683 nargs = 0;
1684 nkeywords = 0;
1685 ngens = 0;
1686 for (i = 0; i < NCH(n); i++) {
1687 node *ch = CHILD(n, i);
1688 if (TYPE(ch) == argument) {
1689 if (NCH(ch) == 1)
1690 nargs++;
1691 else if (TYPE(CHILD(ch, 1)) == gen_for)
1692 ngens++;
1693 else
1694 nkeywords++;
1697 if (ngens > 1 || (ngens && (nargs || nkeywords))) {
1698 ast_error(n, "Generator expression must be parenthesized "
1699 "if not sole argument");
1700 return NULL;
1703 if (nargs + nkeywords + ngens > 255) {
1704 ast_error(n, "more than 255 arguments");
1705 return NULL;
1708 args = asdl_seq_new(nargs + ngens, c->c_arena);
1709 if (!args)
1710 return NULL;
1711 keywords = asdl_seq_new(nkeywords, c->c_arena);
1712 if (!keywords)
1713 return NULL;
1714 nargs = 0;
1715 nkeywords = 0;
1716 for (i = 0; i < NCH(n); i++) {
1717 node *ch = CHILD(n, i);
1718 if (TYPE(ch) == argument) {
1719 expr_ty e;
1720 if (NCH(ch) == 1) {
1721 e = ast_for_expr(c, CHILD(ch, 0));
1722 if (!e)
1723 return NULL;
1724 asdl_seq_SET(args, nargs++, e);
1726 else if (TYPE(CHILD(ch, 1)) == gen_for) {
1727 e = ast_for_genexp(c, ch);
1728 if (!e)
1729 return NULL;
1730 asdl_seq_SET(args, nargs++, e);
1732 else {
1733 keyword_ty kw;
1734 identifier key;
1736 /* CHILD(ch, 0) is test, but must be an identifier? */
1737 e = ast_for_expr(c, CHILD(ch, 0));
1738 if (!e)
1739 return NULL;
1740 /* f(lambda x: x[0] = 3) ends up getting parsed with
1741 * LHS test = lambda x: x[0], and RHS test = 3.
1742 * SF bug 132313 points out that complaining about a keyword
1743 * then is very confusing.
1745 if (e->kind == Lambda_kind) {
1746 ast_error(CHILD(ch, 0), "lambda cannot contain assignment");
1747 return NULL;
1748 } else if (e->kind != Name_kind) {
1749 ast_error(CHILD(ch, 0), "keyword can't be an expression");
1750 return NULL;
1752 key = e->v.Name.id;
1753 e = ast_for_expr(c, CHILD(ch, 2));
1754 if (!e)
1755 return NULL;
1756 kw = keyword(key, e, c->c_arena);
1757 if (!kw)
1758 return NULL;
1759 asdl_seq_SET(keywords, nkeywords++, kw);
1762 else if (TYPE(ch) == STAR) {
1763 vararg = ast_for_expr(c, CHILD(n, i+1));
1764 i++;
1766 else if (TYPE(ch) == DOUBLESTAR) {
1767 kwarg = ast_for_expr(c, CHILD(n, i+1));
1768 i++;
1772 return Call(func, args, keywords, vararg, kwarg, func->lineno, func->col_offset, c->c_arena);
1775 static expr_ty
1776 ast_for_testlist(struct compiling *c, const node* n)
1778 /* testlist_gexp: test (',' test)* [','] */
1779 /* testlist: test (',' test)* [','] */
1780 /* testlist_safe: test (',' test)+ [','] */
1781 /* testlist1: test (',' test)* */
1782 assert(NCH(n) > 0);
1783 if (TYPE(n) == testlist_gexp) {
1784 if (NCH(n) > 1)
1785 assert(TYPE(CHILD(n, 1)) != gen_for);
1787 else {
1788 assert(TYPE(n) == testlist ||
1789 TYPE(n) == testlist_safe ||
1790 TYPE(n) == testlist1);
1792 if (NCH(n) == 1)
1793 return ast_for_expr(c, CHILD(n, 0));
1794 else {
1795 asdl_seq *tmp = seq_for_testlist(c, n);
1796 if (!tmp)
1797 return NULL;
1798 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
1802 static expr_ty
1803 ast_for_testlist_gexp(struct compiling *c, const node* n)
1805 /* testlist_gexp: test ( gen_for | (',' test)* [','] ) */
1806 /* argument: test [ gen_for ] */
1807 assert(TYPE(n) == testlist_gexp || TYPE(n) == argument);
1808 if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for)
1809 return ast_for_genexp(c, n);
1810 return ast_for_testlist(c, n);
1813 /* like ast_for_testlist() but returns a sequence */
1814 static asdl_seq*
1815 ast_for_class_bases(struct compiling *c, const node* n)
1817 /* testlist: test (',' test)* [','] */
1818 assert(NCH(n) > 0);
1819 REQ(n, testlist);
1820 if (NCH(n) == 1) {
1821 expr_ty base;
1822 asdl_seq *bases = asdl_seq_new(1, c->c_arena);
1823 if (!bases)
1824 return NULL;
1825 base = ast_for_expr(c, CHILD(n, 0));
1826 if (!base)
1827 return NULL;
1828 asdl_seq_SET(bases, 0, base);
1829 return bases;
1832 return seq_for_testlist(c, n);
1835 static stmt_ty
1836 ast_for_expr_stmt(struct compiling *c, const node *n)
1838 REQ(n, expr_stmt);
1839 /* expr_stmt: testlist (augassign (yield_expr|testlist)
1840 | ('=' (yield_expr|testlist))*)
1841 testlist: test (',' test)* [',']
1842 augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
1843 | '<<=' | '>>=' | '**=' | '//='
1844 test: ... here starts the operator precendence dance
1847 if (NCH(n) == 1) {
1848 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
1849 if (!e)
1850 return NULL;
1852 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
1854 else if (TYPE(CHILD(n, 1)) == augassign) {
1855 expr_ty expr1, expr2;
1856 operator_ty operator;
1857 node *ch = CHILD(n, 0);
1859 if (TYPE(ch) == testlist)
1860 expr1 = ast_for_testlist(c, ch);
1861 else
1862 expr1 = Yield(ast_for_expr(c, CHILD(ch, 0)), LINENO(ch), n->n_col_offset,
1863 c->c_arena);
1865 if (!expr1)
1866 return NULL;
1867 /* TODO(jhylton): Figure out why set_context() can't be used here. */
1868 switch (expr1->kind) {
1869 case GeneratorExp_kind:
1870 ast_error(ch, "augmented assignment to generator "
1871 "expression not possible");
1872 return NULL;
1873 case Name_kind: {
1874 const char *var_name = PyString_AS_STRING(expr1->v.Name.id);
1875 if (var_name[0] == 'N' && !strcmp(var_name, "None")) {
1876 ast_error(ch, "assignment to None");
1877 return NULL;
1879 break;
1881 case Attribute_kind:
1882 case Subscript_kind:
1883 break;
1884 default:
1885 ast_error(ch, "illegal expression for augmented "
1886 "assignment");
1887 return NULL;
1890 ch = CHILD(n, 2);
1891 if (TYPE(ch) == testlist)
1892 expr2 = ast_for_testlist(c, ch);
1893 else
1894 expr2 = Yield(ast_for_expr(c, ch), LINENO(ch), ch->n_col_offset, c->c_arena);
1895 if (!expr2)
1896 return NULL;
1898 operator = ast_for_augassign(CHILD(n, 1));
1899 if (!operator)
1900 return NULL;
1902 return AugAssign(expr1, operator, expr2, LINENO(n), n->n_col_offset, c->c_arena);
1904 else {
1905 int i;
1906 asdl_seq *targets;
1907 node *value;
1908 expr_ty expression;
1910 /* a normal assignment */
1911 REQ(CHILD(n, 1), EQUAL);
1912 targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
1913 if (!targets)
1914 return NULL;
1915 for (i = 0; i < NCH(n) - 2; i += 2) {
1916 expr_ty e;
1917 node *ch = CHILD(n, i);
1918 if (TYPE(ch) == yield_expr) {
1919 ast_error(ch, "assignment to yield expression not possible");
1920 return NULL;
1922 e = ast_for_testlist(c, ch);
1924 /* set context to assign */
1925 if (!e)
1926 return NULL;
1928 if (!set_context(e, Store, CHILD(n, i)))
1929 return NULL;
1931 asdl_seq_SET(targets, i / 2, e);
1933 value = CHILD(n, NCH(n) - 1);
1934 if (TYPE(value) == testlist)
1935 expression = ast_for_testlist(c, value);
1936 else
1937 expression = ast_for_expr(c, value);
1938 if (!expression)
1939 return NULL;
1940 return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena);
1944 static stmt_ty
1945 ast_for_print_stmt(struct compiling *c, const node *n)
1947 /* print_stmt: 'print' ( [ test (',' test)* [','] ]
1948 | '>>' test [ (',' test)+ [','] ] )
1950 expr_ty dest = NULL, expression;
1951 asdl_seq *seq;
1952 bool nl;
1953 int i, j, start = 1;
1955 REQ(n, print_stmt);
1956 if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
1957 dest = ast_for_expr(c, CHILD(n, 2));
1958 if (!dest)
1959 return NULL;
1960 start = 4;
1962 seq = asdl_seq_new((NCH(n) + 1 - start) / 2, c->c_arena);
1963 if (!seq)
1964 return NULL;
1965 for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
1966 expression = ast_for_expr(c, CHILD(n, i));
1967 if (!expression)
1968 return NULL;
1969 asdl_seq_SET(seq, j, expression);
1971 nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
1972 return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
1975 static asdl_seq *
1976 ast_for_exprlist(struct compiling *c, const node *n, int context)
1978 asdl_seq *seq;
1979 int i;
1980 expr_ty e;
1982 REQ(n, exprlist);
1984 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1985 if (!seq)
1986 return NULL;
1987 for (i = 0; i < NCH(n); i += 2) {
1988 e = ast_for_expr(c, CHILD(n, i));
1989 if (!e)
1990 return NULL;
1991 asdl_seq_SET(seq, i / 2, e);
1992 if (context && !set_context(e, context, CHILD(n, i)))
1993 return NULL;
1995 return seq;
1998 static stmt_ty
1999 ast_for_del_stmt(struct compiling *c, const node *n)
2001 asdl_seq *expr_list;
2003 /* del_stmt: 'del' exprlist */
2004 REQ(n, del_stmt);
2006 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2007 if (!expr_list)
2008 return NULL;
2009 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2012 static stmt_ty
2013 ast_for_flow_stmt(struct compiling *c, const node *n)
2016 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2017 | yield_stmt
2018 break_stmt: 'break'
2019 continue_stmt: 'continue'
2020 return_stmt: 'return' [testlist]
2021 yield_stmt: yield_expr
2022 yield_expr: 'yield' testlist
2023 raise_stmt: 'raise' [test [',' test [',' test]]]
2025 node *ch;
2027 REQ(n, flow_stmt);
2028 ch = CHILD(n, 0);
2029 switch (TYPE(ch)) {
2030 case break_stmt:
2031 return Break(LINENO(n), n->n_col_offset, c->c_arena);
2032 case continue_stmt:
2033 return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2034 case yield_stmt: { /* will reduce to yield_expr */
2035 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2036 if (!exp)
2037 return NULL;
2038 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2040 case return_stmt:
2041 if (NCH(ch) == 1)
2042 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2043 else {
2044 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2045 if (!expression)
2046 return NULL;
2047 return Return(expression, LINENO(n), n->n_col_offset, c->c_arena);
2049 case raise_stmt:
2050 if (NCH(ch) == 1)
2051 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2052 else if (NCH(ch) == 2) {
2053 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2054 if (!expression)
2055 return NULL;
2056 return Raise(expression, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2058 else if (NCH(ch) == 4) {
2059 expr_ty expr1, expr2;
2061 expr1 = ast_for_expr(c, CHILD(ch, 1));
2062 if (!expr1)
2063 return NULL;
2064 expr2 = ast_for_expr(c, CHILD(ch, 3));
2065 if (!expr2)
2066 return NULL;
2068 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2070 else if (NCH(ch) == 6) {
2071 expr_ty expr1, expr2, expr3;
2073 expr1 = ast_for_expr(c, CHILD(ch, 1));
2074 if (!expr1)
2075 return NULL;
2076 expr2 = ast_for_expr(c, CHILD(ch, 3));
2077 if (!expr2)
2078 return NULL;
2079 expr3 = ast_for_expr(c, CHILD(ch, 5));
2080 if (!expr3)
2081 return NULL;
2083 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset, c->c_arena);
2085 default:
2086 PyErr_Format(PyExc_SystemError,
2087 "unexpected flow_stmt: %d", TYPE(ch));
2088 return NULL;
2091 PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2092 return NULL;
2095 static alias_ty
2096 alias_for_import_name(struct compiling *c, const node *n)
2099 import_as_name: NAME ['as' NAME]
2100 dotted_as_name: dotted_name ['as' NAME]
2101 dotted_name: NAME ('.' NAME)*
2103 PyObject *str;
2105 loop:
2106 switch (TYPE(n)) {
2107 case import_as_name:
2108 str = (NCH(n) == 3) ? NEW_IDENTIFIER(CHILD(n, 2)) : NULL;
2109 return alias(NEW_IDENTIFIER(CHILD(n, 0)), str, c->c_arena);
2110 case dotted_as_name:
2111 if (NCH(n) == 1) {
2112 n = CHILD(n, 0);
2113 goto loop;
2115 else {
2116 alias_ty a = alias_for_import_name(c, CHILD(n, 0));
2117 assert(!a->asname);
2118 a->asname = NEW_IDENTIFIER(CHILD(n, 2));
2119 return a;
2121 break;
2122 case dotted_name:
2123 if (NCH(n) == 1)
2124 return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL, c->c_arena);
2125 else {
2126 /* Create a string of the form "a.b.c" */
2127 int i;
2128 size_t len;
2129 char *s;
2131 len = 0;
2132 for (i = 0; i < NCH(n); i += 2)
2133 /* length of string plus one for the dot */
2134 len += strlen(STR(CHILD(n, i))) + 1;
2135 len--; /* the last name doesn't have a dot */
2136 str = PyString_FromStringAndSize(NULL, len);
2137 if (!str)
2138 return NULL;
2139 s = PyString_AS_STRING(str);
2140 if (!s)
2141 return NULL;
2142 for (i = 0; i < NCH(n); i += 2) {
2143 char *sch = STR(CHILD(n, i));
2144 strcpy(s, STR(CHILD(n, i)));
2145 s += strlen(sch);
2146 *s++ = '.';
2148 --s;
2149 *s = '\0';
2150 PyString_InternInPlace(&str);
2151 PyArena_AddPyObject(c->c_arena, str);
2152 return alias(str, NULL, c->c_arena);
2154 break;
2155 case STAR:
2156 str = PyString_InternFromString("*");
2157 PyArena_AddPyObject(c->c_arena, str);
2158 return alias(str, NULL, c->c_arena);
2159 default:
2160 PyErr_Format(PyExc_SystemError,
2161 "unexpected import name: %d", TYPE(n));
2162 return NULL;
2165 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2166 return NULL;
2169 static stmt_ty
2170 ast_for_import_stmt(struct compiling *c, const node *n)
2173 import_stmt: import_name | import_from
2174 import_name: 'import' dotted_as_names
2175 import_from: 'from' ('.'* dotted_name | '.') 'import'
2176 ('*' | '(' import_as_names ')' | import_as_names)
2178 int lineno;
2179 int col_offset;
2180 int i;
2181 asdl_seq *aliases;
2183 REQ(n, import_stmt);
2184 lineno = LINENO(n);
2185 col_offset = n->n_col_offset;
2186 n = CHILD(n, 0);
2187 if (TYPE(n) == import_name) {
2188 n = CHILD(n, 1);
2189 REQ(n, dotted_as_names);
2190 aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2191 if (!aliases)
2192 return NULL;
2193 for (i = 0; i < NCH(n); i += 2) {
2194 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i));
2195 if (!import_alias)
2196 return NULL;
2197 asdl_seq_SET(aliases, i / 2, import_alias);
2199 return Import(aliases, lineno, col_offset, c->c_arena);
2201 else if (TYPE(n) == import_from) {
2202 int n_children;
2203 int idx, ndots = 0;
2204 alias_ty mod = NULL;
2205 identifier modname;
2207 /* Count the number of dots (for relative imports) and check for the
2208 optional module name */
2209 for (idx = 1; idx < NCH(n); idx++) {
2210 if (TYPE(CHILD(n, idx)) == dotted_name) {
2211 mod = alias_for_import_name(c, CHILD(n, idx));
2212 idx++;
2213 break;
2214 } else if (TYPE(CHILD(n, idx)) != DOT) {
2215 break;
2217 ndots++;
2219 idx++; /* skip over the 'import' keyword */
2220 switch (TYPE(CHILD(n, idx))) {
2221 case STAR:
2222 /* from ... import * */
2223 n = CHILD(n, idx);
2224 n_children = 1;
2225 if (ndots) {
2226 ast_error(n, "'import *' not allowed with 'from .'");
2227 return NULL;
2229 break;
2230 case LPAR:
2231 /* from ... import (x, y, z) */
2232 n = CHILD(n, idx + 1);
2233 n_children = NCH(n);
2234 break;
2235 case import_as_names:
2236 /* from ... import x, y, z */
2237 n = CHILD(n, idx);
2238 n_children = NCH(n);
2239 if (n_children % 2 == 0) {
2240 ast_error(n, "trailing comma not allowed without"
2241 " surrounding parentheses");
2242 return NULL;
2244 break;
2245 default:
2246 ast_error(n, "Unexpected node-type in from-import");
2247 return NULL;
2250 aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2251 if (!aliases)
2252 return NULL;
2254 /* handle "from ... import *" special b/c there's no children */
2255 if (TYPE(n) == STAR) {
2256 alias_ty import_alias = alias_for_import_name(c, n);
2257 if (!import_alias)
2258 return NULL;
2259 asdl_seq_SET(aliases, 0, import_alias);
2261 else {
2262 for (i = 0; i < NCH(n); i += 2) {
2263 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i));
2264 if (!import_alias)
2265 return NULL;
2266 asdl_seq_SET(aliases, i / 2, import_alias);
2269 if (mod != NULL)
2270 modname = mod->name;
2271 else
2272 modname = new_identifier("", c->c_arena);
2273 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2274 c->c_arena);
2276 PyErr_Format(PyExc_SystemError,
2277 "unknown import statement: starts with command '%s'",
2278 STR(CHILD(n, 0)));
2279 return NULL;
2282 static stmt_ty
2283 ast_for_global_stmt(struct compiling *c, const node *n)
2285 /* global_stmt: 'global' NAME (',' NAME)* */
2286 identifier name;
2287 asdl_seq *s;
2288 int i;
2290 REQ(n, global_stmt);
2291 s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2292 if (!s)
2293 return NULL;
2294 for (i = 1; i < NCH(n); i += 2) {
2295 name = NEW_IDENTIFIER(CHILD(n, i));
2296 if (!name)
2297 return NULL;
2298 asdl_seq_SET(s, i / 2, name);
2300 return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2303 static stmt_ty
2304 ast_for_exec_stmt(struct compiling *c, const node *n)
2306 expr_ty expr1, globals = NULL, locals = NULL;
2307 int n_children = NCH(n);
2308 if (n_children != 2 && n_children != 4 && n_children != 6) {
2309 PyErr_Format(PyExc_SystemError,
2310 "poorly formed 'exec' statement: %d parts to statement",
2311 n_children);
2312 return NULL;
2315 /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2316 REQ(n, exec_stmt);
2317 expr1 = ast_for_expr(c, CHILD(n, 1));
2318 if (!expr1)
2319 return NULL;
2320 if (n_children >= 4) {
2321 globals = ast_for_expr(c, CHILD(n, 3));
2322 if (!globals)
2323 return NULL;
2325 if (n_children == 6) {
2326 locals = ast_for_expr(c, CHILD(n, 5));
2327 if (!locals)
2328 return NULL;
2331 return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset, c->c_arena);
2334 static stmt_ty
2335 ast_for_assert_stmt(struct compiling *c, const node *n)
2337 /* assert_stmt: 'assert' test [',' test] */
2338 REQ(n, assert_stmt);
2339 if (NCH(n) == 2) {
2340 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2341 if (!expression)
2342 return NULL;
2343 return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2345 else if (NCH(n) == 4) {
2346 expr_ty expr1, expr2;
2348 expr1 = ast_for_expr(c, CHILD(n, 1));
2349 if (!expr1)
2350 return NULL;
2351 expr2 = ast_for_expr(c, CHILD(n, 3));
2352 if (!expr2)
2353 return NULL;
2355 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2357 PyErr_Format(PyExc_SystemError,
2358 "improper number of parts to 'assert' statement: %d",
2359 NCH(n));
2360 return NULL;
2363 static asdl_seq *
2364 ast_for_suite(struct compiling *c, const node *n)
2366 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2367 asdl_seq *seq;
2368 stmt_ty s;
2369 int i, total, num, end, pos = 0;
2370 node *ch;
2372 REQ(n, suite);
2374 total = num_stmts(n);
2375 seq = asdl_seq_new(total, c->c_arena);
2376 if (!seq)
2377 return NULL;
2378 if (TYPE(CHILD(n, 0)) == simple_stmt) {
2379 n = CHILD(n, 0);
2380 /* simple_stmt always ends with a NEWLINE,
2381 and may have a trailing SEMI
2383 end = NCH(n) - 1;
2384 if (TYPE(CHILD(n, end - 1)) == SEMI)
2385 end--;
2386 /* loop by 2 to skip semi-colons */
2387 for (i = 0; i < end; i += 2) {
2388 ch = CHILD(n, i);
2389 s = ast_for_stmt(c, ch);
2390 if (!s)
2391 return NULL;
2392 asdl_seq_SET(seq, pos++, s);
2395 else {
2396 for (i = 2; i < (NCH(n) - 1); i++) {
2397 ch = CHILD(n, i);
2398 REQ(ch, stmt);
2399 num = num_stmts(ch);
2400 if (num == 1) {
2401 /* small_stmt or compound_stmt with only one child */
2402 s = ast_for_stmt(c, ch);
2403 if (!s)
2404 return NULL;
2405 asdl_seq_SET(seq, pos++, s);
2407 else {
2408 int j;
2409 ch = CHILD(ch, 0);
2410 REQ(ch, simple_stmt);
2411 for (j = 0; j < NCH(ch); j += 2) {
2412 /* statement terminates with a semi-colon ';' */
2413 if (NCH(CHILD(ch, j)) == 0) {
2414 assert((j + 1) == NCH(ch));
2415 break;
2417 s = ast_for_stmt(c, CHILD(ch, j));
2418 if (!s)
2419 return NULL;
2420 asdl_seq_SET(seq, pos++, s);
2425 assert(pos == seq->size);
2426 return seq;
2429 static stmt_ty
2430 ast_for_if_stmt(struct compiling *c, const node *n)
2432 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2433 ['else' ':' suite]
2435 char *s;
2437 REQ(n, if_stmt);
2439 if (NCH(n) == 4) {
2440 expr_ty expression;
2441 asdl_seq *suite_seq;
2443 expression = ast_for_expr(c, CHILD(n, 1));
2444 if (!expression)
2445 return NULL;
2446 suite_seq = ast_for_suite(c, CHILD(n, 3));
2447 if (!suite_seq)
2448 return NULL;
2450 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2453 s = STR(CHILD(n, 4));
2454 /* s[2], the third character in the string, will be
2455 's' for el_s_e, or
2456 'i' for el_i_f
2458 if (s[2] == 's') {
2459 expr_ty expression;
2460 asdl_seq *seq1, *seq2;
2462 expression = ast_for_expr(c, CHILD(n, 1));
2463 if (!expression)
2464 return NULL;
2465 seq1 = ast_for_suite(c, CHILD(n, 3));
2466 if (!seq1)
2467 return NULL;
2468 seq2 = ast_for_suite(c, CHILD(n, 6));
2469 if (!seq2)
2470 return NULL;
2472 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
2474 else if (s[2] == 'i') {
2475 int i, n_elif, has_else = 0;
2476 asdl_seq *orelse = NULL;
2477 n_elif = NCH(n) - 4;
2478 /* must reference the child n_elif+1 since 'else' token is third,
2479 not fourth, child from the end. */
2480 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2481 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2482 has_else = 1;
2483 n_elif -= 3;
2485 n_elif /= 4;
2487 if (has_else) {
2488 expr_ty expression;
2489 asdl_seq *seq1, *seq2;
2491 orelse = asdl_seq_new(1, c->c_arena);
2492 if (!orelse)
2493 return NULL;
2494 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2495 if (!expression)
2496 return NULL;
2497 seq1 = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2498 if (!seq1)
2499 return NULL;
2500 seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2501 if (!seq2)
2502 return NULL;
2504 asdl_seq_SET(orelse, 0, If(expression, seq1, seq2,
2505 LINENO(CHILD(n, NCH(n) - 6)), CHILD(n, NCH(n) - 6)->n_col_offset,
2506 c->c_arena));
2507 /* the just-created orelse handled the last elif */
2508 n_elif--;
2511 for (i = 0; i < n_elif; i++) {
2512 int off = 5 + (n_elif - i - 1) * 4;
2513 expr_ty expression;
2514 asdl_seq *suite_seq;
2515 asdl_seq *new = asdl_seq_new(1, c->c_arena);
2516 if (!new)
2517 return NULL;
2518 expression = ast_for_expr(c, CHILD(n, off));
2519 if (!expression)
2520 return NULL;
2521 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2522 if (!suite_seq)
2523 return NULL;
2525 asdl_seq_SET(new, 0,
2526 If(expression, suite_seq, orelse,
2527 LINENO(CHILD(n, off)), CHILD(n, off)->n_col_offset, c->c_arena));
2528 orelse = new;
2530 return If(ast_for_expr(c, CHILD(n, 1)),
2531 ast_for_suite(c, CHILD(n, 3)),
2532 orelse, LINENO(n), n->n_col_offset, c->c_arena);
2535 PyErr_Format(PyExc_SystemError,
2536 "unexpected token in 'if' statement: %s", s);
2537 return NULL;
2540 static stmt_ty
2541 ast_for_while_stmt(struct compiling *c, const node *n)
2543 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2544 REQ(n, while_stmt);
2546 if (NCH(n) == 4) {
2547 expr_ty expression;
2548 asdl_seq *suite_seq;
2550 expression = ast_for_expr(c, CHILD(n, 1));
2551 if (!expression)
2552 return NULL;
2553 suite_seq = ast_for_suite(c, CHILD(n, 3));
2554 if (!suite_seq)
2555 return NULL;
2556 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2558 else if (NCH(n) == 7) {
2559 expr_ty expression;
2560 asdl_seq *seq1, *seq2;
2562 expression = ast_for_expr(c, CHILD(n, 1));
2563 if (!expression)
2564 return NULL;
2565 seq1 = ast_for_suite(c, CHILD(n, 3));
2566 if (!seq1)
2567 return NULL;
2568 seq2 = ast_for_suite(c, CHILD(n, 6));
2569 if (!seq2)
2570 return NULL;
2572 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
2575 PyErr_Format(PyExc_SystemError,
2576 "wrong number of tokens for 'while' statement: %d",
2577 NCH(n));
2578 return NULL;
2581 static stmt_ty
2582 ast_for_for_stmt(struct compiling *c, const node *n)
2584 asdl_seq *_target, *seq = NULL, *suite_seq;
2585 expr_ty expression;
2586 expr_ty target;
2587 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2588 REQ(n, for_stmt);
2590 if (NCH(n) == 9) {
2591 seq = ast_for_suite(c, CHILD(n, 8));
2592 if (!seq)
2593 return NULL;
2596 _target = ast_for_exprlist(c, CHILD(n, 1), Store);
2597 if (!_target)
2598 return NULL;
2599 if (asdl_seq_LEN(_target) == 1)
2600 target = asdl_seq_GET(_target, 0);
2601 else
2602 target = Tuple(_target, Store, LINENO(n), n->n_col_offset, c->c_arena);
2604 expression = ast_for_testlist(c, CHILD(n, 3));
2605 if (!expression)
2606 return NULL;
2607 suite_seq = ast_for_suite(c, CHILD(n, 5));
2608 if (!suite_seq)
2609 return NULL;
2611 return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset, c->c_arena);
2614 static excepthandler_ty
2615 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
2617 /* except_clause: 'except' [test [',' test]] */
2618 REQ(exc, except_clause);
2619 REQ(body, suite);
2621 if (NCH(exc) == 1) {
2622 asdl_seq *suite_seq = ast_for_suite(c, body);
2623 if (!suite_seq)
2624 return NULL;
2626 return excepthandler(NULL, NULL, suite_seq, c->c_arena);
2628 else if (NCH(exc) == 2) {
2629 expr_ty expression;
2630 asdl_seq *suite_seq;
2632 expression = ast_for_expr(c, CHILD(exc, 1));
2633 if (!expression)
2634 return NULL;
2635 suite_seq = ast_for_suite(c, body);
2636 if (!suite_seq)
2637 return NULL;
2639 return excepthandler(expression, NULL, suite_seq, c->c_arena);
2641 else if (NCH(exc) == 4) {
2642 asdl_seq *suite_seq;
2643 expr_ty expression;
2644 expr_ty e = ast_for_expr(c, CHILD(exc, 3));
2645 if (!e)
2646 return NULL;
2647 if (!set_context(e, Store, CHILD(exc, 3)))
2648 return NULL;
2649 expression = ast_for_expr(c, CHILD(exc, 1));
2650 if (!expression)
2651 return NULL;
2652 suite_seq = ast_for_suite(c, body);
2653 if (!suite_seq)
2654 return NULL;
2656 return excepthandler(expression, e, suite_seq, c->c_arena);
2659 PyErr_Format(PyExc_SystemError,
2660 "wrong number of children for 'except' clause: %d",
2661 NCH(exc));
2662 return NULL;
2665 static stmt_ty
2666 ast_for_try_stmt(struct compiling *c, const node *n)
2668 const int nch = NCH(n);
2669 int n_except = (nch - 3)/3;
2670 asdl_seq *body, *orelse = NULL, *finally = NULL;
2672 REQ(n, try_stmt);
2674 body = ast_for_suite(c, CHILD(n, 2));
2675 if (body == NULL)
2676 return NULL;
2678 if (TYPE(CHILD(n, nch - 3)) == NAME) {
2679 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
2680 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
2681 /* we can assume it's an "else",
2682 because nch >= 9 for try-else-finally and
2683 it would otherwise have a type of except_clause */
2684 orelse = ast_for_suite(c, CHILD(n, nch - 4));
2685 if (orelse == NULL)
2686 return NULL;
2687 n_except--;
2690 finally = ast_for_suite(c, CHILD(n, nch - 1));
2691 if (finally == NULL)
2692 return NULL;
2693 n_except--;
2695 else {
2696 /* we can assume it's an "else",
2697 otherwise it would have a type of except_clause */
2698 orelse = ast_for_suite(c, CHILD(n, nch - 1));
2699 if (orelse == NULL)
2700 return NULL;
2701 n_except--;
2704 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
2705 ast_error(n, "malformed 'try' statement");
2706 return NULL;
2709 if (n_except > 0) {
2710 int i;
2711 stmt_ty except_st;
2712 /* process except statements to create a try ... except */
2713 asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
2714 if (handlers == NULL)
2715 return NULL;
2717 for (i = 0; i < n_except; i++) {
2718 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
2719 CHILD(n, 5 + i * 3));
2720 if (!e)
2721 return NULL;
2722 asdl_seq_SET(handlers, i, e);
2725 except_st = TryExcept(body, handlers, orelse, LINENO(n), n->n_col_offset, c->c_arena);
2726 if (!finally)
2727 return except_st;
2729 /* if a 'finally' is present too, we nest the TryExcept within a
2730 TryFinally to emulate try ... except ... finally */
2731 body = asdl_seq_new(1, c->c_arena);
2732 if (body == NULL)
2733 return NULL;
2734 asdl_seq_SET(body, 0, except_st);
2737 /* must be a try ... finally (except clauses are in body, if any exist) */
2738 assert(finally != NULL);
2739 return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
2742 static expr_ty
2743 ast_for_with_var(struct compiling *c, const node *n)
2745 REQ(n, with_var);
2746 if (strcmp(STR(CHILD(n, 0)), "as") != 0) {
2747 ast_error(n, "expected \"with [expr] as [var]\"");
2748 return NULL;
2750 return ast_for_expr(c, CHILD(n, 1));
2753 /* with_stmt: 'with' test [ with_var ] ':' suite */
2754 static stmt_ty
2755 ast_for_with_stmt(struct compiling *c, const node *n)
2757 expr_ty context_expr, optional_vars = NULL;
2758 int suite_index = 3; /* skip 'with', test, and ':' */
2759 asdl_seq *suite_seq;
2761 assert(TYPE(n) == with_stmt);
2762 context_expr = ast_for_expr(c, CHILD(n, 1));
2763 if (TYPE(CHILD(n, 2)) == with_var) {
2764 optional_vars = ast_for_with_var(c, CHILD(n, 2));
2766 if (!optional_vars) {
2767 return NULL;
2769 if (!set_context(optional_vars, Store, n)) {
2770 return NULL;
2772 suite_index = 4;
2775 suite_seq = ast_for_suite(c, CHILD(n, suite_index));
2776 if (!suite_seq) {
2777 return NULL;
2779 return With(context_expr, optional_vars, suite_seq, LINENO(n),
2780 n->n_col_offset, c->c_arena);
2783 static stmt_ty
2784 ast_for_classdef(struct compiling *c, const node *n)
2786 /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
2787 asdl_seq *bases, *s;
2789 REQ(n, classdef);
2791 if (!strcmp(STR(CHILD(n, 1)), "None")) {
2792 ast_error(n, "assignment to None");
2793 return NULL;
2796 if (NCH(n) == 4) {
2797 s = ast_for_suite(c, CHILD(n, 3));
2798 if (!s)
2799 return NULL;
2800 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n), n->n_col_offset,
2801 c->c_arena);
2803 /* check for empty base list */
2804 if (TYPE(CHILD(n,3)) == RPAR) {
2805 s = ast_for_suite(c, CHILD(n,5));
2806 if (!s)
2807 return NULL;
2808 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n), n->n_col_offset,
2809 c->c_arena);
2812 /* else handle the base class list */
2813 bases = ast_for_class_bases(c, CHILD(n, 3));
2814 if (!bases)
2815 return NULL;
2817 s = ast_for_suite(c, CHILD(n, 6));
2818 if (!s)
2819 return NULL;
2820 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), bases, s, LINENO(n), n->n_col_offset,
2821 c->c_arena);
2824 static stmt_ty
2825 ast_for_stmt(struct compiling *c, const node *n)
2827 if (TYPE(n) == stmt) {
2828 assert(NCH(n) == 1);
2829 n = CHILD(n, 0);
2831 if (TYPE(n) == simple_stmt) {
2832 assert(num_stmts(n) == 1);
2833 n = CHILD(n, 0);
2835 if (TYPE(n) == small_stmt) {
2836 REQ(n, small_stmt);
2837 n = CHILD(n, 0);
2838 /* small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt
2839 | flow_stmt | import_stmt | global_stmt | exec_stmt
2840 | assert_stmt
2842 switch (TYPE(n)) {
2843 case expr_stmt:
2844 return ast_for_expr_stmt(c, n);
2845 case print_stmt:
2846 return ast_for_print_stmt(c, n);
2847 case del_stmt:
2848 return ast_for_del_stmt(c, n);
2849 case pass_stmt:
2850 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
2851 case flow_stmt:
2852 return ast_for_flow_stmt(c, n);
2853 case import_stmt:
2854 return ast_for_import_stmt(c, n);
2855 case global_stmt:
2856 return ast_for_global_stmt(c, n);
2857 case exec_stmt:
2858 return ast_for_exec_stmt(c, n);
2859 case assert_stmt:
2860 return ast_for_assert_stmt(c, n);
2861 default:
2862 PyErr_Format(PyExc_SystemError,
2863 "unhandled small_stmt: TYPE=%d NCH=%d\n",
2864 TYPE(n), NCH(n));
2865 return NULL;
2868 else {
2869 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
2870 | funcdef | classdef
2872 node *ch = CHILD(n, 0);
2873 REQ(n, compound_stmt);
2874 switch (TYPE(ch)) {
2875 case if_stmt:
2876 return ast_for_if_stmt(c, ch);
2877 case while_stmt:
2878 return ast_for_while_stmt(c, ch);
2879 case for_stmt:
2880 return ast_for_for_stmt(c, ch);
2881 case try_stmt:
2882 return ast_for_try_stmt(c, ch);
2883 case with_stmt:
2884 return ast_for_with_stmt(c, ch);
2885 case funcdef:
2886 return ast_for_funcdef(c, ch);
2887 case classdef:
2888 return ast_for_classdef(c, ch);
2889 default:
2890 PyErr_Format(PyExc_SystemError,
2891 "unhandled small_stmt: TYPE=%d NCH=%d\n",
2892 TYPE(n), NCH(n));
2893 return NULL;
2898 static PyObject *
2899 parsenumber(const char *s)
2901 const char *end;
2902 long x;
2903 double dx;
2904 #ifndef WITHOUT_COMPLEX
2905 Py_complex c;
2906 int imflag;
2907 #endif
2909 errno = 0;
2910 end = s + strlen(s) - 1;
2911 #ifndef WITHOUT_COMPLEX
2912 imflag = *end == 'j' || *end == 'J';
2913 #endif
2914 if (*end == 'l' || *end == 'L')
2915 return PyLong_FromString((char *)s, (char **)0, 0);
2916 if (s[0] == '0') {
2917 x = (long) PyOS_strtoul((char *)s, (char **)&end, 0);
2918 if (x < 0 && errno == 0) {
2919 return PyLong_FromString((char *)s,
2920 (char **)0,
2924 else
2925 x = PyOS_strtol((char *)s, (char **)&end, 0);
2926 if (*end == '\0') {
2927 if (errno != 0)
2928 return PyLong_FromString((char *)s, (char **)0, 0);
2929 return PyInt_FromLong(x);
2931 /* XXX Huge floats may silently fail */
2932 #ifndef WITHOUT_COMPLEX
2933 if (imflag) {
2934 c.real = 0.;
2935 PyFPE_START_PROTECT("atof", return 0)
2936 c.imag = PyOS_ascii_atof(s);
2937 PyFPE_END_PROTECT(c)
2938 return PyComplex_FromCComplex(c);
2940 else
2941 #endif
2943 PyFPE_START_PROTECT("atof", return 0)
2944 dx = PyOS_ascii_atof(s);
2945 PyFPE_END_PROTECT(dx)
2946 return PyFloat_FromDouble(dx);
2950 static PyObject *
2951 decode_utf8(const char **sPtr, const char *end, char* encoding)
2953 #ifndef Py_USING_UNICODE
2954 Py_FatalError("decode_utf8 should not be called in this build.");
2955 return NULL;
2956 #else
2957 PyObject *u, *v;
2958 char *s, *t;
2959 t = s = (char *)*sPtr;
2960 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
2961 while (s < end && (*s & 0x80)) s++;
2962 *sPtr = s;
2963 u = PyUnicode_DecodeUTF8(t, s - t, NULL);
2964 if (u == NULL)
2965 return NULL;
2966 v = PyUnicode_AsEncodedString(u, encoding, NULL);
2967 Py_DECREF(u);
2968 return v;
2969 #endif
2972 static PyObject *
2973 decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
2975 PyObject *v, *u;
2976 char *buf;
2977 char *p;
2978 const char *end;
2979 if (encoding == NULL) {
2980 buf = (char *)s;
2981 u = NULL;
2982 } else if (strcmp(encoding, "iso-8859-1") == 0) {
2983 buf = (char *)s;
2984 u = NULL;
2985 } else {
2986 /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
2987 u = PyString_FromStringAndSize((char *)NULL, len * 4);
2988 if (u == NULL)
2989 return NULL;
2990 p = buf = PyString_AsString(u);
2991 end = s + len;
2992 while (s < end) {
2993 if (*s == '\\') {
2994 *p++ = *s++;
2995 if (*s & 0x80) {
2996 strcpy(p, "u005c");
2997 p += 5;
3000 if (*s & 0x80) { /* XXX inefficient */
3001 PyObject *w;
3002 char *r;
3003 int rn, i;
3004 w = decode_utf8(&s, end, "utf-16-be");
3005 if (w == NULL) {
3006 Py_DECREF(u);
3007 return NULL;
3009 r = PyString_AsString(w);
3010 rn = PyString_Size(w);
3011 assert(rn % 2 == 0);
3012 for (i = 0; i < rn; i += 2) {
3013 sprintf(p, "\\u%02x%02x",
3014 r[i + 0] & 0xFF,
3015 r[i + 1] & 0xFF);
3016 p += 6;
3018 Py_DECREF(w);
3019 } else {
3020 *p++ = *s++;
3023 len = p - buf;
3024 s = buf;
3026 if (rawmode)
3027 v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
3028 else
3029 v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
3030 Py_XDECREF(u);
3031 return v;
3034 /* s is a Python string literal, including the bracketing quote characters,
3035 * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3036 * parsestr parses it, and returns the decoded Python string object.
3038 static PyObject *
3039 parsestr(const char *s, const char *encoding)
3041 size_t len;
3042 int quote = Py_CHARMASK(*s);
3043 int rawmode = 0;
3044 int need_encoding;
3045 int unicode = 0;
3047 if (isalpha(quote) || quote == '_') {
3048 if (quote == 'u' || quote == 'U') {
3049 quote = *++s;
3050 unicode = 1;
3052 if (quote == 'r' || quote == 'R') {
3053 quote = *++s;
3054 rawmode = 1;
3057 if (quote != '\'' && quote != '\"') {
3058 PyErr_BadInternalCall();
3059 return NULL;
3061 s++;
3062 len = strlen(s);
3063 if (len > INT_MAX) {
3064 PyErr_SetString(PyExc_OverflowError,
3065 "string to parse is too long");
3066 return NULL;
3068 if (s[--len] != quote) {
3069 PyErr_BadInternalCall();
3070 return NULL;
3072 if (len >= 4 && s[0] == quote && s[1] == quote) {
3073 s += 2;
3074 len -= 2;
3075 if (s[--len] != quote || s[--len] != quote) {
3076 PyErr_BadInternalCall();
3077 return NULL;
3080 #ifdef Py_USING_UNICODE
3081 if (unicode || Py_UnicodeFlag) {
3082 return decode_unicode(s, len, rawmode, encoding);
3084 #endif
3085 need_encoding = (encoding != NULL &&
3086 strcmp(encoding, "utf-8") != 0 &&
3087 strcmp(encoding, "iso-8859-1") != 0);
3088 if (rawmode || strchr(s, '\\') == NULL) {
3089 if (need_encoding) {
3090 #ifndef Py_USING_UNICODE
3091 /* This should not happen - we never see any other
3092 encoding. */
3093 Py_FatalError("cannot deal with encodings in this build.");
3094 #else
3095 PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3096 if (u == NULL)
3097 return NULL;
3098 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3099 Py_DECREF(u);
3100 return v;
3101 #endif
3102 } else {
3103 return PyString_FromStringAndSize(s, len);
3107 return PyString_DecodeEscape(s, len, NULL, unicode,
3108 need_encoding ? encoding : NULL);
3111 /* Build a Python string object out of a STRING atom. This takes care of
3112 * compile-time literal catenation, calling parsestr() on each piece, and
3113 * pasting the intermediate results together.
3115 static PyObject *
3116 parsestrplus(struct compiling *c, const node *n)
3118 PyObject *v;
3119 int i;
3120 REQ(CHILD(n, 0), STRING);
3121 if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
3122 /* String literal concatenation */
3123 for (i = 1; i < NCH(n); i++) {
3124 PyObject *s;
3125 s = parsestr(STR(CHILD(n, i)), c->c_encoding);
3126 if (s == NULL)
3127 goto onError;
3128 if (PyString_Check(v) && PyString_Check(s)) {
3129 PyString_ConcatAndDel(&v, s);
3130 if (v == NULL)
3131 goto onError;
3133 #ifdef Py_USING_UNICODE
3134 else {
3135 PyObject *temp = PyUnicode_Concat(v, s);
3136 Py_DECREF(s);
3137 Py_DECREF(v);
3138 v = temp;
3139 if (v == NULL)
3140 goto onError;
3142 #endif
3145 return v;
3147 onError:
3148 Py_XDECREF(v);
3149 return NULL;