Patch #1357836:
[python.git] / Python / ast.c
blobf3e611b8e375576feeea115420df2dd4cceb0bf2
1 /*
2 * This file includes functions to transform a concrete syntax tree (CST) to
3 * an abstract syntax tree (AST). The main function is PyAST_FromNode().
5 */
6 #include "Python.h"
7 #include "Python-ast.h"
8 #include "grammar.h"
9 #include "node.h"
10 #include "pyarena.h"
11 #include "ast.h"
12 #include "token.h"
13 #include "parsetok.h"
14 #include "graminit.h"
16 #include <assert.h>
18 /* XXX TO DO
19 - re-indent this file (should be done)
20 - internal error checking (freeing memory, etc.)
21 - syntax errors
24 /* Data structure used internally */
/* Per-conversion state: PyAST_FromNode() fills it in and passes a pointer
   to it down to the ast_for_* helpers. */
25 struct compiling {
26 char *c_encoding; /* source encoding */
/* c_encoding is set by PyAST_FromNode() to "utf-8", to the string of the
   encoding_decl node, or to NULL. */
27 PyArena *c_arena; /* arena for allocating memory */
30 static asdl_seq *seq_for_testlist(struct compiling *, const node *);
31 static expr_ty ast_for_expr(struct compiling *, const node *);
32 static stmt_ty ast_for_stmt(struct compiling *, const node *);
33 static asdl_seq *ast_for_suite(struct compiling *, const node *);
34 static asdl_seq *ast_for_exprlist(struct compiling *, const node *, expr_context_ty);
35 static expr_ty ast_for_testlist(struct compiling *, const node *);
36 static expr_ty ast_for_testlist_gexp(struct compiling *, const node *);
38 /* Note different signature for ast_for_call */
39 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);
41 static PyObject *parsenumber(const char *);
42 static PyObject *parsestr(const char *s, const char *encoding);
43 static PyObject *parsestrplus(struct compiling *, const node *n);
45 #ifndef LINENO
46 #define LINENO(n) ((n)->n_lineno)
47 #endif
49 static identifier
50 new_identifier(const char* n, PyArena *arena) {
51 PyObject* id = PyString_InternFromString(n);
52 PyArena_AddPyObject(arena, id);
53 return id;
56 #define NEW_IDENTIFIER(n) new_identifier(STR(n), c->c_arena)
58 /* This routine provides an invalid object for the syntax error.
59 The outermost routine must unpack this error and create the
60 proper object. We do this so that we don't have to pass
61 the filename to everything function.
63 XXX Maybe we should just pass the filename...
66 static int
67 ast_error(const node *n, const char *errstr)
69 PyObject *u = Py_BuildValue("zi", errstr, LINENO(n));
70 if (!u)
71 return 0;
72 PyErr_SetObject(PyExc_SyntaxError, u);
73 Py_DECREF(u);
74 return 0;
77 static void
78 ast_error_finish(const char *filename)
80 PyObject *type, *value, *tback, *errstr, *loc, *tmp;
81 long lineno;
83 assert(PyErr_Occurred());
84 if (!PyErr_ExceptionMatches(PyExc_SyntaxError))
85 return;
87 PyErr_Fetch(&type, &value, &tback);
88 errstr = PyTuple_GetItem(value, 0);
89 if (!errstr)
90 return;
91 Py_INCREF(errstr);
92 lineno = PyInt_AsLong(PyTuple_GetItem(value, 1));
93 if (lineno == -1) {
94 Py_DECREF(errstr);
95 return;
97 Py_DECREF(value);
99 loc = PyErr_ProgramText(filename, lineno);
100 if (!loc) {
101 Py_INCREF(Py_None);
102 loc = Py_None;
104 tmp = Py_BuildValue("(zlOO)", filename, lineno, Py_None, loc);
105 Py_DECREF(loc);
106 if (!tmp) {
107 Py_DECREF(errstr);
108 return;
110 value = PyTuple_Pack(2, errstr, tmp);
111 Py_DECREF(errstr);
112 Py_DECREF(tmp);
113 if (!value)
114 return;
115 PyErr_Restore(type, value, tback);
118 /* num_stmts() returns number of contained statements.
120 Use this routine to determine how big a sequence is needed for
121 the statements in a parse tree. Its raison d'etre is this bit of
122 grammar:
124 stmt: simple_stmt | compound_stmt
125 simple_stmt: small_stmt (';' small_stmt)* [';'] NEWLINE
127 A simple_stmt can contain multiple small_stmt elements joined
128 by semicolons. If the arg is a simple_stmt, the number of
129 small_stmt elements is returned.
132 static int
133 num_stmts(const node *n)
135 int i, l;
136 node *ch;
138 switch (TYPE(n)) {
139 case single_input:
140 if (TYPE(CHILD(n, 0)) == NEWLINE)
141 return 0;
142 else
143 return num_stmts(CHILD(n, 0));
144 case file_input:
145 l = 0;
146 for (i = 0; i < NCH(n); i++) {
147 ch = CHILD(n, i);
148 if (TYPE(ch) == stmt)
149 l += num_stmts(ch);
151 return l;
152 case stmt:
153 return num_stmts(CHILD(n, 0));
154 case compound_stmt:
155 return 1;
156 case simple_stmt:
157 return NCH(n) / 2; /* Divide by 2 to remove count of semi-colons */
158 case suite:
159 if (NCH(n) == 1)
160 return num_stmts(CHILD(n, 0));
161 else {
162 l = 0;
163 for (i = 2; i < (NCH(n) - 1); i++)
164 l += num_stmts(CHILD(n, i));
165 return l;
167 default: {
168 char buf[128];
170 sprintf(buf, "Non-statement found: %d %d\n",
171 TYPE(n), NCH(n));
172 Py_FatalError(buf);
175 assert(0);
176 return 0;
179 /* Transform the CST rooted at node * to the appropriate AST
182 mod_ty
183 PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
184 PyArena *arena)
186 int i, j, k, num;
187 asdl_seq *stmts = NULL;
188 stmt_ty s;
189 node *ch;
190 struct compiling c;
192 if (flags && flags->cf_flags & PyCF_SOURCE_IS_UTF8) {
193 c.c_encoding = "utf-8";
194 if (TYPE(n) == encoding_decl) {
195 ast_error(n, "encoding declaration in Unicode string");
196 goto error;
198 } else if (TYPE(n) == encoding_decl) {
199 c.c_encoding = STR(n);
200 n = CHILD(n, 0);
201 } else {
202 c.c_encoding = NULL;
204 c.c_arena = arena;
206 k = 0;
207 switch (TYPE(n)) {
208 case file_input:
209 stmts = asdl_seq_new(num_stmts(n), arena);
210 if (!stmts)
211 return NULL;
212 for (i = 0; i < NCH(n) - 1; i++) {
213 ch = CHILD(n, i);
214 if (TYPE(ch) == NEWLINE)
215 continue;
216 REQ(ch, stmt);
217 num = num_stmts(ch);
218 if (num == 1) {
219 s = ast_for_stmt(&c, ch);
220 if (!s)
221 goto error;
222 asdl_seq_SET(stmts, k++, s);
224 else {
225 ch = CHILD(ch, 0);
226 REQ(ch, simple_stmt);
227 for (j = 0; j < num; j++) {
228 s = ast_for_stmt(&c, CHILD(ch, j * 2));
229 if (!s)
230 goto error;
231 asdl_seq_SET(stmts, k++, s);
235 return Module(stmts, arena);
236 case eval_input: {
237 expr_ty testlist_ast;
239 /* XXX Why not gen_for here? */
240 testlist_ast = ast_for_testlist(&c, CHILD(n, 0));
241 if (!testlist_ast)
242 goto error;
243 return Expression(testlist_ast, arena);
245 case single_input:
246 if (TYPE(CHILD(n, 0)) == NEWLINE) {
247 stmts = asdl_seq_new(1, arena);
248 if (!stmts)
249 goto error;
250 asdl_seq_SET(stmts, 0, Pass(n->n_lineno, n->n_col_offset,
251 arena));
252 return Interactive(stmts, arena);
254 else {
255 n = CHILD(n, 0);
256 num = num_stmts(n);
257 stmts = asdl_seq_new(num, arena);
258 if (!stmts)
259 goto error;
260 if (num == 1) {
261 s = ast_for_stmt(&c, n);
262 if (!s)
263 goto error;
264 asdl_seq_SET(stmts, 0, s);
266 else {
267 /* Only a simple_stmt can contain multiple statements. */
268 REQ(n, simple_stmt);
269 for (i = 0; i < NCH(n); i += 2) {
270 if (TYPE(CHILD(n, i)) == NEWLINE)
271 break;
272 s = ast_for_stmt(&c, CHILD(n, i));
273 if (!s)
274 goto error;
275 asdl_seq_SET(stmts, i / 2, s);
279 return Interactive(stmts, arena);
281 default:
282 goto error;
284 error:
285 ast_error_finish(filename);
286 return NULL;
289 /* Return the AST repr. of the operator represented as syntax (|, ^, etc.)
292 static operator_ty
293 get_operator(const node *n)
295 switch (TYPE(n)) {
296 case VBAR:
297 return BitOr;
298 case CIRCUMFLEX:
299 return BitXor;
300 case AMPER:
301 return BitAnd;
302 case LEFTSHIFT:
303 return LShift;
304 case RIGHTSHIFT:
305 return RShift;
306 case PLUS:
307 return Add;
308 case MINUS:
309 return Sub;
310 case STAR:
311 return Mult;
312 case SLASH:
313 return Div;
314 case DOUBLESLASH:
315 return FloorDiv;
316 case PERCENT:
317 return Mod;
318 default:
319 return (operator_ty)0;
323 /* Set the context ctx for expr_ty e, recursively traversing e.
325 Only sets context for expr kinds that "can appear in assignment context"
326 (according to ../Parser/Python.asdl). For other expr kinds, it sets
327 an appropriate syntax error and returns false.
330 static int
331 set_context(expr_ty e, expr_context_ty ctx, const node *n)
333 asdl_seq *s = NULL;
334 /* If a particular expression type can't be used for assign / delete,
335 set expr_name to its name and an error message will be generated.
337 const char* expr_name = NULL;
339 /* The ast defines augmented store and load contexts, but the
340 implementation here doesn't actually use them. The code may be
341 a little more complex than necessary as a result. It also means
342 that expressions in an augmented assignment have no context.
343 Consider restructuring so that augmented assignment uses
344 set_context(), too.
346 assert(ctx != AugStore && ctx != AugLoad);
348 switch (e->kind) {
349 case Attribute_kind:
350 if (ctx == Store &&
351 !strcmp(PyString_AS_STRING(e->v.Attribute.attr), "None")) {
352 return ast_error(n, "assignment to None");
354 e->v.Attribute.ctx = ctx;
355 break;
356 case Subscript_kind:
357 e->v.Subscript.ctx = ctx;
358 break;
359 case Name_kind:
360 if (ctx == Store &&
361 !strcmp(PyString_AS_STRING(e->v.Name.id), "None")) {
362 return ast_error(n, "assignment to None");
364 e->v.Name.ctx = ctx;
365 break;
366 case List_kind:
367 e->v.List.ctx = ctx;
368 s = e->v.List.elts;
369 break;
370 case Tuple_kind:
371 if (asdl_seq_LEN(e->v.Tuple.elts) == 0)
372 return ast_error(n, "can't assign to ()");
373 e->v.Tuple.ctx = ctx;
374 s = e->v.Tuple.elts;
375 break;
376 case Lambda_kind:
377 expr_name = "lambda";
378 break;
379 case Call_kind:
380 expr_name = "function call";
381 break;
382 case BoolOp_kind:
383 case BinOp_kind:
384 case UnaryOp_kind:
385 expr_name = "operator";
386 break;
387 case GeneratorExp_kind:
388 expr_name = "generator expression";
389 break;
390 case ListComp_kind:
391 expr_name = "list comprehension";
392 break;
393 case Dict_kind:
394 case Num_kind:
395 case Str_kind:
396 expr_name = "literal";
397 break;
398 case Compare_kind:
399 expr_name = "comparison";
400 break;
401 case Repr_kind:
402 expr_name = "repr";
403 break;
404 case IfExp_kind:
405 expr_name = "conditional expression";
406 break;
407 default:
408 PyErr_Format(PyExc_SystemError,
409 "unexpected expression in assignment %d (line %d)",
410 e->kind, e->lineno);
411 return 0;
413 /* Check for error string set by switch */
414 if (expr_name) {
415 char buf[300];
416 PyOS_snprintf(buf, sizeof(buf),
417 "can't %s %s",
418 ctx == Store ? "assign to" : "delete",
419 expr_name);
420 return ast_error(n, buf);
423 /* If the LHS is a list or tuple, we need to set the assignment
424 context for all the contained elements.
426 if (s) {
427 int i;
429 for (i = 0; i < asdl_seq_LEN(s); i++) {
430 if (!set_context((expr_ty)asdl_seq_GET(s, i), ctx, n))
431 return 0;
434 return 1;
437 static operator_ty
438 ast_for_augassign(const node *n)
440 REQ(n, augassign);
441 n = CHILD(n, 0);
442 switch (STR(n)[0]) {
443 case '+':
444 return Add;
445 case '-':
446 return Sub;
447 case '/':
448 if (STR(n)[1] == '/')
449 return FloorDiv;
450 else
451 return Div;
452 case '%':
453 return Mod;
454 case '<':
455 return LShift;
456 case '>':
457 return RShift;
458 case '&':
459 return BitAnd;
460 case '^':
461 return BitXor;
462 case '|':
463 return BitOr;
464 case '*':
465 if (STR(n)[1] == '*')
466 return Pow;
467 else
468 return Mult;
469 default:
470 PyErr_Format(PyExc_SystemError, "invalid augassign: %s", STR(n));
471 return (operator_ty)0;
475 static cmpop_ty
476 ast_for_comp_op(const node *n)
478 /* comp_op: '<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'
479 |'is' 'not'
481 REQ(n, comp_op);
482 if (NCH(n) == 1) {
483 n = CHILD(n, 0);
484 switch (TYPE(n)) {
485 case LESS:
486 return Lt;
487 case GREATER:
488 return Gt;
489 case EQEQUAL: /* == */
490 return Eq;
491 case LESSEQUAL:
492 return LtE;
493 case GREATEREQUAL:
494 return GtE;
495 case NOTEQUAL:
496 return NotEq;
497 case NAME:
498 if (strcmp(STR(n), "in") == 0)
499 return In;
500 if (strcmp(STR(n), "is") == 0)
501 return Is;
502 default:
503 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s",
504 STR(n));
505 return (cmpop_ty)0;
508 else if (NCH(n) == 2) {
509 /* handle "not in" and "is not" */
510 switch (TYPE(CHILD(n, 0))) {
511 case NAME:
512 if (strcmp(STR(CHILD(n, 1)), "in") == 0)
513 return NotIn;
514 if (strcmp(STR(CHILD(n, 0)), "is") == 0)
515 return IsNot;
516 default:
517 PyErr_Format(PyExc_SystemError, "invalid comp_op: %s %s",
518 STR(CHILD(n, 0)), STR(CHILD(n, 1)));
519 return (cmpop_ty)0;
522 PyErr_Format(PyExc_SystemError, "invalid comp_op: has %d children",
523 NCH(n));
524 return (cmpop_ty)0;
527 static asdl_seq *
528 seq_for_testlist(struct compiling *c, const node *n)
530 /* testlist: test (',' test)* [','] */
531 asdl_seq *seq;
532 expr_ty expression;
533 int i;
534 assert(TYPE(n) == testlist
535 || TYPE(n) == listmaker
536 || TYPE(n) == testlist_gexp
537 || TYPE(n) == testlist_safe
540 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
541 if (!seq)
542 return NULL;
544 for (i = 0; i < NCH(n); i += 2) {
545 assert(TYPE(CHILD(n, i)) == test || TYPE(CHILD(n, i)) == old_test);
547 expression = ast_for_expr(c, CHILD(n, i));
548 if (!expression)
549 return NULL;
551 assert(i / 2 < seq->size);
552 asdl_seq_SET(seq, i / 2, expression);
554 return seq;
557 static expr_ty
558 compiler_complex_args(struct compiling *c, const node *n)
560 int i, len = (NCH(n) + 1) / 2;
561 expr_ty result;
562 asdl_seq *args = asdl_seq_new(len, c->c_arena);
563 if (!args)
564 return NULL;
566 REQ(n, fplist);
567 for (i = 0; i < len; i++) {
568 const node *child = CHILD(CHILD(n, 2*i), 0);
569 expr_ty arg;
570 if (TYPE(child) == NAME) {
571 if (!strcmp(STR(child), "None")) {
572 ast_error(child, "assignment to None");
573 return NULL;
575 arg = Name(NEW_IDENTIFIER(child), Store, LINENO(child),
576 child->n_col_offset, c->c_arena);
578 else {
579 arg = compiler_complex_args(c, CHILD(CHILD(n, 2*i), 1));
581 asdl_seq_SET(args, i, arg);
584 result = Tuple(args, Store, LINENO(n), n->n_col_offset, c->c_arena);
585 if (!set_context(result, Store, n))
586 return NULL;
587 return result;
591 /* Create AST for argument list. */
593 static arguments_ty
594 ast_for_arguments(struct compiling *c, const node *n)
596 /* parameters: '(' [varargslist] ')'
597 varargslist: (fpdef ['=' test] ',')* ('*' NAME [',' '**' NAME]
598 | '**' NAME) | fpdef ['=' test] (',' fpdef ['=' test])* [',']
600 int i, j, k, n_args = 0, n_defaults = 0, found_default = 0;
601 asdl_seq *args, *defaults;
602 identifier vararg = NULL, kwarg = NULL;
603 node *ch;
605 if (TYPE(n) == parameters) {
606 if (NCH(n) == 2) /* () as argument list */
607 return arguments(NULL, NULL, NULL, NULL, c->c_arena);
608 n = CHILD(n, 1);
610 REQ(n, varargslist);
612 /* first count the number of normal args & defaults */
613 for (i = 0; i < NCH(n); i++) {
614 ch = CHILD(n, i);
615 if (TYPE(ch) == fpdef)
616 n_args++;
617 if (TYPE(ch) == EQUAL)
618 n_defaults++;
620 args = (n_args ? asdl_seq_new(n_args, c->c_arena) : NULL);
621 if (!args && n_args)
622 return NULL; /* Don't need to go to NULL; nothing allocated */
623 defaults = (n_defaults ? asdl_seq_new(n_defaults, c->c_arena) : NULL);
624 if (!defaults && n_defaults)
625 goto error;
627 /* fpdef: NAME | '(' fplist ')'
628 fplist: fpdef (',' fpdef)* [',']
630 i = 0;
631 j = 0; /* index for defaults */
632 k = 0; /* index for args */
633 while (i < NCH(n)) {
634 ch = CHILD(n, i);
635 switch (TYPE(ch)) {
636 case fpdef:
637 /* XXX Need to worry about checking if TYPE(CHILD(n, i+1)) is
638 anything other than EQUAL or a comma? */
639 /* XXX Should NCH(n) check be made a separate check? */
640 if (i + 1 < NCH(n) && TYPE(CHILD(n, i + 1)) == EQUAL) {
641 asdl_seq_SET(defaults, j++,
642 ast_for_expr(c, CHILD(n, i + 2)));
643 i += 2;
644 found_default = 1;
646 else if (found_default) {
647 ast_error(n,
648 "non-default argument follows default argument");
649 goto error;
651 if (NCH(ch) == 3) {
652 ch = CHILD(ch, 1);
653 /* def foo((x)): is not complex, special case. */
654 if (NCH(ch) != 1) {
655 /* We have complex arguments, setup for unpacking. */
656 asdl_seq_SET(args, k++, compiler_complex_args(c, ch));
657 } else {
658 /* def foo((x)): setup for checking NAME below. */
659 ch = CHILD(ch, 0);
662 if (TYPE(CHILD(ch, 0)) == NAME) {
663 expr_ty name;
664 if (!strcmp(STR(CHILD(ch, 0)), "None")) {
665 ast_error(CHILD(ch, 0), "assignment to None");
666 goto error;
668 name = Name(NEW_IDENTIFIER(CHILD(ch, 0)),
669 Param, LINENO(ch), ch->n_col_offset,
670 c->c_arena);
671 if (!name)
672 goto error;
673 asdl_seq_SET(args, k++, name);
676 i += 2; /* the name and the comma */
677 break;
678 case STAR:
679 if (!strcmp(STR(CHILD(n, i+1)), "None")) {
680 ast_error(CHILD(n, i+1), "assignment to None");
681 goto error;
683 vararg = NEW_IDENTIFIER(CHILD(n, i+1));
684 i += 3;
685 break;
686 case DOUBLESTAR:
687 if (!strcmp(STR(CHILD(n, i+1)), "None")) {
688 ast_error(CHILD(n, i+1), "assignment to None");
689 goto error;
691 kwarg = NEW_IDENTIFIER(CHILD(n, i+1));
692 i += 3;
693 break;
694 default:
695 PyErr_Format(PyExc_SystemError,
696 "unexpected node in varargslist: %d @ %d",
697 TYPE(ch), i);
698 goto error;
702 return arguments(args, vararg, kwarg, defaults, c->c_arena);
704 error:
705 Py_XDECREF(vararg);
706 Py_XDECREF(kwarg);
707 return NULL;
710 static expr_ty
711 ast_for_dotted_name(struct compiling *c, const node *n)
713 expr_ty e;
714 identifier id;
715 int lineno, col_offset;
716 int i;
718 REQ(n, dotted_name);
720 lineno = LINENO(n);
721 col_offset = n->n_col_offset;
723 id = NEW_IDENTIFIER(CHILD(n, 0));
724 if (!id)
725 return NULL;
726 e = Name(id, Load, lineno, col_offset, c->c_arena);
727 if (!e)
728 return NULL;
730 for (i = 2; i < NCH(n); i+=2) {
731 id = NEW_IDENTIFIER(CHILD(n, i));
732 if (!id)
733 return NULL;
734 e = Attribute(e, id, Load, lineno, col_offset, c->c_arena);
735 if (!e)
736 return NULL;
739 return e;
742 static expr_ty
743 ast_for_decorator(struct compiling *c, const node *n)
745 /* decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE */
746 expr_ty d = NULL;
747 expr_ty name_expr;
749 REQ(n, decorator);
750 REQ(CHILD(n, 0), AT);
751 REQ(RCHILD(n, -1), NEWLINE);
753 name_expr = ast_for_dotted_name(c, CHILD(n, 1));
754 if (!name_expr)
755 return NULL;
757 if (NCH(n) == 3) { /* No arguments */
758 d = name_expr;
759 name_expr = NULL;
761 else if (NCH(n) == 5) { /* Call with no arguments */
762 d = Call(name_expr, NULL, NULL, NULL, NULL, LINENO(n),
763 n->n_col_offset, c->c_arena);
764 if (!d)
765 return NULL;
766 name_expr = NULL;
768 else {
769 d = ast_for_call(c, CHILD(n, 3), name_expr);
770 if (!d)
771 return NULL;
772 name_expr = NULL;
775 return d;
778 static asdl_seq*
779 ast_for_decorators(struct compiling *c, const node *n)
781 asdl_seq* decorator_seq;
782 expr_ty d;
783 int i;
785 REQ(n, decorators);
786 decorator_seq = asdl_seq_new(NCH(n), c->c_arena);
787 if (!decorator_seq)
788 return NULL;
790 for (i = 0; i < NCH(n); i++) {
791 d = ast_for_decorator(c, CHILD(n, i));
792 if (!d)
793 return NULL;
794 asdl_seq_SET(decorator_seq, i, d);
796 return decorator_seq;
799 static stmt_ty
800 ast_for_funcdef(struct compiling *c, const node *n)
802 /* funcdef: 'def' [decorators] NAME parameters ':' suite */
803 identifier name;
804 arguments_ty args;
805 asdl_seq *body;
806 asdl_seq *decorator_seq = NULL;
807 int name_i;
809 REQ(n, funcdef);
811 if (NCH(n) == 6) { /* decorators are present */
812 decorator_seq = ast_for_decorators(c, CHILD(n, 0));
813 if (!decorator_seq)
814 return NULL;
815 name_i = 2;
817 else {
818 name_i = 1;
821 name = NEW_IDENTIFIER(CHILD(n, name_i));
822 if (!name)
823 return NULL;
824 else if (!strcmp(STR(CHILD(n, name_i)), "None")) {
825 ast_error(CHILD(n, name_i), "assignment to None");
826 return NULL;
828 args = ast_for_arguments(c, CHILD(n, name_i + 1));
829 if (!args)
830 return NULL;
831 body = ast_for_suite(c, CHILD(n, name_i + 3));
832 if (!body)
833 return NULL;
835 return FunctionDef(name, args, body, decorator_seq, LINENO(n),
836 n->n_col_offset, c->c_arena);
839 static expr_ty
840 ast_for_lambdef(struct compiling *c, const node *n)
842 /* lambdef: 'lambda' [varargslist] ':' test */
843 arguments_ty args;
844 expr_ty expression;
846 if (NCH(n) == 3) {
847 args = arguments(NULL, NULL, NULL, NULL, c->c_arena);
848 if (!args)
849 return NULL;
850 expression = ast_for_expr(c, CHILD(n, 2));
851 if (!expression)
852 return NULL;
854 else {
855 args = ast_for_arguments(c, CHILD(n, 1));
856 if (!args)
857 return NULL;
858 expression = ast_for_expr(c, CHILD(n, 3));
859 if (!expression)
860 return NULL;
863 return Lambda(args, expression, LINENO(n), n->n_col_offset, c->c_arena);
866 static expr_ty
867 ast_for_ifexpr(struct compiling *c, const node *n)
869 /* test: or_test 'if' or_test 'else' test */
870 expr_ty expression, body, orelse;
872 assert(NCH(n) == 5);
873 body = ast_for_expr(c, CHILD(n, 0));
874 if (!body)
875 return NULL;
876 expression = ast_for_expr(c, CHILD(n, 2));
877 if (!expression)
878 return NULL;
879 orelse = ast_for_expr(c, CHILD(n, 4));
880 if (!orelse)
881 return NULL;
882 return IfExp(expression, body, orelse, LINENO(n), n->n_col_offset,
883 c->c_arena);
886 /* Count the number of 'for' loop in a list comprehension.
888 Helper for ast_for_listcomp().
891 static int
892 count_list_fors(const node *n)
894 int n_fors = 0;
895 node *ch = CHILD(n, 1);
897 count_list_for:
898 n_fors++;
899 REQ(ch, list_for);
900 if (NCH(ch) == 5)
901 ch = CHILD(ch, 4);
902 else
903 return n_fors;
904 count_list_iter:
905 REQ(ch, list_iter);
906 ch = CHILD(ch, 0);
907 if (TYPE(ch) == list_for)
908 goto count_list_for;
909 else if (TYPE(ch) == list_if) {
910 if (NCH(ch) == 3) {
911 ch = CHILD(ch, 2);
912 goto count_list_iter;
914 else
915 return n_fors;
918 /* Should never be reached */
919 PyErr_SetString(PyExc_SystemError, "logic error in count_list_fors");
920 return -1;
923 /* Count the number of 'if' statements in a list comprehension.
925 Helper for ast_for_listcomp().
928 static int
929 count_list_ifs(const node *n)
931 int n_ifs = 0;
933 count_list_iter:
934 REQ(n, list_iter);
935 if (TYPE(CHILD(n, 0)) == list_for)
936 return n_ifs;
937 n = CHILD(n, 0);
938 REQ(n, list_if);
939 n_ifs++;
940 if (NCH(n) == 2)
941 return n_ifs;
942 n = CHILD(n, 2);
943 goto count_list_iter;
946 static expr_ty
947 ast_for_listcomp(struct compiling *c, const node *n)
949 /* listmaker: test ( list_for | (',' test)* [','] )
950 list_for: 'for' exprlist 'in' testlist_safe [list_iter]
951 list_iter: list_for | list_if
952 list_if: 'if' test [list_iter]
953 testlist_safe: test [(',' test)+ [',']]
955 expr_ty elt;
956 asdl_seq *listcomps;
957 int i, n_fors;
958 node *ch;
960 REQ(n, listmaker);
961 assert(NCH(n) > 1);
963 elt = ast_for_expr(c, CHILD(n, 0));
964 if (!elt)
965 return NULL;
967 n_fors = count_list_fors(n);
968 if (n_fors == -1)
969 return NULL;
971 listcomps = asdl_seq_new(n_fors, c->c_arena);
972 if (!listcomps)
973 return NULL;
975 ch = CHILD(n, 1);
976 for (i = 0; i < n_fors; i++) {
977 comprehension_ty lc;
978 asdl_seq *t;
979 expr_ty expression;
981 REQ(ch, list_for);
983 t = ast_for_exprlist(c, CHILD(ch, 1), Store);
984 if (!t)
985 return NULL;
986 expression = ast_for_testlist(c, CHILD(ch, 3));
987 if (!expression)
988 return NULL;
990 if (asdl_seq_LEN(t) == 1)
991 lc = comprehension((expr_ty)asdl_seq_GET(t, 0), expression, NULL,
992 c->c_arena);
993 else
994 lc = comprehension(Tuple(t, Store, LINENO(ch), ch->n_col_offset,
995 c->c_arena),
996 expression, NULL, c->c_arena);
997 if (!lc)
998 return NULL;
1000 if (NCH(ch) == 5) {
1001 int j, n_ifs;
1002 asdl_seq *ifs;
1004 ch = CHILD(ch, 4);
1005 n_ifs = count_list_ifs(ch);
1006 if (n_ifs == -1)
1007 return NULL;
1009 ifs = asdl_seq_new(n_ifs, c->c_arena);
1010 if (!ifs)
1011 return NULL;
1013 for (j = 0; j < n_ifs; j++) {
1014 REQ(ch, list_iter);
1015 ch = CHILD(ch, 0);
1016 REQ(ch, list_if);
1018 asdl_seq_SET(ifs, j, ast_for_expr(c, CHILD(ch, 1)));
1019 if (NCH(ch) == 3)
1020 ch = CHILD(ch, 2);
1022 /* on exit, must guarantee that ch is a list_for */
1023 if (TYPE(ch) == list_iter)
1024 ch = CHILD(ch, 0);
1025 lc->ifs = ifs;
1027 asdl_seq_SET(listcomps, i, lc);
1030 return ListComp(elt, listcomps, LINENO(n), n->n_col_offset, c->c_arena);
1034 Count the number of 'for' loops in a generator expression.
1036 Helper for ast_for_genexp().
1039 static int
1040 count_gen_fors(const node *n)
1042 int n_fors = 0;
1043 node *ch = CHILD(n, 1);
1045 count_gen_for:
1046 n_fors++;
1047 REQ(ch, gen_for);
1048 if (NCH(ch) == 5)
1049 ch = CHILD(ch, 4);
1050 else
1051 return n_fors;
1052 count_gen_iter:
1053 REQ(ch, gen_iter);
1054 ch = CHILD(ch, 0);
1055 if (TYPE(ch) == gen_for)
1056 goto count_gen_for;
1057 else if (TYPE(ch) == gen_if) {
1058 if (NCH(ch) == 3) {
1059 ch = CHILD(ch, 2);
1060 goto count_gen_iter;
1062 else
1063 return n_fors;
1066 /* Should never be reached */
1067 PyErr_SetString(PyExc_SystemError,
1068 "logic error in count_gen_fors");
1069 return -1;
1072 /* Count the number of 'if' statements in a generator expression.
1074 Helper for ast_for_genexp().
1077 static int
1078 count_gen_ifs(const node *n)
1080 int n_ifs = 0;
1082 while (1) {
1083 REQ(n, gen_iter);
1084 if (TYPE(CHILD(n, 0)) == gen_for)
1085 return n_ifs;
1086 n = CHILD(n, 0);
1087 REQ(n, gen_if);
1088 n_ifs++;
1089 if (NCH(n) == 2)
1090 return n_ifs;
1091 n = CHILD(n, 2);
1095 /* TODO(jhylton): Combine with list comprehension code? */
1096 static expr_ty
1097 ast_for_genexp(struct compiling *c, const node *n)
1099 /* testlist_gexp: test ( gen_for | (',' test)* [','] )
1100 argument: [test '='] test [gen_for] # Really [keyword '='] test */
1101 expr_ty elt;
1102 asdl_seq *genexps;
1103 int i, n_fors;
1104 node *ch;
1106 assert(TYPE(n) == (testlist_gexp) || TYPE(n) == (argument));
1107 assert(NCH(n) > 1);
1109 elt = ast_for_expr(c, CHILD(n, 0));
1110 if (!elt)
1111 return NULL;
1113 n_fors = count_gen_fors(n);
1114 if (n_fors == -1)
1115 return NULL;
1117 genexps = asdl_seq_new(n_fors, c->c_arena);
1118 if (!genexps)
1119 return NULL;
1121 ch = CHILD(n, 1);
1122 for (i = 0; i < n_fors; i++) {
1123 comprehension_ty ge;
1124 asdl_seq *t;
1125 expr_ty expression;
1127 REQ(ch, gen_for);
1129 t = ast_for_exprlist(c, CHILD(ch, 1), Store);
1130 if (!t)
1131 return NULL;
1132 expression = ast_for_expr(c, CHILD(ch, 3));
1133 if (!expression)
1134 return NULL;
1136 if (asdl_seq_LEN(t) == 1)
1137 ge = comprehension((expr_ty)asdl_seq_GET(t, 0), expression,
1138 NULL, c->c_arena);
1139 else
1140 ge = comprehension(Tuple(t, Store, LINENO(ch), ch->n_col_offset,
1141 c->c_arena),
1142 expression, NULL, c->c_arena);
1144 if (!ge)
1145 return NULL;
1147 if (NCH(ch) == 5) {
1148 int j, n_ifs;
1149 asdl_seq *ifs;
1151 ch = CHILD(ch, 4);
1152 n_ifs = count_gen_ifs(ch);
1153 if (n_ifs == -1)
1154 return NULL;
1156 ifs = asdl_seq_new(n_ifs, c->c_arena);
1157 if (!ifs)
1158 return NULL;
1160 for (j = 0; j < n_ifs; j++) {
1161 REQ(ch, gen_iter);
1162 ch = CHILD(ch, 0);
1163 REQ(ch, gen_if);
1165 expression = ast_for_expr(c, CHILD(ch, 1));
1166 if (!expression)
1167 return NULL;
1168 asdl_seq_SET(ifs, j, expression);
1169 if (NCH(ch) == 3)
1170 ch = CHILD(ch, 2);
1172 /* on exit, must guarantee that ch is a gen_for */
1173 if (TYPE(ch) == gen_iter)
1174 ch = CHILD(ch, 0);
1175 ge->ifs = ifs;
1177 asdl_seq_SET(genexps, i, ge);
1180 return GeneratorExp(elt, genexps, LINENO(n), n->n_col_offset, c->c_arena);
1183 static expr_ty
1184 ast_for_atom(struct compiling *c, const node *n)
1186 /* atom: '(' [yield_expr|testlist_gexp] ')' | '[' [listmaker] ']'
1187 | '{' [dictmaker] '}' | '`' testlist '`' | NAME | NUMBER | STRING+
1189 node *ch = CHILD(n, 0);
1191 switch (TYPE(ch)) {
1192 case NAME:
1193 /* All names start in Load context, but may later be
1194 changed. */
1195 return Name(NEW_IDENTIFIER(ch), Load, LINENO(n), n->n_col_offset, c->c_arena);
1196 case STRING: {
1197 PyObject *str = parsestrplus(c, n);
1198 if (!str)
1199 return NULL;
1201 PyArena_AddPyObject(c->c_arena, str);
1202 return Str(str, LINENO(n), n->n_col_offset, c->c_arena);
1204 case NUMBER: {
1205 PyObject *pynum = parsenumber(STR(ch));
1206 if (!pynum)
1207 return NULL;
1209 PyArena_AddPyObject(c->c_arena, pynum);
1210 return Num(pynum, LINENO(n), n->n_col_offset, c->c_arena);
1212 case LPAR: /* some parenthesized expressions */
1213 ch = CHILD(n, 1);
1215 if (TYPE(ch) == RPAR)
1216 return Tuple(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1218 if (TYPE(ch) == yield_expr)
1219 return ast_for_expr(c, ch);
1221 if ((NCH(ch) > 1) && (TYPE(CHILD(ch, 1)) == gen_for))
1222 return ast_for_genexp(c, ch);
1224 return ast_for_testlist_gexp(c, ch);
1225 case LSQB: /* list (or list comprehension) */
1226 ch = CHILD(n, 1);
1228 if (TYPE(ch) == RSQB)
1229 return List(NULL, Load, LINENO(n), n->n_col_offset, c->c_arena);
1231 REQ(ch, listmaker);
1232 if (NCH(ch) == 1 || TYPE(CHILD(ch, 1)) == COMMA) {
1233 asdl_seq *elts = seq_for_testlist(c, ch);
1234 if (!elts)
1235 return NULL;
1237 return List(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1239 else
1240 return ast_for_listcomp(c, ch);
1241 case LBRACE: {
1242 /* dictmaker: test ':' test (',' test ':' test)* [','] */
1243 int i, size;
1244 asdl_seq *keys, *values;
1246 ch = CHILD(n, 1);
1247 size = (NCH(ch) + 1) / 4; /* +1 in case no trailing comma */
1248 keys = asdl_seq_new(size, c->c_arena);
1249 if (!keys)
1250 return NULL;
1252 values = asdl_seq_new(size, c->c_arena);
1253 if (!values)
1254 return NULL;
1256 for (i = 0; i < NCH(ch); i += 4) {
1257 expr_ty expression;
1259 expression = ast_for_expr(c, CHILD(ch, i));
1260 if (!expression)
1261 return NULL;
1263 asdl_seq_SET(keys, i / 4, expression);
1265 expression = ast_for_expr(c, CHILD(ch, i + 2));
1266 if (!expression)
1267 return NULL;
1269 asdl_seq_SET(values, i / 4, expression);
1271 return Dict(keys, values, LINENO(n), n->n_col_offset, c->c_arena);
1273 case BACKQUOTE: { /* repr */
1274 expr_ty expression = ast_for_testlist(c, CHILD(n, 1));
1275 if (!expression)
1276 return NULL;
1278 return Repr(expression, LINENO(n), n->n_col_offset, c->c_arena);
1280 default:
1281 PyErr_Format(PyExc_SystemError, "unhandled atom %d", TYPE(ch));
1282 return NULL;
1286 static slice_ty
1287 ast_for_slice(struct compiling *c, const node *n)
1289 node *ch;
1290 expr_ty lower = NULL, upper = NULL, step = NULL;
1292 REQ(n, subscript);
1295 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1296 sliceop: ':' [test]
1298 ch = CHILD(n, 0);
1299 if (TYPE(ch) == DOT)
1300 return Ellipsis(c->c_arena);
1302 if (NCH(n) == 1 && TYPE(ch) == test) {
1303 /* 'step' variable hold no significance in terms of being used over
1304 other vars */
1305 step = ast_for_expr(c, ch);
1306 if (!step)
1307 return NULL;
1309 return Index(step, c->c_arena);
1312 if (TYPE(ch) == test) {
1313 lower = ast_for_expr(c, ch);
1314 if (!lower)
1315 return NULL;
1318 /* If there's an upper bound it's in the second or third position. */
1319 if (TYPE(ch) == COLON) {
1320 if (NCH(n) > 1) {
1321 node *n2 = CHILD(n, 1);
1323 if (TYPE(n2) == test) {
1324 upper = ast_for_expr(c, n2);
1325 if (!upper)
1326 return NULL;
1329 } else if (NCH(n) > 2) {
1330 node *n2 = CHILD(n, 2);
1332 if (TYPE(n2) == test) {
1333 upper = ast_for_expr(c, n2);
1334 if (!upper)
1335 return NULL;
1339 ch = CHILD(n, NCH(n) - 1);
1340 if (TYPE(ch) == sliceop) {
1341 if (NCH(ch) == 1) {
1342 /* No expression, so step is None */
1343 ch = CHILD(ch, 0);
1344 step = Name(new_identifier("None", c->c_arena), Load,
1345 LINENO(ch), ch->n_col_offset, c->c_arena);
1346 if (!step)
1347 return NULL;
1348 } else {
1349 ch = CHILD(ch, 1);
1350 if (TYPE(ch) == test) {
1351 step = ast_for_expr(c, ch);
1352 if (!step)
1353 return NULL;
1358 return Slice(lower, upper, step, c->c_arena);
1361 static expr_ty
1362 ast_for_binop(struct compiling *c, const node *n)
1364 /* Must account for a sequence of expressions.
1365 How should A op B op C by represented?
1366 BinOp(BinOp(A, op, B), op, C).
1369 int i, nops;
1370 expr_ty expr1, expr2, result;
1371 operator_ty newoperator;
1373 expr1 = ast_for_expr(c, CHILD(n, 0));
1374 if (!expr1)
1375 return NULL;
1377 expr2 = ast_for_expr(c, CHILD(n, 2));
1378 if (!expr2)
1379 return NULL;
1381 newoperator = get_operator(CHILD(n, 1));
1382 if (!newoperator)
1383 return NULL;
1385 result = BinOp(expr1, newoperator, expr2, LINENO(n), n->n_col_offset,
1386 c->c_arena);
1387 if (!result)
1388 return NULL;
1390 nops = (NCH(n) - 1) / 2;
1391 for (i = 1; i < nops; i++) {
1392 expr_ty tmp_result, tmp;
1393 const node* next_oper = CHILD(n, i * 2 + 1);
1395 newoperator = get_operator(next_oper);
1396 if (!newoperator)
1397 return NULL;
1399 tmp = ast_for_expr(c, CHILD(n, i * 2 + 2));
1400 if (!tmp)
1401 return NULL;
1403 tmp_result = BinOp(result, newoperator, tmp,
1404 LINENO(next_oper), next_oper->n_col_offset,
1405 c->c_arena);
1406 if (!tmp)
1407 return NULL;
1408 result = tmp_result;
1410 return result;
1413 static expr_ty
1414 ast_for_trailer(struct compiling *c, const node *n, expr_ty left_expr)
1416 /* trailer: '(' [arglist] ')' | '[' subscriptlist ']' | '.' NAME
1417 subscriptlist: subscript (',' subscript)* [',']
1418 subscript: '.' '.' '.' | test | [test] ':' [test] [sliceop]
1420 REQ(n, trailer);
1421 if (TYPE(CHILD(n, 0)) == LPAR) {
1422 if (NCH(n) == 2)
1423 return Call(left_expr, NULL, NULL, NULL, NULL, LINENO(n),
1424 n->n_col_offset, c->c_arena);
1425 else
1426 return ast_for_call(c, CHILD(n, 1), left_expr);
1428 else if (TYPE(CHILD(n, 0)) == DOT ) {
1429 return Attribute(left_expr, NEW_IDENTIFIER(CHILD(n, 1)), Load,
1430 LINENO(n), n->n_col_offset, c->c_arena);
1432 else {
1433 REQ(CHILD(n, 0), LSQB);
1434 REQ(CHILD(n, 2), RSQB);
1435 n = CHILD(n, 1);
1436 if (NCH(n) == 1) {
1437 slice_ty slc = ast_for_slice(c, CHILD(n, 0));
1438 if (!slc)
1439 return NULL;
1440 return Subscript(left_expr, slc, Load, LINENO(n), n->n_col_offset,
1441 c->c_arena);
1443 else {
1444 /* The grammar is ambiguous here. The ambiguity is resolved
1445 by treating the sequence as a tuple literal if there are
1446 no slice features.
1448 int j;
1449 slice_ty slc;
1450 expr_ty e;
1451 bool simple = true;
1452 asdl_seq *slices, *elts;
1453 slices = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1454 if (!slices)
1455 return NULL;
1456 for (j = 0; j < NCH(n); j += 2) {
1457 slc = ast_for_slice(c, CHILD(n, j));
1458 if (!slc)
1459 return NULL;
1460 if (slc->kind != Index_kind)
1461 simple = false;
1462 asdl_seq_SET(slices, j / 2, slc);
1464 if (!simple) {
1465 return Subscript(left_expr, ExtSlice(slices, c->c_arena),
1466 Load, LINENO(n), n->n_col_offset, c->c_arena);
1468 /* extract Index values and put them in a Tuple */
1469 elts = asdl_seq_new(asdl_seq_LEN(slices), c->c_arena);
1470 if (!elts)
1471 return NULL;
1472 for (j = 0; j < asdl_seq_LEN(slices); ++j) {
1473 slc = (slice_ty)asdl_seq_GET(slices, j);
1474 assert(slc->kind == Index_kind && slc->v.Index.value);
1475 asdl_seq_SET(elts, j, slc->v.Index.value);
1477 e = Tuple(elts, Load, LINENO(n), n->n_col_offset, c->c_arena);
1478 if (!e)
1479 return NULL;
1480 return Subscript(left_expr, Index(e, c->c_arena),
1481 Load, LINENO(n), n->n_col_offset, c->c_arena);
1486 static expr_ty
1487 ast_for_power(struct compiling *c, const node *n)
1489 /* power: atom trailer* ('**' factor)*
1491 int i;
1492 expr_ty e, tmp;
1493 REQ(n, power);
1494 e = ast_for_atom(c, CHILD(n, 0));
1495 if (!e)
1496 return NULL;
1497 if (NCH(n) == 1)
1498 return e;
1499 for (i = 1; i < NCH(n); i++) {
1500 node *ch = CHILD(n, i);
1501 if (TYPE(ch) != trailer)
1502 break;
1503 tmp = ast_for_trailer(c, ch, e);
1504 if (!tmp)
1505 return NULL;
1506 tmp->lineno = e->lineno;
1507 tmp->col_offset = e->col_offset;
1508 e = tmp;
1510 if (TYPE(CHILD(n, NCH(n) - 1)) == factor) {
1511 expr_ty f = ast_for_expr(c, CHILD(n, NCH(n) - 1));
1512 if (!f)
1513 return NULL;
1514 tmp = BinOp(e, Pow, f, LINENO(n), n->n_col_offset, c->c_arena);
1515 if (!tmp)
1516 return NULL;
1517 e = tmp;
1519 return e;
1522 /* Do not name a variable 'expr'! Will cause a compile error.
1525 static expr_ty
1526 ast_for_expr(struct compiling *c, const node *n)
1528 /* handle the full range of simple expressions
1529 test: or_test ['if' or_test 'else' test] | lambdef
1530 or_test: and_test ('or' and_test)*
1531 and_test: not_test ('and' not_test)*
1532 not_test: 'not' not_test | comparison
1533 comparison: expr (comp_op expr)*
1534 expr: xor_expr ('|' xor_expr)*
1535 xor_expr: and_expr ('^' and_expr)*
1536 and_expr: shift_expr ('&' shift_expr)*
1537 shift_expr: arith_expr (('<<'|'>>') arith_expr)*
1538 arith_expr: term (('+'|'-') term)*
1539 term: factor (('*'|'/'|'%'|'//') factor)*
1540 factor: ('+'|'-'|'~') factor | power
1541 power: atom trailer* ('**' factor)*
1543 As well as modified versions that exist for backward compatibility,
1544 to explicitly allow:
1545 [ x for x in lambda: 0, lambda: 1 ]
1546 (which would be ambiguous without these extra rules)
1548 old_test: or_test | old_lambdef
1549 old_lambdef: 'lambda' [vararglist] ':' old_test
1553 asdl_seq *seq;
1554 int i;
1556 loop:
1557 switch (TYPE(n)) {
1558 case test:
1559 case old_test:
1560 if (TYPE(CHILD(n, 0)) == lambdef ||
1561 TYPE(CHILD(n, 0)) == old_lambdef)
1562 return ast_for_lambdef(c, CHILD(n, 0));
1563 else if (NCH(n) > 1)
1564 return ast_for_ifexpr(c, n);
1565 /* Fallthrough */
1566 case or_test:
1567 case and_test:
1568 if (NCH(n) == 1) {
1569 n = CHILD(n, 0);
1570 goto loop;
1572 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
1573 if (!seq)
1574 return NULL;
1575 for (i = 0; i < NCH(n); i += 2) {
1576 expr_ty e = ast_for_expr(c, CHILD(n, i));
1577 if (!e)
1578 return NULL;
1579 asdl_seq_SET(seq, i / 2, e);
1581 if (!strcmp(STR(CHILD(n, 1)), "and"))
1582 return BoolOp(And, seq, LINENO(n), n->n_col_offset,
1583 c->c_arena);
1584 assert(!strcmp(STR(CHILD(n, 1)), "or"));
1585 return BoolOp(Or, seq, LINENO(n), n->n_col_offset, c->c_arena);
1586 case not_test:
1587 if (NCH(n) == 1) {
1588 n = CHILD(n, 0);
1589 goto loop;
1591 else {
1592 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
1593 if (!expression)
1594 return NULL;
1596 return UnaryOp(Not, expression, LINENO(n), n->n_col_offset,
1597 c->c_arena);
1599 case comparison:
1600 if (NCH(n) == 1) {
1601 n = CHILD(n, 0);
1602 goto loop;
1604 else {
1605 expr_ty expression;
1606 asdl_int_seq *ops;
1607 asdl_seq *cmps;
1608 ops = asdl_int_seq_new(NCH(n) / 2, c->c_arena);
1609 if (!ops)
1610 return NULL;
1611 cmps = asdl_seq_new(NCH(n) / 2, c->c_arena);
1612 if (!cmps) {
1613 return NULL;
1615 for (i = 1; i < NCH(n); i += 2) {
1616 cmpop_ty newoperator;
1618 newoperator = ast_for_comp_op(CHILD(n, i));
1619 if (!newoperator) {
1620 return NULL;
1623 expression = ast_for_expr(c, CHILD(n, i + 1));
1624 if (!expression) {
1625 return NULL;
1628 asdl_seq_SET(ops, i / 2, newoperator);
1629 asdl_seq_SET(cmps, i / 2, expression);
1631 expression = ast_for_expr(c, CHILD(n, 0));
1632 if (!expression) {
1633 return NULL;
1636 return Compare(expression, ops, cmps, LINENO(n),
1637 n->n_col_offset, c->c_arena);
1639 break;
1641 /* The next five cases all handle BinOps. The main body of code
1642 is the same in each case, but the switch turned inside out to
1643 reuse the code for each type of operator.
1645 case expr:
1646 case xor_expr:
1647 case and_expr:
1648 case shift_expr:
1649 case arith_expr:
1650 case term:
1651 if (NCH(n) == 1) {
1652 n = CHILD(n, 0);
1653 goto loop;
1655 return ast_for_binop(c, n);
1656 case yield_expr: {
1657 expr_ty exp = NULL;
1658 if (NCH(n) == 2) {
1659 exp = ast_for_testlist(c, CHILD(n, 1));
1660 if (!exp)
1661 return NULL;
1663 return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena);
1665 case factor: {
1666 expr_ty expression;
1668 if (NCH(n) == 1) {
1669 n = CHILD(n, 0);
1670 goto loop;
1673 expression = ast_for_expr(c, CHILD(n, 1));
1674 if (!expression)
1675 return NULL;
1677 switch (TYPE(CHILD(n, 0))) {
1678 case PLUS:
1679 return UnaryOp(UAdd, expression, LINENO(n), n->n_col_offset, c->c_arena);
1680 case MINUS:
1681 return UnaryOp(USub, expression, LINENO(n), n->n_col_offset, c->c_arena);
1682 case TILDE:
1683 return UnaryOp(Invert, expression, LINENO(n), n->n_col_offset, c->c_arena);
1685 PyErr_Format(PyExc_SystemError, "unhandled factor: %d",
1686 TYPE(CHILD(n, 0)));
1687 break;
1689 case power:
1690 return ast_for_power(c, n);
1691 default:
1692 PyErr_Format(PyExc_SystemError, "unhandled expr: %d", TYPE(n));
1693 return NULL;
1695 /* should never get here unless if error is set */
1696 return NULL;
1699 static expr_ty
1700 ast_for_call(struct compiling *c, const node *n, expr_ty func)
1703 arglist: (argument ',')* (argument [',']| '*' test [',' '**' test]
1704 | '**' test)
1705 argument: [test '='] test [gen_for] # Really [keyword '='] test
1708 int i, nargs, nkeywords, ngens;
1709 asdl_seq *args;
1710 asdl_seq *keywords;
1711 expr_ty vararg = NULL, kwarg = NULL;
1713 REQ(n, arglist);
1715 nargs = 0;
1716 nkeywords = 0;
1717 ngens = 0;
1718 for (i = 0; i < NCH(n); i++) {
1719 node *ch = CHILD(n, i);
1720 if (TYPE(ch) == argument) {
1721 if (NCH(ch) == 1)
1722 nargs++;
1723 else if (TYPE(CHILD(ch, 1)) == gen_for)
1724 ngens++;
1725 else
1726 nkeywords++;
1729 if (ngens > 1 || (ngens && (nargs || nkeywords))) {
1730 ast_error(n, "Generator expression must be parenthesized "
1731 "if not sole argument");
1732 return NULL;
1735 if (nargs + nkeywords + ngens > 255) {
1736 ast_error(n, "more than 255 arguments");
1737 return NULL;
1740 args = asdl_seq_new(nargs + ngens, c->c_arena);
1741 if (!args)
1742 return NULL;
1743 keywords = asdl_seq_new(nkeywords, c->c_arena);
1744 if (!keywords)
1745 return NULL;
1746 nargs = 0;
1747 nkeywords = 0;
1748 for (i = 0; i < NCH(n); i++) {
1749 node *ch = CHILD(n, i);
1750 if (TYPE(ch) == argument) {
1751 expr_ty e;
1752 if (NCH(ch) == 1) {
1753 if (nkeywords) {
1754 ast_error(CHILD(ch, 0),
1755 "non-keyword arg after keyword arg");
1756 return NULL;
1758 e = ast_for_expr(c, CHILD(ch, 0));
1759 if (!e)
1760 return NULL;
1761 asdl_seq_SET(args, nargs++, e);
1763 else if (TYPE(CHILD(ch, 1)) == gen_for) {
1764 e = ast_for_genexp(c, ch);
1765 if (!e)
1766 return NULL;
1767 asdl_seq_SET(args, nargs++, e);
1769 else {
1770 keyword_ty kw;
1771 identifier key;
1773 /* CHILD(ch, 0) is test, but must be an identifier? */
1774 e = ast_for_expr(c, CHILD(ch, 0));
1775 if (!e)
1776 return NULL;
1777 /* f(lambda x: x[0] = 3) ends up getting parsed with
1778 * LHS test = lambda x: x[0], and RHS test = 3.
1779 * SF bug 132313 points out that complaining about a keyword
1780 * then is very confusing.
1782 if (e->kind == Lambda_kind) {
1783 ast_error(CHILD(ch, 0), "lambda cannot contain assignment");
1784 return NULL;
1785 } else if (e->kind != Name_kind) {
1786 ast_error(CHILD(ch, 0), "keyword can't be an expression");
1787 return NULL;
1789 key = e->v.Name.id;
1790 e = ast_for_expr(c, CHILD(ch, 2));
1791 if (!e)
1792 return NULL;
1793 kw = keyword(key, e, c->c_arena);
1794 if (!kw)
1795 return NULL;
1796 asdl_seq_SET(keywords, nkeywords++, kw);
1799 else if (TYPE(ch) == STAR) {
1800 vararg = ast_for_expr(c, CHILD(n, i+1));
1801 i++;
1803 else if (TYPE(ch) == DOUBLESTAR) {
1804 kwarg = ast_for_expr(c, CHILD(n, i+1));
1805 i++;
1809 return Call(func, args, keywords, vararg, kwarg, func->lineno, func->col_offset, c->c_arena);
1812 static expr_ty
1813 ast_for_testlist(struct compiling *c, const node* n)
1815 /* testlist_gexp: test (',' test)* [','] */
1816 /* testlist: test (',' test)* [','] */
1817 /* testlist_safe: test (',' test)+ [','] */
1818 /* testlist1: test (',' test)* */
1819 assert(NCH(n) > 0);
1820 if (TYPE(n) == testlist_gexp) {
1821 if (NCH(n) > 1)
1822 assert(TYPE(CHILD(n, 1)) != gen_for);
1824 else {
1825 assert(TYPE(n) == testlist ||
1826 TYPE(n) == testlist_safe ||
1827 TYPE(n) == testlist1);
1829 if (NCH(n) == 1)
1830 return ast_for_expr(c, CHILD(n, 0));
1831 else {
1832 asdl_seq *tmp = seq_for_testlist(c, n);
1833 if (!tmp)
1834 return NULL;
1835 return Tuple(tmp, Load, LINENO(n), n->n_col_offset, c->c_arena);
1839 static expr_ty
1840 ast_for_testlist_gexp(struct compiling *c, const node* n)
1842 /* testlist_gexp: test ( gen_for | (',' test)* [','] ) */
1843 /* argument: test [ gen_for ] */
1844 assert(TYPE(n) == testlist_gexp || TYPE(n) == argument);
1845 if (NCH(n) > 1 && TYPE(CHILD(n, 1)) == gen_for)
1846 return ast_for_genexp(c, n);
1847 return ast_for_testlist(c, n);
1850 /* like ast_for_testlist() but returns a sequence */
1851 static asdl_seq*
1852 ast_for_class_bases(struct compiling *c, const node* n)
1854 /* testlist: test (',' test)* [','] */
1855 assert(NCH(n) > 0);
1856 REQ(n, testlist);
1857 if (NCH(n) == 1) {
1858 expr_ty base;
1859 asdl_seq *bases = asdl_seq_new(1, c->c_arena);
1860 if (!bases)
1861 return NULL;
1862 base = ast_for_expr(c, CHILD(n, 0));
1863 if (!base)
1864 return NULL;
1865 asdl_seq_SET(bases, 0, base);
1866 return bases;
1869 return seq_for_testlist(c, n);
1872 static stmt_ty
1873 ast_for_expr_stmt(struct compiling *c, const node *n)
1875 REQ(n, expr_stmt);
1876 /* expr_stmt: testlist (augassign (yield_expr|testlist)
1877 | ('=' (yield_expr|testlist))*)
1878 testlist: test (',' test)* [',']
1879 augassign: '+=' | '-=' | '*=' | '/=' | '%=' | '&=' | '|=' | '^='
1880 | '<<=' | '>>=' | '**=' | '//='
1881 test: ... here starts the operator precendence dance
1884 if (NCH(n) == 1) {
1885 expr_ty e = ast_for_testlist(c, CHILD(n, 0));
1886 if (!e)
1887 return NULL;
1889 return Expr(e, LINENO(n), n->n_col_offset, c->c_arena);
1891 else if (TYPE(CHILD(n, 1)) == augassign) {
1892 expr_ty expr1, expr2;
1893 operator_ty newoperator;
1894 node *ch = CHILD(n, 0);
1896 if (TYPE(ch) == testlist)
1897 expr1 = ast_for_testlist(c, ch);
1898 else
1899 expr1 = Yield(ast_for_expr(c, CHILD(ch, 0)), LINENO(ch), n->n_col_offset,
1900 c->c_arena);
1902 if (!expr1)
1903 return NULL;
1904 /* TODO(jhylton): Figure out why set_context() can't be used here. */
1905 switch (expr1->kind) {
1906 case GeneratorExp_kind:
1907 ast_error(ch, "augmented assignment to generator "
1908 "expression not possible");
1909 return NULL;
1910 case Name_kind: {
1911 const char *var_name = PyString_AS_STRING(expr1->v.Name.id);
1912 if (var_name[0] == 'N' && !strcmp(var_name, "None")) {
1913 ast_error(ch, "assignment to None");
1914 return NULL;
1916 break;
1918 case Attribute_kind:
1919 case Subscript_kind:
1920 break;
1921 default:
1922 ast_error(ch, "illegal expression for augmented "
1923 "assignment");
1924 return NULL;
1927 ch = CHILD(n, 2);
1928 if (TYPE(ch) == testlist)
1929 expr2 = ast_for_testlist(c, ch);
1930 else
1931 expr2 = Yield(ast_for_expr(c, ch), LINENO(ch), ch->n_col_offset, c->c_arena);
1932 if (!expr2)
1933 return NULL;
1935 newoperator = ast_for_augassign(CHILD(n, 1));
1936 if (!newoperator)
1937 return NULL;
1939 return AugAssign(expr1, newoperator, expr2, LINENO(n), n->n_col_offset, c->c_arena);
1941 else {
1942 int i;
1943 asdl_seq *targets;
1944 node *value;
1945 expr_ty expression;
1947 /* a normal assignment */
1948 REQ(CHILD(n, 1), EQUAL);
1949 targets = asdl_seq_new(NCH(n) / 2, c->c_arena);
1950 if (!targets)
1951 return NULL;
1952 for (i = 0; i < NCH(n) - 2; i += 2) {
1953 expr_ty e;
1954 node *ch = CHILD(n, i);
1955 if (TYPE(ch) == yield_expr) {
1956 ast_error(ch, "assignment to yield expression not possible");
1957 return NULL;
1959 e = ast_for_testlist(c, ch);
1961 /* set context to assign */
1962 if (!e)
1963 return NULL;
1965 if (!set_context(e, Store, CHILD(n, i)))
1966 return NULL;
1968 asdl_seq_SET(targets, i / 2, e);
1970 value = CHILD(n, NCH(n) - 1);
1971 if (TYPE(value) == testlist)
1972 expression = ast_for_testlist(c, value);
1973 else
1974 expression = ast_for_expr(c, value);
1975 if (!expression)
1976 return NULL;
1977 return Assign(targets, expression, LINENO(n), n->n_col_offset, c->c_arena);
1981 static stmt_ty
1982 ast_for_print_stmt(struct compiling *c, const node *n)
1984 /* print_stmt: 'print' ( [ test (',' test)* [','] ]
1985 | '>>' test [ (',' test)+ [','] ] )
1987 expr_ty dest = NULL, expression;
1988 asdl_seq *seq;
1989 bool nl;
1990 int i, j, start = 1;
1992 REQ(n, print_stmt);
1993 if (NCH(n) >= 2 && TYPE(CHILD(n, 1)) == RIGHTSHIFT) {
1994 dest = ast_for_expr(c, CHILD(n, 2));
1995 if (!dest)
1996 return NULL;
1997 start = 4;
1999 seq = asdl_seq_new((NCH(n) + 1 - start) / 2, c->c_arena);
2000 if (!seq)
2001 return NULL;
2002 for (i = start, j = 0; i < NCH(n); i += 2, ++j) {
2003 expression = ast_for_expr(c, CHILD(n, i));
2004 if (!expression)
2005 return NULL;
2006 asdl_seq_SET(seq, j, expression);
2008 nl = (TYPE(CHILD(n, NCH(n) - 1)) == COMMA) ? false : true;
2009 return Print(dest, seq, nl, LINENO(n), n->n_col_offset, c->c_arena);
2012 static asdl_seq *
2013 ast_for_exprlist(struct compiling *c, const node *n, expr_context_ty context)
2015 asdl_seq *seq;
2016 int i;
2017 expr_ty e;
2019 REQ(n, exprlist);
2021 seq = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2022 if (!seq)
2023 return NULL;
2024 for (i = 0; i < NCH(n); i += 2) {
2025 e = ast_for_expr(c, CHILD(n, i));
2026 if (!e)
2027 return NULL;
2028 asdl_seq_SET(seq, i / 2, e);
2029 if (context && !set_context(e, context, CHILD(n, i)))
2030 return NULL;
2032 return seq;
2035 static stmt_ty
2036 ast_for_del_stmt(struct compiling *c, const node *n)
2038 asdl_seq *expr_list;
2040 /* del_stmt: 'del' exprlist */
2041 REQ(n, del_stmt);
2043 expr_list = ast_for_exprlist(c, CHILD(n, 1), Del);
2044 if (!expr_list)
2045 return NULL;
2046 return Delete(expr_list, LINENO(n), n->n_col_offset, c->c_arena);
2049 static stmt_ty
2050 ast_for_flow_stmt(struct compiling *c, const node *n)
2053 flow_stmt: break_stmt | continue_stmt | return_stmt | raise_stmt
2054 | yield_stmt
2055 break_stmt: 'break'
2056 continue_stmt: 'continue'
2057 return_stmt: 'return' [testlist]
2058 yield_stmt: yield_expr
2059 yield_expr: 'yield' testlist
2060 raise_stmt: 'raise' [test [',' test [',' test]]]
2062 node *ch;
2064 REQ(n, flow_stmt);
2065 ch = CHILD(n, 0);
2066 switch (TYPE(ch)) {
2067 case break_stmt:
2068 return Break(LINENO(n), n->n_col_offset, c->c_arena);
2069 case continue_stmt:
2070 return Continue(LINENO(n), n->n_col_offset, c->c_arena);
2071 case yield_stmt: { /* will reduce to yield_expr */
2072 expr_ty exp = ast_for_expr(c, CHILD(ch, 0));
2073 if (!exp)
2074 return NULL;
2075 return Expr(exp, LINENO(n), n->n_col_offset, c->c_arena);
2077 case return_stmt:
2078 if (NCH(ch) == 1)
2079 return Return(NULL, LINENO(n), n->n_col_offset, c->c_arena);
2080 else {
2081 expr_ty expression = ast_for_testlist(c, CHILD(ch, 1));
2082 if (!expression)
2083 return NULL;
2084 return Return(expression, LINENO(n), n->n_col_offset, c->c_arena);
2086 case raise_stmt:
2087 if (NCH(ch) == 1)
2088 return Raise(NULL, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2089 else if (NCH(ch) == 2) {
2090 expr_ty expression = ast_for_expr(c, CHILD(ch, 1));
2091 if (!expression)
2092 return NULL;
2093 return Raise(expression, NULL, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2095 else if (NCH(ch) == 4) {
2096 expr_ty expr1, expr2;
2098 expr1 = ast_for_expr(c, CHILD(ch, 1));
2099 if (!expr1)
2100 return NULL;
2101 expr2 = ast_for_expr(c, CHILD(ch, 3));
2102 if (!expr2)
2103 return NULL;
2105 return Raise(expr1, expr2, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2107 else if (NCH(ch) == 6) {
2108 expr_ty expr1, expr2, expr3;
2110 expr1 = ast_for_expr(c, CHILD(ch, 1));
2111 if (!expr1)
2112 return NULL;
2113 expr2 = ast_for_expr(c, CHILD(ch, 3));
2114 if (!expr2)
2115 return NULL;
2116 expr3 = ast_for_expr(c, CHILD(ch, 5));
2117 if (!expr3)
2118 return NULL;
2120 return Raise(expr1, expr2, expr3, LINENO(n), n->n_col_offset, c->c_arena);
2122 default:
2123 PyErr_Format(PyExc_SystemError,
2124 "unexpected flow_stmt: %d", TYPE(ch));
2125 return NULL;
2128 PyErr_SetString(PyExc_SystemError, "unhandled flow statement");
2129 return NULL;
2132 static alias_ty
2133 alias_for_import_name(struct compiling *c, const node *n)
2136 import_as_name: NAME ['as' NAME]
2137 dotted_as_name: dotted_name ['as' NAME]
2138 dotted_name: NAME ('.' NAME)*
2140 PyObject *str;
2142 loop:
2143 switch (TYPE(n)) {
2144 case import_as_name:
2145 str = (NCH(n) == 3) ? NEW_IDENTIFIER(CHILD(n, 2)) : NULL;
2146 return alias(NEW_IDENTIFIER(CHILD(n, 0)), str, c->c_arena);
2147 case dotted_as_name:
2148 if (NCH(n) == 1) {
2149 n = CHILD(n, 0);
2150 goto loop;
2152 else {
2153 alias_ty a = alias_for_import_name(c, CHILD(n, 0));
2154 assert(!a->asname);
2155 a->asname = NEW_IDENTIFIER(CHILD(n, 2));
2156 return a;
2158 break;
2159 case dotted_name:
2160 if (NCH(n) == 1)
2161 return alias(NEW_IDENTIFIER(CHILD(n, 0)), NULL, c->c_arena);
2162 else {
2163 /* Create a string of the form "a.b.c" */
2164 int i;
2165 size_t len;
2166 char *s;
2168 len = 0;
2169 for (i = 0; i < NCH(n); i += 2)
2170 /* length of string plus one for the dot */
2171 len += strlen(STR(CHILD(n, i))) + 1;
2172 len--; /* the last name doesn't have a dot */
2173 str = PyString_FromStringAndSize(NULL, len);
2174 if (!str)
2175 return NULL;
2176 s = PyString_AS_STRING(str);
2177 if (!s)
2178 return NULL;
2179 for (i = 0; i < NCH(n); i += 2) {
2180 char *sch = STR(CHILD(n, i));
2181 strcpy(s, STR(CHILD(n, i)));
2182 s += strlen(sch);
2183 *s++ = '.';
2185 --s;
2186 *s = '\0';
2187 PyString_InternInPlace(&str);
2188 PyArena_AddPyObject(c->c_arena, str);
2189 return alias(str, NULL, c->c_arena);
2191 break;
2192 case STAR:
2193 str = PyString_InternFromString("*");
2194 PyArena_AddPyObject(c->c_arena, str);
2195 return alias(str, NULL, c->c_arena);
2196 default:
2197 PyErr_Format(PyExc_SystemError,
2198 "unexpected import name: %d", TYPE(n));
2199 return NULL;
2202 PyErr_SetString(PyExc_SystemError, "unhandled import name condition");
2203 return NULL;
2206 static stmt_ty
2207 ast_for_import_stmt(struct compiling *c, const node *n)
2210 import_stmt: import_name | import_from
2211 import_name: 'import' dotted_as_names
2212 import_from: 'from' ('.'* dotted_name | '.') 'import'
2213 ('*' | '(' import_as_names ')' | import_as_names)
2215 int lineno;
2216 int col_offset;
2217 int i;
2218 asdl_seq *aliases;
2220 REQ(n, import_stmt);
2221 lineno = LINENO(n);
2222 col_offset = n->n_col_offset;
2223 n = CHILD(n, 0);
2224 if (TYPE(n) == import_name) {
2225 n = CHILD(n, 1);
2226 REQ(n, dotted_as_names);
2227 aliases = asdl_seq_new((NCH(n) + 1) / 2, c->c_arena);
2228 if (!aliases)
2229 return NULL;
2230 for (i = 0; i < NCH(n); i += 2) {
2231 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i));
2232 if (!import_alias)
2233 return NULL;
2234 asdl_seq_SET(aliases, i / 2, import_alias);
2236 return Import(aliases, lineno, col_offset, c->c_arena);
2238 else if (TYPE(n) == import_from) {
2239 int n_children;
2240 int idx, ndots = 0;
2241 alias_ty mod = NULL;
2242 identifier modname;
2244 /* Count the number of dots (for relative imports) and check for the
2245 optional module name */
2246 for (idx = 1; idx < NCH(n); idx++) {
2247 if (TYPE(CHILD(n, idx)) == dotted_name) {
2248 mod = alias_for_import_name(c, CHILD(n, idx));
2249 idx++;
2250 break;
2251 } else if (TYPE(CHILD(n, idx)) != DOT) {
2252 break;
2254 ndots++;
2256 idx++; /* skip over the 'import' keyword */
2257 switch (TYPE(CHILD(n, idx))) {
2258 case STAR:
2259 /* from ... import * */
2260 n = CHILD(n, idx);
2261 n_children = 1;
2262 if (ndots) {
2263 ast_error(n, "'import *' not allowed with 'from .'");
2264 return NULL;
2266 break;
2267 case LPAR:
2268 /* from ... import (x, y, z) */
2269 n = CHILD(n, idx + 1);
2270 n_children = NCH(n);
2271 break;
2272 case import_as_names:
2273 /* from ... import x, y, z */
2274 n = CHILD(n, idx);
2275 n_children = NCH(n);
2276 if (n_children % 2 == 0) {
2277 ast_error(n, "trailing comma not allowed without"
2278 " surrounding parentheses");
2279 return NULL;
2281 break;
2282 default:
2283 ast_error(n, "Unexpected node-type in from-import");
2284 return NULL;
2287 aliases = asdl_seq_new((n_children + 1) / 2, c->c_arena);
2288 if (!aliases)
2289 return NULL;
2291 /* handle "from ... import *" special b/c there's no children */
2292 if (TYPE(n) == STAR) {
2293 alias_ty import_alias = alias_for_import_name(c, n);
2294 if (!import_alias)
2295 return NULL;
2296 asdl_seq_SET(aliases, 0, import_alias);
2298 else {
2299 for (i = 0; i < NCH(n); i += 2) {
2300 alias_ty import_alias = alias_for_import_name(c, CHILD(n, i));
2301 if (!import_alias)
2302 return NULL;
2303 asdl_seq_SET(aliases, i / 2, import_alias);
2306 if (mod != NULL)
2307 modname = mod->name;
2308 else
2309 modname = new_identifier("", c->c_arena);
2310 return ImportFrom(modname, aliases, ndots, lineno, col_offset,
2311 c->c_arena);
2313 PyErr_Format(PyExc_SystemError,
2314 "unknown import statement: starts with command '%s'",
2315 STR(CHILD(n, 0)));
2316 return NULL;
2319 static stmt_ty
2320 ast_for_global_stmt(struct compiling *c, const node *n)
2322 /* global_stmt: 'global' NAME (',' NAME)* */
2323 identifier name;
2324 asdl_seq *s;
2325 int i;
2327 REQ(n, global_stmt);
2328 s = asdl_seq_new(NCH(n) / 2, c->c_arena);
2329 if (!s)
2330 return NULL;
2331 for (i = 1; i < NCH(n); i += 2) {
2332 name = NEW_IDENTIFIER(CHILD(n, i));
2333 if (!name)
2334 return NULL;
2335 asdl_seq_SET(s, i / 2, name);
2337 return Global(s, LINENO(n), n->n_col_offset, c->c_arena);
2340 static stmt_ty
2341 ast_for_exec_stmt(struct compiling *c, const node *n)
2343 expr_ty expr1, globals = NULL, locals = NULL;
2344 int n_children = NCH(n);
2345 if (n_children != 2 && n_children != 4 && n_children != 6) {
2346 PyErr_Format(PyExc_SystemError,
2347 "poorly formed 'exec' statement: %d parts to statement",
2348 n_children);
2349 return NULL;
2352 /* exec_stmt: 'exec' expr ['in' test [',' test]] */
2353 REQ(n, exec_stmt);
2354 expr1 = ast_for_expr(c, CHILD(n, 1));
2355 if (!expr1)
2356 return NULL;
2357 if (n_children >= 4) {
2358 globals = ast_for_expr(c, CHILD(n, 3));
2359 if (!globals)
2360 return NULL;
2362 if (n_children == 6) {
2363 locals = ast_for_expr(c, CHILD(n, 5));
2364 if (!locals)
2365 return NULL;
2368 return Exec(expr1, globals, locals, LINENO(n), n->n_col_offset, c->c_arena);
2371 static stmt_ty
2372 ast_for_assert_stmt(struct compiling *c, const node *n)
2374 /* assert_stmt: 'assert' test [',' test] */
2375 REQ(n, assert_stmt);
2376 if (NCH(n) == 2) {
2377 expr_ty expression = ast_for_expr(c, CHILD(n, 1));
2378 if (!expression)
2379 return NULL;
2380 return Assert(expression, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2382 else if (NCH(n) == 4) {
2383 expr_ty expr1, expr2;
2385 expr1 = ast_for_expr(c, CHILD(n, 1));
2386 if (!expr1)
2387 return NULL;
2388 expr2 = ast_for_expr(c, CHILD(n, 3));
2389 if (!expr2)
2390 return NULL;
2392 return Assert(expr1, expr2, LINENO(n), n->n_col_offset, c->c_arena);
2394 PyErr_Format(PyExc_SystemError,
2395 "improper number of parts to 'assert' statement: %d",
2396 NCH(n));
2397 return NULL;
2400 static asdl_seq *
2401 ast_for_suite(struct compiling *c, const node *n)
2403 /* suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT */
2404 asdl_seq *seq;
2405 stmt_ty s;
2406 int i, total, num, end, pos = 0;
2407 node *ch;
2409 REQ(n, suite);
2411 total = num_stmts(n);
2412 seq = asdl_seq_new(total, c->c_arena);
2413 if (!seq)
2414 return NULL;
2415 if (TYPE(CHILD(n, 0)) == simple_stmt) {
2416 n = CHILD(n, 0);
2417 /* simple_stmt always ends with a NEWLINE,
2418 and may have a trailing SEMI
2420 end = NCH(n) - 1;
2421 if (TYPE(CHILD(n, end - 1)) == SEMI)
2422 end--;
2423 /* loop by 2 to skip semi-colons */
2424 for (i = 0; i < end; i += 2) {
2425 ch = CHILD(n, i);
2426 s = ast_for_stmt(c, ch);
2427 if (!s)
2428 return NULL;
2429 asdl_seq_SET(seq, pos++, s);
2432 else {
2433 for (i = 2; i < (NCH(n) - 1); i++) {
2434 ch = CHILD(n, i);
2435 REQ(ch, stmt);
2436 num = num_stmts(ch);
2437 if (num == 1) {
2438 /* small_stmt or compound_stmt with only one child */
2439 s = ast_for_stmt(c, ch);
2440 if (!s)
2441 return NULL;
2442 asdl_seq_SET(seq, pos++, s);
2444 else {
2445 int j;
2446 ch = CHILD(ch, 0);
2447 REQ(ch, simple_stmt);
2448 for (j = 0; j < NCH(ch); j += 2) {
2449 /* statement terminates with a semi-colon ';' */
2450 if (NCH(CHILD(ch, j)) == 0) {
2451 assert((j + 1) == NCH(ch));
2452 break;
2454 s = ast_for_stmt(c, CHILD(ch, j));
2455 if (!s)
2456 return NULL;
2457 asdl_seq_SET(seq, pos++, s);
2462 assert(pos == seq->size);
2463 return seq;
2466 static stmt_ty
2467 ast_for_if_stmt(struct compiling *c, const node *n)
2469 /* if_stmt: 'if' test ':' suite ('elif' test ':' suite)*
2470 ['else' ':' suite]
2472 char *s;
2474 REQ(n, if_stmt);
2476 if (NCH(n) == 4) {
2477 expr_ty expression;
2478 asdl_seq *suite_seq;
2480 expression = ast_for_expr(c, CHILD(n, 1));
2481 if (!expression)
2482 return NULL;
2483 suite_seq = ast_for_suite(c, CHILD(n, 3));
2484 if (!suite_seq)
2485 return NULL;
2487 return If(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2490 s = STR(CHILD(n, 4));
2491 /* s[2], the third character in the string, will be
2492 's' for el_s_e, or
2493 'i' for el_i_f
2495 if (s[2] == 's') {
2496 expr_ty expression;
2497 asdl_seq *seq1, *seq2;
2499 expression = ast_for_expr(c, CHILD(n, 1));
2500 if (!expression)
2501 return NULL;
2502 seq1 = ast_for_suite(c, CHILD(n, 3));
2503 if (!seq1)
2504 return NULL;
2505 seq2 = ast_for_suite(c, CHILD(n, 6));
2506 if (!seq2)
2507 return NULL;
2509 return If(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
2511 else if (s[2] == 'i') {
2512 int i, n_elif, has_else = 0;
2513 asdl_seq *orelse = NULL;
2514 n_elif = NCH(n) - 4;
2515 /* must reference the child n_elif+1 since 'else' token is third,
2516 not fourth, child from the end. */
2517 if (TYPE(CHILD(n, (n_elif + 1))) == NAME
2518 && STR(CHILD(n, (n_elif + 1)))[2] == 's') {
2519 has_else = 1;
2520 n_elif -= 3;
2522 n_elif /= 4;
2524 if (has_else) {
2525 expr_ty expression;
2526 asdl_seq *seq1, *seq2;
2528 orelse = asdl_seq_new(1, c->c_arena);
2529 if (!orelse)
2530 return NULL;
2531 expression = ast_for_expr(c, CHILD(n, NCH(n) - 6));
2532 if (!expression)
2533 return NULL;
2534 seq1 = ast_for_suite(c, CHILD(n, NCH(n) - 4));
2535 if (!seq1)
2536 return NULL;
2537 seq2 = ast_for_suite(c, CHILD(n, NCH(n) - 1));
2538 if (!seq2)
2539 return NULL;
2541 asdl_seq_SET(orelse, 0, If(expression, seq1, seq2,
2542 LINENO(CHILD(n, NCH(n) - 6)), CHILD(n, NCH(n) - 6)->n_col_offset,
2543 c->c_arena));
2544 /* the just-created orelse handled the last elif */
2545 n_elif--;
2548 for (i = 0; i < n_elif; i++) {
2549 int off = 5 + (n_elif - i - 1) * 4;
2550 expr_ty expression;
2551 asdl_seq *suite_seq;
2552 asdl_seq *newobj = asdl_seq_new(1, c->c_arena);
2553 if (!newobj)
2554 return NULL;
2555 expression = ast_for_expr(c, CHILD(n, off));
2556 if (!expression)
2557 return NULL;
2558 suite_seq = ast_for_suite(c, CHILD(n, off + 2));
2559 if (!suite_seq)
2560 return NULL;
2562 asdl_seq_SET(newobj, 0,
2563 If(expression, suite_seq, orelse,
2564 LINENO(CHILD(n, off)), CHILD(n, off)->n_col_offset, c->c_arena));
2565 orelse = newobj;
2567 return If(ast_for_expr(c, CHILD(n, 1)),
2568 ast_for_suite(c, CHILD(n, 3)),
2569 orelse, LINENO(n), n->n_col_offset, c->c_arena);
2572 PyErr_Format(PyExc_SystemError,
2573 "unexpected token in 'if' statement: %s", s);
2574 return NULL;
2577 static stmt_ty
2578 ast_for_while_stmt(struct compiling *c, const node *n)
2580 /* while_stmt: 'while' test ':' suite ['else' ':' suite] */
2581 REQ(n, while_stmt);
2583 if (NCH(n) == 4) {
2584 expr_ty expression;
2585 asdl_seq *suite_seq;
2587 expression = ast_for_expr(c, CHILD(n, 1));
2588 if (!expression)
2589 return NULL;
2590 suite_seq = ast_for_suite(c, CHILD(n, 3));
2591 if (!suite_seq)
2592 return NULL;
2593 return While(expression, suite_seq, NULL, LINENO(n), n->n_col_offset, c->c_arena);
2595 else if (NCH(n) == 7) {
2596 expr_ty expression;
2597 asdl_seq *seq1, *seq2;
2599 expression = ast_for_expr(c, CHILD(n, 1));
2600 if (!expression)
2601 return NULL;
2602 seq1 = ast_for_suite(c, CHILD(n, 3));
2603 if (!seq1)
2604 return NULL;
2605 seq2 = ast_for_suite(c, CHILD(n, 6));
2606 if (!seq2)
2607 return NULL;
2609 return While(expression, seq1, seq2, LINENO(n), n->n_col_offset, c->c_arena);
2612 PyErr_Format(PyExc_SystemError,
2613 "wrong number of tokens for 'while' statement: %d",
2614 NCH(n));
2615 return NULL;
2618 static stmt_ty
2619 ast_for_for_stmt(struct compiling *c, const node *n)
2621 asdl_seq *_target, *seq = NULL, *suite_seq;
2622 expr_ty expression;
2623 expr_ty target;
2624 /* for_stmt: 'for' exprlist 'in' testlist ':' suite ['else' ':' suite] */
2625 REQ(n, for_stmt);
2627 if (NCH(n) == 9) {
2628 seq = ast_for_suite(c, CHILD(n, 8));
2629 if (!seq)
2630 return NULL;
2633 _target = ast_for_exprlist(c, CHILD(n, 1), Store);
2634 if (!_target)
2635 return NULL;
2636 if (asdl_seq_LEN(_target) == 1)
2637 target = (expr_ty)asdl_seq_GET(_target, 0);
2638 else
2639 target = Tuple(_target, Store, LINENO(n), n->n_col_offset, c->c_arena);
2641 expression = ast_for_testlist(c, CHILD(n, 3));
2642 if (!expression)
2643 return NULL;
2644 suite_seq = ast_for_suite(c, CHILD(n, 5));
2645 if (!suite_seq)
2646 return NULL;
2648 return For(target, expression, suite_seq, seq, LINENO(n), n->n_col_offset,
2649 c->c_arena);
2652 static excepthandler_ty
2653 ast_for_except_clause(struct compiling *c, const node *exc, node *body)
2655 /* except_clause: 'except' [test [',' test]] */
2656 REQ(exc, except_clause);
2657 REQ(body, suite);
2659 if (NCH(exc) == 1) {
2660 asdl_seq *suite_seq = ast_for_suite(c, body);
2661 if (!suite_seq)
2662 return NULL;
2664 return excepthandler(NULL, NULL, suite_seq, LINENO(exc),
2665 exc->n_col_offset, c->c_arena);
2667 else if (NCH(exc) == 2) {
2668 expr_ty expression;
2669 asdl_seq *suite_seq;
2671 expression = ast_for_expr(c, CHILD(exc, 1));
2672 if (!expression)
2673 return NULL;
2674 suite_seq = ast_for_suite(c, body);
2675 if (!suite_seq)
2676 return NULL;
2678 return excepthandler(expression, NULL, suite_seq, LINENO(exc),
2679 exc->n_col_offset, c->c_arena);
2681 else if (NCH(exc) == 4) {
2682 asdl_seq *suite_seq;
2683 expr_ty expression;
2684 expr_ty e = ast_for_expr(c, CHILD(exc, 3));
2685 if (!e)
2686 return NULL;
2687 if (!set_context(e, Store, CHILD(exc, 3)))
2688 return NULL;
2689 expression = ast_for_expr(c, CHILD(exc, 1));
2690 if (!expression)
2691 return NULL;
2692 suite_seq = ast_for_suite(c, body);
2693 if (!suite_seq)
2694 return NULL;
2696 return excepthandler(expression, e, suite_seq, LINENO(exc),
2697 exc->n_col_offset, c->c_arena);
2700 PyErr_Format(PyExc_SystemError,
2701 "wrong number of children for 'except' clause: %d",
2702 NCH(exc));
2703 return NULL;
2706 static stmt_ty
2707 ast_for_try_stmt(struct compiling *c, const node *n)
2709 const int nch = NCH(n);
2710 int n_except = (nch - 3)/3;
2711 asdl_seq *body, *orelse = NULL, *finally = NULL;
2713 REQ(n, try_stmt);
2715 body = ast_for_suite(c, CHILD(n, 2));
2716 if (body == NULL)
2717 return NULL;
2719 if (TYPE(CHILD(n, nch - 3)) == NAME) {
2720 if (strcmp(STR(CHILD(n, nch - 3)), "finally") == 0) {
2721 if (nch >= 9 && TYPE(CHILD(n, nch - 6)) == NAME) {
2722 /* we can assume it's an "else",
2723 because nch >= 9 for try-else-finally and
2724 it would otherwise have a type of except_clause */
2725 orelse = ast_for_suite(c, CHILD(n, nch - 4));
2726 if (orelse == NULL)
2727 return NULL;
2728 n_except--;
2731 finally = ast_for_suite(c, CHILD(n, nch - 1));
2732 if (finally == NULL)
2733 return NULL;
2734 n_except--;
2736 else {
2737 /* we can assume it's an "else",
2738 otherwise it would have a type of except_clause */
2739 orelse = ast_for_suite(c, CHILD(n, nch - 1));
2740 if (orelse == NULL)
2741 return NULL;
2742 n_except--;
2745 else if (TYPE(CHILD(n, nch - 3)) != except_clause) {
2746 ast_error(n, "malformed 'try' statement");
2747 return NULL;
2750 if (n_except > 0) {
2751 int i;
2752 stmt_ty except_st;
2753 /* process except statements to create a try ... except */
2754 asdl_seq *handlers = asdl_seq_new(n_except, c->c_arena);
2755 if (handlers == NULL)
2756 return NULL;
2758 for (i = 0; i < n_except; i++) {
2759 excepthandler_ty e = ast_for_except_clause(c, CHILD(n, 3 + i * 3),
2760 CHILD(n, 5 + i * 3));
2761 if (!e)
2762 return NULL;
2763 asdl_seq_SET(handlers, i, e);
2766 except_st = TryExcept(body, handlers, orelse, LINENO(n),
2767 n->n_col_offset, c->c_arena);
2768 if (!finally)
2769 return except_st;
2771 /* if a 'finally' is present too, we nest the TryExcept within a
2772 TryFinally to emulate try ... except ... finally */
2773 body = asdl_seq_new(1, c->c_arena);
2774 if (body == NULL)
2775 return NULL;
2776 asdl_seq_SET(body, 0, except_st);
2779 /* must be a try ... finally (except clauses are in body, if any exist) */
2780 assert(finally != NULL);
2781 return TryFinally(body, finally, LINENO(n), n->n_col_offset, c->c_arena);
2784 static expr_ty
2785 ast_for_with_var(struct compiling *c, const node *n)
2787 REQ(n, with_var);
2788 if (strcmp(STR(CHILD(n, 0)), "as") != 0) {
2789 ast_error(n, "expected \"with [expr] as [var]\"");
2790 return NULL;
2792 return ast_for_expr(c, CHILD(n, 1));
2795 /* with_stmt: 'with' test [ with_var ] ':' suite */
2796 static stmt_ty
2797 ast_for_with_stmt(struct compiling *c, const node *n)
2799 expr_ty context_expr, optional_vars = NULL;
2800 int suite_index = 3; /* skip 'with', test, and ':' */
2801 asdl_seq *suite_seq;
2803 assert(TYPE(n) == with_stmt);
2804 context_expr = ast_for_expr(c, CHILD(n, 1));
2805 if (TYPE(CHILD(n, 2)) == with_var) {
2806 optional_vars = ast_for_with_var(c, CHILD(n, 2));
2808 if (!optional_vars) {
2809 return NULL;
2811 if (!set_context(optional_vars, Store, n)) {
2812 return NULL;
2814 suite_index = 4;
2817 suite_seq = ast_for_suite(c, CHILD(n, suite_index));
2818 if (!suite_seq) {
2819 return NULL;
2821 return With(context_expr, optional_vars, suite_seq, LINENO(n),
2822 n->n_col_offset, c->c_arena);
2825 static stmt_ty
2826 ast_for_classdef(struct compiling *c, const node *n)
2828 /* classdef: 'class' NAME ['(' testlist ')'] ':' suite */
2829 asdl_seq *bases, *s;
2831 REQ(n, classdef);
2833 if (!strcmp(STR(CHILD(n, 1)), "None")) {
2834 ast_error(n, "assignment to None");
2835 return NULL;
2838 if (NCH(n) == 4) {
2839 s = ast_for_suite(c, CHILD(n, 3));
2840 if (!s)
2841 return NULL;
2842 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n),
2843 n->n_col_offset, c->c_arena);
2845 /* check for empty base list */
2846 if (TYPE(CHILD(n,3)) == RPAR) {
2847 s = ast_for_suite(c, CHILD(n,5));
2848 if (!s)
2849 return NULL;
2850 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), NULL, s, LINENO(n),
2851 n->n_col_offset, c->c_arena);
2854 /* else handle the base class list */
2855 bases = ast_for_class_bases(c, CHILD(n, 3));
2856 if (!bases)
2857 return NULL;
2859 s = ast_for_suite(c, CHILD(n, 6));
2860 if (!s)
2861 return NULL;
2862 return ClassDef(NEW_IDENTIFIER(CHILD(n, 1)), bases, s, LINENO(n),
2863 n->n_col_offset, c->c_arena);
2866 static stmt_ty
2867 ast_for_stmt(struct compiling *c, const node *n)
2869 if (TYPE(n) == stmt) {
2870 assert(NCH(n) == 1);
2871 n = CHILD(n, 0);
2873 if (TYPE(n) == simple_stmt) {
2874 assert(num_stmts(n) == 1);
2875 n = CHILD(n, 0);
2877 if (TYPE(n) == small_stmt) {
2878 REQ(n, small_stmt);
2879 n = CHILD(n, 0);
2880 /* small_stmt: expr_stmt | print_stmt | del_stmt | pass_stmt
2881 | flow_stmt | import_stmt | global_stmt | exec_stmt
2882 | assert_stmt
2884 switch (TYPE(n)) {
2885 case expr_stmt:
2886 return ast_for_expr_stmt(c, n);
2887 case print_stmt:
2888 return ast_for_print_stmt(c, n);
2889 case del_stmt:
2890 return ast_for_del_stmt(c, n);
2891 case pass_stmt:
2892 return Pass(LINENO(n), n->n_col_offset, c->c_arena);
2893 case flow_stmt:
2894 return ast_for_flow_stmt(c, n);
2895 case import_stmt:
2896 return ast_for_import_stmt(c, n);
2897 case global_stmt:
2898 return ast_for_global_stmt(c, n);
2899 case exec_stmt:
2900 return ast_for_exec_stmt(c, n);
2901 case assert_stmt:
2902 return ast_for_assert_stmt(c, n);
2903 default:
2904 PyErr_Format(PyExc_SystemError,
2905 "unhandled small_stmt: TYPE=%d NCH=%d\n",
2906 TYPE(n), NCH(n));
2907 return NULL;
2910 else {
2911 /* compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt
2912 | funcdef | classdef
2914 node *ch = CHILD(n, 0);
2915 REQ(n, compound_stmt);
2916 switch (TYPE(ch)) {
2917 case if_stmt:
2918 return ast_for_if_stmt(c, ch);
2919 case while_stmt:
2920 return ast_for_while_stmt(c, ch);
2921 case for_stmt:
2922 return ast_for_for_stmt(c, ch);
2923 case try_stmt:
2924 return ast_for_try_stmt(c, ch);
2925 case with_stmt:
2926 return ast_for_with_stmt(c, ch);
2927 case funcdef:
2928 return ast_for_funcdef(c, ch);
2929 case classdef:
2930 return ast_for_classdef(c, ch);
2931 default:
2932 PyErr_Format(PyExc_SystemError,
2933 "unhandled small_stmt: TYPE=%d NCH=%d\n",
2934 TYPE(n), NCH(n));
2935 return NULL;
2940 static PyObject *
2941 parsenumber(const char *s)
2943 const char *end;
2944 long x;
2945 double dx;
2946 #ifndef WITHOUT_COMPLEX
2947 Py_complex c;
2948 int imflag;
2949 #endif
2951 errno = 0;
2952 end = s + strlen(s) - 1;
2953 #ifndef WITHOUT_COMPLEX
2954 imflag = *end == 'j' || *end == 'J';
2955 #endif
2956 if (*end == 'l' || *end == 'L')
2957 return PyLong_FromString((char *)s, (char **)0, 0);
2958 if (s[0] == '0') {
2959 x = (long) PyOS_strtoul((char *)s, (char **)&end, 0);
2960 if (x < 0 && errno == 0) {
2961 return PyLong_FromString((char *)s,
2962 (char **)0,
2966 else
2967 x = PyOS_strtol((char *)s, (char **)&end, 0);
2968 if (*end == '\0') {
2969 if (errno != 0)
2970 return PyLong_FromString((char *)s, (char **)0, 0);
2971 return PyInt_FromLong(x);
2973 /* XXX Huge floats may silently fail */
2974 #ifndef WITHOUT_COMPLEX
2975 if (imflag) {
2976 c.real = 0.;
2977 PyFPE_START_PROTECT("atof", return 0)
2978 c.imag = PyOS_ascii_atof(s);
2979 PyFPE_END_PROTECT(c)
2980 return PyComplex_FromCComplex(c);
2982 else
2983 #endif
2985 PyFPE_START_PROTECT("atof", return 0)
2986 dx = PyOS_ascii_atof(s);
2987 PyFPE_END_PROTECT(dx)
2988 return PyFloat_FromDouble(dx);
2992 static PyObject *
2993 decode_utf8(const char **sPtr, const char *end, char* encoding)
2995 #ifndef Py_USING_UNICODE
2996 Py_FatalError("decode_utf8 should not be called in this build.");
2997 return NULL;
2998 #else
2999 PyObject *u, *v;
3000 char *s, *t;
3001 t = s = (char *)*sPtr;
3002 /* while (s < end && *s != '\\') s++; */ /* inefficient for u".." */
3003 while (s < end && (*s & 0x80)) s++;
3004 *sPtr = s;
3005 u = PyUnicode_DecodeUTF8(t, s - t, NULL);
3006 if (u == NULL)
3007 return NULL;
3008 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3009 Py_DECREF(u);
3010 return v;
3011 #endif
3014 static PyObject *
3015 decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
3017 PyObject *v, *u;
3018 char *buf;
3019 char *p;
3020 const char *end;
3021 if (encoding == NULL) {
3022 buf = (char *)s;
3023 u = NULL;
3024 } else if (strcmp(encoding, "iso-8859-1") == 0) {
3025 buf = (char *)s;
3026 u = NULL;
3027 } else {
3028 /* "\XX" may become "\u005c\uHHLL" (12 bytes) */
3029 u = PyString_FromStringAndSize((char *)NULL, len * 4);
3030 if (u == NULL)
3031 return NULL;
3032 p = buf = PyString_AsString(u);
3033 end = s + len;
3034 while (s < end) {
3035 if (*s == '\\') {
3036 *p++ = *s++;
3037 if (*s & 0x80) {
3038 strcpy(p, "u005c");
3039 p += 5;
3042 if (*s & 0x80) { /* XXX inefficient */
3043 PyObject *w;
3044 char *r;
3045 Py_ssize_t rn, i;
3046 w = decode_utf8(&s, end, "utf-16-be");
3047 if (w == NULL) {
3048 Py_DECREF(u);
3049 return NULL;
3051 r = PyString_AsString(w);
3052 rn = PyString_Size(w);
3053 assert(rn % 2 == 0);
3054 for (i = 0; i < rn; i += 2) {
3055 sprintf(p, "\\u%02x%02x",
3056 r[i + 0] & 0xFF,
3057 r[i + 1] & 0xFF);
3058 p += 6;
3060 Py_DECREF(w);
3061 } else {
3062 *p++ = *s++;
3065 len = p - buf;
3066 s = buf;
3068 if (rawmode)
3069 v = PyUnicode_DecodeRawUnicodeEscape(s, len, NULL);
3070 else
3071 v = PyUnicode_DecodeUnicodeEscape(s, len, NULL);
3072 Py_XDECREF(u);
3073 return v;
3076 /* s is a Python string literal, including the bracketing quote characters,
3077 * and r &/or u prefixes (if any), and embedded escape sequences (if any).
3078 * parsestr parses it, and returns the decoded Python string object.
3080 static PyObject *
3081 parsestr(const char *s, const char *encoding)
3083 size_t len;
3084 int quote = Py_CHARMASK(*s);
3085 int rawmode = 0;
3086 int need_encoding;
3087 int unicode = 0;
3089 if (isalpha(quote) || quote == '_') {
3090 if (quote == 'u' || quote == 'U') {
3091 quote = *++s;
3092 unicode = 1;
3094 if (quote == 'r' || quote == 'R') {
3095 quote = *++s;
3096 rawmode = 1;
3099 if (quote != '\'' && quote != '\"') {
3100 PyErr_BadInternalCall();
3101 return NULL;
3103 s++;
3104 len = strlen(s);
3105 if (len > INT_MAX) {
3106 PyErr_SetString(PyExc_OverflowError,
3107 "string to parse is too long");
3108 return NULL;
3110 if (s[--len] != quote) {
3111 PyErr_BadInternalCall();
3112 return NULL;
3114 if (len >= 4 && s[0] == quote && s[1] == quote) {
3115 s += 2;
3116 len -= 2;
3117 if (s[--len] != quote || s[--len] != quote) {
3118 PyErr_BadInternalCall();
3119 return NULL;
3122 #ifdef Py_USING_UNICODE
3123 if (unicode || Py_UnicodeFlag) {
3124 return decode_unicode(s, len, rawmode, encoding);
3126 #endif
3127 need_encoding = (encoding != NULL &&
3128 strcmp(encoding, "utf-8") != 0 &&
3129 strcmp(encoding, "iso-8859-1") != 0);
3130 if (rawmode || strchr(s, '\\') == NULL) {
3131 if (need_encoding) {
3132 #ifndef Py_USING_UNICODE
3133 /* This should not happen - we never see any other
3134 encoding. */
3135 Py_FatalError(
3136 "cannot deal with encodings in this build.");
3137 #else
3138 PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
3139 if (u == NULL)
3140 return NULL;
3141 v = PyUnicode_AsEncodedString(u, encoding, NULL);
3142 Py_DECREF(u);
3143 return v;
3144 #endif
3145 } else {
3146 return PyString_FromStringAndSize(s, len);
3150 return PyString_DecodeEscape(s, len, NULL, unicode,
3151 need_encoding ? encoding : NULL);
3154 /* Build a Python string object out of a STRING atom. This takes care of
3155 * compile-time literal catenation, calling parsestr() on each piece, and
3156 * pasting the intermediate results together.
3158 static PyObject *
3159 parsestrplus(struct compiling *c, const node *n)
3161 PyObject *v;
3162 int i;
3163 REQ(CHILD(n, 0), STRING);
3164 if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
3165 /* String literal concatenation */
3166 for (i = 1; i < NCH(n); i++) {
3167 PyObject *s;
3168 s = parsestr(STR(CHILD(n, i)), c->c_encoding);
3169 if (s == NULL)
3170 goto onError;
3171 if (PyString_Check(v) && PyString_Check(s)) {
3172 PyString_ConcatAndDel(&v, s);
3173 if (v == NULL)
3174 goto onError;
3176 #ifdef Py_USING_UNICODE
3177 else {
3178 PyObject *temp = PyUnicode_Concat(v, s);
3179 Py_DECREF(s);
3180 Py_DECREF(v);
3181 v = temp;
3182 if (v == NULL)
3183 goto onError;
3185 #endif
3188 return v;
3190 onError:
3191 Py_XDECREF(v);
3192 return NULL;