/* Parser/tokenizer.c */
/* Tokenizer implementation */

#include "Python.h"
#include "pgenheaders.h"

#include <ctype.h>
#include <assert.h>

#include "tokenizer.h"
#include "errcode.h"

#ifndef PGEN
#include "unicodeobject.h"
#include "stringobject.h"
#include "fileobject.h"
#include "codecs.h"
#include "abstract.h"
#include "pydebug.h"
#endif /* PGEN */

extern char *PyOS_Readline(FILE *, FILE *, char *);
/* Return malloc'ed string including trailing \n;
   empty malloc'ed string for EOF;
   NULL if interrupted */

/* Don't ever change this -- it would break the portability of Python code */
#define TABSIZE 8

/* Forward */
static struct tok_state *tok_new(void);
static int tok_nextc(struct tok_state *tok);
static void tok_backup(struct tok_state *tok, int c);

/* Token names */

char *_PyParser_TokenNames[] = {
    "ENDMARKER",
    "NAME",
    "NUMBER",
    "STRING",
    "NEWLINE",
    "INDENT",
    "DEDENT",
    "LPAR",
    "RPAR",
    "LSQB",
    "RSQB",
    "COLON",
    "COMMA",
    "SEMI",
    "PLUS",
    "MINUS",
    "STAR",
    "SLASH",
    "VBAR",
    "AMPER",
    "LESS",
    "GREATER",
    "EQUAL",
    "DOT",
    "PERCENT",
    "BACKQUOTE",
    "LBRACE",
    "RBRACE",
    "EQEQUAL",
    "NOTEQUAL",
    "LESSEQUAL",
    "GREATEREQUAL",
    "TILDE",
    "CIRCUMFLEX",
    "LEFTSHIFT",
    "RIGHTSHIFT",
    "DOUBLESTAR",
    "PLUSEQUAL",
    "MINEQUAL",
    "STAREQUAL",
    "SLASHEQUAL",
    "PERCENTEQUAL",
    "AMPEREQUAL",
    "VBAREQUAL",
    "CIRCUMFLEXEQUAL",
    "LEFTSHIFTEQUAL",
    "RIGHTSHIFTEQUAL",
    "DOUBLESTAREQUAL",
    "DOUBLESLASH",
    "DOUBLESLASHEQUAL",
    "AT",
    /* This table must match the #defines in token.h! */
    "OP",
    "<ERRORTOKEN>",
    "<N_TOKENS>"
};
/* Create and initialize a new tok_state structure */

static struct tok_state *
tok_new(void)
{
    struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
                                            sizeof(struct tok_state));
    if (tok == NULL)
        return NULL;
    tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
    tok->done = E_OK;
    tok->fp = NULL;
    tok->input = NULL;
    tok->tabsize = TABSIZE;
    tok->indent = 0;
    tok->indstack[0] = 0;
    tok->atbol = 1;
    tok->pendin = 0;
    tok->prompt = tok->nextprompt = NULL;
    tok->lineno = 0;
    tok->level = 0;
    tok->filename = NULL;
    tok->altwarning = 0;
    tok->alterror = 0;
    tok->alttabsize = 1;
    tok->altindstack[0] = 0;
    tok->decoding_state = 0;
    tok->decoding_erred = 0;
    tok->read_coding_spec = 0;
    tok->encoding = NULL;
    tok->cont_line = 0;
#ifndef PGEN
    tok->decoding_readline = NULL;
    tok->decoding_buffer = NULL;
#endif
    return tok;
}

static char *
new_string(const char *s, Py_ssize_t len)
{
    char* result = (char *)PyMem_MALLOC(len + 1);
    if (result != NULL) {
        memcpy(result, s, len);
        result[len] = '\0';
    }
    return result;
}

#ifdef PGEN

static char *
decoding_fgets(char *s, int size, struct tok_state *tok)
{
    return fgets(s, size, tok->fp);
}

static int
decoding_feof(struct tok_state *tok)
{
    return feof(tok->fp);
}

static char *
decode_str(const char *str, int exec_input, struct tok_state *tok)
{
    return new_string(str, strlen(str));
}

#else /* PGEN */

static char *
error_ret(struct tok_state *tok) /* XXX */
{
    tok->decoding_erred = 1;
    if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
        PyMem_FREE(tok->buf);
    tok->buf = NULL;
    return NULL;                /* as if it were EOF */
}

static char *
get_normal_name(char *s)        /* for utf-8 and latin-1 */
{
    char buf[13];
    int i;
    for (i = 0; i < 12; i++) {
        int c = s[i];
        if (c == '\0')
            break;
        else if (c == '_')
            buf[i] = '-';
        else
            buf[i] = tolower(c);
    }
    buf[i] = '\0';
    if (strcmp(buf, "utf-8") == 0 ||
        strncmp(buf, "utf-8-", 6) == 0)
        return "utf-8";
    else if (strcmp(buf, "latin-1") == 0 ||
             strcmp(buf, "iso-8859-1") == 0 ||
             strcmp(buf, "iso-latin-1") == 0 ||
             strncmp(buf, "latin-1-", 8) == 0 ||
             strncmp(buf, "iso-8859-1-", 11) == 0 ||
             strncmp(buf, "iso-latin-1-", 12) == 0)
        return "iso-8859-1";
    else
        return s;
}
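/* For example, a declared name of "UTF_8" or "utf-8-sig" is normalized to
   "utf-8" by the code above, and "ISO_8859_1" to "iso-8859-1"; any spelling
   that is not one of these two special-cased codecs is returned unchanged
   and left for the codec machinery to resolve. */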
/* Return the coding spec in S, or NULL if none is found. */

static char *
get_coding_spec(const char *s, Py_ssize_t size)
{
    Py_ssize_t i;
    /* Coding spec must be in a comment, and that comment must be
     * the only statement on the source code line. */
    for (i = 0; i < size - 6; i++) {
        if (s[i] == '#')
            break;
        if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
            return NULL;
    }
    for (; i < size - 6; i++) { /* XXX inefficient search */
        const char* t = s + i;
        if (strncmp(t, "coding", 6) == 0) {
            const char* begin = NULL;
            t += 6;
            if (t[0] != ':' && t[0] != '=')
                continue;
            do {
                t++;
            } while (t[0] == '\x20' || t[0] == '\t');

            begin = t;
            while (isalnum(Py_CHARMASK(t[0])) ||
                   t[0] == '-' || t[0] == '_' || t[0] == '.')
                t++;

            if (begin < t) {
                char* r = new_string(begin, t - begin);
                char* q = get_normal_name(r);
                if (r != q) {
                    PyMem_FREE(r);
                    r = new_string(q, strlen(q));
                }
                return r;
            }
        }
    }
    return NULL;
}
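/* Both of the usual PEP 263 forms are picked up by the scan above, e.g.:
       # -*- coding: utf-8 -*-
       # vim: set fileencoding=utf-8 :
   since each puts "coding" followed by ':' or '=' inside a comment-only
   line. */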
/* Check whether the line contains a coding spec. If it does,
   invoke the set_readline function for the new encoding.
   This function receives the tok_state and the new encoding.
   Return 1 on success, 0 on failure. */

static int
check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
                  int set_readline(struct tok_state *, const char *))
{
    char * cs;
    int r = 1;

    if (tok->cont_line)
        /* It's a continuation line, so it can't be a coding spec. */
        return 1;
    cs = get_coding_spec(line, size);
    if (cs != NULL) {
        tok->read_coding_spec = 1;
        if (tok->encoding == NULL) {
            assert(tok->decoding_state == 1); /* raw */
            if (strcmp(cs, "utf-8") == 0 ||
                strcmp(cs, "iso-8859-1") == 0) {
                tok->encoding = cs;
            } else {
#ifdef Py_USING_UNICODE
                r = set_readline(tok, cs);
                if (r) {
                    tok->encoding = cs;
                    tok->decoding_state = -1;
                }
                else
                    PyMem_FREE(cs);
#else
                /* Without Unicode support, we cannot
                   process the coding spec. Since there
                   won't be any Unicode literals, that
                   won't matter. */
                PyMem_FREE(cs);
#endif
            }
        } else {                /* then, compare cs with BOM */
            r = (strcmp(tok->encoding, cs) == 0);
            PyMem_FREE(cs);
        }
    }
    if (!r) {
        cs = tok->encoding;
        if (!cs)
            cs = "with BOM";
        PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
    }
    return r;
}
/* See whether the file starts with a BOM. If it does,
   invoke the set_readline function with the new encoding.
   Return 1 on success, 0 on failure. */

static int
check_bom(int get_char(struct tok_state *),
          void unget_char(int, struct tok_state *),
          int set_readline(struct tok_state *, const char *),
          struct tok_state *tok)
{
    int ch = get_char(tok);
    tok->decoding_state = 1;
    if (ch == EOF) {
        return 1;
    } else if (ch == 0xEF) {
        ch = get_char(tok);
        if (ch != 0xBB)
            goto NON_BOM;
        ch = get_char(tok);
        if (ch != 0xBF)
            goto NON_BOM;
#if 0
    /* Disable support for UTF-16 BOMs until a decision
       is made whether this needs to be supported. */
    } else if (ch == 0xFE) {
        ch = get_char(tok);
        if (ch != 0xFF)
            goto NON_BOM;
        if (!set_readline(tok, "utf-16-be"))
            return 0;
        tok->decoding_state = -1;
    } else if (ch == 0xFF) {
        ch = get_char(tok);
        if (ch != 0xFE)
            goto NON_BOM;
        if (!set_readline(tok, "utf-16-le"))
            return 0;
        tok->decoding_state = -1;
#endif
    } else {
        unget_char(ch, tok);
        return 1;
    }
    if (tok->encoding != NULL)
        PyMem_FREE(tok->encoding);
    tok->encoding = new_string("utf-8", 5);     /* result is in utf-8 */
    return 1;
  NON_BOM:
    /* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */
    unget_char(0xFF, tok);      /* XXX this will cause a syntax error */
    return 1;
}
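/* For example, a source file that begins with the three bytes
   0xEF 0xBB 0xBF (the UTF-8 signature) has the BOM consumed here and
   tok->encoding set to "utf-8" before any coding-spec comment is seen;
   a later, conflicting coding declaration is then rejected by
   check_coding_spec above. */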
/* Read a line of text from TOK into S, using the stream in TOK.
   Return NULL on failure, else S.

   On entry, tok->decoding_buffer will be one of:
     1) NULL: need to call tok->decoding_readline to get a new line
     2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
        stored the result in tok->decoding_buffer
     3) PyStringObject *: previous call to fp_readl did not have enough room
        (in the s buffer) to copy entire contents of the line read
        by tok->decoding_readline.  tok->decoding_buffer has the overflow.
        In this case, fp_readl is called in a loop (with an expanded buffer)
        until the buffer ends with a '\n' (or until the end of the file is
        reached): see tok_nextc and its calls to decoding_fgets.
*/

static char *
fp_readl(char *s, int size, struct tok_state *tok)
{
#ifndef Py_USING_UNICODE
    /* In a non-Unicode build, this should never be called. */
    Py_FatalError("fp_readl should not be called in this build.");
    return NULL; /* Keep compiler happy (not reachable) */
#else
    PyObject* utf8 = NULL;
    PyObject* buf = tok->decoding_buffer;
    char *str;
    Py_ssize_t utf8len;

    /* Ask for one less byte so we can terminate it */
    assert(size > 0);
    size--;

    if (buf == NULL) {
        buf = PyObject_CallObject(tok->decoding_readline, NULL);
        if (buf == NULL)
            return error_ret(tok);
    } else {
        tok->decoding_buffer = NULL;
        if (PyString_CheckExact(buf))
            utf8 = buf;
    }
    if (utf8 == NULL) {
        utf8 = PyUnicode_AsUTF8String(buf);
        Py_DECREF(buf);
        if (utf8 == NULL)
            return error_ret(tok);
    }
    str = PyString_AsString(utf8);
    utf8len = PyString_GET_SIZE(utf8);
    if (utf8len > size) {
        tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size);
        if (tok->decoding_buffer == NULL) {
            Py_DECREF(utf8);
            return error_ret(tok);
        }
        utf8len = size;
    }
    memcpy(s, str, utf8len);
    s[utf8len] = '\0';
    Py_DECREF(utf8);
    if (utf8len == 0)
        return NULL; /* EOF */
    return s;
#endif
}
/* Set the readline function for TOK to a StreamReader's
   readline function. The StreamReader is named ENC.

   This function is called from check_bom and check_coding_spec.

   ENC is usually identical to the future value of tok->encoding,
   except for the (currently unsupported) case of UTF-16.

   Return 1 on success, 0 on failure. */

static int
fp_setreadl(struct tok_state *tok, const char* enc)
{
    PyObject *reader, *stream, *readline;

    /* XXX: constify filename argument. */
    stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
    if (stream == NULL)
        return 0;

    reader = PyCodec_StreamReader(enc, stream, NULL);
    Py_DECREF(stream);
    if (reader == NULL)
        return 0;

    readline = PyObject_GetAttrString(reader, "readline");
    Py_DECREF(reader);
    if (readline == NULL)
        return 0;

    tok->decoding_readline = readline;
    return 1;
}

/* Fetch the next byte from TOK. */

static int fp_getc(struct tok_state *tok) {
    return getc(tok->fp);
}

/* Unfetch the last byte back into TOK. */

static void fp_ungetc(int c, struct tok_state *tok) {
    ungetc(c, tok->fp);
}

/* Read a line of input from TOK. Determine encoding
   if necessary. */

static char *
decoding_fgets(char *s, int size, struct tok_state *tok)
{
    char *line = NULL;
    int badchar = 0;
    for (;;) {
        if (tok->decoding_state < 0) {
            /* We already have a codec associated with
               this input. */
            line = fp_readl(s, size, tok);
            break;
        } else if (tok->decoding_state > 0) {
            /* We want a 'raw' read. */
            line = Py_UniversalNewlineFgets(s, size,
                                            tok->fp, NULL);
            break;
        } else {
            /* We have not yet determined the encoding.
               If an encoding is found, use the file-pointer
               reader functions from now on. */
            if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
                return error_ret(tok);
            assert(tok->decoding_state != 0);
        }
    }
    if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
        if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
            return error_ret(tok);
        }
    }
#ifndef PGEN
    /* The default encoding is ASCII, so make sure we don't have any
       non-ASCII bytes in it. */
    if (line && !tok->encoding) {
        unsigned char *c;
        for (c = (unsigned char *)line; *c; c++)
            if (*c > 127) {
                badchar = *c;
                break;
            }
    }
    if (badchar) {
        char buf[500];
        /* Need to add 1 to the line number, since this line
           has not been counted, yet. */
        sprintf(buf,
            "Non-ASCII character '\\x%.2x' "
            "in file %.200s on line %i, "
            "but no encoding declared; "
            "see http://www.python.org/peps/pep-0263.html for details",
            badchar, tok->filename, tok->lineno + 1);
        PyErr_SetString(PyExc_SyntaxError, buf);
        return error_ret(tok);
    }
#endif
    return line;
}
static int
decoding_feof(struct tok_state *tok)
{
    if (tok->decoding_state >= 0) {
        return feof(tok->fp);
    } else {
        PyObject* buf = tok->decoding_buffer;
        if (buf == NULL) {
            buf = PyObject_CallObject(tok->decoding_readline, NULL);
            if (buf == NULL) {
                error_ret(tok);
                return 1;
            } else {
                tok->decoding_buffer = buf;
            }
        }
        return PyObject_Length(buf) == 0;
    }
}

/* Fetch a byte from TOK, using the string buffer. */

static int
buf_getc(struct tok_state *tok) {
    return Py_CHARMASK(*tok->str++);
}

/* Unfetch a byte from TOK, using the string buffer. */

static void
buf_ungetc(int c, struct tok_state *tok) {
    tok->str--;
    assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */
}

/* Set the readline function for TOK to ENC. For the string-based
   tokenizer, this means to just record the encoding. */

static int
buf_setreadl(struct tok_state *tok, const char* enc) {
    tok->enc = enc;
    return 1;
}

/* Return a UTF-8 encoding Python string object from the
   C byte string STR, which is encoded with ENC. */

#ifdef Py_USING_UNICODE
static PyObject *
translate_into_utf8(const char* str, const char* enc) {
    PyObject *utf8;
    PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
    if (buf == NULL)
        return NULL;
    utf8 = PyUnicode_AsUTF8String(buf);
    Py_DECREF(buf);
    return utf8;
}
#endif
static char *
translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
    int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length;
    char *buf, *current;
    char c = '\0';
    buf = PyMem_MALLOC(needed_length);
    if (buf == NULL) {
        tok->done = E_NOMEM;
        return NULL;
    }
    for (current = buf; *s; s++, current++) {
        c = *s;
        if (skip_next_lf) {
            skip_next_lf = 0;
            if (c == '\n') {
                c = *++s;
                if (!c)
                    break;
            }
        }
        if (c == '\r') {
            skip_next_lf = 1;
            c = '\n';
        }
        *current = c;
    }
    /* If this is exec input, add a newline to the end of the string if
       there isn't one already. */
    if (exec_input && c != '\n') {
        *current = '\n';
        current++;
    }
    *current = '\0';
    final_length = current - buf + 1;
    if (final_length < needed_length && final_length)
        /* should never fail */
        buf = PyMem_REALLOC(buf, final_length);
    return buf;
}
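/* For example, "a = 1\r\nb = 2\r" is rewritten as "a = 1\nb = 2\n", and
   when exec_input is true a missing final newline is appended, so "x = 1"
   comes back as "x = 1\n". */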
/* Decode a byte string STR for use as the buffer of TOK.
   Look for encoding declarations inside STR, and record them
   inside TOK. */

static const char *
decode_str(const char *input, int single, struct tok_state *tok)
{
    PyObject* utf8 = NULL;
    const char *str;
    const char *s;
    const char *newl[2] = {NULL, NULL};
    int lineno = 0;
    tok->input = str = translate_newlines(input, single, tok);
    if (str == NULL)
        return NULL;
    tok->enc = NULL;
    tok->str = str;
    if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
        return error_ret(tok);
    str = tok->str;             /* string after BOM if any */
    assert(str);
#ifdef Py_USING_UNICODE
    if (tok->enc != NULL) {
        utf8 = translate_into_utf8(str, tok->enc);
        if (utf8 == NULL)
            return error_ret(tok);
        str = PyString_AsString(utf8);
    }
#endif
    for (s = str;; s++) {
        if (*s == '\0') break;
        else if (*s == '\n') {
            assert(lineno < 2);
            newl[lineno] = s;
            lineno++;
            if (lineno == 2) break;
        }
    }
    tok->enc = NULL;
    /* need to check line 1 and 2 separately since check_coding_spec
       assumes a single line as input */
    if (newl[0]) {
        if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
            return error_ret(tok);
        if (tok->enc == NULL && newl[1]) {
            if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
                                   tok, buf_setreadl))
                return error_ret(tok);
        }
    }
#ifdef Py_USING_UNICODE
    if (tok->enc != NULL) {
        assert(utf8 == NULL);
        utf8 = translate_into_utf8(str, tok->enc);
        if (utf8 == NULL)
            return error_ret(tok);
        str = PyString_AsString(utf8);
    }
#endif
    assert(tok->decoding_buffer == NULL);
    tok->decoding_buffer = utf8; /* CAUTION */
    return str;
}

#endif /* PGEN */
/* Set up tokenizer for string */

struct tok_state *
PyTokenizer_FromString(const char *str, int exec_input)
{
    struct tok_state *tok = tok_new();
    if (tok == NULL)
        return NULL;
    str = (char *)decode_str(str, exec_input, tok);
    if (str == NULL) {
        PyTokenizer_Free(tok);
        return NULL;
    }

    /* XXX: constify members. */
    tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
    return tok;
}

/* Set up tokenizer for file */

struct tok_state *
PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
{
    struct tok_state *tok = tok_new();
    if (tok == NULL)
        return NULL;
    if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
        PyTokenizer_Free(tok);
        return NULL;
    }
    tok->cur = tok->inp = tok->buf;
    tok->end = tok->buf + BUFSIZ;
    tok->fp = fp;
    tok->prompt = ps1;
    tok->nextprompt = ps2;
    return tok;
}

/* Free a tok_state structure */

void
PyTokenizer_Free(struct tok_state *tok)
{
    if (tok->encoding != NULL)
        PyMem_FREE(tok->encoding);
#ifndef PGEN
    Py_XDECREF(tok->decoding_readline);
    Py_XDECREF(tok->decoding_buffer);
#endif
    if (tok->fp != NULL && tok->buf != NULL)
        PyMem_FREE(tok->buf);
    if (tok->input)
        PyMem_FREE((char *)tok->input);
    PyMem_FREE(tok);
}
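/* A minimal sketch of how a caller can drive this API (the real driver
   lives in Parser/parsetok.c); it assumes an initialized interpreter and
   is kept under #if 0 so it is not compiled. */
#if 0
static void
dump_tokens(const char *source)
{
    char *start, *end;
    int type;
    struct tok_state *tok = PyTokenizer_FromString(source, 1);
    if (tok == NULL)
        return;
    do {
        type = PyTokenizer_Get(tok, &start, &end);
        printf("%-15s %.*s\n", _PyParser_TokenNames[type],
               (start && end) ? (int)(end - start) : 0,
               start ? start : "");
    } while (type != ENDMARKER && type != ERRORTOKEN);
    PyTokenizer_Free(tok);
}
#endif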
#if !defined(PGEN) && defined(Py_USING_UNICODE)
static int
tok_stdin_decode(struct tok_state *tok, char **inp)
{
    PyObject *enc, *sysstdin, *decoded, *utf8;
    const char *encoding;
    char *converted;

    if (PySys_GetFile((char *)"stdin", NULL) != stdin)
        return 0;
    sysstdin = PySys_GetObject("stdin");
    if (sysstdin == NULL || !PyFile_Check(sysstdin))
        return 0;

    enc = ((PyFileObject *)sysstdin)->f_encoding;
    if (enc == NULL || !PyString_Check(enc))
        return 0;
    Py_INCREF(enc);

    encoding = PyString_AsString(enc);
    decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL);
    if (decoded == NULL)
        goto error_clear;

    utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);
    Py_DECREF(decoded);
    if (utf8 == NULL)
        goto error_clear;

    assert(PyString_Check(utf8));
    converted = new_string(PyString_AS_STRING(utf8),
                           PyString_GET_SIZE(utf8));
    Py_DECREF(utf8);
    if (converted == NULL)
        goto error_nomem;

    PyMem_FREE(*inp);
    *inp = converted;
    if (tok->encoding != NULL)
        PyMem_FREE(tok->encoding);
    tok->encoding = new_string(encoding, strlen(encoding));
    if (tok->encoding == NULL)
        goto error_nomem;

    Py_DECREF(enc);
    return 0;

error_nomem:
    Py_DECREF(enc);
    tok->done = E_NOMEM;
    return -1;

error_clear:
    /* Fallback to iso-8859-1: for backward compatibility */
    Py_DECREF(enc);
    PyErr_Clear();
    return 0;
}
#endif
/* Get next char, updating state; error code goes into tok->done */

static int
tok_nextc(register struct tok_state *tok)
{
    for (;;) {
        if (tok->cur != tok->inp) {
            return Py_CHARMASK(*tok->cur++); /* Fast path */
        }
        if (tok->done != E_OK)
            return EOF;
        if (tok->fp == NULL) {
            char *end = strchr(tok->inp, '\n');
            if (end != NULL)
                end++;
            else {
                end = strchr(tok->inp, '\0');
                if (end == tok->inp) {
                    tok->done = E_EOF;
                    return EOF;
                }
            }
            if (tok->start == NULL)
                tok->buf = tok->cur;
            tok->line_start = tok->cur;
            tok->lineno++;
            tok->inp = end;
            return Py_CHARMASK(*tok->cur++);
        }
        if (tok->prompt != NULL) {
            char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
            if (tok->nextprompt != NULL)
                tok->prompt = tok->nextprompt;
            if (newtok == NULL)
                tok->done = E_INTR;
            else if (*newtok == '\0') {
                PyMem_FREE(newtok);
                tok->done = E_EOF;
            }
#if !defined(PGEN) && defined(Py_USING_UNICODE)
            else if (tok_stdin_decode(tok, &newtok) != 0)
                PyMem_FREE(newtok);
#endif
            else if (tok->start != NULL) {
                size_t start = tok->start - tok->buf;
                size_t oldlen = tok->cur - tok->buf;
                size_t newlen = oldlen + strlen(newtok);
                char *buf = tok->buf;
                buf = (char *)PyMem_REALLOC(buf, newlen+1);
                tok->lineno++;
                if (buf == NULL) {
                    PyMem_FREE(tok->buf);
                    tok->buf = NULL;
                    PyMem_FREE(newtok);
                    tok->done = E_NOMEM;
                    return EOF;
                }
                tok->buf = buf;
                tok->cur = tok->buf + oldlen;
                tok->line_start = tok->cur;
                strcpy(tok->buf + oldlen, newtok);
                PyMem_FREE(newtok);
                tok->inp = tok->buf + newlen;
                tok->end = tok->inp + 1;
                tok->start = tok->buf + start;
            }
            else {
                tok->lineno++;
                if (tok->buf != NULL)
                    PyMem_FREE(tok->buf);
                tok->buf = newtok;
                tok->line_start = tok->buf;
                tok->cur = tok->buf;
                tok->line_start = tok->buf;
                tok->inp = strchr(tok->buf, '\0');
                tok->end = tok->inp + 1;
            }
        }
        else {
            int done = 0;
            Py_ssize_t cur = 0;
            char *pt;
            if (tok->start == NULL) {
                if (tok->buf == NULL) {
                    tok->buf = (char *)PyMem_MALLOC(BUFSIZ);
                    if (tok->buf == NULL) {
                        tok->done = E_NOMEM;
                        return EOF;
                    }
                    tok->end = tok->buf + BUFSIZ;
                }
                if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
                                   tok) == NULL) {
                    tok->done = E_EOF;
                    done = 1;
                }
                else {
                    tok->done = E_OK;
                    tok->inp = strchr(tok->buf, '\0');
                    done = tok->inp[-1] == '\n';
                }
            }
            else {
                cur = tok->cur - tok->buf;
                if (decoding_feof(tok)) {
                    tok->done = E_EOF;
                    done = 1;
                }
                else
                    tok->done = E_OK;
            }
            tok->lineno++;
            /* Read until '\n' or EOF */
            while (!done) {
                Py_ssize_t curstart = tok->start == NULL ? -1 :
                                      tok->start - tok->buf;
                Py_ssize_t curvalid = tok->inp - tok->buf;
                Py_ssize_t newsize = curvalid + BUFSIZ;
                char *newbuf = tok->buf;
                newbuf = (char *)PyMem_REALLOC(newbuf, newsize);
                if (newbuf == NULL) {
                    tok->done = E_NOMEM;
                    tok->cur = tok->inp;
                    return EOF;
                }
                tok->buf = newbuf;
                tok->inp = tok->buf + curvalid;
                tok->end = tok->buf + newsize;
                tok->start = curstart < 0 ? NULL :
                             tok->buf + curstart;
                if (decoding_fgets(tok->inp,
                                   (int)(tok->end - tok->inp),
                                   tok) == NULL) {
                    /* Break out early on decoding
                       errors, as tok->buf will be NULL */
                    if (tok->decoding_erred)
                        return EOF;
                    /* Last line does not end in \n,
                       fake one */
                    strcpy(tok->inp, "\n");
                }
                tok->inp = strchr(tok->inp, '\0');
                done = tok->inp[-1] == '\n';
            }
            if (tok->buf != NULL) {
                tok->cur = tok->buf + cur;
                tok->line_start = tok->cur;
                /* replace "\r\n" with "\n" */
                /* For Mac leave the \r, giving a syntax error */
                pt = tok->inp - 2;
                if (pt >= tok->buf && *pt == '\r') {
                    *pt++ = '\n';
                    *pt = '\0';
                    tok->inp = pt;
                }
            }
        }
        if (tok->done != E_OK) {
            if (tok->prompt != NULL)
                PySys_WriteStderr("\n");
            tok->cur = tok->inp;
            return EOF;
        }
    }
    /*NOTREACHED*/
}
/* Back-up one character */

static void
tok_backup(register struct tok_state *tok, register int c)
{
    if (c != EOF) {
        if (--tok->cur < tok->buf)
            Py_FatalError("tok_backup: beginning of buffer");
        if (*tok->cur != c)
            *tok->cur = c;
    }
}
/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c)
{
    switch (c) {
    case '(':           return LPAR;
    case ')':           return RPAR;
    case '[':           return LSQB;
    case ']':           return RSQB;
    case ':':           return COLON;
    case ',':           return COMMA;
    case ';':           return SEMI;
    case '+':           return PLUS;
    case '-':           return MINUS;
    case '*':           return STAR;
    case '/':           return SLASH;
    case '|':           return VBAR;
    case '&':           return AMPER;
    case '<':           return LESS;
    case '>':           return GREATER;
    case '=':           return EQUAL;
    case '.':           return DOT;
    case '%':           return PERCENT;
    case '`':           return BACKQUOTE;
    case '{':           return LBRACE;
    case '}':           return RBRACE;
    case '^':           return CIRCUMFLEX;
    case '~':           return TILDE;
    case '@':           return AT;
    default:            return OP;
    }
}
int
PyToken_TwoChars(int c1, int c2)
{
    switch (c1) {
    case '=':
        switch (c2) {
        case '=':       return EQEQUAL;
        }
        break;
    case '!':
        switch (c2) {
        case '=':       return NOTEQUAL;
        }
        break;
    case '<':
        switch (c2) {
        case '>':       return NOTEQUAL;
        case '=':       return LESSEQUAL;
        case '<':       return LEFTSHIFT;
        }
        break;
    case '>':
        switch (c2) {
        case '=':       return GREATEREQUAL;
        case '>':       return RIGHTSHIFT;
        }
        break;
    case '+':
        switch (c2) {
        case '=':       return PLUSEQUAL;
        }
        break;
    case '-':
        switch (c2) {
        case '=':       return MINEQUAL;
        }
        break;
    case '*':
        switch (c2) {
        case '*':       return DOUBLESTAR;
        case '=':       return STAREQUAL;
        }
        break;
    case '/':
        switch (c2) {
        case '/':       return DOUBLESLASH;
        case '=':       return SLASHEQUAL;
        }
        break;
    case '|':
        switch (c2) {
        case '=':       return VBAREQUAL;
        }
        break;
    case '%':
        switch (c2) {
        case '=':       return PERCENTEQUAL;
        }
        break;
    case '&':
        switch (c2) {
        case '=':       return AMPEREQUAL;
        }
        break;
    case '^':
        switch (c2) {
        case '=':       return CIRCUMFLEXEQUAL;
        }
        break;
    }
    return OP;
}
int
PyToken_ThreeChars(int c1, int c2, int c3)
{
    switch (c1) {
    case '<':
        switch (c2) {
        case '<':
            switch (c3) {
            case '=':
                return LEFTSHIFTEQUAL;
            }
            break;
        }
        break;
    case '>':
        switch (c2) {
        case '>':
            switch (c3) {
            case '=':
                return RIGHTSHIFTEQUAL;
            }
            break;
        }
        break;
    case '*':
        switch (c2) {
        case '*':
            switch (c3) {
            case '=':
                return DOUBLESTAREQUAL;
            }
            break;
        }
        break;
    case '/':
        switch (c2) {
        case '/':
            switch (c3) {
            case '=':
                return DOUBLESLASHEQUAL;
            }
            break;
        }
        break;
    }
    return OP;
}
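/* For example, tok_get below resolves "<<=" in stages: PyToken_TwoChars('<', '<')
   returns LEFTSHIFT, which is not OP, so a third character is read and
   PyToken_ThreeChars('<', '<', '=') returns LEFTSHIFTEQUAL; a lone '<' falls
   through to PyToken_OneChar and comes back as LESS. */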
static int
indenterror(struct tok_state *tok)
{
    if (tok->alterror) {
        tok->done = E_TABSPACE;
        tok->cur = tok->inp;
        return 1;
    }
    if (tok->altwarning) {
        PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
                          "in indentation\n", tok->filename);
        tok->altwarning = 0;
    }
    return 0;
}
/* Get next token, after space stripping etc. */

static int
tok_get(register struct tok_state *tok, char **p_start, char **p_end)
{
    register int c;
    int blankline;

    *p_start = *p_end = NULL;
  nextline:
    tok->start = NULL;
    blankline = 0;

    /* Get indentation level */
    if (tok->atbol) {
        register int col = 0;
        register int altcol = 0;
        tok->atbol = 0;
        for (;;) {
            c = tok_nextc(tok);
            if (c == ' ')
                col++, altcol++;
            else if (c == '\t') {
                col = (col/tok->tabsize + 1) * tok->tabsize;
                altcol = (altcol/tok->alttabsize + 1) * tok->alttabsize;
            }
            else if (c == '\014') /* Control-L (formfeed) */
                col = altcol = 0; /* For Emacs users */
            else
                break;
        }
        tok_backup(tok, c);
        if (c == '#' || c == '\n') {
            /* Lines with only whitespace and/or comments
               shouldn't affect the indentation and are
               not passed to the parser as NEWLINE tokens,
               except *totally* empty lines in interactive
               mode, which signal the end of a command group. */
            if (col == 0 && c == '\n' && tok->prompt != NULL)
                blankline = 0; /* Let it through */
            else
                blankline = 1; /* Ignore completely */
            /* We can't jump back right here since we still
               may need to skip to the end of a comment */
        }
        if (!blankline && tok->level == 0) {
            if (col == tok->indstack[tok->indent]) {
                /* No change */
                if (altcol != tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
            }
            else if (col > tok->indstack[tok->indent]) {
                /* Indent -- always one */
                if (tok->indent+1 >= MAXINDENT) {
                    tok->done = E_TOODEEP;
                    tok->cur = tok->inp;
                    return ERRORTOKEN;
                }
                if (altcol <= tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
                tok->pendin++;
                tok->indstack[++tok->indent] = col;
                tok->altindstack[tok->indent] = altcol;
            }
            else /* col < tok->indstack[tok->indent] */ {
                /* Dedent -- any number, must be consistent */
                while (tok->indent > 0 &&
                       col < tok->indstack[tok->indent]) {
                    tok->pendin--;
                    tok->indent--;
                }
                if (col != tok->indstack[tok->indent]) {
                    tok->done = E_DEDENT;
                    tok->cur = tok->inp;
                    return ERRORTOKEN;
                }
                if (altcol != tok->altindstack[tok->indent]) {
                    if (indenterror(tok))
                        return ERRORTOKEN;
                }
            }
        }
    }

    tok->start = tok->cur;

    /* Return pending indents/dedents */
    if (tok->pendin != 0) {
        if (tok->pendin < 0) {
            tok->pendin++;
            return DEDENT;
        }
        else {
            tok->pendin--;
            return INDENT;
        }
    }

  again:
    tok->start = NULL;
    /* Skip spaces */
    do {
        c = tok_nextc(tok);
    } while (c == ' ' || c == '\t' || c == '\014');

    /* Set start of current token */
    tok->start = tok->cur - 1;

    /* Skip comment, while looking for tab-setting magic */
    if (c == '#') {
        static char *tabforms[] = {
            "tab-width:",                   /* Emacs */
            ":tabstop=",                    /* vim, full form */
            ":ts=",                         /* vim, abbreviated form */
            "set tabsize=",                 /* will vi never die? */
            /* more templates can be added here to support other editors */
        };
        char cbuf[80];
        char *tp, **cp;
        tp = cbuf;
        do {
            *tp++ = c = tok_nextc(tok);
        } while (c != EOF && c != '\n' &&
                 (size_t)(tp - cbuf + 1) < sizeof(cbuf));
        *tp = '\0';
        for (cp = tabforms;
             cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
             cp++) {
            if ((tp = strstr(cbuf, *cp))) {
                int newsize = atoi(tp + strlen(*cp));

                if (newsize >= 1 && newsize <= 40) {
                    tok->tabsize = newsize;
                    if (Py_VerboseFlag)
                        PySys_WriteStderr(
                            "Tab size set to %d\n",
                            newsize);
                }
            }
        }
        while (c != EOF && c != '\n')
            c = tok_nextc(tok);
    }

    /* Check for EOF and errors now */
    if (c == EOF) {
        return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
    }
    /* Identifier (most frequent token!) */
    if (isalpha(c) || c == '_') {
        /* Process b"", r"", u"", br"" and ur"" prefixes */
        switch (c) {
        case 'b':
        case 'B':
            c = tok_nextc(tok);
            if (c == 'r' || c == 'R')
                c = tok_nextc(tok);
            if (c == '"' || c == '\'')
                goto letter_quote;
            break;
        case 'r':
        case 'R':
            c = tok_nextc(tok);
            if (c == '"' || c == '\'')
                goto letter_quote;
            break;
        case 'u':
        case 'U':
            c = tok_nextc(tok);
            if (c == 'r' || c == 'R')
                c = tok_nextc(tok);
            if (c == '"' || c == '\'')
                goto letter_quote;
            break;
        }
        while (isalnum(c) || c == '_') {
            c = tok_nextc(tok);
        }
        tok_backup(tok, c);
        *p_start = tok->start;
        *p_end = tok->cur;
        return NAME;
    }
    /* Newline */
    if (c == '\n') {
        tok->atbol = 1;
        if (blankline || tok->level > 0)
            goto nextline;
        *p_start = tok->start;
        *p_end = tok->cur - 1; /* Leave '\n' out of the string */
        tok->cont_line = 0;
        return NEWLINE;
    }

    /* Period or number starting with period? */
    if (c == '.') {
        c = tok_nextc(tok);
        if (isdigit(c)) {
            goto fraction;
        }
        else {
            tok_backup(tok, c);
            *p_start = tok->start;
            *p_end = tok->cur;
            return DOT;
        }
    }

    /* Number */
    if (isdigit(c)) {
        if (c == '0') {
            /* Hex, octal or binary -- maybe. */
            c = tok_nextc(tok);
            if (c == '.')
                goto fraction;
#ifndef WITHOUT_COMPLEX
            if (c == 'j' || c == 'J')
                goto imaginary;
#endif
            if (c == 'x' || c == 'X') {
                /* Hex */
                c = tok_nextc(tok);
                if (!isxdigit(c)) {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while (isxdigit(c));
            }
            else if (c == 'o' || c == 'O') {
                /* Octal */
                c = tok_nextc(tok);
                if (c < '0' || c >= '8') {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while ('0' <= c && c < '8');
            }
            else if (c == 'b' || c == 'B') {
                /* Binary */
                c = tok_nextc(tok);
                if (c != '0' && c != '1') {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                do {
                    c = tok_nextc(tok);
                } while (c == '0' || c == '1');
            }
            else {
                int found_decimal = 0;
                /* Octal; c is first char of it */
                /* There's no 'isoctdigit' macro, sigh */
                while ('0' <= c && c < '8') {
                    c = tok_nextc(tok);
                }
                if (isdigit(c)) {
                    found_decimal = 1;
                    do {
                        c = tok_nextc(tok);
                    } while (isdigit(c));
                }
                if (c == '.')
                    goto fraction;
                else if (c == 'e' || c == 'E')
                    goto exponent;
#ifndef WITHOUT_COMPLEX
                else if (c == 'j' || c == 'J')
                    goto imaginary;
#endif
                else if (found_decimal) {
                    tok->done = E_TOKEN;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
            }
            if (c == 'l' || c == 'L')
                c = tok_nextc(tok);
        }
        else {
            /* Decimal */
            do {
                c = tok_nextc(tok);
            } while (isdigit(c));
            if (c == 'l' || c == 'L')
                c = tok_nextc(tok);
            else {
                /* Accept floating point numbers. */
                if (c == '.') {
          fraction:
                    /* Fraction */
                    do {
                        c = tok_nextc(tok);
                    } while (isdigit(c));
                }
                if (c == 'e' || c == 'E') {
          exponent:
                    /* Exponent part */
                    c = tok_nextc(tok);
                    if (c == '+' || c == '-')
                        c = tok_nextc(tok);
                    if (!isdigit(c)) {
                        tok->done = E_TOKEN;
                        tok_backup(tok, c);
                        return ERRORTOKEN;
                    }
                    do {
                        c = tok_nextc(tok);
                    } while (isdigit(c));
                }
#ifndef WITHOUT_COMPLEX
                if (c == 'j' || c == 'J')
                    /* Imaginary part */
          imaginary:
                    c = tok_nextc(tok);
#endif
            }
        }
        tok_backup(tok, c);
        *p_start = tok->start;
        *p_end = tok->cur;
        return NUMBER;
    }
  letter_quote:
    /* String */
    if (c == '\'' || c == '"') {
        Py_ssize_t quote2 = tok->cur - tok->start + 1;
        int quote = c;
        int triple = 0;
        int tripcount = 0;
        for (;;) {
            c = tok_nextc(tok);
            if (c == '\n') {
                if (!triple) {
                    tok->done = E_EOLS;
                    tok_backup(tok, c);
                    return ERRORTOKEN;
                }
                tripcount = 0;
                tok->cont_line = 1; /* multiline string. */
            }
            else if (c == EOF) {
                if (triple)
                    tok->done = E_EOFS;
                else
                    tok->done = E_EOLS;
                tok->cur = tok->inp;
                return ERRORTOKEN;
            }
            else if (c == quote) {
                tripcount++;
                if (tok->cur - tok->start == quote2) {
                    c = tok_nextc(tok);
                    if (c == quote) {
                        triple = 1;
                        tripcount = 0;
                        continue;
                    }
                    tok_backup(tok, c);
                }
                if (!triple || tripcount == 3)
                    break;
            }
            else if (c == '\\') {
                tripcount = 0;
                c = tok_nextc(tok);
                if (c == EOF) {
                    tok->done = E_EOLS;
                    tok->cur = tok->inp;
                    return ERRORTOKEN;
                }
            }
            else
                tripcount = 0;
        }
        *p_start = tok->start;
        *p_end = tok->cur;
        return STRING;
    }
    /* Line continuation */
    if (c == '\\') {
        c = tok_nextc(tok);
        if (c != '\n') {
            tok->done = E_LINECONT;
            tok->cur = tok->inp;
            return ERRORTOKEN;
        }
        tok->cont_line = 1;
        goto again; /* Read next line */
    }

    /* Check for two-character token */
    {
        int c2 = tok_nextc(tok);
        int token = PyToken_TwoChars(c, c2);
#ifndef PGEN
        if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') {
            if (PyErr_WarnExplicit(PyExc_DeprecationWarning,
                                   "<> not supported in 3.x; use !=",
                                   tok->filename, tok->lineno,
                                   NULL, NULL)) {
                return ERRORTOKEN;
            }
        }
#endif
        if (token != OP) {
            int c3 = tok_nextc(tok);
            int token3 = PyToken_ThreeChars(c, c2, c3);
            if (token3 != OP) {
                token = token3;
            } else {
                tok_backup(tok, c3);
            }
            *p_start = tok->start;
            *p_end = tok->cur;
            return token;
        }
        tok_backup(tok, c2);
    }

    /* Keep track of parentheses nesting level */
    switch (c) {
    case '(':
    case '[':
    case '{':
        tok->level++;
        break;
    case ')':
    case ']':
    case '}':
        tok->level--;
        break;
    }

    /* Punctuation character */
    *p_start = tok->start;
    *p_end = tok->cur;
    return PyToken_OneChar(c);
}
int
PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
{
    int result = tok_get(tok, p_start, p_end);
    if (tok->decoding_erred) {
        result = ERRORTOKEN;
        tok->done = E_DECODE;
    }
    return result;
}
/* This function is only called from parsetok. However, it cannot live
   there, as it must be empty for PGEN, and we can check for PGEN only
   in this file. */

#if defined(PGEN) || !defined(Py_USING_UNICODE)
char*
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
{
    return NULL;
}
#else
#ifdef Py_USING_UNICODE
static PyObject *
dec_utf8(const char *enc, const char *text, size_t len) {
    PyObject *ret = NULL;
    PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
    if (unicode_text) {
        ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
        Py_DECREF(unicode_text);
    }
    if (!ret) {
        PyErr_Clear();
    }
    return ret;
}

char *
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
{
    char *text = NULL;
    if (tok->encoding) {
        /* convert source to original encoding */
        PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
        if (lineobj != NULL) {
            int linelen = PyString_Size(lineobj);
            const char *line = PyString_AsString(lineobj);
            text = PyObject_MALLOC(linelen + 1);
            if (text != NULL && line != NULL) {
                if (linelen)
                    strncpy(text, line, linelen);
                text[linelen] = '\0';
            }
            Py_DECREF(lineobj);

            /* adjust error offset */
            if (*offset > 1) {
                PyObject *offsetobj = dec_utf8(tok->encoding,
                                               tok->buf, *offset-1);
                if (offsetobj) {
                    *offset = PyString_Size(offsetobj) + 1;
                    Py_DECREF(offsetobj);
                }
            }
        }
    }
    return text;
}
#endif /* defined(Py_USING_UNICODE) */
#endif
#ifdef Py_DEBUG

void
tok_dump(int type, char *start, char *end)
{
    printf("%s", _PyParser_TokenNames[type]);
    if (type == NAME || type == NUMBER || type == STRING || type == OP)
        printf("(%.*s)", (int)(end - start), start);
}

#endif