Parser/tokenizer.c
2 /* Tokenizer implementation */
4 #include "Python.h"
5 #include "pgenheaders.h"
7 #include <ctype.h>
8 #include <assert.h>
10 #include "tokenizer.h"
11 #include "errcode.h"
13 #ifndef PGEN
14 #include "unicodeobject.h"
15 #include "stringobject.h"
16 #include "fileobject.h"
17 #include "codecs.h"
18 #include "abstract.h"
19 #include "pydebug.h"
20 #endif /* PGEN */
22 extern char *PyOS_Readline(FILE *, FILE *, char *);
23 /* Return malloc'ed string including trailing \n;
24 empty malloc'ed string for EOF;
25 NULL if interrupted */
27 /* Don't ever change this -- it would break the portability of Python code */
28 #define TABSIZE 8
30 /* Forward */
31 static struct tok_state *tok_new(void);
32 static int tok_nextc(struct tok_state *tok);
33 static void tok_backup(struct tok_state *tok, int c);
35 /* Token names */
37 char *_PyParser_TokenNames[] = {
38 "ENDMARKER",
39 "NAME",
40 "NUMBER",
41 "STRING",
42 "NEWLINE",
43 "INDENT",
44 "DEDENT",
45 "LPAR",
46 "RPAR",
47 "LSQB",
48 "RSQB",
49 "COLON",
50 "COMMA",
51 "SEMI",
52 "PLUS",
53 "MINUS",
54 "STAR",
55 "SLASH",
56 "VBAR",
57 "AMPER",
58 "LESS",
59 "GREATER",
60 "EQUAL",
61 "DOT",
62 "PERCENT",
63 "BACKQUOTE",
64 "LBRACE",
65 "RBRACE",
66 "EQEQUAL",
67 "NOTEQUAL",
68 "LESSEQUAL",
69 "GREATEREQUAL",
70 "TILDE",
71 "CIRCUMFLEX",
72 "LEFTSHIFT",
73 "RIGHTSHIFT",
74 "DOUBLESTAR",
75 "PLUSEQUAL",
76 "MINEQUAL",
77 "STAREQUAL",
78 "SLASHEQUAL",
79 "PERCENTEQUAL",
80 "AMPEREQUAL",
81 "VBAREQUAL",
82 "CIRCUMFLEXEQUAL",
83 "LEFTSHIFTEQUAL",
84 "RIGHTSHIFTEQUAL",
85 "DOUBLESTAREQUAL",
86 "DOUBLESLASH",
87 "DOUBLESLASHEQUAL",
88 "AT",
89 /* This table must match the #defines in token.h! */
90 "OP",
91 "<ERRORTOKEN>",
92 "<N_TOKENS>"
96 /* Create and initialize a new tok_state structure */
98 static struct tok_state *
99 tok_new(void)
101 struct tok_state *tok = (struct tok_state *)PyMem_MALLOC(
102 sizeof(struct tok_state));
103 if (tok == NULL)
104 return NULL;
105 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
106 tok->done = E_OK;
107 tok->fp = NULL;
108 tok->input = NULL;
109 tok->tabsize = TABSIZE;
110 tok->indent = 0;
111 tok->indstack[0] = 0;
112 tok->atbol = 1;
113 tok->pendin = 0;
114 tok->prompt = tok->nextprompt = NULL;
115 tok->lineno = 0;
116 tok->level = 0;
117 tok->filename = NULL;
118 tok->altwarning = 0;
119 tok->alterror = 0;
120 tok->alttabsize = 1;
121 tok->altindstack[0] = 0;
122 tok->decoding_state = 0;
123 tok->decoding_erred = 0;
124 tok->read_coding_spec = 0;
125 tok->encoding = NULL;
126 tok->cont_line = 0;
127 #ifndef PGEN
128 tok->decoding_readline = NULL;
129 tok->decoding_buffer = NULL;
130 #endif
131 return tok;
134 static char *
135 new_string(const char *s, Py_ssize_t len)
137 char* result = (char *)PyMem_MALLOC(len + 1);
138 if (result != NULL) {
139 memcpy(result, s, len);
140 result[len] = '\0';
142 return result;
145 #ifdef PGEN
147 static char *
148 decoding_fgets(char *s, int size, struct tok_state *tok)
150 return fgets(s, size, tok->fp);
153 static int
154 decoding_feof(struct tok_state *tok)
156 return feof(tok->fp);
159 static char *
160 decode_str(const char *str, int exec_input, struct tok_state *tok)
162 return new_string(str, strlen(str));
165 #else /* PGEN */
167 static char *
168 error_ret(struct tok_state *tok) /* XXX */
170 tok->decoding_erred = 1;
171 if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */
172 PyMem_FREE(tok->buf);
173 tok->buf = NULL;
174 return NULL; /* as if it were EOF */
178 static char *
179 get_normal_name(char *s) /* for utf-8 and latin-1 */
181 char buf[13];
182 int i;
183 for (i = 0; i < 12; i++) {
184 int c = s[i];
185 if (c == '\0')
186 break;
187 else if (c == '_')
188 buf[i] = '-';
189 else
190 buf[i] = tolower(c);
192 buf[i] = '\0';
193 if (strcmp(buf, "utf-8") == 0 ||
194 strncmp(buf, "utf-8-", 6) == 0)
195 return "utf-8";
196 else if (strcmp(buf, "latin-1") == 0 ||
197 strcmp(buf, "iso-8859-1") == 0 ||
198 strcmp(buf, "iso-latin-1") == 0 ||
199 strncmp(buf, "latin-1-", 8) == 0 ||
200 strncmp(buf, "iso-8859-1-", 11) == 0 ||
201 strncmp(buf, "iso-latin-1-", 12) == 0)
202 return "iso-8859-1";
203 else
204 return s;
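/* Illustrative sketch, not part of the original file: what get_normal_name
   above does with a few encoding spellings.  Only the utf-8 and latin-1
   families are canonicalized; anything else comes back unchanged.  The
   function name normal_name_examples is hypothetical. */
#if 0
static void
normal_name_examples(void)
{
    char a[] = "UTF_8";   /* lowercased, '_' -> '-'  => matches "utf-8"    */
    char b[] = "Latin_1"; /* matches the latin-1 set => "iso-8859-1"       */
    char c[] = "ascii";   /* no match                => returned unchanged */
    assert(strcmp(get_normal_name(a), "utf-8") == 0);
    assert(strcmp(get_normal_name(b), "iso-8859-1") == 0);
    assert(strcmp(get_normal_name(c), "ascii") == 0);
}
#endif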
207 /* Return the coding spec in S, or NULL if none is found. */
209 static char *
210 get_coding_spec(const char *s, Py_ssize_t size)
212 Py_ssize_t i;
213 /* Coding spec must be in a comment, and that comment must be
214 * the only statement on the source code line. */
215 for (i = 0; i < size - 6; i++) {
216 if (s[i] == '#')
217 break;
218 if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
219 return NULL;
221 for (; i < size - 6; i++) { /* XXX inefficient search */
222 const char* t = s + i;
223 if (strncmp(t, "coding", 6) == 0) {
224 const char* begin = NULL;
225 t += 6;
226 if (t[0] != ':' && t[0] != '=')
227 continue;
228 do {
229 t++;
230 } while (t[0] == '\x20' || t[0] == '\t');
232 begin = t;
233 while (isalnum(Py_CHARMASK(t[0])) ||
234 t[0] == '-' || t[0] == '_' || t[0] == '.')
235 t++;
237 if (begin < t) {
238 char* r = new_string(begin, t - begin);
239 char* q = get_normal_name(r);
240 if (r != q) {
241 PyMem_FREE(r);
242 r = new_string(q, strlen(q));
244 return r;
248 return NULL;
251 /* Check whether the line contains a coding spec. If it does,
252 invoke the set_readline function for the new encoding.
253 This function receives the tok_state and the new encoding.
254 Return 1 on success, 0 on failure. */
256 static int
257 check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
258 int set_readline(struct tok_state *, const char *))
260 char * cs;
261 int r = 1;
263 if (tok->cont_line)
264 /* It's a continuation line, so it can't be a coding spec. */
265 return 1;
266 cs = get_coding_spec(line, size);
267 if (cs != NULL) {
268 tok->read_coding_spec = 1;
269 if (tok->encoding == NULL) {
270 assert(tok->decoding_state == 1); /* raw */
271 if (strcmp(cs, "utf-8") == 0 ||
272 strcmp(cs, "iso-8859-1") == 0) {
273 tok->encoding = cs;
274 } else {
275 #ifdef Py_USING_UNICODE
276 r = set_readline(tok, cs);
277 if (r) {
278 tok->encoding = cs;
279 tok->decoding_state = -1;
281 else
282 PyMem_FREE(cs);
283 #else
284 /* Without Unicode support, we cannot
285 process the coding spec. Since there
286 won't be any Unicode literals, that
287 won't matter. */
288 PyMem_FREE(cs);
289 #endif
291 } else { /* then, compare cs with BOM */
292 r = (strcmp(tok->encoding, cs) == 0);
293 PyMem_FREE(cs);
296 if (!r) {
297 cs = tok->encoding;
298 if (!cs)
299 cs = "with BOM";
300 PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs);
302 return r;
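/* Illustrative sketch, not part of the original file: the kind of line the
   two functions above act on (PEP 263).  The declaration must live in a
   comment that is the only thing on the line, and the name following
   "coding:" or "coding=" is normalized through get_normal_name.  The
   function name coding_spec_example is hypothetical. */
#if 0
static void
coding_spec_example(void)
{
    const char *line = "# -*- coding: iso8859-15 -*-\n";
    char *spec = get_coding_spec(line, strlen(line));
    /* spec is a malloc'ed copy of "iso8859-15", or NULL if the line carried
       no coding declaration; check_coding_spec releases it with PyMem_FREE
       once the encoding has been installed. */
    if (spec != NULL)
        PyMem_FREE(spec);
}
#endif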
305 /* See whether the file starts with a BOM. If it does,
306 invoke the set_readline function with the new encoding.
307 Return 1 on success, 0 on failure. */
309 static int
310 check_bom(int get_char(struct tok_state *),
311 void unget_char(int, struct tok_state *),
312 int set_readline(struct tok_state *, const char *),
313 struct tok_state *tok)
315 int ch = get_char(tok);
316 tok->decoding_state = 1;
317 if (ch == EOF) {
318 return 1;
319 } else if (ch == 0xEF) {
320 ch = get_char(tok);
321 if (ch != 0xBB)
322 goto NON_BOM;
323 ch = get_char(tok);
324 if (ch != 0xBF)
325 goto NON_BOM;
326 #if 0
327 /* Disable support for UTF-16 BOMs until a decision
328 is made whether this needs to be supported. */
329 } else if (ch == 0xFE) {
330 ch = get_char(tok);
331 if (ch != 0xFF)
332 goto NON_BOM;
333 if (!set_readline(tok, "utf-16-be"))
334 return 0;
335 tok->decoding_state = -1;
336 } else if (ch == 0xFF) {
337 ch = get_char(tok);
338 if (ch != 0xFE)
339 goto NON_BOM;
340 if (!set_readline(tok, "utf-16-le"))
341 return 0;
342 tok->decoding_state = -1;
343 #endif
344 } else {
345 unget_char(ch, tok);
346 return 1;
348 if (tok->encoding != NULL)
349 PyMem_FREE(tok->encoding);
350 tok->encoding = new_string("utf-8", 5); /* resulting string is in utf-8 */
351 return 1;
352 NON_BOM:
353 /* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */
354 unget_char(0xFF, tok); /* XXX this will cause a syntax error */
355 return 1;
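/* Illustrative note, not part of the original file: the byte sequences
   check_bom above looks for.  Only the UTF-8 BOM is acted on; with the
   UTF-16 branches compiled out, a leading 0xFE or 0xFF byte is simply
   pushed back and later tokenized as an ordinary (invalid) character. */
#if 0
static const unsigned char utf8_bom[]     = { 0xEF, 0xBB, 0xBF }; /* consumed; tok->encoding set to "utf-8" */
static const unsigned char utf16_be_bom[] = { 0xFE, 0xFF };       /* handled only by the disabled block above */
static const unsigned char utf16_le_bom[] = { 0xFF, 0xFE };       /* handled only by the disabled block above */
#endif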
358 /* Read a line of text from TOK into S, using the stream in TOK.
359 Return NULL on failure, else S.
361 On entry, tok->decoding_buffer will be one of:
362 1) NULL: need to call tok->decoding_readline to get a new line
363 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and
364 stored the result in tok->decoding_buffer
365 3) PyStringObject *: previous call to fp_readl did not have enough room
366 (in the s buffer) to copy entire contents of the line read
367 by tok->decoding_readline. tok->decoding_buffer has the overflow.
368 In this case, fp_readl is called in a loop (with an expanded buffer)
369 until the buffer ends with a '\n' (or until the end of the file is
370 reached): see tok_nextc and its calls to decoding_fgets. */
373 static char *
374 fp_readl(char *s, int size, struct tok_state *tok)
376 #ifndef Py_USING_UNICODE
377 /* In a non-Unicode build, this should never be called. */
378 Py_FatalError("fp_readl should not be called in this build.");
379 return NULL; /* Keep compiler happy (not reachable) */
380 #else
381 PyObject* utf8 = NULL;
382 PyObject* buf = tok->decoding_buffer;
383 char *str;
384 Py_ssize_t utf8len;
386 /* Ask for one less byte so we can terminate it */
387 assert(size > 0);
388 size--;
390 if (buf == NULL) {
391 buf = PyObject_CallObject(tok->decoding_readline, NULL);
392 if (buf == NULL)
393 return error_ret(tok);
394 } else {
395 tok->decoding_buffer = NULL;
396 if (PyString_CheckExact(buf))
397 utf8 = buf;
399 if (utf8 == NULL) {
400 utf8 = PyUnicode_AsUTF8String(buf);
401 Py_DECREF(buf);
402 if (utf8 == NULL)
403 return error_ret(tok);
405 str = PyString_AsString(utf8);
406 utf8len = PyString_GET_SIZE(utf8);
407 if (utf8len > size) {
408 tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size);
409 if (tok->decoding_buffer == NULL) {
410 Py_DECREF(utf8);
411 return error_ret(tok);
413 utf8len = size;
415 memcpy(s, str, utf8len);
416 s[utf8len] = '\0';
417 Py_DECREF(utf8);
418 if (utf8len == 0)
419 return NULL; /* EOF */
420 return s;
421 #endif
424 /* Set the readline function for TOK to a StreamReader's
425 readline function. The StreamReader is named ENC.
427 This function is called from check_bom and check_coding_spec.
429 ENC is usually identical to the future value of tok->encoding,
430 except for the (currently unsupported) case of UTF-16.
432 Return 1 on success, 0 on failure. */
434 static int
435 fp_setreadl(struct tok_state *tok, const char* enc)
437 PyObject *reader, *stream, *readline;
439 /* XXX: constify filename argument. */
440 stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
441 if (stream == NULL)
442 return 0;
444 reader = PyCodec_StreamReader(enc, stream, NULL);
445 Py_DECREF(stream);
446 if (reader == NULL)
447 return 0;
449 readline = PyObject_GetAttrString(reader, "readline");
450 Py_DECREF(reader);
451 if (readline == NULL)
452 return 0;
454 tok->decoding_readline = readline;
455 return 1;
458 /* Fetch the next byte from TOK. */
460 static int fp_getc(struct tok_state *tok) {
461 return getc(tok->fp);
464 /* Unfetch the last byte back into TOK. */
466 static void fp_ungetc(int c, struct tok_state *tok) {
467 ungetc(c, tok->fp);
470 /* Read a line of input from TOK. Determine encoding
471 if necessary. */
473 static char *
474 decoding_fgets(char *s, int size, struct tok_state *tok)
476 char *line = NULL;
477 int badchar = 0;
478 for (;;) {
479 if (tok->decoding_state < 0) {
480 /* We already have a codec associated with
481 this input. */
482 line = fp_readl(s, size, tok);
483 break;
484 } else if (tok->decoding_state > 0) {
485 /* We want a 'raw' read. */
486 line = Py_UniversalNewlineFgets(s, size,
487 tok->fp, NULL);
488 break;
489 } else {
490 /* We have not yet determined the encoding.
491 If an encoding is found, use the file-pointer
492 reader functions from now on. */
493 if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))
494 return error_ret(tok);
495 assert(tok->decoding_state != 0);
498 if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {
499 if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {
500 return error_ret(tok);
503 #ifndef PGEN
504 /* The default encoding is ASCII, so make sure we don't have any
505 non-ASCII bytes in it. */
506 if (line && !tok->encoding) {
507 unsigned char *c;
508 for (c = (unsigned char *)line; *c; c++)
509 if (*c > 127) {
510 badchar = *c;
511 break;
514 if (badchar) {
515 char buf[500];
516 /* Need to add 1 to the line number, since this line
517 has not been counted yet. */
518 sprintf(buf,
519 "Non-ASCII character '\\x%.2x' "
520 "in file %.200s on line %i, "
521 "but no encoding declared; "
522 "see http://www.python.org/peps/pep-0263.html for details",
523 badchar, tok->filename, tok->lineno + 1);
524 PyErr_SetString(PyExc_SyntaxError, buf);
525 return error_ret(tok);
527 #endif
528 return line;
531 static int
532 decoding_feof(struct tok_state *tok)
534 if (tok->decoding_state >= 0) {
535 return feof(tok->fp);
536 } else {
537 PyObject* buf = tok->decoding_buffer;
538 if (buf == NULL) {
539 buf = PyObject_CallObject(tok->decoding_readline, NULL);
540 if (buf == NULL) {
541 error_ret(tok);
542 return 1;
543 } else {
544 tok->decoding_buffer = buf;
547 return PyObject_Length(buf) == 0;
551 /* Fetch a byte from TOK, using the string buffer. */
553 static int
554 buf_getc(struct tok_state *tok) {
555 return Py_CHARMASK(*tok->str++);
558 /* Unfetch a byte from TOK, using the string buffer. */
560 static void
561 buf_ungetc(int c, struct tok_state *tok) {
562 tok->str--;
563 assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */
566 /* Set the readline function for TOK to ENC. For the string-based
567 tokenizer, this means to just record the encoding. */
569 static int
570 buf_setreadl(struct tok_state *tok, const char* enc) {
571 tok->enc = enc;
572 return 1;
575 /* Return a UTF-8 encoding Python string object from the
576 C byte string STR, which is encoded with ENC. */
578 #ifdef Py_USING_UNICODE
579 static PyObject *
580 translate_into_utf8(const char* str, const char* enc) {
581 PyObject *utf8;
582 PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);
583 if (buf == NULL)
584 return NULL;
585 utf8 = PyUnicode_AsUTF8String(buf);
586 Py_DECREF(buf);
587 return utf8;
589 #endif
592 static char *
593 translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
594 int skip_next_lf = 0, length = strlen(s), final_length;
595 char *buf, *current;
596 char c;
597 buf = PyMem_MALLOC(length + 2);
598 if (buf == NULL) {
599 tok->done = E_NOMEM;
600 return NULL;
602 for (current = buf; (c = *s++);) {
603 if (skip_next_lf) {
604 skip_next_lf = 0;
605 if (c == '\n') {
606 c = *s;
607 s++;
608 if (!c)
609 break;
612 if (c == '\r') {
613 skip_next_lf = 1;
614 c = '\n';
616 *current = c;
617 current++;
619 /* If this is exec input, add a newline to the end of the file if
620 there isn't one already. */
621 if (exec_input && *current != '\n') {
622 *current = '\n';
623 current++;
625 *current = '\0';
626 final_length = current - buf;
627 if (final_length < length && final_length)
628 /* should never fail */
629 buf = PyMem_REALLOC(buf, final_length + 1);
630 return buf;
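/* Illustrative sketch, not part of the original file: what translate_newlines
   above produces.  "\r\n" and a bare "\r" both become "\n", and with
   exec_input non-zero a trailing newline is appended when the text does not
   already end in one.  The function name newline_example is hypothetical. */
#if 0
static void
newline_example(struct tok_state *tok)
{
    char *out = translate_newlines("a = 1\r\nb = 2", 1, tok);
    /* out now holds "a = 1\nb = 2\n".  The buffer comes from PyMem_MALLOC;
       in decode_str it is kept as tok->input and released by
       PyTokenizer_Free, here we free it directly. */
    if (out != NULL)
        PyMem_FREE(out);
}
#endif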
633 /* Decode a byte string STR for use as the buffer of TOK.
634 Look for encoding declarations inside STR, and record them
635 inside TOK. */
637 static const char *
638 decode_str(const char *input, int single, struct tok_state *tok)
640 PyObject* utf8 = NULL;
641 const char *str;
642 const char *s;
643 const char *newl[2] = {NULL, NULL};
644 int lineno = 0;
645 tok->input = str = translate_newlines(input, single, tok);
646 if (str == NULL)
647 return NULL;
648 tok->enc = NULL;
649 tok->str = str;
650 if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
651 return error_ret(tok);
652 str = tok->str; /* string after BOM if any */
653 assert(str);
654 #ifdef Py_USING_UNICODE
655 if (tok->enc != NULL) {
656 utf8 = translate_into_utf8(str, tok->enc);
657 if (utf8 == NULL)
658 return error_ret(tok);
659 str = PyString_AsString(utf8);
661 #endif
662 for (s = str;; s++) {
663 if (*s == '\0') break;
664 else if (*s == '\n') {
665 assert(lineno < 2);
666 newl[lineno] = s;
667 lineno++;
668 if (lineno == 2) break;
671 tok->enc = NULL;
672 /* need to check line 1 and 2 separately since check_coding_spec
673 assumes a single line as input */
674 if (newl[0]) {
675 if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))
676 return error_ret(tok);
677 if (tok->enc == NULL && newl[1]) {
678 if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],
679 tok, buf_setreadl))
680 return error_ret(tok);
683 #ifdef Py_USING_UNICODE
684 if (tok->enc != NULL) {
685 assert(utf8 == NULL);
686 utf8 = translate_into_utf8(str, tok->enc);
687 if (utf8 == NULL)
688 return error_ret(tok);
689 str = PyString_AsString(utf8);
691 #endif
692 assert(tok->decoding_buffer == NULL);
693 tok->decoding_buffer = utf8; /* CAUTION */
694 return str;
697 #endif /* PGEN */
699 /* Set up tokenizer for string */
701 struct tok_state *
702 PyTokenizer_FromString(const char *str, int exec_input)
704 struct tok_state *tok = tok_new();
705 if (tok == NULL)
706 return NULL;
707 str = (char *)decode_str(str, exec_input, tok);
708 if (str == NULL) {
709 PyTokenizer_Free(tok);
710 return NULL;
713 /* XXX: constify members. */
714 tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
715 return tok;
719 /* Set up tokenizer for file */
721 struct tok_state *
722 PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2)
724 struct tok_state *tok = tok_new();
725 if (tok == NULL)
726 return NULL;
727 if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {
728 PyTokenizer_Free(tok);
729 return NULL;
731 tok->cur = tok->inp = tok->buf;
732 tok->end = tok->buf + BUFSIZ;
733 tok->fp = fp;
734 tok->prompt = ps1;
735 tok->nextprompt = ps2;
736 return tok;
740 /* Free a tok_state structure */
742 void
743 PyTokenizer_Free(struct tok_state *tok)
745 if (tok->encoding != NULL)
746 PyMem_FREE(tok->encoding);
747 #ifndef PGEN
748 Py_XDECREF(tok->decoding_readline);
749 Py_XDECREF(tok->decoding_buffer);
750 #endif
751 if (tok->fp != NULL && tok->buf != NULL)
752 PyMem_FREE(tok->buf);
753 if (tok->input)
754 PyMem_FREE((char *)tok->input);
755 PyMem_FREE(tok);
758 #if !defined(PGEN) && defined(Py_USING_UNICODE)
759 static int
760 tok_stdin_decode(struct tok_state *tok, char **inp)
762 PyObject *enc, *sysstdin, *decoded, *utf8;
763 const char *encoding;
764 char *converted;
766 if (PySys_GetFile((char *)"stdin", NULL) != stdin)
767 return 0;
768 sysstdin = PySys_GetObject("stdin");
769 if (sysstdin == NULL || !PyFile_Check(sysstdin))
770 return 0;
772 enc = ((PyFileObject *)sysstdin)->f_encoding;
773 if (enc == NULL || !PyString_Check(enc))
774 return 0;
775 Py_INCREF(enc);
777 encoding = PyString_AsString(enc);
778 decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL);
779 if (decoded == NULL)
780 goto error_clear;
782 utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);
783 Py_DECREF(decoded);
784 if (utf8 == NULL)
785 goto error_clear;
787 assert(PyString_Check(utf8));
788 converted = new_string(PyString_AS_STRING(utf8),
789 PyString_GET_SIZE(utf8));
790 Py_DECREF(utf8);
791 if (converted == NULL)
792 goto error_nomem;
794 PyMem_FREE(*inp);
795 *inp = converted;
796 if (tok->encoding != NULL)
797 PyMem_FREE(tok->encoding);
798 tok->encoding = new_string(encoding, strlen(encoding));
799 if (tok->encoding == NULL)
800 goto error_nomem;
802 Py_DECREF(enc);
803 return 0;
805 error_nomem:
806 Py_DECREF(enc);
807 tok->done = E_NOMEM;
808 return -1;
810 error_clear:
811 /* Fallback to iso-8859-1: for backward compatibility */
812 Py_DECREF(enc);
813 PyErr_Clear();
814 return 0;
816 #endif
818 /* Get next char, updating state; error code goes into tok->done */
820 static int
821 tok_nextc(register struct tok_state *tok)
823 for (;;) {
824 if (tok->cur != tok->inp) {
825 return Py_CHARMASK(*tok->cur++); /* Fast path */
827 if (tok->done != E_OK)
828 return EOF;
829 if (tok->fp == NULL) {
830 char *end = strchr(tok->inp, '\n');
831 if (end != NULL)
832 end++;
833 else {
834 end = strchr(tok->inp, '\0');
835 if (end == tok->inp) {
836 tok->done = E_EOF;
837 return EOF;
840 if (tok->start == NULL)
841 tok->buf = tok->cur;
842 tok->line_start = tok->cur;
843 tok->lineno++;
844 tok->inp = end;
845 return Py_CHARMASK(*tok->cur++);
847 if (tok->prompt != NULL) {
848 char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);
849 if (tok->nextprompt != NULL)
850 tok->prompt = tok->nextprompt;
851 if (newtok == NULL)
852 tok->done = E_INTR;
853 else if (*newtok == '\0') {
854 PyMem_FREE(newtok);
855 tok->done = E_EOF;
857 #if !defined(PGEN) && defined(Py_USING_UNICODE)
858 else if (tok_stdin_decode(tok, &newtok) != 0)
859 PyMem_FREE(newtok);
860 #endif
861 else if (tok->start != NULL) {
862 size_t start = tok->start - tok->buf;
863 size_t oldlen = tok->cur - tok->buf;
864 size_t newlen = oldlen + strlen(newtok);
865 char *buf = tok->buf;
866 buf = (char *)PyMem_REALLOC(buf, newlen+1);
867 tok->lineno++;
868 if (buf == NULL) {
869 PyMem_FREE(tok->buf);
870 tok->buf = NULL;
871 PyMem_FREE(newtok);
872 tok->done = E_NOMEM;
873 return EOF;
875 tok->buf = buf;
876 tok->cur = tok->buf + oldlen;
877 tok->line_start = tok->cur;
878 strcpy(tok->buf + oldlen, newtok);
879 PyMem_FREE(newtok);
880 tok->inp = tok->buf + newlen;
881 tok->end = tok->inp + 1;
882 tok->start = tok->buf + start;
884 else {
885 tok->lineno++;
886 if (tok->buf != NULL)
887 PyMem_FREE(tok->buf);
888 tok->buf = newtok;
889 tok->line_start = tok->buf;
890 tok->cur = tok->buf;
891 tok->line_start = tok->buf;
892 tok->inp = strchr(tok->buf, '\0');
893 tok->end = tok->inp + 1;
896 else {
897 int done = 0;
898 Py_ssize_t cur = 0;
899 char *pt;
900 if (tok->start == NULL) {
901 if (tok->buf == NULL) {
902 tok->buf = (char *)
903 PyMem_MALLOC(BUFSIZ);
904 if (tok->buf == NULL) {
905 tok->done = E_NOMEM;
906 return EOF;
908 tok->end = tok->buf + BUFSIZ;
910 if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),
911 tok) == NULL) {
912 tok->done = E_EOF;
913 done = 1;
915 else {
916 tok->done = E_OK;
917 tok->inp = strchr(tok->buf, '\0');
918 done = tok->inp[-1] == '\n';
921 else {
922 cur = tok->cur - tok->buf;
923 if (decoding_feof(tok)) {
924 tok->done = E_EOF;
925 done = 1;
927 else
928 tok->done = E_OK;
930 tok->lineno++;
931 /* Read until '\n' or EOF */
932 while (!done) {
933 Py_ssize_t curstart = tok->start == NULL ? -1 :
934 tok->start - tok->buf;
935 Py_ssize_t curvalid = tok->inp - tok->buf;
936 Py_ssize_t newsize = curvalid + BUFSIZ;
937 char *newbuf = tok->buf;
938 newbuf = (char *)PyMem_REALLOC(newbuf,
939 newsize);
940 if (newbuf == NULL) {
941 tok->done = E_NOMEM;
942 tok->cur = tok->inp;
943 return EOF;
945 tok->buf = newbuf;
946 tok->inp = tok->buf + curvalid;
947 tok->end = tok->buf + newsize;
948 tok->start = curstart < 0 ? NULL :
949 tok->buf + curstart;
950 if (decoding_fgets(tok->inp,
951 (int)(tok->end - tok->inp),
952 tok) == NULL) {
953 /* Break out early on decoding
954 errors, as tok->buf will be NULL
955 anyway. */
956 if (tok->decoding_erred)
957 return EOF;
958 /* Last line does not end in \n,
959 fake one */
960 strcpy(tok->inp, "\n");
962 tok->inp = strchr(tok->inp, '\0');
963 done = tok->inp[-1] == '\n';
965 if (tok->buf != NULL) {
966 tok->cur = tok->buf + cur;
967 tok->line_start = tok->cur;
968 /* replace "\r\n" with "\n" */
969 /* For Mac leave the \r, giving a syntax error */
970 pt = tok->inp - 2;
971 if (pt >= tok->buf && *pt == '\r') {
972 *pt++ = '\n';
973 *pt = '\0';
974 tok->inp = pt;
978 if (tok->done != E_OK) {
979 if (tok->prompt != NULL)
980 PySys_WriteStderr("\n");
981 tok->cur = tok->inp;
982 return EOF;
985 /*NOTREACHED*/
989 /* Back-up one character */
991 static void
992 tok_backup(register struct tok_state *tok, register int c)
994 if (c != EOF) {
995 if (--tok->cur < tok->buf)
996 Py_FatalError("tok_backup: beginning of buffer");
997 if (*tok->cur != c)
998 *tok->cur = c;
1003 /* Return the token corresponding to a single character */
1005 int
1006 PyToken_OneChar(int c)
1008 switch (c) {
1009 case '(': return LPAR;
1010 case ')': return RPAR;
1011 case '[': return LSQB;
1012 case ']': return RSQB;
1013 case ':': return COLON;
1014 case ',': return COMMA;
1015 case ';': return SEMI;
1016 case '+': return PLUS;
1017 case '-': return MINUS;
1018 case '*': return STAR;
1019 case '/': return SLASH;
1020 case '|': return VBAR;
1021 case '&': return AMPER;
1022 case '<': return LESS;
1023 case '>': return GREATER;
1024 case '=': return EQUAL;
1025 case '.': return DOT;
1026 case '%': return PERCENT;
1027 case '`': return BACKQUOTE;
1028 case '{': return LBRACE;
1029 case '}': return RBRACE;
1030 case '^': return CIRCUMFLEX;
1031 case '~': return TILDE;
1032 case '@': return AT;
1033 default: return OP;
1038 int
1039 PyToken_TwoChars(int c1, int c2)
1041 switch (c1) {
1042 case '=':
1043 switch (c2) {
1044 case '=': return EQEQUAL;
1046 break;
1047 case '!':
1048 switch (c2) {
1049 case '=': return NOTEQUAL;
1051 break;
1052 case '<':
1053 switch (c2) {
1054 case '>': return NOTEQUAL;
1055 case '=': return LESSEQUAL;
1056 case '<': return LEFTSHIFT;
1058 break;
1059 case '>':
1060 switch (c2) {
1061 case '=': return GREATEREQUAL;
1062 case '>': return RIGHTSHIFT;
1064 break;
1065 case '+':
1066 switch (c2) {
1067 case '=': return PLUSEQUAL;
1069 break;
1070 case '-':
1071 switch (c2) {
1072 case '=': return MINEQUAL;
1074 break;
1075 case '*':
1076 switch (c2) {
1077 case '*': return DOUBLESTAR;
1078 case '=': return STAREQUAL;
1080 break;
1081 case '/':
1082 switch (c2) {
1083 case '/': return DOUBLESLASH;
1084 case '=': return SLASHEQUAL;
1086 break;
1087 case '|':
1088 switch (c2) {
1089 case '=': return VBAREQUAL;
1091 break;
1092 case '%':
1093 switch (c2) {
1094 case '=': return PERCENTEQUAL;
1096 break;
1097 case '&':
1098 switch (c2) {
1099 case '=': return AMPEREQUAL;
1101 break;
1102 case '^':
1103 switch (c2) {
1104 case '=': return CIRCUMFLEXEQUAL;
1106 break;
1108 return OP;
1111 int
1112 PyToken_ThreeChars(int c1, int c2, int c3)
1114 switch (c1) {
1115 case '<':
1116 switch (c2) {
1117 case '<':
1118 switch (c3) {
1119 case '=':
1120 return LEFTSHIFTEQUAL;
1122 break;
1124 break;
1125 case '>':
1126 switch (c2) {
1127 case '>':
1128 switch (c3) {
1129 case '=':
1130 return RIGHTSHIFTEQUAL;
1132 break;
1134 break;
1135 case '*':
1136 switch (c2) {
1137 case '*':
1138 switch (c3) {
1139 case '=':
1140 return DOUBLESTAREQUAL;
1142 break;
1144 break;
1145 case '/':
1146 switch (c2) {
1147 case '/':
1148 switch (c3) {
1149 case '=':
1150 return DOUBLESLASHEQUAL;
1152 break;
1154 break;
1156 return OP;
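/* Illustrative sketch, not part of the original file: the longest-match
   layering tok_get applies further down.  Each helper reports a
   progressively longer operator and tok_get keeps the last answer that is
   not plain OP, backing up any unused character.  The function name
   operator_lookup_example is hypothetical. */
#if 0
static void
operator_lookup_example(void)
{
    assert(PyToken_OneChar('*') == STAR);
    assert(PyToken_TwoChars('*', '*') == DOUBLESTAR);
    assert(PyToken_ThreeChars('*', '*', '=') == DOUBLESTAREQUAL);
    assert(PyToken_ThreeChars('<', '<', '=') == LEFTSHIFTEQUAL);
}
#endif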
1159 static int
1160 indenterror(struct tok_state *tok)
1162 if (tok->alterror) {
1163 tok->done = E_TABSPACE;
1164 tok->cur = tok->inp;
1165 return 1;
1167 if (tok->altwarning) {
1168 PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
1169 "in indentation\n", tok->filename);
1170 tok->altwarning = 0;
1172 return 0;
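/* Illustrative sketch, not part of the original file: the column arithmetic
   tok_get uses below when measuring indentation.  A tab advances the real
   column to the next multiple of tok->tabsize (default 8) and the alternate
   column to the next multiple of tok->alttabsize (1), so a line indented
   with one tab and a line indented with eight spaces agree on col but
   disagree on altcol; that mismatch is what indenterror reports.  The helper
   name next_tab_column is hypothetical. */
#if 0
static int
next_tab_column(int col, int tabsize)
{
    return (col / tabsize + 1) * tabsize;
}
#endif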
1176 /* Get next token, after space stripping etc. */
1178 static int
1179 tok_get(register struct tok_state *tok, char **p_start, char **p_end)
1181 register int c;
1182 int blankline;
1184 *p_start = *p_end = NULL;
1185 nextline:
1186 tok->start = NULL;
1187 blankline = 0;
1189 /* Get indentation level */
1190 if (tok->atbol) {
1191 register int col = 0;
1192 register int altcol = 0;
1193 tok->atbol = 0;
1194 for (;;) {
1195 c = tok_nextc(tok);
1196 if (c == ' ')
1197 col++, altcol++;
1198 else if (c == '\t') {
1199 col = (col/tok->tabsize + 1) * tok->tabsize;
1200 altcol = (altcol/tok->alttabsize + 1)
1201 * tok->alttabsize;
1203 else if (c == '\014') /* Control-L (formfeed) */
1204 col = altcol = 0; /* For Emacs users */
1205 else
1206 break;
1208 tok_backup(tok, c);
1209 if (c == '#' || c == '\n') {
1210 /* Lines with only whitespace and/or comments
1211 shouldn't affect the indentation and are
1212 not passed to the parser as NEWLINE tokens,
1213 except *totally* empty lines in interactive
1214 mode, which signal the end of a command group. */
1215 if (col == 0 && c == '\n' && tok->prompt != NULL)
1216 blankline = 0; /* Let it through */
1217 else
1218 blankline = 1; /* Ignore completely */
1219 /* We can't jump back right here since we still
1220 may need to skip to the end of a comment */
1222 if (!blankline && tok->level == 0) {
1223 if (col == tok->indstack[tok->indent]) {
1224 /* No change */
1225 if (altcol != tok->altindstack[tok->indent]) {
1226 if (indenterror(tok))
1227 return ERRORTOKEN;
1230 else if (col > tok->indstack[tok->indent]) {
1231 /* Indent -- always one */
1232 if (tok->indent+1 >= MAXINDENT) {
1233 tok->done = E_TOODEEP;
1234 tok->cur = tok->inp;
1235 return ERRORTOKEN;
1237 if (altcol <= tok->altindstack[tok->indent]) {
1238 if (indenterror(tok))
1239 return ERRORTOKEN;
1241 tok->pendin++;
1242 tok->indstack[++tok->indent] = col;
1243 tok->altindstack[tok->indent] = altcol;
1245 else /* col < tok->indstack[tok->indent] */ {
1246 /* Dedent -- any number, must be consistent */
1247 while (tok->indent > 0 &&
1248 col < tok->indstack[tok->indent]) {
1249 tok->pendin--;
1250 tok->indent--;
1252 if (col != tok->indstack[tok->indent]) {
1253 tok->done = E_DEDENT;
1254 tok->cur = tok->inp;
1255 return ERRORTOKEN;
1257 if (altcol != tok->altindstack[tok->indent]) {
1258 if (indenterror(tok))
1259 return ERRORTOKEN;
1265 tok->start = tok->cur;
1267 /* Return pending indents/dedents */
1268 if (tok->pendin != 0) {
1269 if (tok->pendin < 0) {
1270 tok->pendin++;
1271 return DEDENT;
1273 else {
1274 tok->pendin--;
1275 return INDENT;
1279 again:
1280 tok->start = NULL;
1281 /* Skip spaces */
1282 do {
1283 c = tok_nextc(tok);
1284 } while (c == ' ' || c == '\t' || c == '\014');
1286 /* Set start of current token */
1287 tok->start = tok->cur - 1;
1289 /* Skip comment, while looking for tab-setting magic */
1290 if (c == '#') {
1291 static char *tabforms[] = {
1292 "tab-width:", /* Emacs */
1293 ":tabstop=", /* vim, full form */
1294 ":ts=", /* vim, abbreviated form */
1295 "set tabsize=", /* will vi never die? */
1296 /* more templates can be added here to support other editors */
1298 char cbuf[80];
1299 char *tp, **cp;
1300 tp = cbuf;
1301 do {
1302 *tp++ = c = tok_nextc(tok);
1303 } while (c != EOF && c != '\n' &&
1304 (size_t)(tp - cbuf + 1) < sizeof(cbuf));
1305 *tp = '\0';
1306 for (cp = tabforms;
1307 cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
1308 cp++) {
1309 if ((tp = strstr(cbuf, *cp))) {
1310 int newsize = atoi(tp + strlen(*cp));
1312 if (newsize >= 1 && newsize <= 40) {
1313 tok->tabsize = newsize;
1314 if (Py_VerboseFlag)
1315 PySys_WriteStderr(
1316 "Tab size set to %d\n",
1317 newsize);
1321 while (c != EOF && c != '\n')
1322 c = tok_nextc(tok);
1325 /* Check for EOF and errors now */
1326 if (c == EOF) {
1327 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
1330 /* Identifier (most frequent token!) */
1331 if (isalpha(c) || c == '_') {
1332 /* Process b"", r"", u"", br"" and ur"" string prefixes */
1333 switch (c) {
1334 case 'b':
1335 case 'B':
1336 c = tok_nextc(tok);
1337 if (c == 'r' || c == 'R')
1338 c = tok_nextc(tok);
1339 if (c == '"' || c == '\'')
1340 goto letter_quote;
1341 break;
1342 case 'r':
1343 case 'R':
1344 c = tok_nextc(tok);
1345 if (c == '"' || c == '\'')
1346 goto letter_quote;
1347 break;
1348 case 'u':
1349 case 'U':
1350 c = tok_nextc(tok);
1351 if (c == 'r' || c == 'R')
1352 c = tok_nextc(tok);
1353 if (c == '"' || c == '\'')
1354 goto letter_quote;
1355 break;
1357 while (isalnum(c) || c == '_') {
1358 c = tok_nextc(tok);
1360 tok_backup(tok, c);
1361 *p_start = tok->start;
1362 *p_end = tok->cur;
1363 return NAME;
1366 /* Newline */
1367 if (c == '\n') {
1368 tok->atbol = 1;
1369 if (blankline || tok->level > 0)
1370 goto nextline;
1371 *p_start = tok->start;
1372 *p_end = tok->cur - 1; /* Leave '\n' out of the string */
1373 tok->cont_line = 0;
1374 return NEWLINE;
1377 /* Period or number starting with period? */
1378 if (c == '.') {
1379 c = tok_nextc(tok);
1380 if (isdigit(c)) {
1381 goto fraction;
1383 else {
1384 tok_backup(tok, c);
1385 *p_start = tok->start;
1386 *p_end = tok->cur;
1387 return DOT;
1391 /* Number */
1392 if (isdigit(c)) {
1393 if (c == '0') {
1394 /* Hex, octal or binary -- maybe. */
1395 c = tok_nextc(tok);
1396 if (c == '.')
1397 goto fraction;
1398 #ifndef WITHOUT_COMPLEX
1399 if (c == 'j' || c == 'J')
1400 goto imaginary;
1401 #endif
1402 if (c == 'x' || c == 'X') {
1404 /* Hex */
1405 c = tok_nextc(tok);
1406 if (!isxdigit(c)) {
1407 tok->done = E_TOKEN;
1408 tok_backup(tok, c);
1409 return ERRORTOKEN;
1411 do {
1412 c = tok_nextc(tok);
1413 } while (isxdigit(c));
1415 else if (c == 'o' || c == 'O') {
1416 /* Octal */
1417 c = tok_nextc(tok);
1418 if (c < '0' || c >= '8') {
1419 tok->done = E_TOKEN;
1420 tok_backup(tok, c);
1421 return ERRORTOKEN;
1423 do {
1424 c = tok_nextc(tok);
1425 } while ('0' <= c && c < '8');
1427 else if (c == 'b' || c == 'B') {
1428 /* Binary */
1429 c = tok_nextc(tok);
1430 if (c != '0' && c != '1') {
1431 tok->done = E_TOKEN;
1432 tok_backup(tok, c);
1433 return ERRORTOKEN;
1435 do {
1436 c = tok_nextc(tok);
1437 } while (c == '0' || c == '1');
1439 else {
1440 int found_decimal = 0;
1441 /* Octal; c is first char of it */
1442 /* There's no 'isoctdigit' macro, sigh */
1443 while ('0' <= c && c < '8') {
1444 c = tok_nextc(tok);
1446 if (isdigit(c)) {
1447 found_decimal = 1;
1448 do {
1449 c = tok_nextc(tok);
1450 } while (isdigit(c));
1452 if (c == '.')
1453 goto fraction;
1454 else if (c == 'e' || c == 'E')
1455 goto exponent;
1456 #ifndef WITHOUT_COMPLEX
1457 else if (c == 'j' || c == 'J')
1458 goto imaginary;
1459 #endif
1460 else if (found_decimal) {
1461 tok->done = E_TOKEN;
1462 tok_backup(tok, c);
1463 return ERRORTOKEN;
1466 if (c == 'l' || c == 'L')
1467 c = tok_nextc(tok);
1469 else {
1470 /* Decimal */
1471 do {
1472 c = tok_nextc(tok);
1473 } while (isdigit(c));
1474 if (c == 'l' || c == 'L')
1475 c = tok_nextc(tok);
1476 else {
1477 /* Accept floating point numbers. */
1478 if (c == '.') {
1479 fraction:
1480 /* Fraction */
1481 do {
1482 c = tok_nextc(tok);
1483 } while (isdigit(c));
1485 if (c == 'e' || c == 'E') {
1486 exponent:
1487 /* Exponent part */
1488 c = tok_nextc(tok);
1489 if (c == '+' || c == '-')
1490 c = tok_nextc(tok);
1491 if (!isdigit(c)) {
1492 tok->done = E_TOKEN;
1493 tok_backup(tok, c);
1494 return ERRORTOKEN;
1496 do {
1497 c = tok_nextc(tok);
1498 } while (isdigit(c));
1500 #ifndef WITHOUT_COMPLEX
1501 if (c == 'j' || c == 'J')
1502 /* Imaginary part */
1503 imaginary:
1504 c = tok_nextc(tok);
1505 #endif
1508 tok_backup(tok, c);
1509 *p_start = tok->start;
1510 *p_end = tok->cur;
1511 return NUMBER;
1514 letter_quote:
1515 /* String */
1516 if (c == '\'' || c == '"') {
1517 Py_ssize_t quote2 = tok->cur - tok->start + 1;
1518 int quote = c;
1519 int triple = 0;
1520 int tripcount = 0;
1521 for (;;) {
1522 c = tok_nextc(tok);
1523 if (c == '\n') {
1524 if (!triple) {
1525 tok->done = E_EOLS;
1526 tok_backup(tok, c);
1527 return ERRORTOKEN;
1529 tripcount = 0;
1530 tok->cont_line = 1; /* multiline string. */
1532 else if (c == EOF) {
1533 if (triple)
1534 tok->done = E_EOFS;
1535 else
1536 tok->done = E_EOLS;
1537 tok->cur = tok->inp;
1538 return ERRORTOKEN;
1540 else if (c == quote) {
1541 tripcount++;
1542 if (tok->cur - tok->start == quote2) {
1543 c = tok_nextc(tok);
1544 if (c == quote) {
1545 triple = 1;
1546 tripcount = 0;
1547 continue;
1549 tok_backup(tok, c);
1551 if (!triple || tripcount == 3)
1552 break;
1554 else if (c == '\\') {
1555 tripcount = 0;
1556 c = tok_nextc(tok);
1557 if (c == EOF) {
1558 tok->done = E_EOLS;
1559 tok->cur = tok->inp;
1560 return ERRORTOKEN;
1563 else
1564 tripcount = 0;
1566 *p_start = tok->start;
1567 *p_end = tok->cur;
1568 return STRING;
1571 /* Line continuation */
1572 if (c == '\\') {
1573 c = tok_nextc(tok);
1574 if (c != '\n') {
1575 tok->done = E_LINECONT;
1576 tok->cur = tok->inp;
1577 return ERRORTOKEN;
1579 tok->cont_line = 1;
1580 goto again; /* Read next line */
1583 /* Check for two-character token */
1585 int c2 = tok_nextc(tok);
1586 int token = PyToken_TwoChars(c, c2);
1587 #ifndef PGEN
1588 if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') {
1589 if (PyErr_WarnExplicit(PyExc_DeprecationWarning,
1590 "<> not supported in 3.x; use !=",
1591 tok->filename, tok->lineno,
1592 NULL, NULL)) {
1593 return ERRORTOKEN;
1596 #endif
1597 if (token != OP) {
1598 int c3 = tok_nextc(tok);
1599 int token3 = PyToken_ThreeChars(c, c2, c3);
1600 if (token3 != OP) {
1601 token = token3;
1602 } else {
1603 tok_backup(tok, c3);
1605 *p_start = tok->start;
1606 *p_end = tok->cur;
1607 return token;
1609 tok_backup(tok, c2);
1612 /* Keep track of parentheses nesting level */
1613 switch (c) {
1614 case '(':
1615 case '[':
1616 case '{':
1617 tok->level++;
1618 break;
1619 case ')':
1620 case ']':
1621 case '}':
1622 tok->level--;
1623 break;
1626 /* Punctuation character */
1627 *p_start = tok->start;
1628 *p_end = tok->cur;
1629 return PyToken_OneChar(c);
1632 int
1633 PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
1635 int result = tok_get(tok, p_start, p_end);
1636 if (tok->decoding_erred) {
1637 result = ERRORTOKEN;
1638 tok->done = E_DECODE;
1640 return result;
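/* Illustrative sketch, not part of the original file: how a caller such as
   parsetok.c typically drives this tokenizer.  dump_all_tokens is a
   hypothetical name; the calls (PyTokenizer_FromString, PyTokenizer_Get,
   PyTokenizer_Free) are the ones defined above. */
#if 0
static void
dump_all_tokens(const char *source)
{
    char *start, *end;
    int type;
    struct tok_state *tok = PyTokenizer_FromString(source, 0);
    if (tok == NULL)
        return;
    for (;;) {
        type = PyTokenizer_Get(tok, &start, &end);
        if (type == ERRORTOKEN || type == ENDMARKER)
            break;
        /* INDENT/DEDENT carry no text, so start/end stay NULL for them. */
        if (start != NULL && end != NULL)
            printf("%s(%.*s)\n", _PyParser_TokenNames[type],
                   (int)(end - start), start);
        else
            printf("%s\n", _PyParser_TokenNames[type]);
    }
    PyTokenizer_Free(tok);
}
#endif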
1643 /* This function is only called from parsetok. However, it cannot live
1644 there, as it must be empty for PGEN, and we can check for PGEN only
1645 in this file. */
1647 #if defined(PGEN) || !defined(Py_USING_UNICODE)
1648 char*
1649 PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
1651 return NULL;
1653 #else
1654 #ifdef Py_USING_UNICODE
1655 static PyObject *
1656 dec_utf8(const char *enc, const char *text, size_t len) {
1657 PyObject *ret = NULL;
1658 PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
1659 if (unicode_text) {
1660 ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
1661 Py_DECREF(unicode_text);
1663 if (!ret) {
1664 PyErr_Clear();
1666 return ret;
1668 char *
1669 PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
1671 char *text = NULL;
1672 if (tok->encoding) {
1673 /* convert source to original encoding */
1674 PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
1675 if (lineobj != NULL) {
1676 int linelen = PyString_Size(lineobj);
1677 const char *line = PyString_AsString(lineobj);
1678 text = PyObject_MALLOC(linelen + 1);
1679 if (text != NULL && line != NULL) {
1680 if (linelen)
1681 strncpy(text, line, linelen);
1682 text[linelen] = '\0';
1684 Py_DECREF(lineobj);
1686 /* adjust error offset */
1687 if (*offset > 1) {
1688 PyObject *offsetobj = dec_utf8(tok->encoding,
1689 tok->buf, *offset-1);
1690 if (offsetobj) {
1691 *offset = PyString_Size(offsetobj) + 1;
1692 Py_DECREF(offsetobj);
1698 return text;
1701 #endif /* defined(Py_USING_UNICODE) */
1702 #endif
1705 #ifdef Py_DEBUG
1707 void
1708 tok_dump(int type, char *start, char *end)
1710 printf("%s", _PyParser_TokenNames[type]);
1711 if (type == NAME || type == NUMBER || type == STRING || type == OP)
1712 printf("(%.*s)", (int)(end - start), start);
1715 #endif