*new* smatch_scripts/summarize_errs.sh: script for iterating through a list of errors
[smatch.git] / tokenize.c
blob93dd007bbba0584df9eda47946a99e2032e8a140
/*
 * This is a really stupid C tokenizer. It doesn't do any include
 * files or anything complex at all. That's the preprocessor.
 *
 * Copyright (C) 2003 Transmeta Corp.
 *               2003 Linus Torvalds
 *
 *  Licensed under the Open Software License version 1.1
 */
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <stdarg.h>
13 #include <stddef.h>
14 #include <string.h>
15 #include <ctype.h>
16 #include <unistd.h>
18 #include "lib.h"
19 #include "allocate.h"
20 #include "token.h"
21 #include "symbol.h"
/*
 * nextchar() reports end of input with EOF, and the tokenizer uses the
 * returned value directly as a table index via "c + 1" (see cclass[]),
 * so EOF has to be exactly -1 in this file.  <stdio.h> is included above
 * and already provides an EOF macro; drop it first so the value is pinned
 * here without redefining a live macro.
 */
#undef EOF
#define EOF (-1)

/* Number of slots of input_streams[] that init_stream() has handed out. */
int input_stream_nr = 0;
/* Growable table of per-stream records, indexed by stream number. */
struct stream *input_streams;
/* Number of slots input_streams[] currently has room for. */
static int input_streams_allocated;
/* Tab width used when advancing the column position over a '\t'. */
unsigned int tabstop = 8;

/* Size of the read buffer that nextchar_slow() refills with read(). */
#define BUFSIZE (8192)
/*
 * Working state of the tokenizer for one input stream.
 */
typedef struct {
	int fd;				/* descriptor the buffer is refilled from; negative means none */
	int offset;			/* index of the next unread byte in buffer[] */
	int size;			/* number of valid bytes in buffer[] */
	int pos;			/* column position within the current line */
	int line;			/* current line number */
	int nr;				/* stream number copied into token positions */
	int newline;			/* a newline was seen since the last token */
	int whitespace;			/* whitespace was seen since the last token */
	struct token **tokenlist;	/* link where the next finished token is stored */
	struct token *token;		/* token currently being built, if any */
	unsigned char *buffer;		/* raw input bytes */
} stream_t;
41 const char *stream_name(int stream)
43 if (stream < 0 || stream > input_stream_nr)
44 return "<bad stream>";
45 return input_streams[stream].name;
48 static struct position stream_pos(stream_t *stream)
50 struct position pos;
51 pos.type = 0;
52 pos.stream = stream->nr;
53 pos.newline = stream->newline;
54 pos.whitespace = stream->whitespace;
55 pos.pos = stream->pos;
56 pos.line = stream->line;
57 pos.noexpand = 0;
58 return pos;
61 const char *show_special(int val)
63 static char buffer[4];
65 buffer[0] = val;
66 buffer[1] = 0;
67 if (val >= SPECIAL_BASE)
68 strcpy(buffer, (char *) combinations[val - SPECIAL_BASE]);
69 return buffer;
72 const char *show_ident(const struct ident *ident)
74 static char buffer[256];
75 if (!ident)
76 return "<noident>";
77 sprintf(buffer, "%.*s", ident->len, ident->name);
78 return buffer;
/*
 * Append the source-level spelling of character 'c' at 'ptr' and return
 * the new end of the output.
 *
 * Printable characters are copied as they are, preceded by a backslash
 * when they equal 'escape' (the enclosing quote character) or are a
 * backslash themselves.  Newline and tab become "\n" and "\t".  Anything
 * else is written as an octal escape; it is padded to three digits when
 * the following character 'next' is a digit.  The octal forms are written
 * with sprintf(), which also stores a terminating NUL after them.
 */
static char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
{
	if (!isprint(c)) {
		*ptr++ = '\\';
		if (c == '\n') {
			*ptr++ = 'n';
			return ptr;
		}
		if (c == '\t') {
			*ptr++ = 't';
			return ptr;
		}
		if (isdigit(next))
			return ptr + sprintf(ptr, "%03o", c);
		return ptr + sprintf(ptr, "%o", c);
	}

	if (c == escape || c == '\\')
		*ptr++ = '\\';
	*ptr++ = c;
	return ptr;
}
104 const char *show_string(const struct string *string)
106 static char buffer[4 * MAX_STRING + 3];
107 char *ptr;
108 int i;
110 if (!string->length)
111 return "<bad_string>";
112 ptr = buffer;
113 *ptr++ = '"';
114 for (i = 0; i < string->length-1; i++) {
115 const char *p = string->data + i;
116 ptr = charstr(ptr, p[0], '"', p[1]);
118 *ptr++ = '"';
119 *ptr = '\0';
120 return buffer;
123 const char *show_token(const struct token *token)
125 static char buffer[256];
127 if (!token)
128 return "<no token>";
129 switch (token_type(token)) {
130 case TOKEN_ERROR:
131 return "syntax error";
133 case TOKEN_EOF:
134 return "end-of-input";
136 case TOKEN_IDENT:
137 return show_ident(token->ident);
139 case TOKEN_STRING:
140 return show_string(token->string);
142 case TOKEN_NUMBER:
143 return token->number;
145 case TOKEN_SPECIAL:
146 return show_special(token->special);
148 case TOKEN_CHAR: {
149 char *ptr = buffer;
150 int c = token->character;
151 *ptr++ = '\'';
152 ptr = charstr(ptr, c, '\'', 0);
153 *ptr++ = '\'';
154 *ptr++ = '\0';
155 return buffer;
158 case TOKEN_STREAMBEGIN:
159 sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
160 return buffer;
162 case TOKEN_STREAMEND:
163 sprintf(buffer, "<end of '%s'>", stream_name(token->pos.stream));
164 return buffer;
166 case TOKEN_UNTAINT:
167 sprintf(buffer, "<untaint>");
168 return buffer;
170 case TOKEN_ARG_COUNT:
171 sprintf(buffer, "<argcnt>");
172 return buffer;
174 default:
175 sprintf(buffer, "unhandled token type '%d' ", token_type(token));
176 return buffer;
180 int init_stream(const char *name, int fd, const char **next_path)
182 int stream = input_stream_nr;
183 struct stream *current;
185 if (stream >= input_streams_allocated) {
186 int newalloc = stream * 4 / 3 + 10;
187 input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
188 if (!input_streams)
189 die("Unable to allocate more streams space");
190 input_streams_allocated = newalloc;
192 current = input_streams + stream;
193 memset(current, 0, sizeof(*current));
194 current->name = name;
195 current->fd = fd;
196 current->next_path = next_path;
197 current->path = NULL;
198 current->constant = CONSTANT_FILE_MAYBE;
199 input_stream_nr = stream+1;
200 return stream;
203 static struct token * alloc_token(stream_t *stream)
205 struct token *token = __alloc_token(0);
206 token->pos = stream_pos(stream);
207 return token;
211 * Argh... That was surprisingly messy - handling '\r' complicates the
212 * things a _lot_.
214 static int nextchar_slow(stream_t *stream)
216 int offset = stream->offset;
217 int size = stream->size;
218 int c;
219 int spliced = 0, had_cr, had_backslash, complain;
221 restart:
222 had_cr = had_backslash = complain = 0;
224 repeat:
225 if (offset >= size) {
226 if (stream->fd < 0)
227 goto got_eof;
228 size = read(stream->fd, stream->buffer, BUFSIZE);
229 if (size <= 0)
230 goto got_eof;
231 stream->size = size;
232 stream->offset = offset = 0;
235 c = stream->buffer[offset++];
237 if (had_cr && c != '\n')
238 complain = 1;
240 if (c == '\r') {
241 had_cr = 1;
242 goto repeat;
245 stream->pos += (c == '\t') ? (tabstop - stream->pos % tabstop) : 1;
247 if (c == '\n') {
248 stream->line++;
249 stream->pos = 0;
252 if (!had_backslash) {
253 if (c == '\\') {
254 had_backslash = 1;
255 goto repeat;
257 if (c == '\n')
258 stream->newline = 1;
259 } else {
260 if (c == '\n') {
261 if (complain)
262 warning(stream_pos(stream), "non-ASCII data stream");
263 spliced = 1;
264 goto restart;
266 stream->pos--;
267 offset--;
268 c = '\\';
271 out:
272 stream->offset = offset;
273 if (complain)
274 warning(stream_pos(stream), "non-ASCII data stream");
276 return c;
278 got_eof:
279 if (had_backslash) {
280 c = '\\';
281 goto out;
283 if (stream->pos)
284 warning(stream_pos(stream), "no newline at end of file");
285 else if (had_cr)
286 warning(stream_pos(stream), "non-ASCII data stream");
287 else if (spliced)
288 warning(stream_pos(stream), "backslash-newline at end of file");
289 return EOF;
293 * We want that as light as possible while covering all normal cases.
294 * Slow path (including the logics with line-splicing and EOF sanity
295 * checks) is in nextchar_slow().
297 static inline int nextchar(stream_t *stream)
299 int offset = stream->offset;
301 if (offset < stream->size) {
302 int c = stream->buffer[offset++];
303 static const char special[256] = {
304 ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, ['\\'] = 1
306 if (!special[c]) {
307 stream->offset = offset;
308 stream->pos++;
309 return c;
312 return nextchar_slow(stream);
315 struct token eof_token_entry;
317 static struct token *mark_eof(stream_t *stream)
319 struct token *end;
321 end = alloc_token(stream);
322 token_type(end) = TOKEN_STREAMEND;
323 end->pos.newline = 1;
325 eof_token_entry.next = &eof_token_entry;
326 eof_token_entry.pos.newline = 1;
328 end->next = &eof_token_entry;
329 *stream->tokenlist = end;
330 stream->tokenlist = NULL;
331 return end;
334 static void add_token(stream_t *stream)
336 struct token *token = stream->token;
338 stream->token = NULL;
339 token->next = NULL;
340 *stream->tokenlist = token;
341 stream->tokenlist = &token->next;
344 static void drop_token(stream_t *stream)
346 stream->newline |= stream->token->pos.newline;
347 stream->whitespace |= stream->token->pos.whitespace;
348 stream->token = NULL;
/* Character class bits stored in cclass[]. */
enum {
	Letter		= 1 << 0,
	Digit		= 1 << 1,
	Hex		= 1 << 2,
	Exp		= 1 << 3,
	Dot		= 1 << 4,
	ValidSecond	= 1 << 5,
};

/*
 * Class bits for every character, indexed by "character + 1" so that
 * index 0 serves a character value of -1.  Characters that are not
 * listed have no bits set.
 */
#define CH(c) ((c) + 1)
static const long cclass[257] = {
	/* decimal digits */
	[CH('0') ... CH('9')] = Digit | Hex,

	/* upper case: A-F are hex digits, E and P are exponent markers */
	[CH('A') ... CH('D')] = Letter | Hex,
	[CH('E')] = Letter | Hex | Exp,
	[CH('F')] = Letter | Hex,
	[CH('G') ... CH('O')] = Letter,
	[CH('P')] = Letter | Exp,
	[CH('Q') ... CH('Z')] = Letter,

	/* lower case, same layout */
	[CH('a') ... CH('d')] = Letter | Hex,
	[CH('e')] = Letter | Hex | Exp,
	[CH('f')] = Letter | Hex,
	[CH('g') ... CH('o')] = Letter,
	[CH('p')] = Letter | Exp,
	[CH('q') ... CH('z')] = Letter,

	[CH('_')] = Letter,

	/* characters that may be the second one of a two-character punctuator */
	[CH('.')] = Dot | ValidSecond,
	[CH('=')] = ValidSecond,
	[CH('+')] = ValidSecond,
	[CH('-')] = ValidSecond,
	[CH('>')] = ValidSecond,
	[CH('<')] = ValidSecond,
	[CH('&')] = ValidSecond,
	[CH('|')] = ValidSecond,
	[CH('#')] = ValidSecond,
};
#undef CH
387 * pp-number:
388 * digit
389 * . digit
390 * pp-number digit
391 * pp-number identifier-nodigit
392 * pp-number e sign
393 * pp-number E sign
394 * pp-number p sign
395 * pp-number P sign
396 * pp-number .
398 static int get_one_number(int c, int next, stream_t *stream)
400 struct token *token;
401 static char buffer[4095];
402 char *p = buffer, *buf, *buffer_end = buffer + sizeof (buffer);
403 int len;
405 *p++ = c;
406 for (;;) {
407 long class = cclass[next + 1];
408 if (!(class & (Dot | Digit | Letter)))
409 break;
410 if (p != buffer_end)
411 *p++ = next;
412 next = nextchar(stream);
413 if (class & Exp) {
414 if (next == '-' || next == '+') {
415 if (p != buffer_end)
416 *p++ = next;
417 next = nextchar(stream);
422 if (p == buffer_end) {
423 sparse_error(stream_pos(stream), "number token exceeds %td characters",
424 buffer_end - buffer);
425 // Pretend we saw just "1".
426 buffer[0] = '1';
427 p = buffer + 1;
430 *p++ = 0;
431 len = p - buffer;
432 buf = __alloc_bytes(len);
433 memcpy(buf, buffer, len);
435 token = stream->token;
436 token_type(token) = TOKEN_NUMBER;
437 token->number = buf;
438 add_token(stream);
440 return next;
443 static int escapechar(int first, int type, stream_t *stream, int *valp)
445 int next, value;
447 next = nextchar(stream);
448 value = first;
450 if (first == '\n')
451 warning(stream_pos(stream), "Newline in string or character constant");
453 if (first == '\\' && next != EOF) {
454 value = next;
455 next = nextchar(stream);
456 if (value != type) {
457 switch (value) {
458 case 'a':
459 value = '\a';
460 break;
461 case 'b':
462 value = '\b';
463 break;
464 case 't':
465 value = '\t';
466 break;
467 case 'n':
468 value = '\n';
469 break;
470 case 'v':
471 value = '\v';
472 break;
473 case 'f':
474 value = '\f';
475 break;
476 case 'r':
477 value = '\r';
478 break;
479 case 'e':
480 value = '\e';
481 break;
482 case '\\':
483 break;
484 case '?':
485 break;
486 case '\'':
487 break;
488 case '"':
489 break;
490 case '\n':
491 warning(stream_pos(stream), "Newline in string or character constant");
492 break;
493 case '0'...'7': {
494 int nr = 2;
495 value -= '0';
496 while (next >= '0' && next <= '9') {
497 value = (value << 3) + (next-'0');
498 next = nextchar(stream);
499 if (!--nr)
500 break;
502 value &= 0xff;
503 break;
505 case 'x': {
506 int hex = hexval(next);
507 if (hex < 16) {
508 value = hex;
509 next = nextchar(stream);
510 while ((hex = hexval(next)) < 16) {
511 value = (value << 4) + hex;
512 next = nextchar(stream);
514 value &= 0xff;
515 break;
518 /* Fall through */
519 default:
520 warning(stream_pos(stream), "Unknown escape '%c'", value);
523 /* Mark it as escaped */
524 value |= 0x100;
526 *valp = value;
527 return next;
530 static int get_char_token(int next, stream_t *stream)
532 int value;
533 struct token *token;
535 next = escapechar(next, '\'', stream, &value);
536 if (value == '\'' || next != '\'') {
537 sparse_error(stream_pos(stream), "Bad character constant");
538 drop_token(stream);
539 return next;
542 token = stream->token;
543 token_type(token) = TOKEN_CHAR;
544 token->character = value & 0xff;
546 add_token(stream);
547 return nextchar(stream);
550 static int get_string_token(int next, stream_t *stream)
552 static char buffer[MAX_STRING];
553 struct string *string;
554 struct token *token;
555 int len = 0;
557 for (;;) {
558 int val;
559 next = escapechar(next, '"', stream, &val);
560 if (val == '"')
561 break;
562 if (next == EOF) {
563 warning(stream_pos(stream), "End of file in middle of string");
564 return next;
566 if (len < MAX_STRING)
567 buffer[len] = val;
568 len++;
571 if (len > MAX_STRING) {
572 warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
573 len = MAX_STRING;
576 string = __alloc_string(len+1);
577 memcpy(string->data, buffer, len);
578 string->data[len] = '\0';
579 string->length = len+1;
581 /* Pass it on.. */
582 token = stream->token;
583 token_type(token) = TOKEN_STRING;
584 token->string = string;
585 add_token(stream);
587 return next;
590 static int drop_stream_eoln(stream_t *stream)
592 drop_token(stream);
593 for (;;) {
594 switch (nextchar(stream)) {
595 case EOF:
596 return EOF;
597 case '\n':
598 return nextchar(stream);
603 static int drop_stream_comment(stream_t *stream)
605 int newline;
606 int next;
607 drop_token(stream);
608 newline = stream->newline;
610 next = nextchar(stream);
611 for (;;) {
612 int curr = next;
613 if (curr == EOF) {
614 warning(stream_pos(stream), "End of file in the middle of a comment");
615 return curr;
617 next = nextchar(stream);
618 if (curr == '*' && next == '/')
619 break;
621 stream->newline = newline;
622 return nextchar(stream);
625 unsigned char combinations[][4] = COMBINATION_STRINGS;
627 #define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
/*
 * Hash function for two-character punctuators - all the pairs listed in
 * hash_results[] give unique values in the range 0..31.
 *
 * Both arguments are fully parenthesized so that expression arguments
 * hash like their values.  Each argument is evaluated twice, so do not
 * pass expressions with side effects.
 */
#define special_hash(c0, c1) \
	(((((c0) * 8) + ((c1) * 2)) + ((((c0) * 8) + ((c1) * 2)) >> 5)) & 31)

/*
 * The pair of characters stored at each hash value, used to confirm that
 * a hash hit really is the expected punctuator.
 *
 * note that we won't get false positives - special_hash(0,0) is 0 and
 * entry 0 is filled (by +=), so all the missing ones are OK.
 *
 * The table is only ever read, hence const.
 */
static const unsigned char hash_results[32][2] = {
#define RES(c0, c1) [special_hash(c0, c1)] = {c0, c1}
	RES('+', '='), /* 00 */
	RES('/', '='), /* 01 */
	RES('^', '='), /* 05 */
	RES('&', '&'), /* 07 */
	RES('#', '#'), /* 08 */
	RES('<', '<'), /* 0a */
	RES('<', '='), /* 0c */
	RES('!', '='), /* 0e */
	RES('%', '='), /* 0f */
	RES('-', '-'), /* 10 */
	RES('-', '='), /* 11 */
	RES('-', '>'), /* 13 */
	RES('=', '='), /* 15 */
	RES('&', '='), /* 17 */
	RES('*', '='), /* 18 */
	RES('.', '.'), /* 1a */
	RES('+', '+'), /* 1b */
	RES('|', '='), /* 1c */
	RES('>', '='), /* 1d */
	RES('|', '|'), /* 1e */
	RES('>', '>')  /* 1f */
#undef RES
};
661 static int code[32] = {
662 #define CODE(c0, c1, value) [special_hash(c0, c1)] = value
663 CODE('+', '=', SPECIAL_ADD_ASSIGN), /* 00 */
664 CODE('/', '=', SPECIAL_DIV_ASSIGN), /* 01 */
665 CODE('^', '=', SPECIAL_XOR_ASSIGN), /* 05 */
666 CODE('&', '&', SPECIAL_LOGICAL_AND), /* 07 */
667 CODE('#', '#', SPECIAL_HASHHASH), /* 08 */
668 CODE('<', '<', SPECIAL_LEFTSHIFT), /* 0a */
669 CODE('<', '=', SPECIAL_LTE), /* 0c */
670 CODE('!', '=', SPECIAL_NOTEQUAL), /* 0e */
671 CODE('%', '=', SPECIAL_MOD_ASSIGN), /* 0f */
672 CODE('-', '-', SPECIAL_DECREMENT), /* 10 */
673 CODE('-', '=', SPECIAL_SUB_ASSIGN), /* 11 */
674 CODE('-', '>', SPECIAL_DEREFERENCE), /* 13 */
675 CODE('=', '=', SPECIAL_EQUAL), /* 15 */
676 CODE('&', '=', SPECIAL_AND_ASSIGN), /* 17 */
677 CODE('*', '=', SPECIAL_MUL_ASSIGN), /* 18 */
678 CODE('.', '.', SPECIAL_DOTDOT), /* 1a */
679 CODE('+', '+', SPECIAL_INCREMENT), /* 1b */
680 CODE('|', '=', SPECIAL_OR_ASSIGN), /* 1c */
681 CODE('>', '=', SPECIAL_GTE), /* 1d */
682 CODE('|', '|', SPECIAL_LOGICAL_OR), /* 1e */
683 CODE('>', '>', SPECIAL_RIGHTSHIFT) /* 1f */
684 #undef CODE
687 static int get_one_special(int c, stream_t *stream)
689 struct token *token;
690 int next, value, i;
692 next = nextchar(stream);
695 * Check for numbers, strings, character constants, and comments
697 switch (c) {
698 case '.':
699 if (next >= '0' && next <= '9')
700 return get_one_number(c, next, stream);
701 break;
702 case '"':
703 return get_string_token(next, stream);
704 case '\'':
705 return get_char_token(next, stream);
706 case '/':
707 if (next == '/')
708 return drop_stream_eoln(stream);
709 if (next == '*')
710 return drop_stream_comment(stream);
714 * Check for combinations
716 value = c;
717 if (cclass[next + 1] & ValidSecond) {
718 i = special_hash(c, next);
719 if (hash_results[i][0] == c && hash_results[i][1] == next) {
720 value = code[i];
721 next = nextchar(stream);
722 if (value >= SPECIAL_LEFTSHIFT &&
723 next == "==."[value - SPECIAL_LEFTSHIFT]) {
724 value += 3;
725 next = nextchar(stream);
730 /* Pass it on.. */
731 token = stream->token;
732 token_type(token) = TOKEN_SPECIAL;
733 token->special = value;
734 add_token(stream);
735 return next;
/* Geometry of the identifier hash table: 2^IDENT_HASH_BITS buckets. */
#define IDENT_HASH_BITS		(13)
#define IDENT_HASH_SIZE		(1<<IDENT_HASH_BITS)
#define IDENT_HASH_MASK		(IDENT_HASH_SIZE-1)

/*
 * Incremental identifier hash: start from the first character, mix in
 * one character at a time, then fold the result down to a bucket index.
 */
#define ident_hash_init(c)		(c)
#define ident_hash_add(oldhash,c)	((oldhash)*11 + (c))
#define ident_hash_end(hash)		((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK)

/* Bucket heads; the identifiers of a bucket are chained through ->next. */
static struct ident *hash_table[IDENT_HASH_SIZE];
/* Counters reported by show_identifier_stats(). */
static int ident_hit, ident_miss, idents;
749 void show_identifier_stats(void)
751 int i;
752 int distribution[100];
754 fprintf(stderr, "identifiers: %d hits, %d misses\n",
755 ident_hit, ident_miss);
757 for (i = 0; i < 100; i++)
758 distribution[i] = 0;
760 for (i = 0; i < IDENT_HASH_SIZE; i++) {
761 struct ident * ident = hash_table[i];
762 int count = 0;
764 while (ident) {
765 count++;
766 ident = ident->next;
768 if (count > 99)
769 count = 99;
770 distribution[count]++;
773 for (i = 0; i < 100; i++) {
774 if (distribution[i])
775 fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
779 static struct ident *alloc_ident(const char *name, int len)
781 struct ident *ident = __alloc_ident(len);
782 ident->symbols = NULL;
783 ident->len = len;
784 ident->tainted = 0;
785 memcpy(ident->name, name, len);
786 return ident;
789 static struct ident * insert_hash(struct ident *ident, unsigned long hash)
791 ident->next = hash_table[hash];
792 hash_table[hash] = ident;
793 ident_miss++;
794 return ident;
797 static struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
799 struct ident *ident;
800 struct ident **p;
802 p = &hash_table[hash];
803 while ((ident = *p) != NULL) {
804 if (ident->len == (unsigned char) len) {
805 if (strncmp(name, ident->name, len) != 0)
806 goto next;
808 ident_hit++;
809 return ident;
811 next:
812 //misses++;
813 p = &ident->next;
815 ident = alloc_ident(name, len);
816 *p = ident;
817 ident->next = NULL;
818 ident_miss++;
819 idents++;
820 return ident;
/*
 * Compute the hash table bucket for the 'len'-byte name at 'name'.
 *
 * The hash starts from the first byte and mixes in each following byte.
 * An empty name (len <= 0) hashes as if nothing had been mixed in; no
 * byte of 'name' is read in that case.  Without this guard the
 * pre-decrementing loop below would start from 0, count downwards and
 * read far past the end of the name.
 */
static unsigned long hash_name(const char *name, int len)
{
	unsigned long hash;
	const unsigned char *p = (const unsigned char *)name;

	if (len <= 0)
		return ident_hash_end(0UL);

	hash = ident_hash_init(*p++);
	while (--len) {
		unsigned int i = *p++;
		hash = ident_hash_add(hash, i);
	}
	return ident_hash_end(hash);
}
836 struct ident *hash_ident(struct ident *ident)
838 return insert_hash(ident, hash_name(ident->name, ident->len));
/*
 * Return the identifier for the NUL-terminated string 'name', creating
 * it if it is not in the hash table yet.
 */
struct ident *built_in_ident(const char *name)
{
	int len = strlen(name);
	unsigned long bucket = hash_name(name, len);

	return create_hashed_ident(name, len, bucket);
}
847 struct token *built_in_token(int stream, const char *name)
849 struct token *token;
851 token = __alloc_token(0);
852 token->pos.stream = stream;
853 token_type(token) = TOKEN_IDENT;
854 token->ident = built_in_ident(name);
855 return token;
858 static int get_one_identifier(int c, stream_t *stream)
860 struct token *token;
861 struct ident *ident;
862 unsigned long hash;
863 char buf[256];
864 int len = 1;
865 int next;
867 hash = ident_hash_init(c);
868 buf[0] = c;
869 for (;;) {
870 next = nextchar(stream);
871 if (!(cclass[next + 1] & (Letter | Digit)))
872 break;
873 if (len >= sizeof(buf))
874 break;
875 hash = ident_hash_add(hash, next);
876 buf[len] = next;
877 len++;
879 hash = ident_hash_end(hash);
881 ident = create_hashed_ident(buf, len, hash);
883 /* Pass it on.. */
884 token = stream->token;
885 token_type(token) = TOKEN_IDENT;
886 token->ident = ident;
887 add_token(stream);
888 return next;
891 static int get_one_token(int c, stream_t *stream)
893 long class = cclass[c + 1];
894 if (class & Digit)
895 return get_one_number(c, nextchar(stream), stream);
896 if (class & Letter)
897 return get_one_identifier(c, stream);
898 return get_one_special(c, stream);
901 static struct token *setup_stream(stream_t *stream, int idx, int fd,
902 unsigned char *buf, unsigned int buf_size)
904 struct token *begin;
906 stream->nr = idx;
907 stream->line = 1;
908 stream->newline = 1;
909 stream->whitespace = 0;
910 stream->pos = 0;
912 stream->token = NULL;
913 stream->fd = fd;
914 stream->offset = 0;
915 stream->size = buf_size;
916 stream->buffer = buf;
918 begin = alloc_token(stream);
919 token_type(begin) = TOKEN_STREAMBEGIN;
920 stream->tokenlist = &begin->next;
921 return begin;
924 static struct token *tokenize_stream(stream_t *stream)
926 int c = nextchar(stream);
927 while (c != EOF) {
928 if (!isspace(c)) {
929 struct token *token = alloc_token(stream);
930 stream->token = token;
931 stream->newline = 0;
932 stream->whitespace = 0;
933 c = get_one_token(c, stream);
934 continue;
936 stream->whitespace = 1;
937 c = nextchar(stream);
939 return mark_eof(stream);
942 struct token * tokenize_buffer(void *buffer, unsigned long size, struct token **endtoken)
944 stream_t stream;
945 struct token *begin;
947 begin = setup_stream(&stream, 0, -1, buffer, size);
948 *endtoken = tokenize_stream(&stream);
949 return begin;
952 struct token * tokenize(const char *name, int fd, struct token *endtoken, const char **next_path)
954 struct token *begin, *end;
955 stream_t stream;
956 unsigned char buffer[BUFSIZE];
957 int idx;
959 idx = init_stream(name, fd, next_path);
960 if (idx < 0) {
961 // info(endtoken->pos, "File %s is const", name);
962 return endtoken;
965 begin = setup_stream(&stream, idx, fd, buffer, 0);
966 end = tokenize_stream(&stream);
967 if (endtoken)
968 end->next = endtoken;
969 return begin;