Take the rest of storage class keywords to parse.c
[smatch.git] / tokenize.c
blob6b67b96e67cc5473935aba44c7d9cf0c3a56b2d6
1 /*
2 * This is a really stupid C tokenizer. It doesn't do any include
3 * files or anything complex at all. That's the preprocessor.
5 * Copyright (C) 2003 Transmeta Corp.
6 * 2003 Linus Torvalds
8 * Licensed under the Open Software License version 1.1
9 */
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <stdarg.h>
13 #include <stddef.h>
14 #include <string.h>
15 #include <ctype.h>
16 #include <unistd.h>
18 #include "lib.h"
19 #include "allocate.h"
20 #include "token.h"
21 #include "symbol.h"
/* End-of-input marker returned by the character readers in this file. */
#define EOF (-1)

/* Number of input_streams[] entries handed out so far by init_stream(). */
int input_stream_nr = 0;
/* Growable table of stream descriptors, indexed by stream number. */
struct stream *input_streams;
/* Number of entries input_streams[] currently has room for. */
static int input_streams_allocated;
/* Tab width used by nextchar_slow() when advancing the column over '\t'. */
unsigned int tabstop = 8;

/* Number of bytes requested per read() when a stream buffer is refilled. */
#define BUFSIZE (8192)
/*
 * Per-tokenization reader state: where the bytes come from, where we are
 * in them, and where the next token gets linked.
 */
typedef struct {
	/* fd: source descriptor (negative: buffer is never refilled);
	 * offset: index of the next unread byte; size: valid bytes in buffer. */
	int fd, offset, size;
	/* pos: current column; line: current line; nr: stream number. */
	int pos, line, nr;
	/* Flags copied into the position of the next token by stream_pos(). */
	int newline, whitespace;
	/* Address of the link that will receive the next finished token. */
	struct token **tokenlist;
	/* Token currently being built (NULL once added or dropped). */
	struct token *token;
	/* Raw input bytes. */
	unsigned char *buffer;
} stream_t;
41 const char *stream_name(int stream)
43 if (stream < 0 || stream > input_stream_nr)
44 return "<bad stream>";
45 return input_streams[stream].name;
48 static struct position stream_pos(stream_t *stream)
50 struct position pos;
51 pos.type = 0;
52 pos.stream = stream->nr;
53 pos.newline = stream->newline;
54 pos.whitespace = stream->whitespace;
55 pos.pos = stream->pos;
56 pos.line = stream->line;
57 pos.noexpand = 0;
58 return pos;
61 const char *show_special(int val)
63 static char buffer[4];
65 buffer[0] = val;
66 buffer[1] = 0;
67 if (val >= SPECIAL_BASE)
68 strcpy(buffer, (char *) combinations[val - SPECIAL_BASE]);
69 return buffer;
72 const char *show_ident(const struct ident *ident)
74 static char buffer[256];
75 if (!ident)
76 return "<noident>";
77 sprintf(buffer, "%.*s", ident->len, ident->name);
78 return buffer;
/*
 * Append the source-level spelling of byte `c` at `ptr` and return the
 * new write position (no NUL is stored for the printable / \n / \t cases).
 *
 *  - printable bytes are copied, preceded by a backslash when they equal
 *    `escape` (the enclosing quote) or are a backslash themselves;
 *  - newline and tab become \n and \t;
 *  - everything else becomes an octal escape, padded to three digits when
 *    the following byte `next` is a digit so it cannot be misread as part
 *    of the escape.
 */
static char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
{
	if (!isprint(c)) {
		*ptr++ = '\\';
		if (c == '\n') {
			*ptr++ = 'n';
			return ptr;
		}
		if (c == '\t') {
			*ptr++ = 't';
			return ptr;
		}
		if (isdigit(next))
			return ptr + sprintf(ptr, "%03o", c);
		return ptr + sprintf(ptr, "%o", c);
	}

	if (c == escape || c == '\\')
		*ptr++ = '\\';
	*ptr++ = c;
	return ptr;
}
104 const char *show_string(const struct string *string)
106 static char buffer[4 * MAX_STRING + 3];
107 char *ptr;
108 int i;
110 if (!string->length)
111 return "<bad_string>";
112 ptr = buffer;
113 *ptr++ = '"';
114 for (i = 0; i < string->length-1; i++) {
115 const char *p = string->data + i;
116 ptr = charstr(ptr, p[0], '"', p[1]);
118 *ptr++ = '"';
119 *ptr = '\0';
120 return buffer;
123 const char *show_token(const struct token *token)
125 static char buffer[256];
127 if (!token)
128 return "<no token>";
129 switch (token_type(token)) {
130 case TOKEN_ERROR:
131 return "syntax error";
133 case TOKEN_EOF:
134 return "end-of-input";
136 case TOKEN_IDENT:
137 return show_ident(token->ident);
139 case TOKEN_STRING:
140 return show_string(token->string);
142 case TOKEN_NUMBER:
143 return token->number;
145 case TOKEN_SPECIAL:
146 return show_special(token->special);
148 case TOKEN_CHAR: {
149 char *ptr = buffer;
150 int c = token->character;
151 *ptr++ = '\'';
152 ptr = charstr(ptr, c, '\'', 0);
153 *ptr++ = '\'';
154 *ptr++ = '\0';
155 return buffer;
158 case TOKEN_STREAMBEGIN:
159 sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
160 return buffer;
162 case TOKEN_STREAMEND:
163 sprintf(buffer, "<end of '%s'>", stream_name(token->pos.stream));
164 return buffer;
166 default:
167 return "WTF???";
171 int init_stream(const char *name, int fd, const char **next_path)
173 int stream = input_stream_nr;
174 struct stream *current;
176 if (stream >= input_streams_allocated) {
177 int newalloc = stream * 4 / 3 + 10;
178 input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
179 if (!input_streams)
180 die("Unable to allocate more streams space");
181 input_streams_allocated = newalloc;
183 current = input_streams + stream;
184 memset(current, 0, sizeof(*current));
185 current->name = name;
186 current->fd = fd;
187 current->next_path = next_path;
188 current->path = NULL;
189 current->constant = CONSTANT_FILE_MAYBE;
190 input_stream_nr = stream+1;
191 return stream;
194 static struct token * alloc_token(stream_t *stream)
196 struct token *token = __alloc_token(0);
197 token->pos = stream_pos(stream);
198 return token;
/*
 * Argh...  That was surprisingly messy - handling '\r' complicates the
 * things a _lot_.
 *
 * Slow-path character reader.  Returns the next logical character of the
 * stream, or EOF.  On the way it
 *  - refills the buffer with read() when it runs dry (streams with a
 *    negative fd cannot be refilled and simply hit EOF),
 *  - silently drops '\r', warning when one is not directly followed by '\n',
 *  - maintains stream->pos / stream->line and sets stream->newline,
 *  - removes backslash-newline pairs (line splicing).
 */
static int nextchar_slow(stream_t *stream)
{
	int offset = stream->offset;
	int size = stream->size;
	int c;
	int spliced = 0, had_cr, had_backslash, complain;

restart:
	/* fresh start, also re-entered after each spliced backslash-newline */
	had_cr = had_backslash = complain = 0;

repeat:
	if (offset >= size) {
		if (stream->fd < 0)
			goto got_eof;
		size = read(stream->fd, stream->buffer, BUFSIZE);
		/* both end-of-file and read errors end the stream */
		if (size <= 0)
			goto got_eof;
		stream->size = size;
		stream->offset = offset = 0;
	}

	c = stream->buffer[offset++];

	/* a CR is only acceptable as the first half of CR-LF */
	if (had_cr && c != '\n')
		complain = 1;

	if (c == '\r') {
		had_cr = 1;
		goto repeat;
	}

	/* tabs advance to the next multiple of tabstop, anything else by one */
	stream->pos += (c == '\t') ? (tabstop - stream->pos % tabstop) : 1;

	if (c == '\n') {
		stream->line++;
		stream->pos = 0;
	}

	if (!had_backslash) {
		if (c == '\\') {
			/* need the following character to tell a splice from a plain '\' */
			had_backslash = 1;
			goto repeat;
		}
		if (c == '\n')
			stream->newline = 1;
	} else {
		if (c == '\n') {
			/* backslash-newline: drop both and start over on the next line */
			if (complain)
				warning(stream_pos(stream), "non-ASCII data stream");
			spliced = 1;
			goto restart;
		}
		/* plain backslash: un-read the character after it and return '\' */
		stream->pos--;
		offset--;
		c = '\\';
	}

out:
	stream->offset = offset;
	if (complain)
		warning(stream_pos(stream), "non-ASCII data stream");

	return c;

got_eof:
	/* a backslash that was the very last character is returned as itself */
	if (had_backslash) {
		c = '\\';
		goto out;
	}
	/* a non-zero column means the final line was not newline-terminated */
	if (stream->pos)
		warning(stream_pos(stream), "no newline at end of file");
	else if (had_cr)
		warning(stream_pos(stream), "non-ASCII data stream");
	else if (spliced)
		warning(stream_pos(stream), "backslash-newline at end of file");
	return EOF;
}
284 * We want that as light as possible while covering all normal cases.
285 * Slow path (including the logics with line-splicing and EOF sanity
286 * checks) is in nextchar_slow().
288 static inline int nextchar(stream_t *stream)
290 int offset = stream->offset;
292 if (offset < stream->size) {
293 int c = stream->buffer[offset++];
294 static const char special[256] = {
295 ['\t'] = 1, ['\r'] = 1, ['\n'] = 1, ['\\'] = 1
297 if (!special[c]) {
298 stream->offset = offset;
299 stream->pos++;
300 return c;
303 return nextchar_slow(stream);
306 struct token eof_token_entry;
308 static struct token *mark_eof(stream_t *stream)
310 struct token *end;
312 end = alloc_token(stream);
313 token_type(end) = TOKEN_STREAMEND;
314 end->pos.newline = 1;
316 eof_token_entry.next = &eof_token_entry;
317 eof_token_entry.pos.newline = 1;
319 end->next = &eof_token_entry;
320 *stream->tokenlist = end;
321 stream->tokenlist = NULL;
322 return end;
325 static void add_token(stream_t *stream)
327 struct token *token = stream->token;
329 stream->token = NULL;
330 token->next = NULL;
331 *stream->tokenlist = token;
332 stream->tokenlist = &token->next;
335 static void drop_token(stream_t *stream)
337 stream->newline |= stream->token->pos.newline;
338 stream->whitespace |= stream->token->pos.whitespace;
339 stream->token = NULL;
/* Character-class bits stored in cclass[]. */
enum {
	Letter = 1,		/* may appear in an identifier */
	Digit = 2,		/* decimal digit */
	Hex = 4,		/* hexadecimal digit */
	Exp = 8,		/* exponent marker (e, E, p, P): a sign may follow in a number */
	Dot = 16,		/* '.' */
	ValidSecond = 32,	/* may be the second character of a multi-character punctuator */
};

/*
 * Class bits per character.  The table is indexed by (character + 1) and
 * has 257 slots so that EOF (-1) maps to slot 0, which has no bits set.
 */
static const long cclass[257] = {
	['0' + 1 ... '9' + 1] = Digit | Hex,
	['A' + 1 ... 'D' + 1] = Letter | Hex,
	['E' + 1] = Letter | Hex | Exp,
	['F' + 1] = Letter | Hex,
	['G' + 1 ... 'O' + 1] = Letter,
	['P' + 1] = Letter | Exp,
	['Q' + 1 ... 'Z' + 1] = Letter,
	['a' + 1 ... 'd' + 1] = Letter | Hex,
	['e' + 1] = Letter | Hex | Exp,
	['f' + 1] = Letter | Hex,
	['g' + 1 ... 'o' + 1] = Letter,
	['p' + 1] = Letter | Exp,
	['q' + 1 ... 'z' + 1] = Letter,
	['_' + 1] = Letter,
	['.' + 1] = Dot | ValidSecond,
	['=' + 1] = ValidSecond,
	['+' + 1] = ValidSecond,
	['-' + 1] = ValidSecond,
	['>' + 1] = ValidSecond,
	['<' + 1] = ValidSecond,
	['&' + 1] = ValidSecond,
	['|' + 1] = ValidSecond,
	['#' + 1] = ValidSecond,
};
378 * pp-number:
379 * digit
380 * . digit
381 * pp-number digit
382 * pp-number identifier-nodigit
383 * pp-number e sign
384 * pp-number E sign
385 * pp-number p sign
386 * pp-number P sign
387 * pp-number .
389 static int get_one_number(int c, int next, stream_t *stream)
391 struct token *token;
392 static char buffer[4095];
393 char *p = buffer, *buf, *buffer_end = buffer + sizeof (buffer);
394 int len;
396 *p++ = c;
397 for (;;) {
398 long class = cclass[next + 1];
399 if (!(class & (Dot | Digit | Letter)))
400 break;
401 if (p != buffer_end)
402 *p++ = next;
403 next = nextchar(stream);
404 if (class & Exp) {
405 if (next == '-' || next == '+') {
406 if (p != buffer_end)
407 *p++ = next;
408 next = nextchar(stream);
413 if (p == buffer_end) {
414 sparse_error(stream_pos(stream), "number token exceeds %td characters",
415 buffer_end - buffer);
416 // Pretend we saw just "1".
417 buffer[0] = '1';
418 p = buffer + 1;
421 *p++ = 0;
422 len = p - buffer;
423 buf = __alloc_bytes(len);
424 memcpy(buf, buffer, len);
426 token = stream->token;
427 token_type(token) = TOKEN_NUMBER;
428 token->number = buf;
429 add_token(stream);
431 return next;
434 static int escapechar(int first, int type, stream_t *stream, int *valp)
436 int next, value;
438 next = nextchar(stream);
439 value = first;
441 if (first == '\n')
442 warning(stream_pos(stream), "Newline in string or character constant");
444 if (first == '\\' && next != EOF) {
445 value = next;
446 next = nextchar(stream);
447 if (value != type) {
448 switch (value) {
449 case 'a':
450 value = '\a';
451 break;
452 case 'b':
453 value = '\b';
454 break;
455 case 't':
456 value = '\t';
457 break;
458 case 'n':
459 value = '\n';
460 break;
461 case 'v':
462 value = '\v';
463 break;
464 case 'f':
465 value = '\f';
466 break;
467 case 'r':
468 value = '\r';
469 break;
470 case 'e':
471 value = '\e';
472 break;
473 case '\\':
474 break;
475 case '?':
476 break;
477 case '\'':
478 break;
479 case '"':
480 break;
481 case '\n':
482 warning(stream_pos(stream), "Newline in string or character constant");
483 break;
484 case '0'...'7': {
485 int nr = 2;
486 value -= '0';
487 while (next >= '0' && next <= '9') {
488 value = (value << 3) + (next-'0');
489 next = nextchar(stream);
490 if (!--nr)
491 break;
493 value &= 0xff;
494 break;
496 case 'x': {
497 int hex = hexval(next);
498 if (hex < 16) {
499 value = hex;
500 next = nextchar(stream);
501 while ((hex = hexval(next)) < 16) {
502 value = (value << 4) + hex;
503 next = nextchar(stream);
505 value &= 0xff;
506 break;
509 /* Fall through */
510 default:
511 warning(stream_pos(stream), "Unknown escape '%c'", value);
514 /* Mark it as escaped */
515 value |= 0x100;
517 *valp = value;
518 return next;
521 static int get_char_token(int next, stream_t *stream)
523 int value;
524 struct token *token;
526 next = escapechar(next, '\'', stream, &value);
527 if (value == '\'' || next != '\'') {
528 sparse_error(stream_pos(stream), "Bad character constant");
529 drop_token(stream);
530 return next;
533 token = stream->token;
534 token_type(token) = TOKEN_CHAR;
535 token->character = value & 0xff;
537 add_token(stream);
538 return nextchar(stream);
541 static int get_string_token(int next, stream_t *stream)
543 static char buffer[MAX_STRING];
544 struct string *string;
545 struct token *token;
546 int len = 0;
548 for (;;) {
549 int val;
550 next = escapechar(next, '"', stream, &val);
551 if (val == '"')
552 break;
553 if (next == EOF) {
554 warning(stream_pos(stream), "End of file in middle of string");
555 return next;
557 if (len < MAX_STRING)
558 buffer[len] = val;
559 len++;
562 if (len > MAX_STRING) {
563 warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
564 len = MAX_STRING;
567 string = __alloc_string(len+1);
568 memcpy(string->data, buffer, len);
569 string->data[len] = '\0';
570 string->length = len+1;
572 /* Pass it on.. */
573 token = stream->token;
574 token_type(token) = TOKEN_STRING;
575 token->string = string;
576 add_token(stream);
578 return next;
581 static int drop_stream_eoln(stream_t *stream)
583 drop_token(stream);
584 for (;;) {
585 switch (nextchar(stream)) {
586 case EOF:
587 return EOF;
588 case '\n':
589 return nextchar(stream);
594 static int drop_stream_comment(stream_t *stream)
596 int newline;
597 int next;
598 drop_token(stream);
599 newline = stream->newline;
601 next = nextchar(stream);
602 for (;;) {
603 int curr = next;
604 if (curr == EOF) {
605 warning(stream_pos(stream), "End of file in the middle of a comment");
606 return curr;
608 next = nextchar(stream);
609 if (curr == '*' && next == '/')
610 break;
612 stream->newline = newline;
613 return nextchar(stream);
/* Spellings of the multi-character punctuators, indexed by (code - SPECIAL_BASE). */
unsigned char combinations[][4] = COMBINATION_STRINGS;

#define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)

/* hash function for two-character punctuators - all give unique values */
/* (result is always in 0..31; arguments are pasted unparenthesized, so pass plain values) */
#define special_hash(c0, c1) (((c0*8+c1*2)+((c0*8+c1*2)>>5))&31)
/*
 * note that we won't get false positives - special_hash(0,0) is 0 and
 * entry 0 is filled (by +=), so all the missing ones are OK.
 *
 * For each hash slot, the character pair that legitimately hashes there;
 * get_one_special() compares against it to confirm a match.  The trailing
 * comments give the slot number in hex.
 */
static unsigned char hash_results[32][2] = {
#define RES(c0, c1) [special_hash(c0, c1)] = {c0, c1}
	RES('+', '='), /* 00 */
	RES('/', '='), /* 01 */
	RES('^', '='), /* 05 */
	RES('&', '&'), /* 07 */
	RES('#', '#'), /* 08 */
	RES('<', '<'), /* 0a */
	RES('<', '='), /* 0c */
	RES('!', '='), /* 0e */
	RES('%', '='), /* 0f */
	RES('-', '-'), /* 10 */
	RES('-', '='), /* 11 */
	RES('-', '>'), /* 13 */
	RES('=', '='), /* 15 */
	RES('&', '='), /* 17 */
	RES('*', '='), /* 18 */
	RES('.', '.'), /* 1a */
	RES('+', '+'), /* 1b */
	RES('|', '='), /* 1c */
	RES('>', '='), /* 1d */
	RES('|', '|'), /* 1e */
	RES('>', '>')  /* 1f */
#undef RES
};
/*
 * Token code for each two-character punctuator, stored in the same hash
 * slot as its character pair in hash_results[].
 */
static int code[32] = {
#define CODE(c0, c1, value) [special_hash(c0, c1)] = value
	CODE('+', '=', SPECIAL_ADD_ASSIGN), /* 00 */
	CODE('/', '=', SPECIAL_DIV_ASSIGN), /* 01 */
	CODE('^', '=', SPECIAL_XOR_ASSIGN), /* 05 */
	CODE('&', '&', SPECIAL_LOGICAL_AND), /* 07 */
	CODE('#', '#', SPECIAL_HASHHASH), /* 08 */
	CODE('<', '<', SPECIAL_LEFTSHIFT), /* 0a */
	CODE('<', '=', SPECIAL_LTE), /* 0c */
	CODE('!', '=', SPECIAL_NOTEQUAL), /* 0e */
	CODE('%', '=', SPECIAL_MOD_ASSIGN), /* 0f */
	CODE('-', '-', SPECIAL_DECREMENT), /* 10 */
	CODE('-', '=', SPECIAL_SUB_ASSIGN), /* 11 */
	CODE('-', '>', SPECIAL_DEREFERENCE), /* 13 */
	CODE('=', '=', SPECIAL_EQUAL), /* 15 */
	CODE('&', '=', SPECIAL_AND_ASSIGN), /* 17 */
	CODE('*', '=', SPECIAL_MUL_ASSIGN), /* 18 */
	CODE('.', '.', SPECIAL_DOTDOT), /* 1a */
	CODE('+', '+', SPECIAL_INCREMENT), /* 1b */
	CODE('|', '=', SPECIAL_OR_ASSIGN), /* 1c */
	CODE('>', '=', SPECIAL_GTE), /* 1d */
	CODE('|', '|', SPECIAL_LOGICAL_OR), /* 1e */
	CODE('>', '>', SPECIAL_RIGHTSHIFT) /* 1f */
#undef CODE
};
678 static int get_one_special(int c, stream_t *stream)
680 struct token *token;
681 int next, value, i;
683 next = nextchar(stream);
686 * Check for numbers, strings, character constants, and comments
688 switch (c) {
689 case '.':
690 if (next >= '0' && next <= '9')
691 return get_one_number(c, next, stream);
692 break;
693 case '"':
694 return get_string_token(next, stream);
695 case '\'':
696 return get_char_token(next, stream);
697 case '/':
698 if (next == '/')
699 return drop_stream_eoln(stream);
700 if (next == '*')
701 return drop_stream_comment(stream);
705 * Check for combinations
707 value = c;
708 if (cclass[next + 1] & ValidSecond) {
709 i = special_hash(c, next);
710 if (hash_results[i][0] == c && hash_results[i][1] == next) {
711 value = code[i];
712 next = nextchar(stream);
713 if (value >= SPECIAL_LEFTSHIFT &&
714 next == "==."[value - SPECIAL_LEFTSHIFT]) {
715 value += 3;
716 next = nextchar(stream);
721 /* Pass it on.. */
722 token = stream->token;
723 token_type(token) = TOKEN_SPECIAL;
724 token->special = value;
725 add_token(stream);
726 return next;
/* The identifier table has 2^IDENT_HASH_BITS buckets. */
#define IDENT_HASH_BITS (13)
#define IDENT_HASH_SIZE (1<<IDENT_HASH_BITS)
#define IDENT_HASH_MASK (IDENT_HASH_SIZE-1)

/*
 * Incremental identifier hash: seed with the first character, fold in
 * each further character (multiply by 11, add), then reduce to a bucket
 * index by adding the bits above IDENT_HASH_BITS back in and masking.
 */
#define ident_hash_init(c)		(c)
#define ident_hash_add(oldhash,c)	((oldhash)*11 + (c))
#define ident_hash_end(hash)		((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK)

/* Bucket heads; identifiers in a bucket are chained through ident->next. */
static struct ident *hash_table[IDENT_HASH_SIZE];
/* Lookup statistics: existing ident found / ident added / idents created here. */
static int ident_hit, ident_miss, idents;
740 void show_identifier_stats(void)
742 int i;
743 int distribution[100];
745 fprintf(stderr, "identifiers: %d hits, %d misses\n",
746 ident_hit, ident_miss);
748 for (i = 0; i < 100; i++)
749 distribution[i] = 0;
751 for (i = 0; i < IDENT_HASH_SIZE; i++) {
752 struct ident * ident = hash_table[i];
753 int count = 0;
755 while (ident) {
756 count++;
757 ident = ident->next;
759 if (count > 99)
760 count = 99;
761 distribution[count]++;
764 for (i = 0; i < 100; i++) {
765 if (distribution[i])
766 fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
770 static struct ident *alloc_ident(const char *name, int len)
772 struct ident *ident = __alloc_ident(len);
773 ident->symbols = NULL;
774 ident->len = len;
775 ident->tainted = 0;
776 memcpy(ident->name, name, len);
777 return ident;
780 static struct ident * insert_hash(struct ident *ident, unsigned long hash)
782 ident->next = hash_table[hash];
783 hash_table[hash] = ident;
784 ident_miss++;
785 return ident;
788 static struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
790 struct ident *ident;
791 struct ident **p;
793 p = &hash_table[hash];
794 while ((ident = *p) != NULL) {
795 if (ident->len == (unsigned char) len) {
796 if (strncmp(name, ident->name, len) != 0)
797 goto next;
799 ident_hit++;
800 return ident;
802 next:
803 //misses++;
804 p = &ident->next;
806 ident = alloc_ident(name, len);
807 *p = ident;
808 ident->next = NULL;
809 ident_miss++;
810 idents++;
811 return ident;
/*
 * Compute the hash-table bucket for the `len` bytes at `name`, using the
 * same init/add/end steps that get_one_identifier() applies while reading.
 *
 * A non-positive `len` is treated as the empty name and hashes like a
 * single NUL byte.  (Counting down with a pre-decrement from 0 would
 * never reach zero and would read far past the end of `name`.)
 */
static unsigned long hash_name(const char *name, int len)
{
	const unsigned char *p = (const unsigned char *)name;
	unsigned long hash;
	int i;

	if (len <= 0) {
		hash = ident_hash_init(0);
		return ident_hash_end(hash);
	}

	hash = ident_hash_init(p[0]);
	for (i = 1; i < len; i++)
		hash = ident_hash_add(hash, p[i]);
	return ident_hash_end(hash);
}
827 struct ident *hash_ident(struct ident *ident)
829 return insert_hash(ident, hash_name(ident->name, ident->len));
/* Find or create the identifier for the NUL-terminated string `name`. */
struct ident *built_in_ident(const char *name)
{
	int length = strlen(name);
	unsigned long bucket = hash_name(name, length);

	return create_hashed_ident(name, length, bucket);
}
838 struct token *built_in_token(int stream, const char *name)
840 struct token *token;
842 token = __alloc_token(0);
843 token->pos.stream = stream;
844 token_type(token) = TOKEN_IDENT;
845 token->ident = built_in_ident(name);
846 return token;
849 static int get_one_identifier(int c, stream_t *stream)
851 struct token *token;
852 struct ident *ident;
853 unsigned long hash;
854 char buf[256];
855 int len = 1;
856 int next;
858 hash = ident_hash_init(c);
859 buf[0] = c;
860 for (;;) {
861 next = nextchar(stream);
862 if (!(cclass[next + 1] & (Letter | Digit)))
863 break;
864 if (len >= sizeof(buf))
865 break;
866 hash = ident_hash_add(hash, next);
867 buf[len] = next;
868 len++;
870 hash = ident_hash_end(hash);
872 ident = create_hashed_ident(buf, len, hash);
874 /* Pass it on.. */
875 token = stream->token;
876 token_type(token) = TOKEN_IDENT;
877 token->ident = ident;
878 add_token(stream);
879 return next;
882 static int get_one_token(int c, stream_t *stream)
884 long class = cclass[c + 1];
885 if (class & Digit)
886 return get_one_number(c, nextchar(stream), stream);
887 if (class & Letter)
888 return get_one_identifier(c, stream);
889 return get_one_special(c, stream);
892 static struct token *setup_stream(stream_t *stream, int idx, int fd,
893 unsigned char *buf, unsigned int buf_size)
895 struct token *begin;
897 stream->nr = idx;
898 stream->line = 1;
899 stream->newline = 1;
900 stream->whitespace = 0;
901 stream->pos = 0;
903 stream->token = NULL;
904 stream->fd = fd;
905 stream->offset = 0;
906 stream->size = buf_size;
907 stream->buffer = buf;
909 begin = alloc_token(stream);
910 token_type(begin) = TOKEN_STREAMBEGIN;
911 stream->tokenlist = &begin->next;
912 return begin;
915 static struct token *tokenize_stream(stream_t *stream)
917 int c = nextchar(stream);
918 while (c != EOF) {
919 if (!isspace(c)) {
920 struct token *token = alloc_token(stream);
921 stream->token = token;
922 stream->newline = 0;
923 stream->whitespace = 0;
924 c = get_one_token(c, stream);
925 continue;
927 stream->whitespace = 1;
928 c = nextchar(stream);
930 return mark_eof(stream);
933 struct token * tokenize_buffer(void *buffer, unsigned long size, struct token **endtoken)
935 stream_t stream;
936 struct token *begin;
938 begin = setup_stream(&stream, 0, -1, buffer, size);
939 *endtoken = tokenize_stream(&stream);
940 return begin;
943 struct token * tokenize(const char *name, int fd, struct token *endtoken, const char **next_path)
945 struct token *begin, *end;
946 stream_t stream;
947 unsigned char buffer[BUFSIZE];
948 int idx;
950 idx = init_stream(name, fd, next_path);
951 if (idx < 0) {
952 // info(endtoken->pos, "File %s is const", name);
953 return endtoken;
956 begin = setup_stream(&stream, idx, fd, buffer, 0);
957 end = tokenize_stream(&stream);
958 if (endtoken)
959 end->next = endtoken;
960 return begin;