new get_type_name function
[smatch.git] / tokenize.c
blobfbe4c5a525d4c5da5608af4e54cb99e379815bc8
1 /*
2 * This is a really stupid C tokenizer. It doesn't do any include
3 * files or anything complex at all. That's the preprocessor.
5 * Copyright (C) 2003 Transmeta Corp.
6 * 2003 Linus Torvalds
8 * Licensed under the Open Software License version 1.1
9 */
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <stdarg.h>
13 #include <stddef.h>
14 #include <string.h>
15 #include <ctype.h>
16 #include <unistd.h>
18 #include "lib.h"
19 #include "allocate.h"
20 #include "token.h"
21 #include "symbol.h"
/* End-of-input marker returned by the character readers in this file. */
#define EOF (-1)

/* Number of entries of input_streams[] handed out so far by init_stream(). */
int input_stream_nr = 0;
/* Growable table of stream descriptors, indexed by stream number. */
struct stream *input_streams;
/* Capacity of input_streams[], counted in entries. */
static int input_streams_allocated;

/* Number of bytes requested from read() on every buffer refill. */
#define BUFSIZE (8192)

/* Per-stream tokenizer state. */
typedef struct {
	int fd;				/* descriptor the buffer is refilled from */
	int offset;			/* index of the next unread byte in buffer */
	int size;			/* number of valid bytes in buffer */
	int pos;			/* column counter, reset to 0 after a '\n' */
	int line;			/* current line number */
	int nr;				/* stream index copied into token positions */
	int newline;			/* copied into pos.newline of the next token */
	int whitespace;			/* copied into pos.whitespace of the next token */
	struct token **tokenlist;	/* link slot the next token is stored through */
	struct token *token;		/* token currently being built, or NULL */
	unsigned char *buffer;		/* raw input bytes */
} stream_t;
40 const char *stream_name(int stream)
42 if (stream < 0 || stream > input_stream_nr)
43 return "<bad stream>";
44 return input_streams[stream].name;
47 static struct position stream_pos(stream_t *stream)
49 struct position pos;
50 pos.type = 0;
51 pos.stream = stream->nr;
52 pos.newline = stream->newline;
53 pos.whitespace = stream->whitespace;
54 pos.pos = stream->pos;
55 pos.line = stream->line;
56 pos.noexpand = 0;
57 return pos;
60 const char *show_special(int val)
62 static char buffer[4];
64 buffer[0] = val;
65 buffer[1] = 0;
66 if (val >= SPECIAL_BASE)
67 strcpy(buffer, (char *) combinations[val - SPECIAL_BASE]);
68 return buffer;
71 const char *show_ident(const struct ident *ident)
73 static char buffer[256];
74 if (!ident)
75 return "<noident>";
76 sprintf(buffer, "%.*s", ident->len, ident->name);
77 return buffer;
/*
 * Append the source-code spelling of character 'c' at 'ptr' and return
 * the new end of the output.
 *
 *  - printable characters are copied, preceded by a backslash when they
 *    equal 'escape' or are a backslash themselves;
 *  - newline and tab become \n and \t;
 *  - everything else becomes an octal escape.  The escape is padded to
 *    three digits when 'next' (the character that will follow) is a
 *    digit, so that digit cannot be read as part of the escape.
 *
 * The octal forms are written with sprintf(), so those are
 * NUL-terminated; the other forms are not.
 */
static char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
{
	const char *octal_fmt;

	if (isprint(c)) {
		if (c == escape || c == '\\')
			*ptr++ = '\\';
		*ptr++ = c;
		return ptr;
	}

	*ptr++ = '\\';
	if (c == '\n') {
		*ptr++ = 'n';
		return ptr;
	}
	if (c == '\t') {
		*ptr++ = 't';
		return ptr;
	}

	octal_fmt = isdigit(next) ? "%03o" : "%o";
	return ptr + sprintf(ptr, octal_fmt, c);
}
103 const char *show_string(const struct string *string)
105 static char buffer[4 * MAX_STRING + 3];
106 char *ptr;
107 int i;
109 if (!string->length)
110 return "<bad_string>";
111 ptr = buffer;
112 *ptr++ = '"';
113 for (i = 0; i < string->length-1; i++) {
114 const char *p = string->data + i;
115 ptr = charstr(ptr, p[0], '"', p[1]);
117 *ptr++ = '"';
118 *ptr = '\0';
119 return buffer;
122 const char *show_token(const struct token *token)
124 static char buffer[256];
126 if (!token)
127 return "<no token>";
128 switch (token_type(token)) {
129 case TOKEN_ERROR:
130 return "syntax error";
132 case TOKEN_EOF:
133 return "end-of-input";
135 case TOKEN_IDENT:
136 return show_ident(token->ident);
138 case TOKEN_STRING:
139 return show_string(token->string);
141 case TOKEN_NUMBER:
142 return token->number;
144 case TOKEN_SPECIAL:
145 return show_special(token->special);
147 case TOKEN_CHAR: {
148 char *ptr = buffer;
149 int c = token->character;
150 *ptr++ = '\'';
151 ptr = charstr(ptr, c, '\'', 0);
152 *ptr++ = '\'';
153 *ptr++ = '\0';
154 return buffer;
157 case TOKEN_STREAMBEGIN:
158 sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
159 return buffer;
161 case TOKEN_STREAMEND:
162 sprintf(buffer, "<end of '%s'>", stream_name(token->pos.stream));
163 return buffer;
165 default:
166 return "WTF???";
170 int init_stream(const char *name, int fd, const char **next_path)
172 int stream = input_stream_nr;
173 struct stream *current;
175 if (stream >= input_streams_allocated) {
176 int newalloc = stream * 4 / 3 + 10;
177 input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
178 if (!input_streams)
179 die("Unable to allocate more streams space");
180 input_streams_allocated = newalloc;
182 current = input_streams + stream;
183 memset(current, 0, sizeof(*current));
184 current->name = name;
185 current->fd = fd;
186 current->next_path = next_path;
187 current->path = NULL;
188 current->constant = CONSTANT_FILE_MAYBE;
189 input_stream_nr = stream+1;
190 return stream;
193 static struct token * alloc_token(stream_t *stream)
195 struct token *token = __alloc_token(0);
196 token->pos = stream_pos(stream);
197 return token;
201 * Argh... That was surprisingly messy - handling '\r' complicates the
202 * things a _lot_.
204 static int nextchar_slow(stream_t *stream)
206 int offset = stream->offset;
207 int size = stream->size;
208 int c;
209 int spliced = 0, had_cr, had_backslash, complain;
211 restart:
212 had_cr = had_backslash = complain = 0;
214 repeat:
215 if (offset >= size) {
216 size = read(stream->fd, stream->buffer, BUFSIZE);
217 if (size <= 0)
218 goto got_eof;
219 stream->size = size;
220 stream->offset = offset = 0;
223 c = stream->buffer[offset++];
225 if (had_cr && c != '\n')
226 complain = 1;
228 if (c == '\r') {
229 had_cr = 1;
230 goto repeat;
233 stream->pos++;
235 if (c == '\n') {
236 stream->line++;
237 stream->pos = 0;
240 if (!had_backslash) {
241 if (c == '\\') {
242 had_backslash = 1;
243 goto repeat;
245 if (c == '\n')
246 stream->newline = 1;
247 } else {
248 if (c == '\n') {
249 if (complain)
250 warning(stream_pos(stream), "non-ASCII data stream");
251 spliced = 1;
252 goto restart;
254 stream->pos--;
255 offset--;
256 c = '\\';
259 out:
260 stream->offset = offset;
261 if (complain)
262 warning(stream_pos(stream), "non-ASCII data stream");
264 return c;
266 got_eof:
267 if (had_backslash) {
268 c = '\\';
269 goto out;
271 if (stream->pos)
272 warning(stream_pos(stream), "no newline at end of file");
273 else if (had_cr)
274 warning(stream_pos(stream), "non-ASCII data stream");
275 else if (spliced)
276 warning(stream_pos(stream), "backslash-newline at end of file");
277 return EOF;
281 * We want that as light as possible while covering all normal cases.
282 * Slow path (including the logics with line-splicing and EOF sanity
283 * checks) is in nextchar_slow().
285 static inline int nextchar(stream_t *stream)
287 int offset = stream->offset;
289 if (offset < stream->size) {
290 int c = stream->buffer[offset++];
291 static const char special[256] = {
292 ['\r'] = 1, ['\n'] = 1, ['\\'] = 1
294 if (!special[c]) {
295 stream->offset = offset;
296 stream->pos++;
297 return c;
300 return nextchar_slow(stream);
303 struct token eof_token_entry;
305 static void mark_eof(stream_t *stream, struct token *end_token)
307 struct token *end;
309 end = alloc_token(stream);
310 token_type(end) = TOKEN_STREAMEND;
311 end->pos.newline = 1;
313 eof_token_entry.next = &eof_token_entry;
314 eof_token_entry.pos.newline = 1;
316 if (!end_token)
317 end_token = &eof_token_entry;
318 end->next = end_token;
319 *stream->tokenlist = end;
320 stream->tokenlist = NULL;
323 static void add_token(stream_t *stream)
325 struct token *token = stream->token;
327 stream->token = NULL;
328 token->next = NULL;
329 *stream->tokenlist = token;
330 stream->tokenlist = &token->next;
333 static void drop_token(stream_t *stream)
335 stream->newline |= stream->token->pos.newline;
336 stream->whitespace |= stream->token->pos.whitespace;
337 stream->token = NULL;
/* Character class bits stored in cclass[]. */
enum {
	Letter		= 1 << 0,
	Digit		= 1 << 1,
	Hex		= 1 << 2,
	Exp		= 1 << 3,
	Dot		= 1 << 4,
	ValidSecond	= 1 << 5,
};

/*
 * Character classification table.  It is indexed by "character + 1" so
 * that the value -1 used for EOF maps to slot 0; slots not listed here
 * are zero.
 */
static const long cclass[257] = {
	/* digits */
	['0' + 1 ... '9' + 1] = Digit | Hex,

	/* letters that are also hex digits; 'e'/'E' also marks an exponent */
	['A' + 1 ... 'D' + 1] = Letter | Hex,
	['a' + 1 ... 'd' + 1] = Letter | Hex,
	['E' + 1] = Letter | Hex | Exp,
	['e' + 1] = Letter | Hex | Exp,
	['F' + 1] = Letter | Hex,
	['f' + 1] = Letter | Hex,

	/* remaining letters; 'p'/'P' also marks an exponent */
	['G' + 1 ... 'O' + 1] = Letter,
	['g' + 1 ... 'o' + 1] = Letter,
	['P' + 1] = Letter | Exp,
	['p' + 1] = Letter | Exp,
	['Q' + 1 ... 'Z' + 1] = Letter,
	['q' + 1 ... 'z' + 1] = Letter,
	['_' + 1] = Letter,

	/* characters that may be the second one of a two-character operator */
	['.' + 1] = Dot | ValidSecond,
	['=' + 1] = ValidSecond,
	['+' + 1] = ValidSecond,
	['-' + 1] = ValidSecond,
	['>' + 1] = ValidSecond,
	['<' + 1] = ValidSecond,
	['&' + 1] = ValidSecond,
	['|' + 1] = ValidSecond,
	['#' + 1] = ValidSecond,
};
376 * pp-number:
377 * digit
378 * . digit
379 * pp-number digit
380 * pp-number identifier-nodigit
381 * pp-number e sign
382 * pp-number E sign
383 * pp-number p sign
384 * pp-number P sign
385 * pp-number .
387 static int get_one_number(int c, int next, stream_t *stream)
389 struct token *token;
390 static char buffer[4095];
391 char *p = buffer, *buf, *buffer_end = buffer + sizeof (buffer);
392 int len;
394 *p++ = c;
395 for (;;) {
396 long class = cclass[next + 1];
397 if (!(class & (Dot | Digit | Letter)))
398 break;
399 if (p != buffer_end)
400 *p++ = next;
401 next = nextchar(stream);
402 if (class & Exp) {
403 if (next == '-' || next == '+') {
404 if (p != buffer_end)
405 *p++ = next;
406 next = nextchar(stream);
411 if (p == buffer_end) {
412 sparse_error(stream_pos(stream), "number token exceeds %td characters",
413 buffer_end - buffer);
414 // Pretend we saw just "1".
415 buffer[0] = '1';
416 p = buffer + 1;
419 *p++ = 0;
420 len = p - buffer;
421 buf = __alloc_bytes(len);
422 memcpy(buf, buffer, len);
424 token = stream->token;
425 token_type(token) = TOKEN_NUMBER;
426 token->number = buf;
427 add_token(stream);
429 return next;
432 static int escapechar(int first, int type, stream_t *stream, int *valp)
434 int next, value;
436 next = nextchar(stream);
437 value = first;
439 if (first == '\n')
440 warning(stream_pos(stream), "Newline in string or character constant");
442 if (first == '\\' && next != EOF) {
443 value = next;
444 next = nextchar(stream);
445 if (value != type) {
446 switch (value) {
447 case 'a':
448 value = '\a';
449 break;
450 case 'b':
451 value = '\b';
452 break;
453 case 't':
454 value = '\t';
455 break;
456 case 'n':
457 value = '\n';
458 break;
459 case 'v':
460 value = '\v';
461 break;
462 case 'f':
463 value = '\f';
464 break;
465 case 'r':
466 value = '\r';
467 break;
468 case 'e':
469 value = '\e';
470 break;
471 case '\\':
472 break;
473 case '?':
474 break;
475 case '\'':
476 break;
477 case '"':
478 break;
479 case '\n':
480 warning(stream_pos(stream), "Newline in string or character constant");
481 break;
482 case '0'...'7': {
483 int nr = 2;
484 value -= '0';
485 while (next >= '0' && next <= '9') {
486 value = (value << 3) + (next-'0');
487 next = nextchar(stream);
488 if (!--nr)
489 break;
491 value &= 0xff;
492 break;
494 case 'x': {
495 int hex = hexval(next);
496 if (hex < 16) {
497 value = hex;
498 next = nextchar(stream);
499 while ((hex = hexval(next)) < 16) {
500 value = (value << 4) + hex;
501 next = nextchar(stream);
503 value &= 0xff;
504 break;
507 /* Fall through */
508 default:
509 warning(stream_pos(stream), "Unknown escape '%c'", value);
512 /* Mark it as escaped */
513 value |= 0x100;
515 *valp = value;
516 return next;
519 static int get_char_token(int next, stream_t *stream)
521 int value;
522 struct token *token;
524 next = escapechar(next, '\'', stream, &value);
525 if (value == '\'' || next != '\'') {
526 sparse_error(stream_pos(stream), "Bad character constant");
527 drop_token(stream);
528 return next;
531 token = stream->token;
532 token_type(token) = TOKEN_CHAR;
533 token->character = value & 0xff;
535 add_token(stream);
536 return nextchar(stream);
539 static int get_string_token(int next, stream_t *stream)
541 static char buffer[MAX_STRING];
542 struct string *string;
543 struct token *token;
544 int len = 0;
546 for (;;) {
547 int val;
548 next = escapechar(next, '"', stream, &val);
549 if (val == '"')
550 break;
551 if (next == EOF) {
552 warning(stream_pos(stream), "End of file in middle of string");
553 return next;
555 if (len < MAX_STRING)
556 buffer[len] = val;
557 len++;
560 if (len > MAX_STRING) {
561 warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
562 len = MAX_STRING;
565 string = __alloc_string(len+1);
566 memcpy(string->data, buffer, len);
567 string->data[len] = '\0';
568 string->length = len+1;
570 /* Pass it on.. */
571 token = stream->token;
572 token_type(token) = TOKEN_STRING;
573 token->string = string;
574 add_token(stream);
576 return next;
579 static int drop_stream_eoln(stream_t *stream)
581 int next = nextchar(stream);
582 drop_token(stream);
583 for (;;) {
584 int curr = next;
585 if (curr == EOF)
586 return next;
587 next = nextchar(stream);
588 if (curr == '\n')
589 return next;
593 static int drop_stream_comment(stream_t *stream)
595 int newline;
596 int next;
597 drop_token(stream);
598 newline = stream->newline;
600 next = nextchar(stream);
601 for (;;) {
602 int curr = next;
603 if (curr == EOF) {
604 warning(stream_pos(stream), "End of file in the middle of a comment");
605 return curr;
607 next = nextchar(stream);
608 if (curr == '*' && next == '/')
609 break;
611 stream->newline = newline;
612 return nextchar(stream);
615 unsigned char combinations[][4] = COMBINATION_STRINGS;
617 #define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
/*
 * hash function for two-character punctuators - all give unique values
 *
 * Every argument is parenthesised so that expression arguments are
 * hashed by their value.
 */
#define special_hash(c0, c1) \
	((((c0)*8+(c1)*2)+(((c0)*8+(c1)*2)>>5))&31)

/*
 * For every hash slot, the two characters of the punctuator that owns
 * it (zero for unused slots).
 *
 * note that we won't get false positives - special_hash(0,0) is 0 and
 * entry 0 is filled (by +=), so all the missing ones are OK.
 */
static unsigned char hash_results[32][2] = {
#define RES(c0, c1) [special_hash(c0, c1)] = {c0, c1}
	RES('+', '='), /* 00 */
	RES('/', '='), /* 01 */
	RES('^', '='), /* 05 */
	RES('&', '&'), /* 07 */
	RES('#', '#'), /* 08 */
	RES('<', '<'), /* 0a */
	RES('<', '='), /* 0c */
	RES('!', '='), /* 0e */
	RES('%', '='), /* 0f */
	RES('-', '-'), /* 10 */
	RES('-', '='), /* 11 */
	RES('-', '>'), /* 13 */
	RES('=', '='), /* 15 */
	RES('&', '='), /* 17 */
	RES('*', '='), /* 18 */
	RES('.', '.'), /* 1a */
	RES('+', '+'), /* 1b */
	RES('|', '='), /* 1c */
	RES('>', '='), /* 1d */
	RES('|', '|'), /* 1e */
	RES('>', '>')  /* 1f */
#undef RES
};
651 static int code[32] = {
652 #define CODE(c0, c1, value) [special_hash(c0, c1)] = value
653 CODE('+', '=', SPECIAL_ADD_ASSIGN), /* 00 */
654 CODE('/', '=', SPECIAL_DIV_ASSIGN), /* 01 */
655 CODE('^', '=', SPECIAL_XOR_ASSIGN), /* 05 */
656 CODE('&', '&', SPECIAL_LOGICAL_AND), /* 07 */
657 CODE('#', '#', SPECIAL_HASHHASH), /* 08 */
658 CODE('<', '<', SPECIAL_LEFTSHIFT), /* 0a */
659 CODE('<', '=', SPECIAL_LTE), /* 0c */
660 CODE('!', '=', SPECIAL_NOTEQUAL), /* 0e */
661 CODE('%', '=', SPECIAL_MOD_ASSIGN), /* 0f */
662 CODE('-', '-', SPECIAL_DECREMENT), /* 10 */
663 CODE('-', '=', SPECIAL_SUB_ASSIGN), /* 11 */
664 CODE('-', '>', SPECIAL_DEREFERENCE), /* 13 */
665 CODE('=', '=', SPECIAL_EQUAL), /* 15 */
666 CODE('&', '=', SPECIAL_AND_ASSIGN), /* 17 */
667 CODE('*', '=', SPECIAL_MUL_ASSIGN), /* 18 */
668 CODE('.', '.', SPECIAL_DOTDOT), /* 1a */
669 CODE('+', '+', SPECIAL_INCREMENT), /* 1b */
670 CODE('|', '=', SPECIAL_OR_ASSIGN), /* 1c */
671 CODE('>', '=', SPECIAL_GTE), /* 1d */
672 CODE('|', '|', SPECIAL_LOGICAL_OR), /* 1e */
673 CODE('>', '>', SPECIAL_RIGHTSHIFT) /* 1f */
674 #undef CODE
677 static int get_one_special(int c, stream_t *stream)
679 struct token *token;
680 int next, value, i;
682 next = nextchar(stream);
685 * Check for numbers, strings, character constants, and comments
687 switch (c) {
688 case '.':
689 if (next >= '0' && next <= '9')
690 return get_one_number(c, next, stream);
691 break;
692 case '"':
693 return get_string_token(next, stream);
694 case '\'':
695 return get_char_token(next, stream);
696 case '/':
697 if (next == '/')
698 return drop_stream_eoln(stream);
699 if (next == '*')
700 return drop_stream_comment(stream);
704 * Check for combinations
706 value = c;
707 if (cclass[next + 1] & ValidSecond) {
708 i = special_hash(c, next);
709 if (hash_results[i][0] == c && hash_results[i][1] == next) {
710 value = code[i];
711 next = nextchar(stream);
712 if (value >= SPECIAL_LEFTSHIFT &&
713 next == "==."[value - SPECIAL_LEFTSHIFT]) {
714 value += 3;
715 next = nextchar(stream);
720 /* Pass it on.. */
721 token = stream->token;
722 token_type(token) = TOKEN_SPECIAL;
723 token->special = value;
724 add_token(stream);
725 return next;
/* The identifier hash table has 2^IDENT_HASH_BITS buckets. */
#define IDENT_HASH_BITS (13)
#define IDENT_HASH_SIZE (1<<IDENT_HASH_BITS)
#define IDENT_HASH_MASK (IDENT_HASH_SIZE-1)

/*
 * Incremental identifier hash: seed it with the first character, add
 * the following characters one at a time, then fold the result down to
 * a bucket index.
 */
#define ident_hash_init(c)		(c)
#define ident_hash_add(oldhash,c)	((oldhash)*11 + (c))
#define ident_hash_end(hash)		((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK)

/* Bucket heads; identifiers in one bucket are chained through ->next. */
static struct ident *hash_table[IDENT_HASH_SIZE];

/* Lookup statistics printed by show_identifier_stats(). */
static int ident_hit;
static int ident_miss;
static int idents;
739 void show_identifier_stats(void)
741 int i;
742 int distribution[100];
744 fprintf(stderr, "identifiers: %d hits, %d misses\n",
745 ident_hit, ident_miss);
747 for (i = 0; i < 100; i++)
748 distribution[i] = 0;
750 for (i = 0; i < IDENT_HASH_SIZE; i++) {
751 struct ident * ident = hash_table[i];
752 int count = 0;
754 while (ident) {
755 count++;
756 ident = ident->next;
758 if (count > 99)
759 count = 99;
760 distribution[count]++;
763 for (i = 0; i < 100; i++) {
764 if (distribution[i])
765 fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
769 static struct ident *alloc_ident(const char *name, int len)
771 struct ident *ident = __alloc_ident(len);
772 ident->symbols = NULL;
773 ident->len = len;
774 ident->tainted = 0;
775 memcpy(ident->name, name, len);
776 return ident;
779 static struct ident * insert_hash(struct ident *ident, unsigned long hash)
781 ident->next = hash_table[hash];
782 hash_table[hash] = ident;
783 ident_miss++;
784 return ident;
787 static struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
789 struct ident *ident;
790 struct ident **p;
792 p = &hash_table[hash];
793 while ((ident = *p) != NULL) {
794 if (ident->len == (unsigned char) len) {
795 const char *n = name;
796 const char *m = ident->name;
797 int l = len;
798 do {
799 if (*n != *m)
800 goto next;
801 n++;
802 m++;
803 } while (--l);
805 ident_hit++;
806 return ident;
808 next:
809 //misses++;
810 p = &ident->next;
812 ident = alloc_ident(name, len);
813 *p = ident;
814 ident->next = NULL;
815 ident_miss++;
816 idents++;
817 return ident;
/*
 * Compute the hash table bucket for the 'len' bytes at 'name', using
 * the same ident_hash_init/add/end sequence that get_one_identifier()
 * applies character by character.
 *
 * A length of zero or less yields the hash of the empty sequence
 * instead of counting 'len' down past zero and reading far beyond the
 * name.
 */
static unsigned long hash_name(const char *name, int len)
{
	const unsigned char *p = (const unsigned char *)name;
	unsigned long hash;

	if (len <= 0)
		return ident_hash_end(0UL);

	hash = ident_hash_init(*p++);
	for (len--; len > 0; len--) {
		unsigned int i = *p++;
		hash = ident_hash_add(hash, i);
	}
	return ident_hash_end(hash);
}
833 struct ident *hash_ident(struct ident *ident)
835 return insert_hash(ident, hash_name(ident->name, ident->len));
/*
 * Return the identifier for the NUL-terminated string 'name', creating
 * it in the hash table if needed.
 */
struct ident *built_in_ident(const char *name)
{
	const int len = strlen(name);
	const unsigned long bucket = hash_name(name, len);

	return create_hashed_ident(name, len, bucket);
}
844 struct token *built_in_token(int stream, const char *name)
846 struct token *token;
848 token = __alloc_token(0);
849 token->pos.stream = stream;
850 token_type(token) = TOKEN_IDENT;
851 token->ident = built_in_ident(name);
852 return token;
855 static int get_one_identifier(int c, stream_t *stream)
857 struct token *token;
858 struct ident *ident;
859 unsigned long hash;
860 char buf[256];
861 int len = 1;
862 int next;
864 hash = ident_hash_init(c);
865 buf[0] = c;
866 for (;;) {
867 next = nextchar(stream);
868 if (!(cclass[next + 1] & (Letter | Digit)))
869 break;
870 if (len >= sizeof(buf))
871 break;
872 hash = ident_hash_add(hash, next);
873 buf[len] = next;
874 len++;
876 hash = ident_hash_end(hash);
878 ident = create_hashed_ident(buf, len, hash);
880 /* Pass it on.. */
881 token = stream->token;
882 token_type(token) = TOKEN_IDENT;
883 token->ident = ident;
884 add_token(stream);
885 return next;
888 static int get_one_token(int c, stream_t *stream)
890 long class = cclass[c + 1];
891 if (class & Digit)
892 return get_one_number(c, nextchar(stream), stream);
893 if (class & Letter)
894 return get_one_identifier(c, stream);
895 return get_one_special(c, stream);
898 static struct token *setup_stream(stream_t *stream, int idx, int fd,
899 unsigned char *buf, unsigned int buf_size)
901 struct token *begin;
903 stream->nr = idx;
904 stream->line = 1;
905 stream->newline = 1;
906 stream->whitespace = 0;
907 stream->pos = 0;
909 stream->token = NULL;
910 stream->fd = fd;
911 stream->offset = 0;
912 stream->size = buf_size;
913 stream->buffer = buf;
915 begin = alloc_token(stream);
916 token_type(begin) = TOKEN_STREAMBEGIN;
917 stream->tokenlist = &begin->next;
918 return begin;
921 static void tokenize_stream(stream_t *stream, struct token *endtoken)
923 int c = nextchar(stream);
924 while (c != EOF) {
925 if (!isspace(c)) {
926 struct token *token = alloc_token(stream);
927 stream->token = token;
928 stream->newline = 0;
929 stream->whitespace = 0;
930 c = get_one_token(c, stream);
931 continue;
933 stream->whitespace = 1;
934 c = nextchar(stream);
936 mark_eof(stream, endtoken);
939 struct token * tokenize_buffer(void *buffer, unsigned long size, struct token *endtoken)
941 stream_t stream;
942 struct token *begin;
944 begin = setup_stream(&stream, 0, -1, buffer, size);
945 tokenize_stream(&stream, endtoken);
946 return begin;
949 struct token * tokenize(const char *name, int fd, struct token *endtoken, const char **next_path)
951 struct token *begin;
952 stream_t stream;
953 unsigned char buffer[BUFSIZE];
954 int idx;
956 idx = init_stream(name, fd, next_path);
957 if (idx < 0) {
958 // info(endtoken->pos, "File %s is const", name);
959 return endtoken;
962 begin = setup_stream(&stream, idx, fd, buffer, 0);
963 tokenize_stream(&stream, endtoken);
964 return begin;