tokenize.c

/*
 * This is a really stupid C tokenizer. It doesn't handle include
 * files or anything complex at all; that's the job of the pre-processor.
 *
 * Copyright (C) 2003 Transmeta Corp.
 *               2003 Linus Torvalds
 *
 *  Licensed under the Open Software License version 1.1
 */
  10 #include <stdio.h>
  11 #include <stdlib.h>
  12 #include <stdarg.h>
  13 #include <stddef.h>
  14 #include <string.h>
  15 #include <ctype.h>
  16 #include <unistd.h>
  17
  18 #include "lib.h"
  19 #include "allocate.h"
  20 #include "token.h"
  21 #include "symbol.h"
  22
  23 #define EOF (-1)
  24
  25 int input_stream_nr = 0;
  26 struct stream *input_streams;
  27 static int input_streams_allocated;
  28
  29 #define BUFSIZE (8192)
  30
  31 typedef struct {
  32         int fd, offset, size;
  33         int pos, line, nr;
  34         int newline, whitespace;
  35         struct token **tokenlist;
  36         struct token *token;
  37         unsigned char *buffer;
  38 } stream_t;
  39
  40 const char *stream_name(int stream)
  41 {
  42         if (stream < 0 || stream > input_stream_nr)
  43                 return "<bad stream>";
  44         return input_streams[stream].name;
  45 }
  46
  47 static struct position stream_pos(stream_t *stream)
  48 {
  49         struct position pos;
  50         pos.type = 0;
  51         pos.stream = stream->nr;
  52         pos.newline = stream->newline;
  53         pos.whitespace = stream->whitespace;
  54         pos.pos = stream->pos;
  55         pos.line = stream->line;
  56         pos.noexpand = 0;
  57         return pos;
  58 }
  59
  60 const char *show_special(int val)
  61 {
  62         static const char *combinations[] = COMBINATION_STRINGS;
  63         static char buffer[4];
  64
  65         buffer[0] = val;
  66         buffer[1] = 0;
  67         if (val >= SPECIAL_BASE)
  68                 strcpy(buffer, combinations[val - SPECIAL_BASE]);
  69         return buffer;
  70 }
  71
  72 const char *show_ident(const struct ident *ident)
  73 {
  74         static char buffer[256];
  75         if (!ident)
  76                 return "<noident>";
  77         sprintf(buffer, "%.*s", ident->len, ident->name);
  78         return buffer;
  79 }
  80
  81 static char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
  82 {
  83         if (isprint(c)) {
  84                 if (c == escape || c == '\\')
  85                         *ptr++ = '\\';
  86                 *ptr++ = c;
  87                 return ptr;
  88         }
  89         *ptr++ = '\\';
  90         switch (c) {
  91         case '\n':
  92                 *ptr++ = 'n';
  93                 return ptr;
  94         case '\t':
  95                 *ptr++ = 't';
  96                 return ptr;
  97         }
  98         if (!isdigit(next))
  99                 return ptr + sprintf(ptr, "%o", c);
 100
 101         return ptr + sprintf(ptr, "%03o", c);
 102 }
 103
 104 const char *show_string(const struct string *string)
 105 {
 106         static char buffer[4 * MAX_STRING + 3];
 107         char *ptr;
 108         int i;
 109
 110         if (!string->length)
 111                 return "<bad_string>";
 112         ptr = buffer;
 113         *ptr++ = '"';
 114         for (i = 0; i < string->length-1; i++) {
 115                 const char *p = string->data + i;
 116                 ptr = charstr(ptr, p[0], '"', p[1]);
 117         }
 118         *ptr++ = '"';
 119         *ptr = '\0';
 120         return buffer;
 121 }
 122
 123 const char *show_token(const struct token *token)
 124 {
 125         static char buffer[256];
 126
 127         if (!token)
 128                 return "<no token>";
 129         switch (token_type(token)) {
 130         case TOKEN_ERROR:
 131                 return "syntax error";
 132
 133         case TOKEN_EOF:
 134                 return "end-of-input";
 135
 136         case TOKEN_IDENT:
 137                 return show_ident(token->ident);
 138
 139         case TOKEN_STRING:
 140                 return show_string(token->string);
 141
 142         case TOKEN_NUMBER:
 143                 return token->number;
 144
 145         case TOKEN_SPECIAL:
 146                 return show_special(token->special);
 147
 148         case TOKEN_CHAR: {
 149                 char *ptr = buffer;
 150                 int c = token->character;
 151                 *ptr++ = '\'';
 152                 ptr = charstr(ptr, c, '\'', 0);
 153                 *ptr++ = '\'';
 154                 *ptr++ = '\0';
 155                 return buffer;
 156         }
 157
 158         case TOKEN_STREAMBEGIN:
 159                 sprintf(buffer, "<beginning of '%s'>", stream_name(token->pos.stream));
 160                 return buffer;
 161
 162         case TOKEN_STREAMEND:
 163                 sprintf(buffer, "<end of '%s'>", stream_name(token->pos.stream));
 164                 return buffer;
 165
 166         default:
 167                 return "WTF???";
 168         }
 169 }
 170
 171 int init_stream(const char *name, int fd, const char **next_path)
 172 {
 173         int stream = input_stream_nr;
 174         struct stream *current;
 175
 176         if (stream >= input_streams_allocated) {
 177                 int newalloc = stream * 4 / 3 + 10;
 178                 input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
 179                 if (!input_streams)
 180                         die("Unable to allocate more streams space");
 181                 input_streams_allocated = newalloc;
 182         }
 183         current = input_streams + stream;
 184         memset(current, 0, sizeof(*current));
 185         current->name = name;
 186         current->fd = fd;
 187         current->next_path = next_path;
 188         current->path = NULL;
 189         current->constant = CONSTANT_FILE_MAYBE;
 190         input_stream_nr = stream+1;
 191         return stream;
 192 }
 193
 194 static struct token * alloc_token(stream_t *stream)
 195 {
 196         struct token *token = __alloc_token(0);
 197         token->pos = stream_pos(stream);
 198         return token;
 199 }
 200
 201 /*
 202  *  Argh...  That was surprisingly messy - handling '\r' complicates the
 203  *  things a _lot_.
 204  */
 205 static int nextchar_slow(stream_t *stream)
 206 {
 207         int offset = stream->offset;
 208         int size = stream->size;
 209         int c;
 210         int spliced = 0, had_cr, had_backslash, complain;
 211
 212 restart:
 213         had_cr = had_backslash = complain = 0;
 214
 215 repeat:
 216         if (offset >= size) {
 217                 size = read(stream->fd, stream->buffer, BUFSIZE);
 218                 if (size <= 0)
 219                         goto got_eof;
 220                 stream->size = size;
 221                 stream->offset = offset = 0;
 222         }
 223
 224         c = stream->buffer[offset++];
 225
 226         if (had_cr && c != '\n')
 227                 complain = 1;
 228
 229         if (c == '\r') {
 230                 had_cr = 1;
 231                 goto repeat;
 232         }
 233
 234         stream->pos++;
 235
 236         if (c == '\n') {
 237                 stream->line++;
 238                 stream->pos = 0;
 239         }
 240
 241         if (!had_backslash) {
 242                 if (c == '\\') {
 243                         had_backslash = 1;
 244                         goto repeat;
 245                 }
 246                 if (c == '\n')
 247                         stream->newline = 1;
 248         } else {
 249                 if (c == '\n') {
 250                         if (complain)
 251                                 warning(stream_pos(stream), "non-ASCII data stream");
 252                         spliced = 1;
 253                         goto restart;
 254                 }
 255                 stream->pos--;
 256                 offset--;
 257                 c = '\\';
 258         }
 259
 260 out:
 261         stream->offset = offset;
 262         if (complain)
 263                 warning(stream_pos(stream), "non-ASCII data stream");
 264
 265         return c;
 266
 267 got_eof:
 268         if (had_backslash) {
 269                 c = '\\';
 270                 goto out;
 271         }
 272         if (stream->pos)
 273                 warning(stream_pos(stream), "no newline at end of file");
 274         else if (had_cr)
 275                 warning(stream_pos(stream), "non-ASCII data stream");
 276         else if (spliced)
 277                 warning(stream_pos(stream), "backslash-newline at end of file");
 278         return EOF;
 279 }
 280
 281 /*
 282  *  We want that as light as possible while covering all normal cases.
 283  *  Slow path (including the logics with line-splicing and EOF sanity
 284  *  checks) is in nextchar_slow().
 285  */
 286 static int nextchar(stream_t *stream)
 287 {
 288         int offset = stream->offset;
 289
 290         if (offset < stream->size) {
 291                 int c = stream->buffer[offset++];
 292                 static const char special[256] = {
 293                         ['\r'] = 1, ['\n'] = 1, ['\\'] = 1
 294                 };
 295                 if (!special[c]) {
 296                         stream->offset = offset;
 297                         stream->pos++;
 298                         return c;
 299                 }
 300         }
 301         return nextchar_slow(stream);
 302 }
 303
 304 struct token eof_token_entry;
 305
 306 static void mark_eof(stream_t *stream, struct token *end_token)
 307 {
 308         struct token *end;
 309
 310         end = alloc_token(stream);
 311         token_type(end) = TOKEN_STREAMEND;
 312         end->pos.newline = 1;
 313
 314         eof_token_entry.next = &eof_token_entry;
 315         eof_token_entry.pos.newline = 1;
 316
 317         if (!end_token)
 318                 end_token =  &eof_token_entry;
 319         end->next = end_token;
 320         *stream->tokenlist = end;
 321         stream->tokenlist = NULL;
 322 }
 323
 324 static void add_token(stream_t *stream)
 325 {
 326         struct token *token = stream->token;
 327
 328         stream->token = NULL;
 329         token->next = NULL;
 330         *stream->tokenlist = token;
 331         stream->tokenlist = &token->next;
 332 }
 333
 334 static void drop_token(stream_t *stream)
 335 {
 336         stream->newline |= stream->token->pos.newline;
 337         stream->whitespace |= stream->token->pos.whitespace;
 338         stream->token = NULL;
 339 }
 340
 341 enum {
 342         Letter = 1,
 343         Digit = 2,
 344         Hex = 4,
 345         Exp = 8,
 346         Dot = 16,
 347         ValidSecond = 32,
 348 };
 349
 350 static const long cclass[257] = {
 351         ['0' + 1 ... '9' + 1] = Digit | Hex,
 352         ['A' + 1 ... 'D' + 1] = Letter | Hex,
 353         ['E' + 1] = Letter | Hex | Exp,
 354         ['F' + 1] = Letter | Hex,
 355         ['G' + 1 ... 'O' + 1] = Letter,
 356         ['P' + 1] = Letter | Exp,
 357         ['Q' + 1 ... 'Z' + 1] = Letter,
 358         ['a' + 1 ... 'd' + 1] = Letter | Hex,
 359         ['e' + 1] = Letter | Hex | Exp,
 360         ['f' + 1] = Letter | Hex,
 361         ['g' + 1 ... 'o' + 1] = Letter,
 362         ['p' + 1] = Letter | Exp,
 363         ['q' + 1 ... 'z' + 1] = Letter,
 364         ['_' + 1] = Letter,
 365         ['.' + 1] = Dot | ValidSecond,
 366         ['=' + 1] = ValidSecond,
 367         ['+' + 1] = ValidSecond,
 368         ['-' + 1] = ValidSecond,
 369         ['>' + 1] = ValidSecond,
 370         ['<' + 1] = ValidSecond,
 371         ['&' + 1] = ValidSecond,
 372         ['|' + 1] = ValidSecond,
 373         ['#' + 1] = ValidSecond,
 374 };
 375
 376 /*
 377  * pp-number:
 378  *      digit
 379  *      . digit
 380  *      pp-number digit
 381  *      pp-number identifier-nodigit
 382  *      pp-number e sign
 383  *      pp-number E sign
 384  *      pp-number p sign
 385  *      pp-number P sign
 386  *      pp-number .
 387  */
 388 static int get_one_number(int c, int next, stream_t *stream)
 389 {
 390         struct token *token;
 391         static char buffer[4095];
 392         char *p = buffer, *buf, *buffer_end = buffer + sizeof (buffer);
 393         int len;
 394
 395         *p++ = c;
 396         for (;;) {
 397                 long class =  cclass[next + 1];
 398                 if (!(class & (Dot | Digit | Letter)))
 399                         break;
 400                 if (p != buffer_end)
 401                         *p++ = next;
 402                 next = nextchar(stream);
 403                 if (class & Exp) {
 404                         if (next == '-' || next == '+') {
 405                                 if (p != buffer_end)
 406                                         *p++ = next;
 407                                 next = nextchar(stream);
 408                         }
 409                 }
 410         }
 411
 412         if (p == buffer_end) {
 413                 error(stream_pos(stream), "number token exceeds %td characters",
 414                       buffer_end - buffer);
 415                 // Pretend we saw just "1".
 416                 buffer[0] = '1';
 417                 p = buffer + 1;
 418         }
 419
 420         *p++ = 0;
 421         len = p - buffer;
 422         buf = __alloc_bytes(len);
 423         memcpy(buf, buffer, len);
 424
 425         token = stream->token;
 426         token_type(token) = TOKEN_NUMBER;
 427         token->number = buf;
 428         add_token(stream);
 429
 430         return next;
 431 }
 432
 433 static int escapechar(int first, int type, stream_t *stream, int *valp)
 434 {
 435         int next, value;
 436
 437         next = nextchar(stream);
 438         value = first;
 439
 440         if (first == '\n')
 441                 warning(stream_pos(stream), "Newline in string or character constant");
 442
 443         if (first == '\\' && next != EOF) {
 444                 value = next;
 445                 next = nextchar(stream);
 446                 if (value != type) {
 447                         switch (value) {
 448                         case 'a':
 449                                 value = '\a';
 450                                 break;
 451                         case 'b':
 452                                 value = '\b';
 453                                 break;
 454                         case 't':
 455                                 value = '\t';
 456                                 break;
 457                         case 'n':
 458                                 value = '\n';
 459                                 break;
 460                         case 'v':
 461                                 value = '\v';
 462                                 break;
 463                         case 'f':
 464                                 value = '\f';
 465                                 break;
 466                         case 'r':
 467                                 value = '\r';
 468                                 break;
 469                         case 'e':
 470                                 value = '\e';
 471                                 break;
 472                         case '\\':
 473                                 break;
 474                         case '\'':
 475                                 break;
 476                         case '"':
 477                                 break;
 478                         case '\n':
 479                                 warning(stream_pos(stream), "Newline in string or character constant");
 480                                 break;
 481                         case '0'...'7': {
 482                                 int nr = 2;
 483                                 value -= '0';
 484                                 while (next >= '0' && next <= '9') {
 485                                         value = (value << 3) + (next-'0');
 486                                         next = nextchar(stream);
 487                                         if (!--nr)
 488                                                 break;
 489                                 }
 490                                 value &= 0xff;
 491                                 break;
 492                         }
 493                         case 'x': {
 494                                 int hex = hexval(next);
 495                                 if (hex < 16) {
 496                                         value = hex;
 497                                         next = nextchar(stream);
 498                                         while ((hex = hexval(next)) < 16) {
 499                                                 value = (value << 4) + hex;
 500                                                 next = nextchar(stream);
 501                                         }
 502                                         value &= 0xff;
 503                                         break;
 504                                 }
 505                         }
 506                         /* Fallthrough */
 507                         default:
 508                                 warning(stream_pos(stream), "Unknown escape '%c'", value);
 509                         }
 510                 }
 511                 /* Mark it as escaped */
 512                 value |= 0x100;
 513         }
 514         *valp = value;
 515         return next;
 516 }
 517
 518 static int get_char_token(int next, stream_t *stream)
 519 {
 520         int value;
 521         struct token *token;
 522
 523         next = escapechar(next, '\'', stream, &value);
 524         if (value == '\'' || next != '\'') {
 525                 warning(stream_pos(stream), "Bad character constant");
 526                 drop_token(stream);
 527                 return next;
 528         }
 529
 530         token = stream->token;
 531         token_type(token) = TOKEN_CHAR;
 532         token->character = value & 0xff;
 533
 534         add_token(stream);
 535         return nextchar(stream);
 536 }
 537
 538 static int get_string_token(int next, stream_t *stream)
 539 {
 540         static char buffer[MAX_STRING];
 541         struct string *string;
 542         struct token *token;
 543         int len = 0;
 544
 545         for (;;) {
 546                 int val;
 547                 next = escapechar(next, '"', stream, &val);
 548                 if (val == '"')
 549                         break;
 550                 if (next == EOF) {
 551                         warning(stream_pos(stream), "End of file in middle of string");
 552                         return next;
 553                 }
 554                 if (len < MAX_STRING)
 555                         buffer[len] = val;
 556                 len++;
 557         }
 558
 559         if (len > MAX_STRING) {
 560                 warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
 561                 len = MAX_STRING;
 562         }
 563
 564         string = __alloc_string(len+1);
 565         memcpy(string->data, buffer, len);
 566         string->data[len] = '\0';
 567         string->length = len+1;
 568
 569         /* Pass it on.. */
 570         token = stream->token;
 571         token_type(token) = TOKEN_STRING;
 572         token->string = string;
 573         add_token(stream);
 574
 575         return next;
 576 }
 577
 578 static int drop_stream_eoln(stream_t *stream)
 579 {
 580         int next = nextchar(stream);
 581         drop_token(stream);
 582         for (;;) {
 583                 int curr = next;
 584                 if (curr == EOF)
 585                         return next;
 586                 next = nextchar(stream);
 587                 if (curr == '\n')
 588                         return next;
 589         }
 590 }
 591
 592 static int drop_stream_comment(stream_t *stream)
 593 {
 594         int newline;
 595         int next;
 596         drop_token(stream);
 597         newline = stream->newline;
 598
 599         next = nextchar(stream);
 600         for (;;) {
 601                 int curr = next;
 602                 if (curr == EOF) {
 603                         warning(stream_pos(stream), "End of file in the middle of a comment");
 604                         return curr;
 605                 }
 606                 next = nextchar(stream);
 607                 if (curr == '*' && next == '/')
 608                         break;
 609         }
 610         stream->newline = newline;
 611         return nextchar(stream);
 612 }
 613
 614 unsigned char combinations[][3] = COMBINATION_STRINGS;
 615
 616 #define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
 617
 618 static int get_one_special(int c, stream_t *stream)
 619 {
 620         struct token *token;
 621         unsigned char c1, c2, c3;
 622         int next, value, i;
 623         unsigned char *comb;
 624
 625         next = nextchar(stream);
 626
 627         /*
 628          * Check for numbers, strings, character constants, and comments
 629          */
 630         switch (c) {
 631         case '.':
 632                 if (next >= '0' && next <= '9')
 633                         return get_one_number(c, next, stream);
 634                 break;
 635         case '"':
 636                 return get_string_token(next, stream);
 637         case '\'':
 638                 return get_char_token(next, stream);
 639         case '/':
 640                 if (next == '/')
 641                         return drop_stream_eoln(stream);
 642                 if (next == '*')
 643                         return drop_stream_comment(stream);
 644         }
 645
 646         /*
 647          * Check for combinations
 648          */
 649         value = c;
 650         if (cclass[next + 1] & ValidSecond) {
 651                 comb = combinations[0];
 652                 c1 = c; c2 = next; c3 = 0;
 653                 for (i = 0; i < NR_COMBINATIONS; i++) {
 654                         if (comb[0] == c1 && comb[1] == c2 && comb[2] == c3) {
 655                                 value = i + SPECIAL_BASE;
 656                                 next = nextchar(stream);
 657                                 if (c3)
 658                                         break;
 659                                 c3 = next;
 660                         }
 661                         comb += 3;
 662                 }
 663         }
 664
 665         /* Pass it on.. */
 666         token = stream->token;
 667         token_type(token) = TOKEN_SPECIAL;
 668         token->special = value;
 669         add_token(stream);
 670         return next;
 671 }
 672
 673 #define IDENT_HASH_BITS (13)
 674 #define IDENT_HASH_SIZE (1<<IDENT_HASH_BITS)
 675 #define IDENT_HASH_MASK (IDENT_HASH_SIZE-1)
 676
 677 #define ident_hash_init(c)              (c)
 678 #define ident_hash_add(oldhash,c)       ((oldhash)*11 + (c))
 679 #define ident_hash_end(hash)            ((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK)
 680
 681 static struct ident *hash_table[IDENT_HASH_SIZE];
 682 static int ident_hit, ident_miss, idents;
 683
 684 void show_identifier_stats(void)
 685 {
 686         int i;
 687         int distribution[100];
 688
 689         fprintf(stderr, "identifiers: %d hits, %d misses\n",
 690                 ident_hit, ident_miss);
 691
 692         for (i = 0; i < 100; i++)
 693                 distribution[i] = 0;
 694
 695         for (i = 0; i < IDENT_HASH_SIZE; i++) {
 696                 struct ident * ident = hash_table[i];
 697                 int count = 0;
 698
 699                 while (ident) {
 700                         count++;
 701                         ident = ident->next;
 702                 }
 703                 if (count > 99)
 704                         count = 99;
 705                 distribution[count]++;
 706         }
 707
 708         for (i = 0; i < 100; i++) {
 709                 if (distribution[i])
 710                         fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
 711         }
 712 }
 713
 714 static struct ident *alloc_ident(const char *name, int len)
 715 {
 716         struct ident *ident = __alloc_ident(len);
 717         ident->symbols = NULL;
 718         ident->len = len;
 719         ident->tainted = 0;
 720         memcpy(ident->name, name, len);
 721         return ident;
 722 }
 723
 724 static struct ident * insert_hash(struct ident *ident, unsigned long hash)
 725 {
 726         ident->next = hash_table[hash];
 727         hash_table[hash] = ident;
 728         ident_miss++;
 729         return ident;
 730 }
 731
 732 static struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
 733 {
 734         struct ident *ident;
 735         struct ident **p;
 736
 737         p = &hash_table[hash];
 738         while ((ident = *p) != NULL) {
 739                 if (ident->len == (unsigned char) len) {
 740                         const char *n = name;
 741                         const char *m = ident->name;
 742                         int l = len;
 743                         do {
 744                                 if (*n != *m)
 745                                         goto next;
 746                                 n++;
 747                                 m++;
 748                         } while (--l);
 749
 750                         ident_hit++;
 751                         return ident;
 752                 }
 753 next:
 754                 //misses++;
 755                 p = &ident->next;
 756         }
 757         ident = alloc_ident(name, len);
 758         *p = ident;
 759         ident->next = NULL;
 760         ident_miss++;
 761         idents++;
 762         return ident;
 763 }
 764
 765 static unsigned long hash_name(const char *name, int len)
 766 {
 767         unsigned long hash;
 768         const unsigned char *p = (const unsigned char *)name;
 769
 770         hash = ident_hash_init(*p++);
 771         while (--len) {
 772                 unsigned int i = *p++;
 773                 hash = ident_hash_add(hash, i);
 774         }
 775         return ident_hash_end(hash);
 776 }
 777
 778 struct ident *hash_ident(struct ident *ident)
 779 {
 780         return insert_hash(ident, hash_name(ident->name, ident->len));
 781 }
 782
 783 struct ident *built_in_ident(const char *name)
 784 {
 785         int len = strlen(name);
 786         return create_hashed_ident(name, len, hash_name(name, len));
 787 }
 788
 789 struct token *built_in_token(int stream, const char *name)
 790 {
 791         struct token *token;
 792
 793         token = __alloc_token(0);
 794         token->pos.stream = stream;
 795         token_type(token) = TOKEN_IDENT;
 796         token->ident = built_in_ident(name);
 797         return token;
 798 }
 799
 800 static int get_one_identifier(int c, stream_t *stream)
 801 {
 802         struct token *token;
 803         struct ident *ident;
 804         unsigned long hash;
 805         char buf[256];
 806         int len = 1;
 807         int next;
 808
 809         hash = ident_hash_init(c);
 810         buf[0] = c;
 811         for (;;) {
 812                 next = nextchar(stream);
 813                 if (!(cclass[next + 1] & (Letter | Digit)))
 814                         break;
 815                 if (len >= sizeof(buf))
 816                         break;
 817                 hash = ident_hash_add(hash, next);
 818                 buf[len] = next;
 819                 len++;
 820         };
 821         hash = ident_hash_end(hash);
 822
 823         ident = create_hashed_ident(buf, len, hash);
 824
 825         /* Pass it on.. */
 826         token = stream->token;
 827         token_type(token) = TOKEN_IDENT;
 828         token->ident = ident;
 829         add_token(stream);
 830         return next;
 831 }
 832
 833 static int get_one_token(int c, stream_t *stream)
 834 {
 835         long class = cclass[c + 1];
 836         if (class & Digit)
 837                 return get_one_number(c, nextchar(stream), stream);
 838         if (class & Letter)
 839                 return get_one_identifier(c, stream);
 840         return get_one_special(c, stream);
 841 }
 842
 843 static struct token *setup_stream(stream_t *stream, int idx, int fd,
 844         unsigned char *buf, unsigned int buf_size)
 845 {
 846         struct token *begin;
 847
 848         stream->nr = idx;
 849         stream->line = 1;
 850         stream->newline = 1;
 851         stream->whitespace = 0;
 852         stream->pos = 0;
 853
 854         stream->token = NULL;
 855         stream->fd = fd;
 856         stream->offset = 0;
 857         stream->size = buf_size;
 858         stream->buffer = buf;
 859
 860         begin = alloc_token(stream);
 861         token_type(begin) = TOKEN_STREAMBEGIN;
 862         stream->tokenlist = &begin->next;
 863         return begin;
 864 }
 865
 866 static void tokenize_stream(stream_t *stream, struct token *endtoken)
 867 {
 868         int c = nextchar(stream);
 869         while (c != EOF) {
 870                 if (!isspace(c)) {
 871                         struct token *token = alloc_token(stream);
 872                         stream->token = token;
 873                         stream->newline = 0;
 874                         stream->whitespace = 0;
 875                         c = get_one_token(c, stream);
 876                         continue;
 877                 }
 878                 stream->whitespace = 1;
 879                 c = nextchar(stream);
 880         }
 881         mark_eof(stream, endtoken);
 882 }
 883
 884 struct token * tokenize_buffer(void *buffer, unsigned long size, struct token *endtoken)
 885 {
 886         stream_t stream;
 887         struct token *begin;
 888
 889         begin = setup_stream(&stream, 0, -1, buffer, size);
 890         tokenize_stream(&stream, endtoken);
 891         return begin;
 892 }
 893
 894 struct token * tokenize(const char *name, int fd, struct token *endtoken, const char **next_path)
 895 {
 896         struct token *begin;
 897         stream_t stream;
 898         unsigned char buffer[BUFSIZE];
 899         int idx;
 900
 901         idx = init_stream(name, fd, next_path);
 902         if (idx < 0) {
 903                 // info(endtoken->pos, "File %s is const", name);
 904                 return endtoken;
 905         }
 906
 907         begin = setup_stream(&stream, idx, fd, buffer, 0);
 908         tokenize_stream(&stream, endtoken);
 909         return begin;
 910 }