tokenize.c

   1 /*
   2  * This is a really stupid C tokenizer. It doesn't do any include
   3  * files or anything complex at all. That's the pre-processor.
   4  *
   5  * Copyright (C) 2003 Transmeta Corp.
   6  *               2003 Linus Torvalds
   7  *
   8  *  Licensed under the Open Software License version 1.1
   9  */
  10 #include <stdio.h>
  11 #include <stdlib.h>
  12 #include <stdarg.h>
  13 #include <stddef.h>
  14 #include <string.h>
  15 #include <ctype.h>
  16 #include <unistd.h>
  17 #include <sys/stat.h>
  18
  19 #include "lib.h"
  20 #include "token.h"
  21 #include "symbol.h"
  22
/*
 * End-of-input marker returned by the character readers in this file.
 * NOTE(review): <stdio.h> is included above and normally defines EOF
 * too; this relies on both definitions being identical - confirm.
 */
#define EOF (-1)

/* Index that init_stream() hands out for the next stream it registers. */
int input_stream_nr = 0;
/* Table of stream descriptors; init_stream() grows it with realloc(). */
struct stream *input_streams;
/* Number of entries the input_streams table currently has room for. */
static int input_streams_allocated;

/* Number of bytes requested per read() when refilling a stream buffer. */
#define BUFSIZE (8192)
  30
  31 typedef struct {
  32         int fd, offset, size;
  33         int pos, line, nr;
  34         int newline, whitespace;
  35         struct token **tokenlist;
  36         struct token *token;
  37         unsigned char *buffer;
  38 } stream_t;
  39
  40 struct position stream_pos(stream_t *stream)
  41 {
  42         struct position pos;
  43         pos.type = 0;
  44         pos.stream = stream->nr;
  45         pos.newline = stream->newline;
  46         pos.whitespace = stream->whitespace;
  47         pos.pos = stream->pos;
  48         pos.line = stream->line;
  49         pos.noexpand = 0;
  50         return pos;
  51 }
  52
  53 const char *show_special(int val)
  54 {
  55         static const char *combinations[] = COMBINATION_STRINGS;
  56         static char buffer[4];
  57
  58         buffer[0] = val;
  59         buffer[1] = 0;
  60         if (val >= SPECIAL_BASE)
  61                 strcpy(buffer, combinations[val - SPECIAL_BASE]);
  62         return buffer;
  63 }
  64
  65 const char *show_ident(const struct ident *ident)
  66 {
  67         static char buffer[256];
  68         if (!ident)
  69                 return "<noident>";
  70         sprintf(buffer, "%.*s", ident->len, ident->name);
  71         return buffer;
  72 }
  73
  74 char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
  75 {
  76         if (isprint(c)) {
  77                 if (c == escape || c == '\\')
  78                         *ptr++ = '\\';
  79                 *ptr++ = c;
  80                 return ptr;
  81         }
  82         *ptr++ = '\\';
  83         switch (c) {
  84         case '\n':
  85                 *ptr++ = 'n';
  86                 return ptr;
  87         case '\t':
  88                 *ptr++ = 't';
  89                 return ptr;
  90         }
  91         if (!isdigit(next))
  92                 return ptr + sprintf(ptr, "%o", c);
  93
  94         return ptr + sprintf(ptr, "%03o", c);
  95 }
  96
  97 const char *show_string(const struct string *string)
  98 {
  99         static char buffer[4 * MAX_STRING + 3];
 100         char *ptr;
 101         int i;
 102
 103         if (!string->length)
 104                 return "<bad_string>";
 105         ptr = buffer;
 106         *ptr++ = '"';
 107         for (i = 0; i < string->length-1; i++) {
 108                 const unsigned char *p = string->data + i;
 109                 ptr = charstr(ptr, p[0], '"', p[1]);
 110         }
 111         *ptr++ = '"';
 112         *ptr = '\0';
 113         return buffer;
 114 }
 115
 116 const char *show_token(const struct token *token)
 117 {
 118         static char buffer[256];
 119
 120         if (!token)
 121                 return "<no token>";
 122         switch (token_type(token)) {
 123         case TOKEN_ERROR:
 124                 return "syntax error";
 125
 126         case TOKEN_EOF:
 127                 return "end-of-input";
 128
 129         case TOKEN_IDENT:
 130                 return show_ident(token->ident);
 131
 132         case TOKEN_STRING:
 133                 return show_string(token->string);
 134
 135         case TOKEN_NUMBER:
 136                 return token->number;
 137
 138         case TOKEN_SPECIAL:
 139                 return show_special(token->special);
 140
 141         case TOKEN_CHAR: {
 142                 char *ptr = buffer;
 143                 int c = token->character;
 144                 *ptr++ = '\'';
 145                 ptr = charstr(ptr, c, '\'', 0);
 146                 *ptr++ = '\'';
 147                 *ptr++ = '\0';
 148                 return buffer;
 149         }
 150
 151         case TOKEN_STREAMBEGIN:
 152                 sprintf(buffer, "<beginning of '%s'>", (input_streams + token->pos.stream)->name);
 153                 return buffer;
 154
 155         case TOKEN_STREAMEND:
 156                 sprintf(buffer, "<end of '%s'>", (input_streams + token->pos.stream)->name);
 157                 return buffer;
 158
 159         default:
 160                 return "WTF???";
 161         }
 162 }
 163
 164 int init_stream(const char *name, int fd, const char **next_path)
 165 {
 166         int stream = input_stream_nr;
 167         struct stream *current;
 168         struct stat st;
 169
 170         if (stream >= input_streams_allocated) {
 171                 int newalloc = stream * 4 / 3 + 10;
 172                 input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
 173                 if (!input_streams)
 174                         die("Unable to allocate more streams space");
 175                 input_streams_allocated = newalloc;
 176         }
 177         current = input_streams + stream;
 178         memset(current, 0, sizeof(*current));
 179         current->name = name;
 180         current->fd = fd;
 181         current->next_path = next_path;
 182         current->constant = CONSTANT_FILE_MAYBE;
 183         if (fd >= 0 && fstat(fd, &st) == 0 && S_ISREG(st.st_mode)) {
 184                 int i;
 185
 186                 for (i = 0; i < stream; i++) {
 187                         struct stream *s = input_streams + i;
 188                         if (s->constant == CONSTANT_FILE_YES &&
 189                             identical_files(s, &st, name) &&
 190                             lookup_symbol(s->protect, NS_MACRO))
 191                                 return -1;
 192                 }
 193
 194                 current->dev = st.st_dev;
 195                 current->ino = st.st_ino;
 196         }
 197         input_stream_nr = stream+1;
 198         return stream;
 199 }
 200
 201 static struct token * alloc_token(stream_t *stream)
 202 {
 203         struct token *token = __alloc_token(0);
 204         token->pos = stream_pos(stream);
 205         return token;
 206 }
 207
/*
 * Slow-path character reader: refills the buffer, skips '\r', tracks
 * line/column, splices backslash-newline pairs and issues the
 * end-of-file diagnostics.
 *
 * Returns the next character, or EOF once read() yields no more data.
 * A backslash that is not followed by a newline is returned as '\\'
 * and the character after it is left unread for the next call.
 *
 * Handling '\r' complicates things a lot, hence the goto-driven state
 * machine below.
 */
static int nextchar_slow(stream_t *stream)
{
        int offset = stream->offset;
        int size = stream->size;
        int c;
        /*
         * spliced:       at least one backslash-newline was consumed
         * had_cr:        a '\r' was skipped while reading this character
         * had_backslash: a '\\' is pending, waiting to see what follows
         * complain:      a '\r' was followed by something other than '\n'
         */
        int spliced = 0, had_cr, had_backslash, complain;

restart:
        /* Start over on a fresh character (also after a splice). */
        had_cr = had_backslash = complain = 0;

repeat:
        /* Buffer exhausted: pull in the next chunk from the descriptor. */
        if (offset >= size) {
                size = read(stream->fd, stream->buffer, BUFSIZE);
                if (size <= 0)
                        goto got_eof;
                stream->size = size;
                stream->offset = offset = 0;
        }

        c = stream->buffer[offset++];

        if (had_cr && c != '\n')
                complain = 1;

        /* Carriage returns are dropped and never counted in 'pos'. */
        if (c == '\r') {
                had_cr = 1;
                goto repeat;
        }

        stream->pos++;

        if (c == '\n') {
                stream->line++;
                stream->pos = 0;
        }

        if (!had_backslash) {
                if (c == '\\') {
                        /* Might start a line splice; look at the next char. */
                        had_backslash = 1;
                        goto repeat;
                }
                if (c == '\n')
                        stream->newline = 1;
        } else {
                if (c == '\n') {
                        /* Backslash-newline: swallow both and keep reading. */
                        if (complain)
                                warning(stream_pos(stream), "non-ASCII data stream");
                        spliced = 1;
                        goto restart;
                }
                /*
                 * Ordinary backslash: un-read the character that followed
                 * it and hand back the backslash itself.
                 */
                stream->pos--;
                offset--;
                c = '\\';
        }

out:
        stream->offset = offset;
        if (complain)
                warning(stream_pos(stream), "non-ASCII data stream");

        return c;

got_eof:
        /* A backslash right before end of input is still returned. */
        if (had_backslash) {
                c = '\\';
                goto out;
        }
        /* pos is non-zero when the last line did not end in a newline. */
        if (stream->pos)
                warning(stream_pos(stream), "no newline at end of file");
        else if (had_cr)
                warning(stream_pos(stream), "non-ASCII data stream");
        else if (spliced)
                warning(stream_pos(stream), "backslash-newline at end of file");
        return EOF;
}
 287
 288 /*
 289  *  We want that as light as possible while covering all normal cases.
 290  *  Slow path (including the logics with line-splicing and EOF sanity
 291  *  checks) is in nextchar_slow().
 292  */
 293 static int nextchar(stream_t *stream)
 294 {
 295         int offset = stream->offset;
 296
 297         if (offset < stream->size) {
 298                 int c = stream->buffer[offset++];
 299                 static const char special[256] = {
 300                         ['\r'] = 1, ['\n'] = 1, ['\\'] = 1
 301                 };
 302                 if (!special[c]) {
 303                         stream->offset = offset;
 304                         stream->pos++;
 305                         return c;
 306                 }
 307         }
 308         return nextchar_slow(stream);
 309 }
 310
 311 struct token eof_token_entry;
 312
 313 static void mark_eof(stream_t *stream, struct token *end_token)
 314 {
 315         struct token *end;
 316
 317         end = alloc_token(stream);
 318         token_type(end) = TOKEN_STREAMEND;
 319         end->pos.newline = 1;
 320
 321         eof_token_entry.next = &eof_token_entry;
 322         eof_token_entry.pos.newline = 1;
 323
 324         if (!end_token)
 325                 end_token =  &eof_token_entry;
 326         end->next = end_token;
 327         *stream->tokenlist = end;
 328         stream->tokenlist = NULL;
 329 }
 330
 331 static void add_token(stream_t *stream)
 332 {
 333         struct token *token = stream->token;
 334
 335         stream->token = NULL;
 336         token->next = NULL;
 337         *stream->tokenlist = token;
 338         stream->tokenlist = &token->next;
 339 }
 340
 341 static void drop_token(stream_t *stream)
 342 {
 343         stream->newline |= stream->token->pos.newline;
 344         stream->whitespace |= stream->token->pos.whitespace;
 345         stream->token = NULL;
 346 }
 347
 348 enum {
 349         Letter = 1,
 350         Digit = 2,
 351         Hex = 4,
 352         Exp = 8,
 353         Dot = 16,
 354         ValidSecond = 32,
 355 };
 356
 357 static const long cclass[257] = {
 358         ['0' + 1 ... '9' + 1] = Digit | Hex,
 359         ['A' + 1 ... 'D' + 1] = Letter | Hex,
 360         ['E' + 1] = Letter | Hex | Exp,
 361         ['F' + 1] = Letter | Hex,
 362         ['G' + 1 ... 'O' + 1] = Letter,
 363         ['P' + 1] = Letter | Exp,
 364         ['Q' + 1 ... 'Z' + 1] = Letter,
 365         ['a' + 1 ... 'd' + 1] = Letter | Hex,
 366         ['e' + 1] = Letter | Hex | Exp,
 367         ['f' + 1] = Letter | Hex,
 368         ['g' + 1 ... 'o' + 1] = Letter,
 369         ['p' + 1] = Letter | Exp,
 370         ['q' + 1 ... 'z' + 1] = Letter,
 371         ['_' + 1] = Letter,
 372         ['.' + 1] = Dot | ValidSecond,
 373         ['=' + 1] = ValidSecond,
 374         ['+' + 1] = ValidSecond,
 375         ['-' + 1] = ValidSecond,
 376         ['>' + 1] = ValidSecond,
 377         ['<' + 1] = ValidSecond,
 378         ['&' + 1] = ValidSecond,
 379         ['|' + 1] = ValidSecond,
 380         ['#' + 1] = ValidSecond,
 381 };
 382
 383 /*
 384  * pp-number:
 385  *      digit
 386  *      . digit
 387  *      pp-number digit
 388  *      pp-number identifier-nodigit
 389  *      pp-number e sign
 390  *      pp-number E sign
 391  *      pp-number p sign
 392  *      pp-number P sign
 393  *      pp-number .
 394  */
 395 static int get_one_number(int c, int next, stream_t *stream)
 396 {
 397         struct token *token;
 398         static char buffer[4095];
 399         char *p = buffer, *buf, *buffer_end = buffer + sizeof (buffer);
 400         int len;
 401
 402         *p++ = c;
 403         for (;;) {
 404                 long class =  cclass[next + 1];
 405                 if (!(class & (Dot | Digit | Letter)))
 406                         break;
 407                 if (p != buffer_end)
 408                         *p++ = next;
 409                 next = nextchar(stream);
 410                 if (class & Exp) {
 411                         if (next == '-' || next == '+') {
 412                                 if (p != buffer_end)
 413                                         *p++ = next;
 414                                 next = nextchar(stream);
 415                         }
 416                 }
 417         }
 418
 419         if (p == buffer_end) {
 420                 error(stream_pos(stream), "number token exceeds %td characters",
 421                       buffer_end - buffer);
 422                 // Pretend we saw just "1".
 423                 buffer[0] = '1';
 424                 p = buffer + 1;
 425         }
 426
 427         *p++ = 0;
 428         len = p - buffer;
 429         buf = __alloc_bytes(len);
 430         memcpy(buf, buffer, len);
 431
 432         token = stream->token;
 433         token_type(token) = TOKEN_NUMBER;
 434         token->number = buf;
 435         add_token(stream);
 436
 437         return next;
 438 }
 439
 440 static int escapechar(int first, int type, stream_t *stream, int *valp)
 441 {
 442         int next, value;
 443
 444         next = nextchar(stream);
 445         value = first;
 446
 447         if (first == '\n')
 448                 warning(stream_pos(stream), "Newline in string or character constant");
 449
 450         if (first == '\\' && next != EOF) {
 451                 value = next;
 452                 next = nextchar(stream);
 453                 if (value != type) {
 454                         switch (value) {
 455                         case 'a':
 456                                 value = '\a';
 457                                 break;
 458                         case 'b':
 459                                 value = '\b';
 460                                 break;
 461                         case 't':
 462                                 value = '\t';
 463                                 break;
 464                         case 'n':
 465                                 value = '\n';
 466                                 break;
 467                         case 'v':
 468                                 value = '\v';
 469                                 break;
 470                         case 'f':
 471                                 value = '\f';
 472                                 break;
 473                         case 'r':
 474                                 value = '\r';
 475                                 break;
 476                         case 'e':
 477                                 value = '\e';
 478                                 break;
 479                         case '\\':
 480                                 break;
 481                         case '\'':
 482                                 break;
 483                         case '"':
 484                                 break;
 485                         case '\n':
 486                                 warning(stream_pos(stream), "Newline in string or character constant");
 487                                 break;
 488                         case '0'...'7': {
 489                                 int nr = 2;
 490                                 value -= '0';
 491                                 while (next >= '0' && next <= '9') {
 492                                         value = (value << 3) + (next-'0');
 493                                         next = nextchar(stream);
 494                                         if (!--nr)
 495                                                 break;
 496                                 }
 497                                 value &= 0xff;
 498                                 break;
 499                         }
 500                         case 'x': {
 501                                 int hex = hexval(next);
 502                                 if (hex < 16) {
 503                                         value = hex;
 504                                         next = nextchar(stream);
 505                                         while ((hex = hexval(next)) < 16) {
 506                                                 value = (value << 4) + hex;
 507                                                 next = nextchar(stream);
 508                                         }
 509                                         value &= 0xff;
 510                                         break;
 511                                 }
 512                         }
 513                         /* Fallthrough */
 514                         default:
 515                                 warning(stream_pos(stream), "Unknown escape '%c'", value);
 516                         }
 517                 }
 518                 /* Mark it as escaped */
 519                 value |= 0x100;
 520         }
 521         *valp = value;
 522         return next;
 523 }
 524
 525 static int get_char_token(int next, stream_t *stream)
 526 {
 527         int value;
 528         struct token *token;
 529
 530         next = escapechar(next, '\'', stream, &value);
 531         if (value == '\'' || next != '\'') {
 532                 warning(stream_pos(stream), "Bad character constant");
 533                 drop_token(stream);
 534                 return next;
 535         }
 536
 537         token = stream->token;
 538         token_type(token) = TOKEN_CHAR;
 539         token->character = value & 0xff;
 540
 541         add_token(stream);
 542         return nextchar(stream);
 543 }
 544
 545 static int get_string_token(int next, stream_t *stream)
 546 {
 547         static char buffer[MAX_STRING];
 548         struct string *string;
 549         struct token *token;
 550         int len = 0;
 551
 552         for (;;) {
 553                 int val;
 554                 next = escapechar(next, '"', stream, &val);
 555                 if (val == '"')
 556                         break;
 557                 if (next == EOF) {
 558                         warning(stream_pos(stream), "End of file in middle of string");
 559                         return next;
 560                 }
 561                 if (len < MAX_STRING)
 562                         buffer[len] = val;
 563                 len++;
 564         }
 565
 566         if (len > MAX_STRING) {
 567                 warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
 568                 len = MAX_STRING;
 569         }
 570
 571         string = __alloc_string(len+1);
 572         memcpy(string->data, buffer, len);
 573         string->data[len] = '\0';
 574         string->length = len+1;
 575
 576         /* Pass it on.. */
 577         token = stream->token;
 578         token_type(token) = TOKEN_STRING;
 579         token->string = string;
 580         add_token(stream);
 581
 582         return next;
 583 }
 584
 585 static int drop_stream_eoln(stream_t *stream)
 586 {
 587         int next = nextchar(stream);
 588         drop_token(stream);
 589         for (;;) {
 590                 int curr = next;
 591                 if (curr == EOF)
 592                         return next;
 593                 next = nextchar(stream);
 594                 if (curr == '\n')
 595                         return next;
 596         }
 597 }
 598
 599 static int drop_stream_comment(stream_t *stream)
 600 {
 601         int newline;
 602         int next;
 603         drop_token(stream);
 604         newline = stream->newline;
 605
 606         next = nextchar(stream);
 607         for (;;) {
 608                 int curr = next;
 609                 if (curr == EOF) {
 610                         warning(stream_pos(stream), "End of file in the middle of a comment");
 611                         return curr;
 612                 }
 613                 next = nextchar(stream);
 614                 if (curr == '*' && next == '/')
 615                         break;
 616         }
 617         stream->newline = newline;
 618         return nextchar(stream);
 619 }
 620
 621 unsigned char combinations[][3] = COMBINATION_STRINGS;
 622
 623 #define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
 624
 625 static int get_one_special(int c, stream_t *stream)
 626 {
 627         struct token *token;
 628         unsigned char c1, c2, c3;
 629         int next, value, i;
 630         char *comb;
 631
 632         next = nextchar(stream);
 633
 634         /*
 635          * Check for numbers, strings, character constants, and comments
 636          */
 637         switch (c) {
 638         case '.':
 639                 if (next >= '0' && next <= '9')
 640                         return get_one_number(c, next, stream);
 641                 break;
 642         case '"':
 643                 return get_string_token(next, stream);
 644         case '\'':
 645                 return get_char_token(next, stream);
 646         case '/':
 647                 if (next == '/')
 648                         return drop_stream_eoln(stream);
 649                 if (next == '*')
 650                         return drop_stream_comment(stream);
 651         }
 652
 653         /*
 654          * Check for combinations
 655          */
 656         value = c;
 657         if (cclass[next + 1] & ValidSecond) {
 658                 comb = combinations[0];
 659                 c1 = c; c2 = next; c3 = 0;
 660                 for (i = 0; i < NR_COMBINATIONS; i++) {
 661                         if (comb[0] == c1 && comb[1] == c2 && comb[2] == c3) {
 662                                 value = i + SPECIAL_BASE;
 663                                 next = nextchar(stream);
 664                                 if (c3)
 665                                         break;
 666                                 c3 = next;
 667                         }
 668                         comb += 3;
 669                 }
 670         }
 671
 672         /* Pass it on.. */
 673         token = stream->token;
 674         token_type(token) = TOKEN_SPECIAL;
 675         token->special = value;
 676         add_token(stream);
 677         return next;
 678 }
 679
 680 #define IDENT_HASH_BITS (13)
 681 #define IDENT_HASH_SIZE (1<<IDENT_HASH_BITS)
 682 #define IDENT_HASH_MASK (IDENT_HASH_SIZE-1)
 683
 684 #define ident_hash_init(c)              (c)
 685 #define ident_hash_add(oldhash,c)       ((oldhash)*11 + (c))
 686 #define ident_hash_end(hash)            ((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK)
 687
 688 static struct ident *hash_table[IDENT_HASH_SIZE];
 689 int ident_hit, ident_miss, idents;
 690
 691 void show_identifier_stats(void)
 692 {
 693         int i;
 694         int distribution[100];
 695
 696         fprintf(stderr, "identifiers: %d hits, %d misses\n",
 697                 ident_hit, ident_miss);
 698
 699         for (i = 0; i < 100; i++)
 700                 distribution[i] = 0;
 701
 702         for (i = 0; i < IDENT_HASH_SIZE; i++) {
 703                 struct ident * ident = hash_table[i];
 704                 int count = 0;
 705
 706                 while (ident) {
 707                         count++;
 708                         ident = ident->next;
 709                 }
 710                 if (count > 99)
 711                         count = 99;
 712                 distribution[count]++;
 713         }
 714
 715         for (i = 0; i < 100; i++) {
 716                 if (distribution[i])
 717                         fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
 718         }
 719 }
 720
 721 static struct ident *alloc_ident(const char *name, int len)
 722 {
 723         struct ident *ident = __alloc_ident(len);
 724         ident->symbols = NULL;
 725         ident->len = len;
 726         ident->tainted = 0;
 727         memcpy(ident->name, name, len);
 728         return ident;
 729 }
 730
 731 static struct ident * insert_hash(struct ident *ident, unsigned long hash)
 732 {
 733         ident->next = hash_table[hash];
 734         hash_table[hash] = ident;
 735         ident_miss++;
 736         return ident;
 737 }
 738
 739 static struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
 740 {
 741         struct ident *ident;
 742         struct ident **p;
 743
 744         p = &hash_table[hash];
 745         while ((ident = *p) != NULL) {
 746                 if (ident->len == (unsigned char) len) {
 747                         const char *n = name;
 748                         const char *m = ident->name;
 749                         int l = len;
 750                         do {
 751                                 if (*n != *m)
 752                                         goto next;
 753                                 n++;
 754                                 m++;
 755                         } while (--l);
 756
 757                         ident_hit++;
 758                         return ident;
 759                 }
 760 next:
 761                 //misses++;
 762                 p = &ident->next;
 763         }
 764         ident = alloc_ident(name, len);
 765         *p = ident;
 766         ident->next = NULL;
 767         ident_miss++;
 768         idents++;
 769         return ident;
 770 }
 771
 772 static unsigned long hash_name(const char *name, int len)
 773 {
 774         unsigned long hash;
 775         const unsigned char *p = (const unsigned char *)name;
 776
 777         hash = ident_hash_init(*p++);
 778         while (--len) {
 779                 unsigned int i = *p++;
 780                 hash = ident_hash_add(hash, i);
 781         }
 782         return ident_hash_end(hash);
 783 }
 784
 785 struct ident *hash_ident(struct ident *ident)
 786 {
 787         return insert_hash(ident, hash_name(ident->name, ident->len));
 788 }
 789
 790 struct ident *built_in_ident(const char *name)
 791 {
 792         int len = strlen(name);
 793         return create_hashed_ident(name, len, hash_name(name, len));
 794 }
 795
 796 struct token *built_in_token(int stream, const char *name)
 797 {
 798         struct token *token;
 799
 800         token = __alloc_token(0);
 801         token->pos.stream = stream;
 802         token_type(token) = TOKEN_IDENT;
 803         token->ident = built_in_ident(name);
 804         return token;
 805 }
 806
 807 static int get_one_identifier(int c, stream_t *stream)
 808 {
 809         struct token *token;
 810         struct ident *ident;
 811         unsigned long hash;
 812         char buf[256];
 813         int len = 1;
 814         int next;
 815
 816         hash = ident_hash_init(c);
 817         buf[0] = c;
 818         for (;;) {
 819                 next = nextchar(stream);
 820                 if (!(cclass[next + 1] & (Letter | Digit)))
 821                         break;
 822                 if (len >= sizeof(buf))
 823                         break;
 824                 hash = ident_hash_add(hash, next);
 825                 buf[len] = next;
 826                 len++;
 827         };
 828         hash = ident_hash_end(hash);
 829
 830         ident = create_hashed_ident(buf, len, hash);
 831
 832         /* Pass it on.. */
 833         token = stream->token;
 834         token_type(token) = TOKEN_IDENT;
 835         token->ident = ident;
 836         add_token(stream);
 837         return next;
 838 }
 839
 840 static int get_one_token(int c, stream_t *stream)
 841 {
 842         long class = cclass[c + 1];
 843         if (class & Digit)
 844                 return get_one_number(c, nextchar(stream), stream);
 845         if (class & Letter)
 846                 return get_one_identifier(c, stream);
 847         return get_one_special(c, stream);
 848 }
 849
 850 static struct token *setup_stream(stream_t *stream, int idx, int fd,
 851         unsigned char *buf, unsigned int buf_size)
 852 {
 853         struct token *begin;
 854
 855         stream->nr = idx;
 856         stream->line = 1;
 857         stream->newline = 1;
 858         stream->whitespace = 0;
 859         stream->pos = 0;
 860
 861         stream->token = NULL;
 862         stream->fd = fd;
 863         stream->offset = 0;
 864         stream->size = buf_size;
 865         stream->buffer = buf;
 866
 867         begin = alloc_token(stream);
 868         token_type(begin) = TOKEN_STREAMBEGIN;
 869         stream->tokenlist = &begin->next;
 870         return begin;
 871 }
 872
 873 static void tokenize_stream(stream_t *stream, struct token *endtoken)
 874 {
 875         int c = nextchar(stream);
 876         while (c != EOF) {
 877                 if (!isspace(c)) {
 878                         struct token *token = alloc_token(stream);
 879                         stream->token = token;
 880                         stream->newline = 0;
 881                         stream->whitespace = 0;
 882                         c = get_one_token(c, stream);
 883                         continue;
 884                 }
 885                 stream->whitespace = 1;
 886                 c = nextchar(stream);
 887         }
 888         mark_eof(stream, endtoken);
 889 }
 890
 891 struct token * tokenize_buffer(unsigned char *buffer, unsigned long size, struct token *endtoken)
 892 {
 893         stream_t stream;
 894         struct token *begin;
 895
 896         begin = setup_stream(&stream, 0, -1, buffer, size);
 897         tokenize_stream(&stream, endtoken);
 898         return begin;
 899 }
 900
 901 struct token * tokenize(const char *name, int fd, struct token *endtoken, const char **next_path)
 902 {
 903         struct token *begin;
 904         stream_t stream;
 905         unsigned char buffer[BUFSIZE];
 906         int idx;
 907
 908         idx = init_stream(name, fd, next_path);
 909         if (idx < 0) {
 910                 // info(endtoken->pos, "File %s is const", name);
 911                 return endtoken;
 912         }
 913
 914         begin = setup_stream(&stream, idx, fd, buffer, 0);
 915         tokenize_stream(&stream, endtoken);
 916         return begin;
 917 }