tokenize.c

   1 /*
   2  * This is a really stupid C tokenizer. It doesn't do any include
   3  * files or anything complex at all. That's the pre-processor.
   4  *
   5  * Copyright (C) 2003 Transmeta Corp.
   6  *               2003 Linus Torvalds
   7  *
   8  *  Licensed under the Open Software License version 1.1
   9  */
  10 #include <stdio.h>
  11 #include <stdlib.h>
  12 #include <stdarg.h>
  13 #include <stddef.h>
  14 #include <string.h>
  15 #include <ctype.h>
  16 #include <unistd.h>
  17 #include <sys/stat.h>
  18
  19 #include "lib.h"
  20 #include "token.h"
  21 #include "symbol.h"
  22
  23 #define EOF (-1)
  24
  25 int input_stream_nr = 0;
  26 struct stream *input_streams;
  27 static int input_streams_allocated;
  28
  29 #define BUFSIZE (8192)
  30
  31 typedef struct {
  32         int fd, offset, size;
  33         struct position pos;
  34         struct token **tokenlist;
  35         struct token *token;
  36         unsigned char *buffer;
  37 } stream_t;
  38
  39
  40 const char *show_special(int val)
  41 {
  42         static const char *combinations[] = COMBINATION_STRINGS;
  43         static char buffer[4];
  44
  45         buffer[0] = val;
  46         buffer[1] = 0;
  47         if (val >= SPECIAL_BASE)
  48                 strcpy(buffer, combinations[val - SPECIAL_BASE]);
  49         return buffer;
  50 }
  51
  52 const char *show_ident(const struct ident *ident)
  53 {
  54         static char buffer[256];
  55         if (!ident)
  56                 return "<noident>";
  57         sprintf(buffer, "%.*s", ident->len, ident->name);
  58         return buffer;
  59 }
  60
  61 char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
  62 {
  63         if (isprint(c)) {
  64                 if (c == escape || c == '\\')
  65                         *ptr++ = '\\';
  66                 *ptr++ = c;
  67                 return ptr;
  68         }
  69         *ptr++ = '\\';
  70         switch (c) {
  71         case '\n':
  72                 *ptr++ = 'n';
  73                 return ptr;
  74         case '\t':
  75                 *ptr++ = 't';
  76                 return ptr;
  77         }
  78         if (!isdigit(next))
  79                 return ptr + sprintf(ptr, "%o", c);
  80
  81         return ptr + sprintf(ptr, "%03o", c);
  82 }
  83
  84 const char *show_string(const struct string *string)
  85 {
  86         static char buffer[4 * MAX_STRING + 3];
  87         char *ptr;
  88         int i;
  89
  90         if (!string->length)
  91                 return "<bad_string>";
  92         ptr = buffer;
  93         *ptr++ = '"';
  94         for (i = 0; i < string->length-1; i++) {
  95                 const unsigned char *p = string->data + i;
  96                 ptr = charstr(ptr, p[0], '"', p[1]);
  97         }
  98         *ptr++ = '"';
  99         *ptr = '\0';
 100         return buffer;
 101 }
 102
 103 const char *show_token(const struct token *token)
 104 {
 105         static char buffer[256];
 106
 107         if (!token)
 108                 return "<no token>";
 109         switch (token_type(token)) {
 110         case TOKEN_ERROR:
 111                 return "syntax error";
 112
 113         case TOKEN_EOF:
 114                 return "end-of-input";
 115
 116         case TOKEN_IDENT:
 117                 return show_ident(token->ident);
 118
 119         case TOKEN_STRING:
 120                 return show_string(token->string);
 121
 122         case TOKEN_NUMBER:
 123                 return token->number;
 124
 125         case TOKEN_SPECIAL:
 126                 return show_special(token->special);
 127
 128         case TOKEN_CHAR: {
 129                 char *ptr = buffer;
 130                 int c = token->character;
 131                 *ptr++ = '\'';
 132                 ptr = charstr(ptr, c, '\'', 0);
 133                 *ptr++ = '\'';
 134                 *ptr++ = '\0';
 135                 return buffer;
 136         }
 137
 138         case TOKEN_STREAMBEGIN:
 139                 sprintf(buffer, "<beginning of '%s'>", (input_streams + token->pos.stream)->name);
 140                 return buffer;
 141
 142         case TOKEN_STREAMEND:
 143                 sprintf(buffer, "<end of '%s'>", (input_streams + token->pos.stream)->name);
 144                 return buffer;
 145
 146         default:
 147                 return "WTF???";
 148         }
 149 }
 150
 151 int init_stream(const char *name, int fd, const char **next_path)
 152 {
 153         int stream = input_stream_nr;
 154         struct stream *current;
 155         struct stat st;
 156
 157         if (stream >= input_streams_allocated) {
 158                 int newalloc = stream * 4 / 3 + 10;
 159                 input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
 160                 if (!input_streams)
 161                         die("Unable to allocate more streams space");
 162                 input_streams_allocated = newalloc;
 163         }
 164         current = input_streams + stream;
 165         memset(current, 0, sizeof(*current));
 166         current->name = name;
 167         current->fd = fd;
 168         current->next_path = next_path;
 169         current->constant = CONSTANT_FILE_MAYBE;
 170         if (fd >= 0 && fstat(fd, &st) == 0 && S_ISREG(st.st_mode)) {
 171                 int i;
 172
 173                 for (i = 0; i < stream; i++) {
 174                         struct stream *s = input_streams + i;
 175                         if (s->constant == CONSTANT_FILE_YES &&
 176                             identical_files(s, &st, name) &&
 177                             lookup_symbol(s->protect, NS_MACRO))
 178                                 return -1;
 179                 }
 180
 181                 current->dev = st.st_dev;
 182                 current->ino = st.st_ino;
 183         }
 184         input_stream_nr = stream+1;
 185         return stream;
 186 }
 187
 188 static struct token * alloc_token(stream_t *stream)
 189 {
 190         struct token *token = __alloc_token(0);
 191         token->pos = stream->pos;
 192         return token;
 193 }
 194
 195 /*
 196  *  Argh...  That was surprisingly messy - handling '\r' complicates the
 197  *  things a _lot_.
 198  */
 199 static int nextchar_slow(stream_t *stream)
 200 {
 201         int offset = stream->offset;
 202         int size = stream->size;
 203         int c;
 204         int spliced = 0, had_cr, had_backslash, complain;
 205
 206 restart:
 207         had_cr = had_backslash = complain = 0;
 208
 209 repeat:
 210         if (offset >= size) {
 211                 size = read(stream->fd, stream->buffer, BUFSIZE);
 212                 if (size <= 0)
 213                         goto got_eof;
 214                 stream->size = size;
 215                 stream->offset = offset = 0;
 216         }
 217
 218         c = stream->buffer[offset++];
 219
 220         if (had_cr && c != '\n')
 221                 complain = 1;
 222
 223         if (c == '\r') {
 224                 had_cr = 1;
 225                 goto repeat;
 226         }
 227
 228         stream->pos.pos++;
 229
 230         if (c == '\n') {
 231                 stream->pos.line++;
 232                 stream->pos.pos = 0;
 233         }
 234
 235         if (!had_backslash) {
 236                 if (c == '\\') {
 237                         had_backslash = 1;
 238                         goto repeat;
 239                 }
 240                 if (c == '\n')
 241                         stream->pos.newline = 1;
 242         } else {
 243                 if (c == '\n') {
 244                         if (complain)
 245                                 warning(stream->pos, "non-ASCII data stream");
 246                         spliced = 1;
 247                         goto restart;
 248                 }
 249                 stream->pos.pos--;
 250                 offset--;
 251                 c = '\\';
 252         }
 253
 254 out:
 255         stream->offset = offset;
 256         if (complain)
 257                 warning(stream->pos, "non-ASCII data stream");
 258
 259         return c;
 260
 261 got_eof:
 262         if (had_backslash) {
 263                 c = '\\';
 264                 goto out;
 265         }
 266         if (stream->pos.pos)
 267                 warning(stream->pos, "no newline at end of file");
 268         else if (had_cr)
 269                 warning(stream->pos, "non-ASCII data stream");
 270         else if (spliced)
 271                 warning(stream->pos, "backslash-newline at end of file");
 272         return EOF;
 273 }
 274
 275 /*
 276  *  We want that as light as possible while covering all normal cases.
 277  *  Slow path (including the logics with line-splicing and EOF sanity
 278  *  checks) is in nextchar_slow().
 279  */
 280 static inline int nextchar(stream_t *stream)
 281 {
 282         int offset = stream->offset;
 283
 284         if (offset < stream->size) {
 285                 int c = stream->buffer[offset++];
 286                 unsigned char next;
 287                 switch (c) {
 288                 case '\r':
 289                         break;
 290                 case '\n':
 291                         stream->offset = offset;
 292                         stream->pos.line++;
 293                         stream->pos.newline = 1;
 294                         stream->pos.pos = 0;
 295                         return '\n';
 296                 case '\\':
 297                         if (offset >= stream->size)
 298                                 break;
 299                         next = stream->buffer[offset];
 300                         if (next == '\n' || next == '\r')
 301                                 break;
 302                         /* fallthru */
 303                 default:
 304                         stream->offset = offset;
 305                         stream->pos.pos++;
 306                         return c;
 307                 }
 308         }
 309         return nextchar_slow(stream);
 310 }
 311
 312 struct token eof_token_entry;
 313
 314 static void mark_eof(stream_t *stream, struct token *end_token)
 315 {
 316         struct token *end;
 317
 318         end = alloc_token(stream);
 319         token_type(end) = TOKEN_STREAMEND;
 320         end->pos.newline = 1;
 321
 322         eof_token_entry.next = &eof_token_entry;
 323         eof_token_entry.pos.newline = 1;
 324
 325         if (!end_token)
 326                 end_token =  &eof_token_entry;
 327         end->next = end_token;
 328         *stream->tokenlist = end;
 329         stream->tokenlist = NULL;
 330 }
 331
 332 static void add_token(stream_t *stream)
 333 {
 334         struct token *token = stream->token;
 335
 336         stream->token = NULL;
 337         token->next = NULL;
 338         *stream->tokenlist = token;
 339         stream->tokenlist = &token->next;
 340 }
 341
 342 static void drop_token(stream_t *stream)
 343 {
 344         stream->pos.newline |= stream->token->pos.newline;
 345         stream->pos.whitespace |= stream->token->pos.whitespace;
 346         stream->token = NULL;
 347 }
 348
 349 enum {
 350         Letter = 1,
 351         Digit = 2,
 352         Hex = 4,
 353         Exp = 8,
 354         Dot = 16,
 355         ValidSecond = 32,
 356 };
 357
 358 static const long cclass[257] = {
 359         ['0' + 1 ... '9' + 1] = Digit | Hex,
 360         ['A' + 1 ... 'D' + 1] = Letter | Hex,
 361         ['E' + 1] = Letter | Hex | Exp,
 362         ['F' + 1] = Letter | Hex,
 363         ['G' + 1 ... 'O' + 1] = Letter,
 364         ['P' + 1] = Letter | Exp,
 365         ['Q' + 1 ... 'Z' + 1] = Letter,
 366         ['a' + 1 ... 'd' + 1] = Letter | Hex,
 367         ['e' + 1] = Letter | Hex | Exp,
 368         ['f' + 1] = Letter | Hex,
 369         ['g' + 1 ... 'o' + 1] = Letter,
 370         ['p' + 1] = Letter | Exp,
 371         ['q' + 1 ... 'z' + 1] = Letter,
 372         ['_' + 1] = Letter,
 373         ['.' + 1] = Dot | ValidSecond,
 374         ['=' + 1] = ValidSecond,
 375         ['+' + 1] = ValidSecond,
 376         ['-' + 1] = ValidSecond,
 377         ['>' + 1] = ValidSecond,
 378         ['<' + 1] = ValidSecond,
 379         ['&' + 1] = ValidSecond,
 380         ['|' + 1] = ValidSecond,
 381         ['#' + 1] = ValidSecond,
 382 };
 383
 384 /*
 385  * pp-number:
 386  *      digit
 387  *      . digit
 388  *      pp-number digit
 389  *      pp-number identifier-nodigit
 390  *      pp-number e sign
 391  *      pp-number E sign
 392  *      pp-number p sign
 393  *      pp-number P sign
 394  *      pp-number .
 395  */
 396 static int get_one_number(int c, int next, stream_t *stream)
 397 {
 398         struct token *token;
 399         static char buffer[4095];
 400         char *p = buffer, *buf, *buffer_end = buffer + sizeof (buffer);
 401         int len;
 402
 403         *p++ = c;
 404         for (;;) {
 405                 long class =  cclass[next + 1];
 406                 if (!(class & (Dot | Digit | Letter)))
 407                         break;
 408                 if (p != buffer_end)
 409                         *p++ = next;
 410                 next = nextchar(stream);
 411                 if (class & Exp) {
 412                         if (next == '-' || next == '+') {
 413                                 if (p != buffer_end)
 414                                         *p++ = next;
 415                                 next = nextchar(stream);
 416                         }
 417                 }
 418         }
 419
 420         if (p == buffer_end) {
 421                 error(stream->pos, "number token exceeds %td characters",
 422                       buffer_end - buffer);
 423                 // Pretend we saw just "1".
 424                 buffer[0] = '1';
 425                 p = buffer + 1;
 426         }
 427
 428         *p++ = 0;
 429         len = p - buffer;
 430         buf = __alloc_bytes(len);
 431         memcpy(buf, buffer, len);
 432
 433         token = stream->token;
 434         token_type(token) = TOKEN_NUMBER;
 435         token->number = buf;
 436         add_token(stream);
 437
 438         return next;
 439 }
 440
 441 static int escapechar(int first, int type, stream_t *stream, int *valp)
 442 {
 443         int next, value;
 444
 445         next = nextchar(stream);
 446         value = first;
 447
 448         if (first == '\n')
 449                 warning(stream->pos, "Newline in string or character constant");
 450
 451         if (first == '\\' && next != EOF) {
 452                 value = next;
 453                 next = nextchar(stream);
 454                 if (value != type) {
 455                         switch (value) {
 456                         case 'a':
 457                                 value = '\a';
 458                                 break;
 459                         case 'b':
 460                                 value = '\b';
 461                                 break;
 462                         case 't':
 463                                 value = '\t';
 464                                 break;
 465                         case 'n':
 466                                 value = '\n';
 467                                 break;
 468                         case 'v':
 469                                 value = '\v';
 470                                 break;
 471                         case 'f':
 472                                 value = '\f';
 473                                 break;
 474                         case 'r':
 475                                 value = '\r';
 476                                 break;
 477                         case 'e':
 478                                 value = '\e';
 479                                 break;
 480                         case '\\':
 481                                 break;
 482                         case '\'':
 483                                 break;
 484                         case '"':
 485                                 break;
 486                         case '\n':
 487                                 warning(stream->pos, "Newline in string or character constant");
 488                                 break;
 489                         case '0'...'7': {
 490                                 int nr = 2;
 491                                 value -= '0';
 492                                 while (next >= '0' && next <= '9') {
 493                                         value = (value << 3) + (next-'0');
 494                                         next = nextchar(stream);
 495                                         if (!--nr)
 496                                                 break;
 497                                 }
 498                                 value &= 0xff;
 499                                 break;
 500                         }
 501                         case 'x': {
 502                                 int hex = hexval(next);
 503                                 if (hex < 16) {
 504                                         value = hex;
 505                                         next = nextchar(stream);
 506                                         while ((hex = hexval(next)) < 16) {
 507                                                 value = (value << 4) + hex;
 508                                                 next = nextchar(stream);
 509                                         }
 510                                         value &= 0xff;
 511                                         break;
 512                                 }
 513                         }
 514                         /* Fallthrough */
 515                         default:
 516                                 warning(stream->pos, "Unknown escape '%c'", value);
 517                         }
 518                 }
 519                 /* Mark it as escaped */
 520                 value |= 0x100;
 521         }
 522         *valp = value;
 523         return next;
 524 }
 525
 526 static int get_char_token(int next, stream_t *stream)
 527 {
 528         int value;
 529         struct token *token;
 530
 531         next = escapechar(next, '\'', stream, &value);
 532         if (value == '\'' || next != '\'') {
 533                 warning(stream->pos, "Bad character constant");
 534                 drop_token(stream);
 535                 return next;
 536         }
 537
 538         token = stream->token;
 539         token_type(token) = TOKEN_CHAR;
 540         token->character = value & 0xff;
 541
 542         add_token(stream);
 543         return nextchar(stream);
 544 }
 545
 546 static int get_string_token(int next, stream_t *stream)
 547 {
 548         static char buffer[MAX_STRING];
 549         struct string *string;
 550         struct token *token;
 551         int len = 0;
 552
 553         for (;;) {
 554                 int val;
 555                 next = escapechar(next, '"', stream, &val);
 556                 if (val == '"')
 557                         break;
 558                 if (next == EOF) {
 559                         warning(stream->pos, "End of file in middle of string");
 560                         return next;
 561                 }
 562                 if (len < MAX_STRING)
 563                         buffer[len] = val;
 564                 len++;
 565         }
 566
 567         if (len > MAX_STRING) {
 568                 warning(stream->pos, "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
 569                 len = MAX_STRING;
 570         }
 571
 572         string = __alloc_string(len+1);
 573         memcpy(string->data, buffer, len);
 574         string->data[len] = '\0';
 575         string->length = len+1;
 576
 577         /* Pass it on.. */
 578         token = stream->token;
 579         token_type(token) = TOKEN_STRING;
 580         token->string = string;
 581         add_token(stream);
 582
 583         return next;
 584 }
 585
 586 static int drop_stream_eoln(stream_t *stream)
 587 {
 588         int next = nextchar(stream);
 589         drop_token(stream);
 590         for (;;) {
 591                 int curr = next;
 592                 if (curr == EOF)
 593                         return next;
 594                 next = nextchar(stream);
 595                 if (curr == '\n')
 596                         return next;
 597         }
 598 }
 599
 600 static int drop_stream_comment(stream_t *stream)
 601 {
 602         int newline;
 603         int next;
 604         drop_token(stream);
 605         newline = stream->pos.newline;
 606
 607         next = nextchar(stream);
 608         for (;;) {
 609                 int curr = next;
 610                 if (curr == EOF) {
 611                         warning(stream->pos, "End of file in the middle of a comment");
 612                         return curr;
 613                 }
 614                 next = nextchar(stream);
 615                 if (curr == '*' && next == '/')
 616                         break;
 617         }
 618         stream->pos.newline = newline;
 619         return nextchar(stream);
 620 }
 621
 622 unsigned char combinations[][3] = COMBINATION_STRINGS;
 623
 624 #define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
 625
 626 static int get_one_special(int c, stream_t *stream)
 627 {
 628         struct token *token;
 629         unsigned char c1, c2, c3;
 630         int next, value, i;
 631         char *comb;
 632
 633         next = nextchar(stream);
 634
 635         /*
 636          * Check for numbers, strings, character constants, and comments
 637          */
 638         switch (c) {
 639         case '.':
 640                 if (next >= '0' && next <= '9')
 641                         return get_one_number(c, next, stream);
 642                 break;
 643         case '"':
 644                 return get_string_token(next, stream);
 645         case '\'':
 646                 return get_char_token(next, stream);
 647         case '/':
 648                 if (next == '/')
 649                         return drop_stream_eoln(stream);
 650                 if (next == '*')
 651                         return drop_stream_comment(stream);
 652         }
 653
 654         /*
 655          * Check for combinations
 656          */
 657         value = c;
 658         if (cclass[next + 1] & ValidSecond) {
 659                 comb = combinations[0];
 660                 c1 = c; c2 = next; c3 = 0;
 661                 for (i = 0; i < NR_COMBINATIONS; i++) {
 662                         if (comb[0] == c1 && comb[1] == c2 && comb[2] == c3) {
 663                                 value = i + SPECIAL_BASE;
 664                                 next = nextchar(stream);
 665                                 if (c3)
 666                                         break;
 667                                 c3 = next;
 668                         }
 669                         comb += 3;
 670                 }
 671         }
 672
 673         /* Pass it on.. */
 674         token = stream->token;
 675         token_type(token) = TOKEN_SPECIAL;
 676         token->special = value;
 677         add_token(stream);
 678         return next;
 679 }
 680
 681 #define IDENT_HASH_BITS (13)
 682 #define IDENT_HASH_SIZE (1<<IDENT_HASH_BITS)
 683 #define IDENT_HASH_MASK (IDENT_HASH_SIZE-1)
 684
 685 #define ident_hash_init(c)              (c)
 686 #define ident_hash_add(oldhash,c)       ((oldhash)*11 + (c))
 687 #define ident_hash_end(hash)            ((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK)
 688
 689 static struct ident *hash_table[IDENT_HASH_SIZE];
 690 int ident_hit, ident_miss, idents;
 691
 692 void show_identifier_stats(void)
 693 {
 694         int i;
 695         int distribution[100];
 696
 697         fprintf(stderr, "identifiers: %d hits, %d misses\n",
 698                 ident_hit, ident_miss);
 699
 700         for (i = 0; i < 100; i++)
 701                 distribution[i] = 0;
 702
 703         for (i = 0; i < IDENT_HASH_SIZE; i++) {
 704                 struct ident * ident = hash_table[i];
 705                 int count = 0;
 706
 707                 while (ident) {
 708                         count++;
 709                         ident = ident->next;
 710                 }
 711                 if (count > 99)
 712                         count = 99;
 713                 distribution[count]++;
 714         }
 715
 716         for (i = 0; i < 100; i++) {
 717                 if (distribution[i])
 718                         fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
 719         }
 720 }
 721
 722 static struct ident *alloc_ident(const char *name, int len)
 723 {
 724         struct ident *ident = __alloc_ident(len);
 725         ident->symbols = NULL;
 726         ident->len = len;
 727         ident->tainted = 0;
 728         memcpy(ident->name, name, len);
 729         return ident;
 730 }
 731
 732 static struct ident * insert_hash(struct ident *ident, unsigned long hash)
 733 {
 734         ident->next = hash_table[hash];
 735         hash_table[hash] = ident;
 736         ident_miss++;
 737         return ident;
 738 }
 739
 740 static struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
 741 {
 742         struct ident *ident;
 743         struct ident **p;
 744
 745         p = &hash_table[hash];
 746         while ((ident = *p) != NULL) {
 747                 if (ident->len == len && !memcmp(ident->name, name, len)) {
 748                         ident_hit++;
 749                         return ident;
 750                 }
 751                 //misses++;
 752                 p = &ident->next;
 753         }
 754         ident = alloc_ident(name, len);
 755         *p = ident;
 756         ident->next = NULL;
 757         ident_miss++;
 758         idents++;
 759         return ident;
 760 }
 761
 762 static unsigned long hash_name(const char *name, int len)
 763 {
 764         unsigned long hash;
 765         const unsigned char *p = (const unsigned char *)name;
 766
 767         hash = ident_hash_init(*p++);
 768         while (--len) {
 769                 unsigned int i = *p++;
 770                 hash = ident_hash_add(hash, i);
 771         }
 772         return ident_hash_end(hash);
 773 }
 774
 775 struct ident *hash_ident(struct ident *ident)
 776 {
 777         return insert_hash(ident, hash_name(ident->name, ident->len));
 778 }
 779
 780 struct ident *built_in_ident(const char *name)
 781 {
 782         int len = strlen(name);
 783         return create_hashed_ident(name, len, hash_name(name, len));
 784 }
 785
 786 struct token *built_in_token(int stream, const char *name)
 787 {
 788         struct token *token;
 789
 790         token = __alloc_token(0);
 791         token->pos.stream = stream;
 792         token_type(token) = TOKEN_IDENT;
 793         token->ident = built_in_ident(name);
 794         return token;
 795 }
 796
 797 static int get_one_identifier(int c, stream_t *stream)
 798 {
 799         struct token *token;
 800         struct ident *ident;
 801         unsigned long hash;
 802         char buf[256];
 803         int len = 1;
 804         int next;
 805
 806         hash = ident_hash_init(c);
 807         buf[0] = c;
 808         for (;;) {
 809                 next = nextchar(stream);
 810                 if (!(cclass[next + 1] & (Letter | Digit)))
 811                         break;
 812                 if (len >= sizeof(buf))
 813                         break;
 814                 hash = ident_hash_add(hash, next);
 815                 buf[len] = next;
 816                 len++;
 817         };
 818         hash = ident_hash_end(hash);
 819
 820         ident = create_hashed_ident(buf, len, hash);
 821
 822         /* Pass it on.. */
 823         token = stream->token;
 824         token_type(token) = TOKEN_IDENT;
 825         token->ident = ident;
 826         add_token(stream);
 827         return next;
 828 }
 829
 830 static int get_one_token(int c, stream_t *stream)
 831 {
 832         long class = cclass[c + 1];
 833         if (class & Digit)
 834                 return get_one_number(c, nextchar(stream), stream);
 835         if (class & Letter)
 836                 return get_one_identifier(c, stream);
 837         return get_one_special(c, stream);
 838 }
 839
 840 static struct token *setup_stream(stream_t *stream, int idx, int fd,
 841         unsigned char *buf, unsigned int buf_size)
 842 {
 843         struct token *begin;
 844
 845         stream->pos.stream = idx;
 846         stream->pos.line = 1;
 847         stream->pos.newline = 1;
 848         stream->pos.whitespace = 0;
 849         stream->pos.pos = 0;
 850         stream->pos.noexpand = 0;
 851
 852         stream->token = NULL;
 853         stream->fd = fd;
 854         stream->offset = 0;
 855         stream->size = buf_size;
 856         stream->buffer = buf;
 857
 858         begin = alloc_token(stream);
 859         token_type(begin) = TOKEN_STREAMBEGIN;
 860         stream->tokenlist = &begin->next;
 861         return begin;
 862 }
 863
 864 static void tokenize_stream(stream_t *stream, struct token *endtoken)
 865 {
 866         int c = nextchar(stream);
 867         while (c != EOF) {
 868                 if (!isspace(c)) {
 869                         struct token *token = alloc_token(stream);
 870                         stream->token = token;
 871                         stream->pos.newline = 0;
 872                         stream->pos.whitespace = 0;
 873                         c = get_one_token(c, stream);
 874                         continue;
 875                 }
 876                 stream->pos.whitespace = 1;
 877                 c = nextchar(stream);
 878         }
 879         mark_eof(stream, endtoken);
 880 }
 881
 882 struct token * tokenize_buffer(unsigned char *buffer, unsigned long size, struct token *endtoken)
 883 {
 884         stream_t stream;
 885         struct token *begin;
 886
 887         begin = setup_stream(&stream, 0, -1, buffer, size);
 888         tokenize_stream(&stream, endtoken);
 889         return begin;
 890 }
 891
 892 struct token * tokenize(const char *name, int fd, struct token *endtoken, const char **next_path)
 893 {
 894         struct token *begin;
 895         stream_t stream;
 896         unsigned char buffer[BUFSIZE];
 897         int idx;
 898
 899         idx = init_stream(name, fd, next_path);
 900         if (idx < 0) {
 901                 // info(endtoken->pos, "File %s is const", name);
 902                 return endtoken;
 903         }
 904
 905         begin = setup_stream(&stream, idx, fd, buffer, 0);
 906         tokenize_stream(&stream, endtoken);
 907         return begin;
 908 }