tokenize.c

   1 /*
   2  * This is a really stupid C tokenizer. It doesn't do any include
   3  * files or anything complex at all. That's the pre-processor.
   4  *
   5  * Copyright (C) 2003 Transmeta Corp.
   6  *               2003 Linus Torvalds
   7  *
   8  *  Licensed under the Open Software License version 1.1
   9  */
  10 #include <stdio.h>
  11 #include <stdlib.h>
  12 #include <stdarg.h>
  13 #include <stddef.h>
  14 #include <string.h>
  15 #include <ctype.h>
  16 #include <unistd.h>
  17 #include <sys/stat.h>
  18
  19 #include "lib.h"
  20 #include "allocate.h"
  21 #include "token.h"
  22 #include "symbol.h"
  23
  24 #define EOF (-1)
  25
  26 int input_stream_nr = 0;
  27 struct stream *input_streams;
  28 static int input_streams_allocated;
  29
  30 #define BUFSIZE (8192)
  31
  32 typedef struct {
  33         int fd, offset, size;
  34         int pos, line, nr;
  35         int newline, whitespace;
  36         struct token **tokenlist;
  37         struct token *token;
  38         unsigned char *buffer;
  39 } stream_t;
  40
  41 struct position stream_pos(stream_t *stream)
  42 {
  43         struct position pos;
  44         pos.type = 0;
  45         pos.stream = stream->nr;
  46         pos.newline = stream->newline;
  47         pos.whitespace = stream->whitespace;
  48         pos.pos = stream->pos;
  49         pos.line = stream->line;
  50         pos.noexpand = 0;
  51         return pos;
  52 }
  53
  54 const char *show_special(int val)
  55 {
  56         static const char *combinations[] = COMBINATION_STRINGS;
  57         static char buffer[4];
  58
  59         buffer[0] = val;
  60         buffer[1] = 0;
  61         if (val >= SPECIAL_BASE)
  62                 strcpy(buffer, combinations[val - SPECIAL_BASE]);
  63         return buffer;
  64 }
  65
  66 const char *show_ident(const struct ident *ident)
  67 {
  68         static char buffer[256];
  69         if (!ident)
  70                 return "<noident>";
  71         sprintf(buffer, "%.*s", ident->len, ident->name);
  72         return buffer;
  73 }
  74
  75 char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
  76 {
  77         if (isprint(c)) {
  78                 if (c == escape || c == '\\')
  79                         *ptr++ = '\\';
  80                 *ptr++ = c;
  81                 return ptr;
  82         }
  83         *ptr++ = '\\';
  84         switch (c) {
  85         case '\n':
  86                 *ptr++ = 'n';
  87                 return ptr;
  88         case '\t':
  89                 *ptr++ = 't';
  90                 return ptr;
  91         }
  92         if (!isdigit(next))
  93                 return ptr + sprintf(ptr, "%o", c);
  94
  95         return ptr + sprintf(ptr, "%03o", c);
  96 }
  97
  98 const char *show_string(const struct string *string)
  99 {
 100         static char buffer[4 * MAX_STRING + 3];
 101         char *ptr;
 102         int i;
 103
 104         if (!string->length)
 105                 return "<bad_string>";
 106         ptr = buffer;
 107         *ptr++ = '"';
 108         for (i = 0; i < string->length-1; i++) {
 109                 const unsigned char *p = string->data + i;
 110                 ptr = charstr(ptr, p[0], '"', p[1]);
 111         }
 112         *ptr++ = '"';
 113         *ptr = '\0';
 114         return buffer;
 115 }
 116
 117 const char *show_token(const struct token *token)
 118 {
 119         static char buffer[256];
 120
 121         if (!token)
 122                 return "<no token>";
 123         switch (token_type(token)) {
 124         case TOKEN_ERROR:
 125                 return "syntax error";
 126
 127         case TOKEN_EOF:
 128                 return "end-of-input";
 129
 130         case TOKEN_IDENT:
 131                 return show_ident(token->ident);
 132
 133         case TOKEN_STRING:
 134                 return show_string(token->string);
 135
 136         case TOKEN_NUMBER:
 137                 return token->number;
 138
 139         case TOKEN_SPECIAL:
 140                 return show_special(token->special);
 141
 142         case TOKEN_CHAR: {
 143                 char *ptr = buffer;
 144                 int c = token->character;
 145                 *ptr++ = '\'';
 146                 ptr = charstr(ptr, c, '\'', 0);
 147                 *ptr++ = '\'';
 148                 *ptr++ = '\0';
 149                 return buffer;
 150         }
 151
 152         case TOKEN_STREAMBEGIN:
 153                 sprintf(buffer, "<beginning of '%s'>", (input_streams + token->pos.stream)->name);
 154                 return buffer;
 155
 156         case TOKEN_STREAMEND:
 157                 sprintf(buffer, "<end of '%s'>", (input_streams + token->pos.stream)->name);
 158                 return buffer;
 159
 160         default:
 161                 return "WTF???";
 162         }
 163 }
 164
 165 int init_stream(const char *name, int fd, const char **next_path)
 166 {
 167         int stream = input_stream_nr;
 168         struct stream *current;
 169         struct stat st;
 170
 171         if (stream >= input_streams_allocated) {
 172                 int newalloc = stream * 4 / 3 + 10;
 173                 input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
 174                 if (!input_streams)
 175                         die("Unable to allocate more streams space");
 176                 input_streams_allocated = newalloc;
 177         }
 178         current = input_streams + stream;
 179         memset(current, 0, sizeof(*current));
 180         current->name = name;
 181         current->fd = fd;
 182         current->next_path = next_path;
 183         current->constant = CONSTANT_FILE_MAYBE;
 184         if (fd >= 0 && fstat(fd, &st) == 0 && S_ISREG(st.st_mode)) {
 185                 int i;
 186
 187                 for (i = 0; i < stream; i++) {
 188                         struct stream *s = input_streams + i;
 189                         if (s->constant == CONSTANT_FILE_YES &&
 190                             identical_files(s, &st, name) &&
 191                             lookup_symbol(s->protect, NS_MACRO))
 192                                 return -1;
 193                 }
 194
 195                 current->dev = st.st_dev;
 196                 current->ino = st.st_ino;
 197         }
 198         input_stream_nr = stream+1;
 199         return stream;
 200 }
 201
 202 static struct token * alloc_token(stream_t *stream)
 203 {
 204         struct token *token = __alloc_token(0);
 205         token->pos = stream_pos(stream);
 206         return token;
 207 }
 208
 209 /*
 210  *  Argh...  That was surprisingly messy - handling '\r' complicates the
 211  *  things a _lot_.
 212  */
 213 static int nextchar_slow(stream_t *stream)
 214 {
 215         int offset = stream->offset;
 216         int size = stream->size;
 217         int c;
 218         int spliced = 0, had_cr, had_backslash, complain;
 219
 220 restart:
 221         had_cr = had_backslash = complain = 0;
 222
 223 repeat:
 224         if (offset >= size) {
 225                 size = read(stream->fd, stream->buffer, BUFSIZE);
 226                 if (size <= 0)
 227                         goto got_eof;
 228                 stream->size = size;
 229                 stream->offset = offset = 0;
 230         }
 231
 232         c = stream->buffer[offset++];
 233
 234         if (had_cr && c != '\n')
 235                 complain = 1;
 236
 237         if (c == '\r') {
 238                 had_cr = 1;
 239                 goto repeat;
 240         }
 241
 242         stream->pos++;
 243
 244         if (c == '\n') {
 245                 stream->line++;
 246                 stream->pos = 0;
 247         }
 248
 249         if (!had_backslash) {
 250                 if (c == '\\') {
 251                         had_backslash = 1;
 252                         goto repeat;
 253                 }
 254                 if (c == '\n')
 255                         stream->newline = 1;
 256         } else {
 257                 if (c == '\n') {
 258                         if (complain)
 259                                 warning(stream_pos(stream), "non-ASCII data stream");
 260                         spliced = 1;
 261                         goto restart;
 262                 }
 263                 stream->pos--;
 264                 offset--;
 265                 c = '\\';
 266         }
 267
 268 out:
 269         stream->offset = offset;
 270         if (complain)
 271                 warning(stream_pos(stream), "non-ASCII data stream");
 272
 273         return c;
 274
 275 got_eof:
 276         if (had_backslash) {
 277                 c = '\\';
 278                 goto out;
 279         }
 280         if (stream->pos)
 281                 warning(stream_pos(stream), "no newline at end of file");
 282         else if (had_cr)
 283                 warning(stream_pos(stream), "non-ASCII data stream");
 284         else if (spliced)
 285                 warning(stream_pos(stream), "backslash-newline at end of file");
 286         return EOF;
 287 }
 288
 289 /*
 290  *  We want that as light as possible while covering all normal cases.
 291  *  Slow path (including the logics with line-splicing and EOF sanity
 292  *  checks) is in nextchar_slow().
 293  */
 294 static int nextchar(stream_t *stream)
 295 {
 296         int offset = stream->offset;
 297
 298         if (offset < stream->size) {
 299                 int c = stream->buffer[offset++];
 300                 static const char special[256] = {
 301                         ['\r'] = 1, ['\n'] = 1, ['\\'] = 1
 302                 };
 303                 if (!special[c]) {
 304                         stream->offset = offset;
 305                         stream->pos++;
 306                         return c;
 307                 }
 308         }
 309         return nextchar_slow(stream);
 310 }
 311
 312 struct token eof_token_entry;
 313
 314 static void mark_eof(stream_t *stream, struct token *end_token)
 315 {
 316         struct token *end;
 317
 318         end = alloc_token(stream);
 319         token_type(end) = TOKEN_STREAMEND;
 320         end->pos.newline = 1;
 321
 322         eof_token_entry.next = &eof_token_entry;
 323         eof_token_entry.pos.newline = 1;
 324
 325         if (!end_token)
 326                 end_token =  &eof_token_entry;
 327         end->next = end_token;
 328         *stream->tokenlist = end;
 329         stream->tokenlist = NULL;
 330 }
 331
 332 static void add_token(stream_t *stream)
 333 {
 334         struct token *token = stream->token;
 335
 336         stream->token = NULL;
 337         token->next = NULL;
 338         *stream->tokenlist = token;
 339         stream->tokenlist = &token->next;
 340 }
 341
 342 static void drop_token(stream_t *stream)
 343 {
 344         stream->newline |= stream->token->pos.newline;
 345         stream->whitespace |= stream->token->pos.whitespace;
 346         stream->token = NULL;
 347 }
 348
 349 enum {
 350         Letter = 1,
 351         Digit = 2,
 352         Hex = 4,
 353         Exp = 8,
 354         Dot = 16,
 355         ValidSecond = 32,
 356 };
 357
 358 static const long cclass[257] = {
 359         ['0' + 1 ... '9' + 1] = Digit | Hex,
 360         ['A' + 1 ... 'D' + 1] = Letter | Hex,
 361         ['E' + 1] = Letter | Hex | Exp,
 362         ['F' + 1] = Letter | Hex,
 363         ['G' + 1 ... 'O' + 1] = Letter,
 364         ['P' + 1] = Letter | Exp,
 365         ['Q' + 1 ... 'Z' + 1] = Letter,
 366         ['a' + 1 ... 'd' + 1] = Letter | Hex,
 367         ['e' + 1] = Letter | Hex | Exp,
 368         ['f' + 1] = Letter | Hex,
 369         ['g' + 1 ... 'o' + 1] = Letter,
 370         ['p' + 1] = Letter | Exp,
 371         ['q' + 1 ... 'z' + 1] = Letter,
 372         ['_' + 1] = Letter,
 373         ['.' + 1] = Dot | ValidSecond,
 374         ['=' + 1] = ValidSecond,
 375         ['+' + 1] = ValidSecond,
 376         ['-' + 1] = ValidSecond,
 377         ['>' + 1] = ValidSecond,
 378         ['<' + 1] = ValidSecond,
 379         ['&' + 1] = ValidSecond,
 380         ['|' + 1] = ValidSecond,
 381         ['#' + 1] = ValidSecond,
 382 };
 383
 384 /*
 385  * pp-number:
 386  *      digit
 387  *      . digit
 388  *      pp-number digit
 389  *      pp-number identifier-nodigit
 390  *      pp-number e sign
 391  *      pp-number E sign
 392  *      pp-number p sign
 393  *      pp-number P sign
 394  *      pp-number .
 395  */
 396 static int get_one_number(int c, int next, stream_t *stream)
 397 {
 398         struct token *token;
 399         static char buffer[4095];
 400         char *p = buffer, *buf, *buffer_end = buffer + sizeof (buffer);
 401         int len;
 402
 403         *p++ = c;
 404         for (;;) {
 405                 long class =  cclass[next + 1];
 406                 if (!(class & (Dot | Digit | Letter)))
 407                         break;
 408                 if (p != buffer_end)
 409                         *p++ = next;
 410                 next = nextchar(stream);
 411                 if (class & Exp) {
 412                         if (next == '-' || next == '+') {
 413                                 if (p != buffer_end)
 414                                         *p++ = next;
 415                                 next = nextchar(stream);
 416                         }
 417                 }
 418         }
 419
 420         if (p == buffer_end) {
 421                 error(stream_pos(stream), "number token exceeds %td characters",
 422                       buffer_end - buffer);
 423                 // Pretend we saw just "1".
 424                 buffer[0] = '1';
 425                 p = buffer + 1;
 426         }
 427
 428         *p++ = 0;
 429         len = p - buffer;
 430         buf = __alloc_bytes(len);
 431         memcpy(buf, buffer, len);
 432
 433         token = stream->token;
 434         token_type(token) = TOKEN_NUMBER;
 435         token->number = buf;
 436         add_token(stream);
 437
 438         return next;
 439 }
 440
 441 static int escapechar(int first, int type, stream_t *stream, int *valp)
 442 {
 443         int next, value;
 444
 445         next = nextchar(stream);
 446         value = first;
 447
 448         if (first == '\n')
 449                 warning(stream_pos(stream), "Newline in string or character constant");
 450
 451         if (first == '\\' && next != EOF) {
 452                 value = next;
 453                 next = nextchar(stream);
 454                 if (value != type) {
 455                         switch (value) {
 456                         case 'a':
 457                                 value = '\a';
 458                                 break;
 459                         case 'b':
 460                                 value = '\b';
 461                                 break;
 462                         case 't':
 463                                 value = '\t';
 464                                 break;
 465                         case 'n':
 466                                 value = '\n';
 467                                 break;
 468                         case 'v':
 469                                 value = '\v';
 470                                 break;
 471                         case 'f':
 472                                 value = '\f';
 473                                 break;
 474                         case 'r':
 475                                 value = '\r';
 476                                 break;
 477                         case 'e':
 478                                 value = '\e';
 479                                 break;
 480                         case '\\':
 481                                 break;
 482                         case '\'':
 483                                 break;
 484                         case '"':
 485                                 break;
 486                         case '\n':
 487                                 warning(stream_pos(stream), "Newline in string or character constant");
 488                                 break;
 489                         case '0'...'7': {
 490                                 int nr = 2;
 491                                 value -= '0';
 492                                 while (next >= '0' && next <= '9') {
 493                                         value = (value << 3) + (next-'0');
 494                                         next = nextchar(stream);
 495                                         if (!--nr)
 496                                                 break;
 497                                 }
 498                                 value &= 0xff;
 499                                 break;
 500                         }
 501                         case 'x': {
 502                                 int hex = hexval(next);
 503                                 if (hex < 16) {
 504                                         value = hex;
 505                                         next = nextchar(stream);
 506                                         while ((hex = hexval(next)) < 16) {
 507                                                 value = (value << 4) + hex;
 508                                                 next = nextchar(stream);
 509                                         }
 510                                         value &= 0xff;
 511                                         break;
 512                                 }
 513                         }
 514                         /* Fallthrough */
 515                         default:
 516                                 warning(stream_pos(stream), "Unknown escape '%c'", value);
 517                         }
 518                 }
 519                 /* Mark it as escaped */
 520                 value |= 0x100;
 521         }
 522         *valp = value;
 523         return next;
 524 }
 525
 526 static int get_char_token(int next, stream_t *stream)
 527 {
 528         int value;
 529         struct token *token;
 530
 531         next = escapechar(next, '\'', stream, &value);
 532         if (value == '\'' || next != '\'') {
 533                 warning(stream_pos(stream), "Bad character constant");
 534                 drop_token(stream);
 535                 return next;
 536         }
 537
 538         token = stream->token;
 539         token_type(token) = TOKEN_CHAR;
 540         token->character = value & 0xff;
 541
 542         add_token(stream);
 543         return nextchar(stream);
 544 }
 545
 546 static int get_string_token(int next, stream_t *stream)
 547 {
 548         static char buffer[MAX_STRING];
 549         struct string *string;
 550         struct token *token;
 551         int len = 0;
 552
 553         for (;;) {
 554                 int val;
 555                 next = escapechar(next, '"', stream, &val);
 556                 if (val == '"')
 557                         break;
 558                 if (next == EOF) {
 559                         warning(stream_pos(stream), "End of file in middle of string");
 560                         return next;
 561                 }
 562                 if (len < MAX_STRING)
 563                         buffer[len] = val;
 564                 len++;
 565         }
 566
 567         if (len > MAX_STRING) {
 568                 warning(stream_pos(stream), "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
 569                 len = MAX_STRING;
 570         }
 571
 572         string = __alloc_string(len+1);
 573         memcpy(string->data, buffer, len);
 574         string->data[len] = '\0';
 575         string->length = len+1;
 576
 577         /* Pass it on.. */
 578         token = stream->token;
 579         token_type(token) = TOKEN_STRING;
 580         token->string = string;
 581         add_token(stream);
 582
 583         return next;
 584 }
 585
 586 static int drop_stream_eoln(stream_t *stream)
 587 {
 588         int next = nextchar(stream);
 589         drop_token(stream);
 590         for (;;) {
 591                 int curr = next;
 592                 if (curr == EOF)
 593                         return next;
 594                 next = nextchar(stream);
 595                 if (curr == '\n')
 596                         return next;
 597         }
 598 }
 599
 600 static int drop_stream_comment(stream_t *stream)
 601 {
 602         int newline;
 603         int next;
 604         drop_token(stream);
 605         newline = stream->newline;
 606
 607         next = nextchar(stream);
 608         for (;;) {
 609                 int curr = next;
 610                 if (curr == EOF) {
 611                         warning(stream_pos(stream), "End of file in the middle of a comment");
 612                         return curr;
 613                 }
 614                 next = nextchar(stream);
 615                 if (curr == '*' && next == '/')
 616                         break;
 617         }
 618         stream->newline = newline;
 619         return nextchar(stream);
 620 }
 621
 622 unsigned char combinations[][3] = COMBINATION_STRINGS;
 623
 624 #define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
 625
 626 static int get_one_special(int c, stream_t *stream)
 627 {
 628         struct token *token;
 629         unsigned char c1, c2, c3;
 630         int next, value, i;
 631         char *comb;
 632
 633         next = nextchar(stream);
 634
 635         /*
 636          * Check for numbers, strings, character constants, and comments
 637          */
 638         switch (c) {
 639         case '.':
 640                 if (next >= '0' && next <= '9')
 641                         return get_one_number(c, next, stream);
 642                 break;
 643         case '"':
 644                 return get_string_token(next, stream);
 645         case '\'':
 646                 return get_char_token(next, stream);
 647         case '/':
 648                 if (next == '/')
 649                         return drop_stream_eoln(stream);
 650                 if (next == '*')
 651                         return drop_stream_comment(stream);
 652         }
 653
 654         /*
 655          * Check for combinations
 656          */
 657         value = c;
 658         if (cclass[next + 1] & ValidSecond) {
 659                 comb = combinations[0];
 660                 c1 = c; c2 = next; c3 = 0;
 661                 for (i = 0; i < NR_COMBINATIONS; i++) {
 662                         if (comb[0] == c1 && comb[1] == c2 && comb[2] == c3) {
 663                                 value = i + SPECIAL_BASE;
 664                                 next = nextchar(stream);
 665                                 if (c3)
 666                                         break;
 667                                 c3 = next;
 668                         }
 669                         comb += 3;
 670                 }
 671         }
 672
 673         /* Pass it on.. */
 674         token = stream->token;
 675         token_type(token) = TOKEN_SPECIAL;
 676         token->special = value;
 677         add_token(stream);
 678         return next;
 679 }
 680
 681 #define IDENT_HASH_BITS (13)
 682 #define IDENT_HASH_SIZE (1<<IDENT_HASH_BITS)
 683 #define IDENT_HASH_MASK (IDENT_HASH_SIZE-1)
 684
 685 #define ident_hash_init(c)              (c)
 686 #define ident_hash_add(oldhash,c)       ((oldhash)*11 + (c))
 687 #define ident_hash_end(hash)            ((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK)
 688
 689 static struct ident *hash_table[IDENT_HASH_SIZE];
 690 int ident_hit, ident_miss, idents;
 691
 692 void show_identifier_stats(void)
 693 {
 694         int i;
 695         int distribution[100];
 696
 697         fprintf(stderr, "identifiers: %d hits, %d misses\n",
 698                 ident_hit, ident_miss);
 699
 700         for (i = 0; i < 100; i++)
 701                 distribution[i] = 0;
 702
 703         for (i = 0; i < IDENT_HASH_SIZE; i++) {
 704                 struct ident * ident = hash_table[i];
 705                 int count = 0;
 706
 707                 while (ident) {
 708                         count++;
 709                         ident = ident->next;
 710                 }
 711                 if (count > 99)
 712                         count = 99;
 713                 distribution[count]++;
 714         }
 715
 716         for (i = 0; i < 100; i++) {
 717                 if (distribution[i])
 718                         fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
 719         }
 720 }
 721
 722 static struct ident *alloc_ident(const char *name, int len)
 723 {
 724         struct ident *ident = __alloc_ident(len);
 725         ident->symbols = NULL;
 726         ident->len = len;
 727         ident->tainted = 0;
 728         memcpy(ident->name, name, len);
 729         return ident;
 730 }
 731
 732 static struct ident * insert_hash(struct ident *ident, unsigned long hash)
 733 {
 734         ident->next = hash_table[hash];
 735         hash_table[hash] = ident;
 736         ident_miss++;
 737         return ident;
 738 }
 739
 740 static struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
 741 {
 742         struct ident *ident;
 743         struct ident **p;
 744
 745         p = &hash_table[hash];
 746         while ((ident = *p) != NULL) {
 747                 if (ident->len == (unsigned char) len) {
 748                         const char *n = name;
 749                         const char *m = ident->name;
 750                         int l = len;
 751                         do {
 752                                 if (*n != *m)
 753                                         goto next;
 754                                 n++;
 755                                 m++;
 756                         } while (--l);
 757
 758                         ident_hit++;
 759                         return ident;
 760                 }
 761 next:
 762                 //misses++;
 763                 p = &ident->next;
 764         }
 765         ident = alloc_ident(name, len);
 766         *p = ident;
 767         ident->next = NULL;
 768         ident_miss++;
 769         idents++;
 770         return ident;
 771 }
 772
 773 static unsigned long hash_name(const char *name, int len)
 774 {
 775         unsigned long hash;
 776         const unsigned char *p = (const unsigned char *)name;
 777
 778         hash = ident_hash_init(*p++);
 779         while (--len) {
 780                 unsigned int i = *p++;
 781                 hash = ident_hash_add(hash, i);
 782         }
 783         return ident_hash_end(hash);
 784 }
 785
 786 struct ident *hash_ident(struct ident *ident)
 787 {
 788         return insert_hash(ident, hash_name(ident->name, ident->len));
 789 }
 790
 791 struct ident *built_in_ident(const char *name)
 792 {
 793         int len = strlen(name);
 794         return create_hashed_ident(name, len, hash_name(name, len));
 795 }
 796
 797 struct token *built_in_token(int stream, const char *name)
 798 {
 799         struct token *token;
 800
 801         token = __alloc_token(0);
 802         token->pos.stream = stream;
 803         token_type(token) = TOKEN_IDENT;
 804         token->ident = built_in_ident(name);
 805         return token;
 806 }
 807
 808 static int get_one_identifier(int c, stream_t *stream)
 809 {
 810         struct token *token;
 811         struct ident *ident;
 812         unsigned long hash;
 813         char buf[256];
 814         int len = 1;
 815         int next;
 816
 817         hash = ident_hash_init(c);
 818         buf[0] = c;
 819         for (;;) {
 820                 next = nextchar(stream);
 821                 if (!(cclass[next + 1] & (Letter | Digit)))
 822                         break;
 823                 if (len >= sizeof(buf))
 824                         break;
 825                 hash = ident_hash_add(hash, next);
 826                 buf[len] = next;
 827                 len++;
 828         };
 829         hash = ident_hash_end(hash);
 830
 831         ident = create_hashed_ident(buf, len, hash);
 832
 833         /* Pass it on.. */
 834         token = stream->token;
 835         token_type(token) = TOKEN_IDENT;
 836         token->ident = ident;
 837         add_token(stream);
 838         return next;
 839 }
 840
 841 static int get_one_token(int c, stream_t *stream)
 842 {
 843         long class = cclass[c + 1];
 844         if (class & Digit)
 845                 return get_one_number(c, nextchar(stream), stream);
 846         if (class & Letter)
 847                 return get_one_identifier(c, stream);
 848         return get_one_special(c, stream);
 849 }
 850
 851 static struct token *setup_stream(stream_t *stream, int idx, int fd,
 852         unsigned char *buf, unsigned int buf_size)
 853 {
 854         struct token *begin;
 855
 856         stream->nr = idx;
 857         stream->line = 1;
 858         stream->newline = 1;
 859         stream->whitespace = 0;
 860         stream->pos = 0;
 861
 862         stream->token = NULL;
 863         stream->fd = fd;
 864         stream->offset = 0;
 865         stream->size = buf_size;
 866         stream->buffer = buf;
 867
 868         begin = alloc_token(stream);
 869         token_type(begin) = TOKEN_STREAMBEGIN;
 870         stream->tokenlist = &begin->next;
 871         return begin;
 872 }
 873
 874 static void tokenize_stream(stream_t *stream, struct token *endtoken)
 875 {
 876         int c = nextchar(stream);
 877         while (c != EOF) {
 878                 if (!isspace(c)) {
 879                         struct token *token = alloc_token(stream);
 880                         stream->token = token;
 881                         stream->newline = 0;
 882                         stream->whitespace = 0;
 883                         c = get_one_token(c, stream);
 884                         continue;
 885                 }
 886                 stream->whitespace = 1;
 887                 c = nextchar(stream);
 888         }
 889         mark_eof(stream, endtoken);
 890 }
 891
 892 struct token * tokenize_buffer(unsigned char *buffer, unsigned long size, struct token *endtoken)
 893 {
 894         stream_t stream;
 895         struct token *begin;
 896
 897         begin = setup_stream(&stream, 0, -1, buffer, size);
 898         tokenize_stream(&stream, endtoken);
 899         return begin;
 900 }
 901
 902 struct token * tokenize(const char *name, int fd, struct token *endtoken, const char **next_path)
 903 {
 904         struct token *begin;
 905         stream_t stream;
 906         unsigned char buffer[BUFSIZE];
 907         int idx;
 908
 909         idx = init_stream(name, fd, next_path);
 910         if (idx < 0) {
 911                 // info(endtoken->pos, "File %s is const", name);
 912                 return endtoken;
 913         }
 914
 915         begin = setup_stream(&stream, idx, fd, buffer, 0);
 916         tokenize_stream(&stream, endtoken);
 917         return begin;
 918 }