tokenize.c

   1 /*
   2  * This is a really stupid C tokenizer. It doesn't do any include
   3  * files or anything complex at all. That's the pre-processor.
   4  *
   5  * Copyright (C) 2003 Transmeta Corp.
   6  *               2003 Linus Torvalds
   7  *
   8  *  Licensed under the Open Software License version 1.1
   9  */
  10 #include <stdio.h>
  11 #include <stdlib.h>
  12 #include <stdarg.h>
  13 #include <stddef.h>
  14 #include <string.h>
  15 #include <ctype.h>
  16 #include <unistd.h>
  17 #include <sys/stat.h>
  18
  19 #include "lib.h"
  20 #include "token.h"
  21 #include "symbol.h"
  22
  23 #define EOF (-1)
  24
  25 int input_stream_nr = 0;
  26 struct stream *input_streams;
  27 static int input_streams_allocated;
  28
  29 #define BUFSIZE (8192)
  30
  31 typedef struct {
  32         int fd, offset, size;
  33         struct position pos;
  34         struct token **tokenlist;
  35         struct token *token;
  36         unsigned char *buffer;
  37 } stream_t;
  38
  39
  40 const char *show_special(int val)
  41 {
  42         static const char *combinations[] = COMBINATION_STRINGS;
  43         static char buffer[4];
  44
  45         buffer[0] = val;
  46         buffer[1] = 0;
  47         if (val >= SPECIAL_BASE)
  48                 strcpy(buffer, combinations[val - SPECIAL_BASE]);
  49         return buffer;
  50 }
  51
  52 const char *show_ident(const struct ident *ident)
  53 {
  54         static char buffer[256];
  55         if (!ident)
  56                 return "<noident>";
  57         sprintf(buffer, "%.*s", ident->len, ident->name);
  58         return buffer;
  59 }
  60
  61 char *charstr(char *ptr, unsigned char c, unsigned char escape, unsigned char next)
  62 {
  63         if (isprint(c)) {
  64                 if (c == escape || c == '\\')
  65                         *ptr++ = '\\';
  66                 *ptr++ = c;
  67                 return ptr;
  68         }
  69         *ptr++ = '\\';
  70         switch (c) {
  71         case '\n':
  72                 *ptr++ = 'n';
  73                 return ptr;
  74         case '\t':
  75                 *ptr++ = 't';
  76                 return ptr;
  77         }
  78         if (!isdigit(next))
  79                 return ptr + sprintf(ptr, "%o", c);
  80
  81         return ptr + sprintf(ptr, "%03o", c);
  82 }
  83
  84 const char *show_string(const struct string *string)
  85 {
  86         static char buffer[256];
  87         char *ptr;
  88         int i;
  89
  90         ptr = buffer;
  91         *ptr++ = '"';
  92         for (i = 0; i < string->length-1; i++) {
  93                 const unsigned char *p = string->data + i;
  94                 ptr = charstr(ptr, p[0], '"', p[1]);
  95         }
  96         *ptr++ = '"';
  97         *ptr = '\0';
  98         return buffer;
  99 }
 100
 101 const char *show_token(const struct token *token)
 102 {
 103         static char buffer[256];
 104
 105         if (!token)
 106                 return "<no token>";
 107         switch (token_type(token)) {
 108         case TOKEN_ERROR:
 109                 return "syntax error";
 110
 111         case TOKEN_EOF:
 112                 return "end-of-input";
 113
 114         case TOKEN_IDENT:
 115                 return show_ident(token->ident);
 116
 117         case TOKEN_STRING:
 118                 return show_string(token->string);
 119
 120         case TOKEN_NUMBER:
 121                 return token->number;
 122
 123         case TOKEN_SPECIAL:
 124                 return show_special(token->special);
 125
 126         case TOKEN_CHAR: {
 127                 char *ptr = buffer;
 128                 int c = token->character;
 129                 *ptr++ = '\'';
 130                 ptr = charstr(ptr, c, '\'', 0);
 131                 *ptr++ = '\'';
 132                 *ptr++ = '\0';
 133                 return buffer;
 134         }
 135
 136         case TOKEN_STREAMBEGIN:
 137                 sprintf(buffer, "<beginning of '%s'>", (input_streams + token->pos.stream)->name);
 138                 return buffer;
 139
 140         case TOKEN_STREAMEND:
 141                 sprintf(buffer, "<end of '%s'>", (input_streams + token->pos.stream)->name);
 142                 return buffer;
 143
 144         default:
 145                 return "WTF???";
 146         }
 147 }
 148
 149 int init_stream(const char *name, int fd, const char **next_path)
 150 {
 151         int stream = input_stream_nr;
 152         struct stream *current;
 153         struct stat st;
 154
 155         if (stream >= input_streams_allocated) {
 156                 int newalloc = stream * 4 / 3 + 10;
 157                 input_streams = realloc(input_streams, newalloc * sizeof(struct stream));
 158                 if (!input_streams)
 159                         die("Unable to allocate more streams space");
 160                 input_streams_allocated = newalloc;
 161         }
 162         current = input_streams + stream;
 163         memset(current, 0, sizeof(*current));
 164         current->name = name;
 165         current->fd = fd;
 166         current->next_path = next_path;
 167         current->constant = CONSTANT_FILE_MAYBE;
 168         if (fd >= 0 && fstat(fd, &st) == 0 && S_ISREG(st.st_mode)) {
 169                 int i;
 170
 171                 for (i = 0; i < stream; i++) {
 172                         struct stream *s = input_streams + i;
 173                         if (s->constant == CONSTANT_FILE_YES &&
 174                             identical_files(s, &st, name) &&
 175                             lookup_symbol(s->protect, NS_MACRO))
 176                                 return -1;
 177                 }
 178
 179                 current->dev = st.st_dev;
 180                 current->ino = st.st_ino;
 181         }
 182         input_stream_nr = stream+1;
 183         return stream;
 184 }
 185
 186 static struct token * alloc_token(stream_t *stream)
 187 {
 188         struct token *token = __alloc_token(0);
 189         token->pos = stream->pos;
 190         return token;
 191 }
 192
 193 /*
 194  *  Argh...  That was surprisingly messy - handling '\r' complicates the
 195  *  things a _lot_.
 196  */
 197 static int nextchar_slow(stream_t *stream)
 198 {
 199         int offset = stream->offset;
 200         int size = stream->size;
 201         int c;
 202         int spliced = 0, had_cr, had_backslash, complain;
 203
 204 restart:
 205         had_cr = had_backslash = complain = 0;
 206
 207 repeat:
 208         if (offset >= size) {
 209                 size = read(stream->fd, stream->buffer, BUFSIZE);
 210                 if (size <= 0)
 211                         goto got_eof;
 212                 stream->size = size;
 213                 stream->offset = offset = 0;
 214         }
 215
 216         c = stream->buffer[offset++];
 217
 218         if (had_cr && c != '\n')
 219                 complain = 1;
 220
 221         if (c == '\r') {
 222                 had_cr = 1;
 223                 goto repeat;
 224         }
 225
 226         stream->pos.pos++;
 227
 228         if (c == '\n') {
 229                 stream->pos.line++;
 230                 stream->pos.pos = 0;
 231         }
 232
 233         if (!had_backslash) {
 234                 if (c == '\\') {
 235                         had_backslash = 1;
 236                         goto repeat;
 237                 }
 238                 if (c == '\n')
 239                         stream->pos.newline = 1;
 240         } else {
 241                 if (c == '\n') {
 242                         if (complain)
 243                                 warning(stream->pos, "non-ASCII data stream");
 244                         spliced = 1;
 245                         goto restart;
 246                 }
 247                 stream->pos.pos--;
 248                 offset--;
 249                 c = '\\';
 250         }
 251
 252 out:
 253         stream->offset = offset;
 254         if (complain)
 255                 warning(stream->pos, "non-ASCII data stream");
 256
 257         return c;
 258
 259 got_eof:
 260         if (had_backslash) {
 261                 c = '\\';
 262                 goto out;
 263         }
 264         if (stream->pos.pos)
 265                 warning(stream->pos, "no newline at end of file");
 266         else if (had_cr)
 267                 warning(stream->pos, "non-ASCII data stream");
 268         else if (spliced)
 269                 warning(stream->pos, "backslash-newline at end of file");
 270         return EOF;
 271 }
 272
 273 /*
 274  *  We want that as light as possible while covering all normal cases.
 275  *  Slow path (including the logics with line-splicing and EOF sanity
 276  *  checks) is in nextchar_slow().
 277  */
 278 static inline int nextchar(stream_t *stream)
 279 {
 280         int offset = stream->offset;
 281
 282         if (offset < stream->size) {
 283                 int c = stream->buffer[offset++];
 284                 unsigned char next;
 285                 switch (c) {
 286                 case '\r':
 287                         break;
 288                 case '\n':
 289                         stream->offset = offset;
 290                         stream->pos.line++;
 291                         stream->pos.newline = 1;
 292                         stream->pos.pos = 0;
 293                         return '\n';
 294                 case '\\':
 295                         if (offset >= stream->size)
 296                                 break;
 297                         next = stream->buffer[offset];
 298                         if (next == '\n' || next == '\r')
 299                                 break;
 300                         /* fallthru */
 301                 default:
 302                         stream->offset = offset;
 303                         stream->pos.pos++;
 304                         return c;
 305                 }
 306         }
 307         return nextchar_slow(stream);
 308 }
 309
 310 struct token eof_token_entry;
 311
 312 static void mark_eof(stream_t *stream, struct token *end_token)
 313 {
 314         struct token *end;
 315
 316         end = alloc_token(stream);
 317         token_type(end) = TOKEN_STREAMEND;
 318         end->pos.newline = 1;
 319
 320         eof_token_entry.next = &eof_token_entry;
 321         eof_token_entry.pos.newline = 1;
 322
 323         if (!end_token)
 324                 end_token =  &eof_token_entry;
 325         end->next = end_token;
 326         *stream->tokenlist = end;
 327         stream->tokenlist = NULL;
 328 }
 329
 330 static void add_token(stream_t *stream)
 331 {
 332         struct token *token = stream->token;
 333
 334         stream->token = NULL;
 335         token->next = NULL;
 336         *stream->tokenlist = token;
 337         stream->tokenlist = &token->next;
 338 }
 339
 340 static void drop_token(stream_t *stream)
 341 {
 342         stream->pos.newline |= stream->token->pos.newline;
 343         stream->pos.whitespace |= stream->token->pos.whitespace;
 344         stream->token = NULL;
 345 }
 346
 347 enum {
 348         Letter = 1,
 349         Digit = 2,
 350         Hex = 4,
 351         Exp = 8,
 352         Dot = 16,
 353         ValidSecond = 32,
 354 };
 355
 356 static const long cclass[257] = {
 357         ['0' + 1 ... '9' + 1] = Digit | Hex,
 358         ['A' + 1 ... 'D' + 1] = Letter | Hex,
 359         ['E' + 1] = Letter | Hex | Exp,
 360         ['F' + 1] = Letter | Hex,
 361         ['G' + 1 ... 'O' + 1] = Letter,
 362         ['P' + 1] = Letter | Exp,
 363         ['Q' + 1 ... 'Z' + 1] = Letter,
 364         ['a' + 1 ... 'd' + 1] = Letter | Hex,
 365         ['e' + 1] = Letter | Hex | Exp,
 366         ['f' + 1] = Letter | Hex,
 367         ['g' + 1 ... 'o' + 1] = Letter,
 368         ['p' + 1] = Letter | Exp,
 369         ['q' + 1 ... 'z' + 1] = Letter,
 370         ['_' + 1] = Letter,
 371         ['.' + 1] = Dot | ValidSecond,
 372         ['=' + 1] = ValidSecond,
 373         ['+' + 1] = ValidSecond,
 374         ['-' + 1] = ValidSecond,
 375         ['>' + 1] = ValidSecond,
 376         ['<' + 1] = ValidSecond,
 377         ['&' + 1] = ValidSecond,
 378         ['|' + 1] = ValidSecond,
 379         ['#' + 1] = ValidSecond,
 380 };
 381
 382 /*
 383  * pp-number:
 384  *      digit
 385  *      . digit
 386  *      pp-number digit
 387  *      pp-number identifier-nodigit
 388  *      pp-number e sign
 389  *      pp-number E sign
 390  *      pp-number p sign
 391  *      pp-number P sign
 392  *      pp-number .
 393  */
 394 static int get_one_number(int c, int next, stream_t *stream)
 395 {
 396         struct token *token;
 397         static char buffer[256];
 398         char *p = buffer, *buf;
 399         int len;
 400
 401         *p++ = c;
 402         for (;;) {
 403                 long class =  cclass[next + 1];
 404                 if (!(class & (Dot | Digit | Letter)))
 405                         break;
 406                 *p++ = next;
 407                 next = nextchar(stream);
 408                 if (class & Exp) {
 409                         if (next == '-' || next == '+') {
 410                                 *p++ = next;
 411                                 next = nextchar(stream);
 412                         }
 413                 }
 414         }
 415         *p++ = 0;
 416         len = p - buffer;
 417         buf = __alloc_bytes(len);
 418         memcpy(buf, buffer, len);
 419
 420         token = stream->token;
 421         token_type(token) = TOKEN_NUMBER;
 422         token->number = buf;
 423         add_token(stream);
 424
 425         return next;
 426 }
 427
 428 static int escapechar(int first, int type, stream_t *stream, int *valp)
 429 {
 430         int next, value;
 431
 432         next = nextchar(stream);
 433         value = first;
 434
 435         if (first == '\n')
 436                 warning(stream->pos, "Newline in string or character constant");
 437
 438         if (first == '\\' && next != EOF) {
 439                 value = next;
 440                 next = nextchar(stream);
 441                 if (value != type) {
 442                         switch (value) {
 443                         case 'a':
 444                                 value = '\a';
 445                                 break;
 446                         case 'b':
 447                                 value = '\b';
 448                                 break;
 449                         case 't':
 450                                 value = '\t';
 451                                 break;
 452                         case 'n':
 453                                 value = '\n';
 454                                 break;
 455                         case 'v':
 456                                 value = '\v';
 457                                 break;
 458                         case 'f':
 459                                 value = '\f';
 460                                 break;
 461                         case 'r':
 462                                 value = '\r';
 463                                 break;
 464                         case 'e':
 465                                 value = '\e';
 466                                 break;
 467                         case '\\':
 468                                 break;
 469                         case '\'':
 470                                 break;
 471                         case '"':
 472                                 break;
 473                         case '\n':
 474                                 warning(stream->pos, "Newline in string or character constant");
 475                                 break;
 476                         case '0'...'7': {
 477                                 int nr = 2;
 478                                 value -= '0';
 479                                 while (next >= '0' && next <= '9') {
 480                                         value = (value << 3) + (next-'0');
 481                                         next = nextchar(stream);
 482                                         if (!--nr)
 483                                                 break;
 484                                 }
 485                                 value &= 0xff;
 486                                 break;
 487                         }
 488                         case 'x': {
 489                                 int hex = hexval(next);
 490                                 if (hex < 16) {
 491                                         value = hex;
 492                                         next = nextchar(stream);
 493                                         while ((hex = hexval(next)) < 16) {
 494                                                 value = (value << 4) + hex;
 495                                                 next = nextchar(stream);
 496                                         }
 497                                         value &= 0xff;
 498                                         break;
 499                                 }
 500                         }
 501                         /* Fallthrough */
 502                         default:
 503                                 warning(stream->pos, "Unknown escape '%c'", value);
 504                         }
 505                 }
 506                 /* Mark it as escaped */
 507                 value |= 0x100;
 508         }
 509         *valp = value;
 510         return next;
 511 }
 512
 513 static int get_char_token(int next, stream_t *stream)
 514 {
 515         int value;
 516         struct token *token;
 517
 518         next = escapechar(next, '\'', stream, &value);
 519         if (value == '\'' || next != '\'') {
 520                 warning(stream->pos, "Bad character constant");
 521                 drop_token(stream);
 522                 return next;
 523         }
 524
 525         token = stream->token;
 526         token_type(token) = TOKEN_CHAR;
 527         token->character = value & 0xff;
 528
 529         add_token(stream);
 530         return nextchar(stream);
 531 }
 532
 533 static int get_string_token(int next, stream_t *stream)
 534 {
 535         static char buffer[MAX_STRING];
 536         struct string *string;
 537         struct token *token;
 538         int len = 0;
 539
 540         for (;;) {
 541                 int val;
 542                 next = escapechar(next, '"', stream, &val);
 543                 if (val == '"')
 544                         break;
 545                 if (next == EOF) {
 546                         warning(stream->pos, "End of file in middle of string");
 547                         return next;
 548                 }
 549                 if (len < MAX_STRING)
 550                         buffer[len] = val;
 551                 len++;
 552         }
 553
 554         if (len > MAX_STRING) {
 555                 warning(stream->pos, "string too long (%d bytes, %d bytes max)", len, MAX_STRING);
 556                 len = MAX_STRING;
 557         }
 558
 559         string = __alloc_string(len+1);
 560         memcpy(string->data, buffer, len);
 561         string->data[len] = '\0';
 562         string->length = len+1;
 563
 564         /* Pass it on.. */
 565         token = stream->token;
 566         token_type(token) = TOKEN_STRING;
 567         token->string = string;
 568         add_token(stream);
 569
 570         return next;
 571 }
 572
 573 static int drop_stream_eoln(stream_t *stream)
 574 {
 575         int next = nextchar(stream);
 576         drop_token(stream);
 577         for (;;) {
 578                 int curr = next;
 579                 if (curr == EOF)
 580                         return next;
 581                 next = nextchar(stream);
 582                 if (curr == '\n')
 583                         return next;
 584         }
 585 }
 586
 587 static int drop_stream_comment(stream_t *stream)
 588 {
 589         int newline;
 590         int next;
 591         drop_token(stream);
 592         newline = stream->pos.newline;
 593
 594         next = nextchar(stream);
 595         for (;;) {
 596                 int curr = next;
 597                 if (curr == EOF) {
 598                         warning(stream->pos, "End of file in the middle of a comment");
 599                         return curr;
 600                 }
 601                 next = nextchar(stream);
 602                 if (curr == '*' && next == '/')
 603                         break;
 604         }
 605         stream->pos.newline = newline;
 606         return nextchar(stream);
 607 }
 608
 609 unsigned char combinations[][3] = COMBINATION_STRINGS;
 610
 611 #define NR_COMBINATIONS (SPECIAL_ARG_SEPARATOR - SPECIAL_BASE)
 612
 613 static int get_one_special(int c, stream_t *stream)
 614 {
 615         struct token *token;
 616         unsigned char c1, c2, c3;
 617         int next, value, i;
 618         char *comb;
 619
 620         next = nextchar(stream);
 621
 622         /*
 623          * Check for numbers, strings, character constants, and comments
 624          */
 625         switch (c) {
 626         case '.':
 627                 if (next >= '0' && next <= '9')
 628                         return get_one_number(c, next, stream);
 629                 break;
 630         case '"':
 631                 return get_string_token(next, stream);
 632         case '\'':
 633                 return get_char_token(next, stream);
 634         case '/':
 635                 if (next == '/')
 636                         return drop_stream_eoln(stream);
 637                 if (next == '*')
 638                         return drop_stream_comment(stream);
 639         }
 640
 641         /*
 642          * Check for combinations
 643          */
 644         value = c;
 645         if (cclass[next + 1] & ValidSecond) {
 646                 comb = combinations[0];
 647                 c1 = c; c2 = next; c3 = 0;
 648                 for (i = 0; i < NR_COMBINATIONS; i++) {
 649                         if (comb[0] == c1 && comb[1] == c2 && comb[2] == c3) {
 650                                 value = i + SPECIAL_BASE;
 651                                 next = nextchar(stream);
 652                                 if (c3)
 653                                         break;
 654                                 c3 = next;
 655                         }
 656                         comb += 3;
 657                 }
 658         }
 659
 660         /* Pass it on.. */
 661         token = stream->token;
 662         token_type(token) = TOKEN_SPECIAL;
 663         token->special = value;
 664         add_token(stream);
 665         return next;
 666 }
 667
 668 #define IDENT_HASH_BITS (10)
 669 #define IDENT_HASH_SIZE (1<<IDENT_HASH_BITS)
 670 #define IDENT_HASH_MASK (IDENT_HASH_SIZE-1)
 671
 672 #define ident_hash_init(c)              (c)
 673 #define ident_hash_add(oldhash,c)       ((oldhash)*11 + (c))
 674 #define ident_hash_end(hash)            ((((hash) >> IDENT_HASH_BITS) + (hash)) & IDENT_HASH_MASK)
 675
 676 static struct ident *hash_table[IDENT_HASH_SIZE];
 677 int ident_hit, ident_miss;
 678
 679 void show_identifier_stats(void)
 680 {
 681         int i;
 682         int distribution[100];
 683
 684         fprintf(stderr, "identifiers: %d hits, %d misses\n",
 685                 ident_hit, ident_miss);
 686
 687         for (i = 0; i < 100; i++)
 688                 distribution[i] = 0;
 689
 690         for (i = 0; i < IDENT_HASH_SIZE; i++) {
 691                 struct ident * ident = hash_table[i];
 692                 int count = 0;
 693
 694                 while (ident) {
 695                         count++;
 696                         ident = ident->next;
 697                 }
 698                 if (count > 99)
 699                         count = 99;
 700                 distribution[count]++;
 701         }
 702
 703         for (i = 0; i < 100; i++) {
 704                 if (distribution[i])
 705                         fprintf(stderr, "%2d: %d buckets\n", i, distribution[i]);
 706         }
 707 }
 708
 709 static struct ident *alloc_ident(const char *name, int len)
 710 {
 711         struct ident *ident = __alloc_ident(len);
 712         ident->symbols = NULL;
 713         ident->len = len;
 714         ident->tainted = 0;
 715         memcpy(ident->name, name, len);
 716         return ident;
 717 }
 718
 719 static struct ident * insert_hash(struct ident *ident, unsigned long hash)
 720 {
 721         ident->next = hash_table[hash];
 722         hash_table[hash] = ident;
 723         ident_miss++;
 724         return ident;
 725 }
 726
 727 static struct ident *create_hashed_ident(const char *name, int len, unsigned long hash)
 728 {
 729         struct ident *ident;
 730         struct ident **p;
 731
 732         p = &hash_table[hash];
 733         while ((ident = *p) != NULL) {
 734                 if (ident->len == len && !memcmp(ident->name, name, len)) {
 735                         ident_hit++;
 736                         return ident;
 737                 }
 738                 //misses++;
 739                 p = &ident->next;
 740         }
 741         ident = alloc_ident(name, len);
 742         *p = ident;
 743         ident->next = NULL;
 744         ident_miss++;
 745         return ident;
 746 }
 747
 748 static unsigned long hash_name(const char *name, int len)
 749 {
 750         unsigned long hash;
 751         const unsigned char *p = (const unsigned char *)name;
 752
 753         hash = ident_hash_init(*p++);
 754         while (--len) {
 755                 unsigned int i = *p++;
 756                 hash = ident_hash_add(hash, i);
 757         }
 758         return ident_hash_end(hash);
 759 }
 760
 761 struct ident *hash_ident(struct ident *ident)
 762 {
 763         return insert_hash(ident, hash_name(ident->name, ident->len));
 764 }
 765
 766 struct ident *built_in_ident(const char *name)
 767 {
 768         int len = strlen(name);
 769         return create_hashed_ident(name, len, hash_name(name, len));
 770 }
 771
 772 struct token *built_in_token(int stream, const char *name)
 773 {
 774         struct token *token;
 775
 776         token = __alloc_token(0);
 777         token->pos.stream = stream;
 778         token_type(token) = TOKEN_IDENT;
 779         token->ident = built_in_ident(name);
 780         return token;
 781 }
 782
 783 static int get_one_identifier(int c, stream_t *stream)
 784 {
 785         struct token *token;
 786         struct ident *ident;
 787         unsigned long hash;
 788         char buf[256];
 789         int len = 1;
 790         int next;
 791
 792         hash = ident_hash_init(c);
 793         buf[0] = c;
 794         for (;;) {
 795                 next = nextchar(stream);
 796                 if (!(cclass[next + 1] & (Letter | Digit)))
 797                         break;
 798                 if (len >= sizeof(buf))
 799                         break;
 800                 hash = ident_hash_add(hash, next);
 801                 buf[len] = next;
 802                 len++;
 803         };
 804         hash = ident_hash_end(hash);
 805
 806         ident = create_hashed_ident(buf, len, hash);
 807
 808         /* Pass it on.. */
 809         token = stream->token;
 810         token_type(token) = TOKEN_IDENT;
 811         token->ident = ident;
 812         add_token(stream);
 813         return next;
 814 }
 815
 816 static int get_one_token(int c, stream_t *stream)
 817 {
 818         long class = cclass[c + 1];
 819         if (class & Digit)
 820                 return get_one_number(c, nextchar(stream), stream);
 821         if (class & Letter)
 822                 return get_one_identifier(c, stream);
 823         return get_one_special(c, stream);
 824 }
 825
 826 static struct token *setup_stream(stream_t *stream, int idx, int fd,
 827         unsigned char *buf, unsigned int buf_size)
 828 {
 829         struct token *begin;
 830
 831         stream->pos.stream = idx;
 832         stream->pos.line = 1;
 833         stream->pos.newline = 1;
 834         stream->pos.whitespace = 0;
 835         stream->pos.pos = 0;
 836         stream->pos.noexpand = 0;
 837
 838         stream->token = NULL;
 839         stream->fd = fd;
 840         stream->offset = 0;
 841         stream->size = buf_size;
 842         stream->buffer = buf;
 843
 844         begin = alloc_token(stream);
 845         token_type(begin) = TOKEN_STREAMBEGIN;
 846         stream->tokenlist = &begin->next;
 847         return begin;
 848 }
 849
 850 static void tokenize_stream(stream_t *stream, struct token *endtoken)
 851 {
 852         int c = nextchar(stream);
 853         while (c != EOF) {
 854                 if (!isspace(c)) {
 855                         struct token *token = alloc_token(stream);
 856                         stream->token = token;
 857                         stream->pos.newline = 0;
 858                         stream->pos.whitespace = 0;
 859                         c = get_one_token(c, stream);
 860                         continue;
 861                 }
 862                 stream->pos.whitespace = 1;
 863                 c = nextchar(stream);
 864         }
 865         mark_eof(stream, endtoken);
 866 }
 867
 868 struct token * tokenize_buffer(unsigned char *buffer, unsigned long size, struct token *endtoken)
 869 {
 870         stream_t stream;
 871         struct token *begin;
 872
 873         begin = setup_stream(&stream, 0, -1, buffer, size);
 874         tokenize_stream(&stream, endtoken);
 875         return begin;
 876 }
 877
 878 struct token * tokenize(const char *name, int fd, struct token *endtoken, const char **next_path)
 879 {
 880         struct token *begin;
 881         stream_t stream;
 882         unsigned char buffer[BUFSIZE];
 883         int idx;
 884
 885         idx = init_stream(name, fd, next_path);
 886         if (idx < 0) {
 887                 // info(endtoken->pos, "File %s is const", name);
 888                 return endtoken;
 889         }
 890
 891         begin = setup_stream(&stream, idx, fd, buffer, 0);
 892         tokenize_stream(&stream, endtoken);
 893         return begin;
 894 }