acc: print ->str for debugging
[acc.git] / main.c
blobef6a90533054343d1e45530a126b1d4b2dad3337
1 /* Alexey's C compiler. */
2 #include <stdlib.h>
3 #include <sys/types.h>
4 #include <sys/stat.h>
5 #include <fcntl.h>
6 #include <stdio.h>
7 #include <errno.h>
8 #include <unistd.h>
9 #include <stdarg.h>
10 #include <stdint.h>
11 #include <inttypes.h>
12 #include <string.h>
13 #include <limits.h>
/* Source location of a character: line and column, both counted from 1. */
struct pos {
	unsigned int line;
	unsigned int column;
};
/*
 * Function attribute helpers.
 *
 * With a GNU-compatible compiler, __noreturn marks a function that never
 * returns and __printf(a, b) marks a printf-like function whose format
 * string is argument number a and whose first variadic argument is
 * argument number b.  With any other compiler both expand to nothing.
 */
#if defined(__GNUC__)
#define __noreturn __attribute__((noreturn))
#define __printf(a, b) __attribute__((format(printf, a, b)))
#else
#define __noreturn
#define __printf(a, b)
#endif
27 static void warning(struct pos *pos, const char *fmt, ...) __printf(2, 3);
28 static void warning(struct pos *pos, const char *fmt, ...)
30 va_list args;
32 fprintf(stderr, "%u:%u: warning: ", pos->line, pos->column);
33 va_start(args, fmt);
34 vfprintf(stderr, fmt, args);
35 va_end(args);
36 fputc('\n', stderr);
39 static void error_exit(struct pos *pos, const char *fmt, ...) __printf(2, 3) __noreturn;
40 static void error_exit(struct pos *pos, const char *fmt, ...)
42 va_list args;
44 fprintf(stderr, "%u:%u: error: ", pos->line, pos->column);
45 va_start(args, fmt);
46 vfprintf(stderr, fmt, args);
47 va_end(args);
48 fputc('\n', stderr);
49 exit(EXIT_FAILURE);
52 static void perror_exit(const char *fmt, ...) __printf(1, 2) __noreturn;
53 static void perror_exit(const char *fmt, ...)
55 int old_errno = errno;
56 va_list args;
58 fputs("acc: ", stderr);
59 va_start(args, fmt);
60 vfprintf(stderr, fmt, args);
61 va_end(args);
62 fputs(": ", stderr);
63 errno = old_errno;
64 perror(NULL);
65 exit(EXIT_FAILURE);
68 static void _error_exit(const char *fmt, ...) __printf(1, 2) __noreturn;
69 static void _error_exit(const char *fmt, ...)
71 va_list args;
73 fputs("acc: error: ", stderr);
74 va_start(args, fmt);
75 vfprintf(stderr, fmt, args);
76 va_end(args);
77 fputc('\n', stderr);
78 exit(EXIT_FAILURE);
/*
 * Allocate size bytes or die.
 *
 * Returns a non-NULL pointer that the caller releases with free().
 * On allocation failure the process is terminated via perror_exit().
 */
static void *xmalloc(size_t size)
{
	void *p;

	/*
	 * malloc(0) is allowed to return NULL without that being an error.
	 * Ask for at least one byte so that NULL always means a genuine
	 * failure (an empty input file must not abort the compiler).
	 */
	p = malloc(size ? size : 1);
	if (!p)
		perror_exit("%s: size %zu", __func__, size);
	return p;
}
/*
 * Return a freshly allocated copy of the n bytes at src.
 * Allocation goes through xmalloc(); the caller owns the copy and
 * releases it with free().
 */
static void *xmemdup(const void *src, size_t n)
{
	/* memcpy() returns its destination pointer. */
	return memcpy(xmalloc(n), src, n);
}
100 static ssize_t _xread(int fd, void *buf, size_t count)
102 ssize_t rv;
104 do {
105 rv = read(fd, buf, count);
106 } while (rv == -1 && (errno == EAGAIN || errno == EINTR));
107 return rv;
/*
 * Read exactly count bytes from fd into buf or die.
 *
 * A read error terminates via perror_exit(); end of file before count
 * bytes have arrived terminates via _error_exit().  The diagnostics show
 * the not-yet-filled part of the buffer and the number of bytes still
 * missing.
 */
static void xread(int fd, void *buf, size_t count)
{
	char *dst = buf;
	size_t left = count;

	while (left != 0) {
		ssize_t n = _xread(fd, dst, left);

		if (n == -1)
			perror_exit("read fd %d, buf %p, count %zu", fd, (void *)dst, left);
		if (n == 0)
			_error_exit("fd %d truncated, buf %p, count %zu", fd, (void *)dst, left);

		dst += n;
		left -= n;
	}
}
/*
 * Normalise line endings in place: every "\r\n" pair and every lone '\r'
 * becomes a single '\n'.
 *
 * c      buffer of *nr_c characters, rewritten in place
 * nr_c   in: number of characters; out: number of characters remaining
 *
 * Runs in a single pass with a read index and a write index instead of
 * shifting the tail of the buffer for every "\r\n" found.
 */
static void fix_newline(char *c, unsigned int *nr_c)
{
	const unsigned int n = *nr_c;
	unsigned int rd, wr;

	wr = 0;
	for (rd = 0; rd < n; rd++) {
		if (c[rd] == '\r') {
			/* CR of a CRLF pair: drop it, the LF is copied next. */
			if (rd + 1 < n && c[rd + 1] == '\n')
				continue;
			/* Lone CR counts as a newline of its own. */
			c[wr++] = '\n';
		} else {
			c[wr++] = c[rd];
		}
	}
	*nr_c = wr;
}
143 static struct pos *line_column(const char *c, unsigned int nr_c)
145 struct pos *pos;
146 unsigned int line, column;
147 unsigned int i;
149 if (nr_c >= 0xffffffff / sizeof(struct pos))
150 _error_exit("integer overflow nr_c %u", nr_c);
151 pos = xmalloc(nr_c * sizeof(struct pos));
153 line = 1;
154 column = 1;
155 for (i = 0; i < nr_c; i++) {
156 pos[i].line = line;
157 pos[i].column = column;
159 if (c[i] == '\n') {
160 line++;
161 column = 1;
162 } else
163 column++;
165 return pos;
168 static void warn_trigraph(const char *c, unsigned int nr_c, struct pos *pos)
170 unsigned int i;
172 i = 0;
173 while (i + 2 < nr_c) {
174 if (c[i] == '?' && c[i + 1] == '?') {
175 switch (c[i + 2]) {
176 case '=':case ')':case '!':
177 case '(':case '\'':case '>':
178 case '/':case '<':case '-':
179 warning(&pos[i], "trigraph sequence ??%c, ignoring", (unsigned char)c[i + 2]);
180 i += 3;
181 break;
182 default:
183 i++;
185 } else
186 i++;
190 static void delete_backslash_newline(char *c, unsigned int *nr_c, struct pos *pos)
192 unsigned int i;
194 i = 0;
195 while (i + 1 < *nr_c) {
196 if (c[i] == '\\' && c[i + 1] == '\n') {
197 unsigned int nr_to_move = *nr_c - i - 2;
199 memmove(&c[i], &c[i + 2], nr_to_move);
200 memmove(&pos[i], &pos[i + 2], nr_to_move * sizeof(struct pos));
201 (*nr_c) -= 2;
202 } else
203 i++;
207 struct pp_token {
208 struct pp_token *next;
209 enum pp_token_type {
210 PP_TOKEN_IDENTIFIER = UCHAR_MAX + 1,
211 PP_TOKEN_NUMBER,
212 PP_TOKEN_STRING,
213 PP_TOKEN_CHAR,
215 #define _2(c1, c2) (((c1) << 8) | (c2))
216 #define _3(c1, c2, c3) (((c1) << 16)| ((c2) << 8) | (c3))
217 PP_TOKEN_DOTDOTDOT = _3('.', '.', '.'),
218 PP_TOKEN_DEREFERENCE = _2('-', '>'),
219 PP_TOKEN_SUB_EQ = _2('-', '='),
220 PP_TOKEN_DEC = _2('-', '-'),
221 PP_TOKEN_ADD_EQ = _2('+', '='),
222 PP_TOKEN_INC = _2('+', '+'),
223 PP_TOKEN_AND_EQ = _2('&', '='),
224 PP_TOKEN_AND = _2('&', '&'),
225 PP_TOKEN_MUL_EQ = _2('*', '='),
226 PP_TOKEN_NOT_EQ = _2('!', '='),
227 PP_TOKEN_DIV_EQ = _2('/', '='),
228 PP_TOKEN_REM_EQ = _2('%', '='),
229 PP_TOKEN_LSHIFT_EQ = _3('<', '<', '='),
230 PP_TOKEN_LSHIFT = _2('<', '<'),
231 PP_TOKEN_LEQ = _2('<', '='),
232 PP_TOKEN_RSHIFT_EQ = _3('>', '>', '='),
233 PP_TOKEN_RSHIFT = _2('>', '>'),
234 PP_TOKEN_GEQ = _2('>', '='),
235 PP_TOKEN_EQ = _2('=', '='),
236 PP_TOKEN_XOR_EQ = _2('^', '='),
237 PP_TOKEN_OR_EQ = _2('|', '='),
238 PP_TOKEN_OR = _2('|', '|'),
239 PP_TOKEN_SHARPSHARP = _2('#', '#'),
240 #undef _2
241 #undef _3
242 } type;
243 char *str; /* string representation, if type is not enough */
244 struct pos pos;
247 static struct pp_token *pp_token_create(enum pp_token_type type, struct pos *pos)
249 struct pp_token *ppt;
251 ppt = xmalloc(sizeof(struct pp_token));
252 ppt->next = NULL;
253 ppt->type = type;
254 ppt->str = NULL;
255 ppt->pos = *pos;
256 return ppt;
259 /* [start, end) */
260 static void pp_token_add(struct pp_token *ppt, const char *c, unsigned int start, unsigned int end)
262 ppt->str = xmemdup(&c[start], end - start);
265 static void pp_token_free(struct pp_token *ppt_head)
267 struct pp_token *ppt;
269 ppt = ppt_head;
270 while (ppt) {
271 struct pp_token *next;
273 next = ppt->next;
274 if (ppt->str)
275 free(ppt->str);
276 free(ppt);
277 ppt = next;
281 static void pp_token_print(struct pp_token *ppt)
283 printf("%u:%u:\t", ppt->pos.line, ppt->pos.column);
284 switch (ppt->type) {
285 case '\n':
286 printf("\\n");
287 break;
288 case ' ':
289 printf("' '");
290 break;
291 case PP_TOKEN_IDENTIFIER:
292 printf("pp-identifier %s", ppt->str);
293 break;
294 case PP_TOKEN_NUMBER:
295 printf("pp-number %s", ppt->str);
296 break;
297 case PP_TOKEN_STRING:
298 printf("pp-string \"%s\"", ppt->str);
299 break;
300 case PP_TOKEN_CHAR:
301 printf("pp-char '%s'", ppt->str);
302 break;
303 case PP_TOKEN_DOTDOTDOT:
304 case PP_TOKEN_LSHIFT_EQ:
305 case PP_TOKEN_RSHIFT_EQ:
306 printf("%c%c%c", (ppt->type >> 16) & 0xff, (ppt->type >> 8) & 0xff, ppt->type & 0xff);
307 break;
308 case PP_TOKEN_DEREFERENCE:
309 case PP_TOKEN_SUB_EQ:
310 case PP_TOKEN_DEC:
311 case PP_TOKEN_ADD_EQ:
312 case PP_TOKEN_INC:
313 case PP_TOKEN_AND_EQ:
314 case PP_TOKEN_AND:
315 case PP_TOKEN_MUL_EQ:
316 case PP_TOKEN_NOT_EQ:
317 case PP_TOKEN_DIV_EQ:
318 case PP_TOKEN_REM_EQ:
319 case PP_TOKEN_LSHIFT:
320 case PP_TOKEN_LEQ:
321 case PP_TOKEN_RSHIFT:
322 case PP_TOKEN_GEQ:
323 case PP_TOKEN_EQ:
324 case PP_TOKEN_XOR_EQ:
325 case PP_TOKEN_OR_EQ:
326 case PP_TOKEN_OR:
327 case PP_TOKEN_SHARPSHARP:
328 printf("%c%c", (ppt->type >> 8) & 0xff, ppt->type & 0xff);
329 break;
330 default:
331 printf("%c", ppt->type);
333 putc('\n', stdout);
/* Return 1 if c is an ASCII letter or an underscore, 0 otherwise. */
static int pp_nondigit(const char c)
{
	if (c == '_')
		return 1;
	if (c >= 'a' && c <= 'z')
		return 1;
	return c >= 'A' && c <= 'Z';
}
/* Return 1 if c is an octal digit ('0' through '7'), 0 otherwise. */
static int pp_octdigit(const char c)
{
	return c >= '0' && c < '8';
}
/* Return 1 if c is a decimal digit ('0' through '9'), 0 otherwise. */
static int pp_digit(const char c)
{
	return !(c < '0' || c > '9');
}
/*
 * Return 1 if c is a hexadecimal digit ('0'-'9', 'a'-'f' or 'A'-'F'),
 * 0 otherwise.
 */
static int pp_hexdigit(const char c)
{
	if ('0' <= c && c <= '9')
		return 1;
	if ('a' <= c && c <= 'f')
		return 1;
	return 'A' <= c && c <= 'F';
}
/*
 * Find where an identifier ends.
 *
 * Starting at c[start], skip letters, underscores, digits and complete
 * universal character names (\u plus 4 hex digits, \U plus 8 hex digits).
 * Returns the index of the first character that is not part of the
 * identifier, or nr_c if the identifier runs to the end of the text.
 */
static unsigned int _pp_identifier_end(const char *c, unsigned int nr_c, unsigned int start)
{
	unsigned int end = start;

	while (end < nr_c) {
		unsigned int nr_hex, k;

		if (pp_nondigit(c[end]) || pp_digit(c[end])) {
			end++;
			continue;
		}

		/* Anything else must be a complete \uXXXX or \UXXXXXXXX. */
		if (c[end] != '\\' || end + 1 >= nr_c)
			break;
		if (c[end + 1] == 'u')
			nr_hex = 4;
		else if (c[end + 1] == 'U')
			nr_hex = 8;
		else
			break;

		/* All hex digits have to lie inside the text. */
		if (end + 1 + nr_hex >= nr_c)
			break;
		for (k = 0; k < nr_hex && pp_hexdigit(c[end + 2 + k]); k++)
			;
		if (k != nr_hex)
			break;

		end += 2 + nr_hex;
	}
	return end;
}
/*
 * Find where a preprocessing number ends.
 *
 * c[start] is taken as the first character of the number; from
 * c[start + 1] on, skip digits, letters, underscores, '.', an exponent
 * letter (e E p P) immediately followed by a sign, and complete
 * universal character names.  Returns the index of the first character
 * that is not part of the number, or nr_c if it runs to the end.
 */
static unsigned int pp_number_end(const char *c, unsigned int nr_c, unsigned int start)
{
	unsigned int end = start + 1;

	while (end < nr_c) {
		const char ch = c[end];
		unsigned int nr_hex, k;

		/* Signed exponent: the sign is only part of the number here. */
		if ((ch == 'e' || ch == 'E' || ch == 'p' || ch == 'P') &&
		    end + 1 < nr_c && (c[end + 1] == '+' || c[end + 1] == '-')) {
			end += 2;
			continue;
		}
		if (pp_digit(ch) || pp_nondigit(ch) || ch == '.') {
			end++;
			continue;
		}

		/* Anything else must be a complete \uXXXX or \UXXXXXXXX. */
		if (ch != '\\' || end + 1 >= nr_c)
			break;
		if (c[end + 1] == 'u')
			nr_hex = 4;
		else if (c[end + 1] == 'U')
			nr_hex = 8;
		else
			break;

		/* All hex digits have to lie inside the text. */
		if (end + 1 + nr_hex >= nr_c)
			break;
		for (k = 0; k < nr_hex && pp_hexdigit(c[end + 2 + k]); k++)
			;
		if (k != nr_hex)
			break;

		end += 2 + nr_hex;
	}
	return end;
}
/*
 * Find the end of a C comment whose opening slash-star is at c[start].
 *
 * Returns the index just past the closing star-slash, or nr_c when the
 * comment is not closed before the end of the text.  The search begins
 * after the two opening characters, so the star of the opener cannot
 * double as the star of the closer.
 */
static unsigned int c_comment_end(const char *c, unsigned int nr_c, unsigned int start)
{
	unsigned int k;

	for (k = start + 2; k + 1 < nr_c; k++) {
		if (c[k] == '*' && c[k + 1] == '/')
			return k + 2;
	}
	return nr_c;
}
/*
 * Find the end of a "//" comment that starts at c[start].
 *
 * Returns the index of the '\n' that terminates the comment (the newline
 * itself is not part of it), or nr_c if the text ends first.
 */
static unsigned int cpp_comment_end(const char *c, unsigned int nr_c, unsigned int start)
{
	const unsigned int from = start + 2;
	const char *nl;

	if (from >= nr_c)
		return from;

	nl = memchr(c + from, '\n', nr_c - from);
	if (!nl)
		return nr_c;
	return (unsigned int)(nl - c);
}
424 static unsigned int escape_sequence_end(const char *c, unsigned int nr_c, unsigned int start, struct pos *_pos)
426 struct pos *pos = &_pos[start];
427 unsigned int i;
429 i = start + 1;
430 if (i >= nr_c)
431 error_exit(pos, "incomplete escape sequence");
432 switch (c[i]) {
433 case '\'':case '"':case '?':case '\\':
434 case 'a':case 'b':case 'f':case 'n':case 'r':case 't':case 'v':
435 return i + 1;
436 case '0':case '1':case '2':case '3':case '4':case '6':case '7':
437 if (i + 2 < nr_c && pp_octdigit(c[i + 1]) && pp_octdigit(c[i + 2]))
438 return i + 3;
439 if (i + 1 < nr_c && pp_octdigit(c[i + 1]))
440 return i + 2;
441 return i + 1;
442 case 'x':
443 i++;
444 while (i < nr_c && pp_hexdigit(c[i]))
445 i++;
446 if (i == start + 2)
447 error_exit(pos, "invalid hexadecimal escape sequence");
448 return i;
449 case 'u':
450 if (i + 4 < nr_c &&
451 pp_hexdigit(c[i + 1]) && pp_hexdigit(c[i + 2]) && pp_hexdigit(c[i + 3]) && pp_hexdigit(c[i + 4]))
452 return i + 5;
453 error_exit(pos, "invalid universal character name");
454 case 'U':
455 if (i + 8 < nr_c &&
456 pp_hexdigit(c[i + 1]) && pp_hexdigit(c[i + 2]) && pp_hexdigit(c[i + 3]) && pp_hexdigit(c[i + 4]) &&
457 pp_hexdigit(c[i + 5]) && pp_hexdigit(c[i + 6]) && pp_hexdigit(c[i + 7]) && pp_hexdigit(c[i + 8]))
458 return i + 9;
459 error_exit(pos, "invalid universal character name");
460 default:
461 error_exit(pos, "invalid escape sequence");
465 static unsigned int pp_string_end(const char *c, unsigned int nr_c, unsigned int start, struct pos *_pos)
467 struct pos *pos = &_pos[start];
468 unsigned int i;
470 i = start + 1;
471 while (i < nr_c && c[i] != '"') {
472 switch (c[i]) {
473 case '\n':
474 goto incomplete;
475 case '\\':
476 i = escape_sequence_end(c, nr_c, i, _pos);
477 break;
478 default:
479 i++;
482 if (i >= nr_c)
483 goto incomplete;
484 return i + 1;
486 incomplete:
487 error_exit(pos, "incomplete string literal");
490 static unsigned int pp_char_end(const char *c, unsigned int nr_c, unsigned int start, struct pos *_pos)
492 struct pos *pos = &_pos[start];
493 unsigned int i;
495 i = start + 1;
496 while (i < nr_c && c[i] != '\'') {
497 switch (c[i]) {
498 case '\n':
499 goto incomplete;
500 case '\\':
501 i = escape_sequence_end(c, nr_c, i, _pos);
502 break;
503 default:
504 i++;
507 if (i >= nr_c)
508 goto incomplete;
509 if (i == start + 1)
510 goto empty;
511 return i + 1;
513 incomplete:
514 error_exit(pos, "incomplete character constant");
515 empty:
516 error_exit(pos, "empty character constant");
/*
 * Skip spaces and tabs starting at c[start].
 *
 * Returns the index of the first character that is neither ' ' nor '\t',
 * or nr_c if only such characters remain.  A start at or beyond nr_c is
 * returned unchanged.
 */
static unsigned int whitespace_end(const char *c, unsigned int nr_c, unsigned int start)
{
	unsigned int k;

	for (k = start; k < nr_c; k++) {
		if (c[k] != ' ' && c[k] != '\t')
			break;
	}
	return k;
}
529 static struct pp_token *pp_tokenize(const char *c, unsigned int nr_c, struct pos *_pos)
531 struct pp_token *ppt_head, *ppt_tail;
532 int pp_directive_allowed;
533 unsigned int i;
535 ppt_head = NULL;
536 pp_directive_allowed = 1;
537 i = 0;
538 while (i < nr_c) {
539 struct pos *pos;
540 struct pp_token *ppt;
542 if (pp_directive_allowed) {
543 static const char _error[] = {'e', 'r', 'r', 'o', 'r'};
544 unsigned int sharp_start, directive_start, directive_end;
545 unsigned int j;
547 j = whitespace_end(c, nr_c, i);
548 if (j >= nr_c || c[j] != '#')
549 goto not_pp_directive;
550 sharp_start = j;
551 j = whitespace_end(c, nr_c, j + 1);
552 if (j >= nr_c) {
553 warning(&_pos[sharp_start], "empty preprocessor directive");
554 i = j;
555 continue;
557 if (c[j] == '\n') {
558 warning(&_pos[sharp_start], "empty preprocessor directive");
559 /* Eat newline after # */
560 i = j + 1;
561 continue;
563 directive_start = j;
564 while (j < nr_c && 'a' <= c[j] && c[j] <= 'z')
565 j++;
566 directive_end = j;
568 if (directive_end - directive_start == sizeof(_error) / sizeof(_error[0]) && memcmp(&c[directive_start], _error, sizeof(_error)) == 0)
569 error_exit(&_pos[sharp_start], "%s", "");
571 error_exit(&_pos[sharp_start], "unknown preprocessor directive");
574 not_pp_directive:
575 pos = &_pos[i];
576 switch (c[i]) {
577 unsigned int j;
579 case '\t':
580 case ' ':
581 ppt = pp_token_create(' ', pos);
582 i++;
583 break;
584 case '\n':
585 ppt = pp_token_create('\n', pos);
586 i++;
587 break;
588 case '[':case ']':
589 case '(':case ')':
590 case '{':case '}':
591 case '~':
592 case '?':
593 case ':':
594 case ';':
595 case ',':
596 pp_token_simple:
597 ppt = pp_token_create(c[i], pos);
598 i++;
599 break;
600 case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'g':
601 case 'h':case 'i':case 'j':case 'k':case 'l':case 'm':case 'n':
602 case 'o':case 'p':case 'q':case 'r':case 's':case 't':case 'u':
603 case 'v':case 'w':case 'x':case 'y':case 'z':
604 case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case 'G':
605 case 'H':case 'I':case 'J':case 'K':case 'L':case 'M':case 'N':
606 case 'O':case 'P':case 'Q':case 'R':case 'S':case 'T':case 'U':
607 case 'V':case 'W':case 'X':case 'Y':case 'Z':
608 case '_':
609 ppt = pp_token_create(PP_TOKEN_IDENTIFIER, pos);
610 j = _pp_identifier_end(c, nr_c, i + 1);
611 pp_token_add(ppt, c, i, j);
612 i = j;
613 break;
614 case '\\':
615 if (i + 5 < nr_c && c[i + 1] == 'u' &&
616 pp_hexdigit(c[i + 2]) && pp_hexdigit(c[i + 3]) && pp_hexdigit(c[i + 4]) && pp_hexdigit(c[i + 5])) {
617 ppt = pp_token_create(PP_TOKEN_IDENTIFIER, pos);
618 j = _pp_identifier_end(c, nr_c, i + 2 + 4);
619 pp_token_add(ppt, c, i, j);
620 i = j;
621 } else if (i + 9 < nr_c && c[i + 1] == 'U' &&
622 pp_hexdigit(c[i + 2]) && pp_hexdigit(c[i + 3]) && pp_hexdigit(c[i + 4]) && pp_hexdigit(c[i + 5]) &&
623 pp_hexdigit(c[i + 6]) && pp_hexdigit(c[i + 7]) && pp_hexdigit(c[i + 8]) && pp_hexdigit(c[i + 9])) {
624 ppt = pp_token_create(PP_TOKEN_IDENTIFIER, pos);
625 j = _pp_identifier_end(c, nr_c, i + 2 + 4 + 4);
626 pp_token_add(ppt, c, i, j);
627 i = j;
628 } else
629 error_exit(pos, "unknown character %08"PRIx32, c[i]);
630 break;
631 case '0':case '1':case '2':case '3':case '4':
632 case '5':case '6':case '7':case '8':case '9':
633 ppt = pp_token_create(PP_TOKEN_NUMBER, pos);
634 j = pp_number_end(c, nr_c, i);
635 pp_token_add(ppt, c, i, j);
636 i = j;
637 break;
638 case '.':
639 if (i + 2 < nr_c && c[i + 1] == '.' && c[i + 2] == '.') {
640 ppt = pp_token_create(PP_TOKEN_DOTDOTDOT, pos);
641 i += 3;
642 } else if (i + 1 < nr_c && pp_digit(c[i + 1])) {
643 ppt = pp_token_create(PP_TOKEN_NUMBER, pos);
644 j = pp_number_end(c, nr_c, i + 1);
645 pp_token_add(ppt, c, i, j);
646 i = j;
647 } else
648 goto pp_token_simple;
649 break;
650 case '"':
651 ppt = pp_token_create(PP_TOKEN_STRING, pos);
652 j = pp_string_end(c, nr_c, i, _pos);
653 pp_token_add(ppt, c, i + 1, j - 1);
654 i = j;
655 break;
656 case '\'':
657 ppt = pp_token_create(PP_TOKEN_CHAR, pos);
658 j = pp_char_end(c, nr_c, i, _pos);
659 pp_token_add(ppt, c, i + 1, j - 1);
660 i = j;
661 break;
662 case '/':
663 if (i + 1 < nr_c && c[i + 1] == '*') {
664 ppt = pp_token_create(' ', pos);
665 i = c_comment_end(c, nr_c, i);
666 } else if (i + 1 < nr_c && c[i + 1] == '/') {
667 warning(pos, "C++ comment");
668 ppt = pp_token_create(' ', pos);
669 i = cpp_comment_end(c, nr_c, i);
670 } else if (i + 1 < nr_c && c[i + 1] == '=') {
671 ppt = pp_token_create((c[i] << 8) | c[i + 1], pos);
672 i += 2;
673 } else
674 goto pp_token_simple;
675 break;
676 case '-':
677 if (i + 1 < nr_c && (c[i + 1] == '>' || c[i + 1] == '=' || c[i + 1] == '-')) {
678 ppt = pp_token_create((c[i] << 8) | c[i + 1], pos);
679 i += 2;
680 } else
681 goto pp_token_simple;
682 break;
683 case '+':
684 case '&':
685 case '|':
686 if (i + 1 < nr_c && (c[i + 1] == '=' || c[i + 1] == c[i])) {
687 ppt = pp_token_create((c[i] << 8) | c[i + 1], pos);
688 i += 2;
689 } else
690 goto pp_token_simple;
691 break;
692 case '*':
693 case '!':
694 case '%':
695 case '=':
696 case '^':
697 if (i + 1 < nr_c && c[i + 1] == '=') {
698 ppt = pp_token_create((c[i] << 8) | c[i + 1], pos);
699 i += 2;
700 } else
701 goto pp_token_simple;
702 break;
703 case '<':
704 case '>':
705 if (i + 2 < nr_c && c[i + 1] == c[i] && c[i + 2] == '=') {
706 ppt = pp_token_create((c[i] << 16) | (c[i + 1] << 8) | c[i + 2], pos);
707 i += 3;
708 } else if (i + 1 < nr_c && (c[i + 1] == c[i] || c[i + 1] == '=')) {
709 ppt = pp_token_create((c[i] << 8) | c[i + 1], pos);
710 i += 2;
711 } else
712 goto pp_token_simple;
713 break;
714 case '#':
715 if (i + 1 < nr_c && c[i + 1] == '#') {
716 ppt = pp_token_create((c[i] << 8) | c[i + 1], pos);
717 i += 2;
718 } else
719 goto pp_token_simple;
720 break;
721 default:
722 error_exit(pos, "unknown character %08"PRIx32, c[i]);
725 if (!ppt_head)
726 ppt_head = ppt;
727 else
728 ppt_tail->next = ppt;
729 ppt_tail = ppt;
731 if (ppt->type == '\n')
732 pp_directive_allowed = 1;
733 else if (ppt->type == ' ')
735 else
736 pp_directive_allowed = 0;
738 return ppt_head;
741 int main(int argc, char *argv[])
743 int fd;
744 struct stat st;
745 unsigned int st_size;
746 void *buf;
747 char *c;
748 unsigned int nr_c;
749 struct pos *pos;
750 struct pp_token *ppt_head;
752 if (argc < 2)
753 return EXIT_FAILURE;
755 fd = open(argv[1], O_RDONLY);
756 if (fd == -1)
757 perror_exit("open %s", argv[1]);
758 if (fstat(fd, &st) == -1)
759 perror_exit("fstat %s", argv[1]);
760 if (st.st_size < 0)
761 _error_exit("%s: negative st_size %"PRIdMAX, argv[1], (intmax_t)st.st_size);
762 st_size = (unsigned int)(uintmax_t)(intmax_t)st.st_size;
763 if ((uintmax_t)(intmax_t)st.st_size != (uintmax_t)st_size)
764 _error_exit("%s: too big st_size %"PRIdMAX, argv[1], (intmax_t)st.st_size);
766 buf = xmalloc(st_size);
767 xread(fd, buf, st_size);
768 close(fd);
770 c = buf;
771 nr_c = st_size;
773 fix_newline(c, &nr_c);
774 pos = line_column(c, nr_c);
775 warn_trigraph(c, nr_c, pos);
776 delete_backslash_newline(c, &nr_c, pos);
778 ppt_head = pp_tokenize(c, nr_c, pos);
779 free(c);
780 free(pos);
783 struct pp_token *ppt;
785 for (ppt = ppt_head; ppt; ppt = ppt->next)
786 pp_token_print(ppt);
789 pp_token_free(ppt_head);
791 return EXIT_SUCCESS;