1 /* Alexey's C compiler. */
16 unsigned int line
, column
;
20 #define __noreturn __attribute((noreturn))
21 #define __printf(a, b) __attribute__((format(printf, a, b)))
24 #define __printf(a, b)
27 static void warning(struct pos
*pos
, const char *fmt
, ...) __printf(2, 3);
28 static void warning(struct pos
*pos
, const char *fmt
, ...)
32 fprintf(stderr
, "%u:%u: warning: ", pos
->line
, pos
->column
);
34 vfprintf(stderr
, fmt
, args
);
39 static void error_exit(struct pos
*pos
, const char *fmt
, ...) __printf(2, 3) __noreturn
;
40 static void error_exit(struct pos
*pos
, const char *fmt
, ...)
44 fprintf(stderr
, "%u:%u: error: ", pos
->line
, pos
->column
);
46 vfprintf(stderr
, fmt
, args
);
52 static void perror_exit(const char *fmt
, ...) __printf(1, 2) __noreturn
;
53 static void perror_exit(const char *fmt
, ...)
55 int old_errno
= errno
;
58 fputs("acc: ", stderr
);
60 vfprintf(stderr
, fmt
, args
);
68 static void _error_exit(const char *fmt
, ...) __printf(1, 2) __noreturn
;
69 static void _error_exit(const char *fmt
, ...)
73 fputs("acc: error: ", stderr
);
75 vfprintf(stderr
, fmt
, args
);
81 static void *xmalloc(size_t size
)
87 perror_exit("%s: size %zu", __func__
, size
);
91 static void *xmemdup(const void *src
, size_t n
)
100 static ssize_t
_xread(int fd
, void *buf
, size_t count
)
105 rv
= read(fd
, buf
, count
);
106 } while (rv
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
110 static void xread(int fd
, void *buf
, size_t count
)
115 rv
= _xread(fd
, buf
, count
);
117 perror_exit("read fd %d, buf %p, count %zu", fd
, buf
, count
);
119 _error_exit("fd %d truncated, buf %p, count %zu", fd
, buf
, count
);
121 buf
= (char *)buf
+ rv
;
126 static void fix_newline(char *c
, unsigned int *nr_c
)
132 if (c
[i
] == '\r' && i
+ 1 < *nr_c
&& c
[i
+ 1] == '\n') {
133 memmove(&c
[i
], &c
[i
+ 1], *nr_c
- i
- 1);
135 } else if (c
[i
] == '\r') {
143 static struct pos
*line_column(const char *c
, unsigned int nr_c
)
146 unsigned int line
, column
;
149 if (nr_c
>= 0xffffffff / sizeof(struct pos
))
150 _error_exit("integer overflow nr_c %u", nr_c
);
151 pos
= xmalloc(nr_c
* sizeof(struct pos
));
155 for (i
= 0; i
< nr_c
; i
++) {
157 pos
[i
].column
= column
;
168 static void warn_trigraph(const char *c
, unsigned int nr_c
, struct pos
*pos
)
173 while (i
+ 2 < nr_c
) {
174 if (c
[i
] == '?' && c
[i
+ 1] == '?') {
176 case '=':case ')':case '!':
177 case '(':case '\'':case '>':
178 case '/':case '<':case '-':
179 warning(&pos
[i
], "trigraph sequence ??%c, ignoring", (unsigned char)c
[i
+ 2]);
190 static void delete_backslash_newline(char *c
, unsigned int *nr_c
, struct pos
*pos
)
195 while (i
+ 1 < *nr_c
) {
196 if (c
[i
] == '\\' && c
[i
+ 1] == '\n') {
197 unsigned int nr_to_move
= *nr_c
- i
- 2;
199 memmove(&c
[i
], &c
[i
+ 2], nr_to_move
);
200 memmove(&pos
[i
], &pos
[i
+ 2], nr_to_move
* sizeof(struct pos
));
208 struct pp_token
*next
;
210 PP_TOKEN_IDENTIFIER
= UCHAR_MAX
+ 1,
215 #define _2(c1, c2) (((c1) << 8) | (c2))
216 #define _3(c1, c2, c3) (((c1) << 16)| ((c2) << 8) | (c3))
217 PP_TOKEN_DOTDOTDOT
= _3('.', '.', '.'),
218 PP_TOKEN_DEREFERENCE
= _2('-', '>'),
219 PP_TOKEN_SUB_EQ
= _2('-', '='),
220 PP_TOKEN_DEC
= _2('-', '-'),
221 PP_TOKEN_ADD_EQ
= _2('+', '='),
222 PP_TOKEN_INC
= _2('+', '+'),
223 PP_TOKEN_AND_EQ
= _2('&', '='),
224 PP_TOKEN_AND
= _2('&', '&'),
225 PP_TOKEN_MUL_EQ
= _2('*', '='),
226 PP_TOKEN_NOT_EQ
= _2('!', '='),
227 PP_TOKEN_DIV_EQ
= _2('/', '='),
228 PP_TOKEN_REM_EQ
= _2('%', '='),
229 PP_TOKEN_LSHIFT_EQ
= _3('<', '<', '='),
230 PP_TOKEN_LSHIFT
= _2('<', '<'),
231 PP_TOKEN_LEQ
= _2('<', '='),
232 PP_TOKEN_RSHIFT_EQ
= _3('>', '>', '='),
233 PP_TOKEN_RSHIFT
= _2('>', '>'),
234 PP_TOKEN_GEQ
= _2('>', '='),
235 PP_TOKEN_EQ
= _2('=', '='),
236 PP_TOKEN_XOR_EQ
= _2('^', '='),
237 PP_TOKEN_OR_EQ
= _2('|', '='),
238 PP_TOKEN_OR
= _2('|', '|'),
239 PP_TOKEN_SHARPSHARP
= _2('#', '#'),
243 char *str
; /* string representation, if type is not enough */
247 static struct pp_token
*pp_token_create(enum pp_token_type type
, struct pos
*pos
)
249 struct pp_token
*ppt
;
251 ppt
= xmalloc(sizeof(struct pp_token
));
260 static void pp_token_add(struct pp_token
*ppt
, const char *c
, unsigned int start
, unsigned int end
)
262 ppt
->str
= xmemdup(&c
[start
], end
- start
);
265 static void pp_token_free(struct pp_token
*ppt_head
)
267 struct pp_token
*ppt
;
271 struct pp_token
*next
;
281 static void pp_token_print(struct pp_token
*ppt
)
283 printf("%u:%u:\t", ppt
->pos
.line
, ppt
->pos
.column
);
291 case PP_TOKEN_IDENTIFIER
:
292 printf("pp-identifier %s", ppt
->str
);
294 case PP_TOKEN_NUMBER
:
295 printf("pp-number %s", ppt
->str
);
297 case PP_TOKEN_STRING
:
298 printf("pp-string \"%s\"", ppt
->str
);
301 printf("pp-char '%s'", ppt
->str
);
303 case PP_TOKEN_DOTDOTDOT
:
304 case PP_TOKEN_LSHIFT_EQ
:
305 case PP_TOKEN_RSHIFT_EQ
:
306 printf("%c%c%c", (ppt
->type
>> 16) & 0xff, (ppt
->type
>> 8) & 0xff, ppt
->type
& 0xff);
308 case PP_TOKEN_DEREFERENCE
:
309 case PP_TOKEN_SUB_EQ
:
311 case PP_TOKEN_ADD_EQ
:
313 case PP_TOKEN_AND_EQ
:
315 case PP_TOKEN_MUL_EQ
:
316 case PP_TOKEN_NOT_EQ
:
317 case PP_TOKEN_DIV_EQ
:
318 case PP_TOKEN_REM_EQ
:
319 case PP_TOKEN_LSHIFT
:
321 case PP_TOKEN_RSHIFT
:
324 case PP_TOKEN_XOR_EQ
:
327 case PP_TOKEN_SHARPSHARP
:
328 printf("%c%c", (ppt
->type
>> 8) & 0xff, ppt
->type
& 0xff);
331 printf("%c", ppt
->type
);
/*
 * Return 1 if c is a letter in the ranges 'a'..'z' or 'A'..'Z', or an
 * underscore; return 0 otherwise.
 */
static int pp_nondigit(const char c)
{
	if (c == '_')
		return 1;
	if ('a' <= c && c <= 'z')
		return 1;
	return 'A' <= c && c <= 'Z';
}
/* Return 1 if c is an octal digit ('0'..'7'), 0 otherwise. */
static int pp_octdigit(const char c)
{
	if (c < '0')
		return 0;
	return c <= '7';
}
/* Return 1 if c is a decimal digit ('0'..'9'), 0 otherwise. */
static int pp_digit(const char c)
{
	if (c < '0')
		return 0;
	return c <= '9';
}
/*
 * Return 1 if c is a hexadecimal digit: '0'..'9', 'a'..'f' or 'A'..'F';
 * return 0 otherwise.
 */
static int pp_hexdigit(const char c)
{
	if ('0' <= c && c <= '9')
		return 1;
	if ('a' <= c && c <= 'f')
		return 1;
	return 'A' <= c && c <= 'F';
}
356 static unsigned int _pp_identifier_end(const char *c
, unsigned int nr_c
, unsigned int start
)
362 if (pp_nondigit(c
[i
]) || pp_digit(c
[i
])) {
364 } else if (i
+ 5 < nr_c
&& c
[i
] == '\\' && c
[i
+ 1] == 'u' &&
365 pp_hexdigit(c
[i
+ 2]) && pp_hexdigit(c
[i
+ 3]) && pp_hexdigit(c
[i
+ 4]) && pp_hexdigit(c
[i
+ 5])) {
367 } else if (i
+ 9 < nr_c
&& c
[i
] == '\\' && c
[i
+ 1] == 'U' &&
368 pp_hexdigit(c
[i
+ 2]) && pp_hexdigit(c
[i
+ 3]) && pp_hexdigit(c
[i
+ 4]) && pp_hexdigit(c
[i
+ 5]) &&
369 pp_hexdigit(c
[i
+ 6]) && pp_hexdigit(c
[i
+ 7]) && pp_hexdigit(c
[i
+ 8]) && pp_hexdigit(c
[i
+ 9])) {
377 static unsigned int pp_number_end(const char *c
, unsigned int nr_c
, unsigned int start
)
383 if ((c
[i
] == 'e' || c
[i
] == 'E' || c
[i
] == 'p' || c
[i
] == 'P') &&
384 i
+ 1 < nr_c
&& (c
[i
+ 1] == '+' || c
[i
+ 1] == '-')) {
386 } else if (pp_digit(c
[i
]) || pp_nondigit(c
[i
]) || c
[i
] == '.') {
388 } else if (c
[i
] == '\\' && i
+ 5 < nr_c
&& c
[i
+ 1] == 'u' &&
389 pp_hexdigit(c
[i
+ 2]) && pp_hexdigit(c
[i
+ 3]) && pp_hexdigit(c
[i
+ 4]) && pp_hexdigit(c
[i
+ 5])) {
391 } else if (c
[i
] == '\\' && i
+ 9 < nr_c
&& c
[i
+ 1] == 'U' &&
392 pp_hexdigit(c
[i
+ 2]) && pp_hexdigit(c
[i
+ 3]) && pp_hexdigit(c
[i
+ 4]) && pp_hexdigit(c
[i
+ 5]) &&
393 pp_hexdigit(c
[i
+ 6]) && pp_hexdigit(c
[i
+ 7]) && pp_hexdigit(c
[i
+ 8]) && pp_hexdigit(c
[i
+ 9])) {
401 static unsigned int c_comment_end(const char *c
, unsigned int nr_c
, unsigned int start
)
406 while (i
+ 1 < nr_c
) {
407 if (c
[i
] == '*' && c
[i
+ 1] == '/')
414 static unsigned int cpp_comment_end(const char *c
, unsigned int nr_c
, unsigned int start
)
419 while (i
< nr_c
&& c
[i
] != '\n')
424 static unsigned int escape_sequence_end(const char *c
, unsigned int nr_c
, unsigned int start
, struct pos
*_pos
)
426 struct pos
*pos
= &_pos
[start
];
431 error_exit(pos
, "incomplete escape sequence");
433 case '\'':case '"':case '?':case '\\':
434 case 'a':case 'b':case 'f':case 'n':case 'r':case 't':case 'v':
436 case '0':case '1':case '2':case '3':case '4':case '6':case '7':
437 if (i
+ 2 < nr_c
&& pp_octdigit(c
[i
+ 1]) && pp_octdigit(c
[i
+ 2]))
439 if (i
+ 1 < nr_c
&& pp_octdigit(c
[i
+ 1]))
444 while (i
< nr_c
&& pp_hexdigit(c
[i
]))
447 error_exit(pos
, "invalid hexadecimal escape sequence");
451 pp_hexdigit(c
[i
+ 1]) && pp_hexdigit(c
[i
+ 2]) && pp_hexdigit(c
[i
+ 3]) && pp_hexdigit(c
[i
+ 4]))
453 error_exit(pos
, "invalid universal character name");
456 pp_hexdigit(c
[i
+ 1]) && pp_hexdigit(c
[i
+ 2]) && pp_hexdigit(c
[i
+ 3]) && pp_hexdigit(c
[i
+ 4]) &&
457 pp_hexdigit(c
[i
+ 5]) && pp_hexdigit(c
[i
+ 6]) && pp_hexdigit(c
[i
+ 7]) && pp_hexdigit(c
[i
+ 8]))
459 error_exit(pos
, "invalid universal character name");
461 error_exit(pos
, "invalid escape sequence");
465 static unsigned int pp_string_end(const char *c
, unsigned int nr_c
, unsigned int start
, struct pos
*_pos
)
467 struct pos
*pos
= &_pos
[start
];
471 while (i
< nr_c
&& c
[i
] != '"') {
476 i
= escape_sequence_end(c
, nr_c
, i
, _pos
);
487 error_exit(pos
, "incomplete string literal");
490 static unsigned int pp_char_end(const char *c
, unsigned int nr_c
, unsigned int start
, struct pos
*_pos
)
492 struct pos
*pos
= &_pos
[start
];
496 while (i
< nr_c
&& c
[i
] != '\'') {
501 i
= escape_sequence_end(c
, nr_c
, i
, _pos
);
514 error_exit(pos
, "incomplete character constant");
516 error_exit(pos
, "empty character constant");
519 static unsigned int whitespace_end(const char *c
, unsigned int nr_c
, unsigned int start
)
524 while (i
< nr_c
&& (c
[i
] == ' ' || c
[i
] == '\t'))
529 static struct pp_token
*pp_tokenize(const char *c
, unsigned int nr_c
, struct pos
*_pos
)
531 struct pp_token
*ppt_head
, *ppt_tail
;
532 int pp_directive_allowed
;
536 pp_directive_allowed
= 1;
540 struct pp_token
*ppt
;
542 if (pp_directive_allowed
) {
543 static const char _error
[] = {'e', 'r', 'r', 'o', 'r'};
544 unsigned int sharp_start
, directive_start
, directive_end
;
547 j
= whitespace_end(c
, nr_c
, i
);
548 if (j
>= nr_c
|| c
[j
] != '#')
549 goto not_pp_directive
;
551 j
= whitespace_end(c
, nr_c
, j
+ 1);
553 warning(&_pos
[sharp_start
], "empty preprocessor directive");
558 warning(&_pos
[sharp_start
], "empty preprocessor directive");
559 /* Eat newline after # */
564 while (j
< nr_c
&& 'a' <= c
[j
] && c
[j
] <= 'z')
568 if (directive_end
- directive_start
== sizeof(_error
) / sizeof(_error
[0]) && memcmp(&c
[directive_start
], _error
, sizeof(_error
)) == 0)
569 error_exit(&_pos
[sharp_start
], "%s", "");
571 error_exit(&_pos
[sharp_start
], "unknown preprocessor directive");
581 ppt
= pp_token_create(' ', pos
);
585 ppt
= pp_token_create('\n', pos
);
597 ppt
= pp_token_create(c
[i
], pos
);
600 case 'a':case 'b':case 'c':case 'd':case 'e':case 'f':case 'g':
601 case 'h':case 'i':case 'j':case 'k':case 'l':case 'm':case 'n':
602 case 'o':case 'p':case 'q':case 'r':case 's':case 't':case 'u':
603 case 'v':case 'w':case 'x':case 'y':case 'z':
604 case 'A':case 'B':case 'C':case 'D':case 'E':case 'F':case 'G':
605 case 'H':case 'I':case 'J':case 'K':case 'L':case 'M':case 'N':
606 case 'O':case 'P':case 'Q':case 'R':case 'S':case 'T':case 'U':
607 case 'V':case 'W':case 'X':case 'Y':case 'Z':
609 ppt
= pp_token_create(PP_TOKEN_IDENTIFIER
, pos
);
610 j
= _pp_identifier_end(c
, nr_c
, i
+ 1);
611 pp_token_add(ppt
, c
, i
, j
);
615 if (i
+ 5 < nr_c
&& c
[i
+ 1] == 'u' &&
616 pp_hexdigit(c
[i
+ 2]) && pp_hexdigit(c
[i
+ 3]) && pp_hexdigit(c
[i
+ 4]) && pp_hexdigit(c
[i
+ 5])) {
617 ppt
= pp_token_create(PP_TOKEN_IDENTIFIER
, pos
);
618 j
= _pp_identifier_end(c
, nr_c
, i
+ 2 + 4);
619 pp_token_add(ppt
, c
, i
, j
);
621 } else if (i
+ 9 < nr_c
&& c
[i
+ 1] == 'U' &&
622 pp_hexdigit(c
[i
+ 2]) && pp_hexdigit(c
[i
+ 3]) && pp_hexdigit(c
[i
+ 4]) && pp_hexdigit(c
[i
+ 5]) &&
623 pp_hexdigit(c
[i
+ 6]) && pp_hexdigit(c
[i
+ 7]) && pp_hexdigit(c
[i
+ 8]) && pp_hexdigit(c
[i
+ 9])) {
624 ppt
= pp_token_create(PP_TOKEN_IDENTIFIER
, pos
);
625 j
= _pp_identifier_end(c
, nr_c
, i
+ 2 + 4 + 4);
626 pp_token_add(ppt
, c
, i
, j
);
629 error_exit(pos
, "unknown character %08"PRIx32
, c
[i
]);
631 case '0':case '1':case '2':case '3':case '4':
632 case '5':case '6':case '7':case '8':case '9':
633 ppt
= pp_token_create(PP_TOKEN_NUMBER
, pos
);
634 j
= pp_number_end(c
, nr_c
, i
);
635 pp_token_add(ppt
, c
, i
, j
);
639 if (i
+ 2 < nr_c
&& c
[i
+ 1] == '.' && c
[i
+ 2] == '.') {
640 ppt
= pp_token_create(PP_TOKEN_DOTDOTDOT
, pos
);
642 } else if (i
+ 1 < nr_c
&& pp_digit(c
[i
+ 1])) {
643 ppt
= pp_token_create(PP_TOKEN_NUMBER
, pos
);
644 j
= pp_number_end(c
, nr_c
, i
+ 1);
645 pp_token_add(ppt
, c
, i
, j
);
648 goto pp_token_simple
;
651 ppt
= pp_token_create(PP_TOKEN_STRING
, pos
);
652 j
= pp_string_end(c
, nr_c
, i
, _pos
);
653 pp_token_add(ppt
, c
, i
+ 1, j
- 1);
657 ppt
= pp_token_create(PP_TOKEN_CHAR
, pos
);
658 j
= pp_char_end(c
, nr_c
, i
, _pos
);
659 pp_token_add(ppt
, c
, i
+ 1, j
- 1);
663 if (i
+ 1 < nr_c
&& c
[i
+ 1] == '*') {
664 ppt
= pp_token_create(' ', pos
);
665 i
= c_comment_end(c
, nr_c
, i
);
666 } else if (i
+ 1 < nr_c
&& c
[i
+ 1] == '/') {
667 warning(pos
, "C++ comment");
668 ppt
= pp_token_create(' ', pos
);
669 i
= cpp_comment_end(c
, nr_c
, i
);
670 } else if (i
+ 1 < nr_c
&& c
[i
+ 1] == '=') {
671 ppt
= pp_token_create((c
[i
] << 8) | c
[i
+ 1], pos
);
674 goto pp_token_simple
;
677 if (i
+ 1 < nr_c
&& (c
[i
+ 1] == '>' || c
[i
+ 1] == '=' || c
[i
+ 1] == '-')) {
678 ppt
= pp_token_create((c
[i
] << 8) | c
[i
+ 1], pos
);
681 goto pp_token_simple
;
686 if (i
+ 1 < nr_c
&& (c
[i
+ 1] == '=' || c
[i
+ 1] == c
[i
])) {
687 ppt
= pp_token_create((c
[i
] << 8) | c
[i
+ 1], pos
);
690 goto pp_token_simple
;
697 if (i
+ 1 < nr_c
&& c
[i
+ 1] == '=') {
698 ppt
= pp_token_create((c
[i
] << 8) | c
[i
+ 1], pos
);
701 goto pp_token_simple
;
705 if (i
+ 2 < nr_c
&& c
[i
+ 1] == c
[i
] && c
[i
+ 2] == '=') {
706 ppt
= pp_token_create((c
[i
] << 16) | (c
[i
+ 1] << 8) | c
[i
+ 2], pos
);
708 } else if (i
+ 1 < nr_c
&& (c
[i
+ 1] == c
[i
] || c
[i
+ 1] == '=')) {
709 ppt
= pp_token_create((c
[i
] << 8) | c
[i
+ 1], pos
);
712 goto pp_token_simple
;
715 if (i
+ 1 < nr_c
&& c
[i
+ 1] == '#') {
716 ppt
= pp_token_create((c
[i
] << 8) | c
[i
+ 1], pos
);
719 goto pp_token_simple
;
722 error_exit(pos
, "unknown character %08"PRIx32
, c
[i
]);
728 ppt_tail
->next
= ppt
;
731 if (ppt
->type
== '\n')
732 pp_directive_allowed
= 1;
733 else if (ppt
->type
== ' ')
736 pp_directive_allowed
= 0;
741 int main(int argc
, char *argv
[])
745 unsigned int st_size
;
750 struct pp_token
*ppt_head
;
755 fd
= open(argv
[1], O_RDONLY
);
757 perror_exit("open %s", argv
[1]);
758 if (fstat(fd
, &st
) == -1)
759 perror_exit("fstat %s", argv
[1]);
761 _error_exit("%s: negative st_size %"PRIdMAX
, argv
[1], (intmax_t)st
.st_size
);
762 st_size
= (unsigned int)(uintmax_t)(intmax_t)st
.st_size
;
763 if ((uintmax_t)(intmax_t)st
.st_size
!= (uintmax_t)st_size
)
764 _error_exit("%s: too big st_size %"PRIdMAX
, argv
[1], (intmax_t)st
.st_size
);
766 buf
= xmalloc(st_size
);
767 xread(fd
, buf
, st_size
);
773 fix_newline(c
, &nr_c
);
774 pos
= line_column(c
, nr_c
);
775 warn_trigraph(c
, nr_c
, pos
);
776 delete_backslash_newline(c
, &nr_c
, pos
);
778 ppt_head
= pp_tokenize(c
, nr_c
, pos
);
783 struct pp_token
*ppt
;
785 for (ppt
= ppt_head
; ppt
; ppt
= ppt
->next
)
789 pp_token_free(ppt_head
);