1 /* Alexey's C compiler. */
/*
 * Line and column of a position; warning() prints them as "line:column".
 * NOTE(review): presumably the members of struct pos -- the struct's opening
 * line is not part of this excerpt.
 */
15 unsigned int line
, column
;
/*
 * __printf(a, b): tags a function as printf-like, with the format string at
 * argument index a and the variadic arguments starting at index b, so the
 * compiler can check format strings against the arguments.
 */
19 #define __printf(a, b) __attribute__((format(printf, a, b)))
/*
 * Empty variant: the annotation expands to nothing.
 * NOTE(review): presumably the fallback branch of a preprocessor conditional
 * whose directives are not visible in this excerpt -- confirm.
 */
21 #define __printf(a, b)
/*
 * warning() - print a diagnostic tied to a source position on stderr.
 * @pos: position whose line and column prefix the message
 * @fmt: printf-style format of the message text, followed by its arguments
 *
 * The output starts with "<line>:<column>: warning: " and continues with the
 * formatted message. The prototype carries __printf(2, 3) so that the format
 * string (argument 2) is checked against the variadic arguments.
 */
24 static void warning(struct pos
*pos
, const char *fmt
, ...) __printf(2, 3);
25 static void warning(struct pos
*pos
, const char *fmt
, ...)
/* Position prefix first ... */
29 fprintf(stderr
, "%u:%u: warning: ", pos
->line
, pos
->column
);
/* ... then the caller-supplied message. */
31 vfprintf(stderr
, fmt
, args
);
/*
 * perror_exit() - report a failure that has an errno value attached.
 * @fmt: printf-style format of the message text, followed by its arguments
 *
 * Writes "acc: " and then the formatted message to stderr.
 * NOTE(review): the remaining lines are not visible in this excerpt;
 * presumably they report old_errno and terminate the process -- confirm.
 */
36 static void perror_exit(const char *fmt
, ...) __printf(1, 2);
37 static void perror_exit(const char *fmt
, ...)
/* Capture errno on entry, before the stdio calls below can overwrite it. */
39 int old_errno
= errno
;
42 fputs("acc: ", stderr
);
44 vfprintf(stderr
, fmt
, args
);
/*
 * error_exit() - report an error that has no errno value attached.
 * @fmt: printf-style format of the message text, followed by its arguments
 *
 * Writes "acc: error: " and then the formatted message to stderr.
 * NOTE(review): the terminating part of the body is not visible in this
 * excerpt; presumably the process exits afterwards -- confirm.
 */
52 static void error_exit(const char *fmt
, ...) __printf(1, 2);
53 static void error_exit(const char *fmt
, ...)
57 fputs("acc: error: ", stderr
);
59 vfprintf(stderr
, fmt
, args
);
/*
 * xmalloc() - allocation wrapper used throughout this file.
 * @size: number of bytes requested
 *
 * NOTE(review): the allocation call and its result check are not visible in
 * this excerpt; presumably perror_exit() below runs only when the allocation
 * fails -- confirm.
 */
65 static void *xmalloc(size_t size
)
/* Failure report: names this function and the requested size. */
71 perror_exit("%s: size %zu", __func__
, size
);
/*
 * _xread() - read() that is retried on transient failures.
 * @fd: descriptor to read from
 * @buf: destination buffer
 * @count: maximum number of bytes to read
 *
 * The read() call is repeated for as long as it returns -1 with errno set to
 * EAGAIN or EINTR.
 * NOTE(review): the return statement is not visible; presumably rv is
 * returned. Retrying on EAGAIN would spin on a non-blocking descriptor --
 * confirm that callers only pass blocking ones.
 */
75 static ssize_t
_xread(int fd
, void *buf
, size_t count
)
80 rv
= read(fd
, buf
, count
);
/* Loop again only for the two "try again" errno values. */
81 } while (rv
== -1 && (errno
== EAGAIN
|| errno
== EINTR
));
/*
 * xread() - read from a descriptor, treating failure and short input as fatal.
 * @fd: descriptor to read from
 * @buf: destination buffer
 * @count: number of bytes wanted
 *
 * NOTE(review): the loop head and the conditions guarding the two error
 * calls are not visible in this excerpt; presumably the function loops until
 * @count bytes have arrived, calling perror_exit() when _xread() returns -1
 * and error_exit() when it returns 0 -- confirm.
 */
85 static void xread(int fd
, void *buf
, size_t count
)
90 rv
= _xread(fd
, buf
, count
);
/* Read failure: reported together with errno. */
92 perror_exit("read fd %d, buf %p, count %zu", fd
, buf
, count
);
/* Input ended early: reported as truncation. */
94 error_exit("fd %d truncated, buf %p, count %zu", fd
, buf
, count
);
/* Step the destination past the rv bytes just received. */
96 buf
= (char *)buf
+ rv
;
/*
 * convert_from_utf8() - decode a UTF-8 octet buffer into 32-bit code points.
 * @_c: input octets
 * @_nr_c: number of input octets
 * @c: receives the xmalloc()'ed output array
 * @nr_c: output count -- NOTE(review): the store to *nr_c is not visible in
 *        this excerpt; presumably the number of decoded code points.
 *
 * Malformed input (unknown lead octet, truncated sequence, bad continuation
 * octet, value below the minimum for its encoded length) is reported through
 * error_exit().
 * NOTE(review): no check for surrogates (U+D800..U+DFFF) or for values above
 * U+10FFFF is visible here, although the table accepts lead octets up to
 * 0xf7 -- confirm whether the lines not shown cover that.
 */
101 static void convert_from_utf8(uint8_t *_c
, unsigned int _nr_c
, uint32_t **c
, unsigned int *nr_c
)
/* Refuse inputs for which _nr_c * sizeof(uint32_t) would not fit in 32 bits. */
105 if (_nr_c
>= 0xffffffff / sizeof(uint32_t))
106 error_exit("integer overflow _nr_c %"PRIu32
, _nr_c
);
108 /* At worst all data is ASCII, i.e. one code point per input octet. */
109 *c
= xmalloc(_nr_c
* sizeof(uint32_t));
/*
 * Lead-octet table, one row per sequence length. A lead octet matches a row
 * when (octet & mask1) == res1; min is the smallest code point accepted for
 * that length. The row index equals the number of continuation octets.
 */
114 static const struct {
118 { 0x80, 0x00, 0 }, /* 0xxxxxxx */
119 { 0xe0, 0xc0, 0x80 }, /* 110xxxxx 10xxxxxx */
120 { 0xf0, 0xe0, 0x800 }, /* 1110xxxx 10xxxxxx 10xxxxxx */
121 { 0xf8, 0xf0, 0x10000 }, /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
123 unsigned int level
, j
;
/* Find the table row whose pattern matches the lead octet at _c[i]. */
127 while (level
< sizeof(_mask
) / sizeof(_mask
[0])) {
128 if ((_c
[i
] & _mask
[level
].mask1
) == _mask
[level
].res1
)
/* Ran off the end of the table: not a valid lead octet. */
132 if (level
== sizeof(_mask
) / sizeof(_mask
[0]))
133 error_exit("invalid UTF-8 octet sequence at %u: %02"PRIx8
, i
, _c
[i
]);
/* The sequence needs `level` more octets after position i. */
134 if (i
+ level
>= _nr_c
)
135 error_exit("truncated UTF-8 octet sequence at %u: %02"PRIx8
, i
, _c
[i
]);
/* Every continuation octet must have the form 10xxxxxx. */
136 for (j
= 0; j
< level
; j
++) {
137 if ((_c
[i
+ j
+ 1] & 0xc0) != 0x80)
138 error_exit("invalid UTF-8 octet sequence at %u: %02"PRIx8
" ... %02"PRIx8
, i
+ j
+ 1, _c
[i
], _c
[i
+ j
+ 1]);
/* Start with the payload bits of the lead octet ... */
141 ch
= _c
[i
] & ~_mask
[level
].mask1
;
/* ... and append six payload bits per continuation octet. */
142 for (j
= 0; j
< level
; j
++)
143 ch
= (ch
<< 6) | (_c
[i
+ j
+ 1] & ~0xc0);
/* Reject values that a shorter sequence could have encoded (overlong form). */
145 if (ch
< _mask
[level
].min
)
146 error_exit("invalid UTF-8 octet sequence at %u: %02"PRIx8
, i
, _c
[i
]);
155 /* LINE SEPARATOR (code point U+2028) to catch \n. */
156 #define LS ((uint32_t)0x2028)
/*
 * fix_newline() - process line terminators in a code point array, in place.
 * @c: code points, modified in place
 * @nr_c: element count of @c, passed by pointer
 *
 * Visible in this excerpt: when c[i] is 0x0d (CR) and the following element
 * is 0x0a (LF), the tail of the array is shifted down by one element,
 * overwriting the CR.
 * NOTE(review): the rest of the body is not part of this excerpt; presumably
 * it also updates *nr_c and handles the other terminators -- confirm.
 */
158 static void fix_newline(uint32_t *c
, unsigned int *nr_c
)
164 if (c
[i
] == 0x0d && i
+ 1 < *nr_c
&& c
[i
+ 1] == 0x0a) {
/*
 * memmove() takes a byte count, so the number of trailing elements must be
 * scaled by the element size; passing the bare element count would shift
 * only a quarter of the tail.
 */
165 memmove(&c
[i
], &c
[i
+ 1], (size_t)(*nr_c
- i
- 1) * sizeof(c[0]));
/*
 * line_column() - build a per-code-point array of source positions.
 * @c: code points
 * @nr_c: number of elements in @c
 *
 * Allocates one struct pos per code point with xmalloc() and fills the
 * entries in index order.
 * NOTE(review): the updates of the running line/column counters and the
 * return statement are not visible in this excerpt; presumably the array is
 * returned to the caller -- confirm.
 */
182 static struct pos
*line_column(const uint32_t *c
, unsigned int nr_c
)
/* Running position while walking the input. */
185 unsigned int line
, column
;
/* Refuse counts for which nr_c * sizeof(struct pos) would not fit in 32 bits. */
188 if (nr_c
>= 0xffffffff / sizeof(struct pos
))
189 error_exit("integer overflow nr_c %u", nr_c
);
190 pos
= xmalloc(nr_c
* sizeof(struct pos
));
194 for (i
= 0; i
< nr_c
; i
++) {
/* Record the current column for code point i. */
196 pos
[i
].column
= column
;
/*
 * warn_trigraph() - warn about trigraph sequences found in the input.
 * @c: code points
 * @nr_c: number of elements in @c
 * @pos: positions, indexed like @c
 *
 * Looks for "??" followed by one of the nine characters listed in the case
 * labels below and emits a warning at the position of the first '?'. The
 * message states that the sequence is ignored.
 */
207 static void warn_trigraph(const uint32_t *c
, unsigned int nr_c
, struct pos
*pos
)
/* A trigraph is three code points long, so stop while i + 2 is in range. */
212 while (i
+ 2 < nr_c
) {
213 if (c
[i
] == '?' && c
[i
+ 1] == '?') {
/* Third characters that complete a trigraph. */
215 case '=':case ')':case '!':
216 case '(':case '\'':case '>':
217 case '/':case '<':case '-':
218 warning(&pos
[i
], "trigraph sequence ??%c, ignoring", c
[i
+ 2]);
/*
 * main() - read the file named by argv[1] and dump its code points.
 *
 * Visible steps: open the file read-only, fstat() it, validate the size,
 * read the whole file into a buffer, skip a leading UTF-8 byte order mark,
 * decode to code points, run fix_newline(), compute positions, warn about
 * trigraphs, and print one line per code point.
 * NOTE(review): argument checking and the return value are not visible in
 * this excerpt.
 */
229 int main(int argc
, char *argv
[])
/* File size narrowed to unsigned int (checked below). */
233 unsigned int st_size
;
244 fd
= open(argv
[1], O_RDONLY
);
246 perror_exit("open %s", argv
[1]);
247 if (fstat(fd
, &st
) == -1)
248 perror_exit("fstat %s", argv
[1]);
250 error_exit("%s: negative st_size %"PRIdMAX
, argv
[1], (intmax_t)st
.st_size
);
/* Narrow the size, then verify that the narrowing lost nothing. */
251 st_size
= (unsigned int)(uintmax_t)(intmax_t)st
.st_size
;
252 if ((uintmax_t)(intmax_t)st
.st_size
!= (uintmax_t)st_size
)
253 error_exit("%s: too big st_size %"PRIdMAX
, argv
[1], (intmax_t)st
.st_size
);
/* Read the whole file into memory. */
255 buf
= xmalloc(st_size
);
256 xread(fd
, buf
, st_size
);
261 /* Skip UTF-8 "BOM" (octets ef bb bf) if any. */
262 if (st_size
>= 3 && _c
[0] == 0xef && _c
[1] == 0xbb && _c
[2] == 0xbf) {
/* Octets to code points. */
266 convert_from_utf8(_c
, _nr_c
, &c
, &nr_c
);
269 fix_newline(c
, &nr_c
);
270 pos
= line_column(c
, nr_c
);
271 warn_trigraph(c
, nr_c
, pos
);
/* Dump each code point as "line:column:", a tab, and 8 hex digits. */
276 for (i
= 0; i
< nr_c
; i
++)
277 printf("%u:%u:\t%08x\n", pos
[i
].line
, pos
[i
].column
, c
[i
]);