acc: warn about trigraphs, do not replace
[acc.git] / main.c
blob93cee8a98fc9dae399dcb5b3f8067ec4e2a7163b
1 /* Alexey's C compiler. */
2 #include <stdlib.h>
3 #include <sys/types.h>
4 #include <sys/stat.h>
5 #include <fcntl.h>
6 #include <stdio.h>
7 #include <errno.h>
8 #include <unistd.h>
9 #include <stdarg.h>
10 #include <stdint.h>
11 #include <inttypes.h>
12 #include <string.h>
/* Source location of one character: line and column number. */
struct pos {
	unsigned int line;
	unsigned int column;
};
/*
 * __printf(a, b): mark a function declaration as printf-like so that GNU C
 * compatible compilers check the variadic arguments against the format
 * string. `a` is the position of the format parameter, `b` the position of
 * the first variadic argument. Expands to nothing on other compilers.
 */
#if defined(__GNUC__)
# define __printf(a, b) __attribute__((format(printf, a, b)))
#else
# define __printf(a, b)
#endif
24 static void warning(struct pos *pos, const char *fmt, ...) __printf(2, 3);
25 static void warning(struct pos *pos, const char *fmt, ...)
27 va_list args;
29 fprintf(stderr, "%u:%u: warning: ", pos->line, pos->column);
30 va_start(args, fmt);
31 vfprintf(stderr, fmt, args);
32 va_end(args);
33 fputc('\n', stderr);
36 static void perror_exit(const char *fmt, ...) __printf(1, 2);
37 static void perror_exit(const char *fmt, ...)
39 int old_errno = errno;
40 va_list args;
42 fputs("acc: ", stderr);
43 va_start(args, fmt);
44 vfprintf(stderr, fmt, args);
45 va_end(args);
46 fputs(": ", stderr);
47 errno = old_errno;
48 perror(NULL);
49 exit(EXIT_FAILURE);
52 static void error_exit(const char *fmt, ...) __printf(1, 2);
53 static void error_exit(const char *fmt, ...)
55 va_list args;
57 fputs("acc: error: ", stderr);
58 va_start(args, fmt);
59 vfprintf(stderr, fmt, args);
60 va_end(args);
61 fputc('\n', stderr);
62 exit(EXIT_FAILURE);
/*
 * malloc() wrapper that never returns NULL: when the allocation fails the
 * error is reported through perror_exit() and the process terminates.
 *
 * A request for 0 bytes is rounded up to 1 byte. malloc(0) is allowed to
 * return NULL even though nothing went wrong, and that must not be reported
 * as an allocation failure (it would make an empty input file fatal).
 *
 * size: number of bytes requested.
 * Returns a pointer to the new storage; release it with free().
 */
static void *xmalloc(size_t size)
{
	void *p;

	p = malloc(size ? size : 1);
	if (!p)
		perror_exit("%s: size %zu", __func__, size);
	return p;
}
/*
 * read(2) that transparently restarts when the call fails with EAGAIN or
 * EINTR.
 *
 * Returns what read() returned: the number of bytes read, 0 at end of file,
 * or -1 with errno set for any other error.
 */
static ssize_t _xread(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t n = read(fd, buf, count);

		if (n != -1)
			return n;
		if (errno != EAGAIN && errno != EINTR)
			return -1;
	}
}
/*
 * Read exactly `count` bytes from `fd` into `buf`, issuing as many reads as
 * it takes.
 *
 * Does not return on failure: a read error ends the process through
 * perror_exit(), end of file before `count` bytes were read ends it through
 * error_exit(). The diagnostics show the position and byte count that were
 * still outstanding at the time of the failure.
 */
static void xread(int fd, void *buf, size_t count)
{
	char *dst = buf;
	size_t left = count;

	while (left != 0) {
		ssize_t n = _xread(fd, dst, left);

		if (n == -1)
			perror_exit("read fd %d, buf %p, count %zu", fd, (void *)dst, left);
		if (n == 0)
			error_exit("fd %d truncated, buf %p, count %zu", fd, (void *)dst, left);

		dst += n;
		left -= (size_t)n;
	}
}
/*
 * Decode a UTF-8 octet string into an array of Unicode code points.
 *
 * _c, _nr_c: input octets and their number.
 * c, nr_c:   on return *c points to an array obtained from xmalloc() (release
 *            it with free()) and *nr_c is the number of code points stored.
 *
 * Malformed input terminates the process through error_exit(): an octet that
 * cannot start a sequence, a sequence cut off by the end of input, a bad
 * continuation octet, an overlong encoding, a UTF-16 surrogate
 * (U+D800..U+DFFF) or a value above U+10FFFF.
 */
static void convert_from_utf8(uint8_t *_c, unsigned int _nr_c, uint32_t **c, unsigned int *nr_c)
{
	/* Indexed by the number of continuation octets of the sequence. */
	static const struct {
		uint8_t mask1, res1;	/* lead octet matches if (octet & mask1) == res1 */
		uint32_t min;		/* smallest code point that needs this length */
	} _mask[] = {
		{ 0x80, 0x00, 0 },		/* 0xxxxxxx */
		{ 0xe0, 0xc0, 0x80 },		/* 110xxxxx 10xxxxxx */
		{ 0xf0, 0xe0, 0x800 },		/* 1110xxxx 10xxxxxx 10xxxxxx */
		{ 0xf8, 0xf0, 0x10000 },	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	};
	unsigned int i;

	if (_nr_c >= 0xffffffff / sizeof(uint32_t))
		error_exit("integer overflow _nr_c %u", _nr_c);

	/* At worst all data is ASCII: one code point per input octet. */
	*c = xmalloc(_nr_c * sizeof(uint32_t));
	*nr_c = 0;

	i = 0;
	while (i < _nr_c) {
		unsigned int level, j;
		uint32_t ch;

		/* Classify the lead octet. */
		for (level = 0; level < sizeof(_mask) / sizeof(_mask[0]); level++) {
			if ((_c[i] & _mask[level].mask1) == _mask[level].res1)
				break;
		}
		if (level == sizeof(_mask) / sizeof(_mask[0]))
			error_exit("invalid UTF-8 octet sequence at %u: %02"PRIx8, i, _c[i]);

		/* All continuation octets must be present and of the form 10xxxxxx. */
		if (i + level >= _nr_c)
			error_exit("truncated UTF-8 octet sequence at %u: %02"PRIx8, i, _c[i]);
		for (j = 0; j < level; j++) {
			if ((_c[i + j + 1] & 0xc0) != 0x80)
				error_exit("invalid UTF-8 octet sequence at %u: %02"PRIx8" ... %02"PRIx8, i + j + 1, _c[i], _c[i + j + 1]);
		}

		/* Payload bits of the lead octet, then 6 bits per continuation octet. */
		ch = _c[i] & ~_mask[level].mask1;
		for (j = 0; j < level; j++)
			ch = (ch << 6) | (_c[i + j + 1] & 0x3f);

		/* Reject overlong encodings. */
		if (ch < _mask[level].min)
			error_exit("invalid UTF-8 octet sequence at %u: %02"PRIx8, i, _c[i]);
		/* Surrogates and values past U+10FFFF are not valid in UTF-8. */
		if (ch >= 0xd800 && ch <= 0xdfff)
			error_exit("invalid UTF-8 octet sequence at %u: surrogate U+%04"PRIX32, i, ch);
		if (ch > 0x10ffff)
			error_exit("invalid UTF-8 octet sequence at %u: U+%"PRIX32" is out of range", i, ch);

		i += level + 1;

		(*c)[*nr_c] = ch;
		(*nr_c)++;
	}
}
/*
 * U+2028 LINE SEPARATOR: the single code point every kind of line break is
 * rewritten to, so that later passes only have to look for this one value.
 */
#define LS ((uint32_t)0x2028)

/*
 * Normalize line breaks in place.
 *
 * A CR immediately followed by LF is collapsed into one line break; every
 * remaining CR, LF, NEL (U+0085), VT, FF, LINE SEPARATOR (U+2028) and
 * PARAGRAPH SEPARATOR (U+2029) is replaced by LS.
 *
 * c:    array of code points, modified in place.
 * nr_c: in: number of code points in c; out: number left after collapsing
 *       the CR LF pairs.
 */
static void fix_newline(uint32_t *c, unsigned int *nr_c)
{
	unsigned int src, dst;

	/*
	 * Single pass: dst never runs ahead of src, so the look-ahead at
	 * c[src + 1] always sees an element that has not been rewritten yet.
	 */
	dst = 0;
	for (src = 0; src < *nr_c; src++) {
		uint32_t ch = c[src];

		/* Drop the CR of a CR LF pair; the LF is converted next round. */
		if (ch == 0x0d && src + 1 < *nr_c && c[src + 1] == 0x0a)
			continue;

		switch (ch) {
		case 0x0d:
		case 0x0a:
		case 0x85:
		case 0x0b:
		case 0x0c:
		case 0x2028:
		case 0x2029:
			ch = LS;
			break;
		default:
			break;
		}
		c[dst++] = ch;
	}
	*nr_c = dst;
}
182 static struct pos *line_column(const uint32_t *c, unsigned int nr_c)
184 struct pos *pos;
185 unsigned int line, column;
186 unsigned int i;
188 if (nr_c >= 0xffffffff / sizeof(struct pos))
189 error_exit("integer overflow nr_c %u", nr_c);
190 pos = xmalloc(nr_c * sizeof(struct pos));
192 line = 1;
193 column = 1;
194 for (i = 0; i < nr_c; i++) {
195 pos[i].line = line;
196 pos[i].column = column;
198 if (c[i] == LS) {
199 line++;
200 column = 1;
201 } else
202 column++;
204 return pos;
207 static void warn_trigraph(const uint32_t *c, unsigned int nr_c, struct pos *pos)
209 unsigned int i;
211 i = 0;
212 while (i + 2 < nr_c) {
213 if (c[i] == '?' && c[i + 1] == '?') {
214 switch (c[i + 2]) {
215 case '=':case ')':case '!':
216 case '(':case '\'':case '>':
217 case '/':case '<':case '-':
218 warning(&pos[i], "trigraph sequence ??%c, ignoring", c[i + 2]);
219 i += 3;
220 break;
221 default:
222 i++;
224 } else
225 i++;
229 int main(int argc, char *argv[])
231 int fd;
232 struct stat st;
233 unsigned int st_size;
234 void *buf;
235 uint8_t *_c;
236 unsigned int _nr_c;
237 uint32_t *c;
238 unsigned int nr_c;
239 struct pos *pos;
241 if (argc < 2)
242 return EXIT_FAILURE;
244 fd = open(argv[1], O_RDONLY);
245 if (fd == -1)
246 perror_exit("open %s", argv[1]);
247 if (fstat(fd, &st) == -1)
248 perror_exit("fstat %s", argv[1]);
249 if (st.st_size < 0)
250 error_exit("%s: negative st_size %"PRIdMAX, argv[1], (intmax_t)st.st_size);
251 st_size = (unsigned int)(uintmax_t)(intmax_t)st.st_size;
252 if ((uintmax_t)(intmax_t)st.st_size != (uintmax_t)st_size)
253 error_exit("%s: too big st_size %"PRIdMAX, argv[1], (intmax_t)st.st_size);
255 buf = xmalloc(st_size);
256 xread(fd, buf, st_size);
257 close(fd);
259 _c = buf;
260 _nr_c = st_size;
261 /* Skip UTF-8 "BOM" if any. */
262 if (st_size >= 3 && _c[0] == 0xef && _c[1] == 0xbb && _c[2] == 0xbf) {
263 _c += 3;
264 _nr_c -= 3;
266 convert_from_utf8(_c, _nr_c, &c, &nr_c);
267 free(buf);
269 fix_newline(c, &nr_c);
270 pos = line_column(c, nr_c);
271 warn_trigraph(c, nr_c, pos);
274 unsigned int i;
276 for (i = 0; i < nr_c; i++)
277 printf("%u:%u:\t%08x\n", pos[i].line, pos[i].column, c[i]);
280 free(c);
281 free(pos);
283 return EXIT_SUCCESS;