tok: allow large ds sections and string tokens
[neatcc.git] / tok.c
blobf3c14558b12019404d6acce5784f1408222d3238
1 #include <ctype.h>
2 #include <unistd.h>
3 #include <stdlib.h>
4 #include <string.h>
5 #include "gen.h"
6 #include "mem.h"
7 #include "ncc.h"
8 #include "tok.h"
10 static struct mem tok_mem; /* the data read via cpp_read() so far */
11 static struct mem str; /* the last tok_str() string */
12 static char *buf;
13 static int len;
14 static int cur;
15 static char name[NAMELEN];
16 static int next = -1;
17 static int pre;
19 static struct {
20 char *name;
21 unsigned id;
22 } kwds[] = {
23 {"void", TOK_VOID},
24 {"static", TOK_STATIC},
25 {"extern", TOK_EXTERN},
26 {"return", TOK_RETURN},
27 {"unsigned", TOK_UNSIGNED},
28 {"signed", TOK_SIGNED},
29 {"short", TOK_SHORT},
30 {"long", TOK_LONG},
31 {"int", TOK_INT},
32 {"char", TOK_CHAR},
33 {"struct", TOK_STRUCT},
34 {"union", TOK_UNION},
35 {"enum", TOK_ENUM},
36 {"typedef", TOK_TYPEDEF},
37 {"if", TOK_IF},
38 {"else", TOK_ELSE},
39 {"for", TOK_FOR},
40 {"while", TOK_WHILE},
41 {"do", TOK_DO},
42 {"switch", TOK_SWITCH},
43 {"case", TOK_CASE},
44 {"sizeof", TOK_SIZEOF},
45 {"break", TOK_BREAK},
46 {"continue", TOK_CONTINUE},
47 {"default", TOK_DEFAULT},
48 {"goto", TOK_GOTO},
/* operators made of more than one character, as matched by get_tok3() */
static char *tok3[] = {
	/* three characters */
	"<<=", ">>=", "...",
	/* two characters */
	"<<", ">>", "++", "--",
	"+=", "-=", "*=", "/=", "%=", "|=", "&=", "^=",
	"&&", "||", "==", "!=", "<=", ">=", "->"
};
56 static int get_tok3(int num)
58 int i;
59 for (i = 0; i < LEN(tok3); i++)
60 if (num == TOK3(tok3[i]))
61 return num;
62 return 0;
/* esc[i] is the character denoted by a backslash followed by esc_code[i] */
static char *esc_code = "abefnrtv";
static char *esc = "\a\b\033\f\n\r\t\v";	/* \033 is ESC; "\e" is not standard C */
static char *digs = "0123456789abcdef";

/*
 * Decode the (possibly escaped) character at the start of s.
 *
 * The decoded value is stored in *c and the number of bytes of s that
 * were consumed is returned:
 *   - a byte other than a backslash stands for itself (1 byte);
 *   - \a \b \e \f \n \r \t \v map to their control characters;
 *   - \ooo is an octal escape of one to three digits;
 *   - \xh... is a hexadecimal escape; digits of either case are accepted
 *     and "\x" without digits yields 0;
 *   - any other escaped byte (\\, \", \', \8, ...) stands for itself.
 */
static int esc_char(int *c, char *s)
{
	unsigned char ch;
	char *hit;

	if (*s != '\\') {
		*c = (unsigned char) *s;
		return 1;
	}
	ch = (unsigned char) s[1];
	/* strchr() would also match the terminating NUL of esc_code */
	hit = ch ? strchr(esc_code, ch) : NULL;
	if (hit) {
		*c = esc[hit - esc_code];
		return 2;
	}
	if (ch == 'x' || (ch >= '0' && ch <= '7')) {
		int hex = ch == 'x';
		int base = hex ? 16 : 8;
		int i = hex ? 2 : 1;		/* index of the first digit */
		unsigned val = 0;		/* unsigned: long escapes wrap, not overflow */
		char *d;

		/* octal escapes end after three digits; hex ones are unbounded */
		while ((hex || i <= 3) &&
				(d = memchr(digs, tolower((unsigned char) s[i]), base))) {
			val = val * base + (unsigned) (d - digs);
			i++;
		}
		*c = (int) val;
		return i;
	}
	*c = ch;
	return 2;
}
static long num;	/* value stored by the last readnum() call */
static int num_bt;	/* type bits stored by the last readnum() call */

/*
 * Fetch the most recently read number: its value is written to *n and
 * its type bits (num_bt) are returned.
 */
int tok_num(long *n)
{
	int bt = num_bt;

	*n = num;
	return bt;
}
109 static void readnum(void)
111 int base = 10;
112 num_bt = 4 | BT_SIGNED;
113 if (buf[cur] == '0' && buf[cur + 1] == 'x') {
114 num_bt &= ~BT_SIGNED;
115 base = 16;
116 cur += 2;
118 if (strchr(digs, tolower(buf[cur]))) {
119 long result = 0;
120 char *c;
121 if (base == 10 && buf[cur] == '0')
122 base = 8;
123 while (cur < len && (c = strchr(digs, tolower(buf[cur])))) {
124 result *= base;
125 result += c - digs;
126 cur++;
128 num = result;
129 while (cur < len) {
130 int c = tolower(buf[cur]);
131 if (c != 'u' && c != 'l')
132 break;
133 if (c == 'u')
134 num_bt &= ~BT_SIGNED;
135 if (c == 'l')
136 num_bt = (num_bt & BT_SIGNED) | LONGSZ;
137 cur++;
139 return;
141 if (buf[cur] == '\'') {
142 int ret;
143 cur += 2 + esc_char(&ret, buf + cur + 1);
144 num = ret;
145 return;
147 num = -1;
150 void tok_str(char **buf, int *len)
152 if (len)
153 *len = mem_len(&str) + 1;
154 if (buf)
155 *buf = mem_buf(&str);
158 static void readstr(struct mem *mem)
160 char *s = buf + cur;
161 char *e = buf + len;
162 int c;
163 s++;
164 while (s < e && *s != '"') {
165 if (*s == '\\') {
166 s += esc_char(&c, s);
167 mem_putc(mem, c);
168 } else {
169 mem_putc(mem, (unsigned char) *s++);
172 cur = s - buf + 1;
/*
 * Return nonzero if c may appear in an identifier (letter, digit or
 * underscore).  c is reduced to an unsigned char first because callers
 * pass plain char values, which may be negative, and <ctype.h> functions
 * are undefined for negative arguments other than EOF.
 */
static int id_char(int c)
{
	return isalnum((unsigned char) c) || c == '_';
}
180 static int skipws(void)
182 int clen;
183 char *cbuf;
184 while (1) {
185 if (cur == len) {
186 clen = 0;
187 while (!clen)
188 if (cpp_read(&cbuf, &clen))
189 return 1;
190 mem_put(&tok_mem, cbuf, clen);
191 buf = mem_buf(&tok_mem);
192 len = mem_len(&tok_mem);
194 while (cur < len && isspace(buf[cur]))
195 cur++;
196 if (cur == len)
197 continue;
198 if (buf[cur] == '\\' && buf[cur + 1] == '\n') {
199 cur += 2;
200 continue;
202 if (buf[cur] == '/' && buf[cur + 1] == '/') {
203 while (cur < len && buf[cur] != '\n')
204 cur++;
205 continue;
207 if (buf[cur] == '/' && buf[cur + 1] == '*') {
208 while (++cur < len) {
209 if (buf[cur] == '*' && buf[cur + 1] == '/') {
210 cur += 2;
211 break;
214 continue;
216 break;
218 return 0;
221 int tok_get(void)
223 int num;
224 if (next != -1) {
225 int tok = next;
226 next = -1;
227 return tok;
229 pre = cur;
230 if (skipws())
231 return TOK_EOF;
232 if (buf[cur] == '"') {
233 mem_cut(&str, 0);
234 while (buf[cur] == '"') {
235 readstr(&str);
236 if (skipws())
237 return TOK_EOF;
239 return TOK_STR;
241 if (isdigit(buf[cur]) || buf[cur] == '\'') {
242 readnum();
243 return TOK_NUM;
245 if (id_char(buf[cur])) {
246 char *s = name;
247 int i;
248 while (cur < len && id_char(buf[cur]))
249 *s++ = buf[cur++];
250 *s = '\0';
251 for (i = 0; i < LEN(kwds); i++)
252 if (!strcmp(kwds[i].name, name))
253 return kwds[i].id;
254 return TOK_NAME;
256 if (cur + 3 <= len && (num = get_tok3(TOK3(buf + cur)))) {
257 cur += 3;
258 return num;
260 if ((num = get_tok3(TOK2(buf + cur)))) {
261 cur += 2;
262 return num;
264 if (strchr(";,{}()[]<>*&!=+-/%?:|^~.", buf[cur]))
265 return buf[cur++];
266 return -1;
269 int tok_see(void)
271 if (next == -1)
272 next = tok_get();
273 return next;
276 char *tok_id(void)
278 return name;
281 long tok_addr(void)
283 return next == -1 ? cur : pre;
286 void tok_jump(long addr)
288 cur = addr;
289 pre = cur - 1;
290 next = -1;