arm: fixed minor typo
[neatcc.git] / tok.c
blob137b94bd4f2f97917c818c9d510c8f42e41b958c
1 /* neatcc tokenizer */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <unistd.h>
7 #include "ncc.h"
9 static struct mem tok_mem; /* the data read via cpp_read() so far */
10 static struct mem tok; /* the previous token */
11 static char *buf;
12 static long off, off_pre; /* current and previous positions */
13 static long len;
14 static int tok_set; /* the current token was read */
/*
 * Punctuators longer than one character.  The three-character entries
 * come first so that, for instance, "<<=" is tried before "<<".
 */
static char *tok3[] = {
	"<<=", ">>=", "...",
	"<<", ">>", "++", "--", "+=", "-=", "*=", "/=", "%=", "|=", "&=", "^=",
	"&&", "||", "==", "!=", "<=", ">=", "->"
};

/*
 * Return the first tok3[] entry that r starts with, or NULL if there is
 * none.  r[0] and r[1] are always examined; r[2] is examined only for a
 * three-character candidate whose first two characters already match.
 */
static char *find_tok3(char *r)
{
	char **cur = tok3;
	char **end = tok3 + sizeof(tok3) / sizeof(tok3[0]);

	for (; cur < end; cur++) {
		char *s = *cur;

		if (s[0] != r[0] || s[1] != r[1])
			continue;
		if (s[2] == '\0' || s[2] == r[2])
			return s;
	}
	return NULL;
}
/* esc[i] is the character denoted by a backslash followed by esc_code[i] */
static char *esc_code = "abefnrtv";
/* \033 is ESC; spelled in octal because "\e" is not a standard C escape */
static char *esc = "\a\b\033\f\n\r\t\v";
/* digit characters; the first `base' entries are the digits of that base */
static char *digs = "0123456789abcdef";

/*
 * Decode the (possibly escaped) character at the start of s.
 *
 * c: receives the value of the character
 * s: NUL-terminated text positioned at the character
 *
 * Returns the number of bytes of s that make up the character.
 *
 * Handled forms: a plain byte; the simple escapes listed in esc_code;
 * octal escapes of one to three digits; hexadecimal escapes (\x followed
 * by any number of hex digits); any other escaped byte stands for itself.
 * A backslash directly before the terminator is returned as a backslash
 * of length one, so the caller never steps over the terminator.
 */
static int esc_char(int *c, char *s)
{
	unsigned char next;
	char *hit;

	if (*s != '\\') {
		*c = (unsigned char) *s;
		return 1;
	}
	next = (unsigned char) s[1];
	if (!next) {
		/* strchr() below would otherwise match the terminator of esc_code */
		*c = '\\';
		return 1;
	}
	hit = strchr(esc_code, next);
	if (hit) {
		*c = esc[hit - esc_code];
		return 2;
	}
	if (next == 'x' || (next >= '0' && next <= '7')) {
		int base = next == 'x' ? 16 : 8;
		int i = next == 'x' ? 2 : 1;	/* index of the first digit */
		unsigned val = 0;		/* unsigned: long escapes wrap instead of overflowing */
		char *d;

		/* octal digits live at s[1..3] only; hex escapes have no length limit */
		while ((base == 16 || i <= 3) &&
				(d = memchr(digs, tolower((unsigned char) s[i]), base))) {
			val = val * base + (unsigned) (d - digs);
			i++;
		}
		*c = val;
		return i;
	}
	*c = next;
	return 2;
}
67 long tok_num(char *tok, long *num)
69 int base = 10;
70 long num_bt = 4 | T_MSIGN;
71 if (tok[0] == '0' && tolower(tok[1]) == 'x') {
72 num_bt &= ~T_MSIGN;
73 base = 16;
74 tok += 2;
76 if (strchr(digs, tolower((unsigned char) tok[0]))) {
77 long result = 0;
78 char *c;
79 if (base == 10 && tok[0] == '0')
80 base = 8;
81 while (tok[0] && (c = strchr(digs, tolower((unsigned char) tok[0])))) {
82 result *= base;
83 result += c - digs;
84 tok++;
86 *num = result;
87 while (tok[0]) {
88 int c = tolower((unsigned char) tok[0]);
89 if (c != 'u' && c != 'l')
90 break;
91 if (c == 'u')
92 num_bt &= ~T_MSIGN;
93 if (c == 'l')
94 num_bt = (num_bt & T_MSIGN) | LONGSZ;
95 tok++;
97 return num_bt;
99 if (tok[0] == '\'') {
100 int ret;
101 esc_char(&ret, tok + 1);
102 *num = ret;
103 return num_bt;
105 return 0;
/* Return 1 if c can be part of an identifier (letter, digit or '_'), else 0. */
static int id_char(int c)
{
	if (c == '_')
		return 1;
	return isalnum(c) ? 1 : 0;
}
113 static int skipws(void)
115 long clen;
116 char *cbuf;
117 while (1) {
118 if (off == len) {
119 clen = 0;
120 while (!clen)
121 if (cpp_read(&cbuf, &clen))
122 return 1;
123 mem_put(&tok_mem, cbuf, clen);
124 buf = mem_buf(&tok_mem);
125 len = mem_len(&tok_mem);
127 while (off < len && isspace(buf[off]))
128 off++;
129 if (off == len)
130 continue;
131 if (buf[off] == '\\' && buf[off + 1] == '\n') {
132 off += 2;
133 continue;
135 if (buf[off] == '/' && buf[off + 1] == '/') {
136 while (++off < len && buf[off] != '\n')
137 if (buf[off] == '\\')
138 off++;
139 continue;
141 if (buf[off] == '/' && buf[off + 1] == '*') {
142 while (++off < len) {
143 if (buf[off] == '*' && buf[off + 1] == '/') {
144 off += 2;
145 break;
148 continue;
150 break;
152 return 0;
155 static int tok_read(void)
157 char *t3;
158 int c;
159 off_pre = off;
160 mem_cut(&tok, 0);
161 if (skipws())
162 return 1;
163 if (buf[off] == '"') {
164 mem_putc(&tok, '"');
165 while (buf[off] == '"') {
166 off++;
167 while (off < len && buf[off] != '"') {
168 if (buf[off] == '\\') {
169 off += esc_char(&c, buf + off);
170 mem_putc(&tok, c);
171 } else {
172 mem_putc(&tok, (unsigned char) buf[off++]);
175 if (off >= len || buf[off++] != '"')
176 return 1;
177 if (skipws())
178 return 1;
180 mem_putc(&tok, '"');
181 return 0;
183 if (isdigit((unsigned char) buf[off])) {
184 if (buf[off] == '0' && (buf[off + 1] == 'x' || buf[off + 1] == 'X')) {
185 mem_putc(&tok, (unsigned char) buf[off++]);
186 mem_putc(&tok, (unsigned char) buf[off++]);
188 while (off < len && strchr(digs, tolower((unsigned char) buf[off])))
189 mem_putc(&tok, (unsigned char) buf[off++]);
190 while (off < len && strchr("uUlL", (unsigned char) buf[off]))
191 mem_putc(&tok, (unsigned char) buf[off++]);
192 return 0;
194 if (buf[off] == '\'') {
195 int c, i;
196 int n = esc_char(&c, buf + off + 1) + 1 + 1;
197 for (i = 0; i < n; i++)
198 mem_putc(&tok, (unsigned char) buf[off++]);
199 return 0;
201 if (id_char((unsigned char) buf[off])) {
202 while (off < len && id_char((unsigned char) buf[off]))
203 mem_putc(&tok, (unsigned char) buf[off++]);
204 return 0;
206 if (off + 2 <= len && (t3 = find_tok3(buf + off))) {
207 off += strlen(t3);
208 mem_put(&tok, t3, strlen(t3));
209 return 0;
211 if (strchr(";,{}()[]<>*&!=+-/%?:|^~.", (unsigned char) buf[off])) {
212 mem_putc(&tok, (unsigned char) buf[off++]);
213 return 0;
215 return 1;
218 char *tok_get(void)
220 if (!tok_set)
221 if (tok_read())
222 return "";
223 tok_set = 0;
224 return mem_buf(&tok);
227 char *tok_see(void)
229 if (!tok_set)
230 if (tok_read())
231 return "";
232 tok_set = 1;
233 return mem_buf(&tok);
236 long tok_len(void)
238 return mem_len(&tok);
241 long tok_addr(void)
243 return tok_set ? off_pre : off;
246 void tok_jump(long addr)
248 off = addr;
249 off_pre = -1;
250 tok_set = 0;
253 void tok_done(void)
255 mem_done(&tok);
256 mem_done(&tok_mem);