tok: return T_LETTER from char_type() for greek letters
[neateqn.git] / tok.c
blob4448f8e1f3be31eb410148d1c8a00fed269019ca
1 /* the preprocessor and tokenizer */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "eqn.h"
/* pack two characters into one integer, for switching on character pairs */
#define T_BIN(c1, c2)		(((c1) << 8) | (c2))
/* characters that always separate tokens */
#define T_SEP			"^~{}\"\n\t "
/* T_SEP plus the punctuation that is read as its own token */
#define T_SOFTSEP		(T_SEP "=:|.+-*/\\,()[]<>!")
/* the words that tok_keyword() accepts as eqn keywords */
static char *kwds[] = {
	"fwd", "down", "back", "up",
	"bold", "italic", "roman", "font", "fat", "size",
	"bar", "dot", "dotdot", "dyad", "hat", "under", "vec", "tilde",
	"left", "right", "over", "sqrt", "sub", "sup", "from", "to", "vcenter",
	"delim", "define",
	"gfont", "grfont", "gbfont",
	"mark", "lineup",
};
22 static int tok_eqen; /* non-zero if inside .EQ/.EN */
23 static int tok_line; /* inside inline eqn block */
24 static int tok_part; /* partial line with inline eqn blocks */
25 static char tok[LNLEN]; /* current token */
26 static char tok_prev[LNLEN]; /* previous token */
27 static int tok_curtype; /* type of current token */
28 static int tok_cursep; /* current character is a separator */
29 static int tok_prevsep; /* previous character was a separator */
30 static int eqn_beg, eqn_end; /* inline eqn delimiters */
32 /* return zero if troff request .ab is read */
33 static int tok_req(int a, int b)
35 int eqln[LNLEN];
36 int i = 0;
37 int ret = 0;
38 eqln[i++] = in_next();
39 if (eqln[i - 1] != '.')
40 goto failed;
41 eqln[i++] = in_next();
42 while (eqln[i - 1] == ' ')
43 eqln[i++] = in_next();
44 if (eqln[i - 1] != a)
45 goto failed;
46 eqln[i++] = in_next();
47 if (eqln[i - 1] != b)
48 goto failed;
49 ret = 1;
50 failed:
51 while (i > 0)
52 in_back(eqln[--i]);
53 return ret;
/* return nonzero if the upcoming input is .EN; the input is not consumed */
static int tok_en(void)
{
	return tok_req('E', 'N');
}
/*
 * does the line start with the .EQ request?
 * s is a NUL-terminated line; whitespace between the dot and EQ is allowed.
 * returns 1 if it does and 0 otherwise.
 */
static int tok_eq(const char *s)
{
	if (*s++ != '.')
		return 0;
	/* isspace() is only defined for unsigned char values and EOF */
	while (isspace((unsigned char) *s))
		s++;
	return s[0] == 'E' && s[1] == 'Q';
}
72 /* read the next input character */
73 static int tok_next(void)
75 int c;
76 if (!tok_eqen && !tok_line)
77 return 0;
78 c = in_next();
79 if (tok_eqen && c == '\n' && tok_en())
80 tok_eqen = 0;
81 if (tok_line && c == eqn_end) {
82 tok_line = 0;
83 return 0;
85 return c;
88 /* push back the last character read */
89 static void tok_back(int c)
91 if (tok_eqen || tok_line)
92 in_back(c);
95 /* read the next word */
96 static void tok_preview(char *s)
98 int c = in_next();
99 int n = 0;
100 while (c > 0 && !strchr(T_SEP, c) && (!tok_line || c != eqn_end)) {
101 s[n++] = c;
102 c = in_next();
104 s[n] = '\0';
105 in_back(c);
/* push back the given word, last character first */
static void tok_unpreview(char *s)
{
	int n = strlen(s);
	while (n > 0)
		in_back((unsigned char) s[--n]);
}
116 /* read a keyword; return zero on success */
117 static int tok_keyword(void)
119 int i;
120 tok_preview(tok);
121 for (i = 0; i < LEN(kwds); i++)
122 if (!strcmp(kwds[i], tok))
123 return 0;
124 tok_unpreview(tok);
125 return 1;
/*
 * read the next argument of a macro call; return zero if read a ','.
 * characters are appended to sbuf up to the next ',' or ')' or the end of
 * input; the terminating character is consumed.  nonzero is returned when
 * the argument ended with anything other than ','.
 */
static int tok_readarg(struct sbuf *sbuf)
{
	int c = in_next();
	while (c > 0 && c != ',' && c != ')') {
		sbuf_add(sbuf, c);
		c = in_next();
	}
	return c == ',' ? 0 : 1;
}
139 /* expand a macro; return zero on success */
140 static int tok_expand(void)
142 char *args[10] = {NULL};
143 struct sbuf sbufs[10];
144 int i, n = 0;
145 int pbeg;
146 tok_preview(tok);
147 if (!in_expand(tok, NULL))
148 return 0;
149 pbeg = in_macrocall(tok);
150 if (pbeg) {
151 tok_unpreview(tok + pbeg + 1);
152 tok[pbeg] = '\0';
153 while (n <= 9) {
154 sbuf_init(&sbufs[n]);
155 if (tok_readarg(&sbufs[n++]))
156 break;
158 for (i = 0; i < n; i++)
159 args[i] = sbuf_buf(&sbufs[i]);
160 in_expand(tok, args);
161 for (i = 0; i < n; i++)
162 sbuf_done(&sbufs[i]);
163 return 0;
165 tok_unpreview(tok);
166 return 1;
169 /* read until .EQ or eqn_beg */
170 int tok_eqn(void)
172 char ln[LNLEN];
173 char *s = ln;
174 int c;
175 tok_cursep = 1;
176 while ((c = in_next()) > 0) {
177 if (c == eqn_beg) {
178 *s = '\0';
179 printf(".%s %s \"%s\n",
180 tok_part ? "as" : "ds", EQNS, ln);
181 tok_part = 1;
182 tok_line = 1;
183 return 0;
185 *s++ = c;
186 if (c == '\n') {
187 *s = '\0';
188 s = ln;
190 if (c == '\n' && !tok_part) {
191 printf("%s", ln);
192 if (tok_eq(ln) && !tok_en()) {
193 tok_eqen = 1;
194 return 0;
197 if (c == '\n' && tok_part) {
198 printf("\\*%s%s", escarg(EQNS), ln);
199 tok_part = 0;
202 return 1;
205 /* collect the output of this eqn block */
206 void tok_eqnout(char *s)
208 char post[128];
209 sprintf(post, "\\s[\\n[%s]]\\f[\\n[%s]]", EQNSZ, EQNFN);
210 if (!tok_part)
211 printf("%s%s\n", s, post);
212 else
213 printf(".as %s \"%s%s\n", EQNS, s, post);
/*
 * return the length of a utf-8 character based on its first byte.
 * zero is returned for zero; every other value that is not the first byte
 * of a multi-byte sequence (ascii, bytes 0x80-0xbf, negative values)
 * counts as a single byte.
 */
static int utf8len(int c)
{
	if (c > 0 && c <= 0x7f)
		return 1;
	if (c >= 0xfc)
		return 6;
	if (c >= 0xf8)
		return 5;
	if (c >= 0xf0)
		return 4;
	if (c >= 0xe0)
		return 3;
	if (c >= 0xc0)
		return 2;
	return c != 0;
}
234 /* return the type of a token */
235 static int char_type(char *s)
237 int c = (unsigned char) s[0];
238 if (isdigit(c))
239 return T_NUMBER;
240 if (c == '"')
241 return T_STRING;
242 if (def_punc(s))
243 return T_PUNC;
244 if (def_binop(s))
245 return T_BINOP;
246 if (def_relop(s))
247 return T_RELOP;
248 if (def_left(s))
249 return T_LEFT;
250 if (def_right(s))
251 return T_RIGHT;
252 if (c == '~' || c == '^')
253 return T_GAP;
254 if (ispunct(c) && (c != '\\' || !s[1]))
255 return T_ORD;
256 return T_LETTER;
259 /* read the next token */
260 static int tok_read(void)
262 char *s = tok;
263 int c, c2;
264 int i;
265 *s = '\0';
266 c = tok_next();
267 if (c <= 0)
268 return 1;
269 tok_prevsep = tok_cursep;
270 tok_cursep = !!strchr(T_SEP, c);
271 if (c == ' ' || c == '\n') {
272 while (c > 0 && (c == ' ' || c == '\n'))
273 c = tok_next();
274 tok_back(c);
275 *s++ = ' ';
276 *s = '\0';
277 tok_curtype = T_SPACE;
278 return 0;
280 if (c == '\t') {
281 *s++ = '\t';
282 *s = '\0';
283 tok_curtype = T_TAB;
284 return 0;
286 if (tok_prevsep) {
287 if (c == '$') {
288 c2 = tok_next();
289 if (c2 >= '1' && c2 <= '9' && !in_arg(c2 - '0'))
290 return tok_read();
291 tok_back(c2);
293 tok_back(c);
294 if (!tok_keyword()) {
295 tok_curtype = T_KEYWORD;
296 tok_cursep = 1;
297 return 0;
299 if (!tok_expand()) {
300 tok_cursep = 1;
301 return tok_read();
303 c = tok_next();
305 if (strchr(T_SOFTSEP, c)) {
306 *s++ = c;
307 if (c == '\\') {
308 c = tok_next();
309 if (c == '(') {
310 *s++ = c;
311 *s++ = tok_next();
312 *s++ = tok_next();
313 } else if (c == '[') {
314 while (c && c != ']') {
315 *s++ = c;
316 c = tok_next();
318 *s++ = ']';
320 } else if (c == '"') {
321 c = tok_next();
322 while (c > 0 && c != '"') {
323 if (c == '\\') {
324 c2 = tok_next();
325 if (c2 == '"')
326 c = '"';
327 else
328 tok_back(c2);
330 *s++ = c;
331 c = tok_next();
333 *s++ = '"';
334 } else {
335 /* two-character operators */
336 c2 = tok_next();
337 switch (T_BIN(c, c2)) {
338 case T_BIN('<', '='):
339 case T_BIN('>', '='):
340 case T_BIN('=', '='):
341 case T_BIN('!', '='):
342 case T_BIN('~', '='):
343 case T_BIN('>', '>'):
344 case T_BIN('<', '<'):
345 case T_BIN(':', '='):
346 case T_BIN('-', '>'):
347 case T_BIN('<', '-'):
348 case T_BIN('-', '+'):
349 *s++ = c2;
350 break;
351 default:
352 tok_back(c2);
355 *s = '\0';
356 tok_curtype = char_type(tok);
357 return 0;
359 *s++ = c;
360 i = utf8len(c);
361 while (--i > 0)
362 *s++ = tok_next();
363 *s = '\0';
364 tok_curtype = char_type(tok);
365 return 0;
368 /* current token */
369 char *tok_get(void)
371 return tok[0] ? tok : NULL;
374 /* current token type */
375 int tok_type(void)
377 return tok[0] ? tok_curtype : 0;
380 /* return nonzero if current token is a separator */
381 int tok_sep(void)
383 return !tok_get() || strchr(T_SEP, (unsigned char) tok_get()[0]) ||
384 tok_curtype == T_KEYWORD;
387 /* read the next token, return the previous */
388 char *tok_pop(void)
390 strcpy(tok_prev, tok);
391 tok_read();
392 return tok_prev[0] ? tok_prev : NULL;
395 /* like tok_pop() but read the next T_SEP-separated token */
396 char *tok_poptext(void)
398 while (tok_type() == T_SPACE)
399 tok_read();
400 tok_prev[0] = '\0';
401 do {
402 strcat(tok_prev, tok);
403 tok_read();
404 } while (tok[0] && !tok_sep());
405 return tok_prev[0] ? tok_prev : NULL;
408 /* skip spaces */
409 static void tok_blanks(void)
411 while (tok_type() == T_SPACE)
412 tok_pop();
415 /* if the next token is s, return zero and skip it */
416 int tok_jmp(char *s)
418 tok_blanks();
419 if (tok_get() && !s[1] && strchr("{}~^\t", s[0]) && !strcmp(s, tok_get())) {
420 tok_pop();
421 return 0;
423 if (tok_type() != T_KEYWORD || !tok_get() || strcmp(s, tok_get()))
424 return 1;
425 tok_pop();
426 return 0;
429 /* read delim command */
430 void tok_delim(void)
432 char delim[NMLEN];
433 tok_preview(delim);
434 if (!strcmp("off", delim)) {
435 eqn_beg = 0;
436 eqn_end = 0;
437 } else {
438 eqn_beg = delim[0];
439 eqn_end = delim[1];
/*
 * read macro definition.
 * after skipping white space, the first character is taken as the
 * delimiter; everything up to its next occurrence, or up to the end of
 * input, is appended to def.  the closing delimiter is consumed.
 */
static void tok_macrodef(struct sbuf *def)
{
	int c;
	int delim;
	c = in_next();
	while (c > 0 && isspace(c))
		c = in_next();
	delim = c;
	c = in_next();
	while (c > 0 && c != delim) {
		sbuf_add(def, c);
		c = in_next();
	}
}
459 /* read the next macro command */
460 void tok_macro(void)
462 char name[NMLEN];
463 struct sbuf def;
464 tok_preview(name);
465 sbuf_init(&def);
466 tok_macrodef(&def);
467 in_define(name, sbuf_buf(&def));
468 sbuf_done(&def);