eqn: fix crash for unterminated pile and matrix col
[neateqn.git] / tok.c
blob2194ab687f8ceea6ddbfe8bcf76f2f689fd7a5d4
1 /* the preprocessor and tokenizer */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "eqn.h"
/* pack two characters into one integer, so a character pair can be a case label */
#define T_BIN(c1, c2)	(((c1) << 8) | (c2))
/* characters that tok_read() and tok_chops() look up as soft separators */
#define T_SOFTSEP	("^~{}(),\"\n\t =:|.+-*/\\,()[]<>!")
/* troff text that tok_eqnout() emits before (ESAVE) and after (ELOAD) an equation */
#define ESAVE		"\\E*[.eqnbeg]\\R'" EQNFN "0 \\En(.f'\\R'" EQNSZ "0 \\En(.s'"
#define ELOAD		"\\f[\\En[" EQNFN "0]]\\s[\\En[" EQNSZ "0]]\\E*[.eqnend]"
/* the words that tok_keyword() accepts as keywords */
static char *kwds[] = {
	"fwd", "down", "back", "up",
	"bold", "italic", "roman", "font",
	"fat", "size",
	"bar", "dot", "dotdot", "dyad",
	"hat", "under", "vec", "tilde",
	"sub", "sup", "from", "to",
	"vcenter",
	"left", "right", "over", "sqrt",
	"pile", "lpile", "cpile", "rpile",
	"above",
	"matrix", "col", "ccol", "lcol", "rcol",
	"delim", "define",
	"gfont", "grfont", "gbfont", "gsize",
	"set", "chartype",
	"mark", "lineup",
	"bracketsizes", "bracketpieces", "breakcost",
};
26 static int tok_eqen; /* non-zero if inside .EQ/.EN */
27 static int tok_line; /* inside inline eqn block */
28 static int tok_part; /* partial line with inline eqn blocks */
29 static char tok[LNLEN]; /* current token */
30 static char tok_prev[LNLEN]; /* previous token */
31 static int tok_curtype; /* type of current token */
32 static int tok_cursep; /* current character is a separator */
33 static int tok_prevsep; /* previous character was a separator */
34 static int eqn_beg, eqn_end; /* inline eqn delimiters */
36 /* return zero if troff request .ab is read */
37 static int tok_req(int a, int b)
39 int eqln[LNLEN];
40 int i = 0;
41 int ret = 0;
42 eqln[i++] = src_next();
43 if (eqln[i - 1] != '.')
44 goto failed;
45 eqln[i++] = src_next();
46 while (eqln[i - 1] == ' ' && i < sizeof(eqln) - 4)
47 eqln[i++] = src_next();
48 if (eqln[i - 1] != a)
49 goto failed;
50 eqln[i++] = src_next();
51 if (eqln[i - 1] != b)
52 goto failed;
53 ret = 1;
54 failed:
55 while (i > 0)
56 src_back(eqln[--i]);
57 return ret;
/* return nonzero if tok_req() finds an .EN request at this point */
static int tok_en(void)
{
	int found = tok_req('E', 'N');
	return found;
}
/*
 * Return 1 if the line s is a '.', optional white space and then
 * the letters "EQ"; return 0 otherwise.
 */
static int tok_eq(char *s)
{
	char *p = s;
	if (p[0] != '.')
		return 0;
	for (p++; isspace((unsigned char) p[0]); p++)
		;
	if (p[0] != 'E')
		return 0;
	return p[1] == 'Q';
}
/*
 * Recognise an lf request in the line s: a '.', optional white
 * space, "lf", optional white space.  If a digit follows, the number
 * starting there is passed to src_lineset().  Returns 1 if s is an
 * lf request and 0 otherwise.
 */
static int tok_lf(char *s)
{
	char *p = s;
	if (p[0] != '.')
		return 0;
	p++;
	while (isspace((unsigned char) p[0]))
		p++;
	if (p[0] != 'l' || p[1] != 'f')
		return 0;
	p += 2;
	while (isspace((unsigned char) p[0]))
		p++;
	if (isdigit((unsigned char) p[0]))
		src_lineset(atoi(p));
	return 1;
}
92 /* read the next input character */
93 static int tok_next(void)
95 int c;
96 if (!tok_eqen && !tok_line)
97 return 0;
98 c = src_next();
99 if (tok_eqen && c == '\n' && tok_en())
100 tok_eqen = 0;
101 if (tok_line && (src_top() && c == eqn_end)) {
102 tok_line = 0;
103 return 0;
105 return c;
108 /* push back the last character read */
109 static void tok_back(int c)
111 if (tok_eqen || tok_line)
112 src_back(c);
/*
 * Read one character into dst; a backslash takes the following
 * character with it, "\(" takes two more, and "\[" runs up to and
 * including the closing ']' (or the first non-positive value read).
 * Returns the number of characters stored; for a single character
 * that is 1 only if the character is positive.  dst is not
 * NUL-terminated and its size is not checked.
 */
static int readchar(char *dst)
{
	int len;
	int c = src_next();
	dst[0] = c;
	if (c != '\\')
		return c > 0;
	c = src_next();
	dst[1] = c;
	switch (c) {
	case '(':
		dst[2] = src_next();
		dst[3] = src_next();
		return 4;
	case '[':
		for (len = 2; c > 0 && c != ']'; len++) {
			c = src_next();
			dst[len] = c;
		}
		return len;
	default:
		return 2;
	}
}
140 /* read the next word; if opstop, stop at open parenthesis */
141 static void tok_preview(char *s, int opstop)
143 int c = src_next();
144 int n = 0;
145 if (c > 0 && def_chopped(c)) {
146 src_back(c);
147 n = readchar(s);
148 } else {
149 while (c > 0 && (!def_chopped(c) && (!opstop || c != '(')) &&
150 (!tok_line || (!src_top() || c != eqn_end))) {
151 src_back(c);
152 n += readchar(s + n);
153 c = src_next();
155 src_back(c);
157 s[n] = '\0';
/* push the word s back to the input, last byte first */
static void tok_unpreview(char *s)
{
	char *p = s + strlen(s);
	while (p > s)
		src_back((unsigned char) *--p);
}
168 /* read a keyword; return zero on success */
169 static int tok_keyword(void)
171 int i;
172 tok_preview(tok, 0);
173 for (i = 0; i < LEN(kwds); i++)
174 if (!strcmp(kwds[i], tok))
175 return 0;
176 tok_unpreview(tok);
177 return 1;
/*
 * Read one argument of a macro call into sbuf.  The argument ends
 * at a ',' or ')' that is outside double quotes and outside nested
 * parentheses; that terminator is consumed but not stored.  Returns
 * zero if the terminator was ',' and one otherwise.
 */
static int tok_readarg(struct sbuf *sbuf)
{
	int depth = 0;		/* parentheses opened inside the argument */
	int inquote = 0;	/* nonzero inside double quotes */
	int c;
	for (c = src_next(); c > 0; c = src_next()) {
		if (!depth && !inquote && (c == ',' || c == ')'))
			break;
		sbuf_add(sbuf, c);
		if (c == '"') {
			inquote = !inquote;
		} else if (!inquote && c == '(') {
			depth++;
		} else if (!inquote && c == ')') {
			depth--;
		} else if (c == '\\') {
			/* copy a whole escape sequence without interpreting it */
			c = src_next();
			sbuf_add(sbuf, c);
			if (c == '*' || c == 'n') {
				c = src_next();
				sbuf_add(sbuf, c);
			}
			if (c == '(') {
				c = src_next();
				sbuf_add(sbuf, c);
				c = src_next();
				sbuf_add(sbuf, c);
			} else if (c == '[') {
				while (c > 0 && c != ']') {
					c = src_next();
					sbuf_add(sbuf, c);
				}
			}
		}
	}
	return c != ',';
}
211 /* expand a macro; return zero on success */
212 static int tok_expand(void)
214 char *args[10] = {NULL};
215 struct sbuf sbufs[10];
216 int i, n = 0;
217 tok_preview(tok, 1);
218 if (src_macro(tok)) {
219 int c = src_next();
220 src_back(c);
221 if (c == '(') { /* macro arguments follow */
222 src_next();
223 while (n <= 9) {
224 sbuf_init(&sbufs[n]);
225 if (tok_readarg(&sbufs[n++]))
226 break;
229 for (i = 0; i < n; i++)
230 args[i] = sbuf_buf(&sbufs[i]);
231 src_expand(tok, args);
232 for (i = 0; i < n; i++)
233 sbuf_done(&sbufs[i]);
234 return 0;
236 tok_unpreview(tok);
237 return 1;
240 /* read until .EQ or eqn_beg */
241 int tok_eqn(void)
243 struct sbuf ln;
244 int c;
245 tok_cursep = 1;
246 sbuf_init(&ln);
247 while ((c = src_next()) > 0) {
248 if (c == eqn_beg) {
249 printf(".eo\n");
250 printf(".%s %s \"%s\n",
251 tok_part ? "as" : "ds", EQNS, sbuf_buf(&ln));
252 sbuf_done(&ln);
253 printf(".ec\n");
254 tok_part = 1;
255 tok_line = 1;
256 return 0;
258 sbuf_add(&ln, c);
259 if (c == '\n' && !tok_part) {
260 printf("%s", sbuf_buf(&ln));
261 tok_lf(sbuf_buf(&ln));
262 if (tok_eq(sbuf_buf(&ln)) && !tok_en()) {
263 tok_eqen = 1;
264 sbuf_done(&ln);
265 return 0;
268 if (c == '\n' && tok_part) {
269 printf(".lf %d\n", src_lineget());
270 printf("\\*%s%s", escarg(EQNS), sbuf_buf(&ln));
271 tok_part = 0;
273 if (c == '\n')
274 sbuf_cut(&ln, 0);
276 sbuf_done(&ln);
277 return 1;
280 /* collect the output of this eqn block */
281 void tok_eqnout(char *s)
283 if (!tok_part) {
284 printf(".ds %s \"%s%s%s\n", EQNS, ESAVE, s, ELOAD);
285 printf(".lf %d\n", src_lineget() - 1);
286 printf("\\&\\*%s\n", escarg(EQNS));
287 } else {
288 printf(".as %s \"%s%s%s\n", EQNS, ESAVE, s, ELOAD);
/*
 * Return the length of a utf-8 character based on its first byte c.
 * A byte with the top bit clear gives 1, or 0 if c is not positive;
 * continuation bytes and bytes that start no 2- to 4-byte sequence
 * give 1.
 */
static int utf8len(int c)
{
	int len;
	int bit;
	if ((c & 0x80) == 0)
		return c > 0;
	if ((c & 0x40) == 0)
		return 1;
	/* the first clear bit below 0x40 determines the sequence length */
	for (len = 2, bit = 0x20; len <= 4; len++, bit >>= 1)
		if ((c & bit) == 0)
			return len;
	return 1;
}
308 /* return the type of a token */
309 static int char_type(char *s)
311 int c = (unsigned char) s[0];
312 int t;
313 if (isdigit(c))
314 return T_NUMBER;
315 if (c == '"')
316 return T_STRING;
317 if ((t = def_type(s)) >= 0)
318 return t;
319 if (c == '~' || c == '^')
320 return T_GAP;
321 if (ispunct(c) && (c != '\\' || !s[1]))
322 return T_ORD;
323 return T_LETTER;
326 /* read the next token */
327 static int tok_read(void)
329 char *s = tok;
330 char *e = tok + sizeof(tok) - 2;
331 int c, c2;
332 int i;
333 *s = '\0';
334 c = tok_next();
335 if (c <= 0)
336 return 1;
337 tok_prevsep = tok_cursep;
338 tok_cursep = def_chopped(c);
339 if (tok_cursep)
340 tok_prevsep = 1;
341 if (c == ' ' || c == '\n') {
342 while (c > 0 && (c == ' ' || c == '\n'))
343 c = tok_next();
344 tok_back(c);
345 *s++ = ' ';
346 *s = '\0';
347 tok_curtype = T_SPACE;
348 return 0;
350 if (c == '\t') {
351 *s++ = '\t';
352 *s = '\0';
353 tok_curtype = T_TAB;
354 return 0;
356 if (tok_prevsep) {
357 if (c == '$') {
358 c2 = tok_next();
359 if (c2 >= '1' && c2 <= '9' && !src_arg(c2 - '0')) {
360 tok_cursep = 1;
361 return tok_read();
363 tok_back(c2);
365 tok_back(c);
366 if (!tok_keyword()) {
367 tok_curtype = T_KEYWORD;
368 tok_cursep = 1;
369 return 0;
371 if (!tok_expand()) {
372 tok_cursep = 1;
373 return tok_read();
375 c = tok_next();
377 if (strchr(T_SOFTSEP, c)) {
378 *s++ = c;
379 if (c == '\\') {
380 c = tok_next();
381 if (c == '(') {
382 *s++ = c;
383 *s++ = tok_next();
384 *s++ = tok_next();
385 } else if (c == '[') {
386 while (c && c != ']') {
387 if (s < e)
388 *s++ = c;
389 c = tok_next();
391 *s++ = ']';
393 } else if (c == '"') {
394 c = tok_next();
395 while (c > 0 && c != '"') {
396 if (c == '\\') {
397 c2 = tok_next();
398 if (c2 == '"')
399 c = '"';
400 else
401 tok_back(c2);
403 if (s < e)
404 *s++ = c;
405 c = tok_next();
407 *s++ = '"';
408 } else {
409 /* two-character operators */
410 c2 = tok_next();
411 switch (T_BIN(c, c2)) {
412 case T_BIN('<', '='):
413 case T_BIN('>', '='):
414 case T_BIN('=', '='):
415 case T_BIN('!', '='):
416 case T_BIN('>', '>'):
417 case T_BIN('<', '<'):
418 case T_BIN(':', '='):
419 case T_BIN('-', '>'):
420 case T_BIN('<', '-'):
421 case T_BIN('-', '+'):
422 *s++ = c2;
423 break;
424 default:
425 tok_back(c2);
428 *s = '\0';
429 tok_curtype = char_type(tok);
430 return 0;
432 *s++ = c;
433 i = utf8len(c);
434 while (--i > 0 && s < e)
435 *s++ = tok_next();
436 *s = '\0';
437 tok_curtype = char_type(tok);
438 return 0;
441 /* current token */
442 char *tok_get(void)
444 return tok[0] ? tok : NULL;
447 /* current token type */
448 int tok_type(void)
450 return tok[0] ? tok_curtype : 0;
453 /* return nonzero if current token chops the equation */
454 int tok_chops(int soft)
456 if (!tok_get() || tok_curtype == T_KEYWORD)
457 return 1;
458 if (soft)
459 return strchr(T_SOFTSEP, (unsigned char) tok_get()[0]) != NULL;
460 return def_chopped((unsigned char) tok_get()[0]);
463 /* read the next token, return the previous */
464 char *tok_pop(void)
466 strcpy(tok_prev, tok);
467 tok_read();
468 return tok_prev[0] ? tok_prev : NULL;
471 /* like tok_pop() but ignore T_SPACE tokens; if sep, read until chopped */
472 char *tok_poptext(int sep)
474 while (tok_type() == T_SPACE)
475 tok_read();
476 tok_prev[0] = '\0';
477 do {
478 strcat(tok_prev, tok);
479 tok_read();
480 } while (tok[0] && !tok_chops(!sep));
481 return tok_prev[0] ? tok_prev : NULL;
484 /* skip spaces */
485 static void tok_blanks(void)
487 while (tok_type() == T_SPACE)
488 tok_pop();
491 /* if the next token is s, return zero and skip it */
492 int tok_jmp(char *s)
494 tok_blanks();
495 if (tok_get() && !s[1] && strchr("{}~^\t", s[0]) && !strcmp(s, tok_get())) {
496 tok_pop();
497 return 0;
499 if (tok_type() != T_KEYWORD || !tok_get() || strcmp(s, tok_get()))
500 return 1;
501 tok_pop();
502 return 0;
505 /* read delim command */
506 void tok_delim(void)
508 char delim[NMLEN];
509 tok_preview(delim, 0);
510 if (!strcmp("off", delim)) {
511 eqn_beg = 0;
512 eqn_end = 0;
513 } else {
514 eqn_beg = delim[0];
515 eqn_end = delim[1];
/*
 * Read a macro definition into def: skip white space, take the next
 * character as the delimiter, and copy everything up to the next
 * occurrence of that delimiter (or a non-positive character).  The
 * delimiters themselves are not stored.
 */
static void tok_macrodef(struct sbuf *def)
{
	int delim;
	int c;
	do {
		c = src_next();
	} while (c > 0 && isspace(c));
	delim = c;
	for (c = src_next(); c > 0 && c != delim; c = src_next())
		sbuf_add(def, c);
}
535 /* read the next macro command */
536 void tok_macro(void)
538 char name[NMLEN];
539 struct sbuf def;
540 tok_preview(name, 0);
541 sbuf_init(&def);
542 tok_macrodef(&def);
543 src_define(name, sbuf_buf(&def));
544 sbuf_done(&def);
547 /* return 1 if inside inline equations */
548 int tok_inline(void)
550 return tok_line;