box: breaking equations after binary and relational operators
[neateqn.git] / tok.c
blobcee408ec581910c8311a08579b6d1c5f0e9e7970
1 /* the preprocessor and tokenizer */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "eqn.h"
/* pack two characters into one integer so that character pairs can be switch labels */
#define T_BIN(c1, c2)	(((c1) << 8) | (c2))
/* characters that always end a word */
#define T_SEP		"^~{}\"\n\t "
/* T_SEP plus the punctuation characters that start a token of their own */
#define T_SOFTSEP	(T_SEP "=:|.+-*/\\,()[]<>!")
/*
 * troff text placed before (ESAVE) and after (ELOAD) the output of an
 * equation; it appears to record the font and size in the EQNFN/EQNSZ
 * registers and to switch back to them afterwards -- confirm against the
 * troff documentation.
 */
#define ESAVE		"\\R'" EQNFN "0 \\En(.f'\\R'" EQNSZ "0 \\En(.s'"
#define ELOAD		"\\f[\\En[" EQNFN "0]]\\s[\\En[" EQNSZ "0]]"
/* words that tok_keyword() accepts as eqn keywords */
static char *kwds[] = {
	"fwd", "down", "back", "up", "bold", "italic", "roman",
	"font", "fat", "size", "bar", "dot", "dotdot", "dyad",
	"hat", "under", "vec", "tilde", "sub", "sup", "from",
	"to", "vcenter", "left", "right", "over", "sqrt", "pile",
	"lpile", "cpile", "rpile", "above", "matrix", "col", "ccol",
	"lcol", "rcol", "delim", "define", "gfont", "grfont", "gbfont",
	"gsize", "set", "chartype", "mark", "lineup", "bracketsizes",
	"bracketpieces",
};
27 static int tok_eqen; /* non-zero if inside .EQ/.EN */
28 static int tok_line; /* inside inline eqn block */
29 static int tok_part; /* partial line with inline eqn blocks */
30 static char tok[LNLEN]; /* current token */
31 static char tok_prev[LNLEN]; /* previous token */
32 static int tok_curtype; /* type of current token */
33 static int tok_cursep; /* current character is a separator */
34 static int tok_prevsep; /* previous character was a separator */
35 static int eqn_beg, eqn_end; /* inline eqn delimiters */
37 /* return zero if troff request .ab is read */
38 static int tok_req(int a, int b)
40 int eqln[LNLEN];
41 int i = 0;
42 int ret = 0;
43 eqln[i++] = in_next();
44 if (eqln[i - 1] != '.')
45 goto failed;
46 eqln[i++] = in_next();
47 while (eqln[i - 1] == ' ')
48 eqln[i++] = in_next();
49 if (eqln[i - 1] != a)
50 goto failed;
51 eqln[i++] = in_next();
52 if (eqln[i - 1] != b)
53 goto failed;
54 ret = 1;
55 failed:
56 while (i > 0)
57 in_back(eqln[--i]);
58 return ret;
/* look for an upcoming .EN request; nonzero if present (input is not consumed) */
static int tok_en(void)
{
	int found = tok_req('E', 'N');
	return found;
}
/* return nonzero if line s is '.', optional white space, then "EQ" */
static int tok_eq(char *s)
{
	char *p = s;
	if (p[0] != '.')
		return 0;
	for (p++; isspace((unsigned char) *p); p++)
		;
	if (p[0] != 'E')
		return 0;
	return p[1] == 'Q';
}
/*
 * Recognise an lf request in line s: '.', optional white space, "lf",
 * optional white space.  If a digit follows, the number is handed to
 * in_lineset().  Returns 1 for an lf request and 0 for any other line.
 */
static int tok_lf(char *s)
{
	if (s[0] != '.')
		return 0;
	s++;
	while (isspace((unsigned char) *s))
		s++;
	if (s[0] != 'l' || s[1] != 'f')
		return 0;
	s += 2;
	while (isspace((unsigned char) *s))
		s++;
	if (isdigit((unsigned char) *s))
		in_lineset(atoi(s));
	return 1;
}
93 /* read the next input character */
94 static int tok_next(void)
96 int c;
97 if (!tok_eqen && !tok_line)
98 return 0;
99 c = in_next();
100 if (tok_eqen && c == '\n' && tok_en())
101 tok_eqen = 0;
102 if (tok_line && (in_top() && c == eqn_end)) {
103 tok_line = 0;
104 return 0;
106 return c;
109 /* push back the last character read */
110 static void tok_back(int c)
112 if (tok_eqen || tok_line)
113 in_back(c);
116 /* read the next word */
117 static void tok_preview(char *s)
119 int c = in_next();
120 int n = 0;
121 while (c > 0 && !strchr(T_SEP, c) &&
122 (!tok_line || (!in_top() || c != eqn_end))) {
123 s[n++] = c;
124 c = in_next();
126 s[n] = '\0';
127 in_back(c);
/* return the word s to the input, last character first */
static void tok_unpreview(char *s)
{
	char *p = s + strlen(s);
	while (p > s)
		in_back((unsigned char) *--p);
}
138 /* read a keyword; return zero on success */
139 static int tok_keyword(void)
141 int i;
142 tok_preview(tok);
143 for (i = 0; i < LEN(kwds); i++)
144 if (!strcmp(kwds[i], tok))
145 return 0;
146 tok_unpreview(tok);
147 return 1;
/*
 * Read one argument of a macro call into sbuf.  The argument ends at a ','
 * or ')' outside nested parentheses, or at the end of input.
 * Returns zero if it ended at ',' (more arguments follow) and 1 otherwise.
 */
static int tok_readarg(struct sbuf *sbuf)
{
	int depth = 0;		/* parentheses opened inside the argument and not yet closed */
	int c;
	for (c = in_next(); c > 0; c = in_next()) {
		if (!depth && (c == ',' || c == ')'))
			break;
		sbuf_add(sbuf, c);
		if (c == '(')
			depth++;
		else if (c == ')')
			depth--;
	}
	return c != ',';
}
166 /* expand a macro; return zero on success */
167 static int tok_expand(void)
169 char *args[10] = {NULL};
170 struct sbuf sbufs[10];
171 int i, n = 0;
172 int pbeg;
173 tok_preview(tok);
174 if (!in_expand(tok, NULL))
175 return 0;
176 pbeg = in_macrocall(tok);
177 if (pbeg) {
178 tok_unpreview(tok + pbeg + 1);
179 tok[pbeg] = '\0';
180 while (n <= 9) {
181 sbuf_init(&sbufs[n]);
182 if (tok_readarg(&sbufs[n++]))
183 break;
185 for (i = 0; i < n; i++)
186 args[i] = sbuf_buf(&sbufs[i]);
187 in_expand(tok, args);
188 for (i = 0; i < n; i++)
189 sbuf_done(&sbufs[i]);
190 return 0;
192 tok_unpreview(tok);
193 return 1;
196 /* read until .EQ or eqn_beg */
197 int tok_eqn(void)
199 char ln[LNLEN];
200 char *s = ln;
201 int c;
202 tok_cursep = 1;
203 while ((c = in_next()) > 0) {
204 if (c == eqn_beg) {
205 *s = '\0';
206 printf(".%s %s \"%s\n",
207 tok_part ? "as" : "ds", EQNS, ln);
208 tok_part = 1;
209 tok_line = 1;
210 return 0;
212 *s++ = c;
213 if (c == '\n') {
214 *s = '\0';
215 s = ln;
217 if (c == '\n' && !tok_part) {
218 printf("%s", ln);
219 tok_lf(ln);
220 if (tok_eq(ln) && !tok_en()) {
221 tok_eqen = 1;
222 return 0;
225 if (c == '\n' && tok_part) {
226 printf(".lf %d\n", in_lineget());
227 printf("\\*%s%s", escarg(EQNS), ln);
228 tok_part = 0;
231 return 1;
234 /* collect the output of this eqn block */
235 void tok_eqnout(char *s)
237 if (!tok_part) {
238 printf(".ds %s \"%s%s%s\n", EQNS, ESAVE, s, ELOAD);
239 printf(".lf %d\n", in_lineget() - 1);
240 printf("\\&\\*%s\n", escarg(EQNS));
241 } else {
242 printf(".as %s \"%s%s%s\n", EQNS, ESAVE, s, ELOAD);
/*
 * Return the length in bytes of the utf-8 character whose first byte is c:
 * 0 for c == 0, 2 to 6 for lead bytes 0xc0 and above, and 1 for any
 * other value.
 */
static int utf8len(int c)
{
	static const int lead[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
	int len = 1;
	int i;
	if (!c)
		return 0;
	/* each threshold reached adds one byte to the sequence */
	for (i = 0; i < 5 && c >= lead[i]; i++)
		len++;
	return len;
}
264 /* return the type of a token */
265 static int char_type(char *s)
267 int c = (unsigned char) s[0];
268 int t;
269 if (isdigit(c))
270 return T_NUMBER;
271 if (c == '"')
272 return T_STRING;
273 if ((t = def_type(s)) >= 0)
274 return t;
275 if (c == '~' || c == '^')
276 return T_GAP;
277 if (ispunct(c) && (c != '\\' || !s[1]))
278 return T_ORD;
279 return T_LETTER;
282 /* read the next token */
283 static int tok_read(void)
285 char *s = tok;
286 int c, c2;
287 int i;
288 *s = '\0';
289 c = tok_next();
290 if (c <= 0)
291 return 1;
292 tok_prevsep = tok_cursep;
293 tok_cursep = !!strchr(T_SEP, c);
294 if (c == ' ' || c == '\n') {
295 while (c > 0 && (c == ' ' || c == '\n'))
296 c = tok_next();
297 tok_back(c);
298 *s++ = ' ';
299 *s = '\0';
300 tok_curtype = T_SPACE;
301 return 0;
303 if (c == '\t') {
304 *s++ = '\t';
305 *s = '\0';
306 tok_curtype = T_TAB;
307 return 0;
309 if (tok_prevsep) {
310 if (c == '$') {
311 c2 = tok_next();
312 if (c2 >= '1' && c2 <= '9' && !in_arg(c2 - '0'))
313 return tok_read();
314 tok_back(c2);
316 tok_back(c);
317 if (!tok_keyword()) {
318 tok_curtype = T_KEYWORD;
319 tok_cursep = 1;
320 return 0;
322 if (!tok_expand()) {
323 tok_cursep = 1;
324 return tok_read();
326 c = tok_next();
328 if (strchr(T_SOFTSEP, c)) {
329 *s++ = c;
330 if (c == '\\') {
331 c = tok_next();
332 if (c == '(') {
333 *s++ = c;
334 *s++ = tok_next();
335 *s++ = tok_next();
336 } else if (c == '[') {
337 while (c && c != ']') {
338 *s++ = c;
339 c = tok_next();
341 *s++ = ']';
343 } else if (c == '"') {
344 c = tok_next();
345 while (c > 0 && c != '"') {
346 if (c == '\\') {
347 c2 = tok_next();
348 if (c2 == '"')
349 c = '"';
350 else
351 tok_back(c2);
353 *s++ = c;
354 c = tok_next();
356 *s++ = '"';
357 } else {
358 /* two-character operators */
359 c2 = tok_next();
360 switch (T_BIN(c, c2)) {
361 case T_BIN('<', '='):
362 case T_BIN('>', '='):
363 case T_BIN('=', '='):
364 case T_BIN('!', '='):
365 case T_BIN('>', '>'):
366 case T_BIN('<', '<'):
367 case T_BIN(':', '='):
368 case T_BIN('-', '>'):
369 case T_BIN('<', '-'):
370 case T_BIN('-', '+'):
371 *s++ = c2;
372 break;
373 default:
374 tok_back(c2);
377 *s = '\0';
378 tok_curtype = char_type(tok);
379 return 0;
381 *s++ = c;
382 i = utf8len(c);
383 while (--i > 0)
384 *s++ = tok_next();
385 *s = '\0';
386 tok_curtype = char_type(tok);
387 return 0;
390 /* current token */
391 char *tok_get(void)
393 return tok[0] ? tok : NULL;
396 /* current token type */
397 int tok_type(void)
399 return tok[0] ? tok_curtype : 0;
402 /* return nonzero if current token is a separator */
403 int tok_sep(int soft)
405 return !tok_get() || tok_curtype == T_KEYWORD ||
406 strchr(soft ? T_SOFTSEP : T_SEP, (unsigned char) tok_get()[0]);
409 /* read the next token, return the previous */
410 char *tok_pop(void)
412 strcpy(tok_prev, tok);
413 tok_read();
414 return tok_prev[0] ? tok_prev : NULL;
417 /* like tok_pop() but ignore T_SPACE tokens; if sep, read until T_SEP */
418 char *tok_poptext(int sep)
420 while (tok_type() == T_SPACE)
421 tok_read();
422 tok_prev[0] = '\0';
423 do {
424 strcat(tok_prev, tok);
425 tok_read();
426 } while (tok[0] && !tok_sep(!sep));
427 return tok_prev[0] ? tok_prev : NULL;
430 /* skip spaces */
431 static void tok_blanks(void)
433 while (tok_type() == T_SPACE)
434 tok_pop();
437 /* if the next token is s, return zero and skip it */
438 int tok_jmp(char *s)
440 tok_blanks();
441 if (tok_get() && !s[1] && strchr("{}~^\t", s[0]) && !strcmp(s, tok_get())) {
442 tok_pop();
443 return 0;
445 if (tok_type() != T_KEYWORD || !tok_get() || strcmp(s, tok_get()))
446 return 1;
447 tok_pop();
448 return 0;
451 /* read delim command */
452 void tok_delim(void)
454 char delim[NMLEN];
455 tok_preview(delim);
456 if (!strcmp("off", delim)) {
457 eqn_beg = 0;
458 eqn_end = 0;
459 } else {
460 eqn_beg = delim[0];
461 eqn_end = delim[1];
/*
 * Read a macro body into def.  The first character that is not white
 * space is the delimiter; the body is everything up to its next occurrence
 * or the end of input.
 */
static void tok_macrodef(struct sbuf *def)
{
	int delim;
	int c;
	do {
		c = in_next();
	} while (c > 0 && isspace(c));
	delim = c;
	for (c = in_next(); c > 0 && c != delim; c = in_next())
		sbuf_add(def, c);
}
481 /* read the next macro command */
482 void tok_macro(void)
484 char name[NMLEN];
485 struct sbuf def;
486 tok_preview(name);
487 sbuf_init(&def);
488 tok_macrodef(&def);
489 in_define(name, sbuf_buf(&def));
490 sbuf_done(&def);
493 /* return 1 if inside inline equations */
494 int tok_inline(void)
496 return tok_line;