tok: allow parting characters to be redefined
[neateqn.git] / tok.c
blob 319293fa2a1f057ea6b91ec89ef79906d42f6a41
1 /* the preprocessor and tokenizer */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "eqn.h"
/* pack two characters into one integer; used to match two-character operators */
#define T_BIN(c1, c2)	(((c1) << 8) | (c2))
/* characters looked up with strchr() when testing for soft separators */
#define T_SOFTSEP	("^~{}(),\"\n\t =:|.+-*/\\,()[]<>!")
/* troff text written before (ESAVE) and after (ELOAD) the output of an equation */
#define ESAVE		"\\E*[.eqnbeg]\\R'" EQNFN "0 \\En(.f'\\R'" EQNSZ "0 \\En(.s'"
#define ELOAD		"\\f[\\En[" EQNFN "0]]\\s[\\En[" EQNSZ "0]]\\E*[.eqnend]"
/* words that tok_keyword() accepts as eqn keywords */
static char *kwds[] = {
	"fwd", "down", "back", "up",
	"bold", "italic", "roman", "font", "fat", "size",
	"bar", "dot", "dotdot", "dyad", "hat", "under", "vec", "tilde",
	"sub", "sup", "from", "to", "vcenter",
	"left", "right", "over", "sqrt",
	"pile", "lpile", "cpile", "rpile", "above",
	"matrix", "col", "ccol", "lcol", "rcol",
	"delim", "define",
	"gfont", "grfont", "gbfont", "gsize", "set", "chartype",
	"mark", "lineup", "bracketsizes", "bracketpieces", "breakcost",
};
26 static int tok_eqen; /* non-zero if inside .EQ/.EN */
27 static int tok_line; /* inside inline eqn block */
28 static int tok_part; /* partial line with inline eqn blocks */
29 static char tok[LNLEN]; /* current token */
30 static char tok_prev[LNLEN]; /* previous token */
31 static int tok_curtype; /* type of current token */
32 static int tok_cursep; /* current character is a separator */
33 static int tok_prevsep; /* previous character was a separator */
34 static int eqn_beg, eqn_end; /* inline eqn delimiters */
36 /* return zero if troff request .ab is read */
37 static int tok_req(int a, int b)
39 int eqln[LNLEN];
40 int i = 0;
41 int ret = 0;
42 eqln[i++] = src_next();
43 if (eqln[i - 1] != '.')
44 goto failed;
45 eqln[i++] = src_next();
46 while (eqln[i - 1] == ' ' && i < sizeof(eqln) - 4)
47 eqln[i++] = src_next();
48 if (eqln[i - 1] != a)
49 goto failed;
50 eqln[i++] = src_next();
51 if (eqln[i - 1] != b)
52 goto failed;
53 ret = 1;
54 failed:
55 while (i > 0)
56 src_back(eqln[--i]);
57 return ret;
/* check for an upcoming .EN request without consuming it; nonzero if found */
static int tok_en(void)
{
	int found = tok_req('E', 'N');
	return found;
}
/* return nonzero if s is a '.', optional whitespace, and then "EQ" */
static int tok_eq(char *s)
{
	char *p = s;
	if (p[0] != '.')
		return 0;
	/* skip the blanks between the dot and the request name */
	for (p++; isspace((unsigned char) *p); p++)
		;
	return p[0] == 'E' && p[1] == 'Q';
}
/*
 * Recognize a ".lf" request in the line s.  If a number follows the
 * request, it is passed to src_lineset().  Returns nonzero if the line
 * is an lf request and zero otherwise.
 */
static int tok_lf(char *s)
{
	if (s[0] != '.')
		return 0;
	s++;
	while (isspace((unsigned char) *s))
		s++;
	if (s[0] != 'l' || s[1] != 'f')
		return 0;
	s += 2;
	while (isspace((unsigned char) *s))
		s++;
	if (isdigit((unsigned char) *s))
		src_lineset(atoi(s));
	return 1;
}
92 /* read the next input character */
93 static int tok_next(void)
95 int c;
96 if (!tok_eqen && !tok_line)
97 return 0;
98 c = src_next();
99 if (tok_eqen && c == '\n' && tok_en())
100 tok_eqen = 0;
101 if (tok_line && (src_top() && c == eqn_end)) {
102 tok_line = 0;
103 return 0;
105 return c;
108 /* push back the last character read */
109 static void tok_back(int c)
111 if (tok_eqen || tok_line)
112 src_back(c);
115 /* read the next word */
116 static void tok_preview(char *s)
118 int c = src_next();
119 int n = 0;
120 if (c > 0 && def_chopped(c)) {
121 s[n++] = c;
122 s[n] = '\0';
123 return;
125 while (c > 0 && !def_chopped(c) && (!tok_line || (!src_top() || c != eqn_end))) {
126 s[n++] = c;
127 c = src_next();
129 s[n] = '\0';
130 src_back(c);
/* return the word s to the input, last character first */
static void tok_unpreview(char *s)
{
	char *p = s + strlen(s);
	while (p > s)
		src_back((unsigned char) *--p);
}
141 /* read a keyword; return zero on success */
142 static int tok_keyword(void)
144 int i;
145 tok_preview(tok);
146 for (i = 0; i < LEN(kwds); i++)
147 if (!strcmp(kwds[i], tok))
148 return 0;
149 tok_unpreview(tok);
150 return 1;
/*
 * Read one argument of a macro call into sbuf.
 *
 * The argument ends at a ',' or ')' that is outside double quotes and
 * outside nested parentheses, or at end of input.  Troff escapes
 * introduced by a backslash are copied whole so that their characters
 * are not taken for delimiters.
 *
 * Returns zero if the argument ended with ',' and one otherwise.
 */
static int tok_readarg(struct sbuf *sbuf)
{
	int nest = 0;		/* nonzero while inside nested parentheses */
	int quoted = 0;		/* inside double quotes */
	int c;
	for (c = src_next(); c > 0; c = src_next()) {
		if (!nest && !quoted && (c == ',' || c == ')'))
			break;
		sbuf_add(sbuf, c);
		switch (c) {
		case '(':
			if (!quoted)
				nest++;
			break;
		case ')':
			if (!quoted)
				nest--;
			break;
		case '"':
			quoted = !quoted;
			break;
		case '\\':
			c = src_next();
			sbuf_add(sbuf, c);
			/* \*x and \nx: the name follows one character later */
			if (c == '*' || c == 'n') {
				c = src_next();
				sbuf_add(sbuf, c);
			}
			if (c == '(') {
				/* two-character name */
				c = src_next();
				sbuf_add(sbuf, c);
				c = src_next();
				sbuf_add(sbuf, c);
			} else if (c == '[') {
				/* bracketed name: copy up to and including ']' */
				while (c > 0 && c != ']') {
					c = src_next();
					sbuf_add(sbuf, c);
				}
			}
			break;
		default:
			break;
		}
	}
	return c != ',';
}
184 /* expand a macro; return zero on success */
185 static int tok_expand(void)
187 char *args[10] = {NULL};
188 struct sbuf sbufs[10];
189 int i, n = 0;
190 tok_preview(tok);
191 if (src_macro(tok)) {
192 int c = src_next();
193 src_back(c);
194 if (c == '(') { /* macro arguments follow */
195 src_next();
196 while (n <= 9) {
197 sbuf_init(&sbufs[n]);
198 if (tok_readarg(&sbufs[n++]))
199 break;
202 for (i = 0; i < n; i++)
203 args[i] = sbuf_buf(&sbufs[i]);
204 src_expand(tok, args);
205 for (i = 0; i < n; i++)
206 sbuf_done(&sbufs[i]);
207 return 0;
209 tok_unpreview(tok);
210 return 1;
213 /* read until .EQ or eqn_beg */
214 int tok_eqn(void)
216 struct sbuf ln;
217 int c;
218 tok_cursep = 1;
219 sbuf_init(&ln);
220 while ((c = src_next()) > 0) {
221 if (c == eqn_beg) {
222 printf(".eo\n");
223 printf(".%s %s \"%s\n",
224 tok_part ? "as" : "ds", EQNS, sbuf_buf(&ln));
225 sbuf_done(&ln);
226 printf(".ec\n");
227 tok_part = 1;
228 tok_line = 1;
229 return 0;
231 sbuf_add(&ln, c);
232 if (c == '\n' && !tok_part) {
233 printf("%s", sbuf_buf(&ln));
234 tok_lf(sbuf_buf(&ln));
235 if (tok_eq(sbuf_buf(&ln)) && !tok_en()) {
236 tok_eqen = 1;
237 sbuf_done(&ln);
238 return 0;
241 if (c == '\n' && tok_part) {
242 printf(".lf %d\n", src_lineget());
243 printf("\\*%s%s", escarg(EQNS), sbuf_buf(&ln));
244 tok_part = 0;
246 if (c == '\n')
247 sbuf_cut(&ln, 0);
249 sbuf_done(&ln);
250 return 1;
253 /* collect the output of this eqn block */
254 void tok_eqnout(char *s)
256 if (!tok_part) {
257 printf(".ds %s \"%s%s%s\n", EQNS, ESAVE, s, ELOAD);
258 printf(".lf %d\n", src_lineget() - 1);
259 printf("\\&\\*%s\n", escarg(EQNS));
260 } else {
261 printf(".as %s \"%s%s%s\n", EQNS, ESAVE, s, ELOAD);
/*
 * Return the length in bytes of a utf-8 character, given its first
 * byte c.  Zero is returned for c == 0; any other value that is not a
 * multi-byte lead byte counts as a single byte.
 */
static int utf8len(int c)
{
	if (c == 0)
		return 0;
	if (c < 0xc0)
		return 1;
	if (c < 0xe0)
		return 2;
	if (c < 0xf0)
		return 3;
	if (c < 0xf8)
		return 4;
	if (c < 0xfc)
		return 5;
	return 6;
}
283 /* return the type of a token */
284 static int char_type(char *s)
286 int c = (unsigned char) s[0];
287 int t;
288 if (isdigit(c))
289 return T_NUMBER;
290 if (c == '"')
291 return T_STRING;
292 if ((t = def_type(s)) >= 0)
293 return t;
294 if (c == '~' || c == '^')
295 return T_GAP;
296 if (ispunct(c) && (c != '\\' || !s[1]))
297 return T_ORD;
298 return T_LETTER;
301 /* read the next token */
302 static int tok_read(void)
304 char *s = tok;
305 char *e = tok + sizeof(tok) - 2;
306 int c, c2;
307 int i;
308 *s = '\0';
309 c = tok_next();
310 if (c <= 0)
311 return 1;
312 tok_prevsep = tok_cursep;
313 tok_cursep = def_chopped(c);
314 if (tok_cursep)
315 tok_prevsep = 1;
316 if (c == ' ' || c == '\n') {
317 while (c > 0 && (c == ' ' || c == '\n'))
318 c = tok_next();
319 tok_back(c);
320 *s++ = ' ';
321 *s = '\0';
322 tok_curtype = T_SPACE;
323 return 0;
325 if (c == '\t') {
326 *s++ = '\t';
327 *s = '\0';
328 tok_curtype = T_TAB;
329 return 0;
331 if (tok_prevsep) {
332 if (c == '$') {
333 c2 = tok_next();
334 if (c2 >= '1' && c2 <= '9' && !src_arg(c2 - '0')) {
335 tok_cursep = 1;
336 return tok_read();
338 tok_back(c2);
340 tok_back(c);
341 if (!tok_keyword()) {
342 tok_curtype = T_KEYWORD;
343 tok_cursep = 1;
344 return 0;
346 if (!tok_expand()) {
347 tok_cursep = 1;
348 return tok_read();
350 c = tok_next();
352 if (strchr(T_SOFTSEP, c)) {
353 *s++ = c;
354 if (c == '\\') {
355 c = tok_next();
356 if (c == '(') {
357 *s++ = c;
358 *s++ = tok_next();
359 *s++ = tok_next();
360 } else if (c == '[') {
361 while (c && c != ']') {
362 if (s < e)
363 *s++ = c;
364 c = tok_next();
366 *s++ = ']';
368 } else if (c == '"') {
369 c = tok_next();
370 while (c > 0 && c != '"') {
371 if (c == '\\') {
372 c2 = tok_next();
373 if (c2 == '"')
374 c = '"';
375 else
376 tok_back(c2);
378 if (s < e)
379 *s++ = c;
380 c = tok_next();
382 *s++ = '"';
383 } else {
384 /* two-character operators */
385 c2 = tok_next();
386 switch (T_BIN(c, c2)) {
387 case T_BIN('<', '='):
388 case T_BIN('>', '='):
389 case T_BIN('=', '='):
390 case T_BIN('!', '='):
391 case T_BIN('>', '>'):
392 case T_BIN('<', '<'):
393 case T_BIN(':', '='):
394 case T_BIN('-', '>'):
395 case T_BIN('<', '-'):
396 case T_BIN('-', '+'):
397 *s++ = c2;
398 break;
399 default:
400 tok_back(c2);
403 *s = '\0';
404 tok_curtype = char_type(tok);
405 return 0;
407 *s++ = c;
408 i = utf8len(c);
409 while (--i > 0 && s < e)
410 *s++ = tok_next();
411 *s = '\0';
412 tok_curtype = char_type(tok);
413 return 0;
416 /* current token */
417 char *tok_get(void)
419 return tok[0] ? tok : NULL;
422 /* current token type */
423 int tok_type(void)
425 return tok[0] ? tok_curtype : 0;
428 /* return nonzero if current token is a chops the equation */
429 int tok_chops(int soft)
431 if (!tok_get() || tok_curtype == T_KEYWORD)
432 return 1;
433 if (soft)
434 return strchr(T_SOFTSEP, (unsigned char) tok_get()[0]) != NULL ;
435 return def_chopped((unsigned char) tok_get()[0]);
438 /* read the next token, return the previous */
439 char *tok_pop(void)
441 strcpy(tok_prev, tok);
442 tok_read();
443 return tok_prev[0] ? tok_prev : NULL;
446 /* like tok_pop() but ignore T_SPACE tokens; if sep, read until chopped */
447 char *tok_poptext(int sep)
449 while (tok_type() == T_SPACE)
450 tok_read();
451 tok_prev[0] = '\0';
452 do {
453 strcat(tok_prev, tok);
454 tok_read();
455 } while (tok[0] && !tok_chops(!sep));
456 return tok_prev[0] ? tok_prev : NULL;
459 /* skip spaces */
460 static void tok_blanks(void)
462 while (tok_type() == T_SPACE)
463 tok_pop();
466 /* if the next token is s, return zero and skip it */
467 int tok_jmp(char *s)
469 tok_blanks();
470 if (tok_get() && !s[1] && strchr("{}~^\t", s[0]) && !strcmp(s, tok_get())) {
471 tok_pop();
472 return 0;
474 if (tok_type() != T_KEYWORD || !tok_get() || strcmp(s, tok_get()))
475 return 1;
476 tok_pop();
477 return 0;
480 /* read delim command */
481 void tok_delim(void)
483 char delim[NMLEN];
484 tok_preview(delim);
485 if (!strcmp("off", delim)) {
486 eqn_beg = 0;
487 eqn_end = 0;
488 } else {
489 eqn_beg = delim[0];
490 eqn_end = delim[1];
/*
 * Read a macro body into def.  The first non-space character is the
 * delimiter; everything up to its next occurrence, or to end of input,
 * is the body.
 */
static void tok_macrodef(struct sbuf *def)
{
	int delim;
	int c;
	do {
		delim = src_next();
	} while (delim > 0 && isspace(delim));
	for (c = src_next(); c > 0 && c != delim; c = src_next())
		sbuf_add(def, c);
}
510 /* read the next macro command */
511 void tok_macro(void)
513 char name[NMLEN];
514 struct sbuf def;
515 tok_preview(name);
516 sbuf_init(&def);
517 tok_macrodef(&def);
518 src_define(name, sbuf_buf(&def));
519 sbuf_done(&def);
522 /* return 1 if inside inline equations */
523 int tok_inline(void)
525 return tok_line;