README: make the instructions for CM fonts more concise
[neateqn.git] / tok.c
blobf9d8fbda47faf935f251ccb276d01914043dab38
1 /* the preprocessor and tokenizer */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "eqn.h"
/* pack two characters into one integer; used for the two-character operator switch in tok_read() */
#define T_BIN(hi, lo)	(((hi) << 8) | (lo))
/* characters that always terminate a word */
#define T_SEP		"^~{}\"\n\t "
/* T_SEP plus the punctuation that terminates a word when soft separators are requested */
#define T_SOFTSEP	(T_SEP "=:|.+-*/\\,()[]<>!")
/* troff text placed before an equation's output by tok_eqnout(); presumably saves font and size */
#define ESAVE		"\\E*[.eqnbeg]" \
			"\\R'" EQNFN "0 \\En(.f'" \
			"\\R'" EQNSZ "0 \\En(.s'"
/* troff text placed after an equation's output by tok_eqnout(); presumably restores font and size */
#define ELOAD		"\\f[\\En[" EQNFN "0]]" \
			"\\s[\\En[" EQNSZ "0]]" \
			"\\E*[.eqnend]"
/* words that tok_keyword() reports as eqn keywords; looked up by linear search */
static char *kwds[] = {
	"fwd", "down", "back", "up",
	"bold", "italic", "roman", "font", "fat", "size",
	"bar", "dot", "dotdot", "dyad", "hat", "under", "vec", "tilde",
	"sub", "sup", "from", "to", "vcenter",
	"left", "right", "over", "sqrt",
	"pile", "lpile", "cpile", "rpile", "above",
	"matrix", "col", "ccol", "lcol", "rcol",
	"delim", "define",
	"gfont", "grfont", "gbfont", "gsize", "set", "chartype",
	"mark", "lineup", "bracketsizes", "bracketpieces", "breakcost",
};
27 static int tok_eqen; /* non-zero if inside .EQ/.EN */
28 static int tok_line; /* inside inline eqn block */
29 static int tok_part; /* partial line with inline eqn blocks */
30 static char tok[LNLEN]; /* current token */
31 static char tok_prev[LNLEN]; /* previous token */
32 static int tok_curtype; /* type of current token */
33 static int tok_cursep; /* current character is a separator */
34 static int tok_prevsep; /* previous character was a separator */
35 static int eqn_beg, eqn_end; /* inline eqn delimiters */
37 /* return zero if troff request .ab is read */
38 static int tok_req(int a, int b)
40 int eqln[LNLEN];
41 int i = 0;
42 int ret = 0;
43 eqln[i++] = in_next();
44 if (eqln[i - 1] != '.')
45 goto failed;
46 eqln[i++] = in_next();
47 while (eqln[i - 1] == ' ' && i < sizeof(eqln) - 4)
48 eqln[i++] = in_next();
49 if (eqln[i - 1] != a)
50 goto failed;
51 eqln[i++] = in_next();
52 if (eqln[i - 1] != b)
53 goto failed;
54 ret = 1;
55 failed:
56 while (i > 0)
57 in_back(eqln[--i]);
58 return ret;
/* return nonzero if the input continues with an .EN request; tok_req() consumes nothing */
static int tok_en(void)
{
	int at_en = tok_req('E', 'N');

	return at_en;
}
/* return nonzero if line s is an .EQ request: a dot, optional whitespace, then "EQ" */
static int tok_eq(char *s)
{
	char *p = s;

	if (p[0] != '.')
		return 0;
	for (p++; isspace((unsigned char) *p); p++)
		;
	if (p[0] != 'E')
		return 0;
	return p[1] == 'Q';
}
/*
 * Recognise an .lf request in line s.  Return zero if s is not an .lf
 * request.  Otherwise return 1, and if a digit follows the request name,
 * pass the number to in_lineset().
 */
static int tok_lf(char *s)
{
	char *p = s;

	if (*p != '.')
		return 0;
	for (p++; isspace((unsigned char) *p); p++)
		;
	if (p[0] != 'l' || p[1] != 'f')
		return 0;
	for (p += 2; isspace((unsigned char) *p); p++)
		;
	if (isdigit((unsigned char) *p))
		in_lineset(atoi(p));
	return 1;
}
93 /* read the next input character */
94 static int tok_next(void)
96 int c;
97 if (!tok_eqen && !tok_line)
98 return 0;
99 c = in_next();
100 if (tok_eqen && c == '\n' && tok_en())
101 tok_eqen = 0;
102 if (tok_line && (in_top() && c == eqn_end)) {
103 tok_line = 0;
104 return 0;
106 return c;
109 /* push back the last character read */
110 static void tok_back(int c)
112 if (tok_eqen || tok_line)
113 in_back(c);
116 /* read the next word */
117 static void tok_preview(char *s)
119 int c = in_next();
120 int n = 0;
121 while (c > 0 && !strchr(T_SEP, c) &&
122 (!tok_line || (!in_top() || c != eqn_end))) {
123 s[n++] = c;
124 c = in_next();
126 s[n] = '\0';
127 in_back(c);
/* return the word s to the input, last character first, so it is read again in order */
static void tok_unpreview(char *s)
{
	char *p = s + strlen(s);

	while (p > s)
		in_back((unsigned char) *--p);
}
138 /* read a keyword; return zero on success */
139 static int tok_keyword(void)
141 int i;
142 tok_preview(tok);
143 for (i = 0; i < LEN(kwds); i++)
144 if (!strcmp(kwds[i], tok))
145 return 0;
146 tok_unpreview(tok);
147 return 1;
/*
 * Append the next argument of a macro call to sbuf.  The argument ends at
 * a ',' or ')' that is not inside nested parentheses, or at end of input.
 * Return zero if it ended at a ',' and 1 otherwise.
 */
static int tok_readarg(struct sbuf *sbuf)
{
	int depth = 0;		/* currently open nested parentheses */
	int ch;

	for (ch = in_next(); ch > 0; ch = in_next()) {
		if (!depth && (ch == ',' || ch == ')'))
			break;
		sbuf_add(sbuf, ch);
		if (ch == '(')
			depth++;
		else if (ch == ')')
			depth--;
	}
	return ch != ',';
}
166 /* expand a macro; return zero on success */
167 static int tok_expand(void)
169 char *args[10] = {NULL};
170 struct sbuf sbufs[10];
171 int i, n = 0;
172 int pbeg;
173 tok_preview(tok);
174 if (!in_expand(tok, NULL))
175 return 0;
176 pbeg = in_macrocall(tok);
177 if (pbeg) {
178 tok_unpreview(tok + pbeg + 1);
179 tok[pbeg] = '\0';
180 while (n <= 9) {
181 sbuf_init(&sbufs[n]);
182 if (tok_readarg(&sbufs[n++]))
183 break;
185 for (i = 0; i < n; i++)
186 args[i] = sbuf_buf(&sbufs[i]);
187 in_expand(tok, args);
188 for (i = 0; i < n; i++)
189 sbuf_done(&sbufs[i]);
190 return 0;
192 tok_unpreview(tok);
193 return 1;
196 /* read until .EQ or eqn_beg */
197 int tok_eqn(void)
199 struct sbuf ln;
200 int c;
201 tok_cursep = 1;
202 sbuf_init(&ln);
203 while ((c = in_next()) > 0) {
204 if (c == eqn_beg) {
205 printf(".eo\n");
206 printf(".%s %s \"%s\n",
207 tok_part ? "as" : "ds", EQNS, sbuf_buf(&ln));
208 sbuf_done(&ln);
209 printf(".ec\n");
210 tok_part = 1;
211 tok_line = 1;
212 return 0;
214 sbuf_add(&ln, c);
215 if (c == '\n' && !tok_part) {
216 printf("%s", sbuf_buf(&ln));
217 tok_lf(sbuf_buf(&ln));
218 if (tok_eq(sbuf_buf(&ln)) && !tok_en()) {
219 tok_eqen = 1;
220 sbuf_done(&ln);
221 return 0;
224 if (c == '\n' && tok_part) {
225 printf(".lf %d\n", in_lineget());
226 printf("\\*%s%s", escarg(EQNS), sbuf_buf(&ln));
227 tok_part = 0;
229 if (c == '\n')
230 sbuf_cut(&ln, 0);
232 sbuf_done(&ln);
233 return 1;
236 /* collect the output of this eqn block */
237 void tok_eqnout(char *s)
239 if (!tok_part) {
240 printf(".ds %s \"%s%s%s\n", EQNS, ESAVE, s, ELOAD);
241 printf(".lf %d\n", in_lineget() - 1);
242 printf("\\&\\*%s\n", escarg(EQNS));
243 } else {
244 printf(".as %s \"%s%s%s\n", EQNS, ESAVE, s, ELOAD);
/*
 * Return the length in bytes of the utf-8 sequence whose first byte is c.
 * Zero yields 0; any other value that is not a lead byte yields 1.
 */
static int utf8len(int c)
{
	/* smallest lead byte of 6-, 5-, 4-, 3- and 2-byte sequences */
	static const int lead[] = {0xfc, 0xf8, 0xf0, 0xe0, 0xc0};
	int i;

	if (c >= 0x01 && c <= 0x7f)
		return 1;
	for (i = 0; i < 5; i++)
		if (c >= lead[i])
			return 6 - i;
	return c ? 1 : 0;
}
266 /* return the type of a token */
267 static int char_type(char *s)
269 int c = (unsigned char) s[0];
270 int t;
271 if (isdigit(c))
272 return T_NUMBER;
273 if (c == '"')
274 return T_STRING;
275 if ((t = def_type(s)) >= 0)
276 return t;
277 if (c == '~' || c == '^')
278 return T_GAP;
279 if (ispunct(c) && (c != '\\' || !s[1]))
280 return T_ORD;
281 return T_LETTER;
284 /* read the next token */
285 static int tok_read(void)
287 char *s = tok;
288 char *e = tok + sizeof(tok) - 2;
289 int c, c2;
290 int i;
291 *s = '\0';
292 c = tok_next();
293 if (c <= 0)
294 return 1;
295 tok_prevsep = tok_cursep;
296 tok_cursep = !!strchr(T_SEP, c);
297 if (c == ' ' || c == '\n') {
298 while (c > 0 && (c == ' ' || c == '\n'))
299 c = tok_next();
300 tok_back(c);
301 *s++ = ' ';
302 *s = '\0';
303 tok_curtype = T_SPACE;
304 return 0;
306 if (c == '\t') {
307 *s++ = '\t';
308 *s = '\0';
309 tok_curtype = T_TAB;
310 return 0;
312 if (tok_prevsep) {
313 if (c == '$') {
314 c2 = tok_next();
315 if (c2 >= '1' && c2 <= '9' && !in_arg(c2 - '0'))
316 return tok_read();
317 tok_back(c2);
319 tok_back(c);
320 if (!tok_keyword()) {
321 tok_curtype = T_KEYWORD;
322 tok_cursep = 1;
323 return 0;
325 if (!tok_expand()) {
326 tok_cursep = 1;
327 return tok_read();
329 c = tok_next();
331 if (strchr(T_SOFTSEP, c)) {
332 *s++ = c;
333 if (c == '\\') {
334 c = tok_next();
335 if (c == '(') {
336 *s++ = c;
337 *s++ = tok_next();
338 *s++ = tok_next();
339 } else if (c == '[') {
340 while (c && c != ']') {
341 if (s < e)
342 *s++ = c;
343 c = tok_next();
345 *s++ = ']';
347 } else if (c == '"') {
348 c = tok_next();
349 while (c > 0 && c != '"') {
350 if (c == '\\') {
351 c2 = tok_next();
352 if (c2 == '"')
353 c = '"';
354 else
355 tok_back(c2);
357 if (s < e)
358 *s++ = c;
359 c = tok_next();
361 *s++ = '"';
362 } else {
363 /* two-character operators */
364 c2 = tok_next();
365 switch (T_BIN(c, c2)) {
366 case T_BIN('<', '='):
367 case T_BIN('>', '='):
368 case T_BIN('=', '='):
369 case T_BIN('!', '='):
370 case T_BIN('>', '>'):
371 case T_BIN('<', '<'):
372 case T_BIN(':', '='):
373 case T_BIN('-', '>'):
374 case T_BIN('<', '-'):
375 case T_BIN('-', '+'):
376 *s++ = c2;
377 break;
378 default:
379 tok_back(c2);
382 *s = '\0';
383 tok_curtype = char_type(tok);
384 return 0;
386 *s++ = c;
387 i = utf8len(c);
388 while (--i > 0 && s < e)
389 *s++ = tok_next();
390 *s = '\0';
391 tok_curtype = char_type(tok);
392 return 0;
395 /* current token */
396 char *tok_get(void)
398 return tok[0] ? tok : NULL;
401 /* current token type */
402 int tok_type(void)
404 return tok[0] ? tok_curtype : 0;
407 /* return nonzero if current token is a separator */
408 int tok_sep(int soft)
410 return !tok_get() || tok_curtype == T_KEYWORD ||
411 strchr(soft ? T_SOFTSEP : T_SEP, (unsigned char) tok_get()[0]);
414 /* read the next token, return the previous */
415 char *tok_pop(void)
417 strcpy(tok_prev, tok);
418 tok_read();
419 return tok_prev[0] ? tok_prev : NULL;
422 /* like tok_pop() but ignore T_SPACE tokens; if sep, read until T_SEP */
423 char *tok_poptext(int sep)
425 while (tok_type() == T_SPACE)
426 tok_read();
427 tok_prev[0] = '\0';
428 do {
429 strcat(tok_prev, tok);
430 tok_read();
431 } while (tok[0] && !tok_sep(!sep));
432 return tok_prev[0] ? tok_prev : NULL;
435 /* skip spaces */
436 static void tok_blanks(void)
438 while (tok_type() == T_SPACE)
439 tok_pop();
442 /* if the next token is s, return zero and skip it */
443 int tok_jmp(char *s)
445 tok_blanks();
446 if (tok_get() && !s[1] && strchr("{}~^\t", s[0]) && !strcmp(s, tok_get())) {
447 tok_pop();
448 return 0;
450 if (tok_type() != T_KEYWORD || !tok_get() || strcmp(s, tok_get()))
451 return 1;
452 tok_pop();
453 return 0;
456 /* read delim command */
457 void tok_delim(void)
459 char delim[NMLEN];
460 tok_preview(delim);
461 if (!strcmp("off", delim)) {
462 eqn_beg = 0;
463 eqn_end = 0;
464 } else {
465 eqn_beg = delim[0];
466 eqn_end = delim[1];
/*
 * Read a macro body into def.  The first character after any whitespace
 * is the delimiter; the body is everything up to the next occurrence of
 * that delimiter or the end of input.
 */
static void tok_macrodef(struct sbuf *def)
{
	int delim;
	int ch;

	do {
		ch = in_next();
	} while (ch > 0 && isspace(ch));
	delim = ch;
	for (ch = in_next(); ch > 0 && ch != delim; ch = in_next())
		sbuf_add(def, ch);
}
486 /* read the next macro command */
487 void tok_macro(void)
489 char name[NMLEN];
490 struct sbuf def;
491 tok_preview(name);
492 sbuf_init(&def);
493 tok_macrodef(&def);
494 in_define(name, sbuf_buf(&def));
495 sbuf_done(&def);
498 /* return 1 if inside inline equations */
499 int tok_inline(void)
501 return tok_line;