1 /* the preprocessor and tokenizer */
8 #define T_BIN(c1, c2) (((c1) << 8) | (c2))
9 #define T_SEP "^~{}\"\n\t "
10 #define T_SOFTSEP (T_SEP "=:|.+-*/\\,()[]<>!")
12 static char *kwds
[] = {
13 "fwd", "down", "back", "up",
14 "bold", "italic", "roman", "font", "fat", "size",
15 "bar", "dot", "dotdot", "dyad", "hat", "under", "vec", "tilde",
16 "left", "right", "over", "sqrt", "sub", "sup", "from", "to", "vcenter",
18 "gfont", "grfont", "gbfont",
22 static int tok_eqen
; /* non-zero if inside a .EQ/.EN block */
23 static int tok_line
; /* non-zero if inside an inline eqn block */
24 static int tok_part
; /* non-zero if on a partial line with inline eqn blocks */
25 static char tok
[LNLEN
]; /* current token */
26 static char tok_prev
[LNLEN
]; /* previous token; tok_poptext() also accumulates text here */
27 static int tok_curtype
; /* type of current token (e.g. T_SPACE, T_KEYWORD) */
28 static int tok_cursep
; /* non-zero if the current character is in T_SEP */
29 static int tok_prevsep
; /* value of tok_cursep for the previous character */
30 static int eqn_beg
, eqn_end
; /* characters that begin and end an inline eqn block */
32 /* return zero if the troff request .ab is read, where a and b are the two characters of the request name */
33 static int tok_req(int a
, int b
)
38 eqln
[i
++] = in_next();
39 if (eqln
[i
- 1] != '.')
41 eqln
[i
++] = in_next();
42 while (eqln
[i
- 1] == ' ')
43 eqln
[i
++] = in_next();
46 eqln
[i
++] = in_next();
57 static int tok_en(void)
59 return tok_req('E', 'N');
62 /* does the line start with EQ */
63 static int tok_eq(char *s
)
69 return s
[0] == 'E' && s
[1] == 'Q';
72 /* read the next input character */
73 static int tok_next(void)
76 if (!tok_eqen
&& !tok_line
)
79 if (tok_eqen
&& c
== '\n' && tok_en())
81 if (tok_line
&& c
== eqn_end
) {
88 /* push back the last character read */
89 static void tok_back(int c
)
91 if (tok_eqen
|| tok_line
)
95 /* read the next word */
96 static void tok_preview(char *s
)
100 while (c
> 0 && !strchr(T_SEP
, c
) && (!tok_line
|| c
!= eqn_end
)) {
108 /* push back the given word */
109 static void tok_unpreview(char *s
)
113 in_back((unsigned char) s
[--n
]);
116 /* read a keyword; return zero on success */
117 static int tok_keyword(void)
121 for (i
= 0; i
< LEN(kwds
); i
++)
122 if (!strcmp(kwds
[i
], tok
))
128 /* read the next argument of a macro call; return zero if it was terminated by ',', nonzero otherwise */
129 static int tok_readarg(struct sbuf
*sbuf
)
132 while (c
> 0 && c
!= ',' && c
!= ')') {
136 return c
== ',' ? 0 : 1;
139 /*
 * expand a macro; return zero on success
 *
 * Visible steps: in_expand(tok, NULL) is tested first; in_macrocall(tok)
 * yields pbeg, the text after tok[pbeg] is pushed back, the arguments are
 * read with tok_readarg() into sbufs[], and their strings are passed to
 * in_expand(tok, args).
 *
 * NOTE(review): args[] and sbufs[] have 10 slots; the bound on n is not
 * visible in this extract -- confirm it cannot exceed 10.
 */
140 static int tok_expand(void)
142 char *args
[10] = {NULL
}; /* argument strings handed to in_expand() */
143 struct sbuf sbufs
[10]; /* one buffer per collected argument */
147 if (!in_expand(tok
, NULL
)) /* first attempt: expand tok without arguments */
149 pbeg
= in_macrocall(tok
);
151 tok_unpreview(tok
+ pbeg
+ 1); /* push back what follows tok[pbeg] */
154 sbuf_init(&sbufs
[n
]);
155 if (tok_readarg(&sbufs
[n
++])) /* nonzero: the argument did not end with ',' */
158 for (i
= 0; i
< n
; i
++)
159 args
[i
] = sbuf_buf(&sbufs
[i
]);
160 in_expand(tok
, args
); /* called before the sbuf_done() calls below */
161 for (i
= 0; i
< n
; i
++)
162 sbuf_done(&sbufs
[i
]);
169 /* read until .EQ or eqn_beg */
176 while ((c
= in_next()) > 0) {
179 printf(".%s %s \"%s\n",
180 tok_part
? "as" : "ds", EQNS
, ln
);
190 if (c
== '\n' && !tok_part
) {
192 if (tok_eq(ln
) && !tok_en()) {
197 if (c
== '\n' && tok_part
) {
198 printf("\\*%s%s", escarg(EQNS
), ln
);
205 /* collect the output of this eqn block */
206 void tok_eqnout(char *s
)
209 sprintf(post
, "\\s[\\n[%s]]\\f[\\n[%s]]", EQNSZ
, EQNFN
);
211 printf("%s%s\n", s
, post
);
213 printf(".as %s \"%s%s\n", EQNS
, s
, post
);
216 /* return the length of a utf-8 character based on its first byte */
217 static int utf8len(int c
)
219 if (c
> 0 && c
<= 0x7f)
234 /* return the type of a token */
235 static int char_type(char *s
)
237 int c
= (unsigned char) s
[0];
252 if (c
== '~' || c
== '^')
254 if (ispunct(c
) && (c
!= '\\' || !s
[1]))
259 /* read the next token */
260 static int tok_read(void)
269 tok_prevsep
= tok_cursep
;
270 tok_cursep
= !!strchr(T_SEP
, c
);
271 if (c
== ' ' || c
== '\n') {
272 while (c
> 0 && (c
== ' ' || c
== '\n'))
277 tok_curtype
= T_SPACE
;
289 if (c2
>= '1' && c2
<= '9' && !in_arg(c2
- '0'))
294 if (!tok_keyword()) {
295 tok_curtype
= T_KEYWORD
;
305 if (strchr(T_SOFTSEP
, c
)) {
313 } else if (c
== '[') {
314 while (c
&& c
!= ']') {
320 } else if (c
== '"') {
322 while (c
> 0 && c
!= '"') {
335 /* two-character operators */
337 switch (T_BIN(c
, c2
)) {
338 case T_BIN('<', '='):
339 case T_BIN('>', '='):
340 case T_BIN('=', '='):
341 case T_BIN('!', '='):
342 case T_BIN('~', '='):
343 case T_BIN('>', '>'):
344 case T_BIN('<', '<'):
345 case T_BIN(':', '='):
346 case T_BIN('-', '>'):
347 case T_BIN('<', '-'):
348 case T_BIN('-', '+'):
356 tok_curtype
= char_type(tok
);
364 tok_curtype
= char_type(tok
);
371 return tok
[0] ? tok
: NULL
;
374 /* current token type */
377 return tok
[0] ? tok_curtype
: 0;
380 /* return nonzero if tok_get() is NULL, the current token starts with a T_SEP character, or it is a keyword */
383 return !tok_get() || strchr(T_SEP
, (unsigned char) tok_get()[0]) ||
384 tok_curtype
== T_KEYWORD
;
387 /* read the next token, return the previous */
390 strcpy(tok_prev
, tok
);
392 return tok_prev
[0] ? tok_prev
: NULL
;
395 /* like tok_pop() but read the next T_SEP-separated token */
396 char *tok_poptext(void)
398 while (tok_type() == T_SPACE
)
402 strcat(tok_prev
, tok
);
404 } while (tok
[0] && !tok_sep());
405 return tok_prev
[0] ? tok_prev
: NULL
;
409 static void tok_blanks(void)
411 while (tok_type() == T_SPACE
)
415 /* if the next token is s, return zero and skip it */
419 if (tok_get() && !s
[1] && strchr("{}~^\t", s
[0]) && !strcmp(s
, tok_get())) {
423 if (tok_type() != T_KEYWORD
|| !tok_get() || strcmp(s
, tok_get()))
429 /* read delim command */
434 if (!strcmp("off", delim
)) {
443 /* read macro definition */
444 static void tok_macrodef(struct sbuf
*def
)
449 while (c
> 0 && isspace(c
))
453 while (c
> 0 && c
!= delim
) {
459 /* read the next macro command */
467 in_define(name
, sbuf_buf(&def
));