ren: disable .char for diverted text
[neatroff.git] / char.c
blob6c859e77ee031c4c6248b106fce157fedaae27b8
1 #include <ctype.h>
2 #include <stdio.h>
3 #include <string.h>
4 #include "roff.h"
/*
 * return the length in bytes of the utf-8 sequence introduced by c
 *
 * Zero yields 0 and ASCII bytes (0x01..0x7f) yield 1.  Lead bytes
 * 0xc0 and above yield 2 to 6, depending on their value.  Every
 * other nonzero value (bytes 0x80..0xbf and negative values) yields 1.
 */
int utf8len(int c)
{
	if (c > 0 && c <= 0x7f)
		return 1;
	if (c >= 0xfc)
		return 6;
	if (c >= 0xf8)
		return 5;
	if (c >= 0xf0)
		return 4;
	if (c >= 0xe0)
		return 3;
	if (c >= 0xc0)
		return 2;
	return c != 0;
}
/*
 * return nonzero if s is a single utf-8 character
 *
 * The empty string and a string that ends in the middle of its
 * first multi-byte sequence also yield nonzero, since nothing
 * follows the first character.
 */
int utf8one(char *s)
{
	int l = utf8len((unsigned char) *s);
	int i;
	/* do not index past the terminator of a truncated sequence */
	for (i = 1; i < l; i++)
		if (!s[i])
			return 1;
	return !s[l];
}
/*
 * copy the utf-8 character at *s into d and advance *s past it
 *
 * d is NUL-terminated.  Returns the number of bytes copied, which
 * is zero when *s points to the end of the string.
 */
int utf8read(char **s, char *d)
{
	int l = utf8len((unsigned char) **s);
	int i;
	/* stop at the terminator so that a truncated sequence is not overrun */
	for (i = 0; i < l && (*s)[i]; i++)
		d[i] = (*s)[i];
	d[i] = '\0';
	*s += i;
	return i;
}
/*
 * read one utf-8 character into s using next()
 *
 * Returns the number of bytes stored in s (NUL-terminated).  When
 * the first call to next() returns a negative value, zero is
 * returned and s is left untouched.
 */
int utf8next(char *s, int (*next)(void))
{
	int c = next();
	int l;
	int i;
	if (c < 0)
		return 0;
	l = utf8len(c);
	s[0] = c;
	for (i = 1; i < l; i++) {
		int b = next();
		/* input ended inside the sequence; keep what was read */
		if (b < 0) {
			l = i;
			break;
		}
		s[i] = b;
	}
	s[l] = '\0';
	return l;
}
55 * read the next character or escape sequence (x, \x, \(xy, \[xyz], \C'xyz')
57 * character returned contents of c
58 * x '\0' x
59 * \4x c_ni \4x
60 * \\x '\\' \\x
61 * \\(xy '(' xy
62 * \\[xyz] '[' xyz
63 * \\C'xyz' 'C' xyz
65 int charnext(char *c, int (*next)(void), void (*back)(int))
67 int l, n;
68 if (!utf8next(c, next))
69 return -1;
70 if (c[0] == c_ni) {
71 utf8next(c + 1, next);
72 return c_ni;
74 if (c[0] == c_ec) {
75 utf8next(c + 1, next);
76 if (c[1] == '(') {
77 l = utf8next(c, next);
78 l += utf8next(c + l, next);
79 return '(';
80 } else if (!n_cp && c[1] == '[') {
81 l = 0;
82 n = next();
83 while (n >= 0 && n != '\n' && n != ']' && l < GNLEN - 1) {
84 c[l++] = n;
85 n = next();
87 c[l] = '\0';
88 return '[';
89 } else if (c[1] == 'C') {
90 argnext(c, 'C', next, back);
91 return 'C';
93 return '\\';
95 return '\0';
/* like charnext(), but return -1 if delim was read */
int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim)
{
	int t = charnext(c, next, back);
	/* at end of input c may not have been written; do not compare it */
	if (t < 0)
		return -1;
	return strcmp(c, delim) ? t : -1;
}
105 /* convert back the character read from nextchar() (e.g. xy -> \\(xy) */
106 void charnext_str(char *d, char *c)
108 int c0 = (unsigned char) c[0];
109 if (c0 == c_ec || c0 == c_ni || !c[1] || utf8one(c)) {
110 strcpy(d, c);
111 return;
113 if (!c[2] && utf8len(c0) == 1)
114 sprintf(d, "%c(%s", c_ec, c);
115 else
116 sprintf(d, "%cC'%s'", c_ec, c);
119 /* like charnext() for string buffers */
120 int charread(char **s, char *c)
122 int ret;
123 sstr_push(*s);
124 ret = charnext(c, sstr_next, sstr_back);
125 *s = sstr_pop();
126 return ret;
129 /* read the argument of a troff escape sequence */
130 void argnext(char *d, int cmd, int (*next)(void), void (*back)(int))
132 char delim[GNLEN], cs[GNLEN];
133 int c;
134 if (strchr(ESC_P, cmd)) {
135 c = next();
136 if (cmd == 's' && (c == '-' || c == '+')) {
137 *d++ = c;
138 c = next();
140 if (c == '(') {
141 *d++ = next();
142 *d++ = next();
143 } else if (!n_cp && c == '[') {
144 c = next();
145 while (c > 0 && c != '\n' && c != ']') {
146 *d++ = c;
147 c = next();
149 } else {
150 *d++ = c;
151 if (cmd == 's' && c >= '1' && c <= '3') {
152 c = next();
153 if (isdigit(c))
154 *d++ = c;
155 else
156 back(c);
160 if (strchr(ESC_Q, cmd)) {
161 charnext(delim, next, back);
162 while (charnext_delim(cs, next, back, delim) >= 0) {
163 charnext_str(d, cs);
164 d = strchr(d, '\0');
167 *d = '\0';
170 /* this is called only for internal neatroff strings */
171 void argread(char **sp, char *d, int cmd)
173 char *s = *sp;
174 int q;
175 if (strchr(ESC_P, cmd)) {
176 if (cmd == 's' && (*s == '-' || *s == '+'))
177 *d++ = *s++;
178 if (*s == '(') {
179 s++;
180 *d++ = *s++;
181 *d++ = *s++;
182 } else if (!n_cp && *s == '[') {
183 s++;
184 while (*s && *s != ']')
185 *d++ = *s++;
186 if (*s == ']')
187 s++;
188 } else {
189 *d++ = *s++;
190 if (cmd == 's' && s[-1] >= '1' && s[-1] <= '3')
191 if (isdigit(*s))
192 *d++ = *s++;
195 if (strchr(ESC_Q, cmd)) {
196 q = *s++;
197 while (*s && *s != q)
198 *d++ = *s++;
199 if (*s == q)
200 s++;
202 if (cmd == 'z')
203 *d++ = *s++;
204 *d = '\0';
205 *sp = s;
209 * read a glyph or an escape sequence
211 * This functions reads from s either an output troff request
212 * (only the ones emitted by wb.c) or a glyph name and updates
213 * s. The return value is the name of the troff request (the
214 * argument is copied into d) or zero for glyph names (it is
215 * copied into d). Returns -1 when the end of s is reached.
217 int escread(char **s, char *d)
219 char *r = d;
220 if (!**s)
221 return -1;
222 utf8read(s, d);
223 if (d[0] == c_ec) {
224 utf8read(s, d + 1);
225 if (d[1] == '(') {
226 utf8read(s, d);
227 utf8read(s, d + strlen(d));
228 } else if (!n_cp && d[1] == '[') {
229 while (**s && **s != ']')
230 *r++ = *(*s)++;
231 if (**s == ']')
232 (*s)++;
233 } else if (strchr("CDfhmsvXx", d[1])) {
234 int c = d[1];
235 argread(s, d, d[1]);
236 return c == 'C' ? 0 : c;
239 if (d[0] == c_ni)
240 utf8read(s, d + 1);
241 return 0;
245 * string streams: provide next()/back() interface for string buffers
247 * Functions like charnext() require a next()/back() interface
248 * for reading input streams. In order to provide this interface
249 * for string buffers, the following functions can be used:
251 * sstr_push(s);
252 * charnext(c, sstr_next, sstr_back);
253 * sstr_pop();
255 * The calls to sstr_push()/sstr_pop() may be nested.
257 static char *sstr_bufs[NSSTR]; /* buffer stack */
258 static int sstr_n; /* numbers of items in sstr_bufs[] */
259 static char *sstr_s; /* current buffer */
261 void sstr_push(char *s)
263 sstr_bufs[sstr_n++] = sstr_s;
264 sstr_s = s;
267 char *sstr_pop(void)
269 char *ret = sstr_s;
270 sstr_s = sstr_bufs[--sstr_n];
271 return ret;
274 int sstr_next(void)
276 return *sstr_s ? (unsigned char) *sstr_s++ : -1;
279 void sstr_back(int c)
281 sstr_s--;