fmt: consider the whole paragraphs before traps
[neatroff.git] / char.c
blobba657bebf85e087e79a3b12e3108dfe67a96d4b3
1 /* reading characters and escapes */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "roff.h"
/*
 * utf8len() - byte length of a utf-8 sequence, judged by its first byte
 *
 * c is the lead byte as a non-negative int.  Zero gives 0.  Lead bytes
 * from 0xc0 upwards give 2 to 6 (the 5- and 6-byte forms are still
 * recognised).  Everything else, including ASCII, the continuation
 * range 0x80..0xbf and negative values, gives 1.
 */
int utf8len(int c)
{
	/* smallest lead byte of every multi-byte length, longest first */
	static const struct {
		int lead;
		int len;
	} ranges[] = {
		{0xfc, 6}, {0xf8, 5}, {0xf0, 4}, {0xe0, 3}, {0xc0, 2},
	};
	int i;

	if (c == 0)
		return 0;
	for (i = 0; i < (int) (sizeof(ranges) / sizeof(ranges[0])); i++)
		if (c >= ranges[i].lead)
			return ranges[i].len;
	return 1;
}
/*
 * return nonzero if s is a single utf-8 character
 *
 * The expected length comes from the first byte (see utf8len()).  The
 * empty string also yields nonzero.  A string that ends before its first
 * character is complete is treated as that one (truncated) character, so
 * the terminating NUL is never stepped over.
 */
int utf8one(char *s)
{
	int l = utf8len((unsigned char) *s);
	int i;
	/* s[0] is known to be nonzero whenever l > 0 */
	for (i = 1; i < l; i++)
		if (!s[i])
			return 1;
	return !s[l];
}
/*
 * read a utf-8 character from *s and copy it to d
 *
 * The character is copied NUL-terminated into d and *s is advanced past
 * it.  Returns the number of bytes copied, which is 0 at the end of the
 * string.  If the string ends inside a multi-byte sequence, only the
 * bytes present are copied and *s is left on the terminating NUL rather
 * than beyond it.
 */
int utf8read(char **s, char *d)
{
	int l = utf8len((unsigned char) **s);
	int i;
	/* stop at the terminator if the sequence is truncated */
	for (i = 0; i < l && (*s)[i]; i++)
		d[i] = (*s)[i];
	d[i] = '\0';
	*s += i;
	return i;
}
/*
 * read a utf-8 character with next() and copy it to s
 *
 * next() is expected to return the next input byte, or a negative value
 * when the input is exhausted.  s always receives a NUL-terminated
 * string, even when nothing could be read, so callers may inspect it
 * unconditionally.  Returns the number of bytes stored: 0 at the end of
 * input (or for a NUL byte), and fewer than the nominal length if the
 * input ends in the middle of a multi-byte character.
 */
int utf8next(char *s, int (*next)(void))
{
	int c = next();
	int l, i;
	if (c < 0) {
		s[0] = '\0';
		return 0;
	}
	l = utf8len(c);
	s[0] = c;
	for (i = 1; i < l; i++) {
		c = next();
		if (c < 0) {	/* input ended inside the character */
			l = i;
			break;
		}
		s[i] = c;
	}
	s[l] = '\0';
	return l;
}
59 * read the next character or escape sequence (x, \x, \(xy, \[xyz], \C'xyz')
61 * character returned contents of c
62 * x '\0' x
63 * \4x c_ni \4x
64 * \\x '\\' \\x
65 * \\(xy '(' xy
66 * \\[xyz] '[' xyz
67 * \\C'xyz' 'C' xyz
69 int charnext(char *c, int (*next)(void), void (*back)(int))
71 int l, n;
72 if (!utf8next(c, next))
73 return -1;
74 if (c[0] == c_ni) {
75 utf8next(c + 1, next);
76 return c_ni;
78 if (c[0] == c_ec) {
79 utf8next(c + 1, next);
80 if (c[1] == '(') {
81 l = utf8next(c, next);
82 l += utf8next(c + l, next);
83 return '(';
84 } else if (!n_cp && c[1] == '[') {
85 l = 0;
86 n = next();
87 while (n >= 0 && n != '\n' && n != ']' && l < GNLEN - 1) {
88 c[l++] = n;
89 n = next();
91 c[l] = '\0';
92 return '[';
93 } else if (c[1] == 'C') {
94 argnext(c, 'C', next, back);
95 return 'C';
97 return '\\';
99 return '\0';
/*
 * like charnext(), but return -1 if delim was read
 *
 * Also returns -1 when charnext() itself fails.  In that case c may not
 * have been written, so it is not compared against delim.
 */
int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim)
{
	int t = charnext(c, next, back);
	if (t < 0)
		return -1;
	return strcmp(c, delim) ? t : -1;
}
109 /* convert back the character read from nextchar() (e.g. xy -> \\(xy) */
110 void charnext_str(char *d, char *c)
112 int c0 = (unsigned char) c[0];
113 if (c0 == c_ec || c0 == c_ni || !c[1] || utf8one(c)) {
114 strcpy(d, c);
115 return;
117 if (!c[2] && utf8len(c0) == 1)
118 sprintf(d, "%c(%s", c_ec, c);
119 else
120 sprintf(d, "%cC'%s'", c_ec, c);
123 /* like charnext() for string buffers */
124 int charread(char **s, char *c)
126 int ret;
127 sstr_push(*s);
128 ret = charnext(c, sstr_next, sstr_back);
129 *s = sstr_pop();
130 return ret;
133 /* read the argument of a troff escape sequence */
134 void argnext(char *d, int cmd, int (*next)(void), void (*back)(int))
136 char delim[GNLEN], cs[GNLEN];
137 int c;
138 if (strchr(ESC_P, cmd)) {
139 c = next();
140 if (cmd == 's' && (c == '-' || c == '+')) {
141 *d++ = c;
142 c = next();
144 if (c == '(') {
145 *d++ = next();
146 *d++ = next();
147 } else if (!n_cp && c == '[') {
148 c = next();
149 while (c > 0 && c != '\n' && c != ']') {
150 *d++ = c;
151 c = next();
153 } else {
154 *d++ = c;
155 if (cmd == 's' && c >= '1' && c <= '3') {
156 c = next();
157 if (isdigit(c))
158 *d++ = c;
159 else
160 back(c);
164 if (strchr(ESC_Q, cmd)) {
165 charnext(delim, next, back);
166 while (charnext_delim(cs, next, back, delim) >= 0) {
167 charnext_str(d, cs);
168 d = strchr(d, '\0');
171 *d = '\0';
174 /* this is called only for internal neatroff strings */
175 void argread(char **sp, char *d, int cmd)
177 char *s = *sp;
178 int q;
179 if (strchr(ESC_P, cmd)) {
180 if (cmd == 's' && (*s == '-' || *s == '+'))
181 *d++ = *s++;
182 if (*s == '(') {
183 s++;
184 *d++ = *s++;
185 *d++ = *s++;
186 } else if (!n_cp && *s == '[') {
187 s++;
188 while (*s && *s != ']')
189 *d++ = *s++;
190 if (*s == ']')
191 s++;
192 } else {
193 *d++ = *s++;
194 if (cmd == 's' && s[-1] >= '1' && s[-1] <= '3')
195 if (isdigit(*s))
196 *d++ = *s++;
199 if (strchr(ESC_Q, cmd)) {
200 q = *s++;
201 while (*s && *s != q)
202 *d++ = *s++;
203 if (*s == q)
204 s++;
206 if (cmd == 'z')
207 *d++ = *s++;
208 *d = '\0';
209 *sp = s;
213 * read a glyph or an escape sequence
215 * This function reads from s either an output troff request
216 * (only the ones emitted by wb.c) or a glyph name and updates
217 * s. The return value is the name of the troff request (the
218 * argument is copied into d) or zero for glyph names (it is
219 * copied into d). Returns -1 when the end of s is reached.
221 int escread(char **s, char *d)
223 char *r = d;
224 if (!**s)
225 return -1;
226 utf8read(s, d);
227 if (d[0] == c_ec) {
228 utf8read(s, d + 1);
229 if (d[1] == '(') {
230 utf8read(s, d);
231 utf8read(s, d + strlen(d));
232 } else if (!n_cp && d[1] == '[') {
233 while (**s && **s != ']')
234 *r++ = *(*s)++;
235 if (**s == ']')
236 (*s)++;
237 } else if (strchr("CDfhmsvXx", d[1])) {
238 int c = d[1];
239 argread(s, d, d[1]);
240 return c == 'C' ? 0 : c;
243 if (d[0] == c_ni)
244 utf8read(s, d + 1);
245 return 0;
249 * string streams: provide next()/back() interface for string buffers
251 * Functions like charnext() require a next()/back() interface
252 * for reading input streams. In order to provide this interface
253 * for string buffers, the following functions can be used:
255 * sstr_push(s);
256 * charnext(c, sstr_next, sstr_back);
257 * sstr_pop();
259 * The calls to sstr_push()/sstr_pop() may be nested.
261 static char *sstr_bufs[NSSTR]; /* buffer stack */
262 static int sstr_n; /* numbers of items in sstr_bufs[] */
263 static char *sstr_s; /* current buffer */
265 void sstr_push(char *s)
267 sstr_bufs[sstr_n++] = sstr_s;
268 sstr_s = s;
271 char *sstr_pop(void)
273 char *ret = sstr_s;
274 sstr_s = sstr_bufs[--sstr_n];
275 return ret;
278 int sstr_next(void)
280 return *sstr_s ? (unsigned char) *sstr_s++ : -1;
283 void sstr_back(int c)
285 sstr_s--;