hyph: do not read more than GNLEN characters in hy_cget()
[neatroff.git] / char.c
blobd0a4abca9c1c9040f988183b6afac01f0b7bcccf
1 /* reading characters and escapes */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include "roff.h"
/*
 * return the byte length of a utf-8 sequence, judged by its first byte
 *
 * Zero yields 0.  Everything below 0xc0 (ASCII, continuation bytes
 * and negative values) yields 1; lead bytes yield 2 to 6.
 */
int utf8len(int c)
{
	if (!c)
		return 0;
	if (c < 0xc0)
		return 1;
	if (c < 0xe0)
		return 2;
	if (c < 0xf0)
		return 3;
	if (c < 0xf8)
		return 4;
	if (c < 0xfc)
		return 5;
	return 6;
}
/*
 * return nonzero if s holds exactly one utf-8 character
 *
 * The byte at the offset given by utf8len() for the first byte of s
 * is inspected; s must be at least that long.
 */
int utf8one(char *s)
{
	int len = utf8len((unsigned char) s[0]);

	return s[len] == '\0';
}
/*
 * copy the utf-8 character at *s into d and move *s past it
 *
 * d is NUL-terminated; the return value is the number of bytes copied,
 * as reported by utf8len() for the first byte.
 */
int utf8read(char **s, char *d)
{
	char *src = *s;
	int n = utf8len((unsigned char) *src);
	char *end = src + n;

	while (src < end)
		*d++ = *src++;
	*d = '\0';
	*s = end;
	return n;
}
/*
 * read one utf-8 character through next() into s
 *
 * Returns the length of the character; returns 0 without touching s
 * if the first call to next() yields a negative value.
 */
int utf8next(char *s, int (*next)(void))
{
	int first = next();
	int len, k;

	if (first < 0)
		return 0;
	len = utf8len(first);
	s[0] = first;
	k = 1;
	while (k < len)
		s[k++] = next();
	s[len] = '\0';
	return len;
}
58 /* read quoted arguments of escape sequences (ESC_Q) */
59 void quotednext(char *d, int (*next)(void), void (*back)(int))
61 char delim[GNLEN], cs[GNLEN];
62 charnext(delim, next, back);
63 while (charnext_delim(cs, next, back, delim) >= 0) {
64 charnext_str(d, cs);
65 d = strchr(d, '\0');
69 /* read unquoted arguments of escape sequences (ESC_P) */
70 void unquotednext(char *d, int cmd, int (*next)(void), void (*back)(int))
72 int c = next();
73 if (cmd == 's' && (c == '-' || c == '+')) {
74 cmd = c;
75 *d++ = c;
76 c = next();
78 if (c == '(') {
79 *d++ = next();
80 *d++ = next();
81 } else if (!n_cp && c == '[') {
82 c = next();
83 while (c > 0 && c != '\n' && c != ']') {
84 *d++ = c;
85 c = next();
87 } else {
88 *d++ = c;
89 if (cmd == 's' && c >= '1' && c <= '3') {
90 c = next();
91 if (isdigit(c))
92 *d++ = c;
93 else
94 back(c);
97 *d = '\0';
101 * read the next character or escape sequence (x, \x, \(xy, \[xyz], \C'xyz')
103 * character returned contents of c
104 * x '\0' x
105 * \4x c_ni \4x
106 * \\x '\\' \\x
107 * \\(xy '(' xy
108 * \\[xyz] '[' xyz
109 * \\C'xyz' 'C' xyz
111 int charnext(char *c, int (*next)(void), void (*back)(int))
113 int l, n;
114 if (!utf8next(c, next))
115 return -1;
116 if (c[0] == c_ni) {
117 c[1] = next();
118 c[2] = '\0';
119 return c_ni;
121 if (c[0] == c_ec) {
122 c[1] = next();
123 c[2] = '\0';
124 if (c[1] == '(') {
125 c[0] = next();
126 c[1] = next();
127 return '(';
128 } else if (!n_cp && c[1] == '[') {
129 l = 0;
130 n = next();
131 while (n >= 0 && n != '\n' && n != ']' && l < GNLEN - 1) {
132 c[l++] = n;
133 n = next();
135 c[l] = '\0';
136 return '[';
137 } else if (c[1] == 'C') {
138 quotednext(c, next, back);
139 return 'C';
141 return '\\';
143 return '\0';
/*
 * like charnext(), but return -1 if delim was read
 *
 * c receives the character as in charnext().  The return value is
 * that of charnext(), or -1 if c equals delim or if charnext() itself
 * returned a negative value.
 */
int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim)
{
	int t = charnext(c, next, back);

	/* at end of input charnext() may leave c unwritten; do not compare it */
	if (t < 0)
		return -1;
	return strcmp(c, delim) ? t : -1;
}
153 /* convert back the character read from nextchar() (e.g. xy -> \\(xy) */
154 void charnext_str(char *d, char *c)
156 int c0 = (unsigned char) c[0];
157 if (c0 == c_ec || c0 == c_ni || !c[1] || utf8one(c)) {
158 strcpy(d, c);
159 return;
161 if (!c[2] && utf8len(c0) == 1)
162 sprintf(d, "%c(%s", c_ec, c);
163 else
164 sprintf(d, "%cC'%s'", c_ec, c);
167 /* like charnext() for string buffers */
168 int charread(char **s, char *c)
170 int ret;
171 sstr_push(*s);
172 ret = charnext(c, sstr_next, sstr_back);
173 *s = sstr_pop();
174 return ret;
177 /* like charnext_delim() for string buffers */
178 int charread_delim(char **s, char *c, char *delim)
180 int ret;
181 sstr_push(*s);
182 ret = charnext_delim(c, sstr_next, sstr_back, delim);
183 *s = sstr_pop();
184 return ret;
/*
 * read quoted arguments; this is called only for internal neatroff strings
 *
 * The first byte at *sp is the quote character.  Bytes up to the
 * matching quote (or the end of the string) are copied to d, which is
 * NUL-terminated, and *sp is moved past the closing quote if present.
 * For an empty string d is set to "" and *sp is left unchanged.
 */
static void quotedread(char **sp, char *d)
{
	char *s = *sp;
	int q;

	/* no quote character to read: do not step past the terminator */
	if (!*s) {
		*d = '\0';
		return;
	}
	q = *s++;
	while (*s && *s != q)
		*d++ = *s++;
	if (*s == q)
		s++;
	*d = '\0';
	*sp = s;
}
200 /* read unquoted arguments; this is called only for internal neatroff strings */
201 static void unquotedread(char **sp, char *d)
203 char *s = *sp;
204 if (*s == '(') {
205 s++;
206 *d++ = *s++;
207 *d++ = *s++;
208 } else if (!n_cp && *s == '[') {
209 s++;
210 while (*s && *s != ']')
211 *d++ = *s++;
212 if (*s == ']')
213 s++;
214 } else {
215 *d++ = *s++;
217 *d = '\0';
218 *sp = s;
222 * read a glyph or an escape sequence
224 * This function reads from s either an output troff request
225 * (only the ones emitted by wb.c) or a glyph name and updates
226 * s. The return value is the name of the troff request (the
227 * argument is copied into d) or zero for glyph names (it is
228 * copied into d). Returns -1 when the end of s is reached.
230 int escread(char **s, char *d)
232 char *r = d;
233 if (!**s)
234 return -1;
235 utf8read(s, d);
236 if (d[0] == c_ec) {
237 d[1] = *(*s)++;
238 d[2] = '\0';
239 if (d[1] == '(') {
240 d[0] = *(*s)++;
241 d[1] = *(*s)++;
242 } else if (!n_cp && d[1] == '[') {
243 while (**s && **s != ']')
244 *r++ = *(*s)++;
245 if (**s == ']')
246 (*s)++;
247 } else if (strchr("CDfhmsvXx", d[1])) {
248 int c = d[1];
249 d[0] = '\0';
250 if (strchr(ESC_P, c))
251 unquotedread(s, d);
252 if (strchr(ESC_Q, c))
253 quotedread(s, d);
254 return c == 'C' ? 0 : c;
257 if (d[0] == c_ni)
258 utf8read(s, d + 1);
259 return 0;
263 * string streams: provide next()/back() interface for string buffers
265 * Functions like charnext() require a next()/back() interface
266 * for reading input streams. In order to provide this interface
267 * for string buffers, the following functions can be used:
269 * sstr_push(s);
270 * charnext(c, sstr_next, sstr_back);
271 * sstr_pop();
273 * The calls to sstr_push()/sstr_pop() may be nested.
275 static char *sstr_bufs[NSSTR]; /* buffer stack */
276 static int sstr_n; /* numbers of items in sstr_bufs[] */
277 static char *sstr_s; /* current buffer */
279 void sstr_push(char *s)
281 sstr_bufs[sstr_n++] = sstr_s;
282 sstr_s = s;
285 char *sstr_pop(void)
287 char *ret = sstr_s;
288 sstr_s = sstr_bufs[--sstr_n];
289 return ret;
292 int sstr_next(void)
294 return *sstr_s ? (unsigned char) *sstr_s++ : -1;
297 void sstr_back(int c)
299 sstr_s--;