reg: use snprintf for string values in num_str()
[neatroff.git] / char.c
blob6958738c3e136991a956672d99f45f0286af1656
1 /* reading characters and escapes */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include "roff.h"
/*
 * Return the byte length of the utf-8 character that starts with byte c.
 *
 * Lead bytes of two, three and four byte sequences yield 2, 3 and 4.
 * Any other positive value yields 1 and zero yields 0.
 */
int utf8len(int c)
{
	if ((c & 0xc0) != 0xc0)		/* ASCII or invalid */
		return c > 0 ? 1 : 0;
	if (!(c & 0x20))
		return 2;
	if (!(c & 0x10))
		return 3;
	if (!(c & 0x08))
		return 4;
	return 1;			/* all length bits set: invalid lead byte */
}
/*
 * Return nonzero if s is a single utf-8 character.
 *
 * The empty string and a string that ends in the middle of a multi-byte
 * sequence are also reported as a single character; bytes beyond the
 * terminating NUL are never examined.
 */
int utf8one(char *s)
{
	int l = utf8len((unsigned char) *s);
	int i;
	/* s[0] is nonzero whenever l > 0, so start at the second byte */
	for (i = 1; i < l; i++)
		if (!s[i])	/* truncated sequence: s ends here */
			return 1;
	return !s[l];
}
/*
 * Read a utf-8 character from *s and copy it to d.
 *
 * d is always NUL-terminated and *s is advanced past the bytes that
 * were copied.  Copying stops early if the string ends in the middle
 * of a multi-byte sequence, so *s never moves beyond the terminating
 * NUL.  Returns the number of bytes copied (zero at the end of *s).
 */
int utf8read(char **s, char *d)
{
	int l = utf8len((unsigned char) **s);
	int i;
	for (i = 0; i < l && (*s)[i]; i++)
		d[i] = (*s)[i];
	d[i] = '\0';
	*s += i;
	return i;
}
/*
 * Read a utf-8 character with next() and copy it to s.
 *
 * s is always NUL-terminated, even when next() reports the end of
 * input (a negative value) before or in the middle of the character.
 * Returns the number of bytes stored in s; zero means that nothing
 * was read (end of input or a NUL byte).
 */
int utf8next(char *s, int (*next)(void))
{
	int c = next();
	int l, i;
	s[0] = '\0';
	if (c < 0)
		return 0;
	l = utf8len(c);
	s[0] = c;
	for (i = 1; i < l; i++) {
		int n = next();
		if (n < 0) {		/* input ended inside the character */
			l = i;
			break;
		}
		s[i] = n;
	}
	s[l] = '\0';
	return l;
}
55 /* read quoted arguments of escape sequences (ESC_Q) */
56 char *quotednext(int (*next)(void), void (*back)(int))
58 char delim[GNLEN], cs[GNLEN];
59 struct sbuf sb;
60 char d[GNLEN];
61 charnext(delim, next, back);
62 sbuf_init(&sb);
63 while (charnext_delim(cs, next, back, delim) >= 0) {
64 charnext_str(d, cs);
65 sbuf_append(&sb, d);
67 return sbuf_out(&sb);
70 /* read unquoted arguments of escape sequences (ESC_P) */
71 char *unquotednext(int cmd, int (*next)(void), void (*back)(int))
73 int c = next();
74 struct sbuf sb;
75 sbuf_init(&sb);
76 if (cmd == 's' && (c == '-' || c == '+')) {
77 cmd = c;
78 sbuf_add(&sb, c);
79 c = next();
81 if (c == '(') {
82 sbuf_add(&sb, next());
83 sbuf_add(&sb, next());
84 } else if (!n_cp && c == '[') {
85 c = next();
86 while (c > 0 && c != '\n' && c != ']') {
87 sbuf_add(&sb, c);
88 c = next();
90 } else {
91 sbuf_add(&sb, c);
92 if (n_cp && cmd == 's' && c >= '1' && c <= '3') {
93 c = next();
94 if (isdigit(c))
95 sbuf_add(&sb, c);
96 else
97 back(c);
100 return sbuf_out(&sb);
104 * read the next character or escape sequence (x, \x, \(xy, \[xyz], \C'xyz')
106 * character returned contents of c
107 * x '\0' x
108 * \4x c_ni \4x
109 * \\x '\\' \\x
110 * \\(xy '(' xy
111 * \\[xyz] '[' xyz
112 * \\C'xyz' 'C' xyz
114 int charnext(char *c, int (*next)(void), void (*back)(int))
116 int l, n;
117 if (!utf8next(c, next))
118 return -1;
119 if (c[0] == c_ni) {
120 utf8next(c + 1, next);
121 return c_ni;
123 if (c[0] == c_ec) {
124 utf8next(c + 1, next);
125 if (c[1] == '(') {
126 l = utf8next(c, next);
127 l += utf8next(c + l, next);
128 return '(';
129 } else if (!n_cp && c[1] == '[') {
130 l = 0;
131 n = next();
132 while (n >= 0 && n != '\n' && n != ']' && l < GNLEN - 1) {
133 c[l++] = n;
134 n = next();
136 c[l] = '\0';
137 return '[';
138 } else if (c[1] == 'C') {
139 char *chr = quotednext(next, back);
140 snprintf(c, GNLEN, "%s", chr);
141 free(chr);
142 return 'C';
144 return '\\';
146 return '\0';
/*
 * Like charnext(), but return -1 if delim was read.
 *
 * -1 is also returned when charnext() itself returns -1; c is not
 * compared in that case, because it may not have been written.
 */
int charnext_delim(char *c, int (*next)(void), void (*back)(int), char *delim)
{
	int t = charnext(c, next, back);
	if (t < 0)
		return -1;
	return strcmp(c, delim) ? t : -1;
}
156 /* convert back the character read from nextchar() (e.g. xy -> \\(xy) */
157 void charnext_str(char *d, char *c)
159 int c0 = (unsigned char) c[0];
160 if (c0 == c_ec || c0 == c_ni || !c[1] || utf8one(c)) {
161 strcpy(d, c);
162 return;
164 if (!c[2] && utf8len(c0) == 1)
165 sprintf(d, "%c(%s", c_ec, c);
166 else
167 sprintf(d, "%cC'%s'", c_ec, c);
170 /* like charnext() for string buffers */
171 int charread(char **s, char *c)
173 int ret;
174 sstr_push(*s);
175 ret = charnext(c, sstr_next, sstr_back);
176 *s = sstr_pop();
177 return ret;
180 /* like charnext_delim() for string buffers */
181 int charread_delim(char **s, char *c, char *delim)
183 int ret;
184 sstr_push(*s);
185 ret = charnext_delim(c, sstr_next, sstr_back, delim);
186 *s = sstr_pop();
187 return ret;
/*
 * Read quoted arguments; this is called only for internal neatroff strings.
 *
 * The first byte of *sp is the delimiter.  The bytes up to the next
 * delimiter (or the end of the string) are copied to d, and *sp is
 * left after the closing delimiter.  For an empty input, d becomes
 * empty and *sp is not moved.
 */
static void quotedread(char **sp, char *d)
{
	char *s = *sp;
	int q = *s;
	if (q) {		/* an empty input has no delimiter to skip */
		s++;
		while (*s && *s != q)
			*d++ = *s++;
		if (*s == q)
			s++;
	}
	*d = '\0';
	*sp = s;
}
203 /* read unquoted arguments; this is called only for internal neatroff strings */
204 static void unquotedread(char **sp, char *d)
206 char *s = *sp;
207 if (*s == '(') {
208 s++;
209 *d++ = *s++;
210 *d++ = *s++;
211 } else if (!n_cp && *s == '[') {
212 s++;
213 while (*s && *s != ']')
214 *d++ = *s++;
215 if (*s == ']')
216 s++;
217 } else {
218 *d++ = *s++;
220 *d = '\0';
221 *sp = s;
225 * read a glyph or an escape sequence
227 * This function reads from s either an output troff request
228 * (only the ones emitted by wb.c) or a glyph name and updates
229 * s. The return value is the name of the troff request (the
230 * argument is copied into d) or zero for glyph names (it is
231 * copied into d). Returns -1 when the end of s is reached.
232 * Note that to d, a pointer to a static array is assigned.
234 int escread(char **s, char **d)
236 static char buf[1 << 12];
237 char *r;
238 if (!**s)
239 return -1;
240 r = buf;
241 *d = buf;
242 utf8read(s, r);
243 if (r[0] == c_ec) {
244 utf8read(s, r + 1);
245 if (r[1] == '(') {
246 utf8read(s, r);
247 utf8read(s, r + strlen(r));
248 } else if (!n_cp && r[1] == '[') {
249 while (**s && **s != ']')
250 *r++ = *(*s)++;
251 *r = '\0';
252 if (**s == ']')
253 (*s)++;
254 } else if (strchr("CDfhmsvXx<>", r[1])) {
255 int c = r[1];
256 r[0] = '\0';
257 if (strchr(ESC_P, c))
258 unquotedread(s, r);
259 if (strchr(ESC_Q, c))
260 quotedread(s, r);
261 return c == 'C' ? 0 : c;
263 } else if (r[0] == c_ni) {
264 utf8read(s, r + 1);
266 return 0;
/*
 * string streams: provide a next()/back() interface for string buffers
 *
 * Functions like charnext() require a next()/back() interface
 * for reading input streams.  To provide this interface for
 * string buffers, the following functions can be used:
 *
 *   sstr_push(s);
 *   charnext(c, sstr_next, sstr_back);
 *   sstr_pop();
 *
 * The calls to sstr_push()/sstr_pop() may be nested.
 */
282 static char *sstr_bufs[NSSTR]; /* buffer stack */
283 static int sstr_n; /* numbers of items in sstr_bufs[] */
284 static char *sstr_s; /* current buffer */
286 void sstr_push(char *s)
288 sstr_bufs[sstr_n++] = sstr_s;
289 sstr_s = s;
292 char *sstr_pop(void)
294 char *ret = sstr_s;
295 sstr_s = sstr_bufs[--sstr_n];
296 return ret;
299 int sstr_next(void)
301 return *sstr_s ? (unsigned char) *sstr_s++ : -1;
304 void sstr_back(int c)
306 sstr_s--;