version 1.7.3.0
[socat.git] / nestlex.c
blob97778bce8b5bcc2a38b634043a846729277c8f66
1 /* source: nestlex.c */
2 /* Copyright Gerhard Rieger 2006-2010 */
3 /* Published under the GNU General Public License V.2, see file COPYING */
5 /* a function for lexical scanning of nested character patterns */
7 #include "config.h"
8 #include "mytypes.h"
10 #include "sysincludes.h"
/* nestlex: lexical scanning of nested character patterns.

   Scans the input string at *addr and copies its value to the output
   token at *token until an unescaped, unquoted, unnested pattern from the
   ends array is found, or until the input string ends.

   - the end pattern itself is neither consumed nor copied
   - no trailing \0 is written to the token
   - a backslash escapes the following character (the backslash is removed)
   - hard quotes protect everything up to the next occurrence of the same
     quote pattern; soft quotes do the same but still honour quotes and
     nestings inside
   - nest patterns come in (opener, closer) pairs; both are always copied

   On return *addr points to the first unconsumed input character (the end
   pattern or the terminating \0) and *token points to the first unwritten
   output character. *len is decremented by the number of bytes written.

   Returns:
     -1  the token space was exhausted (*len reached 0)
      0  an end pattern was found, or the input string ended; callers tell
         the two apart by looking at **addr
   Note: earlier documentation mentions a return value of 1 for an
   unexpected end of input; no code path in this function produces it.
*/

/* Returns true when the input at `in` starts with pattern `pat`. */
static bool nestlex_at(const char *in, const char *pat) {
    return strncmp(in, pat, strlen(pat)) == 0;
}

/* Appends one byte to the token. Returns 0 while space remains and -1 once
   the token space is used up. Never writes when *len is already 0, so the
   unsigned counter cannot wrap around. */
static int nestlex_put(char **out, size_t *len, char c) {
    if (*len == 0) {
        return -1;
    }
    *(*out)++ = c;
    --*len;
    return (*len == 0) ? -1 : 0;
}

/* Copies n bytes verbatim from the input to the token, advancing both.
   Returns 0 on success, -1 as soon as the token space is used up. */
static int nestlex_copy(const char **in, char **out, size_t *len, size_t n) {
    while (n-- > 0) {
        if (nestlex_put(out, len, **in) < 0) {
            if (*len == 0 && n + 1 > 0) {
                /* the byte was written only if put() had room for it; the
                   input advances together with the output in that case */
            }
            ++*in;
            return -1;
        }
        ++*in;
    }
    return 0;
}

/* Handles one quoted or nested section that starts at *in; defined below. */
static int nestlex_section(const char **in, char **out, size_t *len,
                           const char *opener, const char *closer,
                           const char *hquotes[], const char *squotes[],
                           const char *nests[],
                           bool strip, bool c_esc, bool html_esc);

int nestlex(const char **addr,    /* input string; aft points to end token */
            char **token,         /* output token; aft points to first unwritten
                                     char (caller might want to set it to \0) */
            size_t *len,          /* remaining bytes in token space (incl. \0) */
            const char *ends[],   /* list of end strings */
            const char *hquotes[],/* list of strings that quote (hard qu.) */
            const char *squotes[],/* list of strings that quote softly */
            const char *nests[],  /* list of strings that start nesting;
                                     every second one is matching end */
            bool dropquotes,      /* drop the outermost quotes */
            bool c_esc,           /* solve C char escapes: \n \t \0 etc */
            bool html_esc         /* solve HTML char escapes: %0d %08 etc;
                                     only handed on to nested scans here */
            ) {
    const char *in = *addr;       /* pointer into input string */
    char *out = *token;           /* pointer into output token */
    int result = 0;

    while (*in != '\0') {
        const char **pat;
        bool handled = false;
        char c;

        /* first check the end patterns (e.g. for ']') */
        for (pat = ends; pat != NULL && *pat != NULL; ++pat) {
            if (nestlex_at(in, *pat)) {
                goto done;        /* result is still 0 */
            }
        }

        /* hard quotes: nothing inside is interpreted except the closing
           quote, therefore no quote/nest tables are handed down */
        for (pat = hquotes; pat != NULL && *pat != NULL; ++pat) {
            if (nestlex_at(in, *pat)) {
                result = nestlex_section(&in, &out, len, *pat, *pat,
                                         NULL, NULL, NULL,
                                         dropquotes, c_esc, html_esc);
                handled = true;
                break;
            }
        }
        if (result != 0) {
            goto done;
        }
        if (handled) {
            continue;             /* string might continue with another quote */
        }

        /* soft quotes: quotes and nestings inside are still recognized */
        for (pat = squotes; pat != NULL && *pat != NULL; ++pat) {
            if (nestlex_at(in, *pat)) {
                result = nestlex_section(&in, &out, len, *pat, *pat,
                                         hquotes, squotes, nests,
                                         dropquotes, c_esc, html_esc);
                handled = true;
                break;
            }
        }
        if (result != 0) {
            goto done;
        }
        if (handled) {
            continue;
        }

        /* nested clauses: the table holds (opener, closer) pairs and both
           patterns are always copied to the token */
        for (pat = nests; pat != NULL && *pat != NULL; pat += 2) {
            if (nestlex_at(in, pat[0])) {
                result = nestlex_section(&in, &out, len, pat[0], pat[1],
                                         hquotes, squotes, nests,
                                         false, c_esc, html_esc);
                handled = true;
                break;
            }
        }
        if (result != 0) {
            goto done;
        }
        if (handled) {
            continue;
        }

        /* "normal" data, possibly escaped */
        c = *in++;
        if (c == '\\') {
            c = *in;
            if (c == '\0') {
                /* trailing backslash: drop it and leave `in` on the
                   terminator so that *addr stays inside the string */
                break;
            }
            ++in;
            if (c_esc) {          /* solve C char escapes: \n \t \0 etc */
                switch (c) {
                case '0': c = '\0'; break;
                case 'a': c = '\a'; break;
                case 'b': c = '\b'; break;
                case 'f': c = '\f'; break;
                case 'n': c = '\n'; break;
                case 'r': c = '\r'; break;
                case 't': c = '\t'; break;
                case 'v': c = '\v'; break;
                /* \x, \u and \U are not implemented; like any other
                   character they are copied without the backslash */
                default: break;
                }
            }
        }
        if (nestlex_put(&out, len, c) < 0) {
            result = -1;          /* output overflow */
            break;
        }
    }

done:
    *addr = in;
    *token = out;
    return result;
}

/* Handles one quoted or nested section that starts at *in.
   The opener is skipped (strip) or copied, the inside is scanned by a
   recursive nestlex() call that ends at `closer`, and the closer is then
   skipped or copied as well.
   A failure of the inner scan is passed on unchanged, so an exhausted
   token space stops the whole scan instead of being written past.
   When the input ends before the closer is found, nothing more is consumed
   and *in stays on the terminating \0.
   Returns 0 on success, -1 when the token space is used up. */
static int nestlex_section(const char **in, char **out, size_t *len,
                           const char *opener, const char *closer,
                           const char *hquotes[], const char *squotes[],
                           const char *nests[],
                           bool strip, bool c_esc, bool html_esc) {
    const char *endnest[2];
    int result;

    if (strip) {
        *in += strlen(opener);
    } else if (nestlex_copy(in, out, len, strlen(opener)) < 0) {
        return -1;
    }

    endnest[0] = closer;
    endnest[1] = NULL;
    result = nestlex(in, out, len, endnest, hquotes, squotes, nests,
                     false, c_esc, html_esc);
    if (result != 0) {
        return result;
    }

    if (!nestlex_at(*in, closer)) {
        return 0;                 /* unterminated section: input ended */
    }
    if (strip) {
        *in += strlen(closer);
        return 0;
    }
    return nestlex_copy(in, out, len, strlen(closer));
}