1 /* source: nestlex.c */
2 /* Copyright Gerhard Rieger and contributors (see file CHANGES) */
3 /* Published under the GNU General Public License V.2, see file COPYING */
5 /* a function for lexical scanning of nested character patterns */
10 #include "sysincludes.h"
/* Forward declaration of _nestlex(), which nestlex() calls before the
   definition appears further down. */
12 static int _nestlex(const char **addr
,
16 const char *hquotes
[],
17 const char *squotes
[],
24 /* sub: scan a string and copy its value to the output string
25 ends scanning when an unescaped, unnested string from the ends array is found
26 does not copy the end pattern
27 does not write a trailing \0 to token
28 allows escaping with \ and quoting (\ and quotes are removed)
29 allows nesting with various kinds of parentheses
30 returns -1 if the output string was too small
31 returns 1 if addr ended unexpectedly
32 returns 0 if the token could be extracted successfully
34 int nestlex(const char **addr
, /* input string; afterwards points to the end token */
35 char **token
, /* output token; afterwards points to the first unwritten
36 char (caller might want to set it to \0) */
37 size_t *len
, /* remaining bytes in token space (incl. \0) */
38 const char *ends
[], /* list of end strings */
39 const char *hquotes
[],/* list of strings that quote (hard quoting) */
40 const char *squotes
[],/* list of strings that quote softly */
41 const char *nests
[],/* list of strings that start nesting;
42 every second one is the matching end */
43 bool dropquotes
, /* drop the outermost quotes */
44 bool c_esc
, /* resolve C char escapes: \n \t \0 etc */
45 bool html_esc
/* resolve HTML char escapes: %0d %08 etc */
/* hand all arguments over to the worker _nestlex().
   NOTE(review): len is passed through a (ptrdiff_t *) cast; this assumes
   size_t and ptrdiff_t share size and representation - confirm */
48 _nestlex(addr
, token
, (ptrdiff_t *)len
, ends
, hquotes
, squotes
, nests
,
49 dropquotes
, c_esc
, html_esc
);
/* _nestlex: worker behind nestlex(); it calls itself for quoted and nested
   sections.
   Reads from *addr (local cursor: in) and writes to *token (local cursor:
   out), counting *len down as output space is used.
   The visible checks come in this order: end of input, end patterns, hard
   quote patterns, soft quote patterns, nest-start patterns, and finally
   plain or backslash-escaped characters.
   Recursive calls receive a local endnest table as their end patterns; for
   nesting, endnest[0] is the matching close pattern nestx[1].
   When the output space runs out, the cursors are stored back to *addr and
   *token and -1 is returned; when result is negative, the cursors are stored
   back and result is returned.
   NOTE(review): parts of this function are not visible here; this summary
   covers only what the visible lines show. */
52 static int _nestlex(const char **addr
,
56 const char *hquotes
[],
57 const char *squotes
[],
63 const char *in
= *addr
; /* pointer into input string */
64 const char **endx
; /* loops over end patterns */
65 const char **quotx
; /* loops over quote patterns */
66 const char **nestx
; /* loops over nest patterns */
67 char *out
= *token
; /* pointer into output token */
74 /* is this end of input string? */
77 break; /* end of string */
80 /* first check the end patterns (e.g. for ']') */
83 if (!strncmp(in
, *endx
, strlen(*endx
))) {
84 /* this end pattern matches */
92 /* check for hard quoting pattern */
94 while (hquotes
&& *quotx
) {
95 if (!strncmp(in
, *quotx
, strlen(*quotx
))) {
96 /* this quote pattern matches */
97 const char *endnest
[2];
99 /* we strip this quote */
100 in
+= strlen(*quotx
);
102 for (i
= strlen(*quotx
); i
> 0; --i
) {
/* token space exhausted: store the cursors back and report overflow */
104 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
107 /* we call _nestlex recursively; inside a hard quote the quote and
   nest tables are passed as NULL, so only endnest can end the scan */
111 _nestlex(&in
, &out
, len
, endnest
, NULL
/*hquotes*/,
112 NULL
/*squotes*/, NULL
/*nests*/,
113 false, c_esc
, html_esc
);
114 if (result
== 0 && dropquotes
) {
115 /* we strip the trailing quote */
116 in
+= strlen(*quotx
);
117 } else if (result
< 0) {
118 *addr
= in
; *token
= out
; return result
;
120 /* we copy the trailing quote */
121 for (i
= strlen(*quotx
); i
> 0; --i
) {
123 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
131 if (hquotes
&& *quotx
!= NULL
) {
132 /* there was a quote; string might continue with hard quote */
136 /* check for soft quoting pattern */
138 while (squotes
&& *quotx
) {
139 if (!strncmp(in
, *quotx
, strlen(*quotx
))) {
140 /* this quote pattern matches */
141 /* we strip this quote */
142 /* we call _nestlex recursively */
143 const char *endnest
[2];
145 /* we strip this quote */
146 in
+= strlen(*quotx
);
148 for (i
= strlen(*quotx
); i
> 0; --i
) {
150 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
/* recursive scan of the soft-quoted section; the hquotes table is passed
   on, so hard quotes are still recognized inside */
156 _nestlex(&in
, &out
, len
, endnest
, hquotes
,
158 false, c_esc
, html_esc
);
160 if (result
== 0 && dropquotes
) {
161 /* we strip the trailing quote */
162 in
+= strlen(*quotx
);
163 } else if (result
< 0) {
164 *addr
= in
; *token
= out
; return result
;
166 /* we copy the trailing quote */
167 for (i
= strlen(*quotx
); i
> 0; --i
) {
169 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
176 if (squotes
&& *quotx
!= NULL
) {
177 /* there was a soft quote; string might continue with any quote */
181 /* check patterns that start a nested clause */
182 nestx
= nests
; i
= 0;
183 while (nests
&& *nestx
) {
184 if (!strncmp(in
, *nestx
, strlen(*nestx
))) {
185 /* this nest pattern matches */
186 const char *endnest
[2];
187 endnest
[0] = nestx
[1];
190 for (i
= strlen(nestx
[1]); i
> 0; --i
) {
192 if (--*len
<= 0) { *addr
= in
; *token
= out
; return -1; }
/* recursive scan of the nested clause; the hquotes, squotes and nests
   tables are all passed on */
196 _nestlex(&in
, &out
, len
, endnest
, hquotes
, squotes
, nests
,
197 false, c_esc
, html_esc
);
200 i
= strlen(nestx
[1]); while (i
> 0) {
209 } else if (result
< 0) {
210 *addr
= in
; *token
= out
; return result
;
214 nestx
+= 2; /* skip matching end pattern in table */
216 if (nests
&& *nestx
) {
217 /* we handled a nested expression, continue loop */
221 /* "normal" data, possibly escaped */
224 /* found a plain \ escaped part */
226 if (c
== 0) { /* Warn("trailing '\\'");*/ break; }
227 if (c_esc
) { /* resolve C char escapes: \n \t \0 etc */
229 case '0': c
= '\0'; break;
230 case 'a': c
= '\a'; break;
231 case 'b': c
= '\b'; break;
232 case 'f': c
= '\f'; break;
233 case 'n': c
= '\n'; break;
234 case 'r': c
= '\r'; break;
235 case 't': c
= '\t'; break;
236 case 'v': c
= '\v'; break;
238 case 'x': !!! 1 to
2 hex digits
; break;
239 case 'u': !!! 4 hex digits
?; break;
240 case 'U': !!! 8 hex digits
?; break;
250 return -1; /* output overflow */
255 /* just a simple char */
261 return -1; /* output overflow */
265 /* never come here? */