Fix compilation with old g++ 3.3.5 and debian-sarge.
[wvstreams.git] / utils / wvtclstring.cc
blob e1b58bd3fdac5b385b0566a08dd8662b1059f288
1 /*
2 * Worldvisions Weaver Software:
3 * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
4 */
5 #include "wvbackslash.h"
6 #include "wvbuf.h"
7 #include "wvstream.h"
8 #include "wvstring.h"
9 #include "wvstringmask.h"
10 #include "wvtclstring.h"
12 const WvStringMask WVTCL_NASTY_SPACES(WVTCL_NASTY_SPACES_STR);
13 const WvStringMask WVTCL_NASTY_NEWLINES(WVTCL_NASTY_NEWLINES_STR);
14 const WvStringMask WVTCL_SPLITCHARS(WVTCL_SPLITCHARS_STR);
16 static size_t wvtcl_escape(char *dst, const char *s, size_t s_len,
17 const WvStringMask &nasties, bool *verbatim = NULL)
19 if (verbatim) *verbatim = false;
21 // NULL strings remain such
22 if (s == NULL)
23 return 0;
24 // empty strings are just {}
25 if (s_len == 0)
27 if (dst)
29 dst[0] = '{';
30 dst[1] = '}';
32 return 2;
35 bool backslashify = false, inescape = false;
36 int len = 0, unprintables = 0, bracecount = 0;
37 const char *cptr, *cptr_end = s + s_len;
39 // figure out which method we need to use: backslashify or embrace.
40 // also count the number of unprintable characters we'll need to
41 // backslashify, if it turns out that's necessary.
42 for (cptr = s; cptr != cptr_end; cptr++)
44 // Assume we do nothing
45 if (dst) dst[len] = *cptr;
46 ++len;
48 if (!inescape && *cptr == '{')
49 bracecount++;
50 else if (!inescape && *cptr == '}')
51 bracecount--;
52 if (bracecount < 0)
53 backslashify = true;
55 bool doit = false;
56 switch (*cptr)
58 case WVTCL_ALWAYS_NASTY_CASE:
59 doit = true;
60 break;
61 default:
62 if (nasties[*cptr])
63 doit = true;
65 if (doit)
66 unprintables++;
68 if (*cptr == '\\')
69 inescape = !inescape;
70 else
71 inescape = false;
74 // if the braces aren't balanced, backslashify
75 if (bracecount != 0 || inescape)
76 backslashify = true;
78 if (!backslashify && !unprintables)
80 if (verbatim) *verbatim = true;
81 return len; // no work needed!
84 if (backslashify)
86 if (dst)
88 len = 0;
89 for (cptr = s; cptr != cptr_end; ++cptr)
91 bool doit = false;
92 switch (*cptr)
94 case WVTCL_ALWAYS_NASTY_CASE:
95 doit = true;
96 break;
97 default:
98 if (nasties[*cptr])
99 doit = true;
101 if (doit)
102 dst[len++] = '\\';
104 dst[len++] = *cptr;
106 return len;
108 else return len+unprintables;
110 else
112 // the embrace method: just take the string and put braces around it
113 if (dst)
115 len = 0;
116 dst[len++] = '{';
117 for (cptr = s; cptr != cptr_end; ++cptr)
118 dst[len++] = *cptr;
119 dst[len++] = '}';
120 return len;
122 else return len+2;
127 WvString wvtcl_escape(WvStringParm s, const WvStringMask &nasties)
129 size_t s_len = s.len();
131 bool verbatim;
132 size_t len = wvtcl_escape(NULL, s, s_len, nasties, &verbatim);
133 if (verbatim) return s;
135 WvString result;
136 result.setsize(len);
137 char *e = result.edit();
138 e += wvtcl_escape(e, s, s_len, nasties);
139 *e = '\0';
140 return result;
// Reverses wvtcl_escape() on the s_len bytes at s and returns the decoded
// length.
//
// If dst is NULL nothing is stored and only the length is computed.  No
// terminating '\0' is written.  *verbatim (when provided) is set true only
// for NULL or empty input, which is left untouched; otherwise it is false.
//
// Decoding rules:
//   - "{...}": the outer pair of braces is removed, content copied as-is.
//   - "\"...\"": the outer quotes are removed and backslashes are processed.
//   - anything else: backslashes are processed.
// Processing a backslash means dropping it and keeping the character that
// follows; a backslash at the very end of the input is dropped.
static size_t wvtcl_unescape(char *dst, const char *s, size_t s_len,
                             bool *verbatim = NULL)
{
    // empty or NULL strings remain themselves.  Checking s_len here also
    // keeps the s[0] / s[s_len-1] tests below inside the buffer.
    if (!s || s_len == 0)
    {
        if (verbatim) *verbatim = true;
        return 0;
    }

    if (verbatim) *verbatim = false;

    // An opening and a closing delimiter need two separate characters.
    // Without this a lone '"' would count as both, and stripping it from
    // each side would move the start pointer past the end pointer.
    const bool has_room_for_pair = (s_len >= 2);

    // deal with embraced strings by simply removing the braces
    if (has_room_for_pair && s[0] == '{' && s[s_len-1] == '}')
    {
        if (dst) memcpy(dst, &s[1], s_len-2);
        return s_len - 2;
    }

    const char *start = s, *end = s + s_len;

    // deal with quoted strings by ignoring the quotes _and_ unbackslashifying.
    if (has_room_for_pair && s[0] == '"' && s[s_len-1] == '"')
    {
        ++start;
        --end;
    }

    // otherwise, unbackslashify it.
    size_t len = 0;
    bool inescape = false;
    for (; start != end; ++start)
    {
        if (*start == '\\' && !inescape)
        {
            // swallow the backslash; the next character is taken literally
            inescape = true;
            continue;
        }

        inescape = false;
        if (dst) dst[len] = *start;
        len++;
    }
    return len;
}
203 WvString wvtcl_unescape(WvStringParm s)
205 size_t s_len = s.len();
207 bool verbatim;
208 size_t len = wvtcl_unescape(NULL, s, s_len, &verbatim);
209 if (verbatim) return s;
211 WvString result;
212 result.setsize(len+1);
213 char *e = result.edit();
214 e += wvtcl_unescape(e, s, s_len);
215 *e = '\0';
216 return result;
220 WvString wvtcl_encode(WvList<WvString> &l, const WvStringMask &nasties,
221 const WvStringMask &splitchars)
223 int size = 0;
225 WvList<WvString>::Iter i(l);
226 int count = 0;
227 for (i.rewind(); i.next(); )
229 size += wvtcl_escape(NULL, *i, i->len(), nasties);
230 ++count;
233 WvString result;
234 result.setsize(size+(count-1)+1);
236 char *p = result.edit();
237 int j;
238 for (i.rewind(), j=0; i.next(); ++j)
240 p += wvtcl_escape(p, *i, i->len(), nasties);
241 if (j < count - 1)
242 *p++ = splitchars.first();
244 *p = '\0';
246 return result;
// Sentinel returned by the low-level wvtcl_getword() when no complete word
// could be extracted from the input.
const size_t WVTCL_GETWORD_NONE = UINT_MAX;
251 static size_t wvtcl_getword(char *dst, const char *s, size_t s_len,
252 const WvStringMask &splitchars,
253 bool do_unescape, size_t *end = NULL)
255 //printf(" used=%d\n", origsize);
256 if (!s_len) return WVTCL_GETWORD_NONE;
258 bool inescape = false, inquote = false, incontinuation = false;
259 int bracecount = 0;
260 const char *origend = s + s_len;
261 const char *sptr, *eptr;
263 // skip leading separators
264 for (sptr = s; sptr != origend; sptr++)
266 if (!splitchars[*sptr])
267 break;
270 if (sptr == origend) // nothing left
271 return WVTCL_GETWORD_NONE;
273 // detect initial quote
274 if (*sptr == '"')
276 inquote = true;
277 eptr = sptr+1;
279 else
280 eptr = sptr;
282 // loop over string until something satisfactory is found
283 for (; eptr != origend; eptr++)
285 char ch = *eptr;
287 incontinuation = false;
289 if (inescape)
291 if (ch == '\n')
293 // technically we've finished the line-continuation
294 // sequence, but we require at least one more character
295 // in order to prove that there's a next line somewhere
296 // in the buffer. Otherwise we might stop parsing before
297 // we're "really" done if we're given input line-by-line.
299 // A better way to do this would be for getword() to *never*
300 // return a string unless it contains a separator character;
301 // then we wouldn't need this weird special case. But it
302 // don't work like that; we'll return the last word in the
303 // buffer even if it *doesn't* end in a separator character.
304 incontinuation = true;
306 inescape = false;
308 else if (ch == '\\')
310 inescape = true;
311 // now we need a character to complete the escape
313 else // not an escape sequence
315 // detect end of a quoted/unquoted string
316 if (bracecount == 0)
318 if (inquote)
320 if (ch == '"')
322 eptr++;
323 break;
326 else if (splitchars[ch])
327 break;
330 // match braces
331 if (!inquote)
333 if (ch == '{')
334 bracecount++;
335 else if (bracecount > 0 && ch == '}')
336 bracecount--;
341 if (bracecount || sptr==eptr || inquote || inescape || incontinuation)
342 // not there yet...
343 return WVTCL_GETWORD_NONE;
345 //printf("len=%d, unget=%d\n", eptr - sptr, origend - eptr);
346 if (end) *end = eptr - s;
348 if (do_unescape)
349 return wvtcl_unescape(dst, sptr, eptr-sptr);
350 else
352 if (dst) memcpy(dst, sptr, eptr-sptr);
353 return eptr - sptr;
358 WvString wvtcl_getword(WvBuf &buf, const WvStringMask &splitchars,
359 bool do_unescape)
361 int origsize = buf.used();
362 const char *origptr = (const char *)buf.get(origsize);
364 size_t end;
365 size_t len = wvtcl_getword(NULL, origptr, origsize,
366 splitchars, do_unescape, &end);
367 if (len == WVTCL_GETWORD_NONE)
369 buf.unget(origsize);
370 return WvString::null;
373 WvString result;
374 result.setsize(len+1);
375 char *e = result.edit();
376 e += wvtcl_getword(e, origptr, origsize, splitchars, do_unescape);
377 *e = '\0';
379 buf.unget(origsize - end);
381 return result;
385 void wvtcl_decode(WvList<WvString> &l, WvStringParm _s,
386 const WvStringMask &splitchars, bool do_unescape)
388 const char *s = _s;
389 size_t s_len = _s.len();
390 for (;;)
392 size_t end;
393 size_t len = wvtcl_getword(NULL, s, s_len,
394 splitchars, do_unescape, &end);
395 if (len == WVTCL_GETWORD_NONE)
396 break;
398 WvString *word = new WvString();
399 word->setsize(len+1);
401 char *e = word->edit();
402 e += wvtcl_getword(e, s, s_len, splitchars, do_unescape);
403 *e = '\0';
404 l.append(word, true);
406 s += end;
407 s_len -= end;