2 * Worldvisions Weaver Software:
3 * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
5 #include "wvbackslash.h"
9 #include "wvstringmask.h"
10 #include "wvtclstring.h"
12 const WvStringMask
WVTCL_NASTY_SPACES(WVTCL_NASTY_SPACES_STR
);
13 const WvStringMask
WVTCL_NASTY_NEWLINES(WVTCL_NASTY_NEWLINES_STR
);
14 const WvStringMask
WVTCL_SPLITCHARS(WVTCL_SPLITCHARS_STR
);
16 static size_t wvtcl_escape(char *dst
, const char *s
, size_t s_len
,
17 const WvStringMask
&nasties
, bool *verbatim
= NULL
)
19 if (verbatim
) *verbatim
= false;
21 // NULL strings remain such
24 // empty strings are just {}
35 bool backslashify
= false, inescape
= false;
36 int len
= 0, unprintables
= 0, bracecount
= 0;
37 const char *cptr
, *cptr_end
= s
+ s_len
;
39 // figure out which method we need to use: backslashify or embrace.
40 // also count the number of unprintable characters we'll need to
41 // backslashify, if it turns out that's necessary.
42 for (cptr
= s
; cptr
!= cptr_end
; cptr
++)
44 // Assume we do nothing
45 if (dst
) dst
[len
] = *cptr
;
48 if (!inescape
&& *cptr
== '{')
50 else if (!inescape
&& *cptr
== '}')
58 case WVTCL_ALWAYS_NASTY_CASE
:
74 // if the braces aren't balanced, backslashify
75 if (bracecount
!= 0 || inescape
)
78 if (!backslashify
&& !unprintables
)
80 if (verbatim
) *verbatim
= true;
81 return len
; // no work needed!
89 for (cptr
= s
; cptr
!= cptr_end
; ++cptr
)
94 case WVTCL_ALWAYS_NASTY_CASE
:
108 else return len
+unprintables
;
112 // the embrace method: just take the string and put braces around it
117 for (cptr
= s
; cptr
!= cptr_end
; ++cptr
)
127 WvString
wvtcl_escape(WvStringParm s
, const WvStringMask
&nasties
)
129 size_t s_len
= s
.len();
132 size_t len
= wvtcl_escape(NULL
, s
, s_len
, nasties
, &verbatim
);
133 if (verbatim
) return s
;
137 char *e
= result
.edit();
138 e
+= wvtcl_escape(e
, s
, s_len
, nasties
);
144 static size_t wvtcl_unescape(char *dst
, const char *s
, size_t s_len
,
145 bool *verbatim
= NULL
)
147 //printf(" unescape '%s'\n", (const char *)s);
149 // empty or NULL strings remain themselves
152 if (verbatim
) *verbatim
= true;
156 if (verbatim
) *verbatim
= false;
158 // deal with embraced strings by simply removing the braces
159 if (s
[0] == '{' && s
[s_len
-1] == '}')
161 if (dst
) memcpy(dst
, &s
[1], s_len
-2);
165 bool skipquotes
= false;
166 // deal with quoted strings by ignoring the quotes _and_ unbackslashifying.
167 if (s
[0] == '"' && s
[s_len
-1] == '"')
170 // otherwise, unbackslashify it.
171 const char *start
= s
, *end
= &s
[s_len
];
178 bool inescape
= false;
179 for (; start
!= end
; ++start
)
185 if (dst
) dst
[len
] = *start
;
195 if (dst
) dst
[len
] = *start
;
203 WvString
wvtcl_unescape(WvStringParm s
)
205 size_t s_len
= s
.len();
208 size_t len
= wvtcl_unescape(NULL
, s
, s_len
, &verbatim
);
209 if (verbatim
) return s
;
212 result
.setsize(len
+1);
213 char *e
= result
.edit();
214 e
+= wvtcl_unescape(e
, s
, s_len
);
220 WvString
wvtcl_encode(WvList
<WvString
> &l
, const WvStringMask
&nasties
,
221 const WvStringMask
&splitchars
)
225 WvList
<WvString
>::Iter
i(l
);
227 for (i
.rewind(); i
.next(); )
229 size
+= wvtcl_escape(NULL
, *i
, i
->len(), nasties
);
234 result
.setsize(size
+(count
-1)+1);
236 char *p
= result
.edit();
238 for (i
.rewind(), j
=0; i
.next(); ++j
)
240 p
+= wvtcl_escape(p
, *i
, i
->len(), nasties
);
242 *p
++ = splitchars
.first();
249 const size_t WVTCL_GETWORD_NONE (UINT_MAX
);
251 static size_t wvtcl_getword(char *dst
, const char *s
, size_t s_len
,
252 const WvStringMask
&splitchars
,
253 bool do_unescape
, size_t *end
= NULL
)
255 //printf(" used=%d\n", origsize);
256 if (!s_len
) return WVTCL_GETWORD_NONE
;
258 bool inescape
= false, inquote
= false, incontinuation
= false;
260 const char *origend
= s
+ s_len
;
261 const char *sptr
, *eptr
;
263 // skip leading separators
264 for (sptr
= s
; sptr
!= origend
; sptr
++)
266 if (!splitchars
[*sptr
])
270 if (sptr
== origend
) // nothing left
271 return WVTCL_GETWORD_NONE
;
273 // detect initial quote
282 // loop over string until something satisfactory is found
283 for (; eptr
!= origend
; eptr
++)
287 incontinuation
= false;
293 // technically we've finished the line-continuation
294 // sequence, but we require at least one more character
295 // in order to prove that there's a next line somewhere
296 // in the buffer. Otherwise we might stop parsing before
297 // we're "really" done if we're given input line-by-line.
299 // A better way to do this would be for getword() to *never*
300 // return a string unless it contains a separator character;
301 // then we wouldn't need this weird special case. But it
302 // don't work like that; we'll return the last word in the
303 // buffer even if it *doesn't* end in a separator character.
304 incontinuation
= true;
311 // now we need a character to complete the escape
313 else // not an escape sequence
315 // detect end of a quoted/unquoted string
326 else if (splitchars
[ch
])
335 else if (bracecount
> 0 && ch
== '}')
341 if (bracecount
|| sptr
==eptr
|| inquote
|| inescape
|| incontinuation
)
343 return WVTCL_GETWORD_NONE
;
345 //printf("len=%d, unget=%d\n", eptr - sptr, origend - eptr);
346 if (end
) *end
= eptr
- s
;
349 return wvtcl_unescape(dst
, sptr
, eptr
-sptr
);
352 if (dst
) memcpy(dst
, sptr
, eptr
-sptr
);
358 WvString
wvtcl_getword(WvBuf
&buf
, const WvStringMask
&splitchars
,
361 int origsize
= buf
.used();
362 const char *origptr
= (const char *)buf
.get(origsize
);
365 size_t len
= wvtcl_getword(NULL
, origptr
, origsize
,
366 splitchars
, do_unescape
, &end
);
367 if (len
== WVTCL_GETWORD_NONE
)
370 return WvString::null
;
374 result
.setsize(len
+1);
375 char *e
= result
.edit();
376 e
+= wvtcl_getword(e
, origptr
, origsize
, splitchars
, do_unescape
);
379 buf
.unget(origsize
- end
);
385 void wvtcl_decode(WvList
<WvString
> &l
, WvStringParm _s
,
386 const WvStringMask
&splitchars
, bool do_unescape
)
389 size_t s_len
= _s
.len();
393 size_t len
= wvtcl_getword(NULL
, s
, s_len
,
394 splitchars
, do_unescape
, &end
);
395 if (len
== WVTCL_GETWORD_NONE
)
398 WvString
*word
= new WvString();
399 word
->setsize(len
+1);
401 char *e
= word
->edit();
402 e
+= wvtcl_getword(e
, s
, s_len
, splitchars
, do_unescape
);
404 l
.append(word
, true);