2 * Worldvisions Weaver Software:
3 * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
5 * Various little string functions...
7 * FIXME: and some other assorted crap that belongs anywhere but here.
10 #define __WVSTRUTILS_H
12 #include <sys/types.h> // for off_t
16 #include "wvstringlist.h"
23 * Various little string functions
/**
 * Appends the character 'c' to 'string', after first stripping any
 * trailing carriage returns / linefeeds.
 *
 * The caller's buffer must be at least one character larger than the
 * current string length, counting the terminating NUL.
 */
char *terminate_string(char *string, char c);
/**
 * Strips whitespace -- carriage return / linefeed characters included --
 * from both ends of 'string', modifying it in place.
 *
 * Returns the new first character of the string, which is either 'string'
 * itself or a pointer to some character inside it.  'string' may be NULL;
 * NULL is returned in that case.
 */
char *trim_string(char *string);
/**
 * Variant of trim_string(char *) that trims the string starting at the
 * first occurrence of a marker.
 *
 * NOTE(review): the original sentence is cut off in this copy of the
 * header; the marker is presumably the character 'c' -- confirm against
 * the implementation.
 */
char *trim_string(char *string, char c);
53 * return the string formed by concatenating string 'a' and string 'b' with
54 * the 'sep' character between them. For example,
55 * spacecat("xx", "yy", ';');
56 * returns "xx;yy", and
57 * spacecat("xx;;", "yy", ';')
58 * returns "xx;;;yy", and
59 * spacecat("xx;;", "yy", ';', true)
62 * This function is much faster than the more obvious WvString("%s;%s", a, b),
63 * so it's useful when you're producing a *lot* of string data.
65 WvString
spacecat(WvStringParm a
, WvStringParm b
, char sep
= ' ',
/**
 * Replaces every whitespace character in 'string' with a non-breaking
 * space (&nbsp;), for use with web stuff.
 */
char *non_breaking(const char *string);
/**
 * Substitutes 'c2' for each occurrence of 'c1' within the first 'length'
 * characters of 'string'.
 *
 * The terminating NUL is ignored, so the caller is responsible for passing
 * a correct 'length'.
 */
void replace_char(void *string, char c1, char c2, int length);
/**
 * Snips the leading part off 'haystack' when that leading part consists
 * of 'needle'.
 */
char *snip_string(char *haystack, char *needle);
/**
 * Converts every letter in 'string' to lower case, modifying the buffer
 * in place.  Returns 'string'.
 */
char *strlwr(char *string);
/**
 * Converts every letter in 'string' to upper case, modifying the buffer
 * in place.  Returns 'string'.
 */
char *strupr(char *string);
/** True when every character of 'string' satisfies isalnum() (alphanumeric). */
bool is_word(const char *string);
105 * Produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
106 * It is formatted with 16 bytes per line; each line has an address offset,
107 * hex representation, and printable representation.
109 * This is used mostly for debugging purposes. You can send the returned
110 * WvString object directly to a WvLog or any other WvStream for output.
112 WvString
hexdump_buffer(const void *buf
, size_t len
, bool charRep
= true);
/**
 * True when 'c' is a newline or a carriage return character.
 * Exists mainly to make calling code a little more readable.
 */
bool isnewline(char c);
121 * Converts escaped characters (things like %20 etc.) from web URLS
122 * into their normal ASCII representations. If you happen to be
123 * decoding PEM encoded stuff,or anything that has + signs in it that
124 * you don't want encoded as spaces, then set no_space to true, and
125 * it should "just work" for you.
127 WvString
url_decode(WvStringParm str
, bool no_space
= false);
131 * Converts all those pesky spaces, colons, and other nasties into nice
132 * unreadable Quasi-Unicode codes
134 WvString
url_encode(WvStringParm str
);
138 * Returns the difference between to dates in a human readable format
140 WvString
diff_dates(time_t t1
, time_t t2
);
144 * Returns an RFC822-compatible date made out of _when, or, if _when < 0, out of
147 WvString
rfc822_date(time_t _when
= -1);
149 /** Returns an RFC1123-compatible date made out of _when */
150 WvString
rfc1123_date(time_t _when
);
152 /** Return the local date (TZ applied) out of _when */
153 WvString
local_date(time_t _when
= -1);
155 /** Return the local time (in format of ISO 8601) out of _when */
156 WvString
intl_time(time_t _when
= -1);
158 /** Return the local date (in format of ISO 8601) out of _when */
159 WvString
intl_date(time_t _when
= -1);
161 /** Return the local date and time (in format of ISO 8601) out of _when */
162 WvString
intl_datetime(time_t _when
= -1);
/**
 * NOTE(review): undocumented in this header.  Judging by the name, this
 * presumably returns the local offset from GMT that applies at time 't'
 * -- confirm against the implementation.
 */
time_t intl_gmtoff(time_t t);
168 * Similar to crypt(), but this randomly selects its own salt.
169 * This function is defined in strcrypt.cc. It chooses to use the DES
172 WvString
passwd_crypt(const char *str
);
176 * Similar to crypt(), but this randomly selects its own salt.
177 * This function is defined in strcrypt.cc. It chooses to use the MD5
180 WvString
passwd_md5(const char *str
);
183 * Returns a string with a backslash in front of every non alphanumeric
186 WvString
backslash_escape(WvStringParm s1
);
188 /** How many times does 'c' occur in "s"? */
189 int strcount(WvStringParm s
, const char c
);
192 * Example: encode_hostname_as_DN("www.fizzle.com")
193 * will result in dc=www,dc=fizzle,dc=com,cn=www.fizzle.com
195 WvString
encode_hostname_as_DN(WvStringParm hostname
);
198 * Given a hostname, turn it into a "nice" one. It has to start with a
199 * letter/number, END with a letter/number, have underscores converted to
200 * hyphens, and have no more than one hyphen in a row. If we can't do this
201 * and have any sort of answer, return "UNKNOWN".
203 WvString
nice_hostname(WvStringParm name
);
206 * Take a full path/file name and splits it up into respective pathname and
207 * filename. This can also be useful for splitting the toplevel directory off a
210 WvString
getfilename(WvStringParm fullname
);
211 WvString
getdirname(WvStringParm fullname
);
214 * Possible rounding methods for numbers -- remember from school?
219 ROUND_DOWN_AT_POINT_FIVE
,
220 ROUND_UP_AT_POINT_FIVE
,
225 * Given a number of blocks and a blocksize (default==1 byte), return a
226 * WvString containing a human-readable representation of blocks*blocksize.
227 * This function uses SI prefixes.
229 WvString
sizetoa(unsigned long long blocks
, unsigned long blocksize
= 1,
230 RoundingMethod rounding_method
= ROUND_UP_AT_POINT_FIVE
);
233 * Given a size in kilobyes, return a human readable size.
234 * This function uses SI prefixes (1 MB = 1 000 KB = 1 000 000 B).
236 WvString
sizektoa(unsigned long long kbytes
,
237 RoundingMethod rounding_method
= ROUND_UP_AT_POINT_FIVE
);
240 * Given a number of blocks and a blocksize (default==1 byte), return a
241 * WvString containing a human-readable representation of blocks*blocksize.
242 * This function uses IEC prefixes.
244 WvString
sizeitoa(unsigned long long blocks
, unsigned long blocksize
= 1,
245 RoundingMethod rounding_method
= ROUND_UP_AT_POINT_FIVE
);
248 * Given a size in kilobytes, return a human readable size.
249 * This function uses IEC prefixes.
251 WvString
sizekitoa(unsigned long long kbytes
,
252 RoundingMethod rounding_method
= ROUND_UP_AT_POINT_FIVE
);
254 /** Given a number of seconds, returns a formatted human-readable string
255 * saying how long the period is.
257 WvString
secondstoa(unsigned int total_seconds
);
/**
 * Searches the string array 'table' for 'str' and returns its index,
 * or -1 when it is not found.
 *
 * 'case_sensitive' defaults to false.
 */
int lookup(const char *str, const char * const *table,
           bool case_sensitive = false);
267 * Splits a string and adds each substring to a collection.
268 * coll : the collection of strings to add to
269 * _s : the string to split
270 * splitchars : the set of delimiter characters
271 * limit : the maximum number of elements to split
273 template<class StringCollection
>
274 void strcoll_split(StringCollection
&coll
, WvStringParm _s
,
275 const char *splitchars
= " \t", int limit
= 0)
278 char *sptr
= s
.edit(), *eptr
, oldc
;
280 // Simple if statement to catch (and add) empty (but not NULL) strings.
283 WvString
*emptyString
= new WvString("");
284 coll
.add(emptyString
, true);
287 // Needed to catch delimiters at the beginning of the string.
288 bool firstrun
= true;
290 while (sptr
&& *sptr
)
300 sptr
+= strspn(sptr
, splitchars
);
305 eptr
= sptr
+ strcspn(sptr
, splitchars
);
309 eptr
= sptr
+ strlen(sptr
);
315 WvString
*newstr
= new WvString(sptr
);
316 coll
.add(newstr
, true);
325 * Splits a string and adds each substring to a collection.
326 * this behaves differently in that it actually delimits the
327 * pieces as fields and returns them, it doesn't treat multiple
328 * delimiters as one and skip them.
330 * ie., parm1::parm2 -> 'parm1','','parm2' when delimited with ':'
332 * coll : the collection of strings to add to
333 * _s : the string to split
334 * splitchars : the set of delimiter characters
335 * limit : the maximum number of elements to split
337 template<class StringCollection
>
338 void strcoll_splitstrict(StringCollection
&coll
, WvStringParm _s
,
339 const char *splitchars
= " \t", int limit
= 0)
342 char *cur
= s
.edit();
351 coll
.add(new WvString(cur
), true);
355 int len
= strcspn(cur
, splitchars
);
359 coll
.add(new WvString(cur
), true);
362 if (!cur
[len
]) break;
368 #ifndef _WIN32 // don't have regex on win32
370 * Splits a string and adds each substring to a collection.
371 * coll : the collection of strings to add to
372 * _s : the string to split
373 * regex : the regular expression that matches the delimiters
374 * limit : the maximum number of elements to split
376 template<class StringCollection
>
377 void strcoll_split(StringCollection
&coll
, WvStringParm s
,
378 const WvRegex
®ex
, int limit
= 0)
381 int match_start
, match_end
;
384 while ((limit
== 0 || count
< limit
)
385 && regex
.continuable_match(&s
[start
], match_start
, match_end
)
388 WvString
*substr
= new WvString
;
389 int len
= match_start
;
390 substr
->setsize(len
+1);
391 memcpy(substr
->edit(), &s
[start
], len
);
392 substr
->edit()[len
] = '\0';
393 coll
.add(substr
, true);
398 if (limit
== 0 || count
< limit
)
400 WvString
*last
= new WvString(&s
[start
]);
402 coll
.add(last
, true);
409 * Concatenates all strings in a collection and returns the result.
410 * coll : the collection of strings to read from
411 * joinchars : the delimiter string to insert between strings
413 template<class StringCollection
>
414 WvString
strcoll_join(const StringCollection
&coll
,
415 const char *joinchars
= " \t")
417 size_t joinlen
= strlen(joinchars
);
419 typename
StringCollection::Iter
s(
420 const_cast<StringCollection
&>(coll
));
421 for (s
.rewind(); s
.next(); )
424 totlen
+= strlen(s
->cstr());
427 totlen
-= joinlen
; // no join chars at tail
430 total
.setsize(totlen
);
432 char *te
= total
.edit();
435 for (s
.rewind(); s
.next(); )
440 strcat(te
, joinchars
);
442 strcat(te
, s
->cstr());
448 * Replace any instances of "a" with "b" in "s". Kind of like sed, only
451 WvString
strreplace(WvStringParm s
, WvStringParm a
, WvStringParm b
);
453 /** Replace any consecutive instances of character c with a single one */
454 WvString
undupe(WvStringParm s
, char c
);
456 /** Do gethostname() without a fixed-length buffer */
459 /** Get the fqdn of the local host, using gethostbyname() and gethostname() */
460 WvString
fqdomainname();
462 /** Get the current working directory without a fixed-length buffer */
466 * Inserts SI-style spacing into a number
467 * (eg passing 9876543210 returns "9 876 543 210")
469 WvString
metriculate(const off_t i
);
472 * Returns everything in line (exclusively) after a.
473 * If a is not in line, "" is returned.
475 WvString
afterstr(WvStringParm line
, WvStringParm a
);
478 * Returns everything in line (exclusively) before 'a'.
479 * If a is not in line, line is returned.
481 WvString
beforestr(WvStringParm line
, WvStringParm a
);
484 * Returns the string of length len starting at pos in line.
485 * Error checking prevents seg fault.
486 * If pos > line.len()-1 return ""
487 * if pos+len > line.len() simply return from pos to end of line
489 WvString
substr(WvString line
, unsigned int pos
, unsigned int len
);
492 * Removes any trailing punctuation ('.', '?', or '!') from the line, and
493 * returns it in a new string. Does not modify line.
495 WvString
depunctuate(WvStringParm line
);
497 // Converts a string in decimal to an arbitrary numeric type
499 bool wvstring_to_num(WvStringParm str
, T
&n
)
504 for (const char *p
= str
; *p
; ++p
)
508 n
= n
* T(10) + T(*p
- '0');
510 else if ((const char *)str
== p
525 * Before using the C-style string escaping functions below, please consider
526 * using the functions in wvtclstring.h instead; they usually lead to much more
527 * human readable and manageable results, and allow representation of
531 struct CStrExtraEscape
536 extern const CStrExtraEscape CSTR_TCLSTR_ESCAPES
[];
538 /// Converts data into a C-style string constant.
540 // If data is NULL, returns WvString::null; otherwise, returns an allocated
541 // WvString containing the C-style string constant that represents the data.
543 // All printable characters including space except " and \ are represented with
546 // The usual C escapes are performed, such as \n, \r, \", \\ and \0.
548 // All other characters are escaped in uppercase hex form, eg. \x9E
550 // The extra_escapes parameter allows for additional characters beyond
551 // the usual ones escaped in C; setting it to CSTR_TCLSTR_ESCAPES will
552 // escape { and } as \< and \>, which allows the resulting strings to be
553 // TCL-string coded without ridiculous double-escaping.
555 WvString
cstr_escape(const void *data
, size_t size
,
556 const CStrExtraEscape extra_escapes
[] = NULL
);
558 /// Converts a C-style string constant into data.
560 // This function does *not* include the trailing null that a C compiler would --
561 // if you want this null, put \0 at the end of the C-style string
563 // If cstr is correctly formatted and max_size is large enough for the
564 // resulting data, returns true and size will equal the size of the
565 // resulting data. If data is not NULL it will contain this data.
567 // If cstr is correctly formatted but max_size is too small for the resulting
568 // data, returns false and size will equal the minimum value of min_size
569 // for this function to have returned true. If data is non-NULL it will
570 // contain the first max_size bytes of resulting data.
572 // If cstr is incorrectly formatted, returns false and size will equal 0.
574 // This functions works just as well on multiple, whitespace-separated
575 // C-style strings as well. This allows you to concatenate strings produced
576 // by cstr_escape, and the result of cstr_unescape will be the data blocks
577 // concatenated together. This implies that the empty string corresponds
578 // to a valid data block of length zero; however, a null string still returns
581 // The extra_escapes parameter must match that used in the call to
582 // cstr_escape used to produce the escaped strings.
584 bool cstr_unescape(WvStringParm cstr
, void *data
, size_t max_size
, size_t &size
,
585 const CStrExtraEscape extra_escapes
[] = NULL
);
587 static inline bool is_int(const char *str
)
599 if (!isdigit(*str
++))
605 /// Converts a pointer into a string, like glibc's %p formatter would
607 WvString
ptr2str(void* ptr
);
610 /* Calls CryptProtectData on a string, and returns a BASE64 encoded version
611 * of the encrypted data, suitable for entering into Uniconf or other use.
613 * Unless you are debugging Windows somehow and want to examine what the
614 * label on encrypted data is, it's perfectly fine to leave the 'description'
617 WvString
wvprotectdata(WvStringParm data
, WvStringParm description
= "Data");
619 /* Accepts a BASE64 encoded string of encrypted data (encrypted via
620 * CryptProtectData, that is) and decrypts it and returns a WvString.
622 WvString
wvunprotectdata(WvStringParm data
);
625 #endif // __WVSTRUTILS_H