wvdbusserver: implement NameHasOwner request.
[wvstreams.git] / include / wvstrutils.h
blob935fa9766ab4ddbc2e31da48c3c08f47dc37cf5f
1 /* -*- Mode: C++ -*-
2 * Worldvisions Weaver Software:
3 * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
5 * Various little string functions...
6 *
7 * FIXME: and some other assorted crap that belongs anywhere but here.
8 */
9 #ifndef __WVSTRUTILS_H
10 #define __WVSTRUTILS_H
12 #include <sys/types.h> // for off_t
13 #include <time.h>
14 #include <ctype.h>
15 #include "wvstring.h"
16 #include "wvstringlist.h"
17 #include "wvhex.h"
18 #ifndef _WIN32
19 #include "wvregex.h"
20 #endif
22 /** \file
23 * Various little string functions
27 /**
28 * Add character c to the end of a string after removing
29 * terminating carriage returns/linefeeds if any.
31 * You need a buffer that's at least one character bigger than the
32 * current length of the string, including the terminating NULL.
34 char *terminate_string(char *string, char c);
36 /**
37 * Trims whitespace from the beginning and end of the character string,
38 * including carriage return / linefeed characters. Modifies the string
39 * in place. Returns the new first character of the string, which points
40 * either at 'string' itself or some character contained therein.
42 * string is allowed to be NULL; returns NULL in that case.
44 char *trim_string(char *string);
46 /**
47 * Similar to above, but trims the string starting at the first occurrence of
48 * c.
50 char *trim_string(char *string, char c);
52 /**
53 * return the string formed by concatenating string 'a' and string 'b' with
54 * the 'sep' character between them. For example,
55 * spacecat("xx", "yy", ";");
56 * returns "xx;yy", and
57 * spacecat("xx;;", "yy", ";")
58 * returns "xx;;;yy", and
59 * spacecat("xx;;", "yy", ";", true)
60 * returns "xx;yy".
62 * This function is much faster than the more obvious WvString("%s;%s", a, b),
63 * so it's useful when you're producing a *lot* of string data.
65 WvString spacecat(WvStringParm a, WvStringParm b, char sep = ' ',
66 bool onesep = false);
69 /**
70 * Replaces all whitespace characters in the string with non-breaking spaces
71 * (&nbsp;) for use with web stuff.
73 char *non_breaking(const char *string);
75 /**
76 * Replace all instances of c1 with c2 for the first 'length' characters in
77 * 'string'. Ignores terminating NULL, so make sure you set 'length' correctly.
79 void replace_char(void *string, char c1, char c2, int length);
81 /**
82 * Snip off the first part of 'haystack' if it consists of 'needle'.
84 char *snip_string(char *haystack, char *needle);
86 #ifndef _WIN32
87 /**
88 * In-place modify a character string so that all contained letters are
89 * in lower case. Returns 'string'.
91 char *strlwr(char *string);
93 /**
94 * In-place modify a character string so that all contained letters are
95 * in upper case. Returns 'string'.
97 char *strupr(char *string);
99 #endif
101 /** Returns true if all characters in 'string' are isalnum() (alphanumeric). */
102 bool is_word(const char *string);
105 * Produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
106 * It is formatted with 16 bytes per line; each line has an address offset,
107 * hex representation, and printable representation.
109 * This is used mostly for debugging purposes. You can send the returned
110 * WvString object directly to a WvLog or any other WvStream for output.
112 WvString hexdump_buffer(const void *buf, size_t len, bool charRep = true);
115 * Returns true if 'c' is a newline or carriage return character.
116 * Increases code readability a bit.
118 bool isnewline(char c);
121 * Converts escaped characters (things like %20 etc.) from web URLS
122 * into their normal ASCII representations. If you happen to be
123 * decoding PEM encoded stuff, or anything that has + signs in it that
124 * you don't want encoded as spaces, then set no_space to true, and
125 * it should "just work" for you.
127 WvString url_decode(WvStringParm str, bool no_space = false);
131 * Converts all those pesky spaces, colons, and other nasties into nice
132 * unreadable Quasi-Unicode codes
134 WvString url_encode(WvStringParm str);
138 * Returns the difference between two dates in a human-readable format
140 WvString diff_dates(time_t t1, time_t t2);
144 * Returns an RFC822-compatible date made out of _when, or, if _when < 0, out of
145 * the current time.
147 WvString rfc822_date(time_t _when = -1);
149 /** Returns an RFC1123-compatible date made out of _when */
150 WvString rfc1123_date(time_t _when);
152 /** Return the local date (TZ applied) out of _when */
153 WvString local_date(time_t _when = -1);
155 /** Return the local time (in format of ISO 8601) out of _when */
156 WvString intl_time(time_t _when = -1);
158 /** Return the local date (in format of ISO 8601) out of _when */
159 WvString intl_date(time_t _when = -1);
161 /** Return the local date and time (in format of ISO 8601) out of _when */
162 WvString intl_datetime(time_t _when = -1);
164 time_t intl_gmtoff(time_t t);
166 #ifndef _WIN32
168 * Similar to crypt(), but this randomly selects its own salt.
169 * This function is defined in strcrypt.cc. It chooses to use the DES
170 * engine.
172 WvString passwd_crypt(const char *str);
174 #endif
176 * Similar to crypt(), but this randomly selects its own salt.
177 * This function is defined in strcrypt.cc. It chooses to use the MD5
178 * engine.
180 WvString passwd_md5(const char *str);
183 * Returns a string with a backslash in front of every non alphanumeric
184 * character in s1.
186 WvString backslash_escape(WvStringParm s1);
188 /** How many times does 'c' occur in "s"? */
189 int strcount(WvStringParm s, const char c);
192 * Example: encode_hostname_as_DN("www.fizzle.com")
193 * will result in dc=www,dc=fizzle,dc=com,cn=www.fizzle.com
195 WvString encode_hostname_as_DN(WvStringParm hostname);
198 * Given a hostname, turn it into a "nice" one. It has to start with a
199 * letter/number, END with a letter/number, have underscores converted to
200 * hyphens, and have no more than one hyphen in a row. If we can't do this
201 * and have any sort of answer, return "UNKNOWN".
203 WvString nice_hostname(WvStringParm name);
206 * Take a full path/file name and splits it up into respective pathname and
207 * filename. This can also be useful for splitting the toplevel directory off a
208 * path.
210 WvString getfilename(WvStringParm fullname);
211 WvString getdirname(WvStringParm fullname);
/**
 * Possible rounding methods for numbers -- remember from school?
 *
 * Accepted by sizetoa(), sizektoa(), sizeitoa() and sizekitoa(), all of
 * which default to ROUND_UP_AT_POINT_FIVE.
 *
 * NOTE(review): the exact tie-breaking semantics of each value are defined
 * by the implementation of those functions, which is not visible in this
 * header; the per-value notes below are inferred from the names only.
 */
enum RoundingMethod
{
    ROUND_DOWN,                 // presumably: always round toward the lower value
    ROUND_DOWN_AT_POINT_FIVE,   // presumably: round to nearest, exact .5 goes down
    ROUND_UP_AT_POINT_FIVE,     // presumably: round to nearest, exact .5 goes up
    ROUND_UP                    // presumably: always round toward the higher value
};
225 * Given a number of blocks and a blocksize (default==1 byte), return a
226 * WvString containing a human-readable representation of blocks*blocksize.
227 * This function uses SI prefixes.
229 WvString sizetoa(unsigned long long blocks, unsigned long blocksize = 1,
230 RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
233 * Given a size in kilobytes, return a human readable size.
234 * This function uses SI prefixes (1 MB = 1 000 KB = 1 000 000 B).
236 WvString sizektoa(unsigned long long kbytes,
237 RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
240 * Given a number of blocks and a blocksize (default==1 byte), return a
241 * WvString containing a human-readable representation of blocks*blocksize.
242 * This function uses IEC prefixes.
244 WvString sizeitoa(unsigned long long blocks, unsigned long blocksize = 1,
245 RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
248 * Given a size in kilobytes, return a human readable size.
249 * This function uses IEC prefixes.
251 WvString sizekitoa(unsigned long long kbytes,
252 RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
254 /** Given a number of seconds, returns a formatted human-readable string
255 * saying how long the period is.
257 WvString secondstoa(unsigned int total_seconds);
260 * Finds a string in an array and returns its index.
261 * Returns -1 if not found.
263 int lookup(const char *str, const char * const *table,
264 bool case_sensitive = false);
267 * Splits a string and adds each substring to a collection.
268 * coll : the collection of strings to add to
269 * _s : the string to split
270 * splitchars : the set of delimiter characters
271 * limit : the maximum number of elements to split
273 template<class StringCollection>
274 void strcoll_split(StringCollection &coll, WvStringParm _s,
275 const char *splitchars = " \t", int limit = 0)
277 WvString s(_s);
278 char *sptr = s.edit(), *eptr, oldc;
280 // Simple if statement to catch (and add) empty (but not NULL) strings.
281 if (sptr && !*sptr )
283 WvString *emptyString = new WvString("");
284 coll.add(emptyString, true);
287 // Needed to catch delimeters at the beginning of the string.
288 bool firstrun = true;
290 while (sptr && *sptr)
292 --limit;
294 if (firstrun)
296 firstrun = false;
298 else
300 sptr += strspn(sptr, splitchars);
303 if (limit)
305 eptr = sptr + strcspn(sptr, splitchars);
307 else
309 eptr = sptr + strlen(sptr);
312 oldc = *eptr;
313 *eptr = 0;
315 WvString *newstr = new WvString(sptr);
316 coll.add(newstr, true);
318 *eptr = oldc;
319 sptr = eptr;
325 * Splits a string and adds each substring to a collection.
326 * this behaves differently in that it actually delimits the
327 * pieces as fields and returns them, it doesn't treat multiple
328 * delimeters as one and skip them.
330 * ie., parm1::parm2 -> 'parm1','','parm2' when delimited with ':'
332 * coll : the collection of strings to add to
333 * _s : the string to split
334 * splitchars : the set of delimiter characters
335 * limit : the maximum number of elements to split
337 template<class StringCollection>
338 void strcoll_splitstrict(StringCollection &coll, WvStringParm _s,
339 const char *splitchars = " \t", int limit = 0)
341 WvString s(_s);
342 char *cur = s.edit();
344 if (!cur) return;
346 for (;;)
348 --limit;
349 if (!limit)
351 coll.add(new WvString(cur), true);
352 break;
355 int len = strcspn(cur, splitchars);
357 char tmp = cur[len];
358 cur[len] = 0;
359 coll.add(new WvString(cur), true);
360 cur[len] = tmp;
362 if (!cur[len]) break;
363 cur += len + 1;
368 #ifndef _WIN32 // don't have regex on win32
370 * Splits a string and adds each substring to a collection.
371 * coll : the collection of strings to add to
372 * _s : the string to split
373 * splitchars : the set of delimiter characters
374 * limit : the maximum number of elements to split
376 template<class StringCollection>
377 void strcoll_split(StringCollection &coll, WvStringParm s,
378 const WvRegex &regex, int limit = 0)
380 int start = 0;
381 int match_start, match_end;
382 int count = 0;
384 while ((limit == 0 || count < limit)
385 && regex.continuable_match(&s[start], match_start, match_end)
386 && match_end > 0)
388 WvString *substr = new WvString;
389 int len = match_start;
390 substr->setsize(len+1);
391 memcpy(substr->edit(), &s[start], len);
392 substr->edit()[len] = '\0';
393 coll.add(substr, true);
394 start += match_end;
395 ++count;
398 if (limit == 0 || count < limit)
400 WvString *last = new WvString(&s[start]);
401 last->unique();
402 coll.add(last, true);
405 #endif
409 * Concatenates all strings in a collection and returns the result.
410 * coll : the collection of strings to read from
411 * joinchars : the delimiter string to insert between strings
413 template<class StringCollection>
414 WvString strcoll_join(const StringCollection &coll,
415 const char *joinchars = " \t")
417 size_t joinlen = strlen(joinchars);
418 size_t totlen = 1;
419 typename StringCollection::Iter s(
420 const_cast<StringCollection&>(coll));
421 for (s.rewind(); s.next(); )
423 if (s->cstr())
424 totlen += strlen(s->cstr());
425 totlen += joinlen;
427 totlen -= joinlen; // no join chars at tail
429 WvString total;
430 total.setsize(totlen);
432 char *te = total.edit();
433 te[0] = 0;
434 bool first = true;
435 for (s.rewind(); s.next(); )
437 if (first)
438 first = false;
439 else
440 strcat(te, joinchars);
441 if (s->cstr())
442 strcat(te, s->cstr());
444 return total;
448 * Replace any instances of "a" with "b" in "s". Kind of like sed, only
449 * much dumber.
451 WvString strreplace(WvStringParm s, WvStringParm a, WvStringParm b);
453 /** Replace any consecutive instances of character c with a single one */
454 WvString undupe(WvStringParm s, char c);
456 /** Do gethostname() without a fixed-length buffer */
457 WvString hostname();
459 /** Get the fqdn of the local host, using gethostbyname() and gethostname() */
460 WvString fqdomainname();
462 /** Get the current working directory without a fixed-length buffer */
463 WvString wvgetcwd();
466 * Inserts SI-style spacing into a number
467 * (eg passing 9876543210 returns "9 876 543 210")
469 WvString metriculate(const off_t i);
472 * Returns everything in line (exclusively) after a.
473 * If a is not in line, "" is returned.
475 WvString afterstr(WvStringParm line, WvStringParm a);
478 * Returns everything in line (exclusively) before 'a'.
479 * If a is not in line, line is returned.
481 WvString beforestr(WvStringParm line, WvStringParm a);
484 * Returns the string of length len starting at pos in line.
485 * Error checking prevents seg fault.
486 * If pos > line.len()-1 return ""
487 * if pos+len > line.len() simply return from pos to end of line
489 WvString substr(WvString line, unsigned int pos, unsigned int len);
491 /**
492 * Removes any trailing punctuation ('.', '?', or '!') from the line, and
493 * returns it in a new string. Does not modify line.
495 WvString depunctuate(WvStringParm line);
497 // Converts a string in decimal to an arbitrary numeric type
498 template<class T>
499 bool wvstring_to_num(WvStringParm str, T &n)
501 bool neg = false;
502 n = 0;
504 for (const char *p = str; *p; ++p)
506 if (isdigit(*p))
508 n = n * T(10) + T(*p - '0');
510 else if ((const char *)str == p
511 && *p == '-')
513 neg = true;
515 else return false;
518 if (neg)
519 n = -n;
521 return true;
525 * Before using the C-style string escaping functions below, please consider
526 * using the functions in wvtclstring.h instead; they usually lead to much more
527 * human readable and manageable results, and allow representation of
528 * lists of strings.
/**
 * One additional escape rule for cstr_escape() / cstr_unescape(): a
 * character paired with the string that represents it in escaped form.
 *
 * NOTE(review): tables of these are passed as a bare array with no length,
 * so they presumably end with a sentinel entry -- confirm against the
 * implementation before building a new table.
 */
struct CStrExtraEscape
{
    char ch;            // the character to be escaped
    const char *esc;    // the text that stands for 'ch' in escaped output
};
536 extern const CStrExtraEscape CSTR_TCLSTR_ESCAPES[];
538 /// Converts data into a C-style string constant.
540 // If data is NULL, returns WvString::null; otherwise, returns an allocated
541 // WvString containing the C-style string constant that represents the data.
543 // All printable characters including space, except " and \, are represented
544 // without escaping.
546 // The usual C escapes are performed, such as \n, \r, \", \\ and \0.
548 // All other characters are escaped in uppercase hex form, eg. \x9E
550 // The extra_escapes parameter allows for additional characters beyond
551 // the usual ones escaped in C; setting it to CSTR_TCLSTR_ESCAPES will
552 // escape { and } as \< and \>, which allows the resulting strings to be
553 // TCL-string coded without ridiculous double-escaping.
555 WvString cstr_escape(const void *data, size_t size,
556 const CStrExtraEscape extra_escapes[] = NULL);
558 /// Converts a C-style string constant into data.
560 // This function does *not* include the trailing null that a C compiler would --
561 // if you want this null, put \0 at the end of the C-style string
563 // If cstr is correctly formatted and max_size is large enough for the
564 // resulting data, returns true and size will equal the size of the
565 // resulting data. If data is not NULL it will contain this data.
567 // If cstr is correctly formatted but max_size is too small for the resulting
568 // data, returns false and size will equal the minimum value of max_size
569 // for this function to have returned true. If data is non-NULL it will
570 // contain the first max_size bytes of resulting data.
572 // If cstr is incorrectly formatted, returns false and size will equal 0.
574 // This function also works on multiple, whitespace-separated
575 // C-style strings. This allows you to concatenate strings produced
576 // by cstr_escape, and the result of cstr_unescape will be the data blocks
577 // concatenated together. This implies that the empty string corresponds
578 // to a valid data block of length zero; however, a null string still returns
579 // an error.
581 // The extra_escapes parameter must match that used in the call to
582 // cstr_escape used to produce the escaped strings.
584 bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size,
585 const CStrExtraEscape extra_escapes[] = NULL);
/**
 * Returns true if 'str' is a decimal integer: an optional single leading
 * '-' followed by one or more decimal digits and nothing else.
 *
 * Returns false for NULL, for the empty string, and for a lone "-".
 * A leading '+', whitespace, or any non-digit character makes it false.
 * No range checking is done; arbitrarily long digit strings are accepted.
 */
static inline bool is_int(const char *str)
{
    if (!str)
        return false;

    if (*str == '-')
        ++str;

    // There must be at least one digit after the optional sign.
    if (!*str)
        return false;

    // <ctype.h> classifiers require a value representable as unsigned char
    // (or EOF); passing a negative plain char is undefined behaviour.
    for (; *str; ++str)
    {
        if (!isdigit((unsigned char)*str))
            return false;
    }

    return true;
}
605 /// Converts a pointer into a string, like glibc's %p formatter would
606 /// do.
607 WvString ptr2str(void* ptr);
609 #ifdef _WIN32
610 /* Calls CryptProtectData on a string, and returns a BASE64 encoded version
611 * of the encrypted data, suitable for entering into Uniconf or other use.
613 * Unless you are debugging Windows somehow and want to examine what the
614 * label on encrypted data is, it's perfectly fine to leave the 'description'
615 * parameter blank.
617 WvString wvprotectdata(WvStringParm data, WvStringParm description = "Data");
619 /* Accepts a BASE64 encoded string of encrypted data (encrypted via
620 * CryptProtectData, that is) and decrypts it and returns a WvString.
622 WvString wvunprotectdata(WvStringParm data);
623 #endif
625 #endif // __WVSTRUTILS_H