include/wvstrutils.h

   1 /* -*- Mode: C++ -*-
   2  * Worldvisions Weaver Software:
   3  *   Copyright (C) 1997-2002 Net Integration Technologies, Inc.
   4  *
   5  * Various little string functions...
   6  *
   7  * FIXME: and some other assorted crap that belongs anywhere but here.
   8  */
   9 #ifndef __WVSTRUTILS_H
  10 #define __WVSTRUTILS_H
  11
  12 #include <sys/types.h> // for off_t
  13 #include <time.h>
  14 #include <ctype.h>
  15 #include "wvstring.h"
  16 #include "wvstringlist.h"
  17 #include "wvhex.h"
  18 #ifndef _WIN32
  19 #include "wvregex.h"
  20 #endif
  21
  22 /** \file
  23  * Various little string functions
  24  */
  25
  26
  27 /**
  28  * Add character c to the end of a string after removing
  29  * terminating carriage returns/linefeeds if any.
  30  *
  31  * You need a buffer that's at least one character bigger than the
  32  * current length of the string, including the terminating NULL.
  33  */
  34 char *terminate_string(char *string, char c);
  35
  36 /**
  37  * Trims whitespace from the beginning and end of the character string,
  38  * including carriage return / linefeed characters. Modifies the string
  39  * in place. Returns the new first character of the string, which points
  40  * either at 'string' itself or some character contained therein.
  41  *
  42  * string is allowed to be NULL; returns NULL in that case.
  43  */
  44 char *trim_string(char *string);
  45
  46 /**
  47  * Similar to above, but trims the string starting at the first occurrence of
  48  * c.
  49  */
  50 char *trim_string(char *string, char c);
  51
  52 /**
  53  * return the string formed by concatenating string 'a' and string 'b' with
  54  * the 'sep' character between them.  For example,
  55  *     spacecat("xx", "yy", ";");
  56  * returns "xx;yy", and
  57  *    spacecat("xx;;", "yy", ";")
  58  * returns "xx;;;yy", and
  59  *    spacecat("xx;;", "yy", ";", true)
  60  * returns "xx;yy".
  61  *
  62  * This function is much faster than the more obvious WvString("%s;%s", a, b),
  63  * so it's useful when you're producing a *lot* of string data.
  64  */
  65 WvString spacecat(WvStringParm a, WvStringParm b, char sep = ' ',
  66                   bool onesep = false);
  67
  68
  69 /**
  70  * Replaces all whitespace characters in the string with non-breaking spaces
  71  * (&nbsp;) for use with web stuff.
  72  */
  73 char *non_breaking(const char *string);
  74
  75 /**
  76  * Replace all instances of c1 with c2 for the first 'length' characters in
  77  * 'string'. Ignores terminating NULL, so make sure you set 'length' correctly.
  78  */
  79 void replace_char(void *string, char c1, char c2, int length);
  80
  81 /**
  82  * Snip off the first part of 'haystack' if it consists of 'needle'.
  83  */
  84 char *snip_string(char *haystack, char *needle);
  85
  86 #ifndef _WIN32
  87 /**
  88  * In-place modify a character string so that all contained letters are
  89  * in lower case. Returns 'string'.
  90  */
  91 char *strlwr(char *string);
  92
  93 /**
  94  * In-place modify a character string so that all contained letters are
  95  * in upper case. Returns 'string'.
  96  */
  97 char *strupr(char *string);
  98
  99 #endif
 100
 101 /** Returns true if all characters in 'string' are isalnum() (alphanumeric). */
 102 bool is_word(const char *string);
 103
 104 /**
 105  * Produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
 106  * It is formatted with 16 bytes per line; each line has an address offset,
 107  * hex representation, and printable representation.
 108  *
 109  * This is used mostly for debugging purposes. You can send the returned
 110  * WvString object directly to a WvLog or any other WvStream for output.
 111  */
 112 WvString hexdump_buffer(const void *buf, size_t len, bool charRep = true);
 113
 114 /**
 115  * Returns true if 'c' is a newline or carriage return character.
 116  * Increases code readability a bit.
 117  */
 118 bool isnewline(char c);
 119
 120 /**
 121  * Converts escaped characters (things like %20 etc.) from web URLS
 122  * into their normal ASCII representations. If you happen to be
 123  * decoding PEM encoded stuff, or anything that has + signs in it that
 124  * you don't want encoded as spaces, then set no_space to true, and
 125  * it should "just work" for you.
 126  */
 127 WvString url_decode(WvStringParm str, bool no_space = false);
 128
 129
 130 /**
 131  * Converts all those pesky spaces, colons, and other nasties into nice
 132  * unreadable Quasi-Unicode codes
 133  */
 134 WvString url_encode(WvStringParm str);
 135
 136
 137 /**
 138  * Returns the difference between two dates in a human-readable format
 139  */
 140 WvString  diff_dates(time_t t1, time_t t2);
 141
 142
 143 /**
 144  * Returns an RFC822-compatible date made out of _when, or, if _when < 0, out of
 145  * the current time.
 146  */
 147 WvString rfc822_date(time_t _when = -1);
 148
 149 /** Returns an RFC1123-compatible date made out of _when */
 150 WvString rfc1123_date(time_t _when);
 151
 152 /** Return the local date (TZ applied) out of _when */
 153 WvString local_date(time_t _when = -1);
 154
 155 /** Return the local time (in format of ISO 8601) out of _when */
 156 WvString intl_time(time_t _when = -1);
 157
 158 /** Return the local date (in format of ISO 8601) out of _when */
 159 WvString intl_date(time_t _when = -1);
 160
 161 /** Return the local date and time (in format of ISO 8601) out of _when */
 162 WvString intl_datetime(time_t _when = -1);
 163
 164 time_t intl_gmtoff(time_t t);
 165
 166 #ifndef _WIN32
 167 /**
 168  * Similar to crypt(), but this randomly selects its own salt.
 169  * This function is defined in strcrypt.cc.  It chooses to use the DES
 170  * engine.
 171  */
 172 WvString passwd_crypt(const char *str);
 173
 174 #endif
 175 /**
 176  * Similar to crypt(), but this randomly selects its own salt.
 177  * This function is defined in strcrypt.cc.  It chooses to use the MD5
 178  * engine.
 179  */
 180 WvString passwd_md5(const char *str);
 181
 182 /**
 183  * Returns a string with a backslash in front of every non alphanumeric
 184  * character in s1.
 185  */
 186 WvString backslash_escape(WvStringParm s1);
 187
 188 /** How many times does 'c' occur in "s"? */
 189 int strcount(WvStringParm s, const char c);
 190
 191 /**
 192  * Example: encode_hostname_as_DN("www.fizzle.com")
 193  * will result in dc=www,dc=fizzle,dc=com,cn=www.fizzle.com
 194  */
 195 WvString encode_hostname_as_DN(WvStringParm hostname);
 196
 197 /**
 198  * Given a hostname, turn it into a "nice" one.  It has to start with a
 199  * letter/number, END with a letter/number, have underscores converted to
 200  * hyphens, and have no more than one hyphen in a row.  If we can't do this
 201  * and have any sort of answer, return "UNKNOWN".
 202  */
 203 WvString nice_hostname(WvStringParm name);
 204
 205 /**
 206  * Take a full path/file name and splits it up into respective pathname and
 207  * filename. This can also be useful for splitting the toplevel directory off a
 208  * path.
 209  */
 210 WvString getfilename(WvStringParm fullname);
 211 WvString getdirname(WvStringParm fullname);
 212
 213 /*
 214  * Possible rounding methods for numbers -- remember from school?
 215  */
 216 enum RoundingMethod
 217 {
 218     ROUND_DOWN,
 219     ROUND_DOWN_AT_POINT_FIVE,
 220     ROUND_UP_AT_POINT_FIVE,
 221     ROUND_UP
 222 };
 223
 224 /**
 225  * Given a number of blocks and a blocksize (default==1 byte), return a
 226  * WvString containing a human-readable representation of blocks*blocksize.
 227  * This function uses SI prefixes.
 228  */
 229 WvString sizetoa(unsigned long long blocks, unsigned long blocksize = 1,
 230                  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
 231
 232 /**
 233  * Given a size in kilobytes, return a human readable size.
 234  * This function uses SI prefixes (1 MB = 1 000 KB = 1 000 000 B).
 235  */
 236 WvString sizektoa(unsigned long long kbytes,
 237                   RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
 238
 239 /**
 240  * Given a number of blocks and a blocksize (default==1 byte), return a
 241  * WvString containing a human-readable representation of blocks*blocksize.
 242  * This function uses IEC prefixes.
 243  */
 244 WvString sizeitoa(unsigned long long blocks, unsigned long blocksize = 1,
 245                   RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
 246
 247 /**
 248  * Given a size in kilobytes, return a human readable size.
 249  * This function uses IEC prefixes.
 250  */
 251 WvString sizekitoa(unsigned long long kbytes,
 252                    RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
 253
 254 /** Given a number of seconds, returns a formatted human-readable string
 255  * saying how long the period is.
 256  */
 257 WvString secondstoa(unsigned int total_seconds);
 258
 259 /**
 260  * Finds a string in an array and returns its index.
 261  * Returns -1 if not found.
 262  */
 263 int lookup(const char *str, const char * const *table,
 264     bool case_sensitive = false);
 265
 266 /**
 267  * Splits a string and adds each substring to a collection.
 268  *   coll       : the collection of strings to add to
 269  *   _s         : the string to split
 270  *   splitchars : the set of delimiter characters
 271  *   limit      : the maximum number of elements to split
 272  */
 273 template<class StringCollection>
 274 void strcoll_split(StringCollection &coll, WvStringParm _s,
 275     const char *splitchars = " \t", int limit = 0)
 276 {
 277     WvString s(_s);
 278     char *sptr = s.edit(), *eptr, oldc;
 279
 280     // Simple if statement to catch (and add) empty (but not NULL) strings.
 281     if (sptr && !*sptr )
 282     {
 283         WvString *emptyString = new WvString("");
 284         coll.add(emptyString, true);
 285     }
 286
 287     // Needed to catch delimeters at the beginning of the string.
 288     bool firstrun = true;
 289
 290     while (sptr && *sptr)
 291     {
 292         --limit;
 293
 294         if (firstrun)
 295         {
 296             firstrun = false;
 297         }
 298         else
 299         {
 300             sptr += strspn(sptr, splitchars);
 301         }
 302
 303         if (limit)
 304         {
 305             eptr = sptr + strcspn(sptr, splitchars);
 306         }
 307         else
 308         {
 309             eptr = sptr + strlen(sptr);
 310         }
 311
 312         oldc = *eptr;
 313         *eptr = 0;
 314
 315         WvString *newstr = new WvString(sptr);
 316         coll.add(newstr, true);
 317
 318         *eptr = oldc;
 319         sptr = eptr;
 320     }
 321 }
 322
 323
 324 /**
 325  * Splits a string and adds each substring to a collection.
 326  *   this behaves differently in that it actually delimits the
 327  *   pieces as fields and returns them, it doesn't treat multiple
 328  *   delimeters as one and skip them.
 329  *
 330  *   ie., parm1::parm2 -> 'parm1','','parm2' when delimited with ':'
 331  *
 332  *   coll       : the collection of strings to add to
 333  *   _s         : the string to split
 334  *   splitchars : the set of delimiter characters
 335  *   limit      : the maximum number of elements to split
 336  */
 337 template<class StringCollection>
 338 void strcoll_splitstrict(StringCollection &coll, WvStringParm _s,
 339     const char *splitchars = " \t", int limit = 0)
 340 {
 341     WvString s(_s);
 342     char *cur = s.edit();
 343
 344     if (!cur) return;
 345
 346     for (;;)
 347     {
 348         --limit;
 349         if (!limit)
 350         {
 351             coll.add(new WvString(cur), true);
 352             break;
 353         }
 354
 355         int len = strcspn(cur, splitchars);
 356
 357         char tmp = cur[len];
 358         cur[len] = 0;
 359         coll.add(new WvString(cur), true);
 360         cur[len] = tmp;
 361
 362         if (!cur[len]) break;
 363         cur += len + 1;
 364     }
 365 }
 366
 367
 368 #ifndef _WIN32 // don't have regex on win32
 369 /**
 370  * Splits a string and adds each substring to a collection.
 371  *   coll       : the collection of strings to add to
 372  *   _s         : the string to split
 373  *   splitchars : the set of delimiter characters
 374  *   limit      : the maximum number of elements to split
 375  */
 376 template<class StringCollection>
 377 void strcoll_split(StringCollection &coll, WvStringParm s,
 378     const WvRegex &regex, int limit = 0)
 379 {
 380     int start = 0;
 381     int match_start, match_end;
 382     int count = 0;
 383
 384     while ((limit == 0 || count < limit)
 385             && regex.continuable_match(&s[start], match_start, match_end)
 386             && match_end > 0)
 387     {
 388         WvString *substr = new WvString;
 389         int len = match_start;
 390         substr->setsize(len+1);
 391         memcpy(substr->edit(), &s[start], len);
 392         substr->edit()[len] = '\0';
 393         coll.add(substr, true);
 394         start += match_end;
 395         ++count;
 396     }
 397
 398     if (limit == 0 || count < limit)
 399     {
 400         WvString *last = new WvString(&s[start]);
 401         last->unique();
 402         coll.add(last, true);
 403     }
 404 }
 405 #endif
 406
 407
 408 /**
 409  * Concatenates all strings in a collection and returns the result.
 410  *   coll      : the collection of strings to read from
 411  *   joinchars : the delimiter string to insert between strings
 412  */
 413 template<class StringCollection>
 414 WvString strcoll_join(const StringCollection &coll,
 415     const char *joinchars = " \t")
 416 {
 417     size_t joinlen = strlen(joinchars);
 418     size_t totlen = 1;
 419     typename StringCollection::Iter s(
 420         const_cast<StringCollection&>(coll));
 421     for (s.rewind(); s.next(); )
 422     {
 423         if (s->cstr())
 424             totlen += strlen(s->cstr());
 425         totlen += joinlen;
 426     }
 427     totlen -= joinlen; // no join chars at tail
 428
 429     WvString total;
 430     total.setsize(totlen);
 431
 432     char *te = total.edit();
 433     te[0] = 0;
 434     bool first = true;
 435     for (s.rewind(); s.next(); )
 436     {
 437         if (first)
 438             first = false;
 439         else
 440             strcat(te, joinchars);
 441         if (s->cstr())
 442             strcat(te, s->cstr());
 443     }
 444     return total;
 445 }
 446
 447 /**
 448  * Replace any instances of "a" with "b" in "s".  Kind of like sed, only
 449  * much dumber.
 450  */
 451 WvString strreplace(WvStringParm s, WvStringParm a, WvStringParm b);
 452
 453 /** Replace any consecutive instances of character c with a single one */
 454 WvString undupe(WvStringParm s, char c);
 455
 456 /** Do gethostname() without a fixed-length buffer */
 457 WvString hostname();
 458
 459 /** Get the fqdn of the local host, using gethostbyname() and gethostname() */
 460 WvString fqdomainname();
 461
 462 /** Get the current working directory without a fixed-length buffer */
 463 WvString wvgetcwd();
 464
 465 /**
 466  * Inserts SI-style spacing into a number
 467  * (eg passing 9876543210 returns "9 876 543 210")
 468  */
 469 WvString metriculate(const off_t i);
 470
 471 /**
 472  * Returns everything in line (exclusively) after a.
 473  * If a is not in line, "" is returned.
 474  */
 475 WvString afterstr(WvStringParm line, WvStringParm a);
 476
 477 /**
 478  * Returns everything in line (exclusively) before 'a'.
 479  * If a is not in line, line is returned.
 480  */
 481 WvString beforestr(WvStringParm line, WvStringParm a);
 482
 483 /**
 484  * Returns the string of length len starting at pos in line.
 485  * Error checking prevents seg fault.
 486  * If pos > line.len()-1 return ""
 487  * if pos+len > line.len() simply return from pos to end of line
 488  */
 489 WvString substr(WvString line, unsigned int pos, unsigned int len);
 490
 491 /**
 492  * Removes any trailing punctuation ('.', '?', or '!') from the line, and
 493  * returns it in a new string.  Does not modify line.
 494  */
 495 WvString depunctuate(WvStringParm line);
 496
 497 // Converts a string in decimal to an arbitrary numeric type
 498 template<class T>
 499 bool wvstring_to_num(WvStringParm str, T &n)
 500 {
 501     bool neg = false;
 502     n = 0;
 503
 504     for (const char *p = str; *p; ++p)
 505     {
 506         if (isdigit(*p))
 507         {
 508             n = n * T(10) + T(*p - '0');
 509         }
 510         else if ((const char *)str == p
 511                 && *p == '-')
 512         {
 513             neg = true;
 514         }
 515         else return false;
 516     }
 517
 518     if (neg)
 519         n = -n;
 520
 521     return true;
 522 }
 523
 524 /*
 525  * Before using the C-style string escaping functions below, please consider
 526  * using the functions in wvtclstring.h instead; they usually lead to much more
 527  * human readable and manageable results, and allow representation of
 528  * lists of strings.
 529  */
 530
 531 struct CStrExtraEscape  // one additional escape rule for cstr_escape()/cstr_unescape()
 532 {
 533     char ch;  // presumably the raw character the rule applies to (e.g. '{')
 534     const char *esc;  // presumably the escape text used in its place (e.g. "\\<")
 535 };
 536 extern const CStrExtraEscape CSTR_TCLSTR_ESCAPES[];  // rule table for TCL-friendly output; NOTE(review): how the array is terminated is not visible here
 537
 538 /// Converts data into a C-style string constant.
 539 //
 540 // If data is NULL, returns WvString::null; otherwise, returns an allocated
 541 // WvString containing the C-style string constant that represents the data.
 542 //
 543 // All printable characters, including space but excluding " and \, are
 544 // represented as themselves, without escaping.
 545 //
 546 // The usual C escapes are performed, such as \n, \r, \", \\ and \0.
 547 //
 548 // All other characters are escaped in uppercase hex form, eg. \x9E
 549 //
 550 // The extra_escapes parameter allows for additional characters beyond
 551 // the usual ones escaped in C; setting it to CSTR_TCLSTR_ESCAPES will
 552 // escape { and } as \< and \>, which allows the resulting strings to be
 553 // TCL-string coded without ridiculous double-escaping.
 554 //
 555 WvString cstr_escape(const void *data, size_t size,
 556         const CStrExtraEscape extra_escapes[] = NULL);
 557
 558 /// Converts a C-style string constant into data.
 559 //
 560 // This function does *not* include the trailing null that a C compiler would --
 561 //   if you want this null, put \0 at the end of the C-style string
 562 //
 563 // If cstr is correctly formatted and max_size is large enough for the
 564 // resulting data, returns true and size will equal the size of the
 565 // resulting data.  If data is not NULL it will contain this data.
 566 //
 567 // If cstr is correctly formatted but max_size is too small for the resulting
 568 // data, returns false and size will equal the minimum value of max_size
 569 // for this function to have returned true.  If data is non-NULL it will
 570 // contain the first max_size bytes of resulting data.
 571 //
 572 // If cstr is incorrectly formatted, returns false and size will equal 0.
 573 //
 574 // This function works just as well on multiple, whitespace-separated
 575 // C-style strings.  This allows you to concatenate strings produced
 576 // by cstr_escape, and the result of cstr_unescape will be the data blocks
 577 // concatenated together.  This implies that the empty string corresponds
 578 // to a valid data block of length zero; however, a null string still returns
 579 // an error.
 580 //
 581 // The extra_escapes parameter must match that used in the call to
 582 // cstr_escape used to produce the escaped strings.
 583 //
 584 bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size,
 585         const CStrExtraEscape extra_escapes[] = NULL);
 586
 587 static inline bool is_int(const char *str)
 588 {
 589     if (!str)
 590         return false;
 591
 592     if (*str == '-')
 593         ++str;
 594
 595     if (!*str)
 596         return false;
 597
 598     while (*str)
 599         if (!isdigit(*str++))
 600             return false;
 601
 602     return true;
 603 }
 604
 605 /// Converts a pointer into a string, like glibc's %p formatter would
 606 /// do.
 607 WvString ptr2str(void* ptr);
 608
 609 #ifdef _WIN32
 610 /* Calls CryptProtectData on a string, and returns a BASE64 encoded version
 611  * of the encrypted data, suitable for entering into Uniconf or other use.
 612  *
 613  * Unless you are debugging Windows somehow and want to examine what the
 614  * label on encrypted data is, it's perfectly fine to leave the 'description'
 615  * parameter blank.
 616  */
 617 WvString wvprotectdata(WvStringParm data, WvStringParm description = "Data");
 618
 619 /* Accepts a BASE64 encoded string of encrypted data (encrypted via
 620  * CryptProtectData, that is) and decrypts it and returns a WvString.
 621  */
 622 WvString wvunprotectdata(WvStringParm data);
 623 #endif
 624
 625 #endif // __WVSTRUTILS_H