src/gromacs/utility/cstringutil.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #include "gmxpre.h"
  39
  40 #include "cstringutil.h"
  41
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44
  45 #include <cassert>
  46 #include <cctype>
  47 #include <cstring>
  48
  49 #include <sstream>
  50 #include <string>
  51 #include <vector>
  52
  53 #include "gromacs/utility/basedefinitions.h"
  54 #include "gromacs/utility/fatalerror.h"
  55 #include "gromacs/utility/futil.h"
  56 #include "gromacs/utility/smalloc.h"
  57 #include "gromacs/utility/sysinfo.h"
  58
  59 int continuing(char *s)
  60 {
  61     int sl;
  62     assert(s);
  63
  64     rtrim(s);
  65     sl = strlen(s);
  66     if ((sl > 0) && (s[sl-1] == CONTINUE))
  67     {
  68         s[sl-1] = 0;
  69         return TRUE;
  70     }
  71     else
  72     {
  73         return FALSE;
  74     }
  75 }
  76
  77
  78
  79 char *fgets2(char *line, int n, FILE *stream)
  80 {
  81     char *c;
  82     if (fgets(line, n, stream) == nullptr)
  83     {
  84         return nullptr;
  85     }
  86     if ((c = strchr(line, '\n')) != nullptr)
  87     {
  88         *c = '\0';
  89     }
  90     else
  91     {
  92         /* A line not ending in a newline can only occur at the end of a file,
  93          * or because of n being too small.
  94          * Since both cases occur very infrequently, we can check for EOF.
  95          */
  96         if (!feof(stream))
  97         {
  98             gmx_fatal(FARGS, "An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'", n, n, line);
  99         }
 100     }
 101     if ((c = strchr(line, '\r')) != nullptr)
 102     {
 103         *c = '\0';
 104     }
 105
 106     return line;
 107 }
 108
 109 void strip_comment (char *line)
 110 {
 111     char *c;
 112
 113     if (!line)
 114     {
 115         return;
 116     }
 117
 118     /* search for a comment mark and replace it by a zero */
 119     if ((c = strchr(line, COMMENTSIGN)) != nullptr)
 120     {
 121         (*c) = 0;
 122     }
 123 }
 124
 125 void upstring (char *str)
 126 {
 127     int i;
 128
 129     for (i = 0; (i < (int)strlen(str)); i++)
 130     {
 131         str[i] = toupper(str[i]);
 132     }
 133 }
 134
 135 void ltrim (char *str)
 136 {
 137     int   i, c;
 138
 139     if (nullptr == str)
 140     {
 141         return;
 142     }
 143
 144     c = 0;
 145     while (('\0' != str[c]) && isspace(str[c]))
 146     {
 147         c++;
 148     }
 149     if (c > 0)
 150     {
 151         for (i = c; ('\0' != str[i]); i++)
 152         {
 153             str[i-c] = str[i];
 154         }
 155         str[i-c] = '\0';
 156     }
 157 }
 158
 159 void rtrim (char *str)
 160 {
 161     int nul;
 162
 163     if (nullptr == str)
 164     {
 165         return;
 166     }
 167
 168     nul = strlen(str)-1;
 169     while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) )
 170     {
 171         str[nul] = '\0';
 172         nul--;
 173     }
 174 }
 175
 176 void trim (char *str)
 177 {
 178     ltrim (str);
 179     rtrim (str);
 180 }
 181
 182 void nice_header(FILE *out, const char *fn)
 183 {
 184     int            uid;
 185     char           userbuf[256];
 186     char           hostbuf[256];
 187     char           timebuf[STRLEN];
 188
 189     /* Print a nice header above the file */
 190     fprintf(out, "%c\n", COMMENTSIGN);
 191     fprintf(out, "%c\tFile '%s' was generated\n", COMMENTSIGN, fn ? fn : "unknown");
 192
 193     uid  = gmx_getuid();
 194     gmx_getusername(userbuf, 256);
 195     gmx_gethostname(hostbuf, 256);
 196     gmx_format_current_time(timebuf, STRLEN);
 197
 198     fprintf(out, "%c\tBy user: %s (%d)\n", COMMENTSIGN, userbuf, uid);
 199     fprintf(out, "%c\tOn host: %s\n", COMMENTSIGN, hostbuf);
 200     fprintf(out, "%c\tAt date: %s\n", COMMENTSIGN, timebuf);
 201     fprintf(out, "%c\n", COMMENTSIGN);
 202 }
 203
 204 int gmx_strcasecmp_min(const char *str1, const char *str2)
 205 {
 206     char ch1, ch2;
 207
 208     do
 209     {
 210         do
 211         {
 212             ch1 = toupper(*(str1++));
 213         }
 214         while ((ch1 == '-') || (ch1 == '_'));
 215         do
 216         {
 217             ch2 = toupper(*(str2++));
 218         }
 219         while ((ch2 == '-') || (ch2 == '_'));
 220
 221         if (ch1 != ch2)
 222         {
 223             return (ch1-ch2);
 224         }
 225     }
 226     while (ch1);
 227     return 0;
 228 }
 229
 230 int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 231 {
 232     char  ch1, ch2;
 233     char *stri1, *stri2;
 234
 235     stri1 = (char *)str1;
 236     stri2 = (char *)str2;
 237     do
 238     {
 239         do
 240         {
 241             ch1 = toupper(*(str1++));
 242         }
 243         while ((ch1 == '-') || (ch1 == '_'));
 244         do
 245         {
 246             ch2 = toupper(*(str2++));
 247         }
 248         while ((ch2 == '-') || (ch2 == '_'));
 249
 250         if (ch1 != ch2)
 251         {
 252             return (ch1-ch2);
 253         }
 254     }
 255     while (ch1 && (str1-stri1 < n) && (str2-stri2 < n));
 256     return 0;
 257 }
 258
 259 int gmx_strcasecmp(const char *str1, const char *str2)
 260 {
 261     char ch1, ch2;
 262
 263     do
 264     {
 265         ch1 = toupper(*(str1++));
 266         ch2 = toupper(*(str2++));
 267         if (ch1 != ch2)
 268         {
 269             return (ch1-ch2);
 270         }
 271     }
 272     while (ch1);
 273     return 0;
 274 }
 275
 276 int gmx_strncasecmp(const char *str1, const char *str2, int n)
 277 {
 278     char ch1, ch2;
 279
 280     if (n == 0)
 281     {
 282         return 0;
 283     }
 284
 285     do
 286     {
 287         ch1 = toupper(*(str1++));
 288         ch2 = toupper(*(str2++));
 289         if (ch1 != ch2)
 290         {
 291             return (ch1-ch2);
 292         }
 293         n--;
 294     }
 295     while (ch1 && n);
 296     return 0;
 297 }
 298
 299 char *gmx_strdup(const char *src)
 300 {
 301     char *dest;
 302
 303     snew(dest, strlen(src)+1);
 304     strcpy(dest, src);
 305
 306     return dest;
 307 }
 308
 309 char *
 310 gmx_strndup(const char *src, int n)
 311 {
 312     int   len;
 313     char *dest;
 314
 315     len = strlen(src);
 316     if (len > n)
 317     {
 318         len = n;
 319     }
 320     snew(dest, len+1);
 321     strncpy(dest, src, len);
 322     dest[len] = 0;
 323     return dest;
 324 }
 325
 326 /* Magic hash init number for Dan J. Bernsteins algorithm.
 327  * Do NOT use any other value unless you really know what you are doing.
 328  */
 329 const unsigned int
 330     gmx_string_hash_init = 5381;
 331
 332
 333 unsigned int
 334 gmx_string_fullhash_func(const char *s, unsigned int hash_init)
 335 {
 336     int c;
 337
 338     while ((c = (*s++)) != '\0')
 339     {
 340         hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 341     }
 342     return hash_init;
 343 }
 344
 345 unsigned int
 346 gmx_string_hash_func(const char *s, unsigned int hash_init)
 347 {
 348     int c;
 349
 350     while ((c = toupper(*s++)) != '\0')
 351     {
 352         if (isalnum(c))
 353         {
 354             hash_init = ((hash_init << 5) + hash_init) ^ c;            /* (hash * 33) xor c */
 355         }
 356     }
 357     return hash_init;
 358 }
 359
 360 int
 361 gmx_wcmatch(const char *pattern, const char *str)
 362 {
 363     while (*pattern)
 364     {
 365         if (*pattern == '*')
 366         {
 367             /* Skip multiple wildcards in a sequence */
 368             while (*pattern == '*' || *pattern == '?')
 369             {
 370                 ++pattern;
 371                 /* For ?, we need to check that there are characters left
 372                  * in str. */
 373                 if (*pattern == '?')
 374                 {
 375                     if (*str == 0)
 376                     {
 377                         return GMX_NO_WCMATCH;
 378                     }
 379                     else
 380                     {
 381                         ++str;
 382                     }
 383                 }
 384             }
 385             /* If the pattern ends after the star, we have a match */
 386             if (*pattern == 0)
 387             {
 388                 return 0;
 389             }
 390             /* Match the rest against each possible suffix of str */
 391             while (*str)
 392             {
 393                 /* Only do the recursive call if the first character
 394                  * matches. We don't have to worry about wildcards here,
 395                  * since we have processed them above. */
 396                 if (*pattern == *str)
 397                 {
 398                     int rc;
 399                     /* Match the suffix, and return if a match or an error */
 400                     rc = gmx_wcmatch(pattern, str);
 401                     if (rc != GMX_NO_WCMATCH)
 402                     {
 403                         return rc;
 404                     }
 405                 }
 406                 ++str;
 407             }
 408             /* If no suffix of str matches, we don't have a match */
 409             return GMX_NO_WCMATCH;
 410         }
 411         else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 412         {
 413             ++str;
 414         }
 415         else
 416         {
 417             return GMX_NO_WCMATCH;
 418         }
 419         ++pattern;
 420     }
 421     /* When the pattern runs out, we have a match if the string has ended. */
 422     return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 423 }
 424
 425 char *wrap_lines(const char *buf, int line_width, int indent, gmx_bool bIndentFirst)
 426 {
 427     char    *b2;
 428     int      i, i0, i2, j, b2len, lspace = 0, l2space = 0;
 429     gmx_bool bFirst, bFitsOnLine;
 430
 431     /* characters are copied from buf to b2 with possible spaces changed
 432      * into newlines and extra space added for indentation.
 433      * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
 434      * i0 points to the beginning of the current line (in buf, source)
 435      * lspace and l2space point to the last space on the current line
 436      * bFirst is set to prevent indentation of first line
 437      * bFitsOnLine says if the first space occurred before line_width, if
 438      * that is not the case, we have a word longer than line_width which
 439      * will also not fit on the next line, so we might as well keep it on
 440      * the current line (where it also won't fit, but looks better)
 441      */
 442
 443     b2    = nullptr;
 444     b2len = strlen(buf)+1+indent;
 445     snew(b2, b2len);
 446     i0 = i2 = 0;
 447     if (bIndentFirst)
 448     {
 449         for (i2 = 0; (i2 < indent); i2++)
 450         {
 451             b2[i2] = ' ';
 452         }
 453     }
 454     bFirst = TRUE;
 455     do
 456     {
 457         l2space = -1;
 458         /* find the last space before end of line */
 459         for (i = i0; ((i-i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
 460         {
 461             b2[i2++] = buf[i];
 462             /* remember the position of a space */
 463             if (buf[i] == ' ')
 464             {
 465                 lspace  = i;
 466                 l2space = i2-1;
 467             }
 468             /* if we have a newline before the line is full, reset counters */
 469             if (buf[i] == '\n' && buf[i+1])
 470             {
 471                 i0     = i+1;
 472                 b2len += indent;
 473                 srenew(b2, b2len);
 474                 /* add indentation after the newline */
 475                 for (j = 0; (j < indent); j++)
 476                 {
 477                     b2[i2++] = ' ';
 478                 }
 479             }
 480         }
 481         /* If we are at the last newline, copy it */
 482         if (buf[i] == '\n' && !buf[i+1])
 483         {
 484             b2[i2++] = buf[i++];
 485         }
 486         /* if we're not at the end of the string */
 487         if (buf[i])
 488         {
 489             /* check if one word does not fit on the line */
 490             bFitsOnLine = (i-i0 <= line_width);
 491             /* reset line counters to just after the space */
 492             i0 = lspace+1;
 493             i2 = l2space+1;
 494             /* if the words fit on the line, and we're beyond the indentation part */
 495             if ( (bFitsOnLine) && (l2space >= indent) )
 496             {
 497                 /* start a new line */
 498                 b2[l2space] = '\n';
 499                 /* and add indentation */
 500                 if (indent)
 501                 {
 502                     if (bFirst)
 503                     {
 504                         line_width -= indent;
 505                         bFirst      = FALSE;
 506                     }
 507                     b2len += indent;
 508                     srenew(b2, b2len);
 509                     for (j = 0; (j < indent); j++)
 510                     {
 511                         b2[i2++] = ' ';
 512                     }
 513                     /* no extra spaces after indent; */
 514                     while (buf[i0] == ' ')
 515                     {
 516                         i0++;
 517                     }
 518                 }
 519             }
 520         }
 521     }
 522     while (buf[i]);
 523     b2[i2] = '\0';
 524
 525     return b2;
 526 }
 527
 528 gmx_int64_t
 529 str_to_int64_t(const char *str, char **endptr)
 530 {
 531 #ifndef _MSC_VER
 532     return strtoll(str, endptr, 10);
 533 #else
 534     return _strtoi64(str, endptr, 10);
 535 #endif
 536 }
 537
 538 char *gmx_step_str(gmx_int64_t i, char *buf)
 539 {
 540     sprintf(buf, "%" GMX_PRId64, i);
 541     return buf;
 542 }
 543
 544 void parse_digits_from_string(const char *digitstring, int *ndigits, int **digitlist)
 545 {
 546     /* TODO use std::string, once gmx_gpu_opt_t is ready for it */
 547     if (nullptr == digitstring)
 548     {
 549         *ndigits   = 0;
 550         *digitlist = nullptr;
 551         return;
 552     }
 553
 554     if (strstr(digitstring, ",") != nullptr)
 555     {
 556         parse_digits_from_csv_string(digitstring, ndigits, digitlist);
 557     }
 558     else
 559     {
 560         parse_digits_from_plain_string(digitstring, ndigits, digitlist);
 561     }
 562 }
 563
 564 void parse_digits_from_plain_string(const char *digitstring, int *ndigits, int **digitlist)
 565 {
 566     int i;
 567
 568     if (nullptr == digitstring)
 569     {
 570         *ndigits   = 0;
 571         *digitlist = nullptr;
 572         return;
 573     }
 574
 575     *ndigits = strlen(digitstring);
 576
 577     snew(*digitlist, *ndigits);
 578
 579     for (i = 0; i < *ndigits; i++)
 580     {
 581         if (digitstring[i] < '0' || digitstring[i] > '9')
 582         {
 583             gmx_fatal(FARGS, "Invalid character in digit-only string: '%c'\n",
 584                       digitstring[i]);
 585         }
 586         (*digitlist)[i] = digitstring[i] - '0';
 587     }
 588 }
 589
 590 void parse_digits_from_csv_string(const char *digitstring, int *ndigits, int **digitlist)
 591 {
 592     if (nullptr == digitstring)
 593     {
 594         *ndigits   = 0;
 595         *digitlist = nullptr;
 596         return;
 597     }
 598
 599     std::vector<int>   digits;
 600     std::istringstream ss(digitstring);
 601     std::string        token;
 602     while (std::getline(ss, token, ','))
 603     {
 604         if (token.find_first_not_of("0123456789") != std::string::npos)
 605         {
 606             gmx_fatal(FARGS, "Invalid token in digit-only string: \"%s\"\n",
 607                       token.c_str());
 608         }
 609         int number = static_cast<int>(str_to_int64_t(token.c_str(), nullptr));
 610         digits.push_back(number);
 611     }
 612
 613     *ndigits = digits.size();
 614     snew(*digitlist, *ndigits);
 615     for (size_t i = 0; i < digits.size(); i++)
 616     {
 617         (*digitlist)[i] = digits[i];
 618     }
 619 }