src/gromacs/utility/cstringutil.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
   7  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   8  * and including many others, as listed in the AUTHORS file in the
   9  * top-level source directory and at http://www.gromacs.org.
  10  *
  11  * GROMACS is free software; you can redistribute it and/or
  12  * modify it under the terms of the GNU Lesser General Public License
  13  * as published by the Free Software Foundation; either version 2.1
  14  * of the License, or (at your option) any later version.
  15  *
  16  * GROMACS is distributed in the hope that it will be useful,
  17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  19  * Lesser General Public License for more details.
  20  *
  21  * You should have received a copy of the GNU Lesser General Public
  22  * License along with GROMACS; if not, see
  23  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  24  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  25  *
  26  * If you want to redistribute modifications to GROMACS, please
  27  * consider that scientific software is very special. Version
  28  * control is crucial - bugs must be traceable. We will be happy to
  29  * consider code for inclusion in the official distribution, but
  30  * derived work must not be called official GROMACS. Details are found
  31  * in the README & COPYING files - if they are missing, get the
  32  * official version at http://www.gromacs.org.
  33  *
  34  * To help us fund GROMACS development, we humbly ask that you cite
  35  * the research papers on the package. Check out http://www.gromacs.org.
  36  */
  37 /* This file is completely threadsafe - keep it that way! */
  38 #include "gmxpre.h"
  39
  40 #include "cstringutil.h"
  41
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44
  45 #include <cassert>
  46 #include <cctype>
  47 #include <cstring>
  48
  49 #include <string>
  50 #include <vector>
  51
  52 #include "gromacs/utility/basedefinitions.h"
  53 #include "gromacs/utility/fatalerror.h"
  54 #include "gromacs/utility/futil.h"
  55 #include "gromacs/utility/smalloc.h"
  56
//! Character that starts a comment; strip_comment() cuts a line at its first occurrence.
#define COMMENTSIGN ';'
  59
  60 int continuing(char *s)
  61 {
  62     int sl;
  63     assert(s);
  64
  65     rtrim(s);
  66     sl = strlen(s);
  67     if ((sl > 0) && (s[sl-1] == CONTINUE))
  68     {
  69         s[sl-1] = 0;
  70         return TRUE;
  71     }
  72     else
  73     {
  74         return FALSE;
  75     }
  76 }
  77
  78
  79
  80 char *fgets2(char *line, int n, FILE *stream)
  81 {
  82     char *c;
  83     if (fgets(line, n, stream) == nullptr)
  84     {
  85         return nullptr;
  86     }
  87     if ((c = strchr(line, '\n')) != nullptr)
  88     {
  89         *c = '\0';
  90     }
  91     else
  92     {
  93         /* A line not ending in a newline can only occur at the end of a file,
  94          * or because of n being too small.
  95          * Since both cases occur very infrequently, we can check for EOF.
  96          */
  97         if (!feof(stream))
  98         {
  99             gmx_fatal(FARGS, "An input file contains a line longer than %d characters, while the buffer passed to fgets2 has size %d. The line starts with: '%20.20s'", n, n, line);
 100         }
 101     }
 102     if ((c = strchr(line, '\r')) != nullptr)
 103     {
 104         *c = '\0';
 105     }
 106
 107     return line;
 108 }
 109
 110 void strip_comment (char *line)
 111 {
 112     char *c;
 113
 114     if (!line)
 115     {
 116         return;
 117     }
 118
 119     /* search for a comment mark and replace it by a zero */
 120     if ((c = strchr(line, COMMENTSIGN)) != nullptr)
 121     {
 122         (*c) = 0;
 123     }
 124 }
 125
 126 void upstring (char *str)
 127 {
 128     int i;
 129
 130     for (i = 0; (i < (int)strlen(str)); i++)
 131     {
 132         str[i] = toupper(str[i]);
 133     }
 134 }
 135
 136 void ltrim (char *str)
 137 {
 138     int   i, c;
 139
 140     if (nullptr == str)
 141     {
 142         return;
 143     }
 144
 145     c = 0;
 146     while (('\0' != str[c]) && isspace(str[c]))
 147     {
 148         c++;
 149     }
 150     if (c > 0)
 151     {
 152         for (i = c; ('\0' != str[i]); i++)
 153         {
 154             str[i-c] = str[i];
 155         }
 156         str[i-c] = '\0';
 157     }
 158 }
 159
 160 void rtrim (char *str)
 161 {
 162     int nul;
 163
 164     if (nullptr == str)
 165     {
 166         return;
 167     }
 168
 169     nul = strlen(str)-1;
 170     while ((nul > 0) && ((str[nul] == ' ') || (str[nul] == '\t')) )
 171     {
 172         str[nul] = '\0';
 173         nul--;
 174     }
 175 }
 176
 177 void trim (char *str)
 178 {
 179     ltrim (str);
 180     rtrim (str);
 181 }
 182
 183 int gmx_strcasecmp_min(const char *str1, const char *str2)
 184 {
 185     char ch1, ch2;
 186
 187     do
 188     {
 189         do
 190         {
 191             ch1 = toupper(*(str1++));
 192         }
 193         while ((ch1 == '-') || (ch1 == '_'));
 194         do
 195         {
 196             ch2 = toupper(*(str2++));
 197         }
 198         while ((ch2 == '-') || (ch2 == '_'));
 199
 200         if (ch1 != ch2)
 201         {
 202             return (ch1-ch2);
 203         }
 204     }
 205     while (ch1);
 206     return 0;
 207 }
 208
 209 int gmx_strncasecmp_min(const char *str1, const char *str2, int n)
 210 {
 211     char  ch1, ch2;
 212     char *stri1, *stri2;
 213
 214     stri1 = (char *)str1;
 215     stri2 = (char *)str2;
 216     do
 217     {
 218         do
 219         {
 220             ch1 = toupper(*(str1++));
 221         }
 222         while ((ch1 == '-') || (ch1 == '_'));
 223         do
 224         {
 225             ch2 = toupper(*(str2++));
 226         }
 227         while ((ch2 == '-') || (ch2 == '_'));
 228
 229         if (ch1 != ch2)
 230         {
 231             return (ch1-ch2);
 232         }
 233     }
 234     while (ch1 && (str1-stri1 < n) && (str2-stri2 < n));
 235     return 0;
 236 }
 237
 238 int gmx_strcasecmp(const char *str1, const char *str2)
 239 {
 240     char ch1, ch2;
 241
 242     do
 243     {
 244         ch1 = toupper(*(str1++));
 245         ch2 = toupper(*(str2++));
 246         if (ch1 != ch2)
 247         {
 248             return (ch1-ch2);
 249         }
 250     }
 251     while (ch1);
 252     return 0;
 253 }
 254
 255 int gmx_strncasecmp(const char *str1, const char *str2, int n)
 256 {
 257     char ch1, ch2;
 258
 259     if (n == 0)
 260     {
 261         return 0;
 262     }
 263
 264     do
 265     {
 266         ch1 = toupper(*(str1++));
 267         ch2 = toupper(*(str2++));
 268         if (ch1 != ch2)
 269         {
 270             return (ch1-ch2);
 271         }
 272         n--;
 273     }
 274     while (ch1 && n);
 275     return 0;
 276 }
 277
 278 char *gmx_strdup(const char *src)
 279 {
 280     char *dest;
 281
 282     snew(dest, strlen(src)+1);
 283     strcpy(dest, src);
 284
 285     return dest;
 286 }
 287
 288 char *
 289 gmx_strndup(const char *src, int n)
 290 {
 291     int   len;
 292     char *dest;
 293
 294     len = strlen(src);
 295     if (len > n)
 296     {
 297         len = n;
 298     }
 299     snew(dest, len+1);
 300     strncpy(dest, src, len);
 301     dest[len] = 0;
 302     return dest;
 303 }
 304
/* Magic initial hash value for Daniel J. Bernstein's string hash algorithm,
 * to be passed as hash_init to the string hash functions in this file.
 * Do NOT use any other value unless you really know what you are doing.
 */
const unsigned int
    gmx_string_hash_init = 5381;
 310
 311
 312 unsigned int
 313 gmx_string_fullhash_func(const char *s, unsigned int hash_init)
 314 {
 315     int c;
 316
 317     while ((c = (*s++)) != '\0')
 318     {
 319         hash_init = ((hash_init << 5) + hash_init) ^ c; /* (hash * 33) xor c */
 320     }
 321     return hash_init;
 322 }
 323
 324 unsigned int
 325 gmx_string_hash_func(const char *s, unsigned int hash_init)
 326 {
 327     int c;
 328
 329     while ((c = toupper(*s++)) != '\0')
 330     {
 331         if (isalnum(c))
 332         {
 333             hash_init = ((hash_init << 5) + hash_init) ^ c;            /* (hash * 33) xor c */
 334         }
 335     }
 336     return hash_init;
 337 }
 338
 339 int
 340 gmx_wcmatch(const char *pattern, const char *str)
 341 {
 342     while (*pattern)
 343     {
 344         if (*pattern == '*')
 345         {
 346             /* Skip multiple wildcards in a sequence */
 347             while (*pattern == '*' || *pattern == '?')
 348             {
 349                 ++pattern;
 350                 /* For ?, we need to check that there are characters left
 351                  * in str. */
 352                 if (*pattern == '?')
 353                 {
 354                     if (*str == 0)
 355                     {
 356                         return GMX_NO_WCMATCH;
 357                     }
 358                     else
 359                     {
 360                         ++str;
 361                     }
 362                 }
 363             }
 364             /* If the pattern ends after the star, we have a match */
 365             if (*pattern == 0)
 366             {
 367                 return 0;
 368             }
 369             /* Match the rest against each possible suffix of str */
 370             while (*str)
 371             {
 372                 /* Only do the recursive call if the first character
 373                  * matches. We don't have to worry about wildcards here,
 374                  * since we have processed them above. */
 375                 if (*pattern == *str)
 376                 {
 377                     int rc;
 378                     /* Match the suffix, and return if a match or an error */
 379                     rc = gmx_wcmatch(pattern, str);
 380                     if (rc != GMX_NO_WCMATCH)
 381                     {
 382                         return rc;
 383                     }
 384                 }
 385                 ++str;
 386             }
 387             /* If no suffix of str matches, we don't have a match */
 388             return GMX_NO_WCMATCH;
 389         }
 390         else if ((*pattern == '?' && *str != 0) || *pattern == *str)
 391         {
 392             ++str;
 393         }
 394         else
 395         {
 396             return GMX_NO_WCMATCH;
 397         }
 398         ++pattern;
 399     }
 400     /* When the pattern runs out, we have a match if the string has ended. */
 401     return (*str == 0) ? 0 : GMX_NO_WCMATCH;
 402 }
 403
/* Returns a line-wrapped, indented copy of buf in a newly allocated buffer.
 *
 * buf          - NUL-terminated text to wrap; it is only read
 * line_width   - number of source characters after which a line is broken
 *                at the last space seen on that line
 * indent       - number of spaces inserted after every line break
 * bIndentFirst - if set, the output also starts with indent spaces
 *
 * A line break is made by replacing a space with '\n'. A word that is
 * longer than line_width is left on its line unbroken. Newlines already
 * present in buf are kept and are followed by the indentation as well
 * (except a newline that is the very last character).
 *
 * The result is allocated with snew() and grown with srenew().
 * NOTE(review): presumably the caller is expected to release it with
 * sfree() - confirm against the header documentation.
 */
char *wrap_lines(const char *buf, int line_width, int indent, gmx_bool bIndentFirst)
{
    char    *b2;
    int      i, i0, i2, j, b2len, lspace = 0, l2space = 0;
    gmx_bool bFirst, bFitsOnLine;

    /* characters are copied from buf to b2 with possible spaces changed
     * into newlines and extra space added for indentation.
     * i indexes buf (source buffer) and i2 indexes b2 (destination buffer)
     * i0 points to the beginning of the current line (in buf, source)
     * lspace and l2space point to the last space on the current line
     * bFirst is set to prevent indentation of first line
     * bFitsOnLine says if the first space occurred before line_width, if
     * that is not the case, we have a word longer than line_width which
     * will also not fit on the next line, so we might as well keep it on
     * the current line (where it also won't fit, but looks better)
     */

    b2    = nullptr;
    /* room for the text, its terminating NUL and one indentation; the
     * buffer is enlarged by indent each time an indentation is inserted
     */
    b2len = strlen(buf)+1+indent;
    snew(b2, b2len);
    i0 = i2 = 0;
    if (bIndentFirst)
    {
        for (i2 = 0; (i2 < indent); i2++)
        {
            b2[i2] = ' ';
        }
    }
    bFirst = TRUE;
    /* each pass of this loop copies one output line */
    do
    {
        l2space = -1;
        /* find the last space before end of line */
        /* copying continues beyond line_width for as long as no space has
         * been seen on this line (l2space is still -1)
         */
        for (i = i0; ((i-i0 < line_width) || (l2space == -1)) && (buf[i]); i++)
        {
            b2[i2++] = buf[i];
            /* remember the position of a space */
            if (buf[i] == ' ')
            {
                lspace  = i;
                l2space = i2-1;
            }
            /* if we have a newline before the line is full, reset counters */
            if (buf[i] == '\n' && buf[i+1])
            {
                i0     = i+1;
                b2len += indent;
                srenew(b2, b2len);
                /* add indentation after the newline */
                for (j = 0; (j < indent); j++)
                {
                    b2[i2++] = ' ';
                }
            }
        }
        /* If we are at the last newline, copy it */
        if (buf[i] == '\n' && !buf[i+1])
        {
            b2[i2++] = buf[i++];
        }
        /* if we're not at the end of the string */
        if (buf[i])
        {
            /* check if one word does not fit on the line */
            bFitsOnLine = (i-i0 <= line_width);
            /* reset line counters to just after the space */
            /* anything copied to b2 after that space is overwritten when
             * the next pass copies it again from i0
             */
            i0 = lspace+1;
            i2 = l2space+1;
            /* if the words fit on the line, and we're beyond the indentation part */
            if ( (bFitsOnLine) && (l2space >= indent) )
            {
                /* start a new line */
                b2[l2space] = '\n';
                /* and add indentation */
                if (indent)
                {
                    /* the width is reduced by indent once, at the first
                     * wrap; line_width is a by-value parameter, so this
                     * does not affect the caller
                     */
                    if (bFirst)
                    {
                        line_width -= indent;
                        bFirst      = FALSE;
                    }
                    b2len += indent;
                    srenew(b2, b2len);
                    for (j = 0; (j < indent); j++)
                    {
                        b2[i2++] = ' ';
                    }
                    /* no extra spaces after indent; */
                    while (buf[i0] == ' ')
                    {
                        i0++;
                    }
                }
            }
        }
    }
    while (buf[i]);
    b2[i2] = '\0';

    return b2;
}
 506
 507 gmx_int64_t
 508 str_to_int64_t(const char *str, char **endptr)
 509 {
 510 #ifndef _MSC_VER
 511     return strtoll(str, endptr, 10);
 512 #else
 513     return _strtoi64(str, endptr, 10);
 514 #endif
 515 }
 516
/* Writes the decimal representation of i into buf and returns buf.
 *
 * The value is formatted with sprintf() using the GMX_PRId64 conversion.
 * No length check is done, so buf must be large enough for any 64-bit
 * value plus the terminating NUL.
 * NOTE(review): the required buffer size is not visible in this file;
 * presumably the header provides a constant for it - confirm.
 */
char *gmx_step_str(gmx_int64_t i, char *buf)
{
    sprintf(buf, "%" GMX_PRId64, i);
    return buf;
}