src/gromacs/topology/index.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include "index.h"
  41
  42 #include <cassert>
  43 #include <cctype>
  44 #include <cstdlib>
  45 #include <cstring>
  46
  47 #include <algorithm>
  48
  49 #include "gromacs/topology/atoms.h"
  50 #include "gromacs/topology/block.h"
  51 #include "gromacs/topology/invblock.h"
  52 #include "gromacs/topology/residuetypes.h"
  53 #include "gromacs/utility/arraysize.h"
  54 #include "gromacs/utility/cstringutil.h"
  55 #include "gromacs/utility/fatalerror.h"
  56 #include "gromacs/utility/futil.h"
  57 #include "gromacs/utility/smalloc.h"
  58 #include "gromacs/utility/strdb.h"
  59
  60 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  61 {
  62     char c;
  63
  64     if (bASK)
  65     {
  66         do
  67         {
  68             c = toupper(fgetc(stdin));
  69         } while ((c != 'Y') && (c != 'N'));
  70
  71         return (c == 'Y');
  72     }
  73     else
  74     {
  75         return FALSE;
  76     }
  77 }
  78
  79 void write_index(const char* outf, t_blocka* b, char** gnames, gmx_bool bDuplicate, int natoms)
  80 {
  81     FILE* out;
  82     int   i, j, k;
  83
  84     out = gmx_ffopen(outf, "w");
  85     /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
  86     for (i = 0; (i < b->nr); i++)
  87     {
  88         fprintf(out, "[ %s ]", gnames[i]);
  89         for (k = 0, j = b->index[i]; j < b->index[i + 1]; j++, k++)
  90         {
  91             const char sep = (k % 15 == 0 ? '\n' : ' ');
  92             fprintf(out, "%c%4d", sep, b->a[j] + 1);
  93         }
  94         fprintf(out, "\n");
  95     }
  96
  97     /* Duplicate copy, useful for computational electrophysiology double-layer setups */
  98     if (bDuplicate)
  99     {
 100         fprintf(stderr, "Duplicating the whole system with an atom offset of %d atoms.\n", natoms);
 101         for (i = 0; (i < b->nr); i++)
 102         {
 103             fprintf(out, "[ %s_copy ]", gnames[i]);
 104             for (k = 0, j = b->index[i]; j < b->index[i + 1]; j++, k++)
 105             {
 106                 const char sep = (k % 15 == 0 ? '\n' : ' ');
 107                 fprintf(out, "%c%4d", sep, b->a[j] + 1 + natoms);
 108             }
 109             fprintf(out, "\n");
 110         }
 111     }
 112
 113     gmx_ffclose(out);
 114 }
 115
 116 void add_grp(t_blocka* b, char*** gnames, gmx::ArrayRef<const int> a, const std::string& name)
 117 {
 118     srenew(b->index, b->nr + 2);
 119     srenew(*gnames, b->nr + 1);
 120     (*gnames)[b->nr] = gmx_strdup(name.c_str());
 121
 122     srenew(b->a, b->nra + a.size());
 123     for (gmx::index i = 0; (i < a.ssize()); i++)
 124     {
 125         b->a[b->nra++] = a[i];
 126     }
 127     b->nr++;
 128     b->index[b->nr] = b->nra;
 129 }
 130
 131 /*! \brief
 132  * Compare index in groups.
 133  *
 134  * Checks the index in \p a to the one in \p b at \p index.
 135  * If \p index is < 0, it is taken as relative to the end of \p b.
 136  *
 137  * \param[in] b Block with groups.
 138  * \param[in] a New group to compare to.
 139  * \param[in] index The index to check.
 140  * \returns True if groups are the same.
 141  */
 142 static bool grp_cmp(t_blocka* b, gmx::ArrayRef<const int> a, int index)
 143 {
 144     if (index < 0)
 145     {
 146         index = b->nr - 1 + index;
 147     }
 148     if (index >= b->nr)
 149     {
 150         gmx_fatal(FARGS, "no such index group %d in t_blocka (nr=%d)", index, b->nr);
 151     }
 152     /* compare sizes */
 153     if (a.ssize() != b->index[index + 1] - b->index[index])
 154     {
 155         return FALSE;
 156     }
 157     for (gmx::index i = 0; i < a.ssize(); i++)
 158     {
 159         if (a[i] != b->a[b->index[index] + i])
 160         {
 161             return false;
 162         }
 163     }
 164     return true;
 165 }
 166 //! Print out how many residues of a certain type are present.
 167 static void p_status(gmx::ArrayRef<const std::string> restype, gmx::ArrayRef<const std::string> typenames)
 168 {
 169     std::vector<int> counter(typenames.size(), 0);
 170     std::transform(typenames.begin(), typenames.end(), counter.begin(), [&restype](const std::string& typenm) {
 171         return std::count_if(restype.begin(), restype.end(), [&typenm](const std::string& res) {
 172             return gmx_strcasecmp(res.c_str(), typenm.c_str()) == 0;
 173         });
 174     });
 175
 176     for (int i = 0; (i < typenames.ssize()); i++)
 177     {
 178         if (counter[i] > 0)
 179         {
 180             printf("There are: %5d %10s residues\n", counter[i], typenames[i].c_str());
 181         }
 182     }
 183 }
 184
 185 /*! \brief
 186  * Return a new group of atoms with \p restype that match \p typestring,
 187  * or not matching if \p bMatch.
 188  *
 189  * \param[in] atoms Atoms data to use.
 190  * \param[in] restype Residuetypes to match.
 191  * \param[in] typestring Which type the residue should have.
 192  * \param[in] bMatch whether to return matching atoms or those that don't.
 193  * \returns Vector of atoms that match.
 194  */
 195 static std::vector<int> mk_aid(const t_atoms*                   atoms,
 196                                gmx::ArrayRef<const std::string> restype,
 197                                const std::string&               typestring,
 198                                bool                             bMatch)
 199 {
 200     std::vector<int> a;
 201     for (int i = 0; (i < atoms->nr); i++)
 202     {
 203         bool res = gmx_strcasecmp(restype[atoms->atom[i].resind].c_str(), typestring.c_str()) == 0;
 204         if (!bMatch)
 205         {
 206             res = !res;
 207         }
 208         if (res)
 209         {
 210             a.push_back(i);
 211         }
 212     }
 213
 214     return a;
 215 }
 216
 217 typedef struct
 218 {
 219     char*    rname;
 220     gmx_bool bNeg;
 221     char*    gname;
 222 } restp_t;
 223
 224 static void analyse_other(gmx::ArrayRef<std::string> restype,
 225                           const t_atoms*             atoms,
 226                           t_blocka*                  gb,
 227                           char***                    gn,
 228                           gmx_bool                   bASK,
 229                           gmx_bool                   bVerb)
 230 {
 231     restp_t* restp = nullptr;
 232     char**   attp  = nullptr;
 233     char *   rname, *aname;
 234     int      i, resind, natp, nrestp = 0;
 235
 236     for (i = 0; (i < atoms->nres); i++)
 237     {
 238         if (gmx_strcasecmp(restype[i].c_str(), "Protein")
 239             && gmx_strcasecmp(restype[i].c_str(), "DNA") && gmx_strcasecmp(restype[i].c_str(), "RNA")
 240             && gmx_strcasecmp(restype[i].c_str(), "Water"))
 241         {
 242             break;
 243         }
 244     }
 245     if (i < atoms->nres)
 246     {
 247         /* we have others */
 248         if (bVerb)
 249         {
 250             printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into "
 251                    "groups...\n");
 252         }
 253         for (int k = 0; (k < atoms->nr); k++)
 254         {
 255             resind = atoms->atom[k].resind;
 256             rname  = *atoms->resinfo[resind].name;
 257             if (gmx_strcasecmp(restype[resind].c_str(), "Protein")
 258                 && gmx_strcasecmp(restype[resind].c_str(), "DNA")
 259                 && gmx_strcasecmp(restype[resind].c_str(), "RNA")
 260                 && gmx_strcasecmp(restype[resind].c_str(), "Water"))
 261             {
 262                 int l;
 263                 for (l = 0; (l < nrestp); l++)
 264                 {
 265                     assert(restp);
 266                     if (strcmp(restp[l].rname, rname) == 0)
 267                     {
 268                         break;
 269                     }
 270                 }
 271                 if (l == nrestp)
 272                 {
 273                     srenew(restp, nrestp + 1);
 274                     restp[nrestp].rname = gmx_strdup(rname);
 275                     restp[nrestp].bNeg  = FALSE;
 276                     restp[nrestp].gname = gmx_strdup(rname);
 277                     nrestp++;
 278                 }
 279             }
 280         }
 281         for (int i = 0; (i < nrestp); i++)
 282         {
 283             std::vector<int> aid;
 284             for (int j = 0; (j < atoms->nr); j++)
 285             {
 286                 rname = *atoms->resinfo[atoms->atom[j].resind].name;
 287                 if ((strcmp(restp[i].rname, rname) == 0 && !restp[i].bNeg)
 288                     || (strcmp(restp[i].rname, rname) != 0 && restp[i].bNeg))
 289                 {
 290                     aid.push_back(j);
 291                 }
 292             }
 293             add_grp(gb, gn, aid, restp[i].gname);
 294             if (bASK)
 295             {
 296                 printf("split %s into atoms (y/n) ? ", restp[i].gname);
 297                 fflush(stdout);
 298                 if (gmx_ask_yesno(bASK))
 299                 {
 300                     natp = 0;
 301                     for (size_t k = 0; (k < aid.size()); k++)
 302                     {
 303                         aname = *atoms->atomname[aid[k]];
 304                         int l;
 305                         for (l = 0; (l < natp); l++)
 306                         {
 307                             if (strcmp(aname, attp[l]) == 0)
 308                             {
 309                                 break;
 310                             }
 311                         }
 312                         if (l == natp)
 313                         {
 314                             srenew(attp, ++natp);
 315                             attp[natp - 1] = aname;
 316                         }
 317                     }
 318                     if (natp > 1)
 319                     {
 320                         for (int l = 0; (l < natp); l++)
 321                         {
 322                             std::vector<int> aaid;
 323                             for (size_t k = 0; (k < aid.size()); k++)
 324                             {
 325                                 aname = *atoms->atomname[aid[k]];
 326                                 if (strcmp(aname, attp[l]) == 0)
 327                                 {
 328                                     aaid.push_back(aid[k]);
 329                                 }
 330                             }
 331                             add_grp(gb, gn, aaid, attp[l]);
 332                         }
 333                     }
 334                     sfree(attp);
 335                     attp = nullptr;
 336                 }
 337             }
 338             sfree(restp[i].rname);
 339             sfree(restp[i].gname);
 340         }
 341         sfree(restp);
 342     }
 343 }
 344
 345 /*! \brief
 346  * Data necessary to construct a single (protein) index group in
 347  * analyse_prot().
 348  */
 349 typedef struct gmx_help_make_index_group // NOLINT(clang-analyzer-optin.performance.Padding)
 350 {
 351     /** The set of atom names that will be used to form this index group */
 352     const char** defining_atomnames;
 353     /** Size of the defining_atomnames array */
 354     int num_defining_atomnames;
 355     /** Name of this index group */
 356     const char* group_name;
 357     /** Whether the above atom names name the atoms in the group, or
 358         those not in the group */
 359     gmx_bool bTakeComplement;
 360     /** The index in wholename gives the first item in the arrays of
 361        atomnames that should be tested with 'gmx_strncasecmp' in stead of
 362        gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 363        This is comparable to using a '*' wildcard at the end of specific
 364        atom names, but that is more involved to implement...
 365      */
 366     int wholename;
 367     /** Only create this index group if it differs from the one specified in compareto,
 368        where -1 means to always create this group. */
 369     int compareto;
 370 } t_gmx_help_make_index_group;
 371
 372 static void analyse_prot(gmx::ArrayRef<const std::string> restype,
 373                          const t_atoms*                   atoms,
 374                          t_blocka*                        gb,
 375                          char***                          gn,
 376                          gmx_bool                         bASK,
 377                          gmx_bool                         bVerb)
 378 {
 379     /* lists of atomnames to be used in constructing index groups: */
 380     static const char* pnoh[]   = { "H", "HN" };
 381     static const char* pnodum[] = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 382                                     "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 383     static const char* calpha[] = { "CA" };
 384     static const char* bb[]     = { "N", "CA", "C" };
 385     static const char* mc[]     = { "N", "CA", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 386     static const char* mcb[] = { "N", "CA", "CB", "C", "O", "O1", "O2", "OC1", "OC2", "OT", "OXT" };
 387     static const char* mch[] = { "N",  "CA",  "C",  "O",  "O1", "O2", "OC1", "OC2",
 388                                  "OT", "OXT", "H1", "H2", "H3", "H",  "HN" };
 389
 390     static const t_gmx_help_make_index_group constructing_data[] = {
 391         { nullptr, 0, "Protein", TRUE, -1, -1 },
 392         { pnoh, asize(pnoh), "Protein-H", TRUE, 0, -1 },
 393         { calpha, asize(calpha), "C-alpha", FALSE, -1, -1 },
 394         { bb, asize(bb), "Backbone", FALSE, -1, -1 },
 395         { mc, asize(mc), "MainChain", FALSE, -1, -1 },
 396         { mcb, asize(mcb), "MainChain+Cb", FALSE, -1, -1 },
 397         { mch, asize(mch), "MainChain+H", FALSE, -1, -1 },
 398         { mch, asize(mch), "SideChain", TRUE, -1, -1 },
 399         { mch, asize(mch), "SideChain-H", TRUE, 11, -1 },
 400         { pnodum, asize(pnodum), "Prot-Masses", TRUE, -1, 0 },
 401     };
 402     const int num_index_groups = asize(constructing_data);
 403
 404     int      n, j;
 405     int      npres;
 406     gmx_bool match;
 407     char     ndx_name[STRLEN], *atnm;
 408     int      i;
 409
 410     if (bVerb)
 411     {
 412         printf("Analysing Protein...\n");
 413     }
 414     std::vector<int> aid;
 415
 416     /* calculate the number of protein residues */
 417     npres = 0;
 418     for (i = 0; (i < atoms->nres); i++)
 419     {
 420         if (0 == gmx_strcasecmp(restype[i].c_str(), "Protein"))
 421         {
 422             npres++;
 423         }
 424     }
 425     /* find matching or complement atoms */
 426     for (i = 0; (i < num_index_groups); i++)
 427     {
 428         for (n = 0; (n < atoms->nr); n++)
 429         {
 430             if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind].c_str(), "Protein"))
 431             {
 432                 match = FALSE;
 433                 for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 434                 {
 435                     /* skip digits at beginning of atomname, e.g. 1H */
 436                     atnm = *atoms->atomname[n];
 437                     while (isdigit(atnm[0]))
 438                     {
 439                         atnm++;
 440                     }
 441                     if ((constructing_data[i].wholename == -1) || (j < constructing_data[i].wholename))
 442                     {
 443                         if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j], atnm))
 444                         {
 445                             match = TRUE;
 446                         }
 447                     }
 448                     else
 449                     {
 450                         if (0
 451                             == gmx_strncasecmp(constructing_data[i].defining_atomnames[j], atnm,
 452                                                strlen(constructing_data[i].defining_atomnames[j])))
 453                         {
 454                             match = TRUE;
 455                         }
 456                     }
 457                 }
 458                 if (constructing_data[i].bTakeComplement != match)
 459                 {
 460                     aid.push_back(n);
 461                 }
 462             }
 463         }
 464         /* if we want to add this group always or it differs from previous
 465            group, add it: */
 466         if (-1 == constructing_data[i].compareto || !grp_cmp(gb, aid, constructing_data[i].compareto - i))
 467         {
 468             add_grp(gb, gn, aid, constructing_data[i].group_name);
 469         }
 470         aid.clear();
 471     }
 472
 473     if (bASK)
 474     {
 475         for (i = 0; (i < num_index_groups); i++)
 476         {
 477             printf("Split %12s into %5d residues (y/n) ? ", constructing_data[i].group_name, npres);
 478             if (gmx_ask_yesno(bASK))
 479             {
 480                 int resind;
 481                 aid.clear();
 482                 for (n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr));)
 483                 {
 484                     resind = atoms->atom[n].resind;
 485                     for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 486                     {
 487                         match = FALSE;
 488                         for (j = 0; (j < constructing_data[i].num_defining_atomnames); j++)
 489                         {
 490                             if (0
 491                                 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],
 492                                                   *atoms->atomname[n]))
 493                             {
 494                                 match = TRUE;
 495                             }
 496                         }
 497                         if (constructing_data[i].bTakeComplement != match)
 498                         {
 499                             aid.push_back(n);
 500                         }
 501                     }
 502                     /* copy the residuename to the tail of the groupname */
 503                     if (!aid.empty())
 504                     {
 505                         t_resinfo* ri;
 506                         ri = &atoms->resinfo[resind];
 507                         sprintf(ndx_name, "%s_%s%d%c", constructing_data[i].group_name, *ri->name,
 508                                 ri->nr, ri->ic == ' ' ? '\0' : ri->ic);
 509                         add_grp(gb, gn, aid, ndx_name);
 510                         aid.clear();
 511                     }
 512                 }
 513             }
 514         }
 515         printf("Make group with sidechain and C=O swapped (y/n) ? ");
 516         if (gmx_ask_yesno(bASK))
 517         {
 518             /* Make swap sidechain C=O index */
 519             aid.clear();
 520             for (int n = 0; ((atoms->atom[n].resind < npres) && (n < atoms->nr));)
 521             {
 522                 int resind = atoms->atom[n].resind;
 523                 int hold   = -1;
 524                 for (; ((atoms->atom[n].resind == resind) && (n < atoms->nr)); n++)
 525                 {
 526                     if (strcmp("CA", *atoms->atomname[n]) == 0)
 527                     {
 528                         aid.push_back(n);
 529                         hold = aid.size();
 530                         aid.resize(aid.size() + 3);
 531                     }
 532                     else if (strcmp("C", *atoms->atomname[n]) == 0)
 533                     {
 534                         if (hold == -1)
 535                         {
 536                             gmx_incons("Atom naming problem");
 537                         }
 538                         aid[hold] = n;
 539                     }
 540                     else if (strcmp("O", *atoms->atomname[n]) == 0)
 541                     {
 542                         if (hold == -1)
 543                         {
 544                             gmx_incons("Atom naming problem");
 545                         }
 546                         aid[hold + 1] = n;
 547                     }
 548                     else if (strcmp("O1", *atoms->atomname[n]) == 0)
 549                     {
 550                         if (hold == -1)
 551                         {
 552                             gmx_incons("Atom naming problem");
 553                         }
 554                         aid[hold + 1] = n;
 555                     }
 556                     else
 557                     {
 558                         aid.push_back(n);
 559                     }
 560                 }
 561             }
 562             /* copy the residuename to the tail of the groupname */
 563             if (!aid.empty())
 564             {
 565                 add_grp(gb, gn, aid, "SwapSC-CO");
 566             }
 567         }
 568     }
 569 }
 570
 571
 572 void analyse(const t_atoms* atoms, t_blocka* gb, char*** gn, gmx_bool bASK, gmx_bool bVerb)
 573 {
 574     char* resnm;
 575     int   i;
 576     int   iwater, iion;
 577     int   nwater, nion;
 578
 579     if (bVerb)
 580     {
 581         printf("Analysing residue names:\n");
 582     }
 583     /* Create system group, every single atom */
 584     std::vector<int> aid(atoms->nr);
 585     for (i = 0; i < atoms->nr; i++)
 586     {
 587         aid[i] = i;
 588     }
 589     add_grp(gb, gn, aid, "System");
 590
 591     /* For every residue, get a pointer to the residue type name */
 592     ResidueType rt;
 593
 594     std::vector<std::string> restype;
 595     std::vector<std::string> previousTypename;
 596     if (atoms->nres > 0)
 597     {
 598         int i = 0;
 599
 600         resnm = *atoms->resinfo[i].name;
 601         restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm));
 602         previousTypename.push_back(restype.back());
 603
 604         for (i = 1; i < atoms->nres; i++)
 605         {
 606             resnm = *atoms->resinfo[i].name;
 607             restype.emplace_back(rt.typeOfNamedDatabaseResidue(resnm));
 608
 609             /* Note that this does not lead to a N*N loop, but N*K, where
 610              * K is the number of residue _types_, which is small and independent of N.
 611              */
 612             bool found = false;
 613             for (size_t k = 0; k < previousTypename.size() && !found; k++)
 614             {
 615                 found = strcmp(restype.back().c_str(), previousTypename[k].c_str()) == 0;
 616             }
 617             if (!found)
 618             {
 619                 previousTypename.push_back(restype.back());
 620             }
 621         }
 622     }
 623
 624     if (bVerb)
 625     {
 626         p_status(restype, previousTypename);
 627     }
 628
 629     for (gmx::index k = 0; k < gmx::ssize(previousTypename); k++)
 630     {
 631         aid = mk_aid(atoms, restype, previousTypename[k], TRUE);
 632
 633         /* Check for special types to do fancy stuff with */
 634
 635         if (!gmx_strcasecmp(previousTypename[k].c_str(), "Protein") && !aid.empty())
 636         {
 637             /* PROTEIN */
 638             analyse_prot(restype, atoms, gb, gn, bASK, bVerb);
 639
 640             /* Create a Non-Protein group */
 641             aid = mk_aid(atoms, restype, "Protein", FALSE);
 642             if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
 643             {
 644                 add_grp(gb, gn, aid, "non-Protein");
 645             }
 646         }
 647         else if (!gmx_strcasecmp(previousTypename[k].c_str(), "Water") && !aid.empty())
 648         {
 649             add_grp(gb, gn, aid, previousTypename[k]);
 650             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 651             add_grp(gb, gn, aid, "SOL");
 652
 653
 654             /* Solvent, create a negated group too */
 655             aid = mk_aid(atoms, restype, "Water", FALSE);
 656             if ((!aid.empty()) && (gmx::ssize(aid) < atoms->nr))
 657             {
 658                 add_grp(gb, gn, aid, "non-Water");
 659             }
 660         }
 661         else if (!aid.empty())
 662         {
 663             /* Other groups */
 664             add_grp(gb, gn, aid, previousTypename[k]);
 665             analyse_other(restype, atoms, gb, gn, bASK, bVerb);
 666         }
 667     }
 668
 669
 670     /* Create a merged water_and_ions group */
 671     iwater = -1;
 672     iion   = -1;
 673     nwater = 0;
 674     nion   = 0;
 675
 676     for (i = 0; i < gb->nr; i++)
 677     {
 678         if (!gmx_strcasecmp((*gn)[i], "Water"))
 679         {
 680             iwater = i;
 681             nwater = gb->index[i + 1] - gb->index[i];
 682         }
 683         else if (!gmx_strcasecmp((*gn)[i], "Ion"))
 684         {
 685             iion = i;
 686             nion = gb->index[i + 1] - gb->index[i];
 687         }
 688     }
 689
 690     if (nwater > 0 && nion > 0)
 691     {
 692         srenew(gb->index, gb->nr + 2);
 693         srenew(*gn, gb->nr + 1);
 694         (*gn)[gb->nr] = gmx_strdup("Water_and_ions");
 695         srenew(gb->a, gb->nra + nwater + nion);
 696         if (nwater > 0)
 697         {
 698             for (i = gb->index[iwater]; i < gb->index[iwater + 1]; i++)
 699             {
 700                 gb->a[gb->nra++] = gb->a[i];
 701             }
 702         }
 703         if (nion > 0)
 704         {
 705             for (i = gb->index[iion]; i < gb->index[iion + 1]; i++)
 706             {
 707                 gb->a[gb->nra++] = gb->a[i];
 708             }
 709         }
 710         gb->nr++;
 711         gb->index[gb->nr] = gb->nra;
 712     }
 713 }
 714
 715
 716 void check_index(const char* gname, int n, int index[], const char* traj, int natoms)
 717 {
 718     int i;
 719
 720     for (i = 0; i < n; i++)
 721     {
 722         if (index[i] >= natoms)
 723         {
 724             gmx_fatal(FARGS,
 725                       "%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 726                       gname ? gname : "Index", i + 1, index[i] + 1, traj ? traj : "the trajectory",
 727                       natoms);
 728         }
 729         else if (index[i] < 0)
 730         {
 731             gmx_fatal(FARGS, "%s atom number (index[%d]=%d) is less than zero",
 732                       gname ? gname : "Index", i + 1, index[i] + 1);
 733         }
 734     }
 735 }
 736
 737 t_blocka* init_index(const char* gfile, char*** grpname)
 738 {
 739     FILE*     in;
 740     t_blocka* b;
 741     int       maxentries;
 742     int       i, j;
 743     char      line[STRLEN], *pt, str[STRLEN];
 744
 745     in = gmx_ffopen(gfile, "r");
 746     snew(b, 1);
 747     b->nr      = 0;
 748     b->index   = nullptr;
 749     b->nra     = 0;
 750     b->a       = nullptr;
 751     *grpname   = nullptr;
 752     maxentries = 0;
 753     while (get_a_line(in, line, STRLEN))
 754     {
 755         if (get_header(line, str))
 756         {
 757             b->nr++;
 758             srenew(b->index, b->nr + 1);
 759             srenew(*grpname, b->nr);
 760             if (b->nr == 1)
 761             {
 762                 b->index[0] = 0;
 763             }
 764             b->index[b->nr]       = b->index[b->nr - 1];
 765             (*grpname)[b->nr - 1] = gmx_strdup(str);
 766         }
 767         else
 768         {
 769             if (b->nr == 0)
 770             {
 771                 gmx_fatal(FARGS, "The first header of your indexfile is invalid");
 772             }
 773             pt = line;
 774             while (sscanf(pt, "%s", str) == 1)
 775             {
 776                 i = b->index[b->nr];
 777                 if (i >= maxentries)
 778                 {
 779                     maxentries += 1024;
 780                     srenew(b->a, maxentries);
 781                 }
 782                 assert(b->a != nullptr); // for clang analyzer
 783                 b->a[i] = strtol(str, nullptr, 10) - 1;
 784                 b->index[b->nr]++;
 785                 (b->nra)++;
 786                 pt = strstr(pt, str) + strlen(str);
 787             }
 788         }
 789     }
 790     gmx_ffclose(in);
 791
 792     for (i = 0; (i < b->nr); i++)
 793     {
 794         assert(b->a != nullptr); // for clang analyzer
 795         for (j = b->index[i]; (j < b->index[i + 1]); j++)
 796         {
 797             if (b->a[j] < 0)
 798             {
 799                 fprintf(stderr, "\nWARNING: negative index %d in group %s\n\n", b->a[j], (*grpname)[i]);
 800             }
 801         }
 802     }
 803
 804     return b;
 805 }
 806
 807 static void minstring(char* str)
 808 {
 809     int i;
 810
 811     for (i = 0; (i < static_cast<int>(strlen(str))); i++)
 812     {
 813         if (str[i] == '-')
 814         {
 815             str[i] = '_';
 816         }
 817     }
 818 }
 819
 820 int find_group(const char* s, int ngrps, char** grpname)
 821 {
 822     int      aa, i, n;
 823     char     string[STRLEN];
 824     gmx_bool bMultiple;
 825     bMultiple = FALSE;
 826     n         = strlen(s);
 827     aa        = -1;
 828     /* first look for whole name match */
 829     {
 830         for (i = 0; i < ngrps; i++)
 831         {
 832             if (gmx_strcasecmp_min(s, grpname[i]) == 0)
 833             {
 834                 if (aa != -1)
 835                 {
 836                     bMultiple = TRUE;
 837                 }
 838                 aa = i;
 839             }
 840         }
 841     }
 842     /* second look for first string match */
 843     if (aa == -1)
 844     {
 845         for (i = 0; i < ngrps; i++)
 846         {
 847             if (gmx_strncasecmp_min(s, grpname[i], n) == 0)
 848             {
 849                 if (aa != -1)
 850                 {
 851                     bMultiple = TRUE;
 852                 }
 853                 aa = i;
 854             }
 855         }
 856     }
 857     /* last look for arbitrary substring match */
 858     if (aa == -1)
 859     {
 860         char key[STRLEN];
 861         strncpy(key, s, sizeof(key) - 1);
 862         key[STRLEN - 1] = '\0';
 863         upstring(key);
 864         minstring(key);
 865         for (i = 0; i < ngrps; i++)
 866         {
 867             strncpy(string, grpname[i], STRLEN - 1);
 868             upstring(string);
 869             minstring(string);
 870             if (strstr(string, key) != nullptr)
 871             {
 872                 if (aa != -1)
 873                 {
 874                     bMultiple = TRUE;
 875                 }
 876                 aa = i;
 877             }
 878         }
 879     }
 880     if (bMultiple)
 881     {
 882         printf("Error: Multiple groups '%s' selected\n", s);
 883         aa = -1;
 884     }
 885     return aa;
 886 }
 887
 888 static int qgroup(int* a, int ngrps, char** grpname)
 889 {
 890     char     s[STRLEN];
 891     int      aa;
 892     gmx_bool bInRange;
 893     char*    end;
 894
 895     do
 896     {
 897         fprintf(stderr, "Select a group: ");
 898         do
 899         {
 900             if (scanf("%s", s) != 1)
 901             {
 902                 gmx_fatal(FARGS, "Cannot read from input");
 903             }
 904             trim(s); /* remove spaces */
 905         } while (strlen(s) == 0);
 906         aa = strtol(s, &end, 10);
 907         if (aa == 0 && end[0] != '\0') /* string entered */
 908         {
 909             aa = find_group(s, ngrps, grpname);
 910         }
 911         bInRange = (aa >= 0 && aa < ngrps);
 912         if (!bInRange)
 913         {
 914             printf("Error: No such group '%s'\n", s);
 915         }
 916     } while (!bInRange);
 917     printf("Selected %d: '%s'\n", aa, grpname[aa]);
 918     *a = aa;
 919     return aa;
 920 }
 921
 922 static void
 923 rd_groups(t_blocka* grps, char** grpname, char* gnames[], int ngrps, int isize[], int* index[], int grpnr[])
 924 {
 925     int i, j, gnr1;
 926
 927     if (grps->nr == 0)
 928     {
 929         gmx_fatal(FARGS, "Error: no groups in indexfile");
 930     }
 931     for (i = 0; (i < grps->nr); i++)
 932     {
 933         fprintf(stderr, "Group %5d (%15s) has %5d elements\n", i, grpname[i],
 934                 grps->index[i + 1] - grps->index[i]);
 935     }
 936     for (i = 0; (i < ngrps); i++)
 937     {
 938         if (grps->nr > 1)
 939         {
 940             do
 941             {
 942                 gnr1 = qgroup(&grpnr[i], grps->nr, grpname);
 943                 if ((gnr1 < 0) || (gnr1 >= grps->nr))
 944                 {
 945                     fprintf(stderr, "Select between %d and %d.\n", 0, grps->nr - 1);
 946                 }
 947             } while ((gnr1 < 0) || (gnr1 >= grps->nr));
 948         }
 949         else
 950         {
 951             fprintf(stderr, "There is one group in the index\n");
 952             gnr1 = 0;
 953         }
 954         gnames[i] = gmx_strdup(grpname[gnr1]);
 955         isize[i]  = grps->index[gnr1 + 1] - grps->index[gnr1];
 956         snew(index[i], isize[i]);
 957         for (j = 0; (j < isize[i]); j++)
 958         {
 959             index[i][j] = grps->a[grps->index[gnr1] + j];
 960         }
 961     }
 962 }
 963
 964 void rd_index(const char* statfile, int ngrps, int isize[], int* index[], char* grpnames[])
 965 {
 966     char**    gnames;
 967     t_blocka* grps;
 968     int*      grpnr;
 969
 970     snew(grpnr, ngrps);
 971     if (!statfile)
 972     {
 973         gmx_fatal(FARGS, "No index file specified");
 974     }
 975     grps = init_index(statfile, &gnames);
 976     rd_groups(grps, gnames, grpnames, ngrps, isize, index, grpnr);
 977     for (int i = 0; i < grps->nr; i++)
 978     {
 979         sfree(gnames[i]);
 980     }
 981     sfree(gnames);
 982     sfree(grpnr);
 983     done_blocka(grps);
 984     sfree(grps);
 985 }
 986
 987 void get_index(const t_atoms* atoms, const char* fnm, int ngrps, int isize[], int* index[], char* grpnames[])
 988 {
 989     char***   gnames;
 990     t_blocka* grps = nullptr;
 991     int*      grpnr;
 992
 993     snew(grpnr, ngrps);
 994     snew(gnames, 1);
 995     if (fnm != nullptr)
 996     {
 997         grps = init_index(fnm, gnames);
 998     }
 999     else if (atoms)
1000     {
1001         snew(grps, 1);
1002         snew(grps->index, 1);
1003         analyse(atoms, grps, gnames, FALSE, FALSE);
1004     }
1005     else
1006     {
1007         gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1008     }
1009
1010     rd_groups(grps, *gnames, grpnames, ngrps, isize, index, grpnr);
1011     for (int i = 0; i < grps->nr; ++i)
1012     {
1013         sfree((*gnames)[i]);
1014     }
1015     sfree(*gnames);
1016     sfree(gnames);
1017     sfree(grpnr);
1018     done_blocka(grps);
1019     sfree(grps);
1020 }
1021
1022 t_cluster_ndx* cluster_index(FILE* fplog, const char* ndx)
1023 {
1024     t_cluster_ndx* c;
1025     int            i;
1026
1027     snew(c, 1);
1028     c->clust    = init_index(ndx, &c->grpname);
1029     c->maxframe = -1;
1030     for (i = 0; (i < c->clust->nra); i++)
1031     {
1032         c->maxframe = std::max(c->maxframe, c->clust->a[i]);
1033     }
1034     fprintf(fplog ? fplog : stdout,
1035             "There are %d clusters containing %d structures, highest framenr is %d\n", c->clust->nr,
1036             c->clust->nra, c->maxframe);
1037     if (debug)
1038     {
1039         pr_blocka(debug, 0, "clust", c->clust, TRUE);
1040         for (i = 0; (i < c->clust->nra); i++)
1041         {
1042             if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1043             {
1044                 gmx_fatal(FARGS,
1045                           "Range check error for c->clust->a[%d] = %d\n"
1046                           "should be within 0 and %d",
1047                           i, c->clust->a[i], c->maxframe + 1);
1048             }
1049         }
1050     }
1051     c->inv_clust = make_invblocka(c->clust, c->maxframe);
1052
1053     return c;
1054 }