src/gmxlib/index.c

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team,
   6  * check out http://www.gromacs.org for more information.
   7  * Copyright (c) 2012, by the GROMACS development team, led by
   8  * David van der Spoel, Berk Hess, Erik Lindahl, and including many
   9  * others, as listed in the AUTHORS file in the top-level source
  10  * directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #ifdef HAVE_CONFIG_H
  39 #include <config.h>
  40 #endif
  41
  42 #include <ctype.h>
  43 #include <string.h>
  44 #include <assert.h>
  45 #include "sysstuff.h"
  46 #include "strdb.h"
  47 #include "futil.h"
  48 #include "macros.h"
  49 #include "names.h"
  50 #include "string2.h"
  51 #include "statutil.h"
  52 #include "confio.h"
  53 #include "main.h"
  54 #include "copyrite.h"
  55 #include "typedefs.h"
  56 #include "smalloc.h"
  57 #include "invblock.h"
  58 #include "macros.h"
  59 #include "index.h"
  60 #include "txtdump.h"
  61 #include "gmxfio.h"
  62
  63
  64
  65 const char gmx_residuetype_undefined[]="Other";
  66
  67 struct gmx_residuetype
  68 {
  69     int      n;
  70     char **  resname;
  71     char **  restype;
  72
  73 };
  74
  75
  76 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  77 {
  78   char c;
  79
  80   if (bASK) {
  81     do {
  82       c=toupper(fgetc(stdin));
  83     } while ((c != 'Y') && (c != 'N'));
  84
  85     return (c == 'Y');
  86   }
  87   else
  88     return FALSE;
  89 }
  90
  91 t_blocka *new_blocka(void)
  92 {
  93   t_blocka *block;
  94
  95   snew(block,1);
  96   snew(block->index,1);
  97
  98   return block;
  99 }
 100
 101 void write_index(const char *outf, t_blocka *b,char **gnames)
 102 {
 103   FILE *out;
 104   int  i,j,k;
 105
 106   out=gmx_fio_fopen(outf,"w");
 107   /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 108   for(i=0; (i<b->nr); i++) {
 109     fprintf(out,"[ %s ]\n",gnames[i]);
 110     for(k=0,j=b->index[i]; j<b->index[i+1]; j++,k++) {
 111       fprintf(out,"%4d ",b->a[j]+1);
 112       if ((k % 15) == 14)
 113         fprintf(out,"\n");
 114     }
 115     fprintf(out,"\n");
 116   }
 117   gmx_fio_fclose(out);
 118 }
 119
 120 void add_grp(t_blocka *b,char ***gnames,int nra,atom_id a[],const char *name)
 121 {
 122   int i;
 123
 124   srenew(b->index,b->nr+2);
 125   srenew(*gnames,b->nr+1);
 126   (*gnames)[b->nr]=strdup(name);
 127
 128   srenew(b->a,b->nra+nra);
 129   for(i=0; (i<nra); i++)
 130     b->a[b->nra++]=a[i];
 131   b->nr++;
 132   b->index[b->nr]=b->nra;
 133 }
 134
 135 /* compare index in `a' with group in `b' at `index',
 136    when `index'<0 it is relative to end of `b' */
 137 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 138 {
 139   int i;
 140
 141   if (index < 0)
 142     index = b->nr-1+index;
 143   if (index >= b->nr)
 144     gmx_fatal(FARGS,"no such index group %d in t_blocka (nr=%d)",index,b->nr);
 145   /* compare sizes */
 146   if ( nra != b->index[index+1] - b->index[index] )
 147     return FALSE;
 148   for(i=0; i<nra; i++)
 149     if ( a[i] != b->a[b->index[index]+i] )
 150       return FALSE;
 151   return TRUE;
 152 }
 153
 154 static void
 155 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 156 {
 157     int i,j;
 158     int found;
 159
 160     int * counter;
 161
 162     snew(counter,ntypes);
 163     for(i=0;i<ntypes;i++)
 164     {
 165         counter[i]=0;
 166     }
 167     for(i=0;i<nres;i++)
 168     {
 169         found=0;
 170         for(j=0;j<ntypes;j++)
 171         {
 172             if(!gmx_strcasecmp(restype[i],typenames[j]))
 173             {
 174                 counter[j]++;
 175             }
 176         }
 177     }
 178
 179     for(i=0; (i<ntypes); i++)
 180     {
 181         if (counter[i] > 0)
 182         {
 183             printf("There are: %5d %10s residues\n",counter[i],typenames[i]);
 184         }
 185     }
 186
 187     sfree(counter);
 188 }
 189
 190
 191 atom_id *
 192 mk_aid(t_atoms *atoms,const char ** restype,const char * typestring,int *nra,gmx_bool bMatch)
 193 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 194 {
 195     atom_id *a;
 196     int     i;
 197     int     res;
 198
 199     snew(a,atoms->nr);
 200     *nra=0;
 201     for(i=0; (i<atoms->nr); i++)
 202     {
 203         res=!gmx_strcasecmp(restype[atoms->atom[i].resind],typestring);
 204         if(bMatch==FALSE)
 205         {
 206             res=!res;
 207         }
 208         if(res)
 209         {
 210             a[(*nra)++]=i;
 211         }
 212     }
 213
 214     return a;
 215 }
 216
 217 typedef struct {
 218   char *rname;
 219   gmx_bool bNeg;
 220   char *gname;
 221 } restp_t;
 222
 223 static void analyse_other(const char ** restype,t_atoms *atoms,
 224                           t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 225 {
 226   restp_t *restp=NULL;
 227   char **attp=NULL;
 228   char *rname,*aname;
 229   atom_id *other_ndx,*aid,*aaid;
 230   int  i,j,k,l,resind,naid,naaid,natp,nrestp=0;
 231
 232     for(i=0; (i<atoms->nres); i++)
 233     {
 234         if (gmx_strcasecmp(restype[i],"Protein") && gmx_strcasecmp(restype[i],"DNA") && gmx_strcasecmp(restype[i],"RNA") && gmx_strcasecmp(restype[i],"Water"))
 235         {
 236             break;
 237         }
 238     }
 239   if (i < atoms->nres) {
 240     /* we have others */
 241     if (bVerb)
 242       printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 243     snew(other_ndx,atoms->nr);
 244     for(k=0; (k<atoms->nr); k++) {
 245       resind = atoms->atom[k].resind;
 246       rname = *atoms->resinfo[resind].name;
 247         if (gmx_strcasecmp(restype[resind],"Protein") && gmx_strcasecmp(restype[resind],"DNA") &&
 248             gmx_strcasecmp(restype[resind],"RNA") && gmx_strcasecmp(restype[resind],"Water"))
 249         {
 250
 251         for(l=0; (l<nrestp); l++)
 252           if (strcmp(restp[l].rname,rname) == 0)
 253             break;
 254         if (l==nrestp) {
 255           srenew(restp,nrestp+1);
 256           restp[nrestp].rname = strdup(rname);
 257           restp[nrestp].bNeg  = FALSE;
 258           restp[nrestp].gname = strdup(rname);
 259           nrestp++;
 260
 261     }
 262       }
 263     }
 264     for(i=0; (i<nrestp); i++) {
 265       snew(aid,atoms->nr);
 266       naid=0;
 267       for(j=0; (j<atoms->nr); j++) {
 268         rname = *atoms->resinfo[atoms->atom[j].resind].name;
 269         if ((strcmp(restp[i].rname,rname) == 0 && !restp[i].bNeg) ||
 270             (strcmp(restp[i].rname,rname) != 0 &&  restp[i].bNeg)) {
 271           aid[naid++] = j;
 272         }
 273       }
 274       add_grp(gb,gn,naid,aid,restp[i].gname);
 275       if (bASK) {
 276         printf("split %s into atoms (y/n) ? ",restp[i].gname);
 277         fflush(stdout);
 278         if (gmx_ask_yesno(bASK)) {
 279           natp=0;
 280           for(k=0; (k<naid); k++) {
 281             aname=*atoms->atomname[aid[k]];
 282             for(l=0; (l<natp); l++)
 283               if (strcmp(aname,attp[l]) == 0)
 284                 break;
 285             if (l == natp) {
 286               srenew(attp,++natp);
 287               attp[natp-1]=aname;
 288             }
 289           }
 290           if (natp > 1) {
 291             for(l=0; (l<natp); l++) {
 292               snew(aaid,naid);
 293               naaid=0;
 294               for(k=0; (k<naid); k++) {
 295                 aname=*atoms->atomname[aid[k]];
 296                 if (strcmp(aname,attp[l])==0)
 297                   aaid[naaid++]=aid[k];
 298               }
 299               add_grp(gb,gn,naaid,aaid,attp[l]);
 300               sfree(aaid);
 301             }
 302           }
 303           sfree(attp);
 304           attp=NULL;
 305         }
 306         sfree(aid);
 307       }
 308     }
 309     sfree(other_ndx);
 310   }
 311 }
 312
 313 /*! /brief Instances of this struct contain the data necessary to
 314  *         construct a single (protein) index group in
 315  *         analyse_prot(). */
 316 typedef struct gmx_help_make_index_group
 317 {
 318   /* The set of atom names that will be used to form this index group */
 319   const char **defining_atomnames;
 320   /* Size of the defining_atomnames array */
 321   const int num_defining_atomnames;
 322   /* Name of this index group */
 323   const char *group_name;
 324   /* Whether the above atom names name the atoms in the group, or
 325    * those not in the group */
 326   gmx_bool bTakeComplement;
 327   /* The index in wholename gives the first item in the arrays of
 328    * atomnames that should be tested with 'gmx_strncasecmp' in stead of
 329    * gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 330    * This is comparable to using a '*' wildcard at the end of specific
 331    * atom names, but that is more involved to implement...
 332    */
 333   int wholename;
 334   /* Only create this index group if it differs from the one specified in compareto,
 335      where -1 means to always create this group. */
 336   int compareto;
 337 } t_gmx_help_make_index_group;
 338
 339 static void analyse_prot(const char ** restype,t_atoms *atoms,
 340                          t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 341 {
 342   /* lists of atomnames to be used in constructing index groups: */
 343   static const char *pnoh[]    = { "H", "HN" };
 344   static const char *pnodum[]  = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 345                              "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 346   static const char *calpha[]  = { "CA" };
 347   static const char *bb[]      = { "N","CA","C" };
 348   static const char *mc[]      = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT" };
 349   static const char *mcb[]     = { "N","CA","CB","C","O","O1","O2","OC1","OC2","OT","OXT" };
 350   static const char *mch[]     = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT",
 351                                    "H1","H2","H3","H","HN" };
 352
 353   static const t_gmx_help_make_index_group constructing_data[] =
 354     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 355      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 356      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 357      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 358      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 359      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 360      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 361      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 362      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 363      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0},
 364     };
 365   const int num_index_groups = asize(constructing_data);
 366
 367   int     n,j;
 368   atom_id *aid;
 369   int     nra,nnpres,npres;
 370   gmx_bool    match;
 371   char    ndx_name[STRLEN],*atnm;
 372   int i;
 373
 374   if (bVerb)
 375   {
 376     printf("Analysing Protein...\n");
 377   }
 378   snew(aid,atoms->nr);
 379
 380   /* calculate the number of protein residues */
 381   npres=0;
 382   for(i=0; (i<atoms->nres); i++) {
 383     if (0 == gmx_strcasecmp(restype[i],"Protein")) {
 384       npres++;
 385     }
 386   }
 387   /* find matching or complement atoms */
 388   for(i=0; (i<(int)num_index_groups); i++) {
 389     nra=0;
 390     for(n=0; (n<atoms->nr); n++) {
 391       if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind],"Protein")) {
 392         match=FALSE;
 393         for(j=0; (j<constructing_data[i].num_defining_atomnames); j++) {
 394           /* skip digits at beginning of atomname, e.g. 1H */
 395           atnm=*atoms->atomname[n];
 396           while (isdigit(atnm[0])) {
 397             atnm++;
 398           }
 399           if ( (constructing_data[i].wholename==-1) || (j<constructing_data[i].wholename) ) {
 400             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],atnm)) {
 401               match=TRUE;
 402             }
 403           } else {
 404             if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j],atnm,strlen(constructing_data[i].defining_atomnames[j]))) {
 405               match=TRUE;
 406             }
 407           }
 408         }
 409         if (constructing_data[i].bTakeComplement != match) {
 410           aid[nra++]=n;
 411         }
 412       }
 413     }
 414     /* if we want to add this group always or it differs from previous
 415        group, add it: */
 416     if ( -1 == constructing_data[i].compareto || !grp_cmp(gb,nra,aid,constructing_data[i].compareto-i) ) {
 417       add_grp(gb,gn,nra,aid,constructing_data[i].group_name);
 418     }
 419   }
 420
 421   if (bASK) {
 422     for(i=0; (i<(int)num_index_groups); i++) {
 423       printf("Split %12s into %5d residues (y/n) ? ",constructing_data[i].group_name,npres);
 424       if (gmx_ask_yesno(bASK)) {
 425         int resind;
 426         nra = 0;
 427         for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 428           resind = atoms->atom[n].resind;
 429           for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++) {
 430             match=FALSE;
 431             for(j=0;(j<constructing_data[i].num_defining_atomnames); j++) {
 432               if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],*atoms->atomname[n])) {
 433                 match=TRUE;
 434               }
 435             }
 436             if (constructing_data[i].bTakeComplement != match) {
 437               aid[nra++]=n;
 438             }
 439           }
 440           /* copy the residuename to the tail of the groupname */
 441           if (nra > 0) {
 442             t_resinfo *ri;
 443             ri = &atoms->resinfo[resind];
 444             sprintf(ndx_name,"%s_%s%d%c",
 445                     constructing_data[i].group_name,*ri->name,ri->nr,ri->ic==' ' ? '\0' : ri->ic);
 446             add_grp(gb,gn,nra,aid,ndx_name);
 447             nra = 0;
 448           }
 449         }
 450       }
 451     }
 452     printf("Make group with sidechain and C=O swapped (y/n) ? ");
 453     if (gmx_ask_yesno(bASK)) {
 454       /* Make swap sidechain C=O index */
 455       int resind,hold;
 456       nra = 0;
 457       for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 458         resind = atoms->atom[n].resind;
 459         hold  = -1;
 460         for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++)
 461           if (strcmp("CA",*atoms->atomname[n]) == 0) {
 462             aid[nra++]=n;
 463             hold=nra;
 464             nra+=2;
 465           } else if (strcmp("C",*atoms->atomname[n]) == 0) {
 466             if (hold == -1) {
 467               gmx_incons("Atom naming problem");
 468             }
 469             aid[hold]=n;
 470           } else if (strcmp("O",*atoms->atomname[n]) == 0) {
 471             if (hold == -1) {
 472               gmx_incons("Atom naming problem");
 473             }
 474             aid[hold+1]=n;
 475           } else if (strcmp("O1",*atoms->atomname[n]) == 0) {
 476             if (hold == -1) {
 477               gmx_incons("Atom naming problem");
 478             }
 479             aid[hold+1]=n;
 480           } else
 481             aid[nra++]=n;
 482       }
 483       /* copy the residuename to the tail of the groupname */
 484       if (nra > 0) {
 485         add_grp(gb,gn,nra,aid,"SwapSC-CO");
 486         nra = 0;
 487       }
 488     }
 489   }
 490   sfree(aid);
 491 }
 492
 493
 494
 495
 496 /* Return 0 if the name was found, otherwise -1.
 497  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 498  */
 499 int
 500 gmx_residuetype_get_type(gmx_residuetype_t rt,const char * resname, const char ** p_restype)
 501 {
 502     int    i,rc;
 503
 504     rc=-1;
 505     for(i=0;i<rt->n && rc;i++)
 506     {
 507         rc=gmx_strcasecmp(rt->resname[i],resname);
 508     }
 509
 510     *p_restype = (rc==0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 511
 512     return rc;
 513 }
 514
 515 int
 516 gmx_residuetype_add(gmx_residuetype_t rt,const char *newresname, const char *newrestype)
 517 {
 518     int     i;
 519     int     found;
 520     const char *  p_oldtype;
 521
 522     found = !gmx_residuetype_get_type(rt,newresname,&p_oldtype);
 523
 524     if(found && gmx_strcasecmp(p_oldtype,newrestype))
 525     {
 526         fprintf(stderr,"Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 527                 newresname,p_oldtype,newrestype);
 528     }
 529
 530     if(found==0)
 531     {
 532         srenew(rt->resname,rt->n+1);
 533         srenew(rt->restype,rt->n+1);
 534         rt->resname[rt->n]=strdup(newresname);
 535         rt->restype[rt->n]=strdup(newrestype);
 536         rt->n++;
 537     }
 538
 539     return 0;
 540 }
 541
 542
 543 int
 544 gmx_residuetype_init(gmx_residuetype_t *prt)
 545 {
 546     FILE *  db;
 547     char    line[STRLEN];
 548     char    resname[STRLEN],restype[STRLEN],dum[STRLEN];
 549     char *  p;
 550     int     i;
 551     struct gmx_residuetype *rt;
 552
 553     snew(rt,1);
 554     *prt=rt;
 555
 556     rt->n        = 0;
 557     rt->resname  = NULL;
 558     rt->restype = NULL;
 559
 560     db=libopen("residuetypes.dat");
 561
 562     while(get_a_line(db,line,STRLEN))
 563     {
 564         strip_comment(line);
 565         trim(line);
 566         if(line[0]!='\0')
 567         {
 568             if(sscanf(line,"%s %s %s",resname,restype,dum)!=2)
 569             {
 570                 gmx_fatal(FARGS,"Incorrect number of columns (2 expected) for line in residuetypes.dat");
 571             }
 572             gmx_residuetype_add(rt,resname,restype);
 573         }
 574     }
 575
 576     fclose(db);
 577
 578     return 0;
 579 }
 580
 581
 582
 583 int
 584 gmx_residuetype_destroy(gmx_residuetype_t rt)
 585 {
 586     int i;
 587
 588     for(i=0;i<rt->n;i++)
 589     {
 590         sfree(rt->resname[i]);
 591         sfree(rt->restype[i]);
 592     }
 593     sfree(rt->resname);
 594     sfree(rt->restype);
 595     sfree(rt);
 596
 597     return 0;
 598 }
 599
 600 int
 601 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 602                              const char ***       p_typenames,
 603                              int *                ntypes)
 604 {
 605     int      i,j,n;
 606     int      found;
 607     const char **  my_typename;
 608     char *   p;
 609
 610     n=0;
 611
 612     my_typename=NULL;
 613     for(i=0;i<rt->n;i++)
 614     {
 615         p=rt->restype[i];
 616         found=0;
 617         for(j=0;j<n && !found;j++)
 618         {
 619             found=!gmx_strcasecmp(p,my_typename[j]);
 620         }
 621
 622         if(!found)
 623         {
 624             srenew(my_typename,n+1);
 625             my_typename[n]=p;
 626             n++;
 627         }
 628     }
 629     *ntypes=n;
 630     *p_typenames=my_typename;
 631
 632     return 0;
 633 }
 634
 635
 636
 637 gmx_bool
 638 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 639 {
 640     gmx_bool rc;
 641     const char *p_type;
 642
 643     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 644        gmx_strcasecmp(p_type,"Protein")==0)
 645     {
 646         rc=TRUE;
 647     }
 648     else
 649     {
 650         rc=FALSE;
 651     }
 652     return rc;
 653 }
 654
 655 gmx_bool
 656 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 657 {
 658     gmx_bool rc;
 659     const char *p_type;
 660
 661     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 662        gmx_strcasecmp(p_type,"DNA")==0)
 663     {
 664         rc=TRUE;
 665     }
 666     else
 667     {
 668         rc=FALSE;
 669     }
 670     return rc;
 671 }
 672
 673 gmx_bool
 674 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 675 {
 676     gmx_bool rc;
 677     const char *p_type;
 678
 679     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 680        gmx_strcasecmp(p_type,"RNA")==0)
 681     {
 682         rc=TRUE;
 683     }
 684     else
 685     {
 686         rc=FALSE;
 687     }
 688     return rc;
 689 }
 690
 691 /* Return the size of the arrays */
 692 int
 693 gmx_residuetype_get_size(gmx_residuetype_t rt)
 694 {
 695     return rt->n;
 696 }
 697
 698 /* Search for a residuetype with name resnm within the
 699  * gmx_residuetype database. Return the index if found,
 700  * otherwise -1.
 701  */
 702 int
 703 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 704 {
 705     int i,rc;
 706
 707     rc=-1;
 708     for(i=0;i<rt->n && rc;i++)
 709     {
 710         rc=gmx_strcasecmp(rt->resname[i],resnm);
 711     }
 712
 713     return (0 == rc) ? i-1 : -1;
 714 }
 715
 716 /* Return the name of the residuetype with the given index, or
 717  * NULL if not found. */
 718 const char *
 719 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 720 {
 721   if(index >= 0 && index < rt->n) {
 722     return rt->resname[index];
 723   } else {
 724     return NULL;
 725   }
 726 }
 727
 728
 729
 730 void analyse(t_atoms *atoms,t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 731 {
 732     gmx_residuetype_t rt=NULL;
 733     char    *resnm;
 734     atom_id *aid;
 735     const char **  restype;
 736     int     nra;
 737     int     i,k;
 738     size_t  j;
 739     int     ntypes;
 740     char *  p;
 741     const char ** p_typename;
 742     int     iwater,iion;
 743     int     nwater,nion;
 744     int     found;
 745
 746     if (bVerb)
 747     {
 748         printf("Analysing residue names:\n");
 749     }
 750     /* Create system group, every single atom */
 751     snew(aid,atoms->nr);
 752     for(i=0;i<atoms->nr;i++)
 753     {
 754         aid[i]=i;
 755     }
 756     add_grp(gb,gn,atoms->nr,aid,"System");
 757     sfree(aid);
 758
 759     /* For every residue, get a pointer to the residue type name */
 760     gmx_residuetype_init(&rt);
 761     assert(rt);
 762
 763     snew(restype,atoms->nres);
 764     ntypes = 0;
 765     p_typename = NULL;
 766     for(i=0;i<atoms->nres;i++)
 767     {
 768         resnm = *atoms->resinfo[i].name;
 769         gmx_residuetype_get_type(rt,resnm,&(restype[i]));
 770
 771         /* Note that this does not lead to a N*N loop, but N*K, where
 772          * K is the number of residue _types_, which is small and independent of N.
 773          */
 774         found = 0;
 775         for(k=0;k<ntypes && !found;k++)
 776         {
 777             found = !strcmp(restype[i],p_typename[k]);
 778         }
 779         if(!found)
 780         {
 781             srenew(p_typename,ntypes+1);
 782             p_typename[ntypes++] = strdup(restype[i]);
 783         }
 784     }
 785
 786     if (bVerb)
 787     {
 788         p_status(restype,atoms->nres,p_typename,ntypes);
 789     }
 790
 791     for(k=0;k<ntypes;k++)
 792     {
 793         aid=mk_aid(atoms,restype,p_typename[k],&nra,TRUE);
 794
 795         /* Check for special types to do fancy stuff with */
 796
 797         if(!gmx_strcasecmp(p_typename[k],"Protein") && nra>0)
 798         {
 799             sfree(aid);
 800             /* PROTEIN */
 801             analyse_prot(restype,atoms,gb,gn,bASK,bVerb);
 802
 803             /* Create a Non-Protein group */
 804             aid=mk_aid(atoms,restype,"Protein",&nra,FALSE);
 805             if ((nra > 0) && (nra < atoms->nr))
 806             {
 807                 add_grp(gb,gn,nra,aid,"non-Protein");
 808             }
 809             sfree(aid);
 810         }
 811         else if(!gmx_strcasecmp(p_typename[k],"Water") && nra>0)
 812         {
 813             add_grp(gb,gn,nra,aid,p_typename[k]);
 814             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 815             add_grp(gb,gn,nra,aid,"SOL");
 816
 817             sfree(aid);
 818
 819             /* Solvent, create a negated group too */
 820             aid=mk_aid(atoms,restype,"Water",&nra,FALSE);
 821             if ((nra > 0) && (nra < atoms->nr))
 822             {
 823                 add_grp(gb,gn,nra,aid,"non-Water");
 824             }
 825             sfree(aid);
 826         }
 827         else if(nra>0)
 828         {
 829             /* Other groups */
 830             add_grp(gb,gn,nra,aid,p_typename[k]);
 831             sfree(aid);
 832             analyse_other(restype,atoms,gb,gn,bASK,bVerb);
 833         }
 834     }
 835
 836     sfree(p_typename);
 837     sfree(restype);
 838     gmx_residuetype_destroy(rt);
 839
 840     /* Create a merged water_and_ions group */
 841     iwater = -1;
 842     iion   = -1;
 843     nwater = 0;
 844     nion   = 0;
 845
 846     for(i=0;i<gb->nr;i++)
 847     {
 848         if(!gmx_strcasecmp((*gn)[i],"Water"))
 849         {
 850             iwater = i;
 851             nwater = gb->index[i+1]-gb->index[i];
 852         }
 853         else if(!gmx_strcasecmp((*gn)[i],"Ion"))
 854         {
 855             iion = i;
 856             nion = gb->index[i+1]-gb->index[i];
 857         }
 858     }
 859
 860     if(nwater>0 && nion>0)
 861     {
 862         srenew(gb->index,gb->nr+2);
 863         srenew(*gn,gb->nr+1);
 864         (*gn)[gb->nr] = strdup("Water_and_ions");
 865         srenew(gb->a,gb->nra+nwater+nion);
 866         if(nwater>0)
 867         {
 868             for(i=gb->index[iwater];i<gb->index[iwater+1];i++)
 869             {
 870                 gb->a[gb->nra++] = gb->a[i];
 871             }
 872         }
 873         if(nion>0)
 874         {
 875             for(i=gb->index[iion];i<gb->index[iion+1];i++)
 876             {
 877                 gb->a[gb->nra++] = gb->a[i];
 878             }
 879         }
 880         gb->nr++;
 881         gb->index[gb->nr]=gb->nra;
 882     }
 883 }
 884
 885
 886 void check_index(char *gname,int n,atom_id index[],char *traj,int natoms)
 887 {
 888   int i;
 889
 890   for(i=0; i<n; i++)
 891     if (index[i] >= natoms)
 892       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 893                   gname ? gname : "Index",i+1, index[i]+1,
 894                   traj ? traj : "the trajectory",natoms);
 895     else if (index[i] < 0)
 896       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is less than zero",
 897                 gname ? gname : "Index",i+1, index[i]+1);
 898 }
 899
 900 t_blocka *init_index(const char *gfile, char ***grpname)
 901 {
 902   FILE     *in;
 903   t_blocka  *b;
 904   int      a,maxentries;
 905   int      i,j,ng,nread;
 906   char     line[STRLEN],*pt,str[STRLEN];
 907
 908   in=gmx_fio_fopen(gfile,"r");
 909   snew(b,1);
 910   get_a_line(in,line,STRLEN);
 911   if ( line[0]=='[' ) {
 912     /* new format */
 913     b->nr=0;
 914     b->index=NULL;
 915     b->nra=0;
 916     b->a=NULL;
 917     *grpname=NULL;
 918     maxentries=0;
 919     do {
 920       if (get_header(line,str)) {
 921         b->nr++;
 922         srenew(b->index,b->nr+1);
 923         srenew(*grpname,b->nr);
 924         if (b->nr==1)
 925           b->index[0]=0;
 926         b->index[b->nr]=b->index[b->nr-1];
 927         (*grpname)[b->nr-1]=strdup(str);
 928       } else {
 929           if (b->nr==0)
 930           {
 931               gmx_fatal(FARGS,"The first header of your indexfile is invalid");
 932           }
 933         pt=line;
 934         while (sscanf(pt,"%s",str) == 1) {
 935           i=b->index[b->nr];
 936           if (i>=maxentries) {
 937             maxentries+=1024;
 938             srenew(b->a,maxentries);
 939           }
 940           b->a[i]=strtol(str, NULL, 10)-1;
 941           b->index[b->nr]++;
 942           (b->nra)++;
 943           pt=strstr(pt,str)+strlen(str);
 944         }
 945       }
 946     } while (get_a_line(in,line,STRLEN));
 947   }
 948   else {
 949     /* old format */
 950     sscanf(line,"%d%d",&b->nr,&b->nra);
 951     snew(b->index,b->nr+1);
 952     snew(*grpname,b->nr);
 953     b->index[0]=0;
 954     snew(b->a,b->nra);
 955     for (i=0; (i<b->nr); i++) {
 956       nread=fscanf(in,"%s%d",str,&ng);
 957       (*grpname)[i]=strdup(str);
 958       b->index[i+1]=b->index[i]+ng;
 959       if (b->index[i+1] > b->nra)
 960         gmx_fatal(FARGS,"Something wrong in your indexfile at group %s",str);
 961       for(j=0; (j<ng); j++) {
 962         nread=fscanf(in,"%d",&a);
 963         b->a[b->index[i]+j]=a;
 964       }
 965     }
 966   }
 967   gmx_fio_fclose(in);
 968
 969   for(i=0; (i<b->nr); i++) {
 970     for(j=b->index[i]; (j<b->index[i+1]); j++) {
 971       if (b->a[j] < 0)
 972         fprintf(stderr,"\nWARNING: negative index %d in group %s\n\n",
 973                 b->a[j],(*grpname)[i]);
 974     }
 975   }
 976
 977   return b;
 978 }
 979
 980 static void minstring(char *str)
 981 {
 982   int i;
 983
 984   for (i=0; (i < (int)strlen(str)); i++)
 985     if (str[i]=='-')
 986       str[i]='_';
 987 }
 988
 989 int find_group(char s[], int ngrps, char **grpname)
 990 {
 991   int aa, i, n;
 992   char string[STRLEN];
 993   gmx_bool bMultiple;
 994
 995   bMultiple = FALSE;
 996   n = strlen(s);
 997   aa=NOTSET;
 998   /* first look for whole name match */
 999   if (aa==NOTSET)
1000     for(i=0; i<ngrps; i++)
1001       if (gmx_strcasecmp_min(s,grpname[i])==0) {
1002         if(aa!=NOTSET)
1003           bMultiple = TRUE;
1004         aa=i;
1005       }
1006   /* second look for first string match */
1007   if (aa==NOTSET)
1008     for(i=0; i<ngrps; i++)
1009       if (gmx_strncasecmp_min(s,grpname[i],n)==0) {
1010         if(aa!=NOTSET)
1011           bMultiple = TRUE;
1012         aa=i;
1013       }
1014   /* last look for arbitrary substring match */
1015   if (aa==NOTSET) {
1016     upstring(s);
1017     minstring(s);
1018     for(i=0; i<ngrps; i++) {
1019       strcpy(string, grpname[i]);
1020       upstring(string);
1021       minstring(string);
1022       if (strstr(string,s)!=NULL) {
1023         if(aa!=NOTSET)
1024           bMultiple = TRUE;
1025         aa=i;
1026       }
1027     }
1028   }
1029   if (bMultiple) {
1030     printf("Error: Multiple groups '%s' selected\n", s);
1031     aa=NOTSET;
1032   }
1033   return aa;
1034 }
1035
1036 static int qgroup(int *a, int ngrps, char **grpname)
1037 {
1038     char s[STRLEN];
1039     int  aa;
1040     gmx_bool bInRange;
1041     char *end;
1042
1043     do {
1044         fprintf(stderr,"Select a group: ");
1045         do {
1046             if ( scanf("%s",s)!=1 )
1047                 gmx_fatal(FARGS,"Cannot read from input");
1048             trim(s); /* remove spaces */
1049         } while (strlen(s)==0);
1050         aa = strtol(s, &end, 10);
1051         if (aa==0 && end[0] != '\0') /* string entered */
1052             aa = find_group(s, ngrps, grpname);
1053         bInRange = (aa >= 0 && aa < ngrps);
1054         if (!bInRange)
1055             printf("Error: No such group '%s'\n", s);
1056     } while (!bInRange);
1057     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1058     *a = aa;
1059     return aa;
1060 }
1061
1062 static void rd_groups(t_blocka *grps,char **grpname,char *gnames[],
1063                       int ngrps,int isize[],atom_id *index[],int grpnr[])
1064 {
1065   int i,j,gnr1;
1066
1067   if (grps->nr==0)
1068     gmx_fatal(FARGS,"Error: no groups in indexfile");
1069   for(i=0; (i<grps->nr); i++)
1070     fprintf(stderr,"Group %5d (%15s) has %5d elements\n",i,grpname[i],
1071            grps->index[i+1]-grps->index[i]);
1072   for(i=0; (i<ngrps); i++) {
1073     if (grps->nr > 1)
1074       do {
1075         gnr1=qgroup(&grpnr[i], grps->nr, grpname);
1076         if ((gnr1<0) || (gnr1>=grps->nr))
1077           fprintf(stderr,"Select between %d and %d.\n",0,grps->nr-1);
1078       } while ((gnr1<0) || (gnr1>=grps->nr));
1079     else {
1080       fprintf(stderr,"There is one group in the index\n");
1081       gnr1=0;
1082     }
1083     gnames[i]=strdup(grpname[gnr1]);
1084     isize[i]=grps->index[gnr1+1]-grps->index[gnr1];
1085     snew(index[i],isize[i]);
1086     for(j=0; (j<isize[i]); j++)
1087       index[i][j]=grps->a[grps->index[gnr1]+j];
1088   }
1089 }
1090
1091 void rd_index(const char *statfile,int ngrps,int isize[],
1092               atom_id *index[],char *grpnames[])
1093 {
1094   char    **gnames;
1095   t_blocka *grps;
1096   int     *grpnr;
1097
1098   snew(grpnr,ngrps);
1099   if (!statfile)
1100     gmx_fatal(FARGS,"No index file specified");
1101   grps=init_index(statfile,&gnames);
1102   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
1103 }
1104
1105 void rd_index_nrs(char *statfile,int ngrps,int isize[],
1106                   atom_id *index[],char *grpnames[],int grpnr[])
1107 {
1108   char    **gnames;
1109   t_blocka *grps;
1110
1111   if (!statfile)
1112     gmx_fatal(FARGS,"No index file specified");
1113   grps=init_index(statfile,&gnames);
1114
1115   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
1116 }
1117
1118 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1119                int isize[], atom_id *index[],char *grpnames[])
1120 {
1121   char    ***gnames;
1122   t_blocka *grps = NULL;
1123   int     *grpnr;
1124
1125   snew(grpnr,ngrps);
1126   snew(gnames,1);
1127   if (fnm != NULL) {
1128     grps=init_index(fnm,gnames);
1129   }
1130   else if (atoms) {
1131     snew(grps,1);
1132     snew(grps->index,1);
1133     analyse(atoms,grps,gnames,FALSE,FALSE);
1134   }
1135   else
1136     gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1137
1138   rd_groups(grps,*gnames,grpnames,ngrps,isize,index,grpnr);
1139 }
1140
1141 t_cluster_ndx *cluster_index(FILE *fplog,const char *ndx)
1142 {
1143   t_cluster_ndx *c;
1144   int i;
1145
1146   snew(c,1);
1147   c->clust     = init_index(ndx,&c->grpname);
1148   c->maxframe = -1;
1149   for(i=0; (i<c->clust->nra); i++)
1150     c->maxframe = max(c->maxframe,c->clust->a[i]);
1151   fprintf(fplog ? fplog : stdout,
1152           "There are %d clusters containing %d structures, highest framenr is %d\n",
1153           c->clust->nr,c->clust->nra,c->maxframe);
1154   if (debug) {
1155     pr_blocka(debug,0,"clust",c->clust,TRUE);
1156     for(i=0; (i<c->clust->nra); i++)
1157       if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1158         gmx_fatal(FARGS,"Range check error for c->clust->a[%d] = %d\n"
1159                   "should be within 0 and %d",i,c->clust->a[i],c->maxframe+1);
1160   }
1161   c->inv_clust=make_invblocka(c->clust,c->maxframe);
1162
1163   return c;
1164 }
1165