src/gmxlib/index.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * GROningen Mixture of Alchemy and Childrens' Stories
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <ctype.h>
  40 #include <string.h>
  41 #include "sysstuff.h"
  42 #include "strdb.h"
  43 #include "futil.h"
  44 #include "macros.h"
  45 #include "names.h"
  46 #include "string2.h"
  47 #include "statutil.h"
  48 #include "confio.h"
  49 #include "main.h"
  50 #include "copyrite.h"
  51 #include "typedefs.h"
  52 #include "smalloc.h"
  53 #include "invblock.h"
  54 #include "macros.h"
  55 #include "index.h"
  56 #include "txtdump.h"
  57 #include "gmxfio.h"
  58
  59
  60
  61 const char gmx_residuetype_undefined[]="Other";
  62
  63 struct gmx_residuetype
  64 {
  65     int      n;
  66     char **  resname;
  67     char **  restype;
  68
  69 };
  70
  71
  72 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  73 {
  74   char c;
  75
  76   if (bASK) {
  77     do {
  78       c=toupper(fgetc(stdin));
  79     } while ((c != 'Y') && (c != 'N'));
  80
  81     return (c == 'Y');
  82   }
  83   else
  84     return FALSE;
  85 }
  86
  87 t_blocka *new_blocka(void)
  88 {
  89   t_blocka *block;
  90
  91   snew(block,1);
  92   snew(block->index,1);
  93
  94   return block;
  95 }
  96
  97 void write_index(const char *outf, t_blocka *b,char **gnames)
  98 {
  99   FILE *out;
 100   int  i,j,k;
 101
 102   out=gmx_fio_fopen(outf,"w");
 103   /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 104   for(i=0; (i<b->nr); i++) {
 105     fprintf(out,"[ %s ]\n",gnames[i]);
 106     for(k=0,j=b->index[i]; j<b->index[i+1]; j++,k++) {
 107       fprintf(out,"%4d ",b->a[j]+1);
 108       if ((k % 15) == 14)
 109         fprintf(out,"\n");
 110     }
 111     fprintf(out,"\n");
 112   }
 113   gmx_fio_fclose(out);
 114 }
 115
 116 void add_grp(t_blocka *b,char ***gnames,int nra,atom_id a[],const char *name)
 117 {
 118   int i;
 119
 120   srenew(b->index,b->nr+2);
 121   srenew(*gnames,b->nr+1);
 122   (*gnames)[b->nr]=strdup(name);
 123
 124   srenew(b->a,b->nra+nra);
 125   for(i=0; (i<nra); i++)
 126     b->a[b->nra++]=a[i];
 127   b->nr++;
 128   b->index[b->nr]=b->nra;
 129 }
 130
 131 /* compare index in `a' with group in `b' at `index',
 132    when `index'<0 it is relative to end of `b' */
 133 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 134 {
 135   int i;
 136
 137   if (index < 0)
 138     index = b->nr-1+index;
 139   if (index >= b->nr)
 140     gmx_fatal(FARGS,"no such index group %d in t_blocka (nr=%d)",index,b->nr);
 141   /* compare sizes */
 142   if ( nra != b->index[index+1] - b->index[index] )
 143     return FALSE;
 144   for(i=0; i<nra; i++)
 145     if ( a[i] != b->a[b->index[index]+i] )
 146       return FALSE;
 147   return TRUE;
 148 }
 149
 150 static void
 151 p_status(const char **restype, int nres, const char **typenames, int ntypes, gmx_bool bVerb)
 152 {
 153     int i,j;
 154     int found;
 155
 156     int * counter;
 157
 158     snew(counter,ntypes);
 159     for(i=0;i<ntypes;i++)
 160     {
 161         counter[i]=0;
 162     }
 163     for(i=0;i<nres;i++)
 164     {
 165         found=0;
 166         for(j=0;j<ntypes;j++)
 167         {
 168             if(!gmx_strcasecmp(restype[i],typenames[j]))
 169             {
 170                 counter[j]++;
 171             }
 172         }
 173     }
 174
 175     if (bVerb)
 176     {
 177         for(i=0; (i<ntypes); i++)
 178         {
 179             if(counter[i]>0)
 180             {
 181                 printf("There are: %5d %10s residues\n",counter[i],typenames[i]);
 182             }
 183         }
 184     }
 185 }
 186
 187
 188 atom_id *
 189 mk_aid(t_atoms *atoms,const char ** restype,const char * typestring,int *nra,gmx_bool bMatch)
 190 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 191 {
 192     atom_id *a;
 193     int     i;
 194     int     res;
 195
 196     snew(a,atoms->nr);
 197     *nra=0;
 198     for(i=0; (i<atoms->nr); i++)
 199     {
 200         res=!gmx_strcasecmp(restype[atoms->atom[i].resind],typestring);
 201         if(bMatch==FALSE)
 202         {
 203             res=!res;
 204         }
 205         if(res)
 206         {
 207             a[(*nra)++]=i;
 208         }
 209     }
 210
 211     return a;
 212 }
 213
 214 typedef struct {
 215   char *rname;
 216   gmx_bool bNeg;
 217   char *gname;
 218 } restp_t;
 219
 220 static void analyse_other(const char ** restype,t_atoms *atoms,
 221                           t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 222 {
 223   restp_t *restp=NULL;
 224   char **attp=NULL;
 225   char *rname,*aname;
 226   atom_id *other_ndx,*aid,*aaid;
 227   int  i,j,k,l,resind,naid,naaid,natp,nrestp=0;
 228
 229     for(i=0; (i<atoms->nres); i++)
 230     {
 231         if (gmx_strcasecmp(restype[i],"Protein") && gmx_strcasecmp(restype[i],"DNA") && gmx_strcasecmp(restype[i],"RNA") && gmx_strcasecmp(restype[i],"Water"))
 232         {
 233             break;
 234         }
 235     }
 236   if (i < atoms->nres) {
 237     /* we have others */
 238     if (bVerb)
 239       printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 240     snew(other_ndx,atoms->nr);
 241     for(k=0; (k<atoms->nr); k++) {
 242       resind = atoms->atom[k].resind;
 243       rname = *atoms->resinfo[resind].name;
 244         if (gmx_strcasecmp(restype[resind],"Protein") && gmx_strcasecmp(restype[resind],"DNA") &&
 245             gmx_strcasecmp(restype[resind],"RNA") && gmx_strcasecmp(restype[resind],"Water"))
 246         {
 247
 248         for(l=0; (l<nrestp); l++)
 249           if (strcmp(restp[l].rname,rname) == 0)
 250             break;
 251         if (l==nrestp) {
 252           srenew(restp,nrestp+1);
 253           restp[nrestp].rname = strdup(rname);
 254           restp[nrestp].bNeg  = FALSE;
 255           restp[nrestp].gname = strdup(rname);
 256           nrestp++;
 257
 258     }
 259       }
 260     }
 261     for(i=0; (i<nrestp); i++) {
 262       snew(aid,atoms->nr);
 263       naid=0;
 264       for(j=0; (j<atoms->nr); j++) {
 265         rname = *atoms->resinfo[atoms->atom[j].resind].name;
 266         if ((strcmp(restp[i].rname,rname) == 0 && !restp[i].bNeg) ||
 267             (strcmp(restp[i].rname,rname) != 0 &&  restp[i].bNeg)) {
 268           aid[naid++] = j;
 269         }
 270       }
 271       add_grp(gb,gn,naid,aid,restp[i].gname);
 272       if (bASK) {
 273         printf("split %s into atoms (y/n) ? ",restp[i].gname);
 274         fflush(stdout);
 275         if (gmx_ask_yesno(bASK)) {
 276           natp=0;
 277           for(k=0; (k<naid); k++) {
 278             aname=*atoms->atomname[aid[k]];
 279             for(l=0; (l<natp); l++)
 280               if (strcmp(aname,attp[l]) == 0)
 281                 break;
 282             if (l == natp) {
 283               srenew(attp,++natp);
 284               attp[natp-1]=aname;
 285             }
 286           }
 287           if (natp > 1) {
 288             for(l=0; (l<natp); l++) {
 289               snew(aaid,naid);
 290               naaid=0;
 291               for(k=0; (k<naid); k++) {
 292                 aname=*atoms->atomname[aid[k]];
 293                 if (strcmp(aname,attp[l])==0)
 294                   aaid[naaid++]=aid[k];
 295               }
 296               add_grp(gb,gn,naaid,aaid,attp[l]);
 297               sfree(aaid);
 298             }
 299           }
 300           sfree(attp);
 301           attp=NULL;
 302         }
 303         sfree(aid);
 304       }
 305     }
 306     sfree(other_ndx);
 307   }
 308 }
 309
 310 static void analyse_prot(const char ** restype,t_atoms *atoms,
 311                          t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 312 {
 313   /* atomnames to be used in constructing index groups: */
 314   static const char *pnoh[]    = { "H" };
 315   static const char *pnodum[]  = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 316                              "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 317   static const char *calpha[]  = { "CA" };
 318   static const char *bb[]      = { "N","CA","C" };
 319   static const char *mc[]      = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT" };
 320   static const char *mcb[]     = { "N","CA","CB","C","O","O1","O2","OC1","OC2","OT","OXT" };
 321   static const char *mch[]     = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT",
 322                              "H1","H2","H3","H" };
 323   /* array of arrays of atomnames: */
 324   static const char **chains[] = { NULL,pnoh,calpha,bb,mc,mcb,mch,mch,mch,pnodum };
 325 #define NCH asize(chains)
 326   /* array of sizes of arrays of atomnames: */
 327   const int       sizes[NCH] = {
 328     0, asize(pnoh), asize(calpha), asize(bb),
 329     asize(mc), asize(mcb), asize(mch), asize(mch), asize(mch), asize(pnodum)
 330   };
 331   /* descriptive names of index groups */
 332   const char   *ch_name[NCH] = {
 333     "Protein", "Protein-H", "C-alpha", "Backbone",
 334     "MainChain", "MainChain+Cb", "MainChain+H", "SideChain", "SideChain-H",
 335     "Prot-Masses"
 336   };
 337   /* construct index group containing (TRUE) or excluding (FALSE)
 338      given atom names */
 339   const gmx_bool complement[NCH] = {
 340     TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE
 341   };
 342   const int  wholename[NCH]  = { -1, 0,-1,-1,-1,-1,-1,-1, 11,-1 };
 343   /* the index in wholename gives the first item in the arrays of
 344    * atomtypes that should be tested with 'gmx_strncasecmp' in stead of
 345    * gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 346    * This is comparable to using a '*' wildcard at the end of specific
 347    * atom names, but that is more involved to implement...
 348    */
 349   /* only add index group if it differs from the specified one,
 350      specify -1 to always add group */
 351   const int compareto[NCH] = { -1,-1,-1,-1,-1,-1,-1,-1,-1, 0 };
 352
 353   int     n,j;
 354   atom_id *aid;
 355   int     nra,nnpres,npres;
 356   gmx_bool    match;
 357   char    ndx_name[STRLEN],*atnm;
 358   int i;
 359
 360   if (bVerb)
 361   {
 362     printf("Analysing Protein...\n");
 363   }
 364   snew(aid,atoms->nr);
 365
 366   /* calculate the number of protein residues */
 367   npres=0;
 368   for(i=0; (i<atoms->nres); i++)
 369   if (!gmx_strcasecmp(restype[i],"Protein"))
 370   {
 371       npres++;
 372   }
 373   /* find matching or complement atoms */
 374   for(i=0; (i<(int)NCH); i++) {
 375     nra=0;
 376     for(n=0; (n<atoms->nr); n++) {
 377         if (!gmx_strcasecmp(restype[atoms->atom[n].resind],"Protein")) {
 378
 379         match=FALSE;
 380         for(j=0; (j<sizes[i]); j++) {
 381           /* skip digits at beginning of atomname, e.g. 1H */
 382           atnm=*atoms->atomname[n];
 383           while (isdigit(atnm[0]))
 384             atnm++;
 385           if ( (wholename[i]==-1) || (j<wholename[i]) ) {
 386             if (gmx_strcasecmp(chains[i][j],atnm) == 0)
 387               match=TRUE;
 388           } else {
 389             if (gmx_strncasecmp(chains[i][j],atnm,strlen(chains[i][j])) == 0)
 390               match=TRUE;
 391           }
 392         }
 393         if (match != complement[i])
 394           aid[nra++]=n;
 395       }
 396     }
 397     /* if we want to add this group always or it differs from previous
 398        group, add it: */
 399     if ( compareto[i] == -1 || !grp_cmp(gb,nra,aid,compareto[i]-i) )
 400       add_grp(gb,gn,nra,aid,ch_name[i]);
 401   }
 402
 403   if (bASK) {
 404     for(i=0; (i<(int)NCH); i++) {
 405       printf("Split %12s into %5d residues (y/n) ? ",ch_name[i],npres);
 406       if (gmx_ask_yesno(bASK)) {
 407         int resind;
 408         nra = 0;
 409         for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 410           resind = atoms->atom[n].resind;
 411           for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++) {
 412             match=FALSE;
 413             for(j=0;(j<sizes[i]); j++)
 414               if (gmx_strcasecmp(chains[i][j],*atoms->atomname[n]) == 0)
 415                 match=TRUE;
 416             if (match != complement[i])
 417               aid[nra++]=n;
 418           }
 419           /* copy the residuename to the tail of the groupname */
 420           if (nra > 0) {
 421             t_resinfo *ri;
 422             ri = &atoms->resinfo[resind];
 423             sprintf(ndx_name,"%s_%s%d%c",
 424                     ch_name[i],*ri->name,ri->nr,ri->ic==' ' ? '\0' : ri->ic);
 425             add_grp(gb,gn,nra,aid,ndx_name);
 426             nra = 0;
 427           }
 428         }
 429       }
 430     }
 431     printf("Make group with sidechain and C=O swapped (y/n) ? ");
 432     if (gmx_ask_yesno(bASK)) {
 433       /* Make swap sidechain C=O index */
 434       int resind,hold;
 435       nra = 0;
 436       for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 437         resind = atoms->atom[n].resind;
 438         hold  = -1;
 439         for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++)
 440           if (strcmp("CA",*atoms->atomname[n]) == 0) {
 441             aid[nra++]=n;
 442             hold=nra;
 443             nra+=2;
 444           } else if (strcmp("C",*atoms->atomname[n]) == 0) {
 445             if (hold == -1)
 446               gmx_incons("Atom naming problem");
 447             aid[hold]=n;
 448           } else if (strcmp("O",*atoms->atomname[n]) == 0) {
 449             if (hold == -1)
 450               gmx_incons("Atom naming problem");
 451             aid[hold+1]=n;
 452           } else if (strcmp("O1",*atoms->atomname[n]) == 0) {
 453             if (hold == -1)
 454               gmx_incons("Atom naming problem");
 455             aid[hold+1]=n;
 456           } else
 457             aid[nra++]=n;
 458       }
 459       /* copy the residuename to the tail of the groupname */
 460       if (nra > 0) {
 461         add_grp(gb,gn,nra,aid,"SwapSC-CO");
 462         nra = 0;
 463       }
 464     }
 465   }
 466   sfree(aid);
 467 }
 468
 469
 470
 471
 472 /* Return 0 if the name was found, otherwise -1.
 473  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 474  */
 475 int
 476 gmx_residuetype_get_type(gmx_residuetype_t rt,const char * resname, const char ** p_restype)
 477 {
 478     int    i,rc;
 479
 480     rc=-1;
 481     for(i=0;i<rt->n && rc;i++)
 482     {
 483         rc=gmx_strcasecmp(rt->resname[i],resname);
 484     }
 485
 486     *p_restype = (rc==0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 487
 488     return rc;
 489 }
 490
 491 int
 492 gmx_residuetype_add(gmx_residuetype_t rt,const char *newresname, const char *newrestype)
 493 {
 494     int     i;
 495     int     found;
 496     const char *  p_oldtype;
 497
 498     found = !gmx_residuetype_get_type(rt,newresname,&p_oldtype);
 499
 500     if(found && gmx_strcasecmp(p_oldtype,newrestype))
 501     {
 502         fprintf(stderr,"Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 503                 newresname,p_oldtype,newrestype);
 504     }
 505
 506     if(found==0)
 507     {
 508         srenew(rt->resname,rt->n+1);
 509         srenew(rt->restype,rt->n+1);
 510         rt->resname[rt->n]=strdup(newresname);
 511         rt->restype[rt->n]=strdup(newrestype);
 512         rt->n++;
 513     }
 514
 515     return 0;
 516 }
 517
 518
 519 int
 520 gmx_residuetype_init(gmx_residuetype_t *prt)
 521 {
 522     FILE *  db;
 523     char    line[STRLEN];
 524     char    resname[STRLEN],restype[STRLEN],dum[STRLEN];
 525     char *  p;
 526     int     i;
 527     struct gmx_residuetype *rt;
 528
 529     snew(rt,1);
 530     *prt=rt;
 531
 532     rt->n        = 0;
 533     rt->resname  = NULL;
 534     rt->restype = NULL;
 535
 536     db=libopen("residuetypes.dat");
 537
 538     while(get_a_line(db,line,STRLEN))
 539     {
 540         strip_comment(line);
 541         trim(line);
 542         if(line[0]!='\0')
 543         {
 544             if(sscanf(line,"%s %s %s",resname,restype,dum)!=2)
 545             {
 546                 gmx_fatal(FARGS,"Incorrect number of columns (2 expected) for line in residuetypes.dat");
 547             }
 548             gmx_residuetype_add(rt,resname,restype);
 549         }
 550     }
 551
 552     fclose(db);
 553
 554     return 0;
 555 }
 556
 557
 558
 559 int
 560 gmx_residuetype_destroy(gmx_residuetype_t rt)
 561 {
 562     int i;
 563
 564     for(i=0;i<rt->n;i++)
 565     {
 566         free(rt->resname[i]);
 567         free(rt->restype[i]);
 568     }
 569     free(rt);
 570
 571     return 0;
 572 }
 573
 574 int
 575 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 576                              const char ***       p_typenames,
 577                              int *                ntypes)
 578 {
 579     int      i,j,n;
 580     int      found;
 581     const char **  my_typename;
 582     char *   p;
 583
 584     n=0;
 585
 586     my_typename=NULL;
 587     for(i=0;i<rt->n;i++)
 588     {
 589         p=rt->restype[i];
 590         found=0;
 591         for(j=0;j<n && !found;j++)
 592         {
 593             found=!gmx_strcasecmp(p,my_typename[j]);
 594         }
 595
 596         if(!found)
 597         {
 598             srenew(my_typename,n+1);
 599             my_typename[n]=p;
 600             n++;
 601         }
 602     }
 603     *ntypes=n;
 604     *p_typenames=my_typename;
 605
 606     return 0;
 607 }
 608
 609
 610
 611 gmx_bool
 612 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 613 {
 614     gmx_bool rc;
 615     const char *p_type;
 616
 617     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 618        gmx_strcasecmp(p_type,"Protein")==0)
 619     {
 620         rc=TRUE;
 621     }
 622     else
 623     {
 624         rc=FALSE;
 625     }
 626     return rc;
 627 }
 628
 629 gmx_bool
 630 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 631 {
 632     gmx_bool rc;
 633     const char *p_type;
 634
 635     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 636        gmx_strcasecmp(p_type,"DNA")==0)
 637     {
 638         rc=TRUE;
 639     }
 640     else
 641     {
 642         rc=FALSE;
 643     }
 644     return rc;
 645 }
 646
 647 gmx_bool
 648 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 649 {
 650     gmx_bool rc;
 651     const char *p_type;
 652
 653     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 654        gmx_strcasecmp(p_type,"RNA")==0)
 655     {
 656         rc=TRUE;
 657     }
 658     else
 659     {
 660         rc=FALSE;
 661     }
 662     return rc;
 663 }
 664
 665 /* Return the size of the arrays */
 666 int
 667 gmx_residuetype_get_size(gmx_residuetype_t rt)
 668 {
 669     return rt->n;
 670 }
 671
 672 /* Search for a residuetype with name resnm within the
 673  * gmx_residuetype database. Return the index if found,
 674  * otherwise -1.
 675  */
 676 int
 677 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 678 {
 679     int i,rc;
 680
 681     rc=-1;
 682     for(i=0;i<rt->n && rc;i++)
 683     {
 684         rc=gmx_strcasecmp(rt->resname[i],resnm);
 685     }
 686
 687     return (0 == rc) ? i-1 : -1;
 688 }
 689
 690 /* Return the name of the residuetype with the given index, or
 691  * NULL if not found. */
 692 const char *
 693 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 694 {
 695   if(index >= 0 && index < rt->n) {
 696     return rt->resname[index];
 697   } else {
 698     return NULL;
 699   }
 700 }
 701
 702
 703
 704 void analyse(t_atoms *atoms,t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 705 {
 706     gmx_residuetype_t rt;
 707     char    *resnm;
 708     atom_id *aid;
 709     const char **  restype;
 710     int     nra;
 711     int     i,k;
 712     size_t  j;
 713     int     ntypes;
 714     char *  p;
 715     const char ** p_typename;
 716     int     iwater,iion;
 717     int     nwater,nion;
 718     int     found;
 719
 720     if (bVerb)
 721     {
 722         printf("Analysing residue names:\n");
 723     }
 724     /* Create system group, every single atom */
 725     snew(aid,atoms->nr);
 726     for(i=0;i<atoms->nr;i++)
 727     {
 728         aid[i]=i;
 729     }
 730     add_grp(gb,gn,atoms->nr,aid,"System");
 731     sfree(aid);
 732
 733     /* For every residue, get a pointer to the residue type name */
 734     gmx_residuetype_init(&rt);
 735
 736     snew(restype,atoms->nres);
 737     ntypes = 0;
 738     p_typename = NULL;
 739     for(i=0;i<atoms->nres;i++)
 740     {
 741         resnm = *atoms->resinfo[i].name;
 742         gmx_residuetype_get_type(rt,resnm,&(restype[i]));
 743
 744         if(i==0)
 745         {
 746             snew(p_typename,1);
 747             p_typename[ntypes++] = strdup(restype[i]);
 748         }
 749         else
 750         {
 751             /* Note that this does not lead to a N*N loop, but N*K, where
 752              * K is the number of residue _types_, which is small and independent of N.
 753              */
 754             found = 0;
 755             for(k=0;k<i && !found;k++)
 756             {
 757                 found = !strcmp(restype[i],restype[k]);
 758             }
 759             if(!found)
 760             {
 761                 srenew(p_typename,ntypes+1);
 762                 p_typename[ntypes++] = strdup(restype[i]);
 763             }
 764         }
 765     }
 766
 767     p_status(restype,atoms->nres,p_typename,ntypes,bVerb);
 768
 769     for(k=0;k<ntypes;k++)
 770     {
 771         aid=mk_aid(atoms,restype,p_typename[k],&nra,TRUE);
 772
 773         /* Check for special types to do fancy stuff with */
 774
 775         if(!gmx_strcasecmp(p_typename[k],"Protein") && nra>0)
 776         {
 777             sfree(aid);
 778             /* PROTEIN */
 779             analyse_prot(restype,atoms,gb,gn,bASK,bVerb);
 780
 781             /* Create a Non-Protein group */
 782             aid=mk_aid(atoms,restype,"Protein",&nra,FALSE);
 783             if ((nra > 0) && (nra < atoms->nr))
 784             {
 785                 add_grp(gb,gn,nra,aid,"non-Protein");
 786             }
 787             sfree(aid);
 788         }
 789         else if(!gmx_strcasecmp(p_typename[k],"Water") && nra>0)
 790         {
 791             add_grp(gb,gn,nra,aid,p_typename[k]);
 792             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 793             add_grp(gb,gn,nra,aid,"SOL");
 794
 795             sfree(aid);
 796
 797             /* Solvent, create a negated group too */
 798             aid=mk_aid(atoms,restype,"Water",&nra,FALSE);
 799             if ((nra > 0) && (nra < atoms->nr))
 800             {
 801                 add_grp(gb,gn,nra,aid,"non-Water");
 802             }
 803             sfree(aid);
 804         }
 805         else if(nra>0)
 806         {
 807             /* Other groups */
 808             add_grp(gb,gn,nra,aid,p_typename[k]);
 809             sfree(aid);
 810             analyse_other(restype,atoms,gb,gn,bASK,bVerb);
 811         }
 812     }
 813
 814     sfree(p_typename);
 815     sfree(restype);
 816     gmx_residuetype_destroy(rt);
 817
 818     /* Create a merged water_and_ions group */
 819     iwater = -1;
 820     iion   = -1;
 821     nwater = 0;
 822     nion   = 0;
 823
 824     for(i=0;i<gb->nr;i++)
 825     {
 826         if(!gmx_strcasecmp((*gn)[i],"Water"))
 827         {
 828             iwater = i;
 829             nwater = gb->index[i+1]-gb->index[i];
 830         }
 831         else if(!gmx_strcasecmp((*gn)[i],"Ion"))
 832         {
 833             iion = i;
 834             nion = gb->index[i+1]-gb->index[i];
 835         }
 836     }
 837
 838     if(nwater>0 && nion>0)
 839     {
 840         srenew(gb->index,gb->nr+2);
 841         srenew(*gn,gb->nr+1);
 842         (*gn)[gb->nr] = strdup("Water_and_ions");
 843         srenew(gb->a,gb->nra+nwater+nion);
 844         if(nwater>0)
 845         {
 846             for(i=gb->index[iwater];i<gb->index[iwater+1];i++)
 847             {
 848                 gb->a[gb->nra++] = gb->a[i];
 849             }
 850         }
 851         if(nion>0)
 852         {
 853             for(i=gb->index[iion];i<gb->index[iion+1];i++)
 854             {
 855                 gb->a[gb->nra++] = gb->a[i];
 856             }
 857         }
 858         gb->nr++;
 859         gb->index[gb->nr]=gb->nra;
 860     }
 861 }
 862
 863
 864 void check_index(char *gname,int n,atom_id index[],char *traj,int natoms)
 865 {
 866   int i;
 867
 868   for(i=0; i<n; i++)
 869     if (index[i] >= natoms)
 870       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 871                   gname ? gname : "Index",i+1, index[i]+1,
 872                   traj ? traj : "the trajectory",natoms);
 873     else if (index[i] < 0)
 874       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is less than zero",
 875                 gname ? gname : "Index",i+1, index[i]+1);
 876 }
 877
 878 t_blocka *init_index(const char *gfile, char ***grpname)
 879 {
 880   FILE     *in;
 881   t_blocka  *b;
 882   int      a,maxentries;
 883   int      i,j,ng,nread;
 884   char     line[STRLEN],*pt,str[STRLEN];
 885
 886   in=gmx_fio_fopen(gfile,"r");
 887   snew(b,1);
 888   get_a_line(in,line,STRLEN);
 889   if ( line[0]=='[' ) {
 890     /* new format */
 891     b->nr=0;
 892     b->index=NULL;
 893     b->nra=0;
 894     b->a=NULL;
 895     *grpname=NULL;
 896     maxentries=0;
 897     do {
 898       if (get_header(line,str)) {
 899         b->nr++;
 900         srenew(b->index,b->nr+1);
 901         srenew(*grpname,b->nr);
 902         if (b->nr==1)
 903           b->index[0]=0;
 904         b->index[b->nr]=b->index[b->nr-1];
 905         (*grpname)[b->nr-1]=strdup(str);
 906       } else {
 907         pt=line;
 908         while ((i=sscanf(pt,"%s",str)) == 1) {
 909           i=b->index[b->nr];
 910           if (i>=maxentries) {
 911             maxentries+=1024;
 912             srenew(b->a,maxentries);
 913           }
 914           b->a[i]=strtol(str, NULL, 10)-1;
 915           b->index[b->nr]++;
 916           (b->nra)++;
 917           pt=strstr(pt,str)+strlen(str);
 918         }
 919       }
 920     } while (get_a_line(in,line,STRLEN));
 921   }
 922   else {
 923     /* old format */
 924     sscanf(line,"%d%d",&b->nr,&b->nra);
 925     snew(b->index,b->nr+1);
 926     snew(*grpname,b->nr);
 927     b->index[0]=0;
 928     snew(b->a,b->nra);
 929     for (i=0; (i<b->nr); i++) {
 930       nread=fscanf(in,"%s%d",str,&ng);
 931       (*grpname)[i]=strdup(str);
 932       b->index[i+1]=b->index[i]+ng;
 933       if (b->index[i+1] > b->nra)
 934         gmx_fatal(FARGS,"Something wrong in your indexfile at group %s",str);
 935       for(j=0; (j<ng); j++) {
 936         nread=fscanf(in,"%d",&a);
 937         b->a[b->index[i]+j]=a;
 938       }
 939     }
 940   }
 941   gmx_fio_fclose(in);
 942
 943   for(i=0; (i<b->nr); i++) {
 944     for(j=b->index[i]; (j<b->index[i+1]); j++) {
 945       if (b->a[j] < 0)
 946         fprintf(stderr,"\nWARNING: negative index %d in group %s\n\n",
 947                 b->a[j],(*grpname)[i]);
 948     }
 949   }
 950
 951   return b;
 952 }
 953
 954 static void minstring(char *str)
 955 {
 956   int i;
 957
 958   for (i=0; (i < (int)strlen(str)); i++)
 959     if (str[i]=='-')
 960       str[i]='_';
 961 }
 962
 963 int find_group(char s[], int ngrps, char **grpname)
 964 {
 965   int aa, i, n;
 966   char string[STRLEN];
 967   gmx_bool bMultiple;
 968
 969   bMultiple = FALSE;
 970   n = strlen(s);
 971   aa=NOTSET;
 972   /* first look for whole name match */
 973   if (aa==NOTSET)
 974     for(i=0; i<ngrps; i++)
 975       if (gmx_strcasecmp_min(s,grpname[i])==0) {
 976         if(aa!=NOTSET)
 977           bMultiple = TRUE;
 978         aa=i;
 979       }
 980   /* second look for first string match */
 981   if (aa==NOTSET)
 982     for(i=0; i<ngrps; i++)
 983       if (gmx_strncasecmp_min(s,grpname[i],n)==0) {
 984         if(aa!=NOTSET)
 985           bMultiple = TRUE;
 986         aa=i;
 987       }
 988   /* last look for arbitrary substring match */
 989   if (aa==NOTSET) {
 990     upstring(s);
 991     minstring(s);
 992     for(i=0; i<ngrps; i++) {
 993       strcpy(string, grpname[i]);
 994       upstring(string);
 995       minstring(string);
 996       if (strstr(string,s)!=NULL) {
 997         if(aa!=NOTSET)
 998           bMultiple = TRUE;
 999         aa=i;
1000       }
1001     }
1002   }
1003   if (bMultiple) {
1004     printf("Error: Multiple groups '%s' selected\n", s);
1005     aa=NOTSET;
1006   }
1007   return aa;
1008 }
1009
1010 static int qgroup(int *a, int ngrps, char **grpname)
1011 {
1012     char s[STRLEN];
1013     int  aa;
1014     gmx_bool bInRange;
1015     char *end;
1016
1017     do {
1018         fprintf(stderr,"Select a group: ");
1019         do {
1020             if ( scanf("%s",s)!=1 )
1021                 gmx_fatal(FARGS,"Cannot read from input");
1022             trim(s); /* remove spaces */
1023         } while (strlen(s)==0);
1024         aa = strtol(s, &end, 10);
1025         if (aa==0 && end[0] != '\0') /* string entered */
1026             aa = find_group(s, ngrps, grpname);
1027         bInRange = (aa >= 0 && aa < ngrps);
1028         if (!bInRange)
1029             printf("Error: No such group '%s'\n", s);
1030     } while (!bInRange);
1031     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1032     *a = aa;
1033     return aa;
1034 }
1035
1036 static void rd_groups(t_blocka *grps,char **grpname,char *gnames[],
1037                       int ngrps,int isize[],atom_id *index[],int grpnr[])
1038 {
1039   int i,j,gnr1;
1040
1041   if (grps->nr==0)
1042     gmx_fatal(FARGS,"Error: no groups in indexfile");
1043   for(i=0; (i<grps->nr); i++)
1044     fprintf(stderr,"Group %5d (%15s) has %5d elements\n",i,grpname[i],
1045            grps->index[i+1]-grps->index[i]);
1046   for(i=0; (i<ngrps); i++) {
1047     if (grps->nr > 1)
1048       do {
1049         gnr1=qgroup(&grpnr[i], grps->nr, grpname);
1050         if ((gnr1<0) || (gnr1>=grps->nr))
1051           fprintf(stderr,"Select between %d and %d.\n",0,grps->nr-1);
1052       } while ((gnr1<0) || (gnr1>=grps->nr));
1053     else {
1054       fprintf(stderr,"There is one group in the index\n");
1055       gnr1=0;
1056     }
1057     gnames[i]=strdup(grpname[gnr1]);
1058     isize[i]=grps->index[gnr1+1]-grps->index[gnr1];
1059     snew(index[i],isize[i]);
1060     for(j=0; (j<isize[i]); j++)
1061       index[i][j]=grps->a[grps->index[gnr1]+j];
1062   }
1063 }
1064
1065 void rd_index(const char *statfile,int ngrps,int isize[],
1066               atom_id *index[],char *grpnames[])
1067 {
1068   char    **gnames;
1069   t_blocka *grps;
1070   int     *grpnr;
1071
1072   snew(grpnr,ngrps);
1073   if (!statfile)
1074     gmx_fatal(FARGS,"No index file specified");
1075   grps=init_index(statfile,&gnames);
1076   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
1077 }
1078
1079 void rd_index_nrs(char *statfile,int ngrps,int isize[],
1080                   atom_id *index[],char *grpnames[],int grpnr[])
1081 {
1082   char    **gnames;
1083   t_blocka *grps;
1084
1085   if (!statfile)
1086     gmx_fatal(FARGS,"No index file specified");
1087   grps=init_index(statfile,&gnames);
1088
1089   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
1090 }
1091
1092 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1093                int isize[], atom_id *index[],char *grpnames[])
1094 {
1095   char    ***gnames;
1096   t_blocka *grps = NULL;
1097   int     *grpnr;
1098
1099   snew(grpnr,ngrps);
1100   snew(gnames,1);
1101   if (fnm != NULL) {
1102     grps=init_index(fnm,gnames);
1103   }
1104   else if (atoms) {
1105     snew(grps,1);
1106     snew(grps->index,1);
1107     analyse(atoms,grps,gnames,FALSE,FALSE);
1108   }
1109   else
1110     gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1111
1112   rd_groups(grps,*gnames,grpnames,ngrps,isize,index,grpnr);
1113 }
1114
1115 t_cluster_ndx *cluster_index(FILE *fplog,const char *ndx)
1116 {
1117   t_cluster_ndx *c;
1118   int i;
1119
1120   snew(c,1);
1121   c->clust     = init_index(ndx,&c->grpname);
1122   c->maxframe = -1;
1123   for(i=0; (i<c->clust->nra); i++)
1124     c->maxframe = max(c->maxframe,c->clust->a[i]);
1125   fprintf(fplog ? fplog : stdout,
1126           "There are %d clusters containing %d structures, highest framenr is %d\n",
1127           c->clust->nr,c->clust->nra,c->maxframe);
1128   if (debug) {
1129     pr_blocka(debug,0,"clust",c->clust,TRUE);
1130     for(i=0; (i<c->clust->nra); i++)
1131       if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1132         gmx_fatal(FARGS,"Range check error for c->clust->a[%d] = %d\n"
1133                   "should be within 0 and %d",i,c->clust->a[i],c->maxframe+1);
1134   }
1135   c->inv_clust=make_invblocka(c->clust,c->maxframe);
1136
1137   return c;
1138 }
1139