src/gmxlib/index.c

   1 /*
   2  *
   3  *                This source code is part of
   4  *
   5  *                 G   R   O   M   A   C   S
   6  *
   7  *          GROningen MAchine for Chemical Simulations
   8  *
   9  *                        VERSION 3.2.0
  10  * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
  11  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
  12  * Copyright (c) 2001-2004, The GROMACS development team,
  13  * check out http://www.gromacs.org for more information.
  14
  15  * This program is free software; you can redistribute it and/or
  16  * modify it under the terms of the GNU General Public License
  17  * as published by the Free Software Foundation; either version 2
  18  * of the License, or (at your option) any later version.
  19  *
  20  * If you want to redistribute modifications, please consider that
  21  * scientific software is very special. Version control is crucial -
  22  * bugs must be traceable. We will be happy to consider code for
  23  * inclusion in the official distribution, but derived work must not
  24  * be called official GROMACS. Details are found in the README & COPYING
  25  * files - if they are missing, get the official version at www.gromacs.org.
  26  *
  27  * To help us fund GROMACS development, we humbly ask that you cite
  28  * the papers on the package - you can find them in the top README file.
  29  *
  30  * For more info, check our website at http://www.gromacs.org
  31  *
  32  * And Hey:
  33  * GROningen Mixture of Alchemy and Childrens' Stories
  34  */
  35 #ifdef HAVE_CONFIG_H
  36 #include <config.h>
  37 #endif
  38
  39 #include <ctype.h>
  40 #include <string.h>
  41 #include "sysstuff.h"
  42 #include "strdb.h"
  43 #include "futil.h"
  44 #include "macros.h"
  45 #include "names.h"
  46 #include "string2.h"
  47 #include "statutil.h"
  48 #include "confio.h"
  49 #include "main.h"
  50 #include "copyrite.h"
  51 #include "typedefs.h"
  52 #include "smalloc.h"
  53 #include "invblock.h"
  54 #include "macros.h"
  55 #include "index.h"
  56 #include "txtdump.h"
  57 #include "gmxfio.h"
  58
  59
  60
  61 const char gmx_residuetype_undefined[]="Other";
  62
  63 struct gmx_residuetype
  64 {
  65     int      n;
  66     char **  resname;
  67     char **  restype;
  68
  69 };
  70
  71
  72 static gmx_bool gmx_ask_yesno(gmx_bool bASK)
  73 {
  74   char c;
  75
  76   if (bASK) {
  77     do {
  78       c=toupper(fgetc(stdin));
  79     } while ((c != 'Y') && (c != 'N'));
  80
  81     return (c == 'Y');
  82   }
  83   else
  84     return FALSE;
  85 }
  86
  87 t_blocka *new_blocka(void)
  88 {
  89   t_blocka *block;
  90
  91   snew(block,1);
  92   snew(block->index,1);
  93
  94   return block;
  95 }
  96
  97 void write_index(const char *outf, t_blocka *b,char **gnames)
  98 {
  99   FILE *out;
 100   int  i,j,k;
 101
 102   out=gmx_fio_fopen(outf,"w");
 103   /* fprintf(out,"%5d  %5d\n",b->nr,b->nra); */
 104   for(i=0; (i<b->nr); i++) {
 105     fprintf(out,"[ %s ]\n",gnames[i]);
 106     for(k=0,j=b->index[i]; j<b->index[i+1]; j++,k++) {
 107       fprintf(out,"%4d ",b->a[j]+1);
 108       if ((k % 15) == 14)
 109         fprintf(out,"\n");
 110     }
 111     fprintf(out,"\n");
 112   }
 113   gmx_fio_fclose(out);
 114 }
 115
 116 void add_grp(t_blocka *b,char ***gnames,int nra,atom_id a[],const char *name)
 117 {
 118   int i;
 119
 120   srenew(b->index,b->nr+2);
 121   srenew(*gnames,b->nr+1);
 122   (*gnames)[b->nr]=strdup(name);
 123
 124   srenew(b->a,b->nra+nra);
 125   for(i=0; (i<nra); i++)
 126     b->a[b->nra++]=a[i];
 127   b->nr++;
 128   b->index[b->nr]=b->nra;
 129 }
 130
 131 /* compare index in `a' with group in `b' at `index',
 132    when `index'<0 it is relative to end of `b' */
 133 static gmx_bool grp_cmp(t_blocka *b, int nra, atom_id a[], int index)
 134 {
 135   int i;
 136
 137   if (index < 0)
 138     index = b->nr-1+index;
 139   if (index >= b->nr)
 140     gmx_fatal(FARGS,"no such index group %d in t_blocka (nr=%d)",index,b->nr);
 141   /* compare sizes */
 142   if ( nra != b->index[index+1] - b->index[index] )
 143     return FALSE;
 144   for(i=0; i<nra; i++)
 145     if ( a[i] != b->a[b->index[index]+i] )
 146       return FALSE;
 147   return TRUE;
 148 }
 149
 150 static void
 151 p_status(const char **restype, int nres, const char **typenames, int ntypes)
 152 {
 153     int i,j;
 154     int found;
 155
 156     int * counter;
 157
 158     snew(counter,ntypes);
 159     for(i=0;i<ntypes;i++)
 160     {
 161         counter[i]=0;
 162     }
 163     for(i=0;i<nres;i++)
 164     {
 165         found=0;
 166         for(j=0;j<ntypes;j++)
 167         {
 168             if(!gmx_strcasecmp(restype[i],typenames[j]))
 169             {
 170                 counter[j]++;
 171             }
 172         }
 173     }
 174
 175     for(i=0; (i<ntypes); i++)
 176     {
 177         if (counter[i] > 0)
 178         {
 179             printf("There are: %5d %10s residues\n",counter[i],typenames[i]);
 180         }
 181     }
 182
 183     sfree(counter);
 184 }
 185
 186
 187 atom_id *
 188 mk_aid(t_atoms *atoms,const char ** restype,const char * typestring,int *nra,gmx_bool bMatch)
 189 /* Make an array of atom_ids for all atoms with residuetypes matching typestring, or the opposite if bMatch is false */
 190 {
 191     atom_id *a;
 192     int     i;
 193     int     res;
 194
 195     snew(a,atoms->nr);
 196     *nra=0;
 197     for(i=0; (i<atoms->nr); i++)
 198     {
 199         res=!gmx_strcasecmp(restype[atoms->atom[i].resind],typestring);
 200         if(bMatch==FALSE)
 201         {
 202             res=!res;
 203         }
 204         if(res)
 205         {
 206             a[(*nra)++]=i;
 207         }
 208     }
 209
 210     return a;
 211 }
 212
 213 typedef struct {
 214   char *rname;
 215   gmx_bool bNeg;
 216   char *gname;
 217 } restp_t;
 218
 219 static void analyse_other(const char ** restype,t_atoms *atoms,
 220                           t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 221 {
 222   restp_t *restp=NULL;
 223   char **attp=NULL;
 224   char *rname,*aname;
 225   atom_id *other_ndx,*aid,*aaid;
 226   int  i,j,k,l,resind,naid,naaid,natp,nrestp=0;
 227
 228     for(i=0; (i<atoms->nres); i++)
 229     {
 230         if (gmx_strcasecmp(restype[i],"Protein") && gmx_strcasecmp(restype[i],"DNA") && gmx_strcasecmp(restype[i],"RNA") && gmx_strcasecmp(restype[i],"Water"))
 231         {
 232             break;
 233         }
 234     }
 235   if (i < atoms->nres) {
 236     /* we have others */
 237     if (bVerb)
 238       printf("Analysing residues not classified as Protein/DNA/RNA/Water and splitting into groups...\n");
 239     snew(other_ndx,atoms->nr);
 240     for(k=0; (k<atoms->nr); k++) {
 241       resind = atoms->atom[k].resind;
 242       rname = *atoms->resinfo[resind].name;
 243         if (gmx_strcasecmp(restype[resind],"Protein") && gmx_strcasecmp(restype[resind],"DNA") &&
 244             gmx_strcasecmp(restype[resind],"RNA") && gmx_strcasecmp(restype[resind],"Water"))
 245         {
 246
 247         for(l=0; (l<nrestp); l++)
 248           if (strcmp(restp[l].rname,rname) == 0)
 249             break;
 250         if (l==nrestp) {
 251           srenew(restp,nrestp+1);
 252           restp[nrestp].rname = strdup(rname);
 253           restp[nrestp].bNeg  = FALSE;
 254           restp[nrestp].gname = strdup(rname);
 255           nrestp++;
 256
 257     }
 258       }
 259     }
 260     for(i=0; (i<nrestp); i++) {
 261       snew(aid,atoms->nr);
 262       naid=0;
 263       for(j=0; (j<atoms->nr); j++) {
 264         rname = *atoms->resinfo[atoms->atom[j].resind].name;
 265         if ((strcmp(restp[i].rname,rname) == 0 && !restp[i].bNeg) ||
 266             (strcmp(restp[i].rname,rname) != 0 &&  restp[i].bNeg)) {
 267           aid[naid++] = j;
 268         }
 269       }
 270       add_grp(gb,gn,naid,aid,restp[i].gname);
 271       if (bASK) {
 272         printf("split %s into atoms (y/n) ? ",restp[i].gname);
 273         fflush(stdout);
 274         if (gmx_ask_yesno(bASK)) {
 275           natp=0;
 276           for(k=0; (k<naid); k++) {
 277             aname=*atoms->atomname[aid[k]];
 278             for(l=0; (l<natp); l++)
 279               if (strcmp(aname,attp[l]) == 0)
 280                 break;
 281             if (l == natp) {
 282               srenew(attp,++natp);
 283               attp[natp-1]=aname;
 284             }
 285           }
 286           if (natp > 1) {
 287             for(l=0; (l<natp); l++) {
 288               snew(aaid,naid);
 289               naaid=0;
 290               for(k=0; (k<naid); k++) {
 291                 aname=*atoms->atomname[aid[k]];
 292                 if (strcmp(aname,attp[l])==0)
 293                   aaid[naaid++]=aid[k];
 294               }
 295               add_grp(gb,gn,naaid,aaid,attp[l]);
 296               sfree(aaid);
 297             }
 298           }
 299           sfree(attp);
 300           attp=NULL;
 301         }
 302         sfree(aid);
 303       }
 304     }
 305     sfree(other_ndx);
 306   }
 307 }
 308
 309 /*! /brief Instances of this struct contain the data necessary to
 310  *         construct a single (protein) index group in
 311  *         analyse_prot(). */
 312 typedef struct gmx_help_make_index_group
 313 {
 314   /* The set of atom names that will be used to form this index group */
 315   const char **defining_atomnames;
 316   /* Size of the defining_atomnames array */
 317   const int num_defining_atomnames;
 318   /* Name of this index group */
 319   const char *group_name;
 320   /* Whether the above atom names name the atoms in the group, or
 321    * those not in the group */
 322   gmx_bool bTakeComplement;
 323   /* The index in wholename gives the first item in the arrays of
 324    * atomnames that should be tested with 'gmx_strncasecmp' in stead of
 325    * gmx_strcasecmp, or -1 if all items should be tested with strcasecmp
 326    * This is comparable to using a '*' wildcard at the end of specific
 327    * atom names, but that is more involved to implement...
 328    */
 329   int wholename;
 330   /* Only create this index group if it differs from the one specified in compareto,
 331      where -1 means to always create this group. */
 332   int compareto;
 333 } t_gmx_help_make_index_group;
 334
 335 static void analyse_prot(const char ** restype,t_atoms *atoms,
 336                          t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 337 {
 338   /* lists of atomnames to be used in constructing index groups: */
 339   static const char *pnoh[]    = { "H", "HN" };
 340   static const char *pnodum[]  = { "MN1",  "MN2",  "MCB1", "MCB2", "MCG1", "MCG2",
 341                              "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
 342   static const char *calpha[]  = { "CA" };
 343   static const char *bb[]      = { "N","CA","C" };
 344   static const char *mc[]      = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT" };
 345   static const char *mcb[]     = { "N","CA","CB","C","O","O1","O2","OC1","OC2","OT","OXT" };
 346   static const char *mch[]     = { "N","CA","C","O","O1","O2","OC1","OC2","OT","OXT",
 347                                    "H1","H2","H3","H","HN" };
 348
 349   static const t_gmx_help_make_index_group constructing_data[] =
 350     {{ NULL,   0, "Protein",      TRUE,  -1, -1},
 351      { pnoh,   asize(pnoh),   "Protein-H",    TRUE,  0,  -1},
 352      { calpha, asize(calpha), "C-alpha",      FALSE, -1, -1},
 353      { bb,     asize(bb),     "Backbone",     FALSE, -1, -1},
 354      { mc,     asize(mc),     "MainChain",    FALSE, -1, -1},
 355      { mcb,    asize(mcb),    "MainChain+Cb", FALSE, -1, -1},
 356      { mch,    asize(mch),    "MainChain+H",  FALSE, -1, -1},
 357      { mch,    asize(mch),    "SideChain",    TRUE,  -1, -1},
 358      { mch,    asize(mch),    "SideChain-H",  TRUE,  11, -1},
 359      { pnodum, asize(pnodum), "Prot-Masses",  TRUE,  -1, 0},
 360     };
 361   const int num_index_groups = asize(constructing_data);
 362
 363   int     n,j;
 364   atom_id *aid;
 365   int     nra,nnpres,npres;
 366   gmx_bool    match;
 367   char    ndx_name[STRLEN],*atnm;
 368   int i;
 369
 370   if (bVerb)
 371   {
 372     printf("Analysing Protein...\n");
 373   }
 374   snew(aid,atoms->nr);
 375
 376   /* calculate the number of protein residues */
 377   npres=0;
 378   for(i=0; (i<atoms->nres); i++) {
 379     if (0 == gmx_strcasecmp(restype[i],"Protein")) {
 380       npres++;
 381     }
 382   }
 383   /* find matching or complement atoms */
 384   for(i=0; (i<(int)num_index_groups); i++) {
 385     nra=0;
 386     for(n=0; (n<atoms->nr); n++) {
 387       if (0 == gmx_strcasecmp(restype[atoms->atom[n].resind],"Protein")) {
 388         match=FALSE;
 389         for(j=0; (j<constructing_data[i].num_defining_atomnames); j++) {
 390           /* skip digits at beginning of atomname, e.g. 1H */
 391           atnm=*atoms->atomname[n];
 392           while (isdigit(atnm[0])) {
 393             atnm++;
 394           }
 395           if ( (constructing_data[i].wholename==-1) || (j<constructing_data[i].wholename) ) {
 396             if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],atnm)) {
 397               match=TRUE;
 398             }
 399           } else {
 400             if (0 == gmx_strncasecmp(constructing_data[i].defining_atomnames[j],atnm,strlen(constructing_data[i].defining_atomnames[j]))) {
 401               match=TRUE;
 402             }
 403           }
 404         }
 405         if (constructing_data[i].bTakeComplement != match) {
 406           aid[nra++]=n;
 407         }
 408       }
 409     }
 410     /* if we want to add this group always or it differs from previous
 411        group, add it: */
 412     if ( -1 == constructing_data[i].compareto || !grp_cmp(gb,nra,aid,constructing_data[i].compareto-i) ) {
 413       add_grp(gb,gn,nra,aid,constructing_data[i].group_name);
 414     }
 415   }
 416
 417   if (bASK) {
 418     for(i=0; (i<(int)num_index_groups); i++) {
 419       printf("Split %12s into %5d residues (y/n) ? ",constructing_data[i].group_name,npres);
 420       if (gmx_ask_yesno(bASK)) {
 421         int resind;
 422         nra = 0;
 423         for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 424           resind = atoms->atom[n].resind;
 425           for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++) {
 426             match=FALSE;
 427             for(j=0;(j<constructing_data[i].num_defining_atomnames); j++) {
 428               if (0 == gmx_strcasecmp(constructing_data[i].defining_atomnames[j],*atoms->atomname[n])) {
 429                 match=TRUE;
 430               }
 431             }
 432             if (constructing_data[i].bTakeComplement != match) {
 433               aid[nra++]=n;
 434             }
 435           }
 436           /* copy the residuename to the tail of the groupname */
 437           if (nra > 0) {
 438             t_resinfo *ri;
 439             ri = &atoms->resinfo[resind];
 440             sprintf(ndx_name,"%s_%s%d%c",
 441                     constructing_data[i].group_name,*ri->name,ri->nr,ri->ic==' ' ? '\0' : ri->ic);
 442             add_grp(gb,gn,nra,aid,ndx_name);
 443             nra = 0;
 444           }
 445         }
 446       }
 447     }
 448     printf("Make group with sidechain and C=O swapped (y/n) ? ");
 449     if (gmx_ask_yesno(bASK)) {
 450       /* Make swap sidechain C=O index */
 451       int resind,hold;
 452       nra = 0;
 453       for(n=0;((atoms->atom[n].resind < npres) && (n<atoms->nr));) {
 454         resind = atoms->atom[n].resind;
 455         hold  = -1;
 456         for(;((atoms->atom[n].resind==resind) && (n<atoms->nr));n++)
 457           if (strcmp("CA",*atoms->atomname[n]) == 0) {
 458             aid[nra++]=n;
 459             hold=nra;
 460             nra+=2;
 461           } else if (strcmp("C",*atoms->atomname[n]) == 0) {
 462             if (hold == -1) {
 463               gmx_incons("Atom naming problem");
 464             }
 465             aid[hold]=n;
 466           } else if (strcmp("O",*atoms->atomname[n]) == 0) {
 467             if (hold == -1) {
 468               gmx_incons("Atom naming problem");
 469             }
 470             aid[hold+1]=n;
 471           } else if (strcmp("O1",*atoms->atomname[n]) == 0) {
 472             if (hold == -1) {
 473               gmx_incons("Atom naming problem");
 474             }
 475             aid[hold+1]=n;
 476           } else
 477             aid[nra++]=n;
 478       }
 479       /* copy the residuename to the tail of the groupname */
 480       if (nra > 0) {
 481         add_grp(gb,gn,nra,aid,"SwapSC-CO");
 482         nra = 0;
 483       }
 484     }
 485   }
 486   sfree(aid);
 487 }
 488
 489
 490
 491
 492 /* Return 0 if the name was found, otherwise -1.
 493  * p_restype is set to a pointer to the type name, or 'Other' if we did not find it.
 494  */
 495 int
 496 gmx_residuetype_get_type(gmx_residuetype_t rt,const char * resname, const char ** p_restype)
 497 {
 498     int    i,rc;
 499
 500     rc=-1;
 501     for(i=0;i<rt->n && rc;i++)
 502     {
 503         rc=gmx_strcasecmp(rt->resname[i],resname);
 504     }
 505
 506     *p_restype = (rc==0) ? rt->restype[i-1] : gmx_residuetype_undefined;
 507
 508     return rc;
 509 }
 510
 511 int
 512 gmx_residuetype_add(gmx_residuetype_t rt,const char *newresname, const char *newrestype)
 513 {
 514     int     i;
 515     int     found;
 516     const char *  p_oldtype;
 517
 518     found = !gmx_residuetype_get_type(rt,newresname,&p_oldtype);
 519
 520     if(found && gmx_strcasecmp(p_oldtype,newrestype))
 521     {
 522         fprintf(stderr,"Warning: Residue '%s' already present with type '%s' in database, ignoring new type '%s'.",
 523                 newresname,p_oldtype,newrestype);
 524     }
 525
 526     if(found==0)
 527     {
 528         srenew(rt->resname,rt->n+1);
 529         srenew(rt->restype,rt->n+1);
 530         rt->resname[rt->n]=strdup(newresname);
 531         rt->restype[rt->n]=strdup(newrestype);
 532         rt->n++;
 533     }
 534
 535     return 0;
 536 }
 537
 538
 539 int
 540 gmx_residuetype_init(gmx_residuetype_t *prt)
 541 {
 542     FILE *  db;
 543     char    line[STRLEN];
 544     char    resname[STRLEN],restype[STRLEN],dum[STRLEN];
 545     char *  p;
 546     int     i;
 547     struct gmx_residuetype *rt;
 548
 549     snew(rt,1);
 550     *prt=rt;
 551
 552     rt->n        = 0;
 553     rt->resname  = NULL;
 554     rt->restype = NULL;
 555
 556     db=libopen("residuetypes.dat");
 557
 558     while(get_a_line(db,line,STRLEN))
 559     {
 560         strip_comment(line);
 561         trim(line);
 562         if(line[0]!='\0')
 563         {
 564             if(sscanf(line,"%s %s %s",resname,restype,dum)!=2)
 565             {
 566                 gmx_fatal(FARGS,"Incorrect number of columns (2 expected) for line in residuetypes.dat");
 567             }
 568             gmx_residuetype_add(rt,resname,restype);
 569         }
 570     }
 571
 572     fclose(db);
 573
 574     return 0;
 575 }
 576
 577
 578
 579 int
 580 gmx_residuetype_destroy(gmx_residuetype_t rt)
 581 {
 582     int i;
 583
 584     for(i=0;i<rt->n;i++)
 585     {
 586         free(rt->resname[i]);
 587         free(rt->restype[i]);
 588     }
 589     free(rt);
 590
 591     return 0;
 592 }
 593
 594 int
 595 gmx_residuetype_get_alltypes(gmx_residuetype_t    rt,
 596                              const char ***       p_typenames,
 597                              int *                ntypes)
 598 {
 599     int      i,j,n;
 600     int      found;
 601     const char **  my_typename;
 602     char *   p;
 603
 604     n=0;
 605
 606     my_typename=NULL;
 607     for(i=0;i<rt->n;i++)
 608     {
 609         p=rt->restype[i];
 610         found=0;
 611         for(j=0;j<n && !found;j++)
 612         {
 613             found=!gmx_strcasecmp(p,my_typename[j]);
 614         }
 615
 616         if(!found)
 617         {
 618             srenew(my_typename,n+1);
 619             my_typename[n]=p;
 620             n++;
 621         }
 622     }
 623     *ntypes=n;
 624     *p_typenames=my_typename;
 625
 626     return 0;
 627 }
 628
 629
 630
 631 gmx_bool
 632 gmx_residuetype_is_protein(gmx_residuetype_t rt, const char *resnm)
 633 {
 634     gmx_bool rc;
 635     const char *p_type;
 636
 637     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 638        gmx_strcasecmp(p_type,"Protein")==0)
 639     {
 640         rc=TRUE;
 641     }
 642     else
 643     {
 644         rc=FALSE;
 645     }
 646     return rc;
 647 }
 648
 649 gmx_bool
 650 gmx_residuetype_is_dna(gmx_residuetype_t rt, const char *resnm)
 651 {
 652     gmx_bool rc;
 653     const char *p_type;
 654
 655     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 656        gmx_strcasecmp(p_type,"DNA")==0)
 657     {
 658         rc=TRUE;
 659     }
 660     else
 661     {
 662         rc=FALSE;
 663     }
 664     return rc;
 665 }
 666
 667 gmx_bool
 668 gmx_residuetype_is_rna(gmx_residuetype_t rt, const char *resnm)
 669 {
 670     gmx_bool rc;
 671     const char *p_type;
 672
 673     if(gmx_residuetype_get_type(rt,resnm,&p_type)==0 &&
 674        gmx_strcasecmp(p_type,"RNA")==0)
 675     {
 676         rc=TRUE;
 677     }
 678     else
 679     {
 680         rc=FALSE;
 681     }
 682     return rc;
 683 }
 684
 685 /* Return the size of the arrays */
 686 int
 687 gmx_residuetype_get_size(gmx_residuetype_t rt)
 688 {
 689     return rt->n;
 690 }
 691
 692 /* Search for a residuetype with name resnm within the
 693  * gmx_residuetype database. Return the index if found,
 694  * otherwise -1.
 695  */
 696 int
 697 gmx_residuetype_get_index(gmx_residuetype_t rt, const char *resnm)
 698 {
 699     int i,rc;
 700
 701     rc=-1;
 702     for(i=0;i<rt->n && rc;i++)
 703     {
 704         rc=gmx_strcasecmp(rt->resname[i],resnm);
 705     }
 706
 707     return (0 == rc) ? i-1 : -1;
 708 }
 709
 710 /* Return the name of the residuetype with the given index, or
 711  * NULL if not found. */
 712 const char *
 713 gmx_residuetype_get_name(gmx_residuetype_t rt, int index)
 714 {
 715   if(index >= 0 && index < rt->n) {
 716     return rt->resname[index];
 717   } else {
 718     return NULL;
 719   }
 720 }
 721
 722
 723
 724 void analyse(t_atoms *atoms,t_blocka *gb,char ***gn,gmx_bool bASK,gmx_bool bVerb)
 725 {
 726     gmx_residuetype_t rt;
 727     char    *resnm;
 728     atom_id *aid;
 729     const char **  restype;
 730     int     nra;
 731     int     i,k;
 732     size_t  j;
 733     int     ntypes;
 734     char *  p;
 735     const char ** p_typename;
 736     int     iwater,iion;
 737     int     nwater,nion;
 738     int     found;
 739
 740     if (bVerb)
 741     {
 742         printf("Analysing residue names:\n");
 743     }
 744     /* Create system group, every single atom */
 745     snew(aid,atoms->nr);
 746     for(i=0;i<atoms->nr;i++)
 747     {
 748         aid[i]=i;
 749     }
 750     add_grp(gb,gn,atoms->nr,aid,"System");
 751     sfree(aid);
 752
 753     /* For every residue, get a pointer to the residue type name */
 754     gmx_residuetype_init(&rt);
 755
 756     snew(restype,atoms->nres);
 757     ntypes = 0;
 758     p_typename = NULL;
 759     for(i=0;i<atoms->nres;i++)
 760     {
 761         resnm = *atoms->resinfo[i].name;
 762         gmx_residuetype_get_type(rt,resnm,&(restype[i]));
 763
 764         /* Note that this does not lead to a N*N loop, but N*K, where
 765          * K is the number of residue _types_, which is small and independent of N.
 766          */
 767         found = 0;
 768         for(k=0;k<ntypes && !found;k++)
 769         {
 770             found = !strcmp(restype[i],p_typename[k]);
 771         }
 772         if(!found)
 773         {
 774             srenew(p_typename,ntypes+1);
 775             p_typename[ntypes++] = strdup(restype[i]);
 776         }
 777     }
 778
 779     if (bVerb)
 780     {
 781         p_status(restype,atoms->nres,p_typename,ntypes);
 782     }
 783
 784     for(k=0;k<ntypes;k++)
 785     {
 786         aid=mk_aid(atoms,restype,p_typename[k],&nra,TRUE);
 787
 788         /* Check for special types to do fancy stuff with */
 789
 790         if(!gmx_strcasecmp(p_typename[k],"Protein") && nra>0)
 791         {
 792             sfree(aid);
 793             /* PROTEIN */
 794             analyse_prot(restype,atoms,gb,gn,bASK,bVerb);
 795
 796             /* Create a Non-Protein group */
 797             aid=mk_aid(atoms,restype,"Protein",&nra,FALSE);
 798             if ((nra > 0) && (nra < atoms->nr))
 799             {
 800                 add_grp(gb,gn,nra,aid,"non-Protein");
 801             }
 802             sfree(aid);
 803         }
 804         else if(!gmx_strcasecmp(p_typename[k],"Water") && nra>0)
 805         {
 806             add_grp(gb,gn,nra,aid,p_typename[k]);
 807             /* Add this group as 'SOL' too, for backward compatibility with older gromacs versions */
 808             add_grp(gb,gn,nra,aid,"SOL");
 809
 810             sfree(aid);
 811
 812             /* Solvent, create a negated group too */
 813             aid=mk_aid(atoms,restype,"Water",&nra,FALSE);
 814             if ((nra > 0) && (nra < atoms->nr))
 815             {
 816                 add_grp(gb,gn,nra,aid,"non-Water");
 817             }
 818             sfree(aid);
 819         }
 820         else if(nra>0)
 821         {
 822             /* Other groups */
 823             add_grp(gb,gn,nra,aid,p_typename[k]);
 824             sfree(aid);
 825             analyse_other(restype,atoms,gb,gn,bASK,bVerb);
 826         }
 827     }
 828
 829     sfree(p_typename);
 830     sfree(restype);
 831     gmx_residuetype_destroy(rt);
 832
 833     /* Create a merged water_and_ions group */
 834     iwater = -1;
 835     iion   = -1;
 836     nwater = 0;
 837     nion   = 0;
 838
 839     for(i=0;i<gb->nr;i++)
 840     {
 841         if(!gmx_strcasecmp((*gn)[i],"Water"))
 842         {
 843             iwater = i;
 844             nwater = gb->index[i+1]-gb->index[i];
 845         }
 846         else if(!gmx_strcasecmp((*gn)[i],"Ion"))
 847         {
 848             iion = i;
 849             nion = gb->index[i+1]-gb->index[i];
 850         }
 851     }
 852
 853     if(nwater>0 && nion>0)
 854     {
 855         srenew(gb->index,gb->nr+2);
 856         srenew(*gn,gb->nr+1);
 857         (*gn)[gb->nr] = strdup("Water_and_ions");
 858         srenew(gb->a,gb->nra+nwater+nion);
 859         if(nwater>0)
 860         {
 861             for(i=gb->index[iwater];i<gb->index[iwater+1];i++)
 862             {
 863                 gb->a[gb->nra++] = gb->a[i];
 864             }
 865         }
 866         if(nion>0)
 867         {
 868             for(i=gb->index[iion];i<gb->index[iion+1];i++)
 869             {
 870                 gb->a[gb->nra++] = gb->a[i];
 871             }
 872         }
 873         gb->nr++;
 874         gb->index[gb->nr]=gb->nra;
 875     }
 876 }
 877
 878
 879 void check_index(char *gname,int n,atom_id index[],char *traj,int natoms)
 880 {
 881   int i;
 882
 883   for(i=0; i<n; i++)
 884     if (index[i] >= natoms)
 885       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
 886                   gname ? gname : "Index",i+1, index[i]+1,
 887                   traj ? traj : "the trajectory",natoms);
 888     else if (index[i] < 0)
 889       gmx_fatal(FARGS,"%s atom number (index[%d]=%d) is less than zero",
 890                 gname ? gname : "Index",i+1, index[i]+1);
 891 }
 892
 893 t_blocka *init_index(const char *gfile, char ***grpname)
 894 {
 895   FILE     *in;
 896   t_blocka  *b;
 897   int      a,maxentries;
 898   int      i,j,ng,nread;
 899   char     line[STRLEN],*pt,str[STRLEN];
 900
 901   in=gmx_fio_fopen(gfile,"r");
 902   snew(b,1);
 903   get_a_line(in,line,STRLEN);
 904   if ( line[0]=='[' ) {
 905     /* new format */
 906     b->nr=0;
 907     b->index=NULL;
 908     b->nra=0;
 909     b->a=NULL;
 910     *grpname=NULL;
 911     maxentries=0;
 912     do {
 913       if (get_header(line,str)) {
 914         b->nr++;
 915         srenew(b->index,b->nr+1);
 916         srenew(*grpname,b->nr);
 917         if (b->nr==1)
 918           b->index[0]=0;
 919         b->index[b->nr]=b->index[b->nr-1];
 920         (*grpname)[b->nr-1]=strdup(str);
 921       } else {
 922         pt=line;
 923         while ((i=sscanf(pt,"%s",str)) == 1) {
 924           i=b->index[b->nr];
 925           if (i>=maxentries) {
 926             maxentries+=1024;
 927             srenew(b->a,maxentries);
 928           }
 929           b->a[i]=strtol(str, NULL, 10)-1;
 930           b->index[b->nr]++;
 931           (b->nra)++;
 932           pt=strstr(pt,str)+strlen(str);
 933         }
 934       }
 935     } while (get_a_line(in,line,STRLEN));
 936   }
 937   else {
 938     /* old format */
 939     sscanf(line,"%d%d",&b->nr,&b->nra);
 940     snew(b->index,b->nr+1);
 941     snew(*grpname,b->nr);
 942     b->index[0]=0;
 943     snew(b->a,b->nra);
 944     for (i=0; (i<b->nr); i++) {
 945       nread=fscanf(in,"%s%d",str,&ng);
 946       (*grpname)[i]=strdup(str);
 947       b->index[i+1]=b->index[i]+ng;
 948       if (b->index[i+1] > b->nra)
 949         gmx_fatal(FARGS,"Something wrong in your indexfile at group %s",str);
 950       for(j=0; (j<ng); j++) {
 951         nread=fscanf(in,"%d",&a);
 952         b->a[b->index[i]+j]=a;
 953       }
 954     }
 955   }
 956   gmx_fio_fclose(in);
 957
 958   for(i=0; (i<b->nr); i++) {
 959     for(j=b->index[i]; (j<b->index[i+1]); j++) {
 960       if (b->a[j] < 0)
 961         fprintf(stderr,"\nWARNING: negative index %d in group %s\n\n",
 962                 b->a[j],(*grpname)[i]);
 963     }
 964   }
 965
 966   return b;
 967 }
 968
 969 static void minstring(char *str)
 970 {
 971   int i;
 972
 973   for (i=0; (i < (int)strlen(str)); i++)
 974     if (str[i]=='-')
 975       str[i]='_';
 976 }
 977
 978 int find_group(char s[], int ngrps, char **grpname)
 979 {
 980   int aa, i, n;
 981   char string[STRLEN];
 982   gmx_bool bMultiple;
 983
 984   bMultiple = FALSE;
 985   n = strlen(s);
 986   aa=NOTSET;
 987   /* first look for whole name match */
 988   if (aa==NOTSET)
 989     for(i=0; i<ngrps; i++)
 990       if (gmx_strcasecmp_min(s,grpname[i])==0) {
 991         if(aa!=NOTSET)
 992           bMultiple = TRUE;
 993         aa=i;
 994       }
 995   /* second look for first string match */
 996   if (aa==NOTSET)
 997     for(i=0; i<ngrps; i++)
 998       if (gmx_strncasecmp_min(s,grpname[i],n)==0) {
 999         if(aa!=NOTSET)
1000           bMultiple = TRUE;
1001         aa=i;
1002       }
1003   /* last look for arbitrary substring match */
1004   if (aa==NOTSET) {
1005     upstring(s);
1006     minstring(s);
1007     for(i=0; i<ngrps; i++) {
1008       strcpy(string, grpname[i]);
1009       upstring(string);
1010       minstring(string);
1011       if (strstr(string,s)!=NULL) {
1012         if(aa!=NOTSET)
1013           bMultiple = TRUE;
1014         aa=i;
1015       }
1016     }
1017   }
1018   if (bMultiple) {
1019     printf("Error: Multiple groups '%s' selected\n", s);
1020     aa=NOTSET;
1021   }
1022   return aa;
1023 }
1024
1025 static int qgroup(int *a, int ngrps, char **grpname)
1026 {
1027     char s[STRLEN];
1028     int  aa;
1029     gmx_bool bInRange;
1030     char *end;
1031
1032     do {
1033         fprintf(stderr,"Select a group: ");
1034         do {
1035             if ( scanf("%s",s)!=1 )
1036                 gmx_fatal(FARGS,"Cannot read from input");
1037             trim(s); /* remove spaces */
1038         } while (strlen(s)==0);
1039         aa = strtol(s, &end, 10);
1040         if (aa==0 && end[0] != '\0') /* string entered */
1041             aa = find_group(s, ngrps, grpname);
1042         bInRange = (aa >= 0 && aa < ngrps);
1043         if (!bInRange)
1044             printf("Error: No such group '%s'\n", s);
1045     } while (!bInRange);
1046     printf("Selected %d: '%s'\n", aa, grpname[aa]);
1047     *a = aa;
1048     return aa;
1049 }
1050
1051 static void rd_groups(t_blocka *grps,char **grpname,char *gnames[],
1052                       int ngrps,int isize[],atom_id *index[],int grpnr[])
1053 {
1054   int i,j,gnr1;
1055
1056   if (grps->nr==0)
1057     gmx_fatal(FARGS,"Error: no groups in indexfile");
1058   for(i=0; (i<grps->nr); i++)
1059     fprintf(stderr,"Group %5d (%15s) has %5d elements\n",i,grpname[i],
1060            grps->index[i+1]-grps->index[i]);
1061   for(i=0; (i<ngrps); i++) {
1062     if (grps->nr > 1)
1063       do {
1064         gnr1=qgroup(&grpnr[i], grps->nr, grpname);
1065         if ((gnr1<0) || (gnr1>=grps->nr))
1066           fprintf(stderr,"Select between %d and %d.\n",0,grps->nr-1);
1067       } while ((gnr1<0) || (gnr1>=grps->nr));
1068     else {
1069       fprintf(stderr,"There is one group in the index\n");
1070       gnr1=0;
1071     }
1072     gnames[i]=strdup(grpname[gnr1]);
1073     isize[i]=grps->index[gnr1+1]-grps->index[gnr1];
1074     snew(index[i],isize[i]);
1075     for(j=0; (j<isize[i]); j++)
1076       index[i][j]=grps->a[grps->index[gnr1]+j];
1077   }
1078 }
1079
1080 void rd_index(const char *statfile,int ngrps,int isize[],
1081               atom_id *index[],char *grpnames[])
1082 {
1083   char    **gnames;
1084   t_blocka *grps;
1085   int     *grpnr;
1086
1087   snew(grpnr,ngrps);
1088   if (!statfile)
1089     gmx_fatal(FARGS,"No index file specified");
1090   grps=init_index(statfile,&gnames);
1091   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
1092 }
1093
1094 void rd_index_nrs(char *statfile,int ngrps,int isize[],
1095                   atom_id *index[],char *grpnames[],int grpnr[])
1096 {
1097   char    **gnames;
1098   t_blocka *grps;
1099
1100   if (!statfile)
1101     gmx_fatal(FARGS,"No index file specified");
1102   grps=init_index(statfile,&gnames);
1103
1104   rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
1105 }
1106
1107 void get_index(t_atoms *atoms, const char *fnm, int ngrps,
1108                int isize[], atom_id *index[],char *grpnames[])
1109 {
1110   char    ***gnames;
1111   t_blocka *grps = NULL;
1112   int     *grpnr;
1113
1114   snew(grpnr,ngrps);
1115   snew(gnames,1);
1116   if (fnm != NULL) {
1117     grps=init_index(fnm,gnames);
1118   }
1119   else if (atoms) {
1120     snew(grps,1);
1121     snew(grps->index,1);
1122     analyse(atoms,grps,gnames,FALSE,FALSE);
1123   }
1124   else
1125     gmx_incons("You need to supply a valid atoms structure or a valid index file name");
1126
1127   rd_groups(grps,*gnames,grpnames,ngrps,isize,index,grpnr);
1128 }
1129
1130 t_cluster_ndx *cluster_index(FILE *fplog,const char *ndx)
1131 {
1132   t_cluster_ndx *c;
1133   int i;
1134
1135   snew(c,1);
1136   c->clust     = init_index(ndx,&c->grpname);
1137   c->maxframe = -1;
1138   for(i=0; (i<c->clust->nra); i++)
1139     c->maxframe = max(c->maxframe,c->clust->a[i]);
1140   fprintf(fplog ? fplog : stdout,
1141           "There are %d clusters containing %d structures, highest framenr is %d\n",
1142           c->clust->nr,c->clust->nra,c->maxframe);
1143   if (debug) {
1144     pr_blocka(debug,0,"clust",c->clust,TRUE);
1145     for(i=0; (i<c->clust->nra); i++)
1146       if ((c->clust->a[i] < 0) || (c->clust->a[i] > c->maxframe))
1147         gmx_fatal(FARGS,"Range check error for c->clust->a[%d] = %d\n"
1148                   "should be within 0 and %d",i,c->clust->a[i],c->maxframe+1);
1149   }
1150   c->inv_clust=make_invblocka(c->clust,c->maxframe);
1151
1152   return c;
1153 }
1154