Added conditional inclusion of config.h to source files
[gromacs.git] / src / gmxlib / index.c
blob4652b78c6368b68075f3d4791d345692837bb1f2
1 /*
2 * $Id$
3 *
4 * This source code is part of
5 *
6 * G R O M A C S
7 *
8 * GROningen MAchine for Chemical Simulations
9 *
10 * VERSION 3.2.0
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
33 * And Hey:
34 * GROningen Mixture of Alchemy and Childrens' Stories
36 #ifdef HAVE_CONFIG_H
37 #include <config.h>
38 #endif
40 #include <ctype.h>
41 #include <string.h>
42 #include "sysstuff.h"
43 #include "strdb.h"
44 #include "futil.h"
45 #include "macros.h"
46 #include "names.h"
47 #include "string2.h"
48 #include "statutil.h"
49 #include "confio.h"
50 #include "assert.h"
51 #include "copyrite.h"
52 #include "typedefs.h"
53 #include "smalloc.h"
54 #include "index.h"
56 typedef enum { etOther, etProt, etDNA, erestNR } eRestp;
57 static const char *ResTP[erestNR] = { "OTHER", "PROTEIN", "DNA" };
59 static const char *Sugars[] = { "A", "T", "G", "C", "U" };
60 #define NDNA asize(Sugars)
62 static bool yn(bool bASK)
64 char c;
66 if (bASK) {
67 do {
68 c=toupper(fgetc(stdin));
69 } while ((c != 'Y') && (c != 'N'));
71 return (c == 'Y');
73 else
74 return FALSE;
77 t_block *new_block(void)
79 t_block *block;
81 snew(block,1);
82 snew(block->index,1);
84 return block;
87 void write_index(char *outf, t_block *b,char **gnames)
89 FILE *out;
90 int i,j,k;
92 out=ffopen(outf,"w");
93 /* fprintf(out,"%5d %5d\n",b->nr,b->nra); */
94 for(i=0; (i<b->nr); i++) {
95 fprintf(out,"[ %s ]\n",gnames[i]);
96 for(k=0,j=b->index[i]; j<b->index[i+1]; j++,k++) {
97 fprintf(out,"%4d ",b->a[j]+1);
98 if ((k % 15) == 14)
99 fprintf(out,"\n");
101 fprintf(out,"\n");
103 fclose(out);
106 void add_grp(t_block *b,char ***gnames,int nra,atom_id a[],const char *name)
108 int i;
110 srenew(b->index,b->nr+2);
111 srenew(*gnames,b->nr+1);
112 (*gnames)[b->nr]=strdup(name);
114 srenew(b->a,b->nra+nra);
115 for(i=0; (i<nra); i++)
116 b->a[b->nra++]=a[i];
117 b->nr++;
118 b->index[b->nr]=b->nra;
121 /* compare index in `a' with group in `b' at `index',
122 when `index'<0 it is relative to end of `b' */
123 static bool grp_cmp(t_block *b, int nra, atom_id a[], int index)
125 int i;
127 if (index < 0)
128 index = b->nr-1+index;
129 if (index >= b->nr)
130 fatal_error(0,"no such index group %d in t_block (nr=%d)",index,b->nr);
131 /* compare sizes */
132 if ( nra != b->index[index+1] - b->index[index] )
133 return FALSE;
134 for(i=0; i<nra; i++)
135 if ( a[i] != b->a[b->index[index]+i] )
136 return FALSE;
137 return TRUE;
140 static void p_status(int nres,eRestp restp[],bool bVerb)
142 int i,j,ntp[erestNR];
144 for(i=0; (i<erestNR); i++)
145 ntp[i]=0;
146 for(j=0; (j<nres); j++)
147 ntp[restp[j]]++;
149 if (bVerb)
150 for(i=0; (i<erestNR); i++)
151 printf("There are: %5d %10s residues\n",ntp[i],ResTP[i]);
154 atom_id *mk_aid(t_atoms *atoms,eRestp restp[],eRestp res,int *nra,
155 bool bTrue)
156 /* Make an array of atom_ids for all atoms with:
157 * (restp[i] == res) == bTrue
160 atom_id *a;
161 int i;
163 snew(a,atoms->nr);
164 *nra=0;
165 for(i=0; (i<atoms->nr); i++)
166 if ((restp[atoms->atom[i].resnr] == res) == bTrue)
167 a[(*nra)++]=i;
169 return a;
172 static void analyse_other(eRestp Restp[],t_atoms *atoms,
173 t_block *gb,char ***gn,bool bASK,bool bVerb)
175 char **restp=NULL;
176 char **attp=NULL;
177 char *rname,*aname;
178 atom_id *other_ndx,*aid,*aaid;
179 int i,j,k,l,resnr,naid,naaid,natp,nrestp=0;
181 for(i=0; (i<atoms->nres); i++)
182 if (Restp[i] == etOther)
183 break;
184 if (i < atoms->nres) {
185 /* we have others */
186 if (bVerb)
187 printf("Analysing Other...\n");
188 snew(other_ndx,atoms->nr);
189 for(k=0; (k<atoms->nr); k++) {
190 resnr=atoms->atom[k].resnr;
191 rname=*atoms->resname[resnr];
192 if (Restp[resnr] == etOther) {
193 for(l=0; (l<nrestp); l++)
194 if (strcmp(restp[l],rname) == 0)
195 break;
196 if (l==nrestp) {
197 srenew(restp,++nrestp);
198 restp[nrestp-1]=strdup(rname);
202 for(i=0; (i<nrestp); i++) {
203 snew(aid,atoms->nr);
204 naid=0;
205 for(j=0; (j<atoms->nr); j++) {
206 rname=*atoms->resname[atoms->atom[j].resnr];
207 if (strcmp(restp[i],rname) == 0)
208 aid[naid++] = j;
210 add_grp(gb,gn,naid,aid,restp[i]);
211 if (bASK) {
212 printf("split %s into atoms (y/n) ? ",restp[i]);
213 fflush(stdout);
214 if (yn(bASK)) {
215 natp=0;
216 for(k=0; (k<naid); k++) {
217 aname=*atoms->atomname[aid[k]];
218 for(l=0; (l<natp); l++)
219 if (strcmp(aname,attp[l]) == 0)
220 break;
221 if (l == natp) {
222 srenew(attp,++natp);
223 attp[natp-1]=aname;
226 if (natp > 1) {
227 for(l=0; (l<natp); l++) {
228 snew(aaid,naid);
229 naaid=0;
230 for(k=0; (k<naid); k++) {
231 aname=*atoms->atomname[aid[k]];
232 if (strcmp(aname,attp[l])==0)
233 aaid[naaid++]=aid[k];
235 add_grp(gb,gn,naaid,aaid,attp[l]);
236 sfree(aaid);
239 sfree(attp);
240 attp=NULL;
242 sfree(aid);
245 sfree(other_ndx);
249 static void analyse_prot(eRestp restp[],t_atoms *atoms,
250 t_block *gb,char ***gn,bool bASK,bool bVerb)
252 /* atomnames to be used in constructing index groups: */
253 const char *pnoh[] = { "H" };
254 const char *pnodum[] = { "MN1", "MN2", "MCB1", "MCB2", "MCG1", "MCG2",
255 "MCD1", "MCD2", "MCE1", "MCE2", "MNZ1", "MNZ2" };
256 const char *calpha[] = { "CA" };
257 const char *bb[] = { "N","CA","C" };
258 const char *mc[] = { "N","CA","C","O","O1","O2","OXT" };
259 const char *mcb[] = { "N","CA","CB","C","O","O1","O2","OT","OXT" };
260 const char *mch[] = { "N","CA","C","O","O1","O2","OT","OXT",
261 "H1","H2","H3","H" };
262 /* array of arrays of atomnames: */
263 const char **chains[] = { NULL,pnoh,calpha,bb,mc,mcb,mch,mch,mch,pnodum };
264 #define NCH asize(chains)
265 /* array of sizes of arrays of atomnames: */
266 const int sizes[NCH] = {
267 0, asize(pnoh), asize(calpha), asize(bb),
268 asize(mc), asize(mcb), asize(mch), asize(mch), asize(mch), asize(pnodum)
270 /* descriptive names of index groups */
271 const char *ch_name[NCH] = {
272 "Protein", "Protein-H", "C-alpha", "Backbone",
273 "MainChain", "MainChain+Cb", "MainChain+H", "SideChain", "SideChain-H",
274 "Prot-Masses"
276 /* construct index group containing (TRUE) or excluding (FALSE)
277 given atom names */
278 const bool complement[NCH] = {
279 TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE
281 const int wholename[NCH] = { -1, 0,-1,-1,-1,-1,-1,-1, 11,-1 };
282 /* the index in wholename gives the first item in the arrays of
283 * atomtypes that should be tested with 'strncasecmp' in stead of
284 * strcasecmp, or -1 if all items should be tested with strcasecmp
285 * This is comparable to using a '*' wildcard at the end of specific
286 * atom names, but that is more involved to implement...
288 /* only add index group if it differs from the specified one,
289 specify -1 to always add group */
290 const int compareto[NCH] = { -1,-1,-1,-1,-1,-1,-1,-1,-1, 0 };
292 int i,n,j;
293 atom_id *aid;
294 int nra,nnpres,npres;
295 bool match;
296 char ndx_name[STRLEN],*atnm;
298 if (bVerb)
299 printf("Analysing Protein...\n");
300 snew(aid,atoms->nr);
302 /* calculate the number of protein residues */
303 npres=0;
304 for(i=0; (i<atoms->nres); i++)
305 if (restp[i] == etProt)
306 npres++;
308 /* find matching or complement atoms */
309 for(i=0; (i<NCH); i++) {
310 nra=0;
311 for(n=0; (n<atoms->nr); n++) {
312 if (restp[atoms->atom[n].resnr] == etProt) {
313 match=FALSE;
314 for(j=0; (j<sizes[i]); j++) {
315 /* skip digits at beginning of atomname, e.g. 1H */
316 atnm=*atoms->atomname[n];
317 while (isdigit(atnm[0]))
318 atnm++;
319 if ( (wholename[i]==-1) || (j<wholename[i]) ) {
320 if (strcasecmp(chains[i][j],atnm) == 0)
321 match=TRUE;
322 } else {
323 if (strncasecmp(chains[i][j],atnm,strlen(chains[i][j])) == 0)
324 match=TRUE;
327 if (match != complement[i])
328 aid[nra++]=n;
331 /* if we want to add this group always or it differs from previous
332 group, add it: */
333 if ( compareto[i] == -1 || !grp_cmp(gb,nra,aid,compareto[i]-i) )
334 add_grp(gb,gn,nra,aid,ch_name[i]);
337 if (bASK) {
338 for(i=0; (i<NCH); i++) {
339 printf("Split %12s into %5d residues (y/n) ? ",ch_name[i],npres);
340 if (yn(bASK)) {
341 int resnr;
342 nra = 0;
343 for(n=0;((atoms->atom[n].resnr<npres) && (n<atoms->nr));) {
344 resnr = atoms->atom[n].resnr;
345 for(;((atoms->atom[n].resnr==resnr) && (n<atoms->nr));n++) {
346 match=FALSE;
347 for(j=0;(j<sizes[i]); j++)
348 if (strcasecmp(chains[i][j],*atoms->atomname[n]) == 0)
349 match=TRUE;
350 if (match != complement[i])
351 aid[nra++]=n;
353 /* copy the residuename to the tail of the groupname */
354 if (nra > 0) {
355 sprintf(ndx_name,"%s_%s%d",
356 ch_name[i],*atoms->resname[resnr],resnr+1);
357 add_grp(gb,gn,nra,aid,ndx_name);
358 nra = 0;
363 printf("Make group with sidechain and C=O swapped (y/n) ? ");
364 if (yn(bASK)) {
365 /* Make swap sidechain C=O index */
366 int resnr,hold;
367 nra = 0;
368 for(n=0;((atoms->atom[n].resnr<npres) && (n<atoms->nr));) {
369 resnr = atoms->atom[n].resnr;
370 hold = -1;
371 for(;((atoms->atom[n].resnr==resnr) && (n<atoms->nr));n++)
372 if (strcmp("CA",*atoms->atomname[n]) == 0) {
373 aid[nra++]=n;
374 hold=nra;
375 nra+=2;
376 } else if (strcmp("C",*atoms->atomname[n]) == 0) {
377 assert(hold != -1);
378 aid[hold]=n;
379 } else if (strcmp("O",*atoms->atomname[n]) == 0) {
380 assert(hold != -1);
381 aid[hold+1]=n;
382 } else if (strcmp("O1",*atoms->atomname[n]) == 0) {
383 assert(hold != -1);
384 aid[hold+1]=n;
385 } else
386 aid[nra++]=n;
388 /* copy the residuename to the tail of the groupname */
389 if (nra > 0) {
390 add_grp(gb,gn,nra,aid,"SwapSC-CO");
391 nra = 0;
395 sfree(aid);
398 static void analyse_dna(eRestp restp[],t_atoms *atoms,
399 t_block *gb,char ***gn,bool bASK,bool bVerb)
401 if (bVerb)
402 printf("Analysing DNA... (not really)\n");
403 if (debug)
404 printf("eRestp %p; atoms %p; gb %p; gn %p; bASK %s; bASK %s",
405 (void *)restp, (void *)atoms, (void *)gb, (void *)gn,
406 bool_names[bASK], bool_names[bVerb]);
409 t_aa_names *get_aa_names(void)
411 /* Read the database in aminoacids.dat */
412 t_aa_names *aan;
414 snew(aan,1);
416 aan->n = get_strings("aminoacids.dat",&(aan->aa));
417 /* qsort(aan->aa,aan->n,sizeof(aan->aa[0]),strcmp);*/
419 return aan;
422 bool is_protein(t_aa_names *aan,char *resnm)
424 /* gives true if resnm occurs in aminoacids.dat */
425 int i;
427 for(i=0; (i<aan->n); i++)
428 if (strcasecmp(aan->aa[i],resnm) == 0)
429 return TRUE;
431 return FALSE;
434 void done_aa_names(t_aa_names **aan)
436 /* Free memory */
437 int i;
439 for(i=0; (i<(*aan)->n); i++)
440 sfree((*aan)->aa[i]);
441 sfree((*aan)->aa);
442 sfree(*aan);
443 *aan = NULL;
446 void analyse(t_atoms *atoms,t_block *gb,char ***gn,bool bASK,bool bVerb)
448 t_aa_names *aan;
449 eRestp *restp;
450 char *resnm;
451 atom_id *aid;
452 int nra;
453 int i,j;
455 if (bVerb)
456 printf("Analysing residue names:\n");
457 snew(restp,atoms->nres);
458 aid=mk_aid(atoms,restp,etOther,&nra,TRUE);
459 add_grp(gb,gn,nra,aid,"System");
460 sfree(aid);
462 aan = get_aa_names();
463 for(i=0; (i<atoms->nres); i++) {
464 resnm=*atoms->resname[i];
465 if ((restp[i] == etOther) && is_protein(aan,resnm))
466 restp[i] = etProt;
467 if (restp[i] == etOther)
468 for(j=0; (j<NDNA); j++) {
469 if (strcasecmp(Sugars[j],resnm) == 0)
470 restp[i] = etDNA;
473 p_status(atoms->nres,restp,bVerb);
474 done_aa_names(&aan);
476 /* Protein */
477 aid=mk_aid(atoms,restp,etProt,&nra,TRUE);
478 if (nra > 0)
479 analyse_prot(restp,atoms,gb,gn,bASK,bVerb);
481 sfree(aid);
483 /* Non-Protein */
484 aid=mk_aid(atoms,restp,etProt,&nra,FALSE);
485 if ((nra > 0) && (nra < atoms->nr))
486 add_grp(gb,gn,nra,aid,"Non-Protein");
487 sfree(aid);
489 /* DNA */
490 aid=mk_aid(atoms,restp,etDNA,&nra,TRUE);
491 if (nra > 0) {
492 add_grp(gb,gn,nra,aid,"DNA");
493 analyse_dna(restp,atoms,gb,gn,bASK,bVerb);
495 sfree(aid);
497 /* Other */
498 analyse_other(restp,atoms,gb,gn,bASK,bVerb);
499 aid=mk_aid(atoms,restp,etOther,&nra,TRUE);
500 if ((nra > 0) && (nra < atoms->nr))
501 add_grp(gb,gn,nra,aid,"Other");
502 sfree(aid);
503 sfree(restp);
506 void check_index(char *gname,int n,atom_id index[],char *traj,int natoms)
508 int i;
510 for(i=0; i<n; i++)
511 if (index[i] >= natoms)
512 fatal_error(0,"%s atom number (index[%d]=%d) is larger than the number of atoms in %s (%d)",
513 gname ? gname : "Index",i+1, index[i]+1,
514 traj ? traj : "the trajectory",natoms);
517 t_block *init_index(char *gfile, char ***grpname)
519 FILE *in;
520 t_block *b;
521 int a,maxentries;
522 int i,j,ng;
523 char line[STRLEN],*pt,str[STRLEN];
525 in=ffopen(gfile,"r");
526 snew(b,1);
527 get_a_line(in,line,STRLEN);
528 if ( line[0]=='[' ) {
529 /* new format */
530 b->nr=0;
531 b->index=NULL;
532 b->nra=0;
533 b->a=NULL;
534 *grpname=NULL;
535 maxentries=0;
536 do {
537 if (get_header(line,str)) {
538 b->nr++;
539 srenew(b->index,b->nr+1);
540 srenew(*grpname,b->nr);
541 if (b->nr==1)
542 b->index[0]=0;
543 b->index[b->nr]=b->index[b->nr-1];
544 (*grpname)[b->nr-1]=strdup(str);
545 } else {
546 pt=line;
547 while ((i=sscanf(pt,"%s",str)) == 1) {
548 i=b->index[b->nr];
549 if (i>=maxentries) {
550 maxentries+=100;
551 srenew(b->a,maxentries);
553 b->a[i]=atoi(str)-1;
554 b->index[b->nr]++;
555 (b->nra)++;
556 pt=strstr(pt,str)+strlen(str);
559 } while (get_a_line(in,line,STRLEN));
561 else {
562 /* old format */
563 sscanf(line,"%d%d",&b->nr,&b->nra);
564 snew(b->index,b->nr+1);
565 snew(*grpname,b->nr);
566 b->index[0]=0;
567 snew(b->a,b->nra);
568 for (i=0; (i<b->nr); i++) {
569 fscanf(in,"%s%d",str,&ng);
570 (*grpname)[i]=strdup(str);
571 b->index[i+1]=b->index[i]+ng;
572 if (b->index[i+1] > b->nra)
573 fatal_error(0,"Something wrong in your indexfile at group %s",str);
574 for(j=0; (j<ng); j++) {
575 fscanf(in,"%d",&a);
576 b->a[b->index[i]+j]=a;
580 ffclose(in);
582 return b;
585 static void minstring(char *str)
587 int i;
589 for (i=0; (i < (int)strlen(str)); i++)
590 if (str[i]=='-')
591 str[i]='_';
594 int find_group(char s[], int ngrps, char **grpname)
596 int aa, i, n;
597 char string[STRLEN];
598 bool bMultiple;
600 bMultiple = FALSE;
601 n = strlen(s);
602 aa=NOTSET;
603 /* first look for whole name match */
604 if (aa==NOTSET)
605 for(i=0; i<ngrps; i++)
606 if (strcasecmp_min(s,grpname[i])==0) {
607 if(aa!=NOTSET)
608 bMultiple = TRUE;
609 aa=i;
611 /* second look for first string match */
612 if (aa==NOTSET)
613 for(i=0; i<ngrps; i++)
614 if (strncasecmp_min(s,grpname[i],n)==0) {
615 if(aa!=NOTSET)
616 bMultiple = TRUE;
617 aa=i;
619 /* last look for arbitrary substring match */
620 if (aa==NOTSET) {
621 upstring(s);
622 minstring(s);
623 for(i=0; i<ngrps; i++) {
624 strcpy(string, grpname[i]);
625 upstring(string);
626 minstring(string);
627 if (strstr(s,string)!=NULL) {
628 if(aa!=NOTSET)
629 bMultiple = TRUE;
630 aa=i;
634 if (bMultiple) {
635 printf("Error: Multiple groups '%s' selected\n", s);
636 aa=NOTSET;
638 return aa;
641 static int qgroup(int *a, int ngrps, char **grpname)
643 char s[STRLEN];
644 int aa;
645 bool bInRange;
647 do {
648 fprintf(stderr,"Select a group: ");
649 do {
650 if ( scanf("%s",s)!=1 )
651 fatal_error(0,"Cannot read from input");
652 trim(s); /* remove spaces */
653 } while (strlen(s)==0);
654 aa = atoi(s);
655 if (aa==0 && strcmp(s,"0")!=0 ) /* string entered */
656 aa = find_group(s, ngrps, grpname);
657 bInRange = aa>=0 && aa<ngrps;
658 if (!bInRange)
659 printf("Error: No such group '%s'\n", s);
660 } while (!bInRange);
661 printf("Selected %d: '%s'\n", aa, grpname[aa]);
662 *a = aa;
663 return aa;
666 static void rd_groups(t_block *grps,char **grpname,char *gnames[],
667 int ngrps,int isize[],atom_id *index[],int grpnr[])
669 int i,j,gnr1;
671 if (grps->nr==0)
672 fatal_error(0,"Error: no groups in indexfile");
673 for(i=0; (i<grps->nr); i++)
674 fprintf(stderr,"Group %5d (%12s) has %5d elements\n",i,grpname[i],
675 grps->index[i+1]-grps->index[i]);
676 for(i=0; (i<ngrps); i++) {
677 if (grps->nr > 1)
678 do {
679 gnr1=qgroup(&grpnr[i], grps->nr, grpname);
680 if ((gnr1<0) || (gnr1>=grps->nr))
681 fprintf(stderr,"Select between %d and %d.\n",0,grps->nr-1);
682 } while ((gnr1<0) || (gnr1>=grps->nr));
683 else {
684 fprintf(stderr,"There is one group in the index\n");
685 gnr1=0;
687 gnames[i]=strdup(grpname[gnr1]);
688 isize[i]=grps->index[gnr1+1]-grps->index[gnr1];
689 snew(index[i],isize[i]);
690 for(j=0; (j<isize[i]); j++)
691 index[i][j]=grps->a[grps->index[gnr1]+j];
695 void rd_index(char *statfile,int ngrps,int isize[],
696 atom_id *index[],char *grpnames[])
698 char **gnames;
699 t_block *grps;
700 int *grpnr;
702 snew(grpnr,ngrps);
703 if (!statfile)
704 fatal_error(0,"No index file specified");
705 grps=init_index(statfile,&gnames);
706 rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
709 void rd_index_nrs(char *statfile,int ngrps,int isize[],
710 atom_id *index[],char *grpnames[],int grpnr[])
712 char **gnames;
713 t_block *grps;
715 if (!statfile)
716 fatal_error(0,"No index file specified");
717 grps=init_index(statfile,&gnames);
719 rd_groups(grps,gnames,grpnames,ngrps,isize,index,grpnr);
722 void get_index(t_atoms *atoms, char *fnm, int ngrps,
723 int isize[], atom_id *index[],char *grpnames[])
725 char ***gnames;
726 t_block *grps;
727 int *grpnr;
729 snew(grpnr,ngrps);
730 snew(gnames,1);
731 if (fnm != NULL) {
732 grps=init_index(fnm,gnames);
734 else {
735 snew(grps,1);
736 snew(grps->index,1);
737 analyse(atoms,grps,gnames,FALSE,FALSE);
739 rd_groups(grps,*gnames,grpnames,ngrps,isize,index,grpnr);