A freeze group can now be allowed to move rigidly in some dimension by using "freezed...
[gromacs/rigid-bodies.git] / src / tools / gmx_cluster.c
blobc7652ef6a8f47abf5b90ad1e71931522366ad566
1 /*
2 *
3 * This source code is part of
4 *
5 * G R O M A C S
6 *
7 * GROningen MAchine for Chemical Simulations
8 *
9 * VERSION 3.2.0
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * Green Red Orange Magenta Azure Cyan Skyblue
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
38 #include <math.h>
39 #include <string.h>
40 #include <ctype.h>
41 #include "macros.h"
42 #include "smalloc.h"
43 #include "typedefs.h"
44 #include "copyrite.h"
45 #include "statutil.h"
46 #include "tpxio.h"
47 #include "string2.h"
48 #include "vec.h"
49 #include "macros.h"
50 #include "index.h"
51 #include "random.h"
52 #include "pbc.h"
53 #include "rmpbc.h"
54 #include "xvgr.h"
55 #include "futil.h"
56 #include "matio.h"
57 #include "eigensolver.h"
58 #include "cmat.h"
59 #include "do_fit.h"
60 #include "trnio.h"
61 #include "viewit.h"
62 #include "gmx_ana.h"
64 /* macro's to print to two file pointers at once (i.e. stderr and log) */
65 #define lo_ffprintf(fp1,fp2,buf) \
66 fprintf(fp1,"%s",buf);\
67 fprintf(fp2,"%s",buf);
68 /* just print a prepared buffer to fp1 and fp2 */
69 #define ffprintf(fp1,fp2,buf) { lo_ffprintf(fp1,fp2,buf) }
70 /* prepare buffer with one argument, then print to fp1 and fp2 */
71 #define ffprintf1(fp1,fp2,buf,fmt,arg) {\
72 sprintf(buf,fmt,arg);\
73 lo_ffprintf(fp1,fp2,buf)\
75 /* prepare buffer with two arguments, then print to fp1 and fp2 */
76 #define ffprintf2(fp1,fp2,buf,fmt,arg1,arg2) {\
77 sprintf(buf,fmt,arg1,arg2);\
78 lo_ffprintf(fp1,fp2,buf)\
81 typedef struct {
82 int ncl;
83 int *cl;
84 } t_clusters;
86 typedef struct {
87 int nr;
88 int *nb;
89 } t_nnb;
91 void pr_energy(FILE *fp,real e)
93 fprintf(fp,"Energy: %8.4f\n",e);
96 void cp_index(int nn,int from[],int to[])
98 int i;
100 for(i=0; (i<nn); i++)
101 to[i]=from[i];
104 void mc_optimize(FILE *log,t_mat *m,int maxiter,int *seed,real kT)
106 real e[2],ei,ej,efac;
107 int *low_index;
108 int cur=0;
109 #define next (1-cur)
110 int i,isw,jsw,iisw,jjsw,nn;
112 fprintf(stderr,"\nDoing Monte Carlo clustering\n");
113 nn = m->nn;
114 snew(low_index,nn);
115 cp_index(nn,m->m_ind,low_index);
116 if (getenv("TESTMC")) {
117 e[cur] = mat_energy(m);
118 pr_energy(log,e[cur]);
119 fprintf(log,"Doing 1000 random swaps\n");
120 for(i=0; (i<1000); i++) {
121 do {
122 isw = nn*rando(seed);
123 jsw = nn*rando(seed);
124 } while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
125 iisw = m->m_ind[isw];
126 jjsw = m->m_ind[jsw];
127 m->m_ind[isw] = jjsw;
128 m->m_ind[jsw] = iisw;
131 e[cur] = mat_energy(m);
132 pr_energy(log,e[cur]);
133 for(i=0; (i<maxiter); i++) {
134 do {
135 isw = nn*rando(seed);
136 jsw = nn*rando(seed);
137 } while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
139 iisw = m->m_ind[isw];
140 jjsw = m->m_ind[jsw];
141 ei = row_energy(nn,iisw,m->mat[jsw]);
142 ej = row_energy(nn,jjsw,m->mat[isw]);
144 e[next] = e[cur] + (ei+ej-EROW(m,isw)-EROW(m,jsw))/nn;
146 efac = kT ? exp((e[next]-e[cur])/kT) : -1;
147 if ((e[next] > e[cur]) || (efac > rando(seed))) {
149 if (e[next] > e[cur])
150 cp_index(nn,m->m_ind,low_index);
151 else
152 fprintf(log,"Taking uphill step\n");
154 /* Now swapping rows */
155 m->m_ind[isw] = jjsw;
156 m->m_ind[jsw] = iisw;
157 EROW(m,isw) = ei;
158 EROW(m,jsw) = ej;
159 cur = next;
160 fprintf(log,"Iter: %d Swapped %4d and %4d (now %g)",
161 i,isw,jsw,mat_energy(m));
162 pr_energy(log,e[cur]);
165 /* Now restore the highest energy index */
166 cp_index(nn,low_index,m->m_ind);
169 static void calc_dist(int nind,rvec x[],real **d)
171 int i,j;
172 real *xi;
173 rvec dx;
175 for(i=0; (i<nind-1); i++) {
176 xi=x[i];
177 for(j=i+1; (j<nind); j++) {
178 /* Should use pbc_dx when analysing multiple molecueles,
179 * but the box is not stored for every frame.
181 rvec_sub(xi,x[j],dx);
182 d[i][j]=norm(dx);
187 static real rms_dist(int isize,real **d,real **d_r)
189 int i,j;
190 real r,r2;
192 r2=0.0;
193 for(i=0; (i<isize-1); i++)
194 for(j=i+1; (j<isize); j++) {
195 r=d[i][j]-d_r[i][j];
196 r2+=r*r;
198 r2/=(isize*(isize-1))/2;
200 return sqrt(r2);
203 static int rms_dist_comp(const void *a,const void *b)
205 t_dist *da,*db;
207 da = (t_dist *)a;
208 db = (t_dist *)b;
210 if (da->dist - db->dist < 0)
211 return -1;
212 else if (da->dist - db->dist > 0)
213 return 1;
214 return 0;
217 static int clust_id_comp(const void *a,const void *b)
219 t_clustid *da,*db;
221 da = (t_clustid *)a;
222 db = (t_clustid *)b;
224 return da->clust - db->clust;
227 static int nrnb_comp(const void *a, const void *b)
229 t_nnb *da, *db;
231 da = (t_nnb *)a;
232 db = (t_nnb *)b;
234 /* return the b-a, we want highest first */
235 return db->nr - da->nr;
238 void gather(t_mat *m,real cutoff,t_clusters *clust)
240 t_clustid *c;
241 t_dist *d;
242 int i,j,k,nn,cid,n1,diff;
243 gmx_bool bChange;
245 /* First we sort the entries in the RMSD matrix */
246 n1 = m->nn;
247 nn = ((n1-1)*n1)/2;
248 snew(d,nn);
249 for(i=k=0; (i<n1); i++)
250 for(j=i+1; (j<n1); j++,k++) {
251 d[k].i = i;
252 d[k].j = j;
253 d[k].dist = m->mat[i][j];
255 if (k != nn)
256 gmx_incons("gather algortihm");
257 qsort(d,nn,sizeof(d[0]),rms_dist_comp);
259 /* Now we make a cluster index for all of the conformations */
260 c = new_clustid(n1);
262 /* Now we check the closest structures, and equalize their cluster numbers */
263 fprintf(stderr,"Linking structures ");
264 do {
265 fprintf(stderr,"*");
266 bChange=FALSE;
267 for(k=0; (k<nn) && (d[k].dist < cutoff); k++) {
268 diff = c[d[k].j].clust - c[d[k].i].clust;
269 if (diff) {
270 bChange = TRUE;
271 if (diff > 0)
272 c[d[k].j].clust = c[d[k].i].clust;
273 else
274 c[d[k].i].clust = c[d[k].j].clust;
277 } while (bChange);
278 fprintf(stderr,"\nSorting and renumbering clusters\n");
279 /* Sort on cluster number */
280 qsort(c,n1,sizeof(c[0]),clust_id_comp);
282 /* Renumber clusters */
283 cid = 1;
284 for(k=1; k<n1; k++) {
285 if (c[k].clust != c[k-1].clust) {
286 c[k-1].clust = cid;
287 cid ++;
288 } else
289 c[k-1].clust = cid;
291 c[k-1].clust = cid;
292 if (debug)
293 for(k=0; (k<n1); k++)
294 fprintf(debug,"Cluster index for conformation %d: %d\n",
295 c[k].conf,c[k].clust);
296 clust->ncl = cid;
297 for(k=0; k<n1; k++)
298 clust->cl[c[k].conf] = c[k].clust;
300 sfree(c);
301 sfree(d);
304 gmx_bool jp_same(int **nnb,int i,int j,int P)
306 gmx_bool bIn;
307 int k,ii,jj,pp;
309 bIn = FALSE;
310 for(k=0; nnb[i][k]>=0; k++)
311 bIn = bIn || (nnb[i][k] == j);
312 if (!bIn)
313 return FALSE;
315 bIn = FALSE;
316 for(k=0; nnb[j][k]>=0; k++)
317 bIn = bIn || (nnb[j][k] == i);
318 if (!bIn)
319 return FALSE;
321 pp=0;
322 for(ii=0; nnb[i][ii]>=0; ii++)
323 for(jj=0; nnb[j][jj]>=0; jj++)
324 if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
325 pp++;
327 return (pp >= P);
330 static void jarvis_patrick(int n1,real **mat,int M,int P,
331 real rmsdcut,t_clusters *clust)
333 t_dist *row;
334 t_clustid *c;
335 int **nnb;
336 int i,j,k,cid,diff,max;
337 gmx_bool bChange;
338 real **mcpy=NULL;
340 if (rmsdcut < 0)
341 rmsdcut = 10000;
343 /* First we sort the entries in the RMSD matrix row by row.
344 * This gives us the nearest neighbor list.
346 snew(nnb,n1);
347 snew(row,n1);
348 for(i=0; (i<n1); i++) {
349 for(j=0; (j<n1); j++) {
350 row[j].j = j;
351 row[j].dist = mat[i][j];
353 qsort(row,n1,sizeof(row[0]),rms_dist_comp);
354 if (M>0) {
355 /* Put the M nearest neighbors in the list */
356 snew(nnb[i],M+1);
357 for(j=k=0; (k<M) && (j<n1) && (mat[i][row[j].j] < rmsdcut); j++)
358 if (row[j].j != i) {
359 nnb[i][k] = row[j].j;
360 k++;
362 nnb[i][k] = -1;
363 } else {
364 /* Put all neighbors nearer than rmsdcut in the list */
365 max=0;
366 k=0;
367 for(j=0; (j<n1) && (mat[i][row[j].j] < rmsdcut); j++)
368 if (row[j].j != i) {
369 if (k >= max) {
370 max += 10;
371 srenew(nnb[i],max);
373 nnb[i][k] = row[j].j;
374 k++;
376 if (k == max)
377 srenew(nnb[i],max+1);
378 nnb[i][k] = -1;
381 sfree(row);
382 if (debug) {
383 fprintf(debug,"Nearest neighborlist. M = %d, P = %d\n",M,P);
384 for(i=0; (i<n1); i++) {
385 fprintf(debug,"i:%5d nbs:",i);
386 for(j=0; nnb[i][j]>=0; j++)
387 fprintf(debug,"%5d[%5.3f]",nnb[i][j],mat[i][nnb[i][j]]);
388 fprintf(debug,"\n");
392 c = new_clustid(n1);
393 fprintf(stderr,"Linking structures ");
394 /* Use mcpy for temporary storage of booleans */
395 mcpy = mk_matrix(n1,n1,FALSE);
396 for(i=0; i<n1; i++)
397 for(j=i+1; j<n1; j++)
398 mcpy[i][j] = jp_same(nnb,i,j,P);
399 do {
400 fprintf(stderr,"*");
401 bChange=FALSE;
402 for(i=0; i<n1; i++) {
403 for(j=i+1; j<n1; j++)
404 if (mcpy[i][j]) {
405 diff = c[j].clust - c[i].clust;
406 if (diff) {
407 bChange = TRUE;
408 if (diff > 0)
409 c[j].clust = c[i].clust;
410 else
411 c[i].clust = c[j].clust;
415 } while (bChange);
417 fprintf(stderr,"\nSorting and renumbering clusters\n");
418 /* Sort on cluster number */
419 qsort(c,n1,sizeof(c[0]),clust_id_comp);
421 /* Renumber clusters */
422 cid = 1;
423 for(k=1; k<n1; k++) {
424 if (c[k].clust != c[k-1].clust) {
425 c[k-1].clust = cid;
426 cid ++;
427 } else
428 c[k-1].clust = cid;
430 c[k-1].clust = cid;
431 clust->ncl = cid;
432 for(k=0; k<n1; k++)
433 clust->cl[c[k].conf] = c[k].clust;
434 if (debug)
435 for(k=0; (k<n1); k++)
436 fprintf(debug,"Cluster index for conformation %d: %d\n",
437 c[k].conf,c[k].clust);
439 /* Again, I don't see the point in this... (AF) */
440 /* for(i=0; (i<n1); i++) { */
441 /* for(j=0; (j<n1); j++) */
442 /* mcpy[c[i].conf][c[j].conf] = mat[i][j]; */
443 /* } */
444 /* for(i=0; (i<n1); i++) { */
445 /* for(j=0; (j<n1); j++) */
446 /* mat[i][j] = mcpy[i][j]; */
447 /* } */
448 done_matrix(n1,&mcpy);
450 sfree(c);
451 for(i=0; (i<n1); i++)
452 sfree(nnb[i]);
453 sfree(nnb);
456 static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)
458 int i,j;
460 /* dump neighbor list */
461 fprintf(fp,"%s",title);
462 for(i=0; (i<n1); i++) {
463 fprintf(fp,"i:%5d #:%5d nbs:",i,nnb[i].nr);
464 for(j=0; j<nnb[i].nr; j++)
465 fprintf(fp,"%5d",nnb[i].nb[j]);
466 fprintf(fp,"\n");
470 static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)
472 t_dist *row;
473 t_nnb *nnb;
474 int i,j,k,j1,max;
476 /* Put all neighbors nearer than rmsdcut in the list */
477 fprintf(stderr,"Making list of neighbors within cutoff ");
478 snew(nnb,n1);
479 snew(row,n1);
480 for(i=0; (i<n1); i++) {
481 max=0;
482 k=0;
483 /* put all neighbors within cut-off in list */
484 for(j=0; j<n1; j++)
485 if (mat[i][j] < rmsdcut) {
486 if (k >= max) {
487 max += 10;
488 srenew(nnb[i].nb,max);
490 nnb[i].nb[k] = j;
491 k++;
493 /* store nr of neighbors, we'll need that */
494 nnb[i].nr = k;
495 if (i%(1+n1/100)==0) fprintf(stderr,"%3d%%\b\b\b\b",(i*100+1)/n1);
497 fprintf(stderr,"%3d%%\n",100);
498 sfree(row);
500 /* sort neighbor list on number of neighbors, largest first */
501 qsort(nnb,n1,sizeof(nnb[0]),nrnb_comp);
503 if (debug) dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
505 /* turn first structure with all its neighbors (largest) into cluster
506 remove them from pool of structures and repeat for all remaining */
507 fprintf(stderr,"Finding clusters %4d", 0);
508 /* cluster id's start at 1: */
509 k=1;
510 while(nnb[0].nr) {
511 /* set cluster id (k) for first item in neighborlist */
512 for (j=0; j<nnb[0].nr; j++)
513 clust->cl[nnb[0].nb[j]] = k;
514 /* mark as done */
515 nnb[0].nr=0;
516 sfree(nnb[0].nb);
518 /* adjust number of neighbors for others, taking removals into account: */
519 for(i=1; i<n1 && nnb[i].nr; i++) {
520 j1=0;
521 for(j=0; j<nnb[i].nr; j++)
522 /* if this neighbor wasn't removed */
523 if ( clust->cl[nnb[i].nb[j]] == 0 ) {
524 /* shift the rest (j1<=j) */
525 nnb[i].nb[j1]=nnb[i].nb[j];
526 /* next */
527 j1++;
529 /* now j1 is the new number of neighbors */
530 nnb[i].nr=j1;
532 /* sort again on nnb[].nr, because we have new # neighbors: */
533 /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
534 qsort(nnb,i,sizeof(nnb[0]),nrnb_comp);
536 fprintf(stderr,"\b\b\b\b%4d",k);
537 /* new cluster id */
538 k++;
540 fprintf(stderr,"\n");
541 sfree(nnb);
542 if (debug) {
543 fprintf(debug,"Clusters (%d):\n", k);
544 for(i=0; i<n1; i++)
545 fprintf(debug," %3d", clust->cl[i]);
546 fprintf(debug,"\n");
549 clust->ncl=k-1;
552 rvec **read_whole_trj(const char *fn,int isize,atom_id index[],int skip,
553 int *nframe, real **time,const output_env_t oenv,gmx_bool bPBC, gmx_rmpbc_t gpbc)
555 rvec **xx,*x;
556 matrix box;
557 real t;
558 int i,i0,j,max_nf;
559 int natom;
560 t_trxstatus *status;
563 max_nf = 0;
564 xx = NULL;
565 *time = NULL;
566 natom = read_first_x(oenv,&status,fn,&t,&x,box);
567 i = 0;
568 i0 = 0;
569 do {
570 if (bPBC) {
571 gmx_rmpbc(gpbc,natom,box,x);
573 if (i0 >= max_nf) {
574 max_nf += 10;
575 srenew(xx,max_nf);
576 srenew(*time,max_nf);
578 if ((i % skip) == 0) {
579 snew(xx[i0],isize);
580 /* Store only the interesting atoms */
581 for(j=0; (j<isize); j++)
582 copy_rvec(x[index[j]],xx[i0][j]);
583 (*time)[i0] = t;
584 i0 ++;
586 i++;
587 } while (read_next_x(oenv,status,&t,natom,x,box));
588 fprintf(stderr,"Allocated %lu bytes for frames\n",
589 (unsigned long) (max_nf*isize*sizeof(**xx)));
590 fprintf(stderr,"Read %d frames from trajectory %s\n",i0,fn);
591 *nframe = i0;
592 sfree(x);
594 return xx;
597 static int plot_clusters(int nf, real **mat, t_clusters *clust,
598 int nlevels, int minstruct)
600 int i,j,ncluster,ci;
601 int *cl_id,*nstruct,*strind;
603 snew(cl_id,nf);
604 snew(nstruct,nf);
605 snew(strind,nf);
606 for(i=0; i<nf; i++) {
607 strind[i] = 0;
608 cl_id[i] = clust->cl[i];
609 nstruct[cl_id[i]]++;
611 ncluster = 0;
612 for(i=0; i<nf; i++) {
613 if (nstruct[i] >= minstruct) {
614 ncluster++;
615 for(j=0; (j<nf); j++)
616 if (cl_id[j] == i)
617 strind[j] = ncluster;
620 ncluster++;
621 fprintf(stderr,"There are %d clusters with at least %d conformations\n",
622 ncluster,minstruct);
624 for(i=0; (i<nf); i++) {
625 ci = cl_id[i];
626 for(j=0; j<i; j++)
627 if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct)) {
628 /* color different clusters with different colors, as long as
629 we don't run out of colors */
630 mat[i][j] = strind[i];
632 else
633 mat[i][j] = 0;
635 sfree(strind);
636 sfree(nstruct);
637 sfree(cl_id);
639 return ncluster;
642 static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)
644 int i,j,v;
646 for(i=0; i<nf; i++)
647 for(j=0; j<i; j++)
648 if (clust->cl[i] == clust->cl[j])
649 mat[i][j] = val;
650 else
651 mat[i][j] = 0;
654 static char *parse_filename(const char *fn, int maxnr)
656 int i;
657 char *fnout;
658 const char *ext;
659 char buf[STRLEN];
661 if (strchr(fn,'%'))
662 gmx_fatal(FARGS,"will not number filename %s containing '%c'",fn,'%');
663 /* number of digits needed in numbering */
664 i = (int)(log(maxnr)/log(10)) + 1;
665 /* split fn and ext */
666 ext = strrchr(fn, '.');
667 if (!ext)
668 gmx_fatal(FARGS,"cannot separate extension in filename %s",fn);
669 ext++;
670 /* insert e.g. '%03d' between fn and ext */
671 sprintf(buf,"%s%%0%dd.%s",fn,i,ext);
672 snew(fnout,strlen(buf)+1);
673 strcpy(fnout, buf);
675 return fnout;
678 static void ana_trans(t_clusters *clust, int nf,
679 const char *transfn, const char *ntransfn, FILE *log,
680 t_rgb rlo,t_rgb rhi,const output_env_t oenv)
682 FILE *fp;
683 real **trans,*axis;
684 int *ntrans;
685 int i,ntranst,maxtrans;
686 char buf[STRLEN];
688 snew(ntrans,clust->ncl);
689 snew(trans,clust->ncl);
690 snew(axis,clust->ncl);
691 for(i=0; i<clust->ncl; i++) {
692 axis[i]=i+1;
693 snew(trans[i],clust->ncl);
695 ntranst=0;
696 maxtrans=0;
697 for(i=1; i<nf; i++)
698 if(clust->cl[i] != clust->cl[i-1]) {
699 ntranst++;
700 ntrans[clust->cl[i-1]-1]++;
701 ntrans[clust->cl[i]-1]++;
702 trans[clust->cl[i-1]-1][clust->cl[i]-1]++;
703 maxtrans = max(maxtrans, trans[clust->cl[i]-1][clust->cl[i-1]-1]);
705 ffprintf2(stderr,log,buf,"Counted %d transitions in total, "
706 "max %d between two specific clusters\n",ntranst,maxtrans);
707 if (transfn) {
708 fp=ffopen(transfn,"w");
709 i = min(maxtrans+1, 80);
710 write_xpm(fp,0,"Cluster Transitions","# transitions",
711 "from cluster","to cluster",
712 clust->ncl, clust->ncl, axis, axis, trans,
713 0, maxtrans, rlo, rhi, &i);
714 ffclose(fp);
716 if (ntransfn) {
717 fp=xvgropen(ntransfn,"Cluster Transitions","Cluster #","# transitions",
718 oenv);
719 for(i=0; i<clust->ncl; i++)
720 fprintf(fp,"%5d %5d\n",i+1,ntrans[i]);
721 ffclose(fp);
723 sfree(ntrans);
724 for(i=0; i<clust->ncl; i++)
725 sfree(trans[i]);
726 sfree(trans);
727 sfree(axis);
730 static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,
731 int natom, t_atoms *atoms, rvec *xtps,
732 real *mass, rvec **xx, real *time,
733 int ifsize, atom_id *fitidx,
734 int iosize, atom_id *outidx,
735 const char *trxfn, const char *sizefn,
736 const char *transfn, const char *ntransfn,
737 const char *clustidfn, gmx_bool bAverage,
738 int write_ncl, int write_nst, real rmsmin,
739 gmx_bool bFit, FILE *log,t_rgb rlo,t_rgb rhi,
740 const output_env_t oenv)
742 FILE *fp=NULL;
743 char buf[STRLEN],buf1[40],buf2[40],buf3[40],*trxsfn;
744 t_trxstatus *trxout=NULL;
745 t_trxstatus *trxsout=NULL;
746 int i,i1,cl,nstr,*structure,first=0,midstr;
747 gmx_bool *bWrite=NULL;
748 real r,clrmsd,midrmsd;
749 rvec *xav=NULL;
750 matrix zerobox;
752 clear_mat(zerobox);
754 ffprintf1(stderr,log,buf,"\nFound %d clusters\n\n",clust->ncl);
755 trxsfn=NULL;
756 if (trxfn) {
757 /* do we write all structures? */
758 if (write_ncl) {
759 trxsfn = parse_filename(trxfn, max(write_ncl,clust->ncl));
760 snew(bWrite,nf);
762 ffprintf2(stderr,log,buf,"Writing %s structure for each cluster to %s\n",
763 bAverage ? "average" : "middle", trxfn);
764 if (write_ncl) {
765 /* find out what we want to tell the user:
766 Writing [all structures|structures with rmsd > %g] for
767 {all|first %d} clusters {with more than %d structures} to %s */
768 if (rmsmin>0.0)
769 sprintf(buf1,"structures with rmsd > %g",rmsmin);
770 else
771 sprintf(buf1,"all structures");
772 buf2[0]=buf3[0]='\0';
773 if (write_ncl>=clust->ncl) {
774 if (write_nst==0)
775 sprintf(buf2,"all ");
776 } else
777 sprintf(buf2,"the first %d ",write_ncl);
778 if (write_nst)
779 sprintf(buf3," with more than %d structures",write_nst);
780 sprintf(buf,"Writing %s for %sclusters%s to %s\n",buf1,buf2,buf3,trxsfn);
781 ffprintf(stderr,log,buf);
784 /* Prepare a reference structure for the orientation of the clusters */
785 if (bFit)
786 reset_x(ifsize,fitidx,natom,NULL,xtps,mass);
787 trxout = open_trx(trxfn,"w");
788 /* Calculate the average structure in each cluster, *
789 * all structures are fitted to the first struture of the cluster */
790 snew(xav,natom);
793 if (transfn || ntransfn)
794 ana_trans(clust, nf, transfn, ntransfn, log,rlo,rhi,oenv);
796 if (clustidfn) {
797 fp=xvgropen(clustidfn,"Clusters",output_env_get_xvgr_tlabel(oenv),"Cluster #",oenv);
798 fprintf(fp,"@ s0 symbol 2\n");
799 fprintf(fp,"@ s0 symbol size 0.2\n");
800 fprintf(fp,"@ s0 linestyle 0\n");
801 for(i=0; i<nf; i++)
802 fprintf(fp,"%8g %8d\n",time[i],clust->cl[i]);
803 ffclose(fp);
805 if (sizefn) {
806 fp=xvgropen(sizefn,"Cluster Sizes","Cluster #","# Structures",oenv);
807 fprintf(fp,"@g%d type %s\n",0,"bar");
809 snew(structure,nf);
810 fprintf(log,"\n%3s | %3s %4s | %6s %4s | cluster members\n",
811 "cl.","#st","rmsd","middle","rmsd");
812 for(cl=1; cl<=clust->ncl; cl++) {
813 /* prepare structures (fit, middle, average) */
814 if (xav)
815 for(i=0; i<natom;i++)
816 clear_rvec(xav[i]);
817 nstr=0;
818 for(i1=0; i1<nf; i1++)
819 if (clust->cl[i1] == cl) {
820 structure[nstr] = i1;
821 nstr++;
822 if (trxfn && (bAverage || write_ncl) ) {
823 if (bFit)
824 reset_x(ifsize,fitidx,natom,NULL,xx[i1],mass);
825 if (nstr == 1)
826 first = i1;
827 else if (bFit)
828 do_fit(natom,mass,xx[first],xx[i1]);
829 if (xav)
830 for(i=0; i<natom; i++)
831 rvec_inc(xav[i],xx[i1][i]);
834 if (sizefn)
835 fprintf(fp,"%8d %8d\n",cl,nstr);
836 clrmsd = 0;
837 midstr = 0;
838 midrmsd = 10000;
839 for(i1=0; i1<nstr; i1++) {
840 r = 0;
841 if (nstr > 1) {
842 for(i=0; i<nstr; i++)
843 if (i < i1)
844 r += rmsd[structure[i]][structure[i1]];
845 else
846 r += rmsd[structure[i1]][structure[i]];
847 r /= (nstr - 1);
849 if ( r < midrmsd ) {
850 midstr = structure[i1];
851 midrmsd = r;
853 clrmsd += r;
855 clrmsd /= nstr;
857 /* dump cluster info to logfile */
858 if (nstr > 1) {
859 sprintf(buf1,"%6.3f",clrmsd);
860 if (buf1[0] == '0')
861 buf1[0] = ' ';
862 sprintf(buf2,"%5.3f",midrmsd);
863 if (buf2[0] == '0')
864 buf2[0] = ' ';
865 } else {
866 sprintf(buf1,"%5s","");
867 sprintf(buf2,"%5s","");
869 fprintf(log,"%3d | %3d %s | %6g%s |",cl,nstr,buf1,time[midstr],buf2);
870 for(i=0; i<nstr; i++) {
871 if ((i % 7 == 0) && i)
872 sprintf(buf,"\n%3s | %3s %4s | %6s %4s |","","","","","");
873 else
874 buf[0] = '\0';
875 i1 = structure[i];
876 fprintf(log,"%s %6g",buf,time[i1]);
878 fprintf(log,"\n");
880 /* write structures to trajectory file(s) */
881 if (trxfn) {
882 if (write_ncl)
883 for(i=0; i<nstr; i++)
884 bWrite[i]=FALSE;
885 if ( cl < write_ncl+1 && nstr > write_nst ) {
886 /* Dump all structures for this cluster */
887 /* generate numbered filename (there is a %d in trxfn!) */
888 sprintf(buf,trxsfn,cl);
889 trxsout = open_trx(buf,"w");
890 for(i=0; i<nstr; i++) {
891 bWrite[i] = TRUE;
892 if (rmsmin>0.0)
893 for(i1=0; i1<i && bWrite[i]; i1++)
894 if (bWrite[i1])
895 bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
896 if (bWrite[i])
897 write_trx(trxsout,iosize,outidx,atoms,i,time[structure[i]],zerobox,
898 xx[structure[i]],NULL,NULL);
900 close_trx(trxsout);
902 /* Dump the average structure for this cluster */
903 if (bAverage) {
904 for(i=0; i<natom; i++)
905 svmul(1.0/nstr,xav[i],xav[i]);
906 } else {
907 for(i=0; i<natom; i++)
908 copy_rvec(xx[midstr][i],xav[i]);
909 if (bFit)
910 reset_x(ifsize,fitidx,natom,NULL,xav,mass);
912 if (bFit)
913 do_fit(natom,mass,xtps,xav);
914 r = cl;
915 write_trx(trxout,iosize,outidx,atoms,cl,time[midstr],zerobox,xav,NULL,NULL);
918 /* clean up */
919 if (trxfn) {
920 close_trx(trxout);
921 sfree(xav);
922 if (write_ncl)
923 sfree(bWrite);
925 sfree(structure);
926 if (trxsfn)
927 sfree(trxsfn);
930 static void convert_mat(t_matrix *mat,t_mat *rms)
932 int i,j;
934 rms->n1 = mat->nx;
935 matrix2real(mat,rms->mat);
936 /* free input xpm matrix data */
937 for(i=0; i<mat->nx; i++)
938 sfree(mat->matrix[i]);
939 sfree(mat->matrix);
941 for(i=0; i<mat->nx; i++)
942 for(j=i; j<mat->nx; j++) {
943 rms->sumrms += rms->mat[i][j];
944 rms->maxrms = max(rms->maxrms, rms->mat[i][j]);
945 if (j!=i)
946 rms->minrms = min(rms->minrms, rms->mat[i][j]);
948 rms->nn = mat->nx;
951 int gmx_cluster(int argc,char *argv[])
953 const char *desc[] = {
954 "[TT]g_cluster[tt] can cluster structures using several different methods.",
955 "Distances between structures can be determined from a trajectory",
956 "or read from an [TT].xpm[tt] matrix file with the [TT]-dm[tt] option.",
957 "RMS deviation after fitting or RMS deviation of atom-pair distances",
958 "can be used to define the distance between structures.[PAR]",
960 "single linkage: add a structure to a cluster when its distance to any",
961 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
963 "Jarvis Patrick: add a structure to a cluster when this structure",
964 "and a structure in the cluster have each other as neighbors and",
965 "they have a least [TT]P[tt] neighbors in common. The neighbors",
966 "of a structure are the M closest structures or all structures within",
967 "[TT]cutoff[tt].[PAR]",
969 "Monte Carlo: reorder the RMSD matrix using Monte Carlo.[PAR]",
971 "diagonalization: diagonalize the RMSD matrix.[PAR]",
973 "gromos: use algorithm as described in Daura [IT]et al.[it]",
974 "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
975 "Count number of neighbors using cut-off, take structure with",
976 "largest number of neighbors with all its neighbors as cluster",
977 "and eliminate it from the pool of clusters. Repeat for remaining",
978 "structures in pool.[PAR]",
980 "When the clustering algorithm assigns each structure to exactly one",
981 "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
982 "file is supplied, the structure with",
983 "the smallest average distance to the others or the average structure",
984 "or all structures for each cluster will be written to a trajectory",
985 "file. When writing all structures, separate numbered files are made",
986 "for each cluster.[PAR]",
988 "Two output files are always written:[BR]",
989 "[TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
990 "and a graphical depiction of the clusters in the lower right half",
991 "When [TT]-minstruct[tt] = 1 the graphical depiction is black",
992 "when two structures are in the same cluster.",
993 "When [TT]-minstruct[tt] > 1 different colors will be used for each",
994 "cluster.[BR]",
995 "[TT]-g[tt] writes information on the options used and a detailed list",
996 "of all clusters and their members.[PAR]",
998 "Additionally, a number of optional output files can be written:[BR]",
999 "[TT]-dist[tt] writes the RMSD distribution.[BR]",
1000 "[TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
1001 "diagonalization.[BR]",
1002 "[TT]-sz[tt] writes the cluster sizes.[BR]",
1003 "[TT]-tr[tt] writes a matrix of the number transitions between",
1004 "cluster pairs.[BR]",
1005 "[TT]-ntr[tt] writes the total number of transitions to or from",
1006 "each cluster.[BR]",
1007 "[TT]-clid[tt] writes the cluster number as a function of time.[BR]",
1008 "[TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1009 "structure of each cluster or writes numbered files with cluster members",
1010 "for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1011 "[TT]-nst[tt] and [TT]-rmsmin[tt]).[BR]",
1014 FILE *fp,*log;
1015 int i,i1,i2,j,nf,nrms;
1017 matrix box;
1018 rvec *xtps,*usextps,*x1,**xx=NULL;
1019 const char *fn,*trx_out_fn;
1020 t_clusters clust;
1021 t_mat *rms;
1022 real *eigval;
1023 t_topology top;
1024 int ePBC;
1025 t_atoms useatoms;
1026 t_matrix *readmat=NULL;
1027 real *tmp;
1029 int isize=0,ifsize=0,iosize=0;
1030 atom_id *index=NULL, *fitidx, *outidx;
1031 char *grpname;
1032 real rmsd,**d1,**d2,*time=NULL,time_invfac,*mass=NULL;
1033 char buf[STRLEN],buf1[80],title[STRLEN];
1034 gmx_bool bAnalyze,bUseRmsdCut,bJP_RMSD=FALSE,bReadMat,bReadTraj,bPBC=TRUE;
1036 int method,ncluster=0;
1037 static const char *methodname[] = {
1038 NULL, "linkage", "jarvis-patrick","monte-carlo",
1039 "diagonalization", "gromos", NULL
1041 enum { m_null, m_linkage, m_jarvis_patrick,
1042 m_monte_carlo, m_diagonalize, m_gromos, m_nr };
1043 /* Set colors for plotting: white = zero RMS, black = maximum */
1044 static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
1045 static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
1046 static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
1047 static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
1048 static int nlevels=40,skip=1;
1049 static real scalemax=-1.0,rmsdcut=0.1,rmsmin=0.0;
1050 gmx_bool bRMSdist=FALSE,bBinary=FALSE,bAverage=FALSE,bFit=TRUE;
1051 static int niter=10000,seed=1993,write_ncl=0,write_nst=1,minstruct=1;
1052 static real kT=1e-3;
1053 static int M=10,P=3;
1054 output_env_t oenv;
1055 gmx_rmpbc_t gpbc=NULL;
1057 t_pargs pa[] = {
1058 { "-dista", FALSE, etBOOL, {&bRMSdist},
1059 "Use RMSD of distances instead of RMS deviation" },
1060 { "-nlevels",FALSE,etINT, {&nlevels},
1061 "Discretize RMSD matrix in # levels" },
1062 { "-cutoff",FALSE, etREAL, {&rmsdcut},
1063 "RMSD cut-off (nm) for two structures to be neighbor" },
1064 { "-fit", FALSE, etBOOL, {&bFit},
1065 "Use least squares fitting before RMSD calculation" },
1066 { "-max", FALSE, etREAL, {&scalemax},
1067 "Maximum level in RMSD matrix" },
1068 { "-skip", FALSE, etINT, {&skip},
1069 "Only analyze every nr-th frame" },
1070 { "-av", FALSE, etBOOL, {&bAverage},
1071 "Write average iso middle structure for each cluster" },
1072 { "-wcl", FALSE, etINT, {&write_ncl},
1073 "Write all structures for first # clusters to numbered files" },
1074 { "-nst", FALSE, etINT, {&write_nst},
1075 "Only write all structures if more than # per cluster" },
1076 { "-rmsmin",FALSE, etREAL, {&rmsmin},
1077 "minimum rms difference with rest of cluster for writing structures" },
1078 { "-method",FALSE, etENUM, {methodname},
1079 "Method for cluster determination" },
1080 { "-minstruct", FALSE, etINT, {&minstruct},
1081 "Minimum number of structures in cluster for coloring in the [TT].xpm[tt] file" },
1082 { "-binary",FALSE, etBOOL, {&bBinary},
1083 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1084 "is given by [TT]-cutoff[tt]" },
1085 { "-M", FALSE, etINT, {&M},
1086 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1087 "0 is use cutoff" },
1088 { "-P", FALSE, etINT, {&P},
1089 "Number of identical nearest neighbors required to form a cluster" },
1090 { "-seed", FALSE, etINT, {&seed},
1091 "Random number seed for Monte Carlo clustering algorithm" },
1092 { "-niter", FALSE, etINT, {&niter},
1093 "Number of iterations for MC" },
1094 { "-kT", FALSE, etREAL, {&kT},
1095 "Boltzmann weighting factor for Monte Carlo optimization "
1096 "(zero turns off uphill steps)" },
1097 { "-pbc", FALSE, etBOOL,
1098 { &bPBC }, "PBC check" }
1100 t_filenm fnm[] = {
1101 { efTRX, "-f", NULL, ffOPTRD },
1102 { efTPS, "-s", NULL, ffOPTRD },
1103 { efNDX, NULL, NULL, ffOPTRD },
1104 { efXPM, "-dm", "rmsd", ffOPTRD },
1105 { efXPM, "-o", "rmsd-clust", ffWRITE },
1106 { efLOG, "-g", "cluster", ffWRITE },
1107 { efXVG, "-dist", "rmsd-dist", ffOPTWR },
1108 { efXVG, "-ev", "rmsd-eig", ffOPTWR },
1109 { efXVG, "-sz", "clust-size", ffOPTWR},
1110 { efXPM, "-tr", "clust-trans",ffOPTWR},
1111 { efXVG, "-ntr", "clust-trans",ffOPTWR},
1112 { efXVG, "-clid", "clust-id.xvg",ffOPTWR},
1113 { efTRX, "-cl", "clusters.pdb", ffOPTWR }
1115 #define NFILE asize(fnm)
1117 CopyRight(stderr,argv[0]);
1118 parse_common_args(&argc,argv,
1119 PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
1120 NFILE,fnm,asize(pa),pa,asize(desc),desc,0,NULL,
1121 &oenv);
1123 /* parse options */
1124 bReadMat = opt2bSet("-dm",NFILE,fnm);
1125 bReadTraj = opt2bSet("-f",NFILE,fnm) || !bReadMat;
1126 if ( opt2parg_bSet("-av",asize(pa),pa) ||
1127 opt2parg_bSet("-wcl",asize(pa),pa) ||
1128 opt2parg_bSet("-nst",asize(pa),pa) ||
1129 opt2parg_bSet("-rmsmin",asize(pa),pa) ||
1130 opt2bSet("-cl",NFILE,fnm) )
1131 trx_out_fn = opt2fn("-cl",NFILE,fnm);
1132 else
1133 trx_out_fn = NULL;
1134 if (bReadMat && output_env_get_time_factor(oenv)!=1) {
1135 fprintf(stderr,
1136 "\nWarning: assuming the time unit in %s is %s\n",
1137 opt2fn("-dm",NFILE,fnm),output_env_get_time_unit(oenv));
1139 if (trx_out_fn && !bReadTraj)
1140 fprintf(stderr,"\nWarning: "
1141 "cannot write cluster structures without reading trajectory\n"
1142 " ignoring option -cl %s\n", trx_out_fn);
1144 method=1;
1145 while ( method < m_nr && gmx_strcasecmp(methodname[0], methodname[method])!=0 )
1146 method++;
1147 if (method == m_nr)
1148 gmx_fatal(FARGS,"Invalid method");
1150 bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
1151 method == m_gromos );
1153 /* Open log file */
1154 log = ftp2FILE(efLOG,NFILE,fnm,"w");
1156 fprintf(stderr,"Using %s method for clustering\n",methodname[0]);
1157 fprintf(log,"Using %s method for clustering\n",methodname[0]);
1159 /* check input and write parameters to log file */
1160 bUseRmsdCut = FALSE;
1161 if (method == m_jarvis_patrick) {
1162 bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff",asize(pa),pa);
1163 if ((M<0) || (M == 1))
1164 gmx_fatal(FARGS,"M (%d) must be 0 or larger than 1",M);
1165 if (M < 2) {
1166 sprintf(buf1,"Will use P=%d and RMSD cutoff (%g)",P,rmsdcut);
1167 bUseRmsdCut = TRUE;
1168 } else {
1169 if (P >= M)
1170 gmx_fatal(FARGS,"Number of neighbors required (P) must be less than M");
1171 if (bJP_RMSD) {
1172 sprintf(buf1,"Will use P=%d, M=%d and RMSD cutoff (%g)",P,M,rmsdcut);
1173 bUseRmsdCut = TRUE;
1174 } else
1175 sprintf(buf1,"Will use P=%d, M=%d",P,M);
1177 ffprintf1(stderr,log,buf,"%s for determining the neighbors\n\n",buf1);
1178 } else /* method != m_jarvis */
1179 bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
1180 if (bUseRmsdCut && method != m_jarvis_patrick)
1181 fprintf(log,"Using RMSD cutoff %g nm\n",rmsdcut);
1182 if ( method==m_monte_carlo )
1183 fprintf(log,"Using %d iterations\n",niter);
1185 if (skip < 1)
1186 gmx_fatal(FARGS,"skip (%d) should be >= 1",skip);
1188 /* get input */
1189 if (bReadTraj) {
1190 /* don't read mass-database as masses (and top) are not used */
1191 read_tps_conf(ftp2fn(efTPS,NFILE,fnm),buf,&top,&ePBC,&xtps,NULL,box,
1192 bAnalyze);
1193 if(bPBC) {
1194 gpbc = gmx_rmpbc_init(&top.idef,ePBC,top.atoms.nr,box);
1197 fprintf(stderr,"\nSelect group for least squares fit%s:\n",
1198 bReadMat?"":" and RMSD calculation");
1199 get_index(&(top.atoms),ftp2fn_null(efNDX,NFILE,fnm),
1200 1,&ifsize,&fitidx,&grpname);
1201 if (trx_out_fn) {
1202 fprintf(stderr,"\nSelect group for output:\n");
1203 get_index(&(top.atoms),ftp2fn_null(efNDX,NFILE,fnm),
1204 1,&iosize,&outidx,&grpname);
1205 /* merge and convert both index groups: */
1206 /* first copy outidx to index. let outidx refer to elements in index */
1207 snew(index,iosize);
1208 isize = iosize;
1209 for(i=0; i<iosize; i++) {
1210 index[i]=outidx[i];
1211 outidx[i]=i;
1213 /* now lookup elements from fitidx in index, add them if necessary
1214 and also let fitidx refer to elements in index */
1215 for(i=0; i<ifsize; i++) {
1216 j=0;
1217 while (j<isize && index[j]!=fitidx[i])
1218 j++;
1219 if (j>=isize) {
1220 /* slow this way, but doesn't matter much */
1221 isize++;
1222 srenew(index,isize);
1224 index[j]=fitidx[i];
1225 fitidx[i]=j;
1227 } else { /* !trx_out_fn */
1228 isize = ifsize;
1229 snew(index, isize);
1230 for(i=0; i<ifsize; i++) {
1231 index[i]=fitidx[i];
1232 fitidx[i]=i;
1236 /* Initiate arrays */
1237 snew(d1,isize);
1238 snew(d2,isize);
1239 for(i=0; (i<isize); i++) {
1240 snew(d1[i],isize);
1241 snew(d2[i],isize);
1244 if (bReadTraj) {
1245 /* Loop over first coordinate file */
1246 fn = opt2fn("-f",NFILE,fnm);
1248 xx = read_whole_trj(fn,isize,index,skip,&nf,&time,oenv,bPBC,gpbc);
1249 output_env_conv_times(oenv, nf, time);
1250 if (!bRMSdist || bAnalyze) {
1251 /* Center all frames on zero */
1252 snew(mass,isize);
1253 for(i=0; i<ifsize; i++)
1254 mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1255 if (bFit)
1256 for(i=0; i<nf; i++)
1257 reset_x(ifsize,fitidx,isize,NULL,xx[i],mass);
1259 if (bPBC)
1261 gmx_rmpbc_done(gpbc);
1265 if (bReadMat) {
1266 fprintf(stderr,"Reading rms distance matrix ");
1267 read_xpm_matrix(opt2fn("-dm",NFILE,fnm),&readmat);
1268 fprintf(stderr,"\n");
1269 if (readmat[0].nx != readmat[0].ny)
1270 gmx_fatal(FARGS,"Matrix (%dx%d) is not square",
1271 readmat[0].nx,readmat[0].ny);
1272 if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1273 gmx_fatal(FARGS,"Matrix size (%dx%d) does not match the number of "
1274 "frames (%d)",readmat[0].nx,readmat[0].ny,nf);
1276 nf = readmat[0].nx;
1277 sfree(time);
1278 time = readmat[0].axis_x;
1279 time_invfac = output_env_get_time_invfactor(oenv);
1280 for(i=0; i<nf; i++)
1281 time[i] *= time_invfac;
1283 rms = init_mat(readmat[0].nx,method == m_diagonalize);
1284 convert_mat(&(readmat[0]),rms);
1286 nlevels = readmat[0].nmap;
1287 } else { /* !bReadMat */
1288 rms = init_mat(nf,method == m_diagonalize);
1289 nrms = (nf*(nf-1))/2;
1290 if (!bRMSdist) {
1291 fprintf(stderr,"Computing %dx%d RMS deviation matrix\n",nf,nf);
1292 snew(x1,isize);
1293 for(i1=0; (i1<nf); i1++) {
1294 for(i2=i1+1; (i2<nf); i2++) {
1295 for(i=0; i<isize; i++)
1296 copy_rvec(xx[i1][i],x1[i]);
1297 if (bFit)
1298 do_fit(isize,mass,xx[i2],x1);
1299 rmsd = rmsdev(isize,mass,xx[i2],x1);
1300 set_mat_entry(rms,i1,i2,rmsd);
1302 nrms -= (nf-i1-1);
1303 fprintf(stderr,"\r# RMSD calculations left: %d ",nrms);
1305 } else { /* bRMSdist */
1306 fprintf(stderr,"Computing %dx%d RMS distance deviation matrix\n",nf,nf);
1307 for(i1=0; (i1<nf); i1++) {
1308 calc_dist(isize,xx[i1],d1);
1309 for(i2=i1+1; (i2<nf); i2++) {
1310 calc_dist(isize,xx[i2],d2);
1311 set_mat_entry(rms,i1,i2,rms_dist(isize,d1,d2));
1313 nrms -= (nf-i1-1);
1314 fprintf(stderr,"\r# RMSD calculations left: %d ",nrms);
1317 fprintf(stderr,"\n\n");
1319 ffprintf2(stderr,log,buf,"The RMSD ranges from %g to %g nm\n",
1320 rms->minrms,rms->maxrms);
1321 ffprintf1(stderr,log,buf,"Average RMSD is %g\n",2*rms->sumrms/(nf*(nf-1)));
1322 ffprintf1(stderr,log,buf,"Number of structures for matrix %d\n",nf);
1323 ffprintf1(stderr,log,buf,"Energy of the matrix is %g nm\n",mat_energy(rms));
1324 if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
1325 fprintf(stderr,"WARNING: rmsd cutoff %g is outside range of rmsd values "
1326 "%g to %g\n",rmsdcut,rms->minrms,rms->maxrms);
1327 if (bAnalyze && (rmsmin < rms->minrms) )
1328 fprintf(stderr,"WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1329 rmsmin,rms->minrms);
1330 if (bAnalyze && (rmsmin > rmsdcut) )
1331 fprintf(stderr,"WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1332 rmsmin,rmsdcut);
1334 /* Plot the rmsd distribution */
1335 rmsd_distribution(opt2fn("-dist",NFILE,fnm),rms,oenv);
1337 if (bBinary) {
1338 for(i1=0; (i1 < nf); i1++)
1339 for(i2=0; (i2 < nf); i2++)
1340 if (rms->mat[i1][i2] < rmsdcut)
1341 rms->mat[i1][i2] = 0;
1342 else
1343 rms->mat[i1][i2] = 1;
1346 snew(clust.cl,nf);
1347 switch (method) {
1348 case m_linkage:
1349 /* Now sort the matrix and write it out again */
1350 gather(rms,rmsdcut,&clust);
1351 break;
1352 case m_diagonalize:
1353 /* Do a diagonalization */
1354 snew(eigval,nf);
1355 snew(tmp,nf*nf);
1356 memcpy(tmp,rms->mat[0],nf*nf*sizeof(real));
1357 eigensolver(tmp,nf,0,nf,eigval,rms->mat[0]);
1358 sfree(tmp);
1360 fp = xvgropen(opt2fn("-ev",NFILE,fnm),"RMSD matrix Eigenvalues",
1361 "Eigenvector index","Eigenvalues (nm\\S2\\N)",oenv);
1362 for(i=0; (i<nf); i++)
1363 fprintf(fp,"%10d %10g\n",i,eigval[i]);
1364 ffclose(fp);
1365 break;
1366 case m_monte_carlo:
1367 mc_optimize(log,rms,niter,&seed,kT);
1368 swap_mat(rms);
1369 reset_index(rms);
1370 break;
1371 case m_jarvis_patrick:
1372 jarvis_patrick(rms->nn,rms->mat,M,P,bJP_RMSD ? rmsdcut : -1,&clust);
1373 break;
1374 case m_gromos:
1375 gromos(rms->nn,rms->mat,rmsdcut,&clust);
1376 break;
1377 default:
1378 gmx_fatal(FARGS,"DEATH HORROR unknown method \"%s\"",methodname[0]);
1381 if (method == m_monte_carlo || method == m_diagonalize)
1382 fprintf(stderr,"Energy of the matrix after clustering is %g nm\n",
1383 mat_energy(rms));
1385 if (bAnalyze) {
1386 if (minstruct > 1) {
1387 ncluster = plot_clusters(nf,rms->mat,&clust,nlevels,minstruct);
1388 } else {
1389 mark_clusters(nf,rms->mat,rms->maxrms,&clust);
1391 init_t_atoms(&useatoms,isize,FALSE);
1392 snew(usextps, isize);
1393 useatoms.resinfo = top.atoms.resinfo;
1394 for(i=0; i<isize; i++) {
1395 useatoms.atomname[i]=top.atoms.atomname[index[i]];
1396 useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1397 useatoms.nres = max(useatoms.nres,useatoms.atom[i].resind+1);
1398 copy_rvec(xtps[index[i]],usextps[i]);
1400 useatoms.nr=isize;
1401 analyze_clusters(nf,&clust,rms->mat,isize,&useatoms,usextps,mass,xx,time,
1402 ifsize,fitidx,iosize,outidx,
1403 bReadTraj?trx_out_fn:NULL,
1404 opt2fn_null("-sz",NFILE,fnm),
1405 opt2fn_null("-tr",NFILE,fnm),
1406 opt2fn_null("-ntr",NFILE,fnm),
1407 opt2fn_null("-clid",NFILE,fnm),
1408 bAverage, write_ncl, write_nst, rmsmin, bFit, log,
1409 rlo_bot,rhi_bot,oenv);
1411 ffclose(log);
1413 if (bBinary && !bAnalyze)
1414 /* Make the clustering visible */
1415 for(i2=0; (i2 < nf); i2++)
1416 for(i1=i2+1; (i1 < nf); i1++)
1417 if (rms->mat[i1][i2])
1418 rms->mat[i1][i2] = rms->maxrms;
1420 fp = opt2FILE("-o",NFILE,fnm,"w");
1421 fprintf(stderr,"Writing rms distance/clustering matrix ");
1422 if (bReadMat) {
1423 write_xpm(fp,0,readmat[0].title,readmat[0].legend,readmat[0].label_x,
1424 readmat[0].label_y,nf,nf,readmat[0].axis_x,readmat[0].axis_y,
1425 rms->mat,0.0,rms->maxrms,rlo_top,rhi_top,&nlevels);
1427 else {
1428 sprintf(buf,"Time (%s)",output_env_get_time_unit(oenv));
1429 sprintf(title,"RMS%sDeviation / Cluster Index",
1430 bRMSdist ? " Distance " : " ");
1431 if (minstruct > 1) {
1432 write_xpm_split(fp,0,title,"RMSD (nm)",buf,buf,
1433 nf,nf,time,time,rms->mat,0.0,rms->maxrms,&nlevels,
1434 rlo_top,rhi_top,0.0,(real) ncluster,
1435 &ncluster,TRUE,rlo_bot,rhi_bot);
1436 } else {
1437 write_xpm(fp,0,title,"RMSD (nm)",buf,buf,
1438 nf,nf,time,time,rms->mat,0.0,rms->maxrms,
1439 rlo_top,rhi_top,&nlevels);
1442 fprintf(stderr,"\n");
1443 ffclose(fp);
1445 /* now show what we've done */
1446 do_view(oenv,opt2fn("-o",NFILE,fnm),"-nxy");
1447 do_view(oenv,opt2fn_null("-sz",NFILE,fnm),"-nxy");
1448 if (method == m_diagonalize)
1449 do_view(oenv,opt2fn_null("-ev",NFILE,fnm),"-nxy");
1450 do_view(oenv,opt2fn("-dist",NFILE,fnm),"-nxy");
1451 if (bAnalyze) {
1452 do_view(oenv,opt2fn_null("-tr",NFILE,fnm),"-nxy");
1453 do_view(oenv,opt2fn_null("-ntr",NFILE,fnm),"-nxy");
1454 do_view(oenv,opt2fn_null("-clid",NFILE,fnm),"-nxy");
1457 /* Thank the user for her patience */
1458 thanx(stderr);
1460 return 0;