Fix problems with intel-mpi
[gromacs.git] / src / tools / gmx_cluster.c
blob604a24f84c95096f3275de8e65da5d51bf20fcc3
1 /*
3 * This source code is part of
5 * G R O M A C S
7 * GROningen MAchine for Chemical Simulations
9 * VERSION 3.2.0
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2004, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
32 * And Hey:
33 * Green Red Orange Magenta Azure Cyan Skyblue
35 #ifdef HAVE_CONFIG_H
36 #include <config.h>
37 #endif
38 #include <math.h>
39 #include <string.h>
40 #include <ctype.h>
41 #include "macros.h"
42 #include "smalloc.h"
43 #include "typedefs.h"
44 #include "copyrite.h"
45 #include "statutil.h"
46 #include "tpxio.h"
47 #include "string2.h"
48 #include "vec.h"
49 #include "macros.h"
50 #include "index.h"
51 #include "random.h"
52 #include "pbc.h"
53 #include "rmpbc.h"
54 #include "xvgr.h"
55 #include "futil.h"
56 #include "matio.h"
57 #include "cmat.h"
58 #include "do_fit.h"
59 #include "trnio.h"
60 #include "viewit.h"
61 #include "gmx_ana.h"
63 #include "gromacs/linearalgebra/eigensolver.h"
65 /* print to two file pointers at once (i.e. stderr and log) */
66 static inline
67 void lo_ffprintf(FILE *fp1, FILE *fp2, const char *buf)
69 fprintf(fp1, "%s", buf);
70 fprintf(fp2, "%s", buf);
73 /* just print a prepared buffer to fp1 and fp2 */
74 static inline
75 void ffprintf(FILE *fp1, FILE *fp2, const char *buf)
77 lo_ffprintf(fp1, fp2, buf);
80 /* prepare buffer with one argument, then print to fp1 and fp2 */
81 static inline
82 void ffprintf_d(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg)
84 sprintf(buf, fmt, arg);
85 lo_ffprintf(fp1, fp2, buf);
88 /* prepare buffer with one argument, then print to fp1 and fp2 */
89 static inline
90 void ffprintf_g(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg)
92 sprintf(buf, fmt, arg);
93 lo_ffprintf(fp1, fp2, buf);
96 /* prepare buffer with one argument, then print to fp1 and fp2 */
97 static inline
98 void ffprintf_s(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg)
100 sprintf(buf, fmt, arg);
101 lo_ffprintf(fp1, fp2, buf);
104 /* prepare buffer with two arguments, then print to fp1 and fp2 */
105 static inline
106 void ffprintf_dd(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg1, int arg2)
108 sprintf(buf, fmt, arg1, arg2);
109 lo_ffprintf(fp1, fp2, buf);
112 /* prepare buffer with two arguments, then print to fp1 and fp2 */
113 static inline
114 void ffprintf_gg(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg1, real arg2)
116 sprintf(buf, fmt, arg1, arg2);
117 lo_ffprintf(fp1, fp2, buf);
120 /* prepare buffer with two arguments, then print to fp1 and fp2 */
121 static inline
122 void ffprintf_ss(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg1, const char *arg2)
124 sprintf(buf, fmt, arg1, arg2);
125 lo_ffprintf(fp1, fp2, buf);
128 typedef struct {
129 int ncl;
130 int *cl;
131 } t_clusters;
133 typedef struct {
134 int nr;
135 int *nb;
136 } t_nnb;
138 void pr_energy(FILE *fp, real e)
140 fprintf(fp, "Energy: %8.4f\n", e);
143 void cp_index(int nn, int from[], int to[])
145 int i;
147 for (i = 0; (i < nn); i++)
149 to[i] = from[i];
153 void mc_optimize(FILE *log, t_mat *m, int maxiter, int *seed, real kT)
155 real e[2], ei, ej, efac;
156 int *low_index;
157 int cur = 0;
158 #define next (1-cur)
159 int i, isw, jsw, iisw, jjsw, nn;
161 fprintf(stderr, "\nDoing Monte Carlo clustering\n");
162 nn = m->nn;
163 snew(low_index, nn);
164 cp_index(nn, m->m_ind, low_index);
165 if (getenv("TESTMC"))
167 e[cur] = mat_energy(m);
168 pr_energy(log, e[cur]);
169 fprintf(log, "Doing 1000 random swaps\n");
170 for (i = 0; (i < 1000); i++)
174 isw = nn*rando(seed);
175 jsw = nn*rando(seed);
177 while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
178 iisw = m->m_ind[isw];
179 jjsw = m->m_ind[jsw];
180 m->m_ind[isw] = jjsw;
181 m->m_ind[jsw] = iisw;
184 e[cur] = mat_energy(m);
185 pr_energy(log, e[cur]);
186 for (i = 0; (i < maxiter); i++)
190 isw = nn*rando(seed);
191 jsw = nn*rando(seed);
193 while ((isw == jsw) || (isw >= nn) || (jsw >= nn));
195 iisw = m->m_ind[isw];
196 jjsw = m->m_ind[jsw];
197 ei = row_energy(nn, iisw, m->mat[jsw]);
198 ej = row_energy(nn, jjsw, m->mat[isw]);
200 e[next] = e[cur] + (ei+ej-EROW(m, isw)-EROW(m, jsw))/nn;
202 efac = kT ? exp((e[next]-e[cur])/kT) : -1;
203 if ((e[next] > e[cur]) || (efac > rando(seed)))
206 if (e[next] > e[cur])
208 cp_index(nn, m->m_ind, low_index);
210 else
212 fprintf(log, "Taking uphill step\n");
215 /* Now swapping rows */
216 m->m_ind[isw] = jjsw;
217 m->m_ind[jsw] = iisw;
218 EROW(m, isw) = ei;
219 EROW(m, jsw) = ej;
220 cur = next;
221 fprintf(log, "Iter: %d Swapped %4d and %4d (now %g)",
222 i, isw, jsw, mat_energy(m));
223 pr_energy(log, e[cur]);
226 /* Now restore the highest energy index */
227 cp_index(nn, low_index, m->m_ind);
230 static void calc_dist(int nind, rvec x[], real **d)
232 int i, j;
233 real *xi;
234 rvec dx;
236 for (i = 0; (i < nind-1); i++)
238 xi = x[i];
239 for (j = i+1; (j < nind); j++)
241 /* Should use pbc_dx when analysing multiple molecueles,
242 * but the box is not stored for every frame.
244 rvec_sub(xi, x[j], dx);
245 d[i][j] = norm(dx);
250 static real rms_dist(int isize, real **d, real **d_r)
252 int i, j;
253 real r, r2;
255 r2 = 0.0;
256 for (i = 0; (i < isize-1); i++)
258 for (j = i+1; (j < isize); j++)
260 r = d[i][j]-d_r[i][j];
261 r2 += r*r;
264 r2 /= (isize*(isize-1))/2;
266 return sqrt(r2);
269 static int rms_dist_comp(const void *a, const void *b)
271 t_dist *da, *db;
273 da = (t_dist *)a;
274 db = (t_dist *)b;
276 if (da->dist - db->dist < 0)
278 return -1;
280 else if (da->dist - db->dist > 0)
282 return 1;
284 return 0;
287 static int clust_id_comp(const void *a, const void *b)
289 t_clustid *da, *db;
291 da = (t_clustid *)a;
292 db = (t_clustid *)b;
294 return da->clust - db->clust;
297 static int nrnb_comp(const void *a, const void *b)
299 t_nnb *da, *db;
301 da = (t_nnb *)a;
302 db = (t_nnb *)b;
304 /* return the b-a, we want highest first */
305 return db->nr - da->nr;
308 void gather(t_mat *m, real cutoff, t_clusters *clust)
310 t_clustid *c;
311 t_dist *d;
312 int i, j, k, nn, cid, n1, diff;
313 gmx_bool bChange;
315 /* First we sort the entries in the RMSD matrix */
316 n1 = m->nn;
317 nn = ((n1-1)*n1)/2;
318 snew(d, nn);
319 for (i = k = 0; (i < n1); i++)
321 for (j = i+1; (j < n1); j++, k++)
323 d[k].i = i;
324 d[k].j = j;
325 d[k].dist = m->mat[i][j];
328 if (k != nn)
330 gmx_incons("gather algortihm");
332 qsort(d, nn, sizeof(d[0]), rms_dist_comp);
334 /* Now we make a cluster index for all of the conformations */
335 c = new_clustid(n1);
337 /* Now we check the closest structures, and equalize their cluster numbers */
338 fprintf(stderr, "Linking structures ");
341 fprintf(stderr, "*");
342 bChange = FALSE;
343 for (k = 0; (k < nn) && (d[k].dist < cutoff); k++)
345 diff = c[d[k].j].clust - c[d[k].i].clust;
346 if (diff)
348 bChange = TRUE;
349 if (diff > 0)
351 c[d[k].j].clust = c[d[k].i].clust;
353 else
355 c[d[k].i].clust = c[d[k].j].clust;
360 while (bChange);
361 fprintf(stderr, "\nSorting and renumbering clusters\n");
362 /* Sort on cluster number */
363 qsort(c, n1, sizeof(c[0]), clust_id_comp);
365 /* Renumber clusters */
366 cid = 1;
367 for (k = 1; k < n1; k++)
369 if (c[k].clust != c[k-1].clust)
371 c[k-1].clust = cid;
372 cid++;
374 else
376 c[k-1].clust = cid;
379 c[k-1].clust = cid;
380 if (debug)
382 for (k = 0; (k < n1); k++)
384 fprintf(debug, "Cluster index for conformation %d: %d\n",
385 c[k].conf, c[k].clust);
388 clust->ncl = cid;
389 for (k = 0; k < n1; k++)
391 clust->cl[c[k].conf] = c[k].clust;
394 sfree(c);
395 sfree(d);
398 gmx_bool jp_same(int **nnb, int i, int j, int P)
400 gmx_bool bIn;
401 int k, ii, jj, pp;
403 bIn = FALSE;
404 for (k = 0; nnb[i][k] >= 0; k++)
406 bIn = bIn || (nnb[i][k] == j);
408 if (!bIn)
410 return FALSE;
413 bIn = FALSE;
414 for (k = 0; nnb[j][k] >= 0; k++)
416 bIn = bIn || (nnb[j][k] == i);
418 if (!bIn)
420 return FALSE;
423 pp = 0;
424 for (ii = 0; nnb[i][ii] >= 0; ii++)
426 for (jj = 0; nnb[j][jj] >= 0; jj++)
428 if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
430 pp++;
435 return (pp >= P);
438 static void jarvis_patrick(int n1, real **mat, int M, int P,
439 real rmsdcut, t_clusters *clust)
441 t_dist *row;
442 t_clustid *c;
443 int **nnb;
444 int i, j, k, cid, diff, max;
445 gmx_bool bChange;
446 real **mcpy = NULL;
448 if (rmsdcut < 0)
450 rmsdcut = 10000;
453 /* First we sort the entries in the RMSD matrix row by row.
454 * This gives us the nearest neighbor list.
456 snew(nnb, n1);
457 snew(row, n1);
458 for (i = 0; (i < n1); i++)
460 for (j = 0; (j < n1); j++)
462 row[j].j = j;
463 row[j].dist = mat[i][j];
465 qsort(row, n1, sizeof(row[0]), rms_dist_comp);
466 if (M > 0)
468 /* Put the M nearest neighbors in the list */
469 snew(nnb[i], M+1);
470 for (j = k = 0; (k < M) && (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
472 if (row[j].j != i)
474 nnb[i][k] = row[j].j;
475 k++;
478 nnb[i][k] = -1;
480 else
482 /* Put all neighbors nearer than rmsdcut in the list */
483 max = 0;
484 k = 0;
485 for (j = 0; (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
487 if (row[j].j != i)
489 if (k >= max)
491 max += 10;
492 srenew(nnb[i], max);
494 nnb[i][k] = row[j].j;
495 k++;
498 if (k == max)
500 srenew(nnb[i], max+1);
502 nnb[i][k] = -1;
505 sfree(row);
506 if (debug)
508 fprintf(debug, "Nearest neighborlist. M = %d, P = %d\n", M, P);
509 for (i = 0; (i < n1); i++)
511 fprintf(debug, "i:%5d nbs:", i);
512 for (j = 0; nnb[i][j] >= 0; j++)
514 fprintf(debug, "%5d[%5.3f]", nnb[i][j], mat[i][nnb[i][j]]);
516 fprintf(debug, "\n");
520 c = new_clustid(n1);
521 fprintf(stderr, "Linking structures ");
522 /* Use mcpy for temporary storage of booleans */
523 mcpy = mk_matrix(n1, n1, FALSE);
524 for (i = 0; i < n1; i++)
526 for (j = i+1; j < n1; j++)
528 mcpy[i][j] = jp_same(nnb, i, j, P);
533 fprintf(stderr, "*");
534 bChange = FALSE;
535 for (i = 0; i < n1; i++)
537 for (j = i+1; j < n1; j++)
539 if (mcpy[i][j])
541 diff = c[j].clust - c[i].clust;
542 if (diff)
544 bChange = TRUE;
545 if (diff > 0)
547 c[j].clust = c[i].clust;
549 else
551 c[i].clust = c[j].clust;
558 while (bChange);
560 fprintf(stderr, "\nSorting and renumbering clusters\n");
561 /* Sort on cluster number */
562 qsort(c, n1, sizeof(c[0]), clust_id_comp);
564 /* Renumber clusters */
565 cid = 1;
566 for (k = 1; k < n1; k++)
568 if (c[k].clust != c[k-1].clust)
570 c[k-1].clust = cid;
571 cid++;
573 else
575 c[k-1].clust = cid;
578 c[k-1].clust = cid;
579 clust->ncl = cid;
580 for (k = 0; k < n1; k++)
582 clust->cl[c[k].conf] = c[k].clust;
584 if (debug)
586 for (k = 0; (k < n1); k++)
588 fprintf(debug, "Cluster index for conformation %d: %d\n",
589 c[k].conf, c[k].clust);
593 /* Again, I don't see the point in this... (AF) */
594 /* for(i=0; (i<n1); i++) { */
595 /* for(j=0; (j<n1); j++) */
596 /* mcpy[c[i].conf][c[j].conf] = mat[i][j]; */
597 /* } */
598 /* for(i=0; (i<n1); i++) { */
599 /* for(j=0; (j<n1); j++) */
600 /* mat[i][j] = mcpy[i][j]; */
601 /* } */
602 done_matrix(n1, &mcpy);
604 sfree(c);
605 for (i = 0; (i < n1); i++)
607 sfree(nnb[i]);
609 sfree(nnb);
612 static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)
614 int i, j;
616 /* dump neighbor list */
617 fprintf(fp, "%s", title);
618 for (i = 0; (i < n1); i++)
620 fprintf(fp, "i:%5d #:%5d nbs:", i, nnb[i].nr);
621 for (j = 0; j < nnb[i].nr; j++)
623 fprintf(fp, "%5d", nnb[i].nb[j]);
625 fprintf(fp, "\n");
629 static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)
631 t_dist *row;
632 t_nnb *nnb;
633 int i, j, k, j1, max;
635 /* Put all neighbors nearer than rmsdcut in the list */
636 fprintf(stderr, "Making list of neighbors within cutoff ");
637 snew(nnb, n1);
638 snew(row, n1);
639 for (i = 0; (i < n1); i++)
641 max = 0;
642 k = 0;
643 /* put all neighbors within cut-off in list */
644 for (j = 0; j < n1; j++)
646 if (mat[i][j] < rmsdcut)
648 if (k >= max)
650 max += 10;
651 srenew(nnb[i].nb, max);
653 nnb[i].nb[k] = j;
654 k++;
657 /* store nr of neighbors, we'll need that */
658 nnb[i].nr = k;
659 if (i%(1+n1/100) == 0)
661 fprintf(stderr, "%3d%%\b\b\b\b", (i*100+1)/n1);
664 fprintf(stderr, "%3d%%\n", 100);
665 sfree(row);
667 /* sort neighbor list on number of neighbors, largest first */
668 qsort(nnb, n1, sizeof(nnb[0]), nrnb_comp);
670 if (debug)
672 dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
675 /* turn first structure with all its neighbors (largest) into cluster
676 remove them from pool of structures and repeat for all remaining */
677 fprintf(stderr, "Finding clusters %4d", 0);
678 /* cluster id's start at 1: */
679 k = 1;
680 while (nnb[0].nr)
682 /* set cluster id (k) for first item in neighborlist */
683 for (j = 0; j < nnb[0].nr; j++)
685 clust->cl[nnb[0].nb[j]] = k;
687 /* mark as done */
688 nnb[0].nr = 0;
689 sfree(nnb[0].nb);
691 /* adjust number of neighbors for others, taking removals into account: */
692 for (i = 1; i < n1 && nnb[i].nr; i++)
694 j1 = 0;
695 for (j = 0; j < nnb[i].nr; j++)
697 /* if this neighbor wasn't removed */
698 if (clust->cl[nnb[i].nb[j]] == 0)
700 /* shift the rest (j1<=j) */
701 nnb[i].nb[j1] = nnb[i].nb[j];
702 /* next */
703 j1++;
706 /* now j1 is the new number of neighbors */
707 nnb[i].nr = j1;
709 /* sort again on nnb[].nr, because we have new # neighbors: */
710 /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
711 qsort(nnb, i, sizeof(nnb[0]), nrnb_comp);
713 fprintf(stderr, "\b\b\b\b%4d", k);
714 /* new cluster id */
715 k++;
717 fprintf(stderr, "\n");
718 sfree(nnb);
719 if (debug)
721 fprintf(debug, "Clusters (%d):\n", k);
722 for (i = 0; i < n1; i++)
724 fprintf(debug, " %3d", clust->cl[i]);
726 fprintf(debug, "\n");
729 clust->ncl = k-1;
732 rvec **read_whole_trj(const char *fn, int isize, atom_id index[], int skip,
733 int *nframe, real **time, const output_env_t oenv, gmx_bool bPBC, gmx_rmpbc_t gpbc)
735 rvec **xx, *x;
736 matrix box;
737 real t;
738 int i, i0, j, max_nf;
739 int natom;
740 t_trxstatus *status;
743 max_nf = 0;
744 xx = NULL;
745 *time = NULL;
746 natom = read_first_x(oenv, &status, fn, &t, &x, box);
747 i = 0;
748 i0 = 0;
751 if (bPBC)
753 gmx_rmpbc(gpbc, natom, box, x);
755 if (i0 >= max_nf)
757 max_nf += 10;
758 srenew(xx, max_nf);
759 srenew(*time, max_nf);
761 if ((i % skip) == 0)
763 snew(xx[i0], isize);
764 /* Store only the interesting atoms */
765 for (j = 0; (j < isize); j++)
767 copy_rvec(x[index[j]], xx[i0][j]);
769 (*time)[i0] = t;
770 i0++;
772 i++;
774 while (read_next_x(oenv, status, &t, natom, x, box));
775 fprintf(stderr, "Allocated %lu bytes for frames\n",
776 (unsigned long) (max_nf*isize*sizeof(**xx)));
777 fprintf(stderr, "Read %d frames from trajectory %s\n", i0, fn);
778 *nframe = i0;
779 sfree(x);
781 return xx;
784 static int plot_clusters(int nf, real **mat, t_clusters *clust,
785 int nlevels, int minstruct)
787 int i, j, ncluster, ci;
788 int *cl_id, *nstruct, *strind;
790 snew(cl_id, nf);
791 snew(nstruct, nf);
792 snew(strind, nf);
793 for (i = 0; i < nf; i++)
795 strind[i] = 0;
796 cl_id[i] = clust->cl[i];
797 nstruct[cl_id[i]]++;
799 ncluster = 0;
800 for (i = 0; i < nf; i++)
802 if (nstruct[i] >= minstruct)
804 ncluster++;
805 for (j = 0; (j < nf); j++)
807 if (cl_id[j] == i)
809 strind[j] = ncluster;
814 ncluster++;
815 fprintf(stderr, "There are %d clusters with at least %d conformations\n",
816 ncluster, minstruct);
818 for (i = 0; (i < nf); i++)
820 ci = cl_id[i];
821 for (j = 0; j < i; j++)
823 if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct))
825 /* color different clusters with different colors, as long as
826 we don't run out of colors */
827 mat[i][j] = strind[i];
829 else
831 mat[i][j] = 0;
835 sfree(strind);
836 sfree(nstruct);
837 sfree(cl_id);
839 return ncluster;
842 static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)
844 int i, j, v;
846 for (i = 0; i < nf; i++)
848 for (j = 0; j < i; j++)
850 if (clust->cl[i] == clust->cl[j])
852 mat[i][j] = val;
854 else
856 mat[i][j] = 0;
862 static char *parse_filename(const char *fn, int maxnr)
864 int i;
865 char *fnout;
866 const char *ext;
867 char buf[STRLEN];
869 if (strchr(fn, '%'))
871 gmx_fatal(FARGS, "will not number filename %s containing '%c'", fn, '%');
873 /* number of digits needed in numbering */
874 i = (int)(log(maxnr)/log(10)) + 1;
875 /* split fn and ext */
876 ext = strrchr(fn, '.');
877 if (!ext)
879 gmx_fatal(FARGS, "cannot separate extension in filename %s", fn);
881 ext++;
882 /* insert e.g. '%03d' between fn and ext */
883 sprintf(buf, "%s%%0%dd.%s", fn, i, ext);
884 snew(fnout, strlen(buf)+1);
885 strcpy(fnout, buf);
887 return fnout;
890 static void ana_trans(t_clusters *clust, int nf,
891 const char *transfn, const char *ntransfn, FILE *log,
892 t_rgb rlo, t_rgb rhi, const output_env_t oenv)
894 FILE *fp;
895 real **trans, *axis;
896 int *ntrans;
897 int i, ntranst, maxtrans;
898 char buf[STRLEN];
900 snew(ntrans, clust->ncl);
901 snew(trans, clust->ncl);
902 snew(axis, clust->ncl);
903 for (i = 0; i < clust->ncl; i++)
905 axis[i] = i+1;
906 snew(trans[i], clust->ncl);
908 ntranst = 0;
909 maxtrans = 0;
910 for (i = 1; i < nf; i++)
912 if (clust->cl[i] != clust->cl[i-1])
914 ntranst++;
915 ntrans[clust->cl[i-1]-1]++;
916 ntrans[clust->cl[i]-1]++;
917 trans[clust->cl[i-1]-1][clust->cl[i]-1]++;
918 maxtrans = max(maxtrans, trans[clust->cl[i]-1][clust->cl[i-1]-1]);
921 ffprintf_dd(stderr, log, buf, "Counted %d transitions in total, "
922 "max %d between two specific clusters\n", ntranst, maxtrans);
923 if (transfn)
925 fp = ffopen(transfn, "w");
926 i = min(maxtrans+1, 80);
927 write_xpm(fp, 0, "Cluster Transitions", "# transitions",
928 "from cluster", "to cluster",
929 clust->ncl, clust->ncl, axis, axis, trans,
930 0, maxtrans, rlo, rhi, &i);
931 ffclose(fp);
933 if (ntransfn)
935 fp = xvgropen(ntransfn, "Cluster Transitions", "Cluster #", "# transitions",
936 oenv);
937 for (i = 0; i < clust->ncl; i++)
939 fprintf(fp, "%5d %5d\n", i+1, ntrans[i]);
941 ffclose(fp);
943 sfree(ntrans);
944 for (i = 0; i < clust->ncl; i++)
946 sfree(trans[i]);
948 sfree(trans);
949 sfree(axis);
952 static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,
953 int natom, t_atoms *atoms, rvec *xtps,
954 real *mass, rvec **xx, real *time,
955 int ifsize, atom_id *fitidx,
956 int iosize, atom_id *outidx,
957 const char *trxfn, const char *sizefn,
958 const char *transfn, const char *ntransfn,
959 const char *clustidfn, gmx_bool bAverage,
960 int write_ncl, int write_nst, real rmsmin,
961 gmx_bool bFit, FILE *log, t_rgb rlo, t_rgb rhi,
962 const output_env_t oenv)
964 FILE *fp = NULL;
965 char buf[STRLEN], buf1[40], buf2[40], buf3[40], *trxsfn;
966 t_trxstatus *trxout = NULL;
967 t_trxstatus *trxsout = NULL;
968 int i, i1, cl, nstr, *structure, first = 0, midstr;
969 gmx_bool *bWrite = NULL;
970 real r, clrmsd, midrmsd;
971 rvec *xav = NULL;
972 matrix zerobox;
974 clear_mat(zerobox);
976 ffprintf_d(stderr, log, buf, "\nFound %d clusters\n\n", clust->ncl);
977 trxsfn = NULL;
978 if (trxfn)
980 /* do we write all structures? */
981 if (write_ncl)
983 trxsfn = parse_filename(trxfn, max(write_ncl, clust->ncl));
984 snew(bWrite, nf);
986 ffprintf_ss(stderr, log, buf, "Writing %s structure for each cluster to %s\n",
987 bAverage ? "average" : "middle", trxfn);
988 if (write_ncl)
990 /* find out what we want to tell the user:
991 Writing [all structures|structures with rmsd > %g] for
992 {all|first %d} clusters {with more than %d structures} to %s */
993 if (rmsmin > 0.0)
995 sprintf(buf1, "structures with rmsd > %g", rmsmin);
997 else
999 sprintf(buf1, "all structures");
1001 buf2[0] = buf3[0] = '\0';
1002 if (write_ncl >= clust->ncl)
1004 if (write_nst == 0)
1006 sprintf(buf2, "all ");
1009 else
1011 sprintf(buf2, "the first %d ", write_ncl);
1013 if (write_nst)
1015 sprintf(buf3, " with more than %d structures", write_nst);
1017 sprintf(buf, "Writing %s for %sclusters%s to %s\n", buf1, buf2, buf3, trxsfn);
1018 ffprintf(stderr, log, buf);
1021 /* Prepare a reference structure for the orientation of the clusters */
1022 if (bFit)
1024 reset_x(ifsize, fitidx, natom, NULL, xtps, mass);
1026 trxout = open_trx(trxfn, "w");
1027 /* Calculate the average structure in each cluster, *
1028 * all structures are fitted to the first struture of the cluster */
1029 snew(xav, natom);
1032 if (transfn || ntransfn)
1034 ana_trans(clust, nf, transfn, ntransfn, log, rlo, rhi, oenv);
1037 if (clustidfn)
1039 fp = xvgropen(clustidfn, "Clusters", output_env_get_xvgr_tlabel(oenv), "Cluster #", oenv);
1040 fprintf(fp, "@ s0 symbol 2\n");
1041 fprintf(fp, "@ s0 symbol size 0.2\n");
1042 fprintf(fp, "@ s0 linestyle 0\n");
1043 for (i = 0; i < nf; i++)
1045 fprintf(fp, "%8g %8d\n", time[i], clust->cl[i]);
1047 ffclose(fp);
1049 if (sizefn)
1051 fp = xvgropen(sizefn, "Cluster Sizes", "Cluster #", "# Structures", oenv);
1052 fprintf(fp, "@g%d type %s\n", 0, "bar");
1054 snew(structure, nf);
1055 fprintf(log, "\n%3s | %3s %4s | %6s %4s | cluster members\n",
1056 "cl.", "#st", "rmsd", "middle", "rmsd");
1057 for (cl = 1; cl <= clust->ncl; cl++)
1059 /* prepare structures (fit, middle, average) */
1060 if (xav)
1062 for (i = 0; i < natom; i++)
1064 clear_rvec(xav[i]);
1067 nstr = 0;
1068 for (i1 = 0; i1 < nf; i1++)
1070 if (clust->cl[i1] == cl)
1072 structure[nstr] = i1;
1073 nstr++;
1074 if (trxfn && (bAverage || write_ncl) )
1076 if (bFit)
1078 reset_x(ifsize, fitidx, natom, NULL, xx[i1], mass);
1080 if (nstr == 1)
1082 first = i1;
1084 else if (bFit)
1086 do_fit(natom, mass, xx[first], xx[i1]);
1088 if (xav)
1090 for (i = 0; i < natom; i++)
1092 rvec_inc(xav[i], xx[i1][i]);
1098 if (sizefn)
1100 fprintf(fp, "%8d %8d\n", cl, nstr);
1102 clrmsd = 0;
1103 midstr = 0;
1104 midrmsd = 10000;
1105 for (i1 = 0; i1 < nstr; i1++)
1107 r = 0;
1108 if (nstr > 1)
1110 for (i = 0; i < nstr; i++)
1112 if (i < i1)
1114 r += rmsd[structure[i]][structure[i1]];
1116 else
1118 r += rmsd[structure[i1]][structure[i]];
1121 r /= (nstr - 1);
1123 if (r < midrmsd)
1125 midstr = structure[i1];
1126 midrmsd = r;
1128 clrmsd += r;
1130 clrmsd /= nstr;
1132 /* dump cluster info to logfile */
1133 if (nstr > 1)
1135 sprintf(buf1, "%6.3f", clrmsd);
1136 if (buf1[0] == '0')
1138 buf1[0] = ' ';
1140 sprintf(buf2, "%5.3f", midrmsd);
1141 if (buf2[0] == '0')
1143 buf2[0] = ' ';
1146 else
1148 sprintf(buf1, "%5s", "");
1149 sprintf(buf2, "%5s", "");
1151 fprintf(log, "%3d | %3d %s | %6g%s |", cl, nstr, buf1, time[midstr], buf2);
1152 for (i = 0; i < nstr; i++)
1154 if ((i % 7 == 0) && i)
1156 sprintf(buf, "\n%3s | %3s %4s | %6s %4s |", "", "", "", "", "");
1158 else
1160 buf[0] = '\0';
1162 i1 = structure[i];
1163 fprintf(log, "%s %6g", buf, time[i1]);
1165 fprintf(log, "\n");
1167 /* write structures to trajectory file(s) */
1168 if (trxfn)
1170 if (write_ncl)
1172 for (i = 0; i < nstr; i++)
1174 bWrite[i] = FALSE;
1177 if (cl < write_ncl+1 && nstr > write_nst)
1179 /* Dump all structures for this cluster */
1180 /* generate numbered filename (there is a %d in trxfn!) */
1181 sprintf(buf, trxsfn, cl);
1182 trxsout = open_trx(buf, "w");
1183 for (i = 0; i < nstr; i++)
1185 bWrite[i] = TRUE;
1186 if (rmsmin > 0.0)
1188 for (i1 = 0; i1 < i && bWrite[i]; i1++)
1190 if (bWrite[i1])
1192 bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
1196 if (bWrite[i])
1198 write_trx(trxsout, iosize, outidx, atoms, i, time[structure[i]], zerobox,
1199 xx[structure[i]], NULL, NULL);
1202 close_trx(trxsout);
1204 /* Dump the average structure for this cluster */
1205 if (bAverage)
1207 for (i = 0; i < natom; i++)
1209 svmul(1.0/nstr, xav[i], xav[i]);
1212 else
1214 for (i = 0; i < natom; i++)
1216 copy_rvec(xx[midstr][i], xav[i]);
1218 if (bFit)
1220 reset_x(ifsize, fitidx, natom, NULL, xav, mass);
1223 if (bFit)
1225 do_fit(natom, mass, xtps, xav);
1227 r = cl;
1228 write_trx(trxout, iosize, outidx, atoms, cl, time[midstr], zerobox, xav, NULL, NULL);
1231 /* clean up */
1232 if (trxfn)
1234 close_trx(trxout);
1235 sfree(xav);
1236 if (write_ncl)
1238 sfree(bWrite);
1241 sfree(structure);
1242 if (trxsfn)
1244 sfree(trxsfn);
1248 static void convert_mat(t_matrix *mat, t_mat *rms)
1250 int i, j;
1252 rms->n1 = mat->nx;
1253 matrix2real(mat, rms->mat);
1254 /* free input xpm matrix data */
1255 for (i = 0; i < mat->nx; i++)
1257 sfree(mat->matrix[i]);
1259 sfree(mat->matrix);
1261 for (i = 0; i < mat->nx; i++)
1263 for (j = i; j < mat->nx; j++)
1265 rms->sumrms += rms->mat[i][j];
1266 rms->maxrms = max(rms->maxrms, rms->mat[i][j]);
1267 if (j != i)
1269 rms->minrms = min(rms->minrms, rms->mat[i][j]);
1273 rms->nn = mat->nx;
1276 int gmx_cluster(int argc, char *argv[])
1278 const char *desc[] = {
1279 "[TT]g_cluster[tt] can cluster structures using several different methods.",
1280 "Distances between structures can be determined from a trajectory",
1281 "or read from an [TT].xpm[tt] matrix file with the [TT]-dm[tt] option.",
1282 "RMS deviation after fitting or RMS deviation of atom-pair distances",
1283 "can be used to define the distance between structures.[PAR]",
1285 "single linkage: add a structure to a cluster when its distance to any",
1286 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
1288 "Jarvis Patrick: add a structure to a cluster when this structure",
1289 "and a structure in the cluster have each other as neighbors and",
1290 "they have a least [TT]P[tt] neighbors in common. The neighbors",
1291 "of a structure are the M closest structures or all structures within",
1292 "[TT]cutoff[tt].[PAR]",
1294 "Monte Carlo: reorder the RMSD matrix using Monte Carlo.[PAR]",
1296 "diagonalization: diagonalize the RMSD matrix.[PAR]",
1298 "gromos: use algorithm as described in Daura [IT]et al.[it]",
1299 "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
1300 "Count number of neighbors using cut-off, take structure with",
1301 "largest number of neighbors with all its neighbors as cluster",
1302 "and eliminate it from the pool of clusters. Repeat for remaining",
1303 "structures in pool.[PAR]",
1305 "When the clustering algorithm assigns each structure to exactly one",
1306 "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
1307 "file is supplied, the structure with",
1308 "the smallest average distance to the others or the average structure",
1309 "or all structures for each cluster will be written to a trajectory",
1310 "file. When writing all structures, separate numbered files are made",
1311 "for each cluster.[PAR]",
1313 "Two output files are always written:[BR]",
1314 "[TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
1315 "and a graphical depiction of the clusters in the lower right half",
1316 "When [TT]-minstruct[tt] = 1 the graphical depiction is black",
1317 "when two structures are in the same cluster.",
1318 "When [TT]-minstruct[tt] > 1 different colors will be used for each",
1319 "cluster.[BR]",
1320 "[TT]-g[tt] writes information on the options used and a detailed list",
1321 "of all clusters and their members.[PAR]",
1323 "Additionally, a number of optional output files can be written:[BR]",
1324 "[TT]-dist[tt] writes the RMSD distribution.[BR]",
1325 "[TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
1326 "diagonalization.[BR]",
1327 "[TT]-sz[tt] writes the cluster sizes.[BR]",
1328 "[TT]-tr[tt] writes a matrix of the number transitions between",
1329 "cluster pairs.[BR]",
1330 "[TT]-ntr[tt] writes the total number of transitions to or from",
1331 "each cluster.[BR]",
1332 "[TT]-clid[tt] writes the cluster number as a function of time.[BR]",
1333 "[TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1334 "structure of each cluster or writes numbered files with cluster members",
1335 "for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1336 "[TT]-nst[tt] and [TT]-rmsmin[tt]). The center of a cluster is the",
1337 "structure with the smallest average RMSD from all other structures",
1338 "of the cluster.[BR]",
1341 FILE *fp, *log;
1342 int i, i1, i2, j, nf, nrms;
1344 matrix box;
1345 rvec *xtps, *usextps, *x1, **xx = NULL;
1346 const char *fn, *trx_out_fn;
1347 t_clusters clust;
1348 t_mat *rms;
1349 real *eigenvalues;
1350 t_topology top;
1351 int ePBC;
1352 t_atoms useatoms;
1353 t_matrix *readmat = NULL;
1354 real *eigenvectors;
1356 int isize = 0, ifsize = 0, iosize = 0;
1357 atom_id *index = NULL, *fitidx, *outidx;
1358 char *grpname;
1359 real rmsd, **d1, **d2, *time = NULL, time_invfac, *mass = NULL;
1360 char buf[STRLEN], buf1[80], title[STRLEN];
1361 gmx_bool bAnalyze, bUseRmsdCut, bJP_RMSD = FALSE, bReadMat, bReadTraj, bPBC = TRUE;
1363 int method, ncluster = 0;
1364 static const char *methodname[] = {
1365 NULL, "linkage", "jarvis-patrick", "monte-carlo",
1366 "diagonalization", "gromos", NULL
1368 enum {
1369 m_null, m_linkage, m_jarvis_patrick,
1370 m_monte_carlo, m_diagonalize, m_gromos, m_nr
1372 /* Set colors for plotting: white = zero RMS, black = maximum */
1373 static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
1374 static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
1375 static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
1376 static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
1377 static int nlevels = 40, skip = 1;
1378 static real scalemax = -1.0, rmsdcut = 0.1, rmsmin = 0.0;
1379 gmx_bool bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;
1380 static int niter = 10000, seed = 1993, write_ncl = 0, write_nst = 1, minstruct = 1;
1381 static real kT = 1e-3;
1382 static int M = 10, P = 3;
1383 output_env_t oenv;
1384 gmx_rmpbc_t gpbc = NULL;
1386 t_pargs pa[] = {
1387 { "-dista", FALSE, etBOOL, {&bRMSdist},
1388 "Use RMSD of distances instead of RMS deviation" },
1389 { "-nlevels", FALSE, etINT, {&nlevels},
1390 "Discretize RMSD matrix in this number of levels" },
1391 { "-cutoff", FALSE, etREAL, {&rmsdcut},
1392 "RMSD cut-off (nm) for two structures to be neighbor" },
1393 { "-fit", FALSE, etBOOL, {&bFit},
1394 "Use least squares fitting before RMSD calculation" },
1395 { "-max", FALSE, etREAL, {&scalemax},
1396 "Maximum level in RMSD matrix" },
1397 { "-skip", FALSE, etINT, {&skip},
1398 "Only analyze every nr-th frame" },
1399 { "-av", FALSE, etBOOL, {&bAverage},
1400 "Write average iso middle structure for each cluster" },
1401 { "-wcl", FALSE, etINT, {&write_ncl},
1402 "Write the structures for this number of clusters to numbered files" },
1403 { "-nst", FALSE, etINT, {&write_nst},
1404 "Only write all structures if more than this number of structures per cluster" },
1405 { "-rmsmin", FALSE, etREAL, {&rmsmin},
1406 "minimum rms difference with rest of cluster for writing structures" },
1407 { "-method", FALSE, etENUM, {methodname},
1408 "Method for cluster determination" },
1409 { "-minstruct", FALSE, etINT, {&minstruct},
1410 "Minimum number of structures in cluster for coloring in the [TT].xpm[tt] file" },
1411 { "-binary", FALSE, etBOOL, {&bBinary},
1412 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1413 "is given by [TT]-cutoff[tt]" },
1414 { "-M", FALSE, etINT, {&M},
1415 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1416 "0 is use cutoff" },
1417 { "-P", FALSE, etINT, {&P},
1418 "Number of identical nearest neighbors required to form a cluster" },
1419 { "-seed", FALSE, etINT, {&seed},
1420 "Random number seed for Monte Carlo clustering algorithm" },
1421 { "-niter", FALSE, etINT, {&niter},
1422 "Number of iterations for MC" },
1423 { "-kT", FALSE, etREAL, {&kT},
1424 "Boltzmann weighting factor for Monte Carlo optimization "
1425 "(zero turns off uphill steps)" },
1426 { "-pbc", FALSE, etBOOL,
1427 { &bPBC }, "PBC check" }
1429 t_filenm fnm[] = {
1430 { efTRX, "-f", NULL, ffOPTRD },
1431 { efTPS, "-s", NULL, ffOPTRD },
1432 { efNDX, NULL, NULL, ffOPTRD },
1433 { efXPM, "-dm", "rmsd", ffOPTRD },
1434 { efXPM, "-o", "rmsd-clust", ffWRITE },
1435 { efLOG, "-g", "cluster", ffWRITE },
1436 { efXVG, "-dist", "rmsd-dist", ffOPTWR },
1437 { efXVG, "-ev", "rmsd-eig", ffOPTWR },
1438 { efXVG, "-sz", "clust-size", ffOPTWR},
1439 { efXPM, "-tr", "clust-trans", ffOPTWR},
1440 { efXVG, "-ntr", "clust-trans", ffOPTWR},
1441 { efXVG, "-clid", "clust-id.xvg", ffOPTWR},
1442 { efTRX, "-cl", "clusters.pdb", ffOPTWR }
1444 #define NFILE asize(fnm)
1446 parse_common_args(&argc, argv,
1447 PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
1448 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL,
1449 &oenv);
1451 /* parse options */
1452 bReadMat = opt2bSet("-dm", NFILE, fnm);
1453 bReadTraj = opt2bSet("-f", NFILE, fnm) || !bReadMat;
1454 if (opt2parg_bSet("-av", asize(pa), pa) ||
1455 opt2parg_bSet("-wcl", asize(pa), pa) ||
1456 opt2parg_bSet("-nst", asize(pa), pa) ||
1457 opt2parg_bSet("-rmsmin", asize(pa), pa) ||
1458 opt2bSet("-cl", NFILE, fnm) )
1460 trx_out_fn = opt2fn("-cl", NFILE, fnm);
1462 else
1464 trx_out_fn = NULL;
1466 if (bReadMat && output_env_get_time_factor(oenv) != 1)
1468 fprintf(stderr,
1469 "\nWarning: assuming the time unit in %s is %s\n",
1470 opt2fn("-dm", NFILE, fnm), output_env_get_time_unit(oenv));
1472 if (trx_out_fn && !bReadTraj)
1474 fprintf(stderr, "\nWarning: "
1475 "cannot write cluster structures without reading trajectory\n"
1476 " ignoring option -cl %s\n", trx_out_fn);
1479 method = 1;
1480 while (method < m_nr && gmx_strcasecmp(methodname[0], methodname[method]) != 0)
1482 method++;
1484 if (method == m_nr)
1486 gmx_fatal(FARGS, "Invalid method");
1489 bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
1490 method == m_gromos );
1492 /* Open log file */
1493 log = ftp2FILE(efLOG, NFILE, fnm, "w");
1495 fprintf(stderr, "Using %s method for clustering\n", methodname[0]);
1496 fprintf(log, "Using %s method for clustering\n", methodname[0]);
1498 /* check input and write parameters to log file */
1499 bUseRmsdCut = FALSE;
1500 if (method == m_jarvis_patrick)
1502 bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff", asize(pa), pa);
1503 if ((M < 0) || (M == 1))
1505 gmx_fatal(FARGS, "M (%d) must be 0 or larger than 1", M);
1507 if (M < 2)
1509 sprintf(buf1, "Will use P=%d and RMSD cutoff (%g)", P, rmsdcut);
1510 bUseRmsdCut = TRUE;
1512 else
1514 if (P >= M)
1516 gmx_fatal(FARGS, "Number of neighbors required (P) must be less than M");
1518 if (bJP_RMSD)
1520 sprintf(buf1, "Will use P=%d, M=%d and RMSD cutoff (%g)", P, M, rmsdcut);
1521 bUseRmsdCut = TRUE;
1523 else
1525 sprintf(buf1, "Will use P=%d, M=%d", P, M);
1528 ffprintf_s(stderr, log, buf, "%s for determining the neighbors\n\n", buf1);
1530 else /* method != m_jarvis */
1532 bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
1534 if (bUseRmsdCut && method != m_jarvis_patrick)
1536 fprintf(log, "Using RMSD cutoff %g nm\n", rmsdcut);
1538 if (method == m_monte_carlo)
1540 fprintf(log, "Using %d iterations\n", niter);
1543 if (skip < 1)
1545 gmx_fatal(FARGS, "skip (%d) should be >= 1", skip);
1548 /* get input */
1549 if (bReadTraj)
1551 /* don't read mass-database as masses (and top) are not used */
1552 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), buf, &top, &ePBC, &xtps, NULL, box,
1553 TRUE);
1554 if (bPBC)
1556 gpbc = gmx_rmpbc_init(&top.idef, ePBC, top.atoms.nr, box);
1559 fprintf(stderr, "\nSelect group for least squares fit%s:\n",
1560 bReadMat ? "" : " and RMSD calculation");
1561 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1562 1, &ifsize, &fitidx, &grpname);
1563 if (trx_out_fn)
1565 fprintf(stderr, "\nSelect group for output:\n");
1566 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1567 1, &iosize, &outidx, &grpname);
1568 /* merge and convert both index groups: */
1569 /* first copy outidx to index. let outidx refer to elements in index */
1570 snew(index, iosize);
1571 isize = iosize;
1572 for (i = 0; i < iosize; i++)
1574 index[i] = outidx[i];
1575 outidx[i] = i;
1577 /* now lookup elements from fitidx in index, add them if necessary
1578 and also let fitidx refer to elements in index */
1579 for (i = 0; i < ifsize; i++)
1581 j = 0;
1582 while (j < isize && index[j] != fitidx[i])
1584 j++;
1586 if (j >= isize)
1588 /* slow this way, but doesn't matter much */
1589 isize++;
1590 srenew(index, isize);
1592 index[j] = fitidx[i];
1593 fitidx[i] = j;
1596 else /* !trx_out_fn */
1598 isize = ifsize;
1599 snew(index, isize);
1600 for (i = 0; i < ifsize; i++)
1602 index[i] = fitidx[i];
1603 fitidx[i] = i;
1607 /* Initiate arrays */
1608 snew(d1, isize);
1609 snew(d2, isize);
1610 for (i = 0; (i < isize); i++)
1612 snew(d1[i], isize);
1613 snew(d2[i], isize);
1616 if (bReadTraj)
1618 /* Loop over first coordinate file */
1619 fn = opt2fn("-f", NFILE, fnm);
1621 xx = read_whole_trj(fn, isize, index, skip, &nf, &time, oenv, bPBC, gpbc);
1622 output_env_conv_times(oenv, nf, time);
1623 if (!bRMSdist || bAnalyze)
1625 /* Center all frames on zero */
1626 snew(mass, isize);
1627 for (i = 0; i < ifsize; i++)
1629 mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1631 if (bFit)
1633 for (i = 0; i < nf; i++)
1635 reset_x(ifsize, fitidx, isize, NULL, xx[i], mass);
1639 if (bPBC)
1641 gmx_rmpbc_done(gpbc);
1645 if (bReadMat)
1647 fprintf(stderr, "Reading rms distance matrix ");
1648 read_xpm_matrix(opt2fn("-dm", NFILE, fnm), &readmat);
1649 fprintf(stderr, "\n");
1650 if (readmat[0].nx != readmat[0].ny)
1652 gmx_fatal(FARGS, "Matrix (%dx%d) is not square",
1653 readmat[0].nx, readmat[0].ny);
1655 if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1657 gmx_fatal(FARGS, "Matrix size (%dx%d) does not match the number of "
1658 "frames (%d)", readmat[0].nx, readmat[0].ny, nf);
1661 nf = readmat[0].nx;
1662 sfree(time);
1663 time = readmat[0].axis_x;
1664 time_invfac = output_env_get_time_invfactor(oenv);
1665 for (i = 0; i < nf; i++)
1667 time[i] *= time_invfac;
1670 rms = init_mat(readmat[0].nx, method == m_diagonalize);
1671 convert_mat(&(readmat[0]), rms);
1673 nlevels = readmat[0].nmap;
1675 else /* !bReadMat */
1677 rms = init_mat(nf, method == m_diagonalize);
1678 nrms = (nf*(nf-1))/2;
1679 if (!bRMSdist)
1681 fprintf(stderr, "Computing %dx%d RMS deviation matrix\n", nf, nf);
1682 snew(x1, isize);
1683 for (i1 = 0; (i1 < nf); i1++)
1685 for (i2 = i1+1; (i2 < nf); i2++)
1687 for (i = 0; i < isize; i++)
1689 copy_rvec(xx[i1][i], x1[i]);
1691 if (bFit)
1693 do_fit(isize, mass, xx[i2], x1);
1695 rmsd = rmsdev(isize, mass, xx[i2], x1);
1696 set_mat_entry(rms, i1, i2, rmsd);
1698 nrms -= (nf-i1-1);
1699 fprintf(stderr, "\r# RMSD calculations left: %d ", nrms);
1702 else /* bRMSdist */
1704 fprintf(stderr, "Computing %dx%d RMS distance deviation matrix\n", nf, nf);
1705 for (i1 = 0; (i1 < nf); i1++)
1707 calc_dist(isize, xx[i1], d1);
1708 for (i2 = i1+1; (i2 < nf); i2++)
1710 calc_dist(isize, xx[i2], d2);
1711 set_mat_entry(rms, i1, i2, rms_dist(isize, d1, d2));
1713 nrms -= (nf-i1-1);
1714 fprintf(stderr, "\r# RMSD calculations left: %d ", nrms);
1717 fprintf(stderr, "\n\n");
1719 ffprintf_gg(stderr, log, buf, "The RMSD ranges from %g to %g nm\n",
1720 rms->minrms, rms->maxrms);
1721 ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2*rms->sumrms/(nf*(nf-1)));
1722 ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);
1723 ffprintf_g(stderr, log, buf, "Energy of the matrix is %g nm\n", mat_energy(rms));
1724 if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
1726 fprintf(stderr, "WARNING: rmsd cutoff %g is outside range of rmsd values "
1727 "%g to %g\n", rmsdcut, rms->minrms, rms->maxrms);
1729 if (bAnalyze && (rmsmin < rms->minrms) )
1731 fprintf(stderr, "WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1732 rmsmin, rms->minrms);
1734 if (bAnalyze && (rmsmin > rmsdcut) )
1736 fprintf(stderr, "WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1737 rmsmin, rmsdcut);
1740 /* Plot the rmsd distribution */
1741 rmsd_distribution(opt2fn("-dist", NFILE, fnm), rms, oenv);
1743 if (bBinary)
1745 for (i1 = 0; (i1 < nf); i1++)
1747 for (i2 = 0; (i2 < nf); i2++)
1749 if (rms->mat[i1][i2] < rmsdcut)
1751 rms->mat[i1][i2] = 0;
1753 else
1755 rms->mat[i1][i2] = 1;
1761 snew(clust.cl, nf);
1762 switch (method)
1764 case m_linkage:
1765 /* Now sort the matrix and write it out again */
1766 gather(rms, rmsdcut, &clust);
1767 break;
1768 case m_diagonalize:
1769 /* Do a diagonalization */
1770 snew(eigenvalues, nf);
1771 snew(eigenvectors, nf*nf);
1772 memcpy(eigenvectors, rms->mat[0], nf*nf*sizeof(real));
1773 eigensolver(eigenvectors, nf, 0, nf, eigenvalues, rms->mat[0]);
1774 sfree(eigenvectors);
1776 fp = xvgropen(opt2fn("-ev", NFILE, fnm), "RMSD matrix Eigenvalues",
1777 "Eigenvector index", "Eigenvalues (nm\\S2\\N)", oenv);
1778 for (i = 0; (i < nf); i++)
1780 fprintf(fp, "%10d %10g\n", i, eigenvalues[i]);
1782 ffclose(fp);
1783 break;
1784 case m_monte_carlo:
1785 mc_optimize(log, rms, niter, &seed, kT);
1786 swap_mat(rms);
1787 reset_index(rms);
1788 break;
1789 case m_jarvis_patrick:
1790 jarvis_patrick(rms->nn, rms->mat, M, P, bJP_RMSD ? rmsdcut : -1, &clust);
1791 break;
1792 case m_gromos:
1793 gromos(rms->nn, rms->mat, rmsdcut, &clust);
1794 break;
1795 default:
1796 gmx_fatal(FARGS, "DEATH HORROR unknown method \"%s\"", methodname[0]);
1799 if (method == m_monte_carlo || method == m_diagonalize)
1801 fprintf(stderr, "Energy of the matrix after clustering is %g nm\n",
1802 mat_energy(rms));
1805 if (bAnalyze)
1807 if (minstruct > 1)
1809 ncluster = plot_clusters(nf, rms->mat, &clust, nlevels, minstruct);
1811 else
1813 mark_clusters(nf, rms->mat, rms->maxrms, &clust);
1815 init_t_atoms(&useatoms, isize, FALSE);
1816 snew(usextps, isize);
1817 useatoms.resinfo = top.atoms.resinfo;
1818 for (i = 0; i < isize; i++)
1820 useatoms.atomname[i] = top.atoms.atomname[index[i]];
1821 useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1822 useatoms.nres = max(useatoms.nres, useatoms.atom[i].resind+1);
1823 copy_rvec(xtps[index[i]], usextps[i]);
1825 useatoms.nr = isize;
1826 analyze_clusters(nf, &clust, rms->mat, isize, &useatoms, usextps, mass, xx, time,
1827 ifsize, fitidx, iosize, outidx,
1828 bReadTraj ? trx_out_fn : NULL,
1829 opt2fn_null("-sz", NFILE, fnm),
1830 opt2fn_null("-tr", NFILE, fnm),
1831 opt2fn_null("-ntr", NFILE, fnm),
1832 opt2fn_null("-clid", NFILE, fnm),
1833 bAverage, write_ncl, write_nst, rmsmin, bFit, log,
1834 rlo_bot, rhi_bot, oenv);
1836 ffclose(log);
1838 if (bBinary && !bAnalyze)
1840 /* Make the clustering visible */
1841 for (i2 = 0; (i2 < nf); i2++)
1843 for (i1 = i2+1; (i1 < nf); i1++)
1845 if (rms->mat[i1][i2])
1847 rms->mat[i1][i2] = rms->maxrms;
1853 fp = opt2FILE("-o", NFILE, fnm, "w");
1854 fprintf(stderr, "Writing rms distance/clustering matrix ");
1855 if (bReadMat)
1857 write_xpm(fp, 0, readmat[0].title, readmat[0].legend, readmat[0].label_x,
1858 readmat[0].label_y, nf, nf, readmat[0].axis_x, readmat[0].axis_y,
1859 rms->mat, 0.0, rms->maxrms, rlo_top, rhi_top, &nlevels);
1861 else
1863 sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
1864 sprintf(title, "RMS%sDeviation / Cluster Index",
1865 bRMSdist ? " Distance " : " ");
1866 if (minstruct > 1)
1868 write_xpm_split(fp, 0, title, "RMSD (nm)", buf, buf,
1869 nf, nf, time, time, rms->mat, 0.0, rms->maxrms, &nlevels,
1870 rlo_top, rhi_top, 0.0, (real) ncluster,
1871 &ncluster, TRUE, rlo_bot, rhi_bot);
1873 else
1875 write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
1876 nf, nf, time, time, rms->mat, 0.0, rms->maxrms,
1877 rlo_top, rhi_top, &nlevels);
1880 fprintf(stderr, "\n");
1881 ffclose(fp);
1883 /* now show what we've done */
1884 do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
1885 do_view(oenv, opt2fn_null("-sz", NFILE, fnm), "-nxy");
1886 if (method == m_diagonalize)
1888 do_view(oenv, opt2fn_null("-ev", NFILE, fnm), "-nxy");
1890 do_view(oenv, opt2fn("-dist", NFILE, fnm), "-nxy");
1891 if (bAnalyze)
1893 do_view(oenv, opt2fn_null("-tr", NFILE, fnm), "-nxy");
1894 do_view(oenv, opt2fn_null("-ntr", NFILE, fnm), "-nxy");
1895 do_view(oenv, opt2fn_null("-clid", NFILE, fnm), "-nxy");
1898 /* Thank the user for her patience */
1899 thanx(stderr);
1901 return 0;