Remove all unnecessary HAVE_CONFIG_H
[gromacs.git] / src / gromacs / gmxana / gmx_cluster.c
blob448c78f939e6695a9cde26aee731348d035e8a1f
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "config.h"
39 #include <math.h>
40 #include <stdlib.h>
41 #include <string.h>
43 #include "macros.h"
44 #include "gromacs/utility/smalloc.h"
45 #include "typedefs.h"
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/fileio/tpxio.h"
48 #include "gromacs/fileio/trxio.h"
49 #include "gromacs/utility/cstringutil.h"
50 #include "gromacs/math/vec.h"
51 #include "macros.h"
52 #include "gromacs/topology/index.h"
53 #include "gromacs/random/random.h"
54 #include "gromacs/pbcutil/pbc.h"
55 #include "gromacs/pbcutil/rmpbc.h"
56 #include "gromacs/fileio/xvgr.h"
57 #include "gromacs/utility/futil.h"
58 #include "gromacs/fileio/matio.h"
59 #include "cmat.h"
60 #include "gromacs/fileio/trnio.h"
61 #include "viewit.h"
62 #include "gmx_ana.h"
64 #include "gromacs/linearalgebra/eigensolver.h"
65 #include "gromacs/math/do_fit.h"
66 #include "gromacs/utility/fatalerror.h"
68 /* print to two file pointers at once (i.e. stderr and log) */
69 static gmx_inline
70 void lo_ffprintf(FILE *fp1, FILE *fp2, const char *buf)
72 fprintf(fp1, "%s", buf);
73 fprintf(fp2, "%s", buf);
76 /* just print a prepared buffer to fp1 and fp2 */
77 static gmx_inline
78 void ffprintf(FILE *fp1, FILE *fp2, const char *buf)
80 lo_ffprintf(fp1, fp2, buf);
83 /* prepare buffer with one argument, then print to fp1 and fp2 */
84 static gmx_inline
85 void ffprintf_d(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg)
87 sprintf(buf, fmt, arg);
88 lo_ffprintf(fp1, fp2, buf);
91 /* prepare buffer with one argument, then print to fp1 and fp2 */
92 static gmx_inline
93 void ffprintf_g(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg)
95 sprintf(buf, fmt, arg);
96 lo_ffprintf(fp1, fp2, buf);
99 /* prepare buffer with one argument, then print to fp1 and fp2 */
100 static gmx_inline
101 void ffprintf_s(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg)
103 sprintf(buf, fmt, arg);
104 lo_ffprintf(fp1, fp2, buf);
107 /* prepare buffer with two arguments, then print to fp1 and fp2 */
108 static gmx_inline
109 void ffprintf_dd(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg1, int arg2)
111 sprintf(buf, fmt, arg1, arg2);
112 lo_ffprintf(fp1, fp2, buf);
115 /* prepare buffer with two arguments, then print to fp1 and fp2 */
116 static gmx_inline
117 void ffprintf_gg(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg1, real arg2)
119 sprintf(buf, fmt, arg1, arg2);
120 lo_ffprintf(fp1, fp2, buf);
123 /* prepare buffer with two arguments, then print to fp1 and fp2 */
124 static gmx_inline
125 void ffprintf_ss(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg1, const char *arg2)
127 sprintf(buf, fmt, arg1, arg2);
128 lo_ffprintf(fp1, fp2, buf);
131 typedef struct {
132 int ncl;
133 int *cl;
134 } t_clusters;
136 typedef struct {
137 int nr;
138 int *nb;
139 } t_nnb;
141 void pr_energy(FILE *fp, real e)
143 fprintf(fp, "Energy: %8.4f\n", e);
146 void cp_index(int nn, int from[], int to[])
148 int i;
150 for (i = 0; (i < nn); i++)
152 to[i] = from[i];
156 void mc_optimize(FILE *log, t_mat *m, real *time,
157 int maxiter, int nrandom,
158 int seed, real kT,
159 const char *conv, output_env_t oenv)
161 FILE *fp = NULL;
162 real ecur, enext, emin, prob, enorm;
163 int i, j, iswap, jswap, nn, nuphill = 0;
164 gmx_rng_t rng;
165 t_mat *minimum;
167 if (m->n1 != m->nn)
169 fprintf(stderr, "Can not do Monte Carlo optimization with a non-square matrix.\n");
170 return;
172 printf("\nDoing Monte Carlo optimization to find the smoothest trajectory\n");
173 printf("by reordering the frames to minimize the path between the two structures\n");
174 printf("that have the largest pairwise RMSD.\n");
176 iswap = jswap = -1;
177 enorm = m->mat[0][0];
178 for (i = 0; (i < m->n1); i++)
180 for (j = 0; (j < m->nn); j++)
182 if (m->mat[i][j] > enorm)
184 enorm = m->mat[i][j];
185 iswap = i;
186 jswap = j;
190 if ((iswap == -1) || (jswap == -1))
192 fprintf(stderr, "Matrix contains identical values in all fields\n");
193 return;
195 swap_rows(m, 0, iswap);
196 swap_rows(m, m->n1-1, jswap);
197 emin = ecur = mat_energy(m);
198 printf("Largest distance %g between %d and %d. Energy: %g.\n",
199 enorm, iswap, jswap, emin);
201 rng = gmx_rng_init(seed);
202 nn = m->nn;
204 /* Initiate and store global minimum */
205 minimum = init_mat(nn, m->b1D);
206 minimum->nn = nn;
207 copy_t_mat(minimum, m);
209 if (NULL != conv)
211 fp = xvgropen(conv, "Convergence of the MC optimization",
212 "Energy", "Step", oenv);
214 for (i = 0; (i < maxiter); i++)
216 /* Generate new swapping candidates */
219 iswap = 1+(nn-2)*gmx_rng_uniform_real(rng);
220 jswap = 1+(nn-2)*gmx_rng_uniform_real(rng);
222 while ((iswap == jswap) || (iswap >= nn-1) || (jswap >= nn-1));
224 /* Apply swap and compute energy */
225 swap_rows(m, iswap, jswap);
226 enext = mat_energy(m);
228 /* Compute probability */
229 prob = 0;
230 if ((enext < ecur) || (i < nrandom))
232 prob = 1;
233 if (enext < emin)
235 /* Store global minimum */
236 copy_t_mat(minimum, m);
237 emin = enext;
240 else if (kT > 0)
242 /* Try Monte Carlo step */
243 prob = exp(-(enext-ecur)/(enorm*kT));
246 if ((prob == 1) || (gmx_rng_uniform_real(rng) < prob))
248 if (enext > ecur)
250 nuphill++;
253 fprintf(log, "Iter: %d Swapped %4d and %4d (energy: %g prob: %g)\n",
254 i, iswap, jswap, enext, prob);
255 if (NULL != fp)
257 fprintf(fp, "%6d %10g\n", i, enext);
259 ecur = enext;
261 else
263 swap_rows(m, jswap, iswap);
266 fprintf(log, "%d uphill steps were taken during optimization\n",
267 nuphill);
269 /* Now swap the matrix to get it into global minimum mode */
270 copy_t_mat(m, minimum);
272 fprintf(log, "Global minimum energy %g\n", mat_energy(minimum));
273 fprintf(log, "Global minimum energy %g\n", mat_energy(m));
274 fprintf(log, "Swapped time and frame indices and RMSD to next neighbor:\n");
275 for (i = 0; (i < m->nn); i++)
277 fprintf(log, "%10g %5d %10g\n",
278 time[m->m_ind[i]],
279 m->m_ind[i],
280 (i < m->nn-1) ? m->mat[m->m_ind[i]][m->m_ind[i+1]] : 0);
283 if (NULL != fp)
285 fclose(fp);
289 static void calc_dist(int nind, rvec x[], real **d)
291 int i, j;
292 real *xi;
293 rvec dx;
295 for (i = 0; (i < nind-1); i++)
297 xi = x[i];
298 for (j = i+1; (j < nind); j++)
300 /* Should use pbc_dx when analysing multiple molecueles,
301 * but the box is not stored for every frame.
303 rvec_sub(xi, x[j], dx);
304 d[i][j] = norm(dx);
309 static real rms_dist(int isize, real **d, real **d_r)
311 int i, j;
312 real r, r2;
314 r2 = 0.0;
315 for (i = 0; (i < isize-1); i++)
317 for (j = i+1; (j < isize); j++)
319 r = d[i][j]-d_r[i][j];
320 r2 += r*r;
323 r2 /= (isize*(isize-1))/2;
325 return sqrt(r2);
328 static int rms_dist_comp(const void *a, const void *b)
330 t_dist *da, *db;
332 da = (t_dist *)a;
333 db = (t_dist *)b;
335 if (da->dist - db->dist < 0)
337 return -1;
339 else if (da->dist - db->dist > 0)
341 return 1;
343 return 0;
346 static int clust_id_comp(const void *a, const void *b)
348 t_clustid *da, *db;
350 da = (t_clustid *)a;
351 db = (t_clustid *)b;
353 return da->clust - db->clust;
356 static int nrnb_comp(const void *a, const void *b)
358 t_nnb *da, *db;
360 da = (t_nnb *)a;
361 db = (t_nnb *)b;
363 /* return the b-a, we want highest first */
364 return db->nr - da->nr;
367 void gather(t_mat *m, real cutoff, t_clusters *clust)
369 t_clustid *c;
370 t_dist *d;
371 int i, j, k, nn, cid, n1, diff;
372 gmx_bool bChange;
374 /* First we sort the entries in the RMSD matrix */
375 n1 = m->nn;
376 nn = ((n1-1)*n1)/2;
377 snew(d, nn);
378 for (i = k = 0; (i < n1); i++)
380 for (j = i+1; (j < n1); j++, k++)
382 d[k].i = i;
383 d[k].j = j;
384 d[k].dist = m->mat[i][j];
387 if (k != nn)
389 gmx_incons("gather algortihm");
391 qsort(d, nn, sizeof(d[0]), rms_dist_comp);
393 /* Now we make a cluster index for all of the conformations */
394 c = new_clustid(n1);
396 /* Now we check the closest structures, and equalize their cluster numbers */
397 fprintf(stderr, "Linking structures ");
400 fprintf(stderr, "*");
401 bChange = FALSE;
402 for (k = 0; (k < nn) && (d[k].dist < cutoff); k++)
404 diff = c[d[k].j].clust - c[d[k].i].clust;
405 if (diff)
407 bChange = TRUE;
408 if (diff > 0)
410 c[d[k].j].clust = c[d[k].i].clust;
412 else
414 c[d[k].i].clust = c[d[k].j].clust;
419 while (bChange);
420 fprintf(stderr, "\nSorting and renumbering clusters\n");
421 /* Sort on cluster number */
422 qsort(c, n1, sizeof(c[0]), clust_id_comp);
424 /* Renumber clusters */
425 cid = 1;
426 for (k = 1; k < n1; k++)
428 if (c[k].clust != c[k-1].clust)
430 c[k-1].clust = cid;
431 cid++;
433 else
435 c[k-1].clust = cid;
438 c[k-1].clust = cid;
439 if (debug)
441 for (k = 0; (k < n1); k++)
443 fprintf(debug, "Cluster index for conformation %d: %d\n",
444 c[k].conf, c[k].clust);
447 clust->ncl = cid;
448 for (k = 0; k < n1; k++)
450 clust->cl[c[k].conf] = c[k].clust;
453 sfree(c);
454 sfree(d);
457 gmx_bool jp_same(int **nnb, int i, int j, int P)
459 gmx_bool bIn;
460 int k, ii, jj, pp;
462 bIn = FALSE;
463 for (k = 0; nnb[i][k] >= 0; k++)
465 bIn = bIn || (nnb[i][k] == j);
467 if (!bIn)
469 return FALSE;
472 bIn = FALSE;
473 for (k = 0; nnb[j][k] >= 0; k++)
475 bIn = bIn || (nnb[j][k] == i);
477 if (!bIn)
479 return FALSE;
482 pp = 0;
483 for (ii = 0; nnb[i][ii] >= 0; ii++)
485 for (jj = 0; nnb[j][jj] >= 0; jj++)
487 if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
489 pp++;
494 return (pp >= P);
497 static void jarvis_patrick(int n1, real **mat, int M, int P,
498 real rmsdcut, t_clusters *clust)
500 t_dist *row;
501 t_clustid *c;
502 int **nnb;
503 int i, j, k, cid, diff, max;
504 gmx_bool bChange;
505 real **mcpy = NULL;
507 if (rmsdcut < 0)
509 rmsdcut = 10000;
512 /* First we sort the entries in the RMSD matrix row by row.
513 * This gives us the nearest neighbor list.
515 snew(nnb, n1);
516 snew(row, n1);
517 for (i = 0; (i < n1); i++)
519 for (j = 0; (j < n1); j++)
521 row[j].j = j;
522 row[j].dist = mat[i][j];
524 qsort(row, n1, sizeof(row[0]), rms_dist_comp);
525 if (M > 0)
527 /* Put the M nearest neighbors in the list */
528 snew(nnb[i], M+1);
529 for (j = k = 0; (k < M) && (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
531 if (row[j].j != i)
533 nnb[i][k] = row[j].j;
534 k++;
537 nnb[i][k] = -1;
539 else
541 /* Put all neighbors nearer than rmsdcut in the list */
542 max = 0;
543 k = 0;
544 for (j = 0; (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
546 if (row[j].j != i)
548 if (k >= max)
550 max += 10;
551 srenew(nnb[i], max);
553 nnb[i][k] = row[j].j;
554 k++;
557 if (k == max)
559 srenew(nnb[i], max+1);
561 nnb[i][k] = -1;
564 sfree(row);
565 if (debug)
567 fprintf(debug, "Nearest neighborlist. M = %d, P = %d\n", M, P);
568 for (i = 0; (i < n1); i++)
570 fprintf(debug, "i:%5d nbs:", i);
571 for (j = 0; nnb[i][j] >= 0; j++)
573 fprintf(debug, "%5d[%5.3f]", nnb[i][j], mat[i][nnb[i][j]]);
575 fprintf(debug, "\n");
579 c = new_clustid(n1);
580 fprintf(stderr, "Linking structures ");
581 /* Use mcpy for temporary storage of booleans */
582 mcpy = mk_matrix(n1, n1, FALSE);
583 for (i = 0; i < n1; i++)
585 for (j = i+1; j < n1; j++)
587 mcpy[i][j] = jp_same(nnb, i, j, P);
592 fprintf(stderr, "*");
593 bChange = FALSE;
594 for (i = 0; i < n1; i++)
596 for (j = i+1; j < n1; j++)
598 if (mcpy[i][j])
600 diff = c[j].clust - c[i].clust;
601 if (diff)
603 bChange = TRUE;
604 if (diff > 0)
606 c[j].clust = c[i].clust;
608 else
610 c[i].clust = c[j].clust;
617 while (bChange);
619 fprintf(stderr, "\nSorting and renumbering clusters\n");
620 /* Sort on cluster number */
621 qsort(c, n1, sizeof(c[0]), clust_id_comp);
623 /* Renumber clusters */
624 cid = 1;
625 for (k = 1; k < n1; k++)
627 if (c[k].clust != c[k-1].clust)
629 c[k-1].clust = cid;
630 cid++;
632 else
634 c[k-1].clust = cid;
637 c[k-1].clust = cid;
638 clust->ncl = cid;
639 for (k = 0; k < n1; k++)
641 clust->cl[c[k].conf] = c[k].clust;
643 if (debug)
645 for (k = 0; (k < n1); k++)
647 fprintf(debug, "Cluster index for conformation %d: %d\n",
648 c[k].conf, c[k].clust);
652 /* Again, I don't see the point in this... (AF) */
653 /* for(i=0; (i<n1); i++) { */
654 /* for(j=0; (j<n1); j++) */
655 /* mcpy[c[i].conf][c[j].conf] = mat[i][j]; */
656 /* } */
657 /* for(i=0; (i<n1); i++) { */
658 /* for(j=0; (j<n1); j++) */
659 /* mat[i][j] = mcpy[i][j]; */
660 /* } */
661 done_matrix(n1, &mcpy);
663 sfree(c);
664 for (i = 0; (i < n1); i++)
666 sfree(nnb[i]);
668 sfree(nnb);
671 static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)
673 int i, j;
675 /* dump neighbor list */
676 fprintf(fp, "%s", title);
677 for (i = 0; (i < n1); i++)
679 fprintf(fp, "i:%5d #:%5d nbs:", i, nnb[i].nr);
680 for (j = 0; j < nnb[i].nr; j++)
682 fprintf(fp, "%5d", nnb[i].nb[j]);
684 fprintf(fp, "\n");
688 static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)
690 t_dist *row;
691 t_nnb *nnb;
692 int i, j, k, j1, max;
694 /* Put all neighbors nearer than rmsdcut in the list */
695 fprintf(stderr, "Making list of neighbors within cutoff ");
696 snew(nnb, n1);
697 snew(row, n1);
698 for (i = 0; (i < n1); i++)
700 max = 0;
701 k = 0;
702 /* put all neighbors within cut-off in list */
703 for (j = 0; j < n1; j++)
705 if (mat[i][j] < rmsdcut)
707 if (k >= max)
709 max += 10;
710 srenew(nnb[i].nb, max);
712 nnb[i].nb[k] = j;
713 k++;
716 /* store nr of neighbors, we'll need that */
717 nnb[i].nr = k;
718 if (i%(1+n1/100) == 0)
720 fprintf(stderr, "%3d%%\b\b\b\b", (i*100+1)/n1);
723 fprintf(stderr, "%3d%%\n", 100);
724 sfree(row);
726 /* sort neighbor list on number of neighbors, largest first */
727 qsort(nnb, n1, sizeof(nnb[0]), nrnb_comp);
729 if (debug)
731 dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
734 /* turn first structure with all its neighbors (largest) into cluster
735 remove them from pool of structures and repeat for all remaining */
736 fprintf(stderr, "Finding clusters %4d", 0);
737 /* cluster id's start at 1: */
738 k = 1;
739 while (nnb[0].nr)
741 /* set cluster id (k) for first item in neighborlist */
742 for (j = 0; j < nnb[0].nr; j++)
744 clust->cl[nnb[0].nb[j]] = k;
746 /* mark as done */
747 nnb[0].nr = 0;
748 sfree(nnb[0].nb);
750 /* adjust number of neighbors for others, taking removals into account: */
751 for (i = 1; i < n1 && nnb[i].nr; i++)
753 j1 = 0;
754 for (j = 0; j < nnb[i].nr; j++)
756 /* if this neighbor wasn't removed */
757 if (clust->cl[nnb[i].nb[j]] == 0)
759 /* shift the rest (j1<=j) */
760 nnb[i].nb[j1] = nnb[i].nb[j];
761 /* next */
762 j1++;
765 /* now j1 is the new number of neighbors */
766 nnb[i].nr = j1;
768 /* sort again on nnb[].nr, because we have new # neighbors: */
769 /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
770 qsort(nnb, i, sizeof(nnb[0]), nrnb_comp);
772 fprintf(stderr, "\b\b\b\b%4d", k);
773 /* new cluster id */
774 k++;
776 fprintf(stderr, "\n");
777 sfree(nnb);
778 if (debug)
780 fprintf(debug, "Clusters (%d):\n", k);
781 for (i = 0; i < n1; i++)
783 fprintf(debug, " %3d", clust->cl[i]);
785 fprintf(debug, "\n");
788 clust->ncl = k-1;
791 rvec **read_whole_trj(const char *fn, int isize, atom_id index[], int skip,
792 int *nframe, real **time, const output_env_t oenv, gmx_bool bPBC, gmx_rmpbc_t gpbc)
794 rvec **xx, *x;
795 matrix box;
796 real t;
797 int i, i0, j, max_nf;
798 int natom;
799 t_trxstatus *status;
802 max_nf = 0;
803 xx = NULL;
804 *time = NULL;
805 natom = read_first_x(oenv, &status, fn, &t, &x, box);
806 i = 0;
807 i0 = 0;
810 if (bPBC)
812 gmx_rmpbc(gpbc, natom, box, x);
814 if (i0 >= max_nf)
816 max_nf += 10;
817 srenew(xx, max_nf);
818 srenew(*time, max_nf);
820 if ((i % skip) == 0)
822 snew(xx[i0], isize);
823 /* Store only the interesting atoms */
824 for (j = 0; (j < isize); j++)
826 copy_rvec(x[index[j]], xx[i0][j]);
828 (*time)[i0] = t;
829 i0++;
831 i++;
833 while (read_next_x(oenv, status, &t, x, box));
834 fprintf(stderr, "Allocated %lu bytes for frames\n",
835 (unsigned long) (max_nf*isize*sizeof(**xx)));
836 fprintf(stderr, "Read %d frames from trajectory %s\n", i0, fn);
837 *nframe = i0;
838 sfree(x);
840 return xx;
843 static int plot_clusters(int nf, real **mat, t_clusters *clust,
844 int minstruct)
846 int i, j, ncluster, ci;
847 int *cl_id, *nstruct, *strind;
849 snew(cl_id, nf);
850 snew(nstruct, nf);
851 snew(strind, nf);
852 for (i = 0; i < nf; i++)
854 strind[i] = 0;
855 cl_id[i] = clust->cl[i];
856 nstruct[cl_id[i]]++;
858 ncluster = 0;
859 for (i = 0; i < nf; i++)
861 if (nstruct[i] >= minstruct)
863 ncluster++;
864 for (j = 0; (j < nf); j++)
866 if (cl_id[j] == i)
868 strind[j] = ncluster;
873 ncluster++;
874 fprintf(stderr, "There are %d clusters with at least %d conformations\n",
875 ncluster, minstruct);
877 for (i = 0; (i < nf); i++)
879 ci = cl_id[i];
880 for (j = 0; j < i; j++)
882 if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct))
884 /* color different clusters with different colors, as long as
885 we don't run out of colors */
886 mat[i][j] = strind[i];
888 else
890 mat[i][j] = 0;
894 sfree(strind);
895 sfree(nstruct);
896 sfree(cl_id);
898 return ncluster;
901 static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)
903 int i, j, v;
905 for (i = 0; i < nf; i++)
907 for (j = 0; j < i; j++)
909 if (clust->cl[i] == clust->cl[j])
911 mat[i][j] = val;
913 else
915 mat[i][j] = 0;
921 static char *parse_filename(const char *fn, int maxnr)
923 int i;
924 char *fnout;
925 const char *ext;
926 char buf[STRLEN];
928 if (strchr(fn, '%'))
930 gmx_fatal(FARGS, "will not number filename %s containing '%c'", fn, '%');
932 /* number of digits needed in numbering */
933 i = (int)(log(maxnr)/log(10)) + 1;
934 /* split fn and ext */
935 ext = strrchr(fn, '.');
936 if (!ext)
938 gmx_fatal(FARGS, "cannot separate extension in filename %s", fn);
940 ext++;
941 /* insert e.g. '%03d' between fn and ext */
942 sprintf(buf, "%s%%0%dd.%s", fn, i, ext);
943 snew(fnout, strlen(buf)+1);
944 strcpy(fnout, buf);
946 return fnout;
949 static void ana_trans(t_clusters *clust, int nf,
950 const char *transfn, const char *ntransfn, FILE *log,
951 t_rgb rlo, t_rgb rhi, const output_env_t oenv)
953 FILE *fp;
954 real **trans, *axis;
955 int *ntrans;
956 int i, ntranst, maxtrans;
957 char buf[STRLEN];
959 snew(ntrans, clust->ncl);
960 snew(trans, clust->ncl);
961 snew(axis, clust->ncl);
962 for (i = 0; i < clust->ncl; i++)
964 axis[i] = i+1;
965 snew(trans[i], clust->ncl);
967 ntranst = 0;
968 maxtrans = 0;
969 for (i = 1; i < nf; i++)
971 if (clust->cl[i] != clust->cl[i-1])
973 ntranst++;
974 ntrans[clust->cl[i-1]-1]++;
975 ntrans[clust->cl[i]-1]++;
976 trans[clust->cl[i-1]-1][clust->cl[i]-1]++;
977 maxtrans = max(maxtrans, trans[clust->cl[i]-1][clust->cl[i-1]-1]);
980 ffprintf_dd(stderr, log, buf, "Counted %d transitions in total, "
981 "max %d between two specific clusters\n", ntranst, maxtrans);
982 if (transfn)
984 fp = gmx_ffopen(transfn, "w");
985 i = min(maxtrans+1, 80);
986 write_xpm(fp, 0, "Cluster Transitions", "# transitions",
987 "from cluster", "to cluster",
988 clust->ncl, clust->ncl, axis, axis, trans,
989 0, maxtrans, rlo, rhi, &i);
990 gmx_ffclose(fp);
992 if (ntransfn)
994 fp = xvgropen(ntransfn, "Cluster Transitions", "Cluster #", "# transitions",
995 oenv);
996 for (i = 0; i < clust->ncl; i++)
998 fprintf(fp, "%5d %5d\n", i+1, ntrans[i]);
1000 gmx_ffclose(fp);
1002 sfree(ntrans);
1003 for (i = 0; i < clust->ncl; i++)
1005 sfree(trans[i]);
1007 sfree(trans);
1008 sfree(axis);
1011 static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,
1012 int natom, t_atoms *atoms, rvec *xtps,
1013 real *mass, rvec **xx, real *time,
1014 int ifsize, atom_id *fitidx,
1015 int iosize, atom_id *outidx,
1016 const char *trxfn, const char *sizefn,
1017 const char *transfn, const char *ntransfn,
1018 const char *clustidfn, gmx_bool bAverage,
1019 int write_ncl, int write_nst, real rmsmin,
1020 gmx_bool bFit, FILE *log, t_rgb rlo, t_rgb rhi,
1021 const output_env_t oenv)
1023 FILE *fp = NULL;
1024 char buf[STRLEN], buf1[40], buf2[40], buf3[40], *trxsfn;
1025 t_trxstatus *trxout = NULL;
1026 t_trxstatus *trxsout = NULL;
1027 int i, i1, cl, nstr, *structure, first = 0, midstr;
1028 gmx_bool *bWrite = NULL;
1029 real r, clrmsd, midrmsd;
1030 rvec *xav = NULL;
1031 matrix zerobox;
1033 clear_mat(zerobox);
1035 ffprintf_d(stderr, log, buf, "\nFound %d clusters\n\n", clust->ncl);
1036 trxsfn = NULL;
1037 if (trxfn)
1039 /* do we write all structures? */
1040 if (write_ncl)
1042 trxsfn = parse_filename(trxfn, max(write_ncl, clust->ncl));
1043 snew(bWrite, nf);
1045 ffprintf_ss(stderr, log, buf, "Writing %s structure for each cluster to %s\n",
1046 bAverage ? "average" : "middle", trxfn);
1047 if (write_ncl)
1049 /* find out what we want to tell the user:
1050 Writing [all structures|structures with rmsd > %g] for
1051 {all|first %d} clusters {with more than %d structures} to %s */
1052 if (rmsmin > 0.0)
1054 sprintf(buf1, "structures with rmsd > %g", rmsmin);
1056 else
1058 sprintf(buf1, "all structures");
1060 buf2[0] = buf3[0] = '\0';
1061 if (write_ncl >= clust->ncl)
1063 if (write_nst == 0)
1065 sprintf(buf2, "all ");
1068 else
1070 sprintf(buf2, "the first %d ", write_ncl);
1072 if (write_nst)
1074 sprintf(buf3, " with more than %d structures", write_nst);
1076 sprintf(buf, "Writing %s for %sclusters%s to %s\n", buf1, buf2, buf3, trxsfn);
1077 ffprintf(stderr, log, buf);
1080 /* Prepare a reference structure for the orientation of the clusters */
1081 if (bFit)
1083 reset_x(ifsize, fitidx, natom, NULL, xtps, mass);
1085 trxout = open_trx(trxfn, "w");
1086 /* Calculate the average structure in each cluster, *
1087 * all structures are fitted to the first struture of the cluster */
1088 snew(xav, natom);
1091 if (transfn || ntransfn)
1093 ana_trans(clust, nf, transfn, ntransfn, log, rlo, rhi, oenv);
1096 if (clustidfn)
1098 fp = xvgropen(clustidfn, "Clusters", output_env_get_xvgr_tlabel(oenv), "Cluster #", oenv);
1099 if (output_env_get_print_xvgr_codes(oenv))
1101 fprintf(fp, "@ s0 symbol 2\n");
1102 fprintf(fp, "@ s0 symbol size 0.2\n");
1103 fprintf(fp, "@ s0 linestyle 0\n");
1105 for (i = 0; i < nf; i++)
1107 fprintf(fp, "%8g %8d\n", time[i], clust->cl[i]);
1109 gmx_ffclose(fp);
1111 if (sizefn)
1113 fp = xvgropen(sizefn, "Cluster Sizes", "Cluster #", "# Structures", oenv);
1114 if (output_env_get_print_xvgr_codes(oenv))
1116 fprintf(fp, "@g%d type %s\n", 0, "bar");
1119 snew(structure, nf);
1120 fprintf(log, "\n%3s | %3s %4s | %6s %4s | cluster members\n",
1121 "cl.", "#st", "rmsd", "middle", "rmsd");
1122 for (cl = 1; cl <= clust->ncl; cl++)
1124 /* prepare structures (fit, middle, average) */
1125 if (xav)
1127 for (i = 0; i < natom; i++)
1129 clear_rvec(xav[i]);
1132 nstr = 0;
1133 for (i1 = 0; i1 < nf; i1++)
1135 if (clust->cl[i1] == cl)
1137 structure[nstr] = i1;
1138 nstr++;
1139 if (trxfn && (bAverage || write_ncl) )
1141 if (bFit)
1143 reset_x(ifsize, fitidx, natom, NULL, xx[i1], mass);
1145 if (nstr == 1)
1147 first = i1;
1149 else if (bFit)
1151 do_fit(natom, mass, xx[first], xx[i1]);
1153 if (xav)
1155 for (i = 0; i < natom; i++)
1157 rvec_inc(xav[i], xx[i1][i]);
1163 if (sizefn)
1165 fprintf(fp, "%8d %8d\n", cl, nstr);
1167 clrmsd = 0;
1168 midstr = 0;
1169 midrmsd = 10000;
1170 for (i1 = 0; i1 < nstr; i1++)
1172 r = 0;
1173 if (nstr > 1)
1175 for (i = 0; i < nstr; i++)
1177 if (i < i1)
1179 r += rmsd[structure[i]][structure[i1]];
1181 else
1183 r += rmsd[structure[i1]][structure[i]];
1186 r /= (nstr - 1);
1188 if (r < midrmsd)
1190 midstr = structure[i1];
1191 midrmsd = r;
1193 clrmsd += r;
1195 clrmsd /= nstr;
1197 /* dump cluster info to logfile */
1198 if (nstr > 1)
1200 sprintf(buf1, "%6.3f", clrmsd);
1201 if (buf1[0] == '0')
1203 buf1[0] = ' ';
1205 sprintf(buf2, "%5.3f", midrmsd);
1206 if (buf2[0] == '0')
1208 buf2[0] = ' ';
1211 else
1213 sprintf(buf1, "%5s", "");
1214 sprintf(buf2, "%5s", "");
1216 fprintf(log, "%3d | %3d %s | %6g%s |", cl, nstr, buf1, time[midstr], buf2);
1217 for (i = 0; i < nstr; i++)
1219 if ((i % 7 == 0) && i)
1221 sprintf(buf, "\n%3s | %3s %4s | %6s %4s |", "", "", "", "", "");
1223 else
1225 buf[0] = '\0';
1227 i1 = structure[i];
1228 fprintf(log, "%s %6g", buf, time[i1]);
1230 fprintf(log, "\n");
1232 /* write structures to trajectory file(s) */
1233 if (trxfn)
1235 if (write_ncl)
1237 for (i = 0; i < nstr; i++)
1239 bWrite[i] = FALSE;
1242 if (cl < write_ncl+1 && nstr > write_nst)
1244 /* Dump all structures for this cluster */
1245 /* generate numbered filename (there is a %d in trxfn!) */
1246 sprintf(buf, trxsfn, cl);
1247 trxsout = open_trx(buf, "w");
1248 for (i = 0; i < nstr; i++)
1250 bWrite[i] = TRUE;
1251 if (rmsmin > 0.0)
1253 for (i1 = 0; i1 < i && bWrite[i]; i1++)
1255 if (bWrite[i1])
1257 bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
1261 if (bWrite[i])
1263 write_trx(trxsout, iosize, outidx, atoms, i, time[structure[i]], zerobox,
1264 xx[structure[i]], NULL, NULL);
1267 close_trx(trxsout);
1269 /* Dump the average structure for this cluster */
1270 if (bAverage)
1272 for (i = 0; i < natom; i++)
1274 svmul(1.0/nstr, xav[i], xav[i]);
1277 else
1279 for (i = 0; i < natom; i++)
1281 copy_rvec(xx[midstr][i], xav[i]);
1283 if (bFit)
1285 reset_x(ifsize, fitidx, natom, NULL, xav, mass);
1288 if (bFit)
1290 do_fit(natom, mass, xtps, xav);
1292 r = cl;
1293 write_trx(trxout, iosize, outidx, atoms, cl, time[midstr], zerobox, xav, NULL, NULL);
1296 /* clean up */
1297 if (trxfn)
1299 close_trx(trxout);
1300 sfree(xav);
1301 if (write_ncl)
1303 sfree(bWrite);
1306 sfree(structure);
1307 if (trxsfn)
1309 sfree(trxsfn);
1313 static void convert_mat(t_matrix *mat, t_mat *rms)
1315 int i, j;
1317 rms->n1 = mat->nx;
1318 matrix2real(mat, rms->mat);
1319 /* free input xpm matrix data */
1320 for (i = 0; i < mat->nx; i++)
1322 sfree(mat->matrix[i]);
1324 sfree(mat->matrix);
1326 for (i = 0; i < mat->nx; i++)
1328 for (j = i; j < mat->nx; j++)
1330 rms->sumrms += rms->mat[i][j];
1331 rms->maxrms = max(rms->maxrms, rms->mat[i][j]);
1332 if (j != i)
1334 rms->minrms = min(rms->minrms, rms->mat[i][j]);
1338 rms->nn = mat->nx;
1341 int gmx_cluster(int argc, char *argv[])
1343 const char *desc[] = {
1344 "[THISMODULE] can cluster structures using several different methods.",
1345 "Distances between structures can be determined from a trajectory",
1346 "or read from an [TT].xpm[tt] matrix file with the [TT]-dm[tt] option.",
1347 "RMS deviation after fitting or RMS deviation of atom-pair distances",
1348 "can be used to define the distance between structures.[PAR]",
1350 "single linkage: add a structure to a cluster when its distance to any",
1351 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
1353 "Jarvis Patrick: add a structure to a cluster when this structure",
1354 "and a structure in the cluster have each other as neighbors and",
1355 "they have a least [TT]P[tt] neighbors in common. The neighbors",
1356 "of a structure are the M closest structures or all structures within",
1357 "[TT]cutoff[tt].[PAR]",
1359 "Monte Carlo: reorder the RMSD matrix using Monte Carlo such that",
1360 "the order of the frames is using the smallest possible increments.",
1361 "With this it is possible to make a smooth animation going from one",
1362 "structure to another with the largest possible (e.g.) RMSD between",
1363 "them, however the intermediate steps should be as small as possible.",
1364 "Applications could be to visualize a potential of mean force",
1365 "ensemble of simulations or a pulling simulation. Obviously the user",
1366 "has to prepare the trajectory well (e.g. by not superimposing frames).",
1367 "The final result can be inspect visually by looking at the matrix",
1368 "[TT].xpm[tt] file, which should vary smoothly from bottom to top.[PAR]",
1370 "diagonalization: diagonalize the RMSD matrix.[PAR]",
1372 "gromos: use algorithm as described in Daura [IT]et al.[it]",
1373 "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
1374 "Count number of neighbors using cut-off, take structure with",
1375 "largest number of neighbors with all its neighbors as cluster",
1376 "and eliminate it from the pool of clusters. Repeat for remaining",
1377 "structures in pool.[PAR]",
1379 "When the clustering algorithm assigns each structure to exactly one",
1380 "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
1381 "file is supplied, the structure with",
1382 "the smallest average distance to the others or the average structure",
1383 "or all structures for each cluster will be written to a trajectory",
1384 "file. When writing all structures, separate numbered files are made",
1385 "for each cluster.[PAR]",
1387 "Two output files are always written:[BR]",
1388 "[TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
1389 "and a graphical depiction of the clusters in the lower right half",
1390 "When [TT]-minstruct[tt] = 1 the graphical depiction is black",
1391 "when two structures are in the same cluster.",
1392 "When [TT]-minstruct[tt] > 1 different colors will be used for each",
1393 "cluster.[BR]",
1394 "[TT]-g[tt] writes information on the options used and a detailed list",
1395 "of all clusters and their members.[PAR]",
1397 "Additionally, a number of optional output files can be written:[BR]",
1398 "[TT]-dist[tt] writes the RMSD distribution.[BR]",
1399 "[TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
1400 "diagonalization.[BR]",
1401 "[TT]-sz[tt] writes the cluster sizes.[BR]",
1402 "[TT]-tr[tt] writes a matrix of the number transitions between",
1403 "cluster pairs.[BR]",
1404 "[TT]-ntr[tt] writes the total number of transitions to or from",
1405 "each cluster.[BR]",
1406 "[TT]-clid[tt] writes the cluster number as a function of time.[BR]",
1407 "[TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1408 "structure of each cluster or writes numbered files with cluster members",
1409 "for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1410 "[TT]-nst[tt] and [TT]-rmsmin[tt]). The center of a cluster is the",
1411 "structure with the smallest average RMSD from all other structures",
1412 "of the cluster.[BR]",
1415 FILE *fp, *log;
1416 int nf, i, i1, i2, j;
1417 gmx_int64_t nrms = 0;
1419 matrix box;
1420 rvec *xtps, *usextps, *x1, **xx = NULL;
1421 const char *fn, *trx_out_fn;
1422 t_clusters clust;
1423 t_mat *rms, *orig = NULL;
1424 real *eigenvalues;
1425 t_topology top;
1426 int ePBC;
1427 t_atoms useatoms;
1428 t_matrix *readmat = NULL;
1429 real *eigenvectors;
1431 int isize = 0, ifsize = 0, iosize = 0;
1432 atom_id *index = NULL, *fitidx, *outidx;
1433 char *grpname;
1434 real rmsd, **d1, **d2, *time = NULL, time_invfac, *mass = NULL;
1435 char buf[STRLEN], buf1[80], title[STRLEN];
1436 gmx_bool bAnalyze, bUseRmsdCut, bJP_RMSD = FALSE, bReadMat, bReadTraj, bPBC = TRUE;
1438 int method, ncluster = 0;
1439 static const char *methodname[] = {
1440 NULL, "linkage", "jarvis-patrick", "monte-carlo",
1441 "diagonalization", "gromos", NULL
1443 enum {
1444 m_null, m_linkage, m_jarvis_patrick,
1445 m_monte_carlo, m_diagonalize, m_gromos, m_nr
1447 /* Set colors for plotting: white = zero RMS, black = maximum */
1448 static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
1449 static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
1450 static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
1451 static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
1452 static int nlevels = 40, skip = 1;
1453 static real scalemax = -1.0, rmsdcut = 0.1, rmsmin = 0.0;
1454 gmx_bool bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;
1455 static int niter = 10000, nrandom = 0, seed = 1993, write_ncl = 0, write_nst = 1, minstruct = 1;
1456 static real kT = 1e-3;
1457 static int M = 10, P = 3;
1458 output_env_t oenv;
1459 gmx_rmpbc_t gpbc = NULL;
1461 t_pargs pa[] = {
1462 { "-dista", FALSE, etBOOL, {&bRMSdist},
1463 "Use RMSD of distances instead of RMS deviation" },
1464 { "-nlevels", FALSE, etINT, {&nlevels},
1465 "Discretize RMSD matrix in this number of levels" },
1466 { "-cutoff", FALSE, etREAL, {&rmsdcut},
1467 "RMSD cut-off (nm) for two structures to be neighbor" },
1468 { "-fit", FALSE, etBOOL, {&bFit},
1469 "Use least squares fitting before RMSD calculation" },
1470 { "-max", FALSE, etREAL, {&scalemax},
1471 "Maximum level in RMSD matrix" },
1472 { "-skip", FALSE, etINT, {&skip},
1473 "Only analyze every nr-th frame" },
1474 { "-av", FALSE, etBOOL, {&bAverage},
1475 "Write average iso middle structure for each cluster" },
1476 { "-wcl", FALSE, etINT, {&write_ncl},
1477 "Write the structures for this number of clusters to numbered files" },
1478 { "-nst", FALSE, etINT, {&write_nst},
1479 "Only write all structures if more than this number of structures per cluster" },
1480 { "-rmsmin", FALSE, etREAL, {&rmsmin},
1481 "minimum rms difference with rest of cluster for writing structures" },
1482 { "-method", FALSE, etENUM, {methodname},
1483 "Method for cluster determination" },
1484 { "-minstruct", FALSE, etINT, {&minstruct},
1485 "Minimum number of structures in cluster for coloring in the [TT].xpm[tt] file" },
1486 { "-binary", FALSE, etBOOL, {&bBinary},
1487 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1488 "is given by [TT]-cutoff[tt]" },
1489 { "-M", FALSE, etINT, {&M},
1490 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1491 "0 is use cutoff" },
1492 { "-P", FALSE, etINT, {&P},
1493 "Number of identical nearest neighbors required to form a cluster" },
1494 { "-seed", FALSE, etINT, {&seed},
1495 "Random number seed for Monte Carlo clustering algorithm: <= 0 means generate" },
1496 { "-niter", FALSE, etINT, {&niter},
1497 "Number of iterations for MC" },
1498 { "-nrandom", FALSE, etINT, {&nrandom},
1499 "The first iterations for MC may be done complete random, to shuffle the frames" },
1500 { "-kT", FALSE, etREAL, {&kT},
1501 "Boltzmann weighting factor for Monte Carlo optimization "
1502 "(zero turns off uphill steps)" },
1503 { "-pbc", FALSE, etBOOL,
1504 { &bPBC }, "PBC check" }
1506 t_filenm fnm[] = {
1507 { efTRX, "-f", NULL, ffOPTRD },
1508 { efTPS, "-s", NULL, ffOPTRD },
1509 { efNDX, NULL, NULL, ffOPTRD },
1510 { efXPM, "-dm", "rmsd", ffOPTRD },
1511 { efXPM, "-om", "rmsd-raw", ffWRITE },
1512 { efXPM, "-o", "rmsd-clust", ffWRITE },
1513 { efLOG, "-g", "cluster", ffWRITE },
1514 { efXVG, "-dist", "rmsd-dist", ffOPTWR },
1515 { efXVG, "-ev", "rmsd-eig", ffOPTWR },
1516 { efXVG, "-conv", "mc-conv", ffOPTWR },
1517 { efXVG, "-sz", "clust-size", ffOPTWR},
1518 { efXPM, "-tr", "clust-trans", ffOPTWR},
1519 { efXVG, "-ntr", "clust-trans", ffOPTWR},
1520 { efXVG, "-clid", "clust-id.xvg", ffOPTWR},
1521 { efTRX, "-cl", "clusters.pdb", ffOPTWR }
1523 #define NFILE asize(fnm)
1525 if (!parse_common_args(&argc, argv,
1526 PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
1527 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL,
1528 &oenv))
1530 return 0;
1533 /* parse options */
1534 bReadMat = opt2bSet("-dm", NFILE, fnm);
1535 bReadTraj = opt2bSet("-f", NFILE, fnm) || !bReadMat;
1536 if (opt2parg_bSet("-av", asize(pa), pa) ||
1537 opt2parg_bSet("-wcl", asize(pa), pa) ||
1538 opt2parg_bSet("-nst", asize(pa), pa) ||
1539 opt2parg_bSet("-rmsmin", asize(pa), pa) ||
1540 opt2bSet("-cl", NFILE, fnm) )
1542 trx_out_fn = opt2fn("-cl", NFILE, fnm);
1544 else
1546 trx_out_fn = NULL;
1548 if (bReadMat && output_env_get_time_factor(oenv) != 1)
1550 fprintf(stderr,
1551 "\nWarning: assuming the time unit in %s is %s\n",
1552 opt2fn("-dm", NFILE, fnm), output_env_get_time_unit(oenv));
1554 if (trx_out_fn && !bReadTraj)
1556 fprintf(stderr, "\nWarning: "
1557 "cannot write cluster structures without reading trajectory\n"
1558 " ignoring option -cl %s\n", trx_out_fn);
1561 method = 1;
1562 while (method < m_nr && gmx_strcasecmp(methodname[0], methodname[method]) != 0)
1564 method++;
1566 if (method == m_nr)
1568 gmx_fatal(FARGS, "Invalid method");
1571 bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
1572 method == m_gromos );
1574 /* Open log file */
1575 log = ftp2FILE(efLOG, NFILE, fnm, "w");
1577 fprintf(stderr, "Using %s method for clustering\n", methodname[0]);
1578 fprintf(log, "Using %s method for clustering\n", methodname[0]);
1580 /* check input and write parameters to log file */
1581 bUseRmsdCut = FALSE;
1582 if (method == m_jarvis_patrick)
1584 bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff", asize(pa), pa);
1585 if ((M < 0) || (M == 1))
1587 gmx_fatal(FARGS, "M (%d) must be 0 or larger than 1", M);
1589 if (M < 2)
1591 sprintf(buf1, "Will use P=%d and RMSD cutoff (%g)", P, rmsdcut);
1592 bUseRmsdCut = TRUE;
1594 else
1596 if (P >= M)
1598 gmx_fatal(FARGS, "Number of neighbors required (P) must be less than M");
1600 if (bJP_RMSD)
1602 sprintf(buf1, "Will use P=%d, M=%d and RMSD cutoff (%g)", P, M, rmsdcut);
1603 bUseRmsdCut = TRUE;
1605 else
1607 sprintf(buf1, "Will use P=%d, M=%d", P, M);
1610 ffprintf_s(stderr, log, buf, "%s for determining the neighbors\n\n", buf1);
1612 else /* method != m_jarvis */
1614 bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
1616 if (bUseRmsdCut && method != m_jarvis_patrick)
1618 fprintf(log, "Using RMSD cutoff %g nm\n", rmsdcut);
1620 if (method == m_monte_carlo)
1622 fprintf(log, "Using %d iterations\n", niter);
1625 if (skip < 1)
1627 gmx_fatal(FARGS, "skip (%d) should be >= 1", skip);
1630 /* get input */
1631 if (bReadTraj)
1633 /* don't read mass-database as masses (and top) are not used */
1634 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), buf, &top, &ePBC, &xtps, NULL, box,
1635 TRUE);
1636 if (bPBC)
1638 gpbc = gmx_rmpbc_init(&top.idef, ePBC, top.atoms.nr);
1641 fprintf(stderr, "\nSelect group for least squares fit%s:\n",
1642 bReadMat ? "" : " and RMSD calculation");
1643 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1644 1, &ifsize, &fitidx, &grpname);
1645 if (trx_out_fn)
1647 fprintf(stderr, "\nSelect group for output:\n");
1648 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1649 1, &iosize, &outidx, &grpname);
1650 /* merge and convert both index groups: */
1651 /* first copy outidx to index. let outidx refer to elements in index */
1652 snew(index, iosize);
1653 isize = iosize;
1654 for (i = 0; i < iosize; i++)
1656 index[i] = outidx[i];
1657 outidx[i] = i;
1659 /* now lookup elements from fitidx in index, add them if necessary
1660 and also let fitidx refer to elements in index */
1661 for (i = 0; i < ifsize; i++)
1663 j = 0;
1664 while (j < isize && index[j] != fitidx[i])
1666 j++;
1668 if (j >= isize)
1670 /* slow this way, but doesn't matter much */
1671 isize++;
1672 srenew(index, isize);
1674 index[j] = fitidx[i];
1675 fitidx[i] = j;
1678 else /* !trx_out_fn */
1680 isize = ifsize;
1681 snew(index, isize);
1682 for (i = 0; i < ifsize; i++)
1684 index[i] = fitidx[i];
1685 fitidx[i] = i;
1690 if (bReadTraj)
1692 /* Loop over first coordinate file */
1693 fn = opt2fn("-f", NFILE, fnm);
1695 xx = read_whole_trj(fn, isize, index, skip, &nf, &time, oenv, bPBC, gpbc);
1696 output_env_conv_times(oenv, nf, time);
1697 if (!bRMSdist || bAnalyze)
1699 /* Center all frames on zero */
1700 snew(mass, isize);
1701 for (i = 0; i < ifsize; i++)
1703 mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1705 if (bFit)
1707 for (i = 0; i < nf; i++)
1709 reset_x(ifsize, fitidx, isize, NULL, xx[i], mass);
1713 if (bPBC)
1715 gmx_rmpbc_done(gpbc);
1719 if (bReadMat)
1721 fprintf(stderr, "Reading rms distance matrix ");
1722 read_xpm_matrix(opt2fn("-dm", NFILE, fnm), &readmat);
1723 fprintf(stderr, "\n");
1724 if (readmat[0].nx != readmat[0].ny)
1726 gmx_fatal(FARGS, "Matrix (%dx%d) is not square",
1727 readmat[0].nx, readmat[0].ny);
1729 if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1731 gmx_fatal(FARGS, "Matrix size (%dx%d) does not match the number of "
1732 "frames (%d)", readmat[0].nx, readmat[0].ny, nf);
1735 nf = readmat[0].nx;
1736 sfree(time);
1737 time = readmat[0].axis_x;
1738 time_invfac = output_env_get_time_invfactor(oenv);
1739 for (i = 0; i < nf; i++)
1741 time[i] *= time_invfac;
1744 rms = init_mat(readmat[0].nx, method == m_diagonalize);
1745 convert_mat(&(readmat[0]), rms);
1747 nlevels = readmat[0].nmap;
1749 else /* !bReadMat */
1751 rms = init_mat(nf, method == m_diagonalize);
1752 nrms = ((gmx_int64_t)nf*((gmx_int64_t)nf-1))/2;
1753 if (!bRMSdist)
1755 fprintf(stderr, "Computing %dx%d RMS deviation matrix\n", nf, nf);
1756 /* Initialize work array */
1757 snew(x1, isize);
1758 for (i1 = 0; i1 < nf; i1++)
1760 for (i2 = i1+1; i2 < nf; i2++)
1762 for (i = 0; i < isize; i++)
1764 copy_rvec(xx[i1][i], x1[i]);
1766 if (bFit)
1768 do_fit(isize, mass, xx[i2], x1);
1770 rmsd = rmsdev(isize, mass, xx[i2], x1);
1771 set_mat_entry(rms, i1, i2, rmsd);
1773 nrms -= (gmx_int64_t) (nf-i1-1);
1774 fprintf(stderr, "\r# RMSD calculations left: " "%"GMX_PRId64 " ", nrms);
1776 sfree(x1);
1778 else /* bRMSdist */
1780 fprintf(stderr, "Computing %dx%d RMS distance deviation matrix\n", nf, nf);
1782 /* Initiate work arrays */
1783 snew(d1, isize);
1784 snew(d2, isize);
1785 for (i = 0; (i < isize); i++)
1787 snew(d1[i], isize);
1788 snew(d2[i], isize);
1790 for (i1 = 0; i1 < nf; i1++)
1792 calc_dist(isize, xx[i1], d1);
1793 for (i2 = i1+1; (i2 < nf); i2++)
1795 calc_dist(isize, xx[i2], d2);
1796 set_mat_entry(rms, i1, i2, rms_dist(isize, d1, d2));
1798 nrms -= (nf-i1-1);
1799 fprintf(stderr, "\r# RMSD calculations left: " "%"GMX_PRId64 " ", nrms);
1801 /* Clean up work arrays */
1802 for (i = 0; (i < isize); i++)
1804 sfree(d1[i]);
1805 sfree(d2[i]);
1807 sfree(d1);
1808 sfree(d2);
1810 fprintf(stderr, "\n\n");
1812 ffprintf_gg(stderr, log, buf, "The RMSD ranges from %g to %g nm\n",
1813 rms->minrms, rms->maxrms);
1814 ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2*rms->sumrms/(nf*(nf-1)));
1815 ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);
1816 ffprintf_g(stderr, log, buf, "Energy of the matrix is %g.\n", mat_energy(rms));
1817 if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
1819 fprintf(stderr, "WARNING: rmsd cutoff %g is outside range of rmsd values "
1820 "%g to %g\n", rmsdcut, rms->minrms, rms->maxrms);
1822 if (bAnalyze && (rmsmin < rms->minrms) )
1824 fprintf(stderr, "WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1825 rmsmin, rms->minrms);
1827 if (bAnalyze && (rmsmin > rmsdcut) )
1829 fprintf(stderr, "WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1830 rmsmin, rmsdcut);
1833 /* Plot the rmsd distribution */
1834 rmsd_distribution(opt2fn("-dist", NFILE, fnm), rms, oenv);
1836 if (bBinary)
1838 for (i1 = 0; (i1 < nf); i1++)
1840 for (i2 = 0; (i2 < nf); i2++)
1842 if (rms->mat[i1][i2] < rmsdcut)
1844 rms->mat[i1][i2] = 0;
1846 else
1848 rms->mat[i1][i2] = 1;
1854 snew(clust.cl, nf);
1855 switch (method)
1857 case m_linkage:
1858 /* Now sort the matrix and write it out again */
1859 gather(rms, rmsdcut, &clust);
1860 break;
1861 case m_diagonalize:
1862 /* Do a diagonalization */
1863 snew(eigenvalues, nf);
1864 snew(eigenvectors, nf*nf);
1865 memcpy(eigenvectors, rms->mat[0], nf*nf*sizeof(real));
1866 eigensolver(eigenvectors, nf, 0, nf, eigenvalues, rms->mat[0]);
1867 sfree(eigenvectors);
1869 fp = xvgropen(opt2fn("-ev", NFILE, fnm), "RMSD matrix Eigenvalues",
1870 "Eigenvector index", "Eigenvalues (nm\\S2\\N)", oenv);
1871 for (i = 0; (i < nf); i++)
1873 fprintf(fp, "%10d %10g\n", i, eigenvalues[i]);
1875 gmx_ffclose(fp);
1876 break;
1877 case m_monte_carlo:
1878 orig = init_mat(rms->nn, FALSE);
1879 orig->nn = rms->nn;
1880 copy_t_mat(orig, rms);
1881 mc_optimize(log, rms, time, niter, nrandom, seed, kT,
1882 opt2fn_null("-conv", NFILE, fnm), oenv);
1883 break;
1884 case m_jarvis_patrick:
1885 jarvis_patrick(rms->nn, rms->mat, M, P, bJP_RMSD ? rmsdcut : -1, &clust);
1886 break;
1887 case m_gromos:
1888 gromos(rms->nn, rms->mat, rmsdcut, &clust);
1889 break;
1890 default:
1891 gmx_fatal(FARGS, "DEATH HORROR unknown method \"%s\"", methodname[0]);
1894 if (method == m_monte_carlo || method == m_diagonalize)
1896 fprintf(stderr, "Energy of the matrix after clustering is %g.\n",
1897 mat_energy(rms));
1900 if (bAnalyze)
1902 if (minstruct > 1)
1904 ncluster = plot_clusters(nf, rms->mat, &clust, minstruct);
1906 else
1908 mark_clusters(nf, rms->mat, rms->maxrms, &clust);
1910 init_t_atoms(&useatoms, isize, FALSE);
1911 snew(usextps, isize);
1912 useatoms.resinfo = top.atoms.resinfo;
1913 for (i = 0; i < isize; i++)
1915 useatoms.atomname[i] = top.atoms.atomname[index[i]];
1916 useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1917 useatoms.nres = max(useatoms.nres, useatoms.atom[i].resind+1);
1918 copy_rvec(xtps[index[i]], usextps[i]);
1920 useatoms.nr = isize;
1921 analyze_clusters(nf, &clust, rms->mat, isize, &useatoms, usextps, mass, xx, time,
1922 ifsize, fitidx, iosize, outidx,
1923 bReadTraj ? trx_out_fn : NULL,
1924 opt2fn_null("-sz", NFILE, fnm),
1925 opt2fn_null("-tr", NFILE, fnm),
1926 opt2fn_null("-ntr", NFILE, fnm),
1927 opt2fn_null("-clid", NFILE, fnm),
1928 bAverage, write_ncl, write_nst, rmsmin, bFit, log,
1929 rlo_bot, rhi_bot, oenv);
1931 gmx_ffclose(log);
1933 if (bBinary && !bAnalyze)
1935 /* Make the clustering visible */
1936 for (i2 = 0; (i2 < nf); i2++)
1938 for (i1 = i2+1; (i1 < nf); i1++)
1940 if (rms->mat[i1][i2])
1942 rms->mat[i1][i2] = rms->maxrms;
1948 fp = opt2FILE("-o", NFILE, fnm, "w");
1949 fprintf(stderr, "Writing rms distance/clustering matrix ");
1950 if (bReadMat)
1952 write_xpm(fp, 0, readmat[0].title, readmat[0].legend, readmat[0].label_x,
1953 readmat[0].label_y, nf, nf, readmat[0].axis_x, readmat[0].axis_y,
1954 rms->mat, 0.0, rms->maxrms, rlo_top, rhi_top, &nlevels);
1956 else
1958 sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
1959 sprintf(title, "RMS%sDeviation / Cluster Index",
1960 bRMSdist ? " Distance " : " ");
1961 if (minstruct > 1)
1963 write_xpm_split(fp, 0, title, "RMSD (nm)", buf, buf,
1964 nf, nf, time, time, rms->mat, 0.0, rms->maxrms, &nlevels,
1965 rlo_top, rhi_top, 0.0, (real) ncluster,
1966 &ncluster, TRUE, rlo_bot, rhi_bot);
1968 else
1970 write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
1971 nf, nf, time, time, rms->mat, 0.0, rms->maxrms,
1972 rlo_top, rhi_top, &nlevels);
1975 fprintf(stderr, "\n");
1976 gmx_ffclose(fp);
1977 if (NULL != orig)
1979 fp = opt2FILE("-om", NFILE, fnm, "w");
1980 sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
1981 sprintf(title, "RMS%sDeviation", bRMSdist ? " Distance " : " ");
1982 write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
1983 nf, nf, time, time, orig->mat, 0.0, orig->maxrms,
1984 rlo_top, rhi_top, &nlevels);
1985 gmx_ffclose(fp);
1986 done_mat(&orig);
1987 sfree(orig);
1989 /* now show what we've done */
1990 do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
1991 do_view(oenv, opt2fn_null("-sz", NFILE, fnm), "-nxy");
1992 if (method == m_diagonalize)
1994 do_view(oenv, opt2fn_null("-ev", NFILE, fnm), "-nxy");
1996 do_view(oenv, opt2fn("-dist", NFILE, fnm), "-nxy");
1997 if (bAnalyze)
1999 do_view(oenv, opt2fn_null("-tr", NFILE, fnm), "-nxy");
2000 do_view(oenv, opt2fn_null("-ntr", NFILE, fnm), "-nxy");
2001 do_view(oenv, opt2fn_null("-clid", NFILE, fnm), "-nxy");
2003 do_view(oenv, opt2fn_null("-conv", NFILE, fnm), NULL);
2005 return 0;