Merge release-4-6 into release-5-0
[gromacs.git] / src / gromacs / gmxana / gmx_cluster.c
blob4919de60897ffb785ffe390c425f728141cf093f
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #ifdef HAVE_CONFIG_H
38 #include <config.h>
39 #endif
41 #include <math.h>
42 #include <string.h>
44 #include "macros.h"
45 #include "gromacs/utility/smalloc.h"
46 #include "typedefs.h"
47 #include "gromacs/commandline/pargs.h"
48 #include "gromacs/fileio/tpxio.h"
49 #include "gromacs/fileio/trxio.h"
50 #include "gromacs/utility/cstringutil.h"
51 #include "vec.h"
52 #include "macros.h"
53 #include "index.h"
54 #include "gromacs/random/random.h"
55 #include "pbc.h"
56 #include "rmpbc.h"
57 #include "xvgr.h"
58 #include "gromacs/fileio/futil.h"
59 #include "gromacs/fileio/matio.h"
60 #include "cmat.h"
61 #include "gromacs/fileio/trnio.h"
62 #include "viewit.h"
63 #include "gmx_ana.h"
65 #include "gromacs/linearalgebra/eigensolver.h"
66 #include "gromacs/math/do_fit.h"
67 #include "gromacs/legacyheaders/gmx_fatal.h"
69 /* print to two file pointers at once (i.e. stderr and log) */
70 static gmx_inline
71 void lo_ffprintf(FILE *fp1, FILE *fp2, const char *buf)
73 fprintf(fp1, "%s", buf);
74 fprintf(fp2, "%s", buf);
77 /* just print a prepared buffer to fp1 and fp2 */
78 static gmx_inline
79 void ffprintf(FILE *fp1, FILE *fp2, const char *buf)
81 lo_ffprintf(fp1, fp2, buf);
84 /* prepare buffer with one argument, then print to fp1 and fp2 */
85 static gmx_inline
86 void ffprintf_d(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg)
88 sprintf(buf, fmt, arg);
89 lo_ffprintf(fp1, fp2, buf);
92 /* prepare buffer with one argument, then print to fp1 and fp2 */
93 static gmx_inline
94 void ffprintf_g(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg)
96 sprintf(buf, fmt, arg);
97 lo_ffprintf(fp1, fp2, buf);
100 /* prepare buffer with one argument, then print to fp1 and fp2 */
101 static gmx_inline
102 void ffprintf_s(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg)
104 sprintf(buf, fmt, arg);
105 lo_ffprintf(fp1, fp2, buf);
108 /* prepare buffer with two arguments, then print to fp1 and fp2 */
109 static gmx_inline
110 void ffprintf_dd(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg1, int arg2)
112 sprintf(buf, fmt, arg1, arg2);
113 lo_ffprintf(fp1, fp2, buf);
116 /* prepare buffer with two arguments, then print to fp1 and fp2 */
117 static gmx_inline
118 void ffprintf_gg(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg1, real arg2)
120 sprintf(buf, fmt, arg1, arg2);
121 lo_ffprintf(fp1, fp2, buf);
124 /* prepare buffer with two arguments, then print to fp1 and fp2 */
125 static gmx_inline
126 void ffprintf_ss(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg1, const char *arg2)
128 sprintf(buf, fmt, arg1, arg2);
129 lo_ffprintf(fp1, fp2, buf);
132 typedef struct {
133 int ncl;
134 int *cl;
135 } t_clusters;
137 typedef struct {
138 int nr;
139 int *nb;
140 } t_nnb;
142 void pr_energy(FILE *fp, real e)
144 fprintf(fp, "Energy: %8.4f\n", e);
147 void cp_index(int nn, int from[], int to[])
149 int i;
151 for (i = 0; (i < nn); i++)
153 to[i] = from[i];
157 void mc_optimize(FILE *log, t_mat *m, real *time,
158 int maxiter, int nrandom,
159 int seed, real kT,
160 const char *conv, output_env_t oenv)
162 FILE *fp = NULL;
163 real ecur, enext, emin, prob, enorm;
164 int i, j, iswap, jswap, nn, nuphill = 0;
165 gmx_rng_t rng;
166 t_mat *minimum;
168 if (m->n1 != m->nn)
170 fprintf(stderr, "Can not do Monte Carlo optimization with a non-square matrix.\n");
171 return;
173 printf("\nDoing Monte Carlo optimization to find the smoothest trajectory\n");
174 printf("by reordering the frames to minimize the path between the two structures\n");
175 printf("that have the largest pairwise RMSD.\n");
177 iswap = jswap = -1;
178 enorm = m->mat[0][0];
179 for (i = 0; (i < m->n1); i++)
181 for (j = 0; (j < m->nn); j++)
183 if (m->mat[i][j] > enorm)
185 enorm = m->mat[i][j];
186 iswap = i;
187 jswap = j;
191 if ((iswap == -1) || (jswap == -1))
193 fprintf(stderr, "Matrix contains identical values in all fields\n");
194 return;
196 swap_rows(m, 0, iswap);
197 swap_rows(m, m->n1-1, jswap);
198 emin = ecur = mat_energy(m);
199 printf("Largest distance %g between %d and %d. Energy: %g.\n",
200 enorm, iswap, jswap, emin);
202 rng = gmx_rng_init(seed);
203 nn = m->nn;
205 /* Initiate and store global minimum */
206 minimum = init_mat(nn, m->b1D);
207 minimum->nn = nn;
208 copy_t_mat(minimum, m);
210 if (NULL != conv)
212 fp = xvgropen(conv, "Convergence of the MC optimization",
213 "Energy", "Step", oenv);
215 for (i = 0; (i < maxiter); i++)
217 /* Generate new swapping candidates */
220 iswap = 1+(nn-2)*gmx_rng_uniform_real(rng);
221 jswap = 1+(nn-2)*gmx_rng_uniform_real(rng);
223 while ((iswap == jswap) || (iswap >= nn-1) || (jswap >= nn-1));
225 /* Apply swap and compute energy */
226 swap_rows(m, iswap, jswap);
227 enext = mat_energy(m);
229 /* Compute probability */
230 prob = 0;
231 if ((enext < ecur) || (i < nrandom))
233 prob = 1;
234 if (enext < emin)
236 /* Store global minimum */
237 copy_t_mat(minimum, m);
238 emin = enext;
241 else if (kT > 0)
243 /* Try Monte Carlo step */
244 prob = exp(-(enext-ecur)/(enorm*kT));
247 if ((prob == 1) || (gmx_rng_uniform_real(rng) < prob))
249 if (enext > ecur)
251 nuphill++;
254 fprintf(log, "Iter: %d Swapped %4d and %4d (energy: %g prob: %g)\n",
255 i, iswap, jswap, enext, prob);
256 if (NULL != fp)
258 fprintf(fp, "%6d %10g\n", i, enext);
260 ecur = enext;
262 else
264 swap_rows(m, jswap, iswap);
267 fprintf(log, "%d uphill steps were taken during optimization\n",
268 nuphill);
270 /* Now swap the matrix to get it into global minimum mode */
271 copy_t_mat(m, minimum);
273 fprintf(log, "Global minimum energy %g\n", mat_energy(minimum));
274 fprintf(log, "Global minimum energy %g\n", mat_energy(m));
275 fprintf(log, "Swapped time and frame indices and RMSD to next neighbor:\n");
276 for (i = 0; (i < m->nn); i++)
278 fprintf(log, "%10g %5d %10g\n",
279 time[m->m_ind[i]],
280 m->m_ind[i],
281 (i < m->nn-1) ? m->mat[m->m_ind[i]][m->m_ind[i+1]] : 0);
284 if (NULL != fp)
286 fclose(fp);
290 static void calc_dist(int nind, rvec x[], real **d)
292 int i, j;
293 real *xi;
294 rvec dx;
296 for (i = 0; (i < nind-1); i++)
298 xi = x[i];
299 for (j = i+1; (j < nind); j++)
301 /* Should use pbc_dx when analysing multiple molecueles,
302 * but the box is not stored for every frame.
304 rvec_sub(xi, x[j], dx);
305 d[i][j] = norm(dx);
310 static real rms_dist(int isize, real **d, real **d_r)
312 int i, j;
313 real r, r2;
315 r2 = 0.0;
316 for (i = 0; (i < isize-1); i++)
318 for (j = i+1; (j < isize); j++)
320 r = d[i][j]-d_r[i][j];
321 r2 += r*r;
324 r2 /= (isize*(isize-1))/2;
326 return sqrt(r2);
329 static int rms_dist_comp(const void *a, const void *b)
331 t_dist *da, *db;
333 da = (t_dist *)a;
334 db = (t_dist *)b;
336 if (da->dist - db->dist < 0)
338 return -1;
340 else if (da->dist - db->dist > 0)
342 return 1;
344 return 0;
347 static int clust_id_comp(const void *a, const void *b)
349 t_clustid *da, *db;
351 da = (t_clustid *)a;
352 db = (t_clustid *)b;
354 return da->clust - db->clust;
357 static int nrnb_comp(const void *a, const void *b)
359 t_nnb *da, *db;
361 da = (t_nnb *)a;
362 db = (t_nnb *)b;
364 /* return the b-a, we want highest first */
365 return db->nr - da->nr;
368 void gather(t_mat *m, real cutoff, t_clusters *clust)
370 t_clustid *c;
371 t_dist *d;
372 int i, j, k, nn, cid, n1, diff;
373 gmx_bool bChange;
375 /* First we sort the entries in the RMSD matrix */
376 n1 = m->nn;
377 nn = ((n1-1)*n1)/2;
378 snew(d, nn);
379 for (i = k = 0; (i < n1); i++)
381 for (j = i+1; (j < n1); j++, k++)
383 d[k].i = i;
384 d[k].j = j;
385 d[k].dist = m->mat[i][j];
388 if (k != nn)
390 gmx_incons("gather algortihm");
392 qsort(d, nn, sizeof(d[0]), rms_dist_comp);
394 /* Now we make a cluster index for all of the conformations */
395 c = new_clustid(n1);
397 /* Now we check the closest structures, and equalize their cluster numbers */
398 fprintf(stderr, "Linking structures ");
401 fprintf(stderr, "*");
402 bChange = FALSE;
403 for (k = 0; (k < nn) && (d[k].dist < cutoff); k++)
405 diff = c[d[k].j].clust - c[d[k].i].clust;
406 if (diff)
408 bChange = TRUE;
409 if (diff > 0)
411 c[d[k].j].clust = c[d[k].i].clust;
413 else
415 c[d[k].i].clust = c[d[k].j].clust;
420 while (bChange);
421 fprintf(stderr, "\nSorting and renumbering clusters\n");
422 /* Sort on cluster number */
423 qsort(c, n1, sizeof(c[0]), clust_id_comp);
425 /* Renumber clusters */
426 cid = 1;
427 for (k = 1; k < n1; k++)
429 if (c[k].clust != c[k-1].clust)
431 c[k-1].clust = cid;
432 cid++;
434 else
436 c[k-1].clust = cid;
439 c[k-1].clust = cid;
440 if (debug)
442 for (k = 0; (k < n1); k++)
444 fprintf(debug, "Cluster index for conformation %d: %d\n",
445 c[k].conf, c[k].clust);
448 clust->ncl = cid;
449 for (k = 0; k < n1; k++)
451 clust->cl[c[k].conf] = c[k].clust;
454 sfree(c);
455 sfree(d);
458 gmx_bool jp_same(int **nnb, int i, int j, int P)
460 gmx_bool bIn;
461 int k, ii, jj, pp;
463 bIn = FALSE;
464 for (k = 0; nnb[i][k] >= 0; k++)
466 bIn = bIn || (nnb[i][k] == j);
468 if (!bIn)
470 return FALSE;
473 bIn = FALSE;
474 for (k = 0; nnb[j][k] >= 0; k++)
476 bIn = bIn || (nnb[j][k] == i);
478 if (!bIn)
480 return FALSE;
483 pp = 0;
484 for (ii = 0; nnb[i][ii] >= 0; ii++)
486 for (jj = 0; nnb[j][jj] >= 0; jj++)
488 if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
490 pp++;
495 return (pp >= P);
498 static void jarvis_patrick(int n1, real **mat, int M, int P,
499 real rmsdcut, t_clusters *clust)
501 t_dist *row;
502 t_clustid *c;
503 int **nnb;
504 int i, j, k, cid, diff, max;
505 gmx_bool bChange;
506 real **mcpy = NULL;
508 if (rmsdcut < 0)
510 rmsdcut = 10000;
513 /* First we sort the entries in the RMSD matrix row by row.
514 * This gives us the nearest neighbor list.
516 snew(nnb, n1);
517 snew(row, n1);
518 for (i = 0; (i < n1); i++)
520 for (j = 0; (j < n1); j++)
522 row[j].j = j;
523 row[j].dist = mat[i][j];
525 qsort(row, n1, sizeof(row[0]), rms_dist_comp);
526 if (M > 0)
528 /* Put the M nearest neighbors in the list */
529 snew(nnb[i], M+1);
530 for (j = k = 0; (k < M) && (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
532 if (row[j].j != i)
534 nnb[i][k] = row[j].j;
535 k++;
538 nnb[i][k] = -1;
540 else
542 /* Put all neighbors nearer than rmsdcut in the list */
543 max = 0;
544 k = 0;
545 for (j = 0; (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
547 if (row[j].j != i)
549 if (k >= max)
551 max += 10;
552 srenew(nnb[i], max);
554 nnb[i][k] = row[j].j;
555 k++;
558 if (k == max)
560 srenew(nnb[i], max+1);
562 nnb[i][k] = -1;
565 sfree(row);
566 if (debug)
568 fprintf(debug, "Nearest neighborlist. M = %d, P = %d\n", M, P);
569 for (i = 0; (i < n1); i++)
571 fprintf(debug, "i:%5d nbs:", i);
572 for (j = 0; nnb[i][j] >= 0; j++)
574 fprintf(debug, "%5d[%5.3f]", nnb[i][j], mat[i][nnb[i][j]]);
576 fprintf(debug, "\n");
580 c = new_clustid(n1);
581 fprintf(stderr, "Linking structures ");
582 /* Use mcpy for temporary storage of booleans */
583 mcpy = mk_matrix(n1, n1, FALSE);
584 for (i = 0; i < n1; i++)
586 for (j = i+1; j < n1; j++)
588 mcpy[i][j] = jp_same(nnb, i, j, P);
593 fprintf(stderr, "*");
594 bChange = FALSE;
595 for (i = 0; i < n1; i++)
597 for (j = i+1; j < n1; j++)
599 if (mcpy[i][j])
601 diff = c[j].clust - c[i].clust;
602 if (diff)
604 bChange = TRUE;
605 if (diff > 0)
607 c[j].clust = c[i].clust;
609 else
611 c[i].clust = c[j].clust;
618 while (bChange);
620 fprintf(stderr, "\nSorting and renumbering clusters\n");
621 /* Sort on cluster number */
622 qsort(c, n1, sizeof(c[0]), clust_id_comp);
624 /* Renumber clusters */
625 cid = 1;
626 for (k = 1; k < n1; k++)
628 if (c[k].clust != c[k-1].clust)
630 c[k-1].clust = cid;
631 cid++;
633 else
635 c[k-1].clust = cid;
638 c[k-1].clust = cid;
639 clust->ncl = cid;
640 for (k = 0; k < n1; k++)
642 clust->cl[c[k].conf] = c[k].clust;
644 if (debug)
646 for (k = 0; (k < n1); k++)
648 fprintf(debug, "Cluster index for conformation %d: %d\n",
649 c[k].conf, c[k].clust);
653 /* Again, I don't see the point in this... (AF) */
654 /* for(i=0; (i<n1); i++) { */
655 /* for(j=0; (j<n1); j++) */
656 /* mcpy[c[i].conf][c[j].conf] = mat[i][j]; */
657 /* } */
658 /* for(i=0; (i<n1); i++) { */
659 /* for(j=0; (j<n1); j++) */
660 /* mat[i][j] = mcpy[i][j]; */
661 /* } */
662 done_matrix(n1, &mcpy);
664 sfree(c);
665 for (i = 0; (i < n1); i++)
667 sfree(nnb[i]);
669 sfree(nnb);
672 static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)
674 int i, j;
676 /* dump neighbor list */
677 fprintf(fp, "%s", title);
678 for (i = 0; (i < n1); i++)
680 fprintf(fp, "i:%5d #:%5d nbs:", i, nnb[i].nr);
681 for (j = 0; j < nnb[i].nr; j++)
683 fprintf(fp, "%5d", nnb[i].nb[j]);
685 fprintf(fp, "\n");
689 static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)
691 t_dist *row;
692 t_nnb *nnb;
693 int i, j, k, j1, max;
695 /* Put all neighbors nearer than rmsdcut in the list */
696 fprintf(stderr, "Making list of neighbors within cutoff ");
697 snew(nnb, n1);
698 snew(row, n1);
699 for (i = 0; (i < n1); i++)
701 max = 0;
702 k = 0;
703 /* put all neighbors within cut-off in list */
704 for (j = 0; j < n1; j++)
706 if (mat[i][j] < rmsdcut)
708 if (k >= max)
710 max += 10;
711 srenew(nnb[i].nb, max);
713 nnb[i].nb[k] = j;
714 k++;
717 /* store nr of neighbors, we'll need that */
718 nnb[i].nr = k;
719 if (i%(1+n1/100) == 0)
721 fprintf(stderr, "%3d%%\b\b\b\b", (i*100+1)/n1);
724 fprintf(stderr, "%3d%%\n", 100);
725 sfree(row);
727 /* sort neighbor list on number of neighbors, largest first */
728 qsort(nnb, n1, sizeof(nnb[0]), nrnb_comp);
730 if (debug)
732 dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
735 /* turn first structure with all its neighbors (largest) into cluster
736 remove them from pool of structures and repeat for all remaining */
737 fprintf(stderr, "Finding clusters %4d", 0);
738 /* cluster id's start at 1: */
739 k = 1;
740 while (nnb[0].nr)
742 /* set cluster id (k) for first item in neighborlist */
743 for (j = 0; j < nnb[0].nr; j++)
745 clust->cl[nnb[0].nb[j]] = k;
747 /* mark as done */
748 nnb[0].nr = 0;
749 sfree(nnb[0].nb);
751 /* adjust number of neighbors for others, taking removals into account: */
752 for (i = 1; i < n1 && nnb[i].nr; i++)
754 j1 = 0;
755 for (j = 0; j < nnb[i].nr; j++)
757 /* if this neighbor wasn't removed */
758 if (clust->cl[nnb[i].nb[j]] == 0)
760 /* shift the rest (j1<=j) */
761 nnb[i].nb[j1] = nnb[i].nb[j];
762 /* next */
763 j1++;
766 /* now j1 is the new number of neighbors */
767 nnb[i].nr = j1;
769 /* sort again on nnb[].nr, because we have new # neighbors: */
770 /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
771 qsort(nnb, i, sizeof(nnb[0]), nrnb_comp);
773 fprintf(stderr, "\b\b\b\b%4d", k);
774 /* new cluster id */
775 k++;
777 fprintf(stderr, "\n");
778 sfree(nnb);
779 if (debug)
781 fprintf(debug, "Clusters (%d):\n", k);
782 for (i = 0; i < n1; i++)
784 fprintf(debug, " %3d", clust->cl[i]);
786 fprintf(debug, "\n");
789 clust->ncl = k-1;
792 rvec **read_whole_trj(const char *fn, int isize, atom_id index[], int skip,
793 int *nframe, real **time, const output_env_t oenv, gmx_bool bPBC, gmx_rmpbc_t gpbc)
795 rvec **xx, *x;
796 matrix box;
797 real t;
798 int i, i0, j, max_nf;
799 int natom;
800 t_trxstatus *status;
803 max_nf = 0;
804 xx = NULL;
805 *time = NULL;
806 natom = read_first_x(oenv, &status, fn, &t, &x, box);
807 i = 0;
808 i0 = 0;
811 if (bPBC)
813 gmx_rmpbc(gpbc, natom, box, x);
815 if (i0 >= max_nf)
817 max_nf += 10;
818 srenew(xx, max_nf);
819 srenew(*time, max_nf);
821 if ((i % skip) == 0)
823 snew(xx[i0], isize);
824 /* Store only the interesting atoms */
825 for (j = 0; (j < isize); j++)
827 copy_rvec(x[index[j]], xx[i0][j]);
829 (*time)[i0] = t;
830 i0++;
832 i++;
834 while (read_next_x(oenv, status, &t, x, box));
835 fprintf(stderr, "Allocated %lu bytes for frames\n",
836 (unsigned long) (max_nf*isize*sizeof(**xx)));
837 fprintf(stderr, "Read %d frames from trajectory %s\n", i0, fn);
838 *nframe = i0;
839 sfree(x);
841 return xx;
844 static int plot_clusters(int nf, real **mat, t_clusters *clust,
845 int minstruct)
847 int i, j, ncluster, ci;
848 int *cl_id, *nstruct, *strind;
850 snew(cl_id, nf);
851 snew(nstruct, nf);
852 snew(strind, nf);
853 for (i = 0; i < nf; i++)
855 strind[i] = 0;
856 cl_id[i] = clust->cl[i];
857 nstruct[cl_id[i]]++;
859 ncluster = 0;
860 for (i = 0; i < nf; i++)
862 if (nstruct[i] >= minstruct)
864 ncluster++;
865 for (j = 0; (j < nf); j++)
867 if (cl_id[j] == i)
869 strind[j] = ncluster;
874 ncluster++;
875 fprintf(stderr, "There are %d clusters with at least %d conformations\n",
876 ncluster, minstruct);
878 for (i = 0; (i < nf); i++)
880 ci = cl_id[i];
881 for (j = 0; j < i; j++)
883 if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct))
885 /* color different clusters with different colors, as long as
886 we don't run out of colors */
887 mat[i][j] = strind[i];
889 else
891 mat[i][j] = 0;
895 sfree(strind);
896 sfree(nstruct);
897 sfree(cl_id);
899 return ncluster;
902 static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)
904 int i, j, v;
906 for (i = 0; i < nf; i++)
908 for (j = 0; j < i; j++)
910 if (clust->cl[i] == clust->cl[j])
912 mat[i][j] = val;
914 else
916 mat[i][j] = 0;
922 static char *parse_filename(const char *fn, int maxnr)
924 int i;
925 char *fnout;
926 const char *ext;
927 char buf[STRLEN];
929 if (strchr(fn, '%'))
931 gmx_fatal(FARGS, "will not number filename %s containing '%c'", fn, '%');
933 /* number of digits needed in numbering */
934 i = (int)(log(maxnr)/log(10)) + 1;
935 /* split fn and ext */
936 ext = strrchr(fn, '.');
937 if (!ext)
939 gmx_fatal(FARGS, "cannot separate extension in filename %s", fn);
941 ext++;
942 /* insert e.g. '%03d' between fn and ext */
943 sprintf(buf, "%s%%0%dd.%s", fn, i, ext);
944 snew(fnout, strlen(buf)+1);
945 strcpy(fnout, buf);
947 return fnout;
950 static void ana_trans(t_clusters *clust, int nf,
951 const char *transfn, const char *ntransfn, FILE *log,
952 t_rgb rlo, t_rgb rhi, const output_env_t oenv)
954 FILE *fp;
955 real **trans, *axis;
956 int *ntrans;
957 int i, ntranst, maxtrans;
958 char buf[STRLEN];
960 snew(ntrans, clust->ncl);
961 snew(trans, clust->ncl);
962 snew(axis, clust->ncl);
963 for (i = 0; i < clust->ncl; i++)
965 axis[i] = i+1;
966 snew(trans[i], clust->ncl);
968 ntranst = 0;
969 maxtrans = 0;
970 for (i = 1; i < nf; i++)
972 if (clust->cl[i] != clust->cl[i-1])
974 ntranst++;
975 ntrans[clust->cl[i-1]-1]++;
976 ntrans[clust->cl[i]-1]++;
977 trans[clust->cl[i-1]-1][clust->cl[i]-1]++;
978 maxtrans = max(maxtrans, trans[clust->cl[i]-1][clust->cl[i-1]-1]);
981 ffprintf_dd(stderr, log, buf, "Counted %d transitions in total, "
982 "max %d between two specific clusters\n", ntranst, maxtrans);
983 if (transfn)
985 fp = gmx_ffopen(transfn, "w");
986 i = min(maxtrans+1, 80);
987 write_xpm(fp, 0, "Cluster Transitions", "# transitions",
988 "from cluster", "to cluster",
989 clust->ncl, clust->ncl, axis, axis, trans,
990 0, maxtrans, rlo, rhi, &i);
991 gmx_ffclose(fp);
993 if (ntransfn)
995 fp = xvgropen(ntransfn, "Cluster Transitions", "Cluster #", "# transitions",
996 oenv);
997 for (i = 0; i < clust->ncl; i++)
999 fprintf(fp, "%5d %5d\n", i+1, ntrans[i]);
1001 gmx_ffclose(fp);
1003 sfree(ntrans);
1004 for (i = 0; i < clust->ncl; i++)
1006 sfree(trans[i]);
1008 sfree(trans);
1009 sfree(axis);
1012 static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,
1013 int natom, t_atoms *atoms, rvec *xtps,
1014 real *mass, rvec **xx, real *time,
1015 int ifsize, atom_id *fitidx,
1016 int iosize, atom_id *outidx,
1017 const char *trxfn, const char *sizefn,
1018 const char *transfn, const char *ntransfn,
1019 const char *clustidfn, gmx_bool bAverage,
1020 int write_ncl, int write_nst, real rmsmin,
1021 gmx_bool bFit, FILE *log, t_rgb rlo, t_rgb rhi,
1022 const output_env_t oenv)
1024 FILE *fp = NULL;
1025 char buf[STRLEN], buf1[40], buf2[40], buf3[40], *trxsfn;
1026 t_trxstatus *trxout = NULL;
1027 t_trxstatus *trxsout = NULL;
1028 int i, i1, cl, nstr, *structure, first = 0, midstr;
1029 gmx_bool *bWrite = NULL;
1030 real r, clrmsd, midrmsd;
1031 rvec *xav = NULL;
1032 matrix zerobox;
1034 clear_mat(zerobox);
1036 ffprintf_d(stderr, log, buf, "\nFound %d clusters\n\n", clust->ncl);
1037 trxsfn = NULL;
1038 if (trxfn)
1040 /* do we write all structures? */
1041 if (write_ncl)
1043 trxsfn = parse_filename(trxfn, max(write_ncl, clust->ncl));
1044 snew(bWrite, nf);
1046 ffprintf_ss(stderr, log, buf, "Writing %s structure for each cluster to %s\n",
1047 bAverage ? "average" : "middle", trxfn);
1048 if (write_ncl)
1050 /* find out what we want to tell the user:
1051 Writing [all structures|structures with rmsd > %g] for
1052 {all|first %d} clusters {with more than %d structures} to %s */
1053 if (rmsmin > 0.0)
1055 sprintf(buf1, "structures with rmsd > %g", rmsmin);
1057 else
1059 sprintf(buf1, "all structures");
1061 buf2[0] = buf3[0] = '\0';
1062 if (write_ncl >= clust->ncl)
1064 if (write_nst == 0)
1066 sprintf(buf2, "all ");
1069 else
1071 sprintf(buf2, "the first %d ", write_ncl);
1073 if (write_nst)
1075 sprintf(buf3, " with more than %d structures", write_nst);
1077 sprintf(buf, "Writing %s for %sclusters%s to %s\n", buf1, buf2, buf3, trxsfn);
1078 ffprintf(stderr, log, buf);
1081 /* Prepare a reference structure for the orientation of the clusters */
1082 if (bFit)
1084 reset_x(ifsize, fitidx, natom, NULL, xtps, mass);
1086 trxout = open_trx(trxfn, "w");
1087 /* Calculate the average structure in each cluster, *
1088 * all structures are fitted to the first struture of the cluster */
1089 snew(xav, natom);
1092 if (transfn || ntransfn)
1094 ana_trans(clust, nf, transfn, ntransfn, log, rlo, rhi, oenv);
1097 if (clustidfn)
1099 fp = xvgropen(clustidfn, "Clusters", output_env_get_xvgr_tlabel(oenv), "Cluster #", oenv);
1100 if (output_env_get_print_xvgr_codes(oenv))
1102 fprintf(fp, "@ s0 symbol 2\n");
1103 fprintf(fp, "@ s0 symbol size 0.2\n");
1104 fprintf(fp, "@ s0 linestyle 0\n");
1106 for (i = 0; i < nf; i++)
1108 fprintf(fp, "%8g %8d\n", time[i], clust->cl[i]);
1110 gmx_ffclose(fp);
1112 if (sizefn)
1114 fp = xvgropen(sizefn, "Cluster Sizes", "Cluster #", "# Structures", oenv);
1115 if (output_env_get_print_xvgr_codes(oenv))
1117 fprintf(fp, "@g%d type %s\n", 0, "bar");
1120 snew(structure, nf);
1121 fprintf(log, "\n%3s | %3s %4s | %6s %4s | cluster members\n",
1122 "cl.", "#st", "rmsd", "middle", "rmsd");
1123 for (cl = 1; cl <= clust->ncl; cl++)
1125 /* prepare structures (fit, middle, average) */
1126 if (xav)
1128 for (i = 0; i < natom; i++)
1130 clear_rvec(xav[i]);
1133 nstr = 0;
1134 for (i1 = 0; i1 < nf; i1++)
1136 if (clust->cl[i1] == cl)
1138 structure[nstr] = i1;
1139 nstr++;
1140 if (trxfn && (bAverage || write_ncl) )
1142 if (bFit)
1144 reset_x(ifsize, fitidx, natom, NULL, xx[i1], mass);
1146 if (nstr == 1)
1148 first = i1;
1150 else if (bFit)
1152 do_fit(natom, mass, xx[first], xx[i1]);
1154 if (xav)
1156 for (i = 0; i < natom; i++)
1158 rvec_inc(xav[i], xx[i1][i]);
1164 if (sizefn)
1166 fprintf(fp, "%8d %8d\n", cl, nstr);
1168 clrmsd = 0;
1169 midstr = 0;
1170 midrmsd = 10000;
1171 for (i1 = 0; i1 < nstr; i1++)
1173 r = 0;
1174 if (nstr > 1)
1176 for (i = 0; i < nstr; i++)
1178 if (i < i1)
1180 r += rmsd[structure[i]][structure[i1]];
1182 else
1184 r += rmsd[structure[i1]][structure[i]];
1187 r /= (nstr - 1);
1189 if (r < midrmsd)
1191 midstr = structure[i1];
1192 midrmsd = r;
1194 clrmsd += r;
1196 clrmsd /= nstr;
1198 /* dump cluster info to logfile */
1199 if (nstr > 1)
1201 sprintf(buf1, "%6.3f", clrmsd);
1202 if (buf1[0] == '0')
1204 buf1[0] = ' ';
1206 sprintf(buf2, "%5.3f", midrmsd);
1207 if (buf2[0] == '0')
1209 buf2[0] = ' ';
1212 else
1214 sprintf(buf1, "%5s", "");
1215 sprintf(buf2, "%5s", "");
1217 fprintf(log, "%3d | %3d %s | %6g%s |", cl, nstr, buf1, time[midstr], buf2);
1218 for (i = 0; i < nstr; i++)
1220 if ((i % 7 == 0) && i)
1222 sprintf(buf, "\n%3s | %3s %4s | %6s %4s |", "", "", "", "", "");
1224 else
1226 buf[0] = '\0';
1228 i1 = structure[i];
1229 fprintf(log, "%s %6g", buf, time[i1]);
1231 fprintf(log, "\n");
1233 /* write structures to trajectory file(s) */
1234 if (trxfn)
1236 if (write_ncl)
1238 for (i = 0; i < nstr; i++)
1240 bWrite[i] = FALSE;
1243 if (cl < write_ncl+1 && nstr > write_nst)
1245 /* Dump all structures for this cluster */
1246 /* generate numbered filename (there is a %d in trxfn!) */
1247 sprintf(buf, trxsfn, cl);
1248 trxsout = open_trx(buf, "w");
1249 for (i = 0; i < nstr; i++)
1251 bWrite[i] = TRUE;
1252 if (rmsmin > 0.0)
1254 for (i1 = 0; i1 < i && bWrite[i]; i1++)
1256 if (bWrite[i1])
1258 bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
1262 if (bWrite[i])
1264 write_trx(trxsout, iosize, outidx, atoms, i, time[structure[i]], zerobox,
1265 xx[structure[i]], NULL, NULL);
1268 close_trx(trxsout);
1270 /* Dump the average structure for this cluster */
1271 if (bAverage)
1273 for (i = 0; i < natom; i++)
1275 svmul(1.0/nstr, xav[i], xav[i]);
1278 else
1280 for (i = 0; i < natom; i++)
1282 copy_rvec(xx[midstr][i], xav[i]);
1284 if (bFit)
1286 reset_x(ifsize, fitidx, natom, NULL, xav, mass);
1289 if (bFit)
1291 do_fit(natom, mass, xtps, xav);
1293 r = cl;
1294 write_trx(trxout, iosize, outidx, atoms, cl, time[midstr], zerobox, xav, NULL, NULL);
1297 /* clean up */
1298 if (trxfn)
1300 close_trx(trxout);
1301 sfree(xav);
1302 if (write_ncl)
1304 sfree(bWrite);
1307 sfree(structure);
1308 if (trxsfn)
1310 sfree(trxsfn);
1314 static void convert_mat(t_matrix *mat, t_mat *rms)
1316 int i, j;
1318 rms->n1 = mat->nx;
1319 matrix2real(mat, rms->mat);
1320 /* free input xpm matrix data */
1321 for (i = 0; i < mat->nx; i++)
1323 sfree(mat->matrix[i]);
1325 sfree(mat->matrix);
1327 for (i = 0; i < mat->nx; i++)
1329 for (j = i; j < mat->nx; j++)
1331 rms->sumrms += rms->mat[i][j];
1332 rms->maxrms = max(rms->maxrms, rms->mat[i][j]);
1333 if (j != i)
1335 rms->minrms = min(rms->minrms, rms->mat[i][j]);
1339 rms->nn = mat->nx;
1342 int gmx_cluster(int argc, char *argv[])
1344 const char *desc[] = {
1345 "[THISMODULE] can cluster structures using several different methods.",
1346 "Distances between structures can be determined from a trajectory",
1347 "or read from an [TT].xpm[tt] matrix file with the [TT]-dm[tt] option.",
1348 "RMS deviation after fitting or RMS deviation of atom-pair distances",
1349 "can be used to define the distance between structures.[PAR]",
1351 "single linkage: add a structure to a cluster when its distance to any",
1352 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
1354 "Jarvis Patrick: add a structure to a cluster when this structure",
1355 "and a structure in the cluster have each other as neighbors and",
1356 "they have a least [TT]P[tt] neighbors in common. The neighbors",
1357 "of a structure are the M closest structures or all structures within",
1358 "[TT]cutoff[tt].[PAR]",
1360 "Monte Carlo: reorder the RMSD matrix using Monte Carlo such that",
1361 "the order of the frames is using the smallest possible increments.",
1362 "With this it is possible to make a smooth animation going from one",
1363 "structure to another with the largest possible (e.g.) RMSD between",
1364 "them, however the intermediate steps should be as small as possible.",
1365 "Applications could be to visualize a potential of mean force",
1366 "ensemble of simulations or a pulling simulation. Obviously the user",
1367 "has to prepare the trajectory well (e.g. by not superimposing frames).",
1368 "The final result can be inspect visually by looking at the matrix",
1369 "[TT].xpm[tt] file, which should vary smoothly from bottom to top.[PAR]",
1371 "diagonalization: diagonalize the RMSD matrix.[PAR]",
1373 "gromos: use algorithm as described in Daura [IT]et al.[it]",
1374 "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
1375 "Count number of neighbors using cut-off, take structure with",
1376 "largest number of neighbors with all its neighbors as cluster",
1377 "and eliminate it from the pool of clusters. Repeat for remaining",
1378 "structures in pool.[PAR]",
1380 "When the clustering algorithm assigns each structure to exactly one",
1381 "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
1382 "file is supplied, the structure with",
1383 "the smallest average distance to the others or the average structure",
1384 "or all structures for each cluster will be written to a trajectory",
1385 "file. When writing all structures, separate numbered files are made",
1386 "for each cluster.[PAR]",
1388 "Two output files are always written:[BR]",
1389 "[TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
1390 "and a graphical depiction of the clusters in the lower right half",
1391 "When [TT]-minstruct[tt] = 1 the graphical depiction is black",
1392 "when two structures are in the same cluster.",
1393 "When [TT]-minstruct[tt] > 1 different colors will be used for each",
1394 "cluster.[BR]",
1395 "[TT]-g[tt] writes information on the options used and a detailed list",
1396 "of all clusters and their members.[PAR]",
1398 "Additionally, a number of optional output files can be written:[BR]",
1399 "[TT]-dist[tt] writes the RMSD distribution.[BR]",
1400 "[TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
1401 "diagonalization.[BR]",
1402 "[TT]-sz[tt] writes the cluster sizes.[BR]",
1403 "[TT]-tr[tt] writes a matrix of the number transitions between",
1404 "cluster pairs.[BR]",
1405 "[TT]-ntr[tt] writes the total number of transitions to or from",
1406 "each cluster.[BR]",
1407 "[TT]-clid[tt] writes the cluster number as a function of time.[BR]",
1408 "[TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1409 "structure of each cluster or writes numbered files with cluster members",
1410 "for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1411 "[TT]-nst[tt] and [TT]-rmsmin[tt]). The center of a cluster is the",
1412 "structure with the smallest average RMSD from all other structures",
1413 "of the cluster.[BR]",
1416 FILE *fp, *log;
1417 int nf, i, i1, i2, j;
1418 gmx_int64_t nrms = 0;
1420 matrix box;
1421 rvec *xtps, *usextps, *x1, **xx = NULL;
1422 const char *fn, *trx_out_fn;
1423 t_clusters clust;
1424 t_mat *rms, *orig = NULL;
1425 real *eigenvalues;
1426 t_topology top;
1427 int ePBC;
1428 t_atoms useatoms;
1429 t_matrix *readmat = NULL;
1430 real *eigenvectors;
1432 int isize = 0, ifsize = 0, iosize = 0;
1433 atom_id *index = NULL, *fitidx, *outidx;
1434 char *grpname;
1435 real rmsd, **d1, **d2, *time = NULL, time_invfac, *mass = NULL;
1436 char buf[STRLEN], buf1[80], title[STRLEN];
1437 gmx_bool bAnalyze, bUseRmsdCut, bJP_RMSD = FALSE, bReadMat, bReadTraj, bPBC = TRUE;
1439 int method, ncluster = 0;
1440 static const char *methodname[] = {
1441 NULL, "linkage", "jarvis-patrick", "monte-carlo",
1442 "diagonalization", "gromos", NULL
1444 enum {
1445 m_null, m_linkage, m_jarvis_patrick,
1446 m_monte_carlo, m_diagonalize, m_gromos, m_nr
1448 /* Set colors for plotting: white = zero RMS, black = maximum */
1449 static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
1450 static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
1451 static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
1452 static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
1453 static int nlevels = 40, skip = 1;
1454 static real scalemax = -1.0, rmsdcut = 0.1, rmsmin = 0.0;
1455 gmx_bool bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;
1456 static int niter = 10000, nrandom = 0, seed = 1993, write_ncl = 0, write_nst = 1, minstruct = 1;
1457 static real kT = 1e-3;
1458 static int M = 10, P = 3;
1459 output_env_t oenv;
1460 gmx_rmpbc_t gpbc = NULL;
1462 t_pargs pa[] = {
1463 { "-dista", FALSE, etBOOL, {&bRMSdist},
1464 "Use RMSD of distances instead of RMS deviation" },
1465 { "-nlevels", FALSE, etINT, {&nlevels},
1466 "Discretize RMSD matrix in this number of levels" },
1467 { "-cutoff", FALSE, etREAL, {&rmsdcut},
1468 "RMSD cut-off (nm) for two structures to be neighbor" },
1469 { "-fit", FALSE, etBOOL, {&bFit},
1470 "Use least squares fitting before RMSD calculation" },
1471 { "-max", FALSE, etREAL, {&scalemax},
1472 "Maximum level in RMSD matrix" },
1473 { "-skip", FALSE, etINT, {&skip},
1474 "Only analyze every nr-th frame" },
1475 { "-av", FALSE, etBOOL, {&bAverage},
1476 "Write average iso middle structure for each cluster" },
1477 { "-wcl", FALSE, etINT, {&write_ncl},
1478 "Write the structures for this number of clusters to numbered files" },
1479 { "-nst", FALSE, etINT, {&write_nst},
1480 "Only write all structures if more than this number of structures per cluster" },
1481 { "-rmsmin", FALSE, etREAL, {&rmsmin},
1482 "minimum rms difference with rest of cluster for writing structures" },
1483 { "-method", FALSE, etENUM, {methodname},
1484 "Method for cluster determination" },
1485 { "-minstruct", FALSE, etINT, {&minstruct},
1486 "Minimum number of structures in cluster for coloring in the [TT].xpm[tt] file" },
1487 { "-binary", FALSE, etBOOL, {&bBinary},
1488 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1489 "is given by [TT]-cutoff[tt]" },
1490 { "-M", FALSE, etINT, {&M},
1491 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1492 "0 is use cutoff" },
1493 { "-P", FALSE, etINT, {&P},
1494 "Number of identical nearest neighbors required to form a cluster" },
1495 { "-seed", FALSE, etINT, {&seed},
1496 "Random number seed for Monte Carlo clustering algorithm: <= 0 means generate" },
1497 { "-niter", FALSE, etINT, {&niter},
1498 "Number of iterations for MC" },
1499 { "-nrandom", FALSE, etINT, {&nrandom},
1500 "The first iterations for MC may be done complete random, to shuffle the frames" },
1501 { "-kT", FALSE, etREAL, {&kT},
1502 "Boltzmann weighting factor for Monte Carlo optimization "
1503 "(zero turns off uphill steps)" },
1504 { "-pbc", FALSE, etBOOL,
1505 { &bPBC }, "PBC check" }
1507 t_filenm fnm[] = {
1508 { efTRX, "-f", NULL, ffOPTRD },
1509 { efTPS, "-s", NULL, ffOPTRD },
1510 { efNDX, NULL, NULL, ffOPTRD },
1511 { efXPM, "-dm", "rmsd", ffOPTRD },
1512 { efXPM, "-om", "rmsd-raw", ffWRITE },
1513 { efXPM, "-o", "rmsd-clust", ffWRITE },
1514 { efLOG, "-g", "cluster", ffWRITE },
1515 { efXVG, "-dist", "rmsd-dist", ffOPTWR },
1516 { efXVG, "-ev", "rmsd-eig", ffOPTWR },
1517 { efXVG, "-conv", "mc-conv", ffOPTWR },
1518 { efXVG, "-sz", "clust-size", ffOPTWR},
1519 { efXPM, "-tr", "clust-trans", ffOPTWR},
1520 { efXVG, "-ntr", "clust-trans", ffOPTWR},
1521 { efXVG, "-clid", "clust-id.xvg", ffOPTWR},
1522 { efTRX, "-cl", "clusters.pdb", ffOPTWR }
1524 #define NFILE asize(fnm)
1526 if (!parse_common_args(&argc, argv,
1527 PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT | PCA_BE_NICE,
1528 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, NULL,
1529 &oenv))
1531 return 0;
1534 /* parse options */
1535 bReadMat = opt2bSet("-dm", NFILE, fnm);
1536 bReadTraj = opt2bSet("-f", NFILE, fnm) || !bReadMat;
1537 if (opt2parg_bSet("-av", asize(pa), pa) ||
1538 opt2parg_bSet("-wcl", asize(pa), pa) ||
1539 opt2parg_bSet("-nst", asize(pa), pa) ||
1540 opt2parg_bSet("-rmsmin", asize(pa), pa) ||
1541 opt2bSet("-cl", NFILE, fnm) )
1543 trx_out_fn = opt2fn("-cl", NFILE, fnm);
1545 else
1547 trx_out_fn = NULL;
1549 if (bReadMat && output_env_get_time_factor(oenv) != 1)
1551 fprintf(stderr,
1552 "\nWarning: assuming the time unit in %s is %s\n",
1553 opt2fn("-dm", NFILE, fnm), output_env_get_time_unit(oenv));
1555 if (trx_out_fn && !bReadTraj)
1557 fprintf(stderr, "\nWarning: "
1558 "cannot write cluster structures without reading trajectory\n"
1559 " ignoring option -cl %s\n", trx_out_fn);
1562 method = 1;
1563 while (method < m_nr && gmx_strcasecmp(methodname[0], methodname[method]) != 0)
1565 method++;
1567 if (method == m_nr)
1569 gmx_fatal(FARGS, "Invalid method");
1572 bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
1573 method == m_gromos );
1575 /* Open log file */
1576 log = ftp2FILE(efLOG, NFILE, fnm, "w");
1578 fprintf(stderr, "Using %s method for clustering\n", methodname[0]);
1579 fprintf(log, "Using %s method for clustering\n", methodname[0]);
1581 /* check input and write parameters to log file */
1582 bUseRmsdCut = FALSE;
1583 if (method == m_jarvis_patrick)
1585 bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff", asize(pa), pa);
1586 if ((M < 0) || (M == 1))
1588 gmx_fatal(FARGS, "M (%d) must be 0 or larger than 1", M);
1590 if (M < 2)
1592 sprintf(buf1, "Will use P=%d and RMSD cutoff (%g)", P, rmsdcut);
1593 bUseRmsdCut = TRUE;
1595 else
1597 if (P >= M)
1599 gmx_fatal(FARGS, "Number of neighbors required (P) must be less than M");
1601 if (bJP_RMSD)
1603 sprintf(buf1, "Will use P=%d, M=%d and RMSD cutoff (%g)", P, M, rmsdcut);
1604 bUseRmsdCut = TRUE;
1606 else
1608 sprintf(buf1, "Will use P=%d, M=%d", P, M);
1611 ffprintf_s(stderr, log, buf, "%s for determining the neighbors\n\n", buf1);
1613 else /* method != m_jarvis */
1615 bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
1617 if (bUseRmsdCut && method != m_jarvis_patrick)
1619 fprintf(log, "Using RMSD cutoff %g nm\n", rmsdcut);
1621 if (method == m_monte_carlo)
1623 fprintf(log, "Using %d iterations\n", niter);
1626 if (skip < 1)
1628 gmx_fatal(FARGS, "skip (%d) should be >= 1", skip);
1631 /* get input */
1632 if (bReadTraj)
1634 /* don't read mass-database as masses (and top) are not used */
1635 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), buf, &top, &ePBC, &xtps, NULL, box,
1636 TRUE);
1637 if (bPBC)
1639 gpbc = gmx_rmpbc_init(&top.idef, ePBC, top.atoms.nr);
1642 fprintf(stderr, "\nSelect group for least squares fit%s:\n",
1643 bReadMat ? "" : " and RMSD calculation");
1644 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1645 1, &ifsize, &fitidx, &grpname);
1646 if (trx_out_fn)
1648 fprintf(stderr, "\nSelect group for output:\n");
1649 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1650 1, &iosize, &outidx, &grpname);
1651 /* merge and convert both index groups: */
1652 /* first copy outidx to index. let outidx refer to elements in index */
1653 snew(index, iosize);
1654 isize = iosize;
1655 for (i = 0; i < iosize; i++)
1657 index[i] = outidx[i];
1658 outidx[i] = i;
1660 /* now lookup elements from fitidx in index, add them if necessary
1661 and also let fitidx refer to elements in index */
1662 for (i = 0; i < ifsize; i++)
1664 j = 0;
1665 while (j < isize && index[j] != fitidx[i])
1667 j++;
1669 if (j >= isize)
1671 /* slow this way, but doesn't matter much */
1672 isize++;
1673 srenew(index, isize);
1675 index[j] = fitidx[i];
1676 fitidx[i] = j;
1679 else /* !trx_out_fn */
1681 isize = ifsize;
1682 snew(index, isize);
1683 for (i = 0; i < ifsize; i++)
1685 index[i] = fitidx[i];
1686 fitidx[i] = i;
1691 if (bReadTraj)
1693 /* Loop over first coordinate file */
1694 fn = opt2fn("-f", NFILE, fnm);
1696 xx = read_whole_trj(fn, isize, index, skip, &nf, &time, oenv, bPBC, gpbc);
1697 output_env_conv_times(oenv, nf, time);
1698 if (!bRMSdist || bAnalyze)
1700 /* Center all frames on zero */
1701 snew(mass, isize);
1702 for (i = 0; i < ifsize; i++)
1704 mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1706 if (bFit)
1708 for (i = 0; i < nf; i++)
1710 reset_x(ifsize, fitidx, isize, NULL, xx[i], mass);
1714 if (bPBC)
1716 gmx_rmpbc_done(gpbc);
1720 if (bReadMat)
1722 fprintf(stderr, "Reading rms distance matrix ");
1723 read_xpm_matrix(opt2fn("-dm", NFILE, fnm), &readmat);
1724 fprintf(stderr, "\n");
1725 if (readmat[0].nx != readmat[0].ny)
1727 gmx_fatal(FARGS, "Matrix (%dx%d) is not square",
1728 readmat[0].nx, readmat[0].ny);
1730 if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1732 gmx_fatal(FARGS, "Matrix size (%dx%d) does not match the number of "
1733 "frames (%d)", readmat[0].nx, readmat[0].ny, nf);
1736 nf = readmat[0].nx;
1737 sfree(time);
1738 time = readmat[0].axis_x;
1739 time_invfac = output_env_get_time_invfactor(oenv);
1740 for (i = 0; i < nf; i++)
1742 time[i] *= time_invfac;
1745 rms = init_mat(readmat[0].nx, method == m_diagonalize);
1746 convert_mat(&(readmat[0]), rms);
1748 nlevels = readmat[0].nmap;
1750 else /* !bReadMat */
1752 rms = init_mat(nf, method == m_diagonalize);
1753 nrms = ((gmx_int64_t)nf*((gmx_int64_t)nf-1))/2;
1754 if (!bRMSdist)
1756 fprintf(stderr, "Computing %dx%d RMS deviation matrix\n", nf, nf);
1757 /* Initialize work array */
1758 snew(x1, isize);
1759 for (i1 = 0; i1 < nf; i1++)
1761 for (i2 = i1+1; i2 < nf; i2++)
1763 for (i = 0; i < isize; i++)
1765 copy_rvec(xx[i1][i], x1[i]);
1767 if (bFit)
1769 do_fit(isize, mass, xx[i2], x1);
1771 rmsd = rmsdev(isize, mass, xx[i2], x1);
1772 set_mat_entry(rms, i1, i2, rmsd);
1774 nrms -= (gmx_int64_t) (nf-i1-1);
1775 fprintf(stderr, "\r# RMSD calculations left: " "%"GMX_PRId64 " ", nrms);
1777 sfree(x1);
1779 else /* bRMSdist */
1781 fprintf(stderr, "Computing %dx%d RMS distance deviation matrix\n", nf, nf);
1783 /* Initiate work arrays */
1784 snew(d1, isize);
1785 snew(d2, isize);
1786 for (i = 0; (i < isize); i++)
1788 snew(d1[i], isize);
1789 snew(d2[i], isize);
1791 for (i1 = 0; i1 < nf; i1++)
1793 calc_dist(isize, xx[i1], d1);
1794 for (i2 = i1+1; (i2 < nf); i2++)
1796 calc_dist(isize, xx[i2], d2);
1797 set_mat_entry(rms, i1, i2, rms_dist(isize, d1, d2));
1799 nrms -= (nf-i1-1);
1800 fprintf(stderr, "\r# RMSD calculations left: " "%"GMX_PRId64 " ", nrms);
1802 /* Clean up work arrays */
1803 for (i = 0; (i < isize); i++)
1805 sfree(d1[i]);
1806 sfree(d2[i]);
1808 sfree(d1);
1809 sfree(d2);
1811 fprintf(stderr, "\n\n");
1813 ffprintf_gg(stderr, log, buf, "The RMSD ranges from %g to %g nm\n",
1814 rms->minrms, rms->maxrms);
1815 ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2*rms->sumrms/(nf*(nf-1)));
1816 ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);
1817 ffprintf_g(stderr, log, buf, "Energy of the matrix is %g.\n", mat_energy(rms));
1818 if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
1820 fprintf(stderr, "WARNING: rmsd cutoff %g is outside range of rmsd values "
1821 "%g to %g\n", rmsdcut, rms->minrms, rms->maxrms);
1823 if (bAnalyze && (rmsmin < rms->minrms) )
1825 fprintf(stderr, "WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1826 rmsmin, rms->minrms);
1828 if (bAnalyze && (rmsmin > rmsdcut) )
1830 fprintf(stderr, "WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1831 rmsmin, rmsdcut);
1834 /* Plot the rmsd distribution */
1835 rmsd_distribution(opt2fn("-dist", NFILE, fnm), rms, oenv);
1837 if (bBinary)
1839 for (i1 = 0; (i1 < nf); i1++)
1841 for (i2 = 0; (i2 < nf); i2++)
1843 if (rms->mat[i1][i2] < rmsdcut)
1845 rms->mat[i1][i2] = 0;
1847 else
1849 rms->mat[i1][i2] = 1;
1855 snew(clust.cl, nf);
1856 switch (method)
1858 case m_linkage:
1859 /* Now sort the matrix and write it out again */
1860 gather(rms, rmsdcut, &clust);
1861 break;
1862 case m_diagonalize:
1863 /* Do a diagonalization */
1864 snew(eigenvalues, nf);
1865 snew(eigenvectors, nf*nf);
1866 memcpy(eigenvectors, rms->mat[0], nf*nf*sizeof(real));
1867 eigensolver(eigenvectors, nf, 0, nf, eigenvalues, rms->mat[0]);
1868 sfree(eigenvectors);
1870 fp = xvgropen(opt2fn("-ev", NFILE, fnm), "RMSD matrix Eigenvalues",
1871 "Eigenvector index", "Eigenvalues (nm\\S2\\N)", oenv);
1872 for (i = 0; (i < nf); i++)
1874 fprintf(fp, "%10d %10g\n", i, eigenvalues[i]);
1876 gmx_ffclose(fp);
1877 break;
1878 case m_monte_carlo:
1879 orig = init_mat(rms->nn, FALSE);
1880 orig->nn = rms->nn;
1881 copy_t_mat(orig, rms);
1882 mc_optimize(log, rms, time, niter, nrandom, seed, kT,
1883 opt2fn_null("-conv", NFILE, fnm), oenv);
1884 break;
1885 case m_jarvis_patrick:
1886 jarvis_patrick(rms->nn, rms->mat, M, P, bJP_RMSD ? rmsdcut : -1, &clust);
1887 break;
1888 case m_gromos:
1889 gromos(rms->nn, rms->mat, rmsdcut, &clust);
1890 break;
1891 default:
1892 gmx_fatal(FARGS, "DEATH HORROR unknown method \"%s\"", methodname[0]);
1895 if (method == m_monte_carlo || method == m_diagonalize)
1897 fprintf(stderr, "Energy of the matrix after clustering is %g.\n",
1898 mat_energy(rms));
1901 if (bAnalyze)
1903 if (minstruct > 1)
1905 ncluster = plot_clusters(nf, rms->mat, &clust, minstruct);
1907 else
1909 mark_clusters(nf, rms->mat, rms->maxrms, &clust);
1911 init_t_atoms(&useatoms, isize, FALSE);
1912 snew(usextps, isize);
1913 useatoms.resinfo = top.atoms.resinfo;
1914 for (i = 0; i < isize; i++)
1916 useatoms.atomname[i] = top.atoms.atomname[index[i]];
1917 useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1918 useatoms.nres = max(useatoms.nres, useatoms.atom[i].resind+1);
1919 copy_rvec(xtps[index[i]], usextps[i]);
1921 useatoms.nr = isize;
1922 analyze_clusters(nf, &clust, rms->mat, isize, &useatoms, usextps, mass, xx, time,
1923 ifsize, fitidx, iosize, outidx,
1924 bReadTraj ? trx_out_fn : NULL,
1925 opt2fn_null("-sz", NFILE, fnm),
1926 opt2fn_null("-tr", NFILE, fnm),
1927 opt2fn_null("-ntr", NFILE, fnm),
1928 opt2fn_null("-clid", NFILE, fnm),
1929 bAverage, write_ncl, write_nst, rmsmin, bFit, log,
1930 rlo_bot, rhi_bot, oenv);
1932 gmx_ffclose(log);
1934 if (bBinary && !bAnalyze)
1936 /* Make the clustering visible */
1937 for (i2 = 0; (i2 < nf); i2++)
1939 for (i1 = i2+1; (i1 < nf); i1++)
1941 if (rms->mat[i1][i2])
1943 rms->mat[i1][i2] = rms->maxrms;
1949 fp = opt2FILE("-o", NFILE, fnm, "w");
1950 fprintf(stderr, "Writing rms distance/clustering matrix ");
1951 if (bReadMat)
1953 write_xpm(fp, 0, readmat[0].title, readmat[0].legend, readmat[0].label_x,
1954 readmat[0].label_y, nf, nf, readmat[0].axis_x, readmat[0].axis_y,
1955 rms->mat, 0.0, rms->maxrms, rlo_top, rhi_top, &nlevels);
1957 else
1959 sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
1960 sprintf(title, "RMS%sDeviation / Cluster Index",
1961 bRMSdist ? " Distance " : " ");
1962 if (minstruct > 1)
1964 write_xpm_split(fp, 0, title, "RMSD (nm)", buf, buf,
1965 nf, nf, time, time, rms->mat, 0.0, rms->maxrms, &nlevels,
1966 rlo_top, rhi_top, 0.0, (real) ncluster,
1967 &ncluster, TRUE, rlo_bot, rhi_bot);
1969 else
1971 write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
1972 nf, nf, time, time, rms->mat, 0.0, rms->maxrms,
1973 rlo_top, rhi_top, &nlevels);
1976 fprintf(stderr, "\n");
1977 gmx_ffclose(fp);
1978 if (NULL != orig)
1980 fp = opt2FILE("-om", NFILE, fnm, "w");
1981 sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
1982 sprintf(title, "RMS%sDeviation", bRMSdist ? " Distance " : " ");
1983 write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
1984 nf, nf, time, time, orig->mat, 0.0, orig->maxrms,
1985 rlo_top, rhi_top, &nlevels);
1986 gmx_ffclose(fp);
1987 done_mat(&orig);
1988 sfree(orig);
1990 /* now show what we've done */
1991 do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
1992 do_view(oenv, opt2fn_null("-sz", NFILE, fnm), "-nxy");
1993 if (method == m_diagonalize)
1995 do_view(oenv, opt2fn_null("-ev", NFILE, fnm), "-nxy");
1997 do_view(oenv, opt2fn("-dist", NFILE, fnm), "-nxy");
1998 if (bAnalyze)
2000 do_view(oenv, opt2fn_null("-tr", NFILE, fnm), "-nxy");
2001 do_view(oenv, opt2fn_null("-ntr", NFILE, fnm), "-nxy");
2002 do_view(oenv, opt2fn_null("-clid", NFILE, fnm), "-nxy");
2004 do_view(oenv, opt2fn_null("-conv", NFILE, fnm), NULL);
2006 return 0;