Tidy: modernize-use-nullptr
[gromacs.git] / src / gromacs / gmxana / gmx_cluster.cpp
blob6dd4946104fce0724d2ef6d0d171f06eb68f78d0
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include <cmath>
40 #include <cstdlib>
41 #include <cstring>
43 #include <algorithm>
45 #include "gromacs/commandline/pargs.h"
46 #include "gromacs/commandline/viewit.h"
47 #include "gromacs/fileio/confio.h"
48 #include "gromacs/fileio/matio.h"
49 #include "gromacs/fileio/trxio.h"
50 #include "gromacs/fileio/xvgr.h"
51 #include "gromacs/gmxana/cmat.h"
52 #include "gromacs/gmxana/gmx_ana.h"
53 #include "gromacs/linearalgebra/eigensolver.h"
54 #include "gromacs/math/do_fit.h"
55 #include "gromacs/math/vec.h"
56 #include "gromacs/pbcutil/pbc.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/random/threefry.h"
59 #include "gromacs/random/uniformintdistribution.h"
60 #include "gromacs/random/uniformrealdistribution.h"
61 #include "gromacs/topology/index.h"
62 #include "gromacs/topology/topology.h"
63 #include "gromacs/utility/arraysize.h"
64 #include "gromacs/utility/cstringutil.h"
65 #include "gromacs/utility/fatalerror.h"
66 #include "gromacs/utility/futil.h"
67 #include "gromacs/utility/smalloc.h"
69 /* print to two file pointers at once (i.e. stderr and log) */
70 static gmx_inline
71 void lo_ffprintf(FILE *fp1, FILE *fp2, const char *buf)
73 fprintf(fp1, "%s", buf);
74 fprintf(fp2, "%s", buf);
77 /* just print a prepared buffer to fp1 and fp2 */
78 static gmx_inline
79 void ffprintf(FILE *fp1, FILE *fp2, const char *buf)
81 lo_ffprintf(fp1, fp2, buf);
84 /* prepare buffer with one argument, then print to fp1 and fp2 */
85 static gmx_inline
86 void ffprintf_d(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg)
88 sprintf(buf, fmt, arg);
89 lo_ffprintf(fp1, fp2, buf);
92 /* prepare buffer with one argument, then print to fp1 and fp2 */
93 static gmx_inline
94 void ffprintf_g(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg)
96 sprintf(buf, fmt, arg);
97 lo_ffprintf(fp1, fp2, buf);
100 /* prepare buffer with one argument, then print to fp1 and fp2 */
101 static gmx_inline
102 void ffprintf_s(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg)
104 sprintf(buf, fmt, arg);
105 lo_ffprintf(fp1, fp2, buf);
108 /* prepare buffer with two arguments, then print to fp1 and fp2 */
109 static gmx_inline
110 void ffprintf_dd(FILE *fp1, FILE *fp2, char *buf, const char *fmt, int arg1, int arg2)
112 sprintf(buf, fmt, arg1, arg2);
113 lo_ffprintf(fp1, fp2, buf);
116 /* prepare buffer with two arguments, then print to fp1 and fp2 */
117 static gmx_inline
118 void ffprintf_gg(FILE *fp1, FILE *fp2, char *buf, const char *fmt, real arg1, real arg2)
120 sprintf(buf, fmt, arg1, arg2);
121 lo_ffprintf(fp1, fp2, buf);
124 /* prepare buffer with two arguments, then print to fp1 and fp2 */
125 static gmx_inline
126 void ffprintf_ss(FILE *fp1, FILE *fp2, char *buf, const char *fmt, const char *arg1, const char *arg2)
128 sprintf(buf, fmt, arg1, arg2);
129 lo_ffprintf(fp1, fp2, buf);
132 typedef struct {
133 int ncl;
134 int *cl;
135 } t_clusters;
137 typedef struct {
138 int nr;
139 int *nb;
140 } t_nnb;
142 void cp_index(int nn, int from[], int to[])
144 int i;
146 for (i = 0; (i < nn); i++)
148 to[i] = from[i];
152 void mc_optimize(FILE *log, t_mat *m, real *time,
153 int maxiter, int nrandom,
154 int seed, real kT,
155 const char *conv, gmx_output_env_t *oenv)
157 FILE *fp = nullptr;
158 real ecur, enext, emin, prob, enorm;
159 int i, j, iswap, jswap, nn, nuphill = 0;
160 t_mat *minimum;
162 if (seed == 0)
164 seed = static_cast<int>(gmx::makeRandomSeed());
166 gmx::DefaultRandomEngine rng(seed);
168 if (m->n1 != m->nn)
170 fprintf(stderr, "Can not do Monte Carlo optimization with a non-square matrix.\n");
171 return;
173 printf("\nDoing Monte Carlo optimization to find the smoothest trajectory\n");
174 printf("by reordering the frames to minimize the path between the two structures\n");
175 printf("that have the largest pairwise RMSD.\n");
176 printf("Using random seed %d.\n", seed);
178 iswap = jswap = -1;
179 enorm = m->mat[0][0];
180 for (i = 0; (i < m->n1); i++)
182 for (j = 0; (j < m->nn); j++)
184 if (m->mat[i][j] > enorm)
186 enorm = m->mat[i][j];
187 iswap = i;
188 jswap = j;
192 if ((iswap == -1) || (jswap == -1))
194 fprintf(stderr, "Matrix contains identical values in all fields\n");
195 return;
197 swap_rows(m, 0, iswap);
198 swap_rows(m, m->n1-1, jswap);
199 emin = ecur = mat_energy(m);
200 printf("Largest distance %g between %d and %d. Energy: %g.\n",
201 enorm, iswap, jswap, emin);
203 nn = m->nn;
205 /* Initiate and store global minimum */
206 minimum = init_mat(nn, m->b1D);
207 minimum->nn = nn;
208 copy_t_mat(minimum, m);
210 if (nullptr != conv)
212 fp = xvgropen(conv, "Convergence of the MC optimization",
213 "Energy", "Step", oenv);
216 gmx::UniformIntDistribution<int> intDistNN(1, nn-2); // [1,nn-2]
217 gmx::UniformRealDistribution<real> realDistOne; // [0,1)
219 for (i = 0; (i < maxiter); i++)
221 /* Generate new swapping candidates */
224 iswap = intDistNN(rng);
225 jswap = intDistNN(rng);
227 while ((iswap == jswap) || (iswap >= nn-1) || (jswap >= nn-1));
229 /* Apply swap and compute energy */
230 swap_rows(m, iswap, jswap);
231 enext = mat_energy(m);
233 /* Compute probability */
234 prob = 0;
235 if ((enext < ecur) || (i < nrandom))
237 prob = 1;
238 if (enext < emin)
240 /* Store global minimum */
241 copy_t_mat(minimum, m);
242 emin = enext;
245 else if (kT > 0)
247 /* Try Monte Carlo step */
248 prob = std::exp(-(enext-ecur)/(enorm*kT));
251 if (prob == 1 || realDistOne(rng) < prob)
253 if (enext > ecur)
255 nuphill++;
258 fprintf(log, "Iter: %d Swapped %4d and %4d (energy: %g prob: %g)\n",
259 i, iswap, jswap, enext, prob);
260 if (nullptr != fp)
262 fprintf(fp, "%6d %10g\n", i, enext);
264 ecur = enext;
266 else
268 swap_rows(m, jswap, iswap);
271 fprintf(log, "%d uphill steps were taken during optimization\n",
272 nuphill);
274 /* Now swap the matrix to get it into global minimum mode */
275 copy_t_mat(m, minimum);
277 fprintf(log, "Global minimum energy %g\n", mat_energy(minimum));
278 fprintf(log, "Global minimum energy %g\n", mat_energy(m));
279 fprintf(log, "Swapped time and frame indices and RMSD to next neighbor:\n");
280 for (i = 0; (i < m->nn); i++)
282 fprintf(log, "%10g %5d %10g\n",
283 time[m->m_ind[i]],
284 m->m_ind[i],
285 (i < m->nn-1) ? m->mat[m->m_ind[i]][m->m_ind[i+1]] : 0);
288 if (nullptr != fp)
290 xvgrclose(fp);
294 static void calc_dist(int nind, rvec x[], real **d)
296 int i, j;
297 real *xi;
298 rvec dx;
300 for (i = 0; (i < nind-1); i++)
302 xi = x[i];
303 for (j = i+1; (j < nind); j++)
305 /* Should use pbc_dx when analysing multiple molecueles,
306 * but the box is not stored for every frame.
308 rvec_sub(xi, x[j], dx);
309 d[i][j] = norm(dx);
314 static real rms_dist(int isize, real **d, real **d_r)
316 int i, j;
317 real r, r2;
319 r2 = 0.0;
320 for (i = 0; (i < isize-1); i++)
322 for (j = i+1; (j < isize); j++)
324 r = d[i][j]-d_r[i][j];
325 r2 += r*r;
328 r2 /= (isize*(isize-1))/2;
330 return std::sqrt(r2);
333 static bool rms_dist_comp(const t_dist &a, const t_dist &b)
335 return a.dist < b.dist;
338 static bool clust_id_comp(const t_clustid &a, const t_clustid &b)
340 return a.clust < b.clust;
343 static bool nrnb_comp(const t_nnb &a, const t_nnb &b)
345 /* return b<a, we want highest first */
346 return b.nr < a.nr;
349 void gather(t_mat *m, real cutoff, t_clusters *clust)
351 t_clustid *c;
352 t_dist *d;
353 int i, j, k, nn, cid, n1, diff;
354 gmx_bool bChange;
356 /* First we sort the entries in the RMSD matrix */
357 n1 = m->nn;
358 nn = ((n1-1)*n1)/2;
359 snew(d, nn);
360 for (i = k = 0; (i < n1); i++)
362 for (j = i+1; (j < n1); j++, k++)
364 d[k].i = i;
365 d[k].j = j;
366 d[k].dist = m->mat[i][j];
369 if (k != nn)
371 gmx_incons("gather algortihm");
373 std::sort(d, d+nn, rms_dist_comp);
375 /* Now we make a cluster index for all of the conformations */
376 c = new_clustid(n1);
378 /* Now we check the closest structures, and equalize their cluster numbers */
379 fprintf(stderr, "Linking structures ");
382 fprintf(stderr, "*");
383 bChange = FALSE;
384 for (k = 0; (k < nn) && (d[k].dist < cutoff); k++)
386 diff = c[d[k].j].clust - c[d[k].i].clust;
387 if (diff)
389 bChange = TRUE;
390 if (diff > 0)
392 c[d[k].j].clust = c[d[k].i].clust;
394 else
396 c[d[k].i].clust = c[d[k].j].clust;
401 while (bChange);
402 fprintf(stderr, "\nSorting and renumbering clusters\n");
403 /* Sort on cluster number */
404 std::sort(c, c+n1, clust_id_comp);
406 /* Renumber clusters */
407 cid = 1;
408 for (k = 1; k < n1; k++)
410 if (c[k].clust != c[k-1].clust)
412 c[k-1].clust = cid;
413 cid++;
415 else
417 c[k-1].clust = cid;
420 c[k-1].clust = cid;
421 if (debug)
423 for (k = 0; (k < n1); k++)
425 fprintf(debug, "Cluster index for conformation %d: %d\n",
426 c[k].conf, c[k].clust);
429 clust->ncl = cid;
430 for (k = 0; k < n1; k++)
432 clust->cl[c[k].conf] = c[k].clust;
435 sfree(c);
436 sfree(d);
439 gmx_bool jp_same(int **nnb, int i, int j, int P)
441 gmx_bool bIn;
442 int k, ii, jj, pp;
444 bIn = FALSE;
445 for (k = 0; nnb[i][k] >= 0; k++)
447 bIn = bIn || (nnb[i][k] == j);
449 if (!bIn)
451 return FALSE;
454 bIn = FALSE;
455 for (k = 0; nnb[j][k] >= 0; k++)
457 bIn = bIn || (nnb[j][k] == i);
459 if (!bIn)
461 return FALSE;
464 pp = 0;
465 for (ii = 0; nnb[i][ii] >= 0; ii++)
467 for (jj = 0; nnb[j][jj] >= 0; jj++)
469 if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
471 pp++;
476 return (pp >= P);
479 static void jarvis_patrick(int n1, real **mat, int M, int P,
480 real rmsdcut, t_clusters *clust)
482 t_dist *row;
483 t_clustid *c;
484 int **nnb;
485 int i, j, k, cid, diff, maxval;
486 gmx_bool bChange;
487 real **mcpy = nullptr;
489 if (rmsdcut < 0)
491 rmsdcut = 10000;
494 /* First we sort the entries in the RMSD matrix row by row.
495 * This gives us the nearest neighbor list.
497 snew(nnb, n1);
498 snew(row, n1);
499 for (i = 0; (i < n1); i++)
501 for (j = 0; (j < n1); j++)
503 row[j].j = j;
504 row[j].dist = mat[i][j];
506 std::sort(row, row+n1, rms_dist_comp);
507 if (M > 0)
509 /* Put the M nearest neighbors in the list */
510 snew(nnb[i], M+1);
511 for (j = k = 0; (k < M) && (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
513 if (row[j].j != i)
515 nnb[i][k] = row[j].j;
516 k++;
519 nnb[i][k] = -1;
521 else
523 /* Put all neighbors nearer than rmsdcut in the list */
524 maxval = 0;
525 k = 0;
526 for (j = 0; (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
528 if (row[j].j != i)
530 if (k >= maxval)
532 maxval += 10;
533 srenew(nnb[i], maxval);
535 nnb[i][k] = row[j].j;
536 k++;
539 if (k == maxval)
541 srenew(nnb[i], maxval+1);
543 nnb[i][k] = -1;
546 sfree(row);
547 if (debug)
549 fprintf(debug, "Nearest neighborlist. M = %d, P = %d\n", M, P);
550 for (i = 0; (i < n1); i++)
552 fprintf(debug, "i:%5d nbs:", i);
553 for (j = 0; nnb[i][j] >= 0; j++)
555 fprintf(debug, "%5d[%5.3f]", nnb[i][j], mat[i][nnb[i][j]]);
557 fprintf(debug, "\n");
561 c = new_clustid(n1);
562 fprintf(stderr, "Linking structures ");
563 /* Use mcpy for temporary storage of booleans */
564 mcpy = mk_matrix(n1, n1, FALSE);
565 for (i = 0; i < n1; i++)
567 for (j = i+1; j < n1; j++)
569 mcpy[i][j] = jp_same(nnb, i, j, P);
574 fprintf(stderr, "*");
575 bChange = FALSE;
576 for (i = 0; i < n1; i++)
578 for (j = i+1; j < n1; j++)
580 if (mcpy[i][j])
582 diff = c[j].clust - c[i].clust;
583 if (diff)
585 bChange = TRUE;
586 if (diff > 0)
588 c[j].clust = c[i].clust;
590 else
592 c[i].clust = c[j].clust;
599 while (bChange);
601 fprintf(stderr, "\nSorting and renumbering clusters\n");
602 /* Sort on cluster number */
603 std::sort(c, c+n1, clust_id_comp);
605 /* Renumber clusters */
606 cid = 1;
607 for (k = 1; k < n1; k++)
609 if (c[k].clust != c[k-1].clust)
611 c[k-1].clust = cid;
612 cid++;
614 else
616 c[k-1].clust = cid;
619 c[k-1].clust = cid;
620 clust->ncl = cid;
621 for (k = 0; k < n1; k++)
623 clust->cl[c[k].conf] = c[k].clust;
625 if (debug)
627 for (k = 0; (k < n1); k++)
629 fprintf(debug, "Cluster index for conformation %d: %d\n",
630 c[k].conf, c[k].clust);
634 done_matrix(n1, &mcpy);
636 sfree(c);
637 for (i = 0; (i < n1); i++)
639 sfree(nnb[i]);
641 sfree(nnb);
644 static void dump_nnb (FILE *fp, const char *title, int n1, t_nnb *nnb)
646 int i, j;
648 /* dump neighbor list */
649 fprintf(fp, "%s", title);
650 for (i = 0; (i < n1); i++)
652 fprintf(fp, "i:%5d #:%5d nbs:", i, nnb[i].nr);
653 for (j = 0; j < nnb[i].nr; j++)
655 fprintf(fp, "%5d", nnb[i].nb[j]);
657 fprintf(fp, "\n");
661 static void gromos(int n1, real **mat, real rmsdcut, t_clusters *clust)
663 t_dist *row;
664 t_nnb *nnb;
665 int i, j, k, j1, maxval;
667 /* Put all neighbors nearer than rmsdcut in the list */
668 fprintf(stderr, "Making list of neighbors within cutoff ");
669 snew(nnb, n1);
670 snew(row, n1);
671 for (i = 0; (i < n1); i++)
673 maxval = 0;
674 k = 0;
675 /* put all neighbors within cut-off in list */
676 for (j = 0; j < n1; j++)
678 if (mat[i][j] < rmsdcut)
680 if (k >= maxval)
682 maxval += 10;
683 srenew(nnb[i].nb, maxval);
685 nnb[i].nb[k] = j;
686 k++;
689 /* store nr of neighbors, we'll need that */
690 nnb[i].nr = k;
691 if (i%(1+n1/100) == 0)
693 fprintf(stderr, "%3d%%\b\b\b\b", (i*100+1)/n1);
696 fprintf(stderr, "%3d%%\n", 100);
697 sfree(row);
699 /* sort neighbor list on number of neighbors, largest first */
700 std::sort(nnb, nnb+n1, nrnb_comp);
702 if (debug)
704 dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
707 /* turn first structure with all its neighbors (largest) into cluster
708 remove them from pool of structures and repeat for all remaining */
709 fprintf(stderr, "Finding clusters %4d", 0);
710 /* cluster id's start at 1: */
711 k = 1;
712 while (nnb[0].nr)
714 /* set cluster id (k) for first item in neighborlist */
715 for (j = 0; j < nnb[0].nr; j++)
717 clust->cl[nnb[0].nb[j]] = k;
719 /* mark as done */
720 nnb[0].nr = 0;
721 sfree(nnb[0].nb);
723 /* adjust number of neighbors for others, taking removals into account: */
724 for (i = 1; i < n1 && nnb[i].nr; i++)
726 j1 = 0;
727 for (j = 0; j < nnb[i].nr; j++)
729 /* if this neighbor wasn't removed */
730 if (clust->cl[nnb[i].nb[j]] == 0)
732 /* shift the rest (j1<=j) */
733 nnb[i].nb[j1] = nnb[i].nb[j];
734 /* next */
735 j1++;
738 /* now j1 is the new number of neighbors */
739 nnb[i].nr = j1;
741 /* sort again on nnb[].nr, because we have new # neighbors: */
742 /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
743 std::sort(nnb, nnb+i, nrnb_comp);
745 fprintf(stderr, "\b\b\b\b%4d", k);
746 /* new cluster id */
747 k++;
749 fprintf(stderr, "\n");
750 sfree(nnb);
751 if (debug)
753 fprintf(debug, "Clusters (%d):\n", k);
754 for (i = 0; i < n1; i++)
756 fprintf(debug, " %3d", clust->cl[i]);
758 fprintf(debug, "\n");
761 clust->ncl = k-1;
764 rvec **read_whole_trj(const char *fn, int isize, int index[], int skip,
765 int *nframe, real **time, const gmx_output_env_t *oenv, gmx_bool bPBC, gmx_rmpbc_t gpbc)
767 rvec **xx, *x;
768 matrix box;
769 real t;
770 int i, i0, j, max_nf;
771 int natom;
772 t_trxstatus *status;
775 max_nf = 0;
776 xx = nullptr;
777 *time = nullptr;
778 natom = read_first_x(oenv, &status, fn, &t, &x, box);
779 i = 0;
780 i0 = 0;
783 if (bPBC)
785 gmx_rmpbc(gpbc, natom, box, x);
787 if (i0 >= max_nf)
789 max_nf += 10;
790 srenew(xx, max_nf);
791 srenew(*time, max_nf);
793 if ((i % skip) == 0)
795 snew(xx[i0], isize);
796 /* Store only the interesting atoms */
797 for (j = 0; (j < isize); j++)
799 copy_rvec(x[index[j]], xx[i0][j]);
801 (*time)[i0] = t;
802 i0++;
804 i++;
806 while (read_next_x(oenv, status, &t, x, box));
807 fprintf(stderr, "Allocated %lu bytes for frames\n",
808 (unsigned long) (max_nf*isize*sizeof(**xx)));
809 fprintf(stderr, "Read %d frames from trajectory %s\n", i0, fn);
810 *nframe = i0;
811 sfree(x);
813 return xx;
816 static int plot_clusters(int nf, real **mat, t_clusters *clust,
817 int minstruct)
819 int i, j, ncluster, ci;
820 int *cl_id, *nstruct, *strind;
822 snew(cl_id, nf);
823 snew(nstruct, nf);
824 snew(strind, nf);
825 for (i = 0; i < nf; i++)
827 strind[i] = 0;
828 cl_id[i] = clust->cl[i];
829 nstruct[cl_id[i]]++;
831 ncluster = 0;
832 for (i = 0; i < nf; i++)
834 if (nstruct[i] >= minstruct)
836 ncluster++;
837 for (j = 0; (j < nf); j++)
839 if (cl_id[j] == i)
841 strind[j] = ncluster;
846 ncluster++;
847 fprintf(stderr, "There are %d clusters with at least %d conformations\n",
848 ncluster, minstruct);
850 for (i = 0; (i < nf); i++)
852 ci = cl_id[i];
853 for (j = 0; j < i; j++)
855 if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct))
857 /* color different clusters with different colors, as long as
858 we don't run out of colors */
859 mat[i][j] = strind[i];
861 else
863 mat[i][j] = 0;
867 sfree(strind);
868 sfree(nstruct);
869 sfree(cl_id);
871 return ncluster;
874 static void mark_clusters(int nf, real **mat, real val, t_clusters *clust)
876 int i, j;
878 for (i = 0; i < nf; i++)
880 for (j = 0; j < i; j++)
882 if (clust->cl[i] == clust->cl[j])
884 mat[i][j] = val;
886 else
888 mat[i][j] = 0;
894 static char *parse_filename(const char *fn, int maxnr)
896 int i;
897 char *fnout;
898 const char *ext;
899 char buf[STRLEN];
901 if (std::strchr(fn, '%'))
903 gmx_fatal(FARGS, "will not number filename %s containing '%c'", fn, '%');
905 /* number of digits needed in numbering */
906 i = static_cast<int>((std::log(static_cast<real>(maxnr))/std::log(10.0)) + 1);
907 /* split fn and ext */
908 ext = std::strrchr(fn, '.');
909 if (!ext)
911 gmx_fatal(FARGS, "cannot separate extension in filename %s", fn);
913 ext++;
914 /* insert e.g. '%03d' between fn and ext */
915 sprintf(buf, "%s%%0%dd.%s", fn, i, ext);
916 snew(fnout, std::strlen(buf)+1);
917 std::strcpy(fnout, buf);
919 return fnout;
922 static void ana_trans(t_clusters *clust, int nf,
923 const char *transfn, const char *ntransfn, FILE *log,
924 t_rgb rlo, t_rgb rhi, const gmx_output_env_t *oenv)
926 FILE *fp;
927 real **trans, *axis;
928 int *ntrans;
929 int i, ntranst, maxtrans;
930 char buf[STRLEN];
932 snew(ntrans, clust->ncl);
933 snew(trans, clust->ncl);
934 snew(axis, clust->ncl);
935 for (i = 0; i < clust->ncl; i++)
937 axis[i] = i+1;
938 snew(trans[i], clust->ncl);
940 ntranst = 0;
941 maxtrans = 0;
942 for (i = 1; i < nf; i++)
944 if (clust->cl[i] != clust->cl[i-1])
946 ntranst++;
947 ntrans[clust->cl[i-1]-1]++;
948 ntrans[clust->cl[i]-1]++;
949 trans[clust->cl[i-1]-1][clust->cl[i]-1]++;
950 maxtrans = static_cast<int>(std::max(static_cast<real>(maxtrans), trans[clust->cl[i]-1][clust->cl[i-1]-1]));
953 ffprintf_dd(stderr, log, buf, "Counted %d transitions in total, "
954 "max %d between two specific clusters\n", ntranst, maxtrans);
955 if (transfn)
957 fp = gmx_ffopen(transfn, "w");
958 i = std::min(maxtrans+1, 80);
959 write_xpm(fp, 0, "Cluster Transitions", "# transitions",
960 "from cluster", "to cluster",
961 clust->ncl, clust->ncl, axis, axis, trans,
962 0, maxtrans, rlo, rhi, &i);
963 gmx_ffclose(fp);
965 if (ntransfn)
967 fp = xvgropen(ntransfn, "Cluster Transitions", "Cluster #", "# transitions",
968 oenv);
969 for (i = 0; i < clust->ncl; i++)
971 fprintf(fp, "%5d %5d\n", i+1, ntrans[i]);
973 xvgrclose(fp);
975 sfree(ntrans);
976 for (i = 0; i < clust->ncl; i++)
978 sfree(trans[i]);
980 sfree(trans);
981 sfree(axis);
984 static void analyze_clusters(int nf, t_clusters *clust, real **rmsd,
985 int natom, t_atoms *atoms, rvec *xtps,
986 real *mass, rvec **xx, real *time,
987 int ifsize, int *fitidx,
988 int iosize, int *outidx,
989 const char *trxfn, const char *sizefn,
990 const char *transfn, const char *ntransfn,
991 const char *clustidfn, gmx_bool bAverage,
992 int write_ncl, int write_nst, real rmsmin,
993 gmx_bool bFit, FILE *log, t_rgb rlo, t_rgb rhi,
994 const gmx_output_env_t *oenv)
996 FILE *size_fp = nullptr;
997 char buf[STRLEN], buf1[40], buf2[40], buf3[40], *trxsfn;
998 t_trxstatus *trxout = nullptr;
999 t_trxstatus *trxsout = nullptr;
1000 int i, i1, cl, nstr, *structure, first = 0, midstr;
1001 gmx_bool *bWrite = nullptr;
1002 real r, clrmsd, midrmsd;
1003 rvec *xav = nullptr;
1004 matrix zerobox;
1006 clear_mat(zerobox);
1008 ffprintf_d(stderr, log, buf, "\nFound %d clusters\n\n", clust->ncl);
1009 trxsfn = nullptr;
1010 if (trxfn)
1012 /* do we write all structures? */
1013 if (write_ncl)
1015 trxsfn = parse_filename(trxfn, std::max(write_ncl, clust->ncl));
1016 snew(bWrite, nf);
1018 ffprintf_ss(stderr, log, buf, "Writing %s structure for each cluster to %s\n",
1019 bAverage ? "average" : "middle", trxfn);
1020 if (write_ncl)
1022 /* find out what we want to tell the user:
1023 Writing [all structures|structures with rmsd > %g] for
1024 {all|first %d} clusters {with more than %d structures} to %s */
1025 if (rmsmin > 0.0)
1027 sprintf(buf1, "structures with rmsd > %g", rmsmin);
1029 else
1031 sprintf(buf1, "all structures");
1033 buf2[0] = buf3[0] = '\0';
1034 if (write_ncl >= clust->ncl)
1036 if (write_nst == 0)
1038 sprintf(buf2, "all ");
1041 else
1043 sprintf(buf2, "the first %d ", write_ncl);
1045 if (write_nst)
1047 sprintf(buf3, " with more than %d structures", write_nst);
1049 sprintf(buf, "Writing %s for %sclusters%s to %s\n", buf1, buf2, buf3, trxsfn);
1050 ffprintf(stderr, log, buf);
1053 /* Prepare a reference structure for the orientation of the clusters */
1054 if (bFit)
1056 reset_x(ifsize, fitidx, natom, nullptr, xtps, mass);
1058 trxout = open_trx(trxfn, "w");
1059 /* Calculate the average structure in each cluster, *
1060 * all structures are fitted to the first struture of the cluster */
1061 snew(xav, natom);
1064 if (transfn || ntransfn)
1066 ana_trans(clust, nf, transfn, ntransfn, log, rlo, rhi, oenv);
1069 if (clustidfn)
1071 FILE *fp = xvgropen(clustidfn, "Clusters", output_env_get_xvgr_tlabel(oenv), "Cluster #", oenv);
1072 if (output_env_get_print_xvgr_codes(oenv))
1074 fprintf(fp, "@ s0 symbol 2\n");
1075 fprintf(fp, "@ s0 symbol size 0.2\n");
1076 fprintf(fp, "@ s0 linestyle 0\n");
1078 for (i = 0; i < nf; i++)
1080 fprintf(fp, "%8g %8d\n", time[i], clust->cl[i]);
1082 xvgrclose(fp);
1084 if (sizefn)
1086 size_fp = xvgropen(sizefn, "Cluster Sizes", "Cluster #", "# Structures", oenv);
1087 if (output_env_get_print_xvgr_codes(oenv))
1089 fprintf(size_fp, "@g%d type %s\n", 0, "bar");
1092 snew(structure, nf);
1093 fprintf(log, "\n%3s | %3s %4s | %6s %4s | cluster members\n",
1094 "cl.", "#st", "rmsd", "middle", "rmsd");
1095 for (cl = 1; cl <= clust->ncl; cl++)
1097 /* prepare structures (fit, middle, average) */
1098 if (xav)
1100 for (i = 0; i < natom; i++)
1102 clear_rvec(xav[i]);
1105 nstr = 0;
1106 for (i1 = 0; i1 < nf; i1++)
1108 if (clust->cl[i1] == cl)
1110 structure[nstr] = i1;
1111 nstr++;
1112 if (trxfn && (bAverage || write_ncl) )
1114 if (bFit)
1116 reset_x(ifsize, fitidx, natom, nullptr, xx[i1], mass);
1118 if (nstr == 1)
1120 first = i1;
1122 else if (bFit)
1124 do_fit(natom, mass, xx[first], xx[i1]);
1126 if (xav)
1128 for (i = 0; i < natom; i++)
1130 rvec_inc(xav[i], xx[i1][i]);
1136 if (sizefn)
1138 fprintf(size_fp, "%8d %8d\n", cl, nstr);
1140 clrmsd = 0;
1141 midstr = 0;
1142 midrmsd = 10000;
1143 for (i1 = 0; i1 < nstr; i1++)
1145 r = 0;
1146 if (nstr > 1)
1148 for (i = 0; i < nstr; i++)
1150 if (i < i1)
1152 r += rmsd[structure[i]][structure[i1]];
1154 else
1156 r += rmsd[structure[i1]][structure[i]];
1159 r /= (nstr - 1);
1161 if (r < midrmsd)
1163 midstr = structure[i1];
1164 midrmsd = r;
1166 clrmsd += r;
1168 clrmsd /= nstr;
1170 /* dump cluster info to logfile */
1171 if (nstr > 1)
1173 sprintf(buf1, "%6.3f", clrmsd);
1174 if (buf1[0] == '0')
1176 buf1[0] = ' ';
1178 sprintf(buf2, "%5.3f", midrmsd);
1179 if (buf2[0] == '0')
1181 buf2[0] = ' ';
1184 else
1186 sprintf(buf1, "%5s", "");
1187 sprintf(buf2, "%5s", "");
1189 fprintf(log, "%3d | %3d %s | %6g%s |", cl, nstr, buf1, time[midstr], buf2);
1190 for (i = 0; i < nstr; i++)
1192 if ((i % 7 == 0) && i)
1194 sprintf(buf, "\n%3s | %3s %4s | %6s %4s |", "", "", "", "", "");
1196 else
1198 buf[0] = '\0';
1200 i1 = structure[i];
1201 fprintf(log, "%s %6g", buf, time[i1]);
1203 fprintf(log, "\n");
1205 /* write structures to trajectory file(s) */
1206 if (trxfn)
1208 if (write_ncl)
1210 for (i = 0; i < nstr; i++)
1212 bWrite[i] = FALSE;
1215 if (cl < write_ncl+1 && nstr > write_nst)
1217 /* Dump all structures for this cluster */
1218 /* generate numbered filename (there is a %d in trxfn!) */
1219 sprintf(buf, trxsfn, cl);
1220 trxsout = open_trx(buf, "w");
1221 for (i = 0; i < nstr; i++)
1223 bWrite[i] = TRUE;
1224 if (rmsmin > 0.0)
1226 for (i1 = 0; i1 < i && bWrite[i]; i1++)
1228 if (bWrite[i1])
1230 bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
1234 if (bWrite[i])
1236 write_trx(trxsout, iosize, outidx, atoms, i, time[structure[i]], zerobox,
1237 xx[structure[i]], nullptr, nullptr);
1240 close_trx(trxsout);
1242 /* Dump the average structure for this cluster */
1243 if (bAverage)
1245 for (i = 0; i < natom; i++)
1247 svmul(1.0/nstr, xav[i], xav[i]);
1250 else
1252 for (i = 0; i < natom; i++)
1254 copy_rvec(xx[midstr][i], xav[i]);
1256 if (bFit)
1258 reset_x(ifsize, fitidx, natom, nullptr, xav, mass);
1261 if (bFit)
1263 do_fit(natom, mass, xtps, xav);
1265 write_trx(trxout, iosize, outidx, atoms, cl, time[midstr], zerobox, xav, nullptr, nullptr);
1268 /* clean up */
1269 if (trxfn)
1271 close_trx(trxout);
1272 sfree(xav);
1273 if (write_ncl)
1275 sfree(bWrite);
1278 sfree(structure);
1279 if (trxsfn)
1281 sfree(trxsfn);
1284 if (size_fp)
1286 xvgrclose(size_fp);
1290 static void convert_mat(t_matrix *mat, t_mat *rms)
1292 int i, j;
1294 rms->n1 = mat->nx;
1295 matrix2real(mat, rms->mat);
1296 /* free input xpm matrix data */
1297 for (i = 0; i < mat->nx; i++)
1299 sfree(mat->matrix[i]);
1301 sfree(mat->matrix);
1303 for (i = 0; i < mat->nx; i++)
1305 for (j = i; j < mat->nx; j++)
1307 rms->sumrms += rms->mat[i][j];
1308 rms->maxrms = std::max(rms->maxrms, rms->mat[i][j]);
1309 if (j != i)
1311 rms->minrms = std::min(rms->minrms, rms->mat[i][j]);
1315 rms->nn = mat->nx;
1318 int gmx_cluster(int argc, char *argv[])
1320 const char *desc[] = {
1321 "[THISMODULE] can cluster structures using several different methods.",
1322 "Distances between structures can be determined from a trajectory",
1323 "or read from an [REF].xpm[ref] matrix file with the [TT]-dm[tt] option.",
1324 "RMS deviation after fitting or RMS deviation of atom-pair distances",
1325 "can be used to define the distance between structures.[PAR]",
1327 "single linkage: add a structure to a cluster when its distance to any",
1328 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
1330 "Jarvis Patrick: add a structure to a cluster when this structure",
1331 "and a structure in the cluster have each other as neighbors and",
1332 "they have a least [TT]P[tt] neighbors in common. The neighbors",
1333 "of a structure are the M closest structures or all structures within",
1334 "[TT]cutoff[tt].[PAR]",
1336 "Monte Carlo: reorder the RMSD matrix using Monte Carlo such that",
1337 "the order of the frames is using the smallest possible increments.",
1338 "With this it is possible to make a smooth animation going from one",
1339 "structure to another with the largest possible (e.g.) RMSD between",
1340 "them, however the intermediate steps should be as small as possible.",
1341 "Applications could be to visualize a potential of mean force",
1342 "ensemble of simulations or a pulling simulation. Obviously the user",
1343 "has to prepare the trajectory well (e.g. by not superimposing frames).",
1344 "The final result can be inspect visually by looking at the matrix",
1345 "[REF].xpm[ref] file, which should vary smoothly from bottom to top.[PAR]",
1347 "diagonalization: diagonalize the RMSD matrix.[PAR]",
1349 "gromos: use algorithm as described in Daura [IT]et al.[it]",
1350 "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
1351 "Count number of neighbors using cut-off, take structure with",
1352 "largest number of neighbors with all its neighbors as cluster",
1353 "and eliminate it from the pool of clusters. Repeat for remaining",
1354 "structures in pool.[PAR]",
1356 "When the clustering algorithm assigns each structure to exactly one",
1357 "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
1358 "file is supplied, the structure with",
1359 "the smallest average distance to the others or the average structure",
1360 "or all structures for each cluster will be written to a trajectory",
1361 "file. When writing all structures, separate numbered files are made",
1362 "for each cluster.[PAR]",
1364 "Two output files are always written:",
1366 " * [TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
1367 " and a graphical depiction of the clusters in the lower right half",
1368 " When [TT]-minstruct[tt] = 1 the graphical depiction is black",
1369 " when two structures are in the same cluster.",
1370 " When [TT]-minstruct[tt] > 1 different colors will be used for each",
1371 " cluster.",
1372 " * [TT]-g[tt] writes information on the options used and a detailed list",
1373 " of all clusters and their members.",
1376 "Additionally, a number of optional output files can be written:",
1378 " * [TT]-dist[tt] writes the RMSD distribution.",
1379 " * [TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
1380 " diagonalization.",
1381 " * [TT]-sz[tt] writes the cluster sizes.",
1382 " * [TT]-tr[tt] writes a matrix of the number transitions between",
1383 " cluster pairs.",
1384 " * [TT]-ntr[tt] writes the total number of transitions to or from",
1385 " each cluster.",
1386 " * [TT]-clid[tt] writes the cluster number as a function of time.",
1387 " * [TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1388 " structure of each cluster or writes numbered files with cluster members",
1389 " for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1390 " [TT]-nst[tt] and [TT]-rmsmin[tt]). The center of a cluster is the",
1391 " structure with the smallest average RMSD from all other structures",
1392 " of the cluster.",
1395 FILE *fp, *log;
1396 int nf, i, i1, i2, j;
1397 gmx_int64_t nrms = 0;
1399 matrix box;
1400 rvec *xtps, *usextps, *x1, **xx = nullptr;
1401 const char *fn, *trx_out_fn;
1402 t_clusters clust;
1403 t_mat *rms, *orig = nullptr;
1404 real *eigenvalues;
1405 t_topology top;
1406 int ePBC;
1407 t_atoms useatoms;
1408 t_matrix *readmat = nullptr;
1409 real *eigenvectors;
1411 int isize = 0, ifsize = 0, iosize = 0;
1412 int *index = nullptr, *fitidx = nullptr, *outidx = nullptr;
1413 char *grpname;
1414 real rmsd, **d1, **d2, *time = nullptr, time_invfac, *mass = nullptr;
1415 char buf[STRLEN], buf1[80], title[STRLEN];
1416 gmx_bool bAnalyze, bUseRmsdCut, bJP_RMSD = FALSE, bReadMat, bReadTraj, bPBC = TRUE;
1418 int method, ncluster = 0;
1419 static const char *methodname[] = {
1420 nullptr, "linkage", "jarvis-patrick", "monte-carlo",
1421 "diagonalization", "gromos", nullptr
1423 enum {
1424 m_null, m_linkage, m_jarvis_patrick,
1425 m_monte_carlo, m_diagonalize, m_gromos, m_nr
1427 /* Set colors for plotting: white = zero RMS, black = maximum */
1428 static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
1429 static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
1430 static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
1431 static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
1432 static int nlevels = 40, skip = 1;
1433 static real scalemax = -1.0, rmsdcut = 0.1, rmsmin = 0.0;
1434 gmx_bool bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;
1435 static int niter = 10000, nrandom = 0, seed = 0, write_ncl = 0, write_nst = 1, minstruct = 1;
1436 static real kT = 1e-3;
1437 static int M = 10, P = 3;
1438 gmx_output_env_t *oenv;
1439 gmx_rmpbc_t gpbc = nullptr;
1441 t_pargs pa[] = {
1442 { "-dista", FALSE, etBOOL, {&bRMSdist},
1443 "Use RMSD of distances instead of RMS deviation" },
1444 { "-nlevels", FALSE, etINT, {&nlevels},
1445 "Discretize RMSD matrix in this number of levels" },
1446 { "-cutoff", FALSE, etREAL, {&rmsdcut},
1447 "RMSD cut-off (nm) for two structures to be neighbor" },
1448 { "-fit", FALSE, etBOOL, {&bFit},
1449 "Use least squares fitting before RMSD calculation" },
1450 { "-max", FALSE, etREAL, {&scalemax},
1451 "Maximum level in RMSD matrix" },
1452 { "-skip", FALSE, etINT, {&skip},
1453 "Only analyze every nr-th frame" },
1454 { "-av", FALSE, etBOOL, {&bAverage},
1455 "Write average iso middle structure for each cluster" },
1456 { "-wcl", FALSE, etINT, {&write_ncl},
1457 "Write the structures for this number of clusters to numbered files" },
1458 { "-nst", FALSE, etINT, {&write_nst},
1459 "Only write all structures if more than this number of structures per cluster" },
1460 { "-rmsmin", FALSE, etREAL, {&rmsmin},
1461 "minimum rms difference with rest of cluster for writing structures" },
1462 { "-method", FALSE, etENUM, {methodname},
1463 "Method for cluster determination" },
1464 { "-minstruct", FALSE, etINT, {&minstruct},
1465 "Minimum number of structures in cluster for coloring in the [REF].xpm[ref] file" },
1466 { "-binary", FALSE, etBOOL, {&bBinary},
1467 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1468 "is given by [TT]-cutoff[tt]" },
1469 { "-M", FALSE, etINT, {&M},
1470 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1471 "0 is use cutoff" },
1472 { "-P", FALSE, etINT, {&P},
1473 "Number of identical nearest neighbors required to form a cluster" },
1474 { "-seed", FALSE, etINT, {&seed},
1475 "Random number seed for Monte Carlo clustering algorithm (0 means generate)" },
1476 { "-niter", FALSE, etINT, {&niter},
1477 "Number of iterations for MC" },
1478 { "-nrandom", FALSE, etINT, {&nrandom},
1479 "The first iterations for MC may be done complete random, to shuffle the frames" },
1480 { "-kT", FALSE, etREAL, {&kT},
1481 "Boltzmann weighting factor for Monte Carlo optimization "
1482 "(zero turns off uphill steps)" },
1483 { "-pbc", FALSE, etBOOL,
1484 { &bPBC }, "PBC check" }
1486 t_filenm fnm[] = {
1487 { efTRX, "-f", nullptr, ffOPTRD },
1488 { efTPS, "-s", nullptr, ffOPTRD },
1489 { efNDX, nullptr, nullptr, ffOPTRD },
1490 { efXPM, "-dm", "rmsd", ffOPTRD },
1491 { efXPM, "-om", "rmsd-raw", ffWRITE },
1492 { efXPM, "-o", "rmsd-clust", ffWRITE },
1493 { efLOG, "-g", "cluster", ffWRITE },
1494 { efXVG, "-dist", "rmsd-dist", ffOPTWR },
1495 { efXVG, "-ev", "rmsd-eig", ffOPTWR },
1496 { efXVG, "-conv", "mc-conv", ffOPTWR },
1497 { efXVG, "-sz", "clust-size", ffOPTWR},
1498 { efXPM, "-tr", "clust-trans", ffOPTWR},
1499 { efXVG, "-ntr", "clust-trans", ffOPTWR},
1500 { efXVG, "-clid", "clust-id", ffOPTWR},
1501 { efTRX, "-cl", "clusters.pdb", ffOPTWR }
1503 #define NFILE asize(fnm)
1505 if (!parse_common_args(&argc, argv,
1506 PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT,
1507 NFILE, fnm, asize(pa), pa, asize(desc), desc, 0, nullptr,
1508 &oenv))
1510 return 0;
1513 /* parse options */
1514 bReadMat = opt2bSet("-dm", NFILE, fnm);
1515 bReadTraj = opt2bSet("-f", NFILE, fnm) || !bReadMat;
1516 if (opt2parg_bSet("-av", asize(pa), pa) ||
1517 opt2parg_bSet("-wcl", asize(pa), pa) ||
1518 opt2parg_bSet("-nst", asize(pa), pa) ||
1519 opt2parg_bSet("-rmsmin", asize(pa), pa) ||
1520 opt2bSet("-cl", NFILE, fnm) )
1522 trx_out_fn = opt2fn("-cl", NFILE, fnm);
1524 else
1526 trx_out_fn = nullptr;
1528 if (bReadMat && output_env_get_time_factor(oenv) != 1)
1530 fprintf(stderr,
1531 "\nWarning: assuming the time unit in %s is %s\n",
1532 opt2fn("-dm", NFILE, fnm), output_env_get_time_unit(oenv));
1534 if (trx_out_fn && !bReadTraj)
1536 fprintf(stderr, "\nWarning: "
1537 "cannot write cluster structures without reading trajectory\n"
1538 " ignoring option -cl %s\n", trx_out_fn);
1541 method = 1;
1542 while (method < m_nr && gmx_strcasecmp(methodname[0], methodname[method]) != 0)
1544 method++;
1546 if (method == m_nr)
1548 gmx_fatal(FARGS, "Invalid method");
1551 bAnalyze = (method == m_linkage || method == m_jarvis_patrick ||
1552 method == m_gromos );
1554 /* Open log file */
1555 log = ftp2FILE(efLOG, NFILE, fnm, "w");
1557 fprintf(stderr, "Using %s method for clustering\n", methodname[0]);
1558 fprintf(log, "Using %s method for clustering\n", methodname[0]);
1560 /* check input and write parameters to log file */
1561 bUseRmsdCut = FALSE;
1562 if (method == m_jarvis_patrick)
1564 bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff", asize(pa), pa);
1565 if ((M < 0) || (M == 1))
1567 gmx_fatal(FARGS, "M (%d) must be 0 or larger than 1", M);
1569 if (M < 2)
1571 sprintf(buf1, "Will use P=%d and RMSD cutoff (%g)", P, rmsdcut);
1572 bUseRmsdCut = TRUE;
1574 else
1576 if (P >= M)
1578 gmx_fatal(FARGS, "Number of neighbors required (P) must be less than M");
1580 if (bJP_RMSD)
1582 sprintf(buf1, "Will use P=%d, M=%d and RMSD cutoff (%g)", P, M, rmsdcut);
1583 bUseRmsdCut = TRUE;
1585 else
1587 sprintf(buf1, "Will use P=%d, M=%d", P, M);
1590 ffprintf_s(stderr, log, buf, "%s for determining the neighbors\n\n", buf1);
1592 else /* method != m_jarvis */
1594 bUseRmsdCut = ( bBinary || method == m_linkage || method == m_gromos );
1596 if (bUseRmsdCut && method != m_jarvis_patrick)
1598 fprintf(log, "Using RMSD cutoff %g nm\n", rmsdcut);
1600 if (method == m_monte_carlo)
1602 fprintf(log, "Using %d iterations\n", niter);
1605 if (skip < 1)
1607 gmx_fatal(FARGS, "skip (%d) should be >= 1", skip);
1610 /* get input */
1611 if (bReadTraj)
1613 /* don't read mass-database as masses (and top) are not used */
1614 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xtps, nullptr, box,
1615 TRUE);
1616 if (bPBC)
1618 gpbc = gmx_rmpbc_init(&top.idef, ePBC, top.atoms.nr);
1621 fprintf(stderr, "\nSelect group for least squares fit%s:\n",
1622 bReadMat ? "" : " and RMSD calculation");
1623 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1624 1, &ifsize, &fitidx, &grpname);
1625 if (trx_out_fn)
1627 fprintf(stderr, "\nSelect group for output:\n");
1628 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm),
1629 1, &iosize, &outidx, &grpname);
1630 /* merge and convert both index groups: */
1631 /* first copy outidx to index. let outidx refer to elements in index */
1632 snew(index, iosize);
1633 isize = iosize;
1634 for (i = 0; i < iosize; i++)
1636 index[i] = outidx[i];
1637 outidx[i] = i;
1639 /* now lookup elements from fitidx in index, add them if necessary
1640 and also let fitidx refer to elements in index */
1641 for (i = 0; i < ifsize; i++)
1643 j = 0;
1644 while (j < isize && index[j] != fitidx[i])
1646 j++;
1648 if (j >= isize)
1650 /* slow this way, but doesn't matter much */
1651 isize++;
1652 srenew(index, isize);
1654 index[j] = fitidx[i];
1655 fitidx[i] = j;
1658 else /* !trx_out_fn */
1660 isize = ifsize;
1661 snew(index, isize);
1662 for (i = 0; i < ifsize; i++)
1664 index[i] = fitidx[i];
1665 fitidx[i] = i;
1670 if (bReadTraj)
1672 /* Loop over first coordinate file */
1673 fn = opt2fn("-f", NFILE, fnm);
1675 xx = read_whole_trj(fn, isize, index, skip, &nf, &time, oenv, bPBC, gpbc);
1676 output_env_conv_times(oenv, nf, time);
1677 if (!bRMSdist || bAnalyze)
1679 /* Center all frames on zero */
1680 snew(mass, isize);
1681 for (i = 0; i < ifsize; i++)
1683 mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1685 if (bFit)
1687 for (i = 0; i < nf; i++)
1689 reset_x(ifsize, fitidx, isize, nullptr, xx[i], mass);
1693 if (bPBC)
1695 gmx_rmpbc_done(gpbc);
1699 if (bReadMat)
1701 fprintf(stderr, "Reading rms distance matrix ");
1702 read_xpm_matrix(opt2fn("-dm", NFILE, fnm), &readmat);
1703 fprintf(stderr, "\n");
1704 if (readmat[0].nx != readmat[0].ny)
1706 gmx_fatal(FARGS, "Matrix (%dx%d) is not square",
1707 readmat[0].nx, readmat[0].ny);
1709 if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1711 gmx_fatal(FARGS, "Matrix size (%dx%d) does not match the number of "
1712 "frames (%d)", readmat[0].nx, readmat[0].ny, nf);
1715 nf = readmat[0].nx;
1716 sfree(time);
1717 time = readmat[0].axis_x;
1718 time_invfac = output_env_get_time_invfactor(oenv);
1719 for (i = 0; i < nf; i++)
1721 time[i] *= time_invfac;
1724 rms = init_mat(readmat[0].nx, method == m_diagonalize);
1725 convert_mat(&(readmat[0]), rms);
1727 nlevels = readmat[0].nmap;
1729 else /* !bReadMat */
1731 rms = init_mat(nf, method == m_diagonalize);
1732 nrms = (static_cast<gmx_int64_t>(nf)*static_cast<gmx_int64_t>(nf-1))/2;
1733 if (!bRMSdist)
1735 fprintf(stderr, "Computing %dx%d RMS deviation matrix\n", nf, nf);
1736 /* Initialize work array */
1737 snew(x1, isize);
1738 for (i1 = 0; i1 < nf; i1++)
1740 for (i2 = i1+1; i2 < nf; i2++)
1742 for (i = 0; i < isize; i++)
1744 copy_rvec(xx[i1][i], x1[i]);
1746 if (bFit)
1748 do_fit(isize, mass, xx[i2], x1);
1750 rmsd = rmsdev(isize, mass, xx[i2], x1);
1751 set_mat_entry(rms, i1, i2, rmsd);
1753 nrms -= nf-i1-1;
1754 fprintf(stderr, "\r# RMSD calculations left: " "%" GMX_PRId64 " ", nrms);
1755 fflush(stderr);
1757 sfree(x1);
1759 else /* bRMSdist */
1761 fprintf(stderr, "Computing %dx%d RMS distance deviation matrix\n", nf, nf);
1763 /* Initiate work arrays */
1764 snew(d1, isize);
1765 snew(d2, isize);
1766 for (i = 0; (i < isize); i++)
1768 snew(d1[i], isize);
1769 snew(d2[i], isize);
1771 for (i1 = 0; i1 < nf; i1++)
1773 calc_dist(isize, xx[i1], d1);
1774 for (i2 = i1+1; (i2 < nf); i2++)
1776 calc_dist(isize, xx[i2], d2);
1777 set_mat_entry(rms, i1, i2, rms_dist(isize, d1, d2));
1779 nrms -= nf-i1-1;
1780 fprintf(stderr, "\r# RMSD calculations left: " "%" GMX_PRId64 " ", nrms);
1781 fflush(stderr);
1783 /* Clean up work arrays */
1784 for (i = 0; (i < isize); i++)
1786 sfree(d1[i]);
1787 sfree(d2[i]);
1789 sfree(d1);
1790 sfree(d2);
1792 fprintf(stderr, "\n\n");
1794 ffprintf_gg(stderr, log, buf, "The RMSD ranges from %g to %g nm\n",
1795 rms->minrms, rms->maxrms);
1796 ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2*rms->sumrms/(nf*(nf-1)));
1797 ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);
1798 ffprintf_g(stderr, log, buf, "Energy of the matrix is %g.\n", mat_energy(rms));
1799 if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms) )
1801 fprintf(stderr, "WARNING: rmsd cutoff %g is outside range of rmsd values "
1802 "%g to %g\n", rmsdcut, rms->minrms, rms->maxrms);
1804 if (bAnalyze && (rmsmin < rms->minrms) )
1806 fprintf(stderr, "WARNING: rmsd minimum %g is below lowest rmsd value %g\n",
1807 rmsmin, rms->minrms);
1809 if (bAnalyze && (rmsmin > rmsdcut) )
1811 fprintf(stderr, "WARNING: rmsd minimum %g is above rmsd cutoff %g\n",
1812 rmsmin, rmsdcut);
1815 /* Plot the rmsd distribution */
1816 rmsd_distribution(opt2fn("-dist", NFILE, fnm), rms, oenv);
1818 if (bBinary)
1820 for (i1 = 0; (i1 < nf); i1++)
1822 for (i2 = 0; (i2 < nf); i2++)
1824 if (rms->mat[i1][i2] < rmsdcut)
1826 rms->mat[i1][i2] = 0;
1828 else
1830 rms->mat[i1][i2] = 1;
1836 snew(clust.cl, nf);
1837 switch (method)
1839 case m_linkage:
1840 /* Now sort the matrix and write it out again */
1841 gather(rms, rmsdcut, &clust);
1842 break;
1843 case m_diagonalize:
1844 /* Do a diagonalization */
1845 snew(eigenvalues, nf);
1846 snew(eigenvectors, nf*nf);
1847 std::memcpy(eigenvectors, rms->mat[0], nf*nf*sizeof(real));
1848 eigensolver(eigenvectors, nf, 0, nf, eigenvalues, rms->mat[0]);
1849 sfree(eigenvectors);
1851 fp = xvgropen(opt2fn("-ev", NFILE, fnm), "RMSD matrix Eigenvalues",
1852 "Eigenvector index", "Eigenvalues (nm\\S2\\N)", oenv);
1853 for (i = 0; (i < nf); i++)
1855 fprintf(fp, "%10d %10g\n", i, eigenvalues[i]);
1857 xvgrclose(fp);
1858 break;
1859 case m_monte_carlo:
1860 orig = init_mat(rms->nn, FALSE);
1861 orig->nn = rms->nn;
1862 copy_t_mat(orig, rms);
1863 mc_optimize(log, rms, time, niter, nrandom, seed, kT,
1864 opt2fn_null("-conv", NFILE, fnm), oenv);
1865 break;
1866 case m_jarvis_patrick:
1867 jarvis_patrick(rms->nn, rms->mat, M, P, bJP_RMSD ? rmsdcut : -1, &clust);
1868 break;
1869 case m_gromos:
1870 gromos(rms->nn, rms->mat, rmsdcut, &clust);
1871 break;
1872 default:
1873 gmx_fatal(FARGS, "DEATH HORROR unknown method \"%s\"", methodname[0]);
1876 if (method == m_monte_carlo || method == m_diagonalize)
1878 fprintf(stderr, "Energy of the matrix after clustering is %g.\n",
1879 mat_energy(rms));
1882 if (bAnalyze)
1884 if (minstruct > 1)
1886 ncluster = plot_clusters(nf, rms->mat, &clust, minstruct);
1888 else
1890 mark_clusters(nf, rms->mat, rms->maxrms, &clust);
1892 init_t_atoms(&useatoms, isize, FALSE);
1893 snew(usextps, isize);
1894 useatoms.resinfo = top.atoms.resinfo;
1895 for (i = 0; i < isize; i++)
1897 useatoms.atomname[i] = top.atoms.atomname[index[i]];
1898 useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1899 useatoms.nres = std::max(useatoms.nres, useatoms.atom[i].resind+1);
1900 copy_rvec(xtps[index[i]], usextps[i]);
1902 useatoms.nr = isize;
1903 analyze_clusters(nf, &clust, rms->mat, isize, &useatoms, usextps, mass, xx, time,
1904 ifsize, fitidx, iosize, outidx,
1905 bReadTraj ? trx_out_fn : nullptr,
1906 opt2fn_null("-sz", NFILE, fnm),
1907 opt2fn_null("-tr", NFILE, fnm),
1908 opt2fn_null("-ntr", NFILE, fnm),
1909 opt2fn_null("-clid", NFILE, fnm),
1910 bAverage, write_ncl, write_nst, rmsmin, bFit, log,
1911 rlo_bot, rhi_bot, oenv);
1913 gmx_ffclose(log);
1915 if (bBinary && !bAnalyze)
1917 /* Make the clustering visible */
1918 for (i2 = 0; (i2 < nf); i2++)
1920 for (i1 = i2+1; (i1 < nf); i1++)
1922 if (rms->mat[i1][i2])
1924 rms->mat[i1][i2] = rms->maxrms;
1930 fp = opt2FILE("-o", NFILE, fnm, "w");
1931 fprintf(stderr, "Writing rms distance/clustering matrix ");
1932 if (bReadMat)
1934 write_xpm(fp, 0, readmat[0].title, readmat[0].legend, readmat[0].label_x,
1935 readmat[0].label_y, nf, nf, readmat[0].axis_x, readmat[0].axis_y,
1936 rms->mat, 0.0, rms->maxrms, rlo_top, rhi_top, &nlevels);
1938 else
1940 sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
1941 sprintf(title, "RMS%sDeviation / Cluster Index",
1942 bRMSdist ? " Distance " : " ");
1943 if (minstruct > 1)
1945 write_xpm_split(fp, 0, title, "RMSD (nm)", buf, buf,
1946 nf, nf, time, time, rms->mat, 0.0, rms->maxrms, &nlevels,
1947 rlo_top, rhi_top, 0.0, ncluster,
1948 &ncluster, TRUE, rlo_bot, rhi_bot);
1950 else
1952 write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
1953 nf, nf, time, time, rms->mat, 0.0, rms->maxrms,
1954 rlo_top, rhi_top, &nlevels);
1957 fprintf(stderr, "\n");
1958 gmx_ffclose(fp);
1959 if (nullptr != orig)
1961 fp = opt2FILE("-om", NFILE, fnm, "w");
1962 sprintf(buf, "Time (%s)", output_env_get_time_unit(oenv));
1963 sprintf(title, "RMS%sDeviation", bRMSdist ? " Distance " : " ");
1964 write_xpm(fp, 0, title, "RMSD (nm)", buf, buf,
1965 nf, nf, time, time, orig->mat, 0.0, orig->maxrms,
1966 rlo_top, rhi_top, &nlevels);
1967 gmx_ffclose(fp);
1968 done_mat(&orig);
1969 sfree(orig);
1971 /* now show what we've done */
1972 do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
1973 do_view(oenv, opt2fn_null("-sz", NFILE, fnm), "-nxy");
1974 if (method == m_diagonalize)
1976 do_view(oenv, opt2fn_null("-ev", NFILE, fnm), "-nxy");
1978 do_view(oenv, opt2fn("-dist", NFILE, fnm), "-nxy");
1979 if (bAnalyze)
1981 do_view(oenv, opt2fn_null("-tr", NFILE, fnm), "-nxy");
1982 do_view(oenv, opt2fn_null("-ntr", NFILE, fnm), "-nxy");
1983 do_view(oenv, opt2fn_null("-clid", NFILE, fnm), "-nxy");
1985 do_view(oenv, opt2fn_null("-conv", NFILE, fnm), nullptr);
1987 return 0;