Make PBC type enumeration into PbcType enum class
[gromacs.git] / src / gromacs / gmxana / gmx_cluster.cpp
blob751c62f6d0f6515f428c441674a812634f64ffc0
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "gmxpre.h"
40 #include <cmath>
41 #include <cstdlib>
42 #include <cstring>
44 #include <algorithm>
46 #include "gromacs/commandline/pargs.h"
47 #include "gromacs/commandline/viewit.h"
48 #include "gromacs/fileio/confio.h"
49 #include "gromacs/fileio/matio.h"
50 #include "gromacs/fileio/trxio.h"
51 #include "gromacs/fileio/xvgr.h"
52 #include "gromacs/gmxana/cmat.h"
53 #include "gromacs/gmxana/gmx_ana.h"
54 #include "gromacs/linearalgebra/eigensolver.h"
55 #include "gromacs/math/do_fit.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/pbcutil/pbc.h"
58 #include "gromacs/pbcutil/rmpbc.h"
59 #include "gromacs/random/threefry.h"
60 #include "gromacs/random/uniformintdistribution.h"
61 #include "gromacs/random/uniformrealdistribution.h"
62 #include "gromacs/topology/index.h"
63 #include "gromacs/topology/topology.h"
64 #include "gromacs/utility/arraysize.h"
65 #include "gromacs/utility/cstringutil.h"
66 #include "gromacs/utility/fatalerror.h"
67 #include "gromacs/utility/futil.h"
68 #include "gromacs/utility/smalloc.h"
69 #include "gromacs/utility/stringutil.h"
71 /* print to two file pointers at once (i.e. stderr and log) */
72 static inline void lo_ffprintf(FILE* fp1, FILE* fp2, const char* buf)
74 fprintf(fp1, "%s", buf);
75 fprintf(fp2, "%s", buf);
78 /* just print a prepared buffer to fp1 and fp2 */
79 static inline void ffprintf(FILE* fp1, FILE* fp2, const char* buf)
81 lo_ffprintf(fp1, fp2, buf);
84 /* prepare buffer with one argument, then print to fp1 and fp2 */
85 static inline void ffprintf_d(FILE* fp1, FILE* fp2, char* buf, const char* fmt, int arg)
87 sprintf(buf, fmt, arg);
88 lo_ffprintf(fp1, fp2, buf);
91 /* prepare buffer with one argument, then print to fp1 and fp2 */
92 static inline void ffprintf_g(FILE* fp1, FILE* fp2, char* buf, const char* fmt, real arg)
94 sprintf(buf, fmt, arg);
95 lo_ffprintf(fp1, fp2, buf);
98 /* prepare buffer with one argument, then print to fp1 and fp2 */
99 static inline void ffprintf_s(FILE* fp1, FILE* fp2, char* buf, const char* fmt, const char* arg)
101 sprintf(buf, fmt, arg);
102 lo_ffprintf(fp1, fp2, buf);
105 /* prepare buffer with two arguments, then print to fp1 and fp2 */
106 static inline void ffprintf_dd(FILE* fp1, FILE* fp2, char* buf, const char* fmt, int arg1, int arg2)
108 sprintf(buf, fmt, arg1, arg2);
109 lo_ffprintf(fp1, fp2, buf);
112 /* prepare buffer with two arguments, then print to fp1 and fp2 */
113 static inline void ffprintf_gg(FILE* fp1, FILE* fp2, char* buf, const char* fmt, real arg1, real arg2)
115 sprintf(buf, fmt, arg1, arg2);
116 lo_ffprintf(fp1, fp2, buf);
119 /* prepare buffer with two arguments, then print to fp1 and fp2 */
120 static inline void ffprintf_ss(FILE* fp1, FILE* fp2, char* buf, const char* fmt, const char* arg1, const char* arg2)
122 sprintf(buf, fmt, arg1, arg2);
123 lo_ffprintf(fp1, fp2, buf);
126 typedef struct
128 int ncl;
129 int* cl;
130 } t_clusters;
132 typedef struct
134 int nr;
135 int* nb;
136 } t_nnb;
138 static void mc_optimize(FILE* log,
139 t_mat* m,
140 real* time,
141 int maxiter,
142 int nrandom,
143 int seed,
144 real kT,
145 const char* conv,
146 gmx_output_env_t* oenv)
148 FILE* fp = nullptr;
149 real ecur, enext, emin, prob, enorm;
150 int i, j, iswap, jswap, nn, nuphill = 0;
151 t_mat* minimum;
153 if (seed == 0)
155 seed = static_cast<int>(gmx::makeRandomSeed());
157 gmx::DefaultRandomEngine rng(seed);
159 if (m->n1 != m->nn)
161 fprintf(stderr, "Can not do Monte Carlo optimization with a non-square matrix.\n");
162 return;
164 printf("\nDoing Monte Carlo optimization to find the smoothest trajectory\n");
165 printf("by reordering the frames to minimize the path between the two structures\n");
166 printf("that have the largest pairwise RMSD.\n");
167 printf("Using random seed %d.\n", seed);
169 iswap = jswap = -1;
170 enorm = m->mat[0][0];
171 for (i = 0; (i < m->n1); i++)
173 for (j = 0; (j < m->nn); j++)
175 if (m->mat[i][j] > enorm)
177 enorm = m->mat[i][j];
178 iswap = i;
179 jswap = j;
183 if ((iswap == -1) || (jswap == -1))
185 fprintf(stderr, "Matrix contains identical values in all fields\n");
186 return;
188 swap_rows(m, 0, iswap);
189 swap_rows(m, m->n1 - 1, jswap);
190 emin = ecur = mat_energy(m);
191 printf("Largest distance %g between %d and %d. Energy: %g.\n", enorm, iswap, jswap, emin);
193 nn = m->nn;
195 /* Initiate and store global minimum */
196 minimum = init_mat(nn, m->b1D);
197 minimum->nn = nn;
198 copy_t_mat(minimum, m);
200 if (nullptr != conv)
202 fp = xvgropen(conv, "Convergence of the MC optimization", "Energy", "Step", oenv);
205 gmx::UniformIntDistribution<int> intDistNN(1, nn - 2); // [1,nn-2]
206 gmx::UniformRealDistribution<real> realDistOne; // [0,1)
208 for (i = 0; (i < maxiter); i++)
210 /* Generate new swapping candidates */
213 iswap = intDistNN(rng);
214 jswap = intDistNN(rng);
215 } while ((iswap == jswap) || (iswap >= nn - 1) || (jswap >= nn - 1));
217 /* Apply swap and compute energy */
218 swap_rows(m, iswap, jswap);
219 enext = mat_energy(m);
221 /* Compute probability */
222 prob = 0;
223 if ((enext < ecur) || (i < nrandom))
225 prob = 1;
226 if (enext < emin)
228 /* Store global minimum */
229 copy_t_mat(minimum, m);
230 emin = enext;
233 else if (kT > 0)
235 /* Try Monte Carlo step */
236 prob = std::exp(-(enext - ecur) / (enorm * kT));
239 if (prob == 1 || realDistOne(rng) < prob)
241 if (enext > ecur)
243 nuphill++;
246 fprintf(log, "Iter: %d Swapped %4d and %4d (energy: %g prob: %g)\n", i, iswap, jswap,
247 enext, prob);
248 if (nullptr != fp)
250 fprintf(fp, "%6d %10g\n", i, enext);
252 ecur = enext;
254 else
256 swap_rows(m, jswap, iswap);
259 fprintf(log, "%d uphill steps were taken during optimization\n", nuphill);
261 /* Now swap the matrix to get it into global minimum mode */
262 copy_t_mat(m, minimum);
264 fprintf(log, "Global minimum energy %g\n", mat_energy(minimum));
265 fprintf(log, "Global minimum energy %g\n", mat_energy(m));
266 fprintf(log, "Swapped time and frame indices and RMSD to next neighbor:\n");
267 for (i = 0; (i < m->nn); i++)
269 fprintf(log, "%10g %5d %10g\n", time[m->m_ind[i]], m->m_ind[i],
270 (i < m->nn - 1) ? m->mat[m->m_ind[i]][m->m_ind[i + 1]] : 0);
273 if (nullptr != fp)
275 xvgrclose(fp);
279 static void calc_dist(int nind, rvec x[], real** d)
281 int i, j;
282 real* xi;
283 rvec dx;
285 for (i = 0; (i < nind - 1); i++)
287 xi = x[i];
288 for (j = i + 1; (j < nind); j++)
290 /* Should use pbc_dx when analysing multiple molecueles,
291 * but the box is not stored for every frame.
293 rvec_sub(xi, x[j], dx);
294 d[i][j] = norm(dx);
299 static real rms_dist(int isize, real** d, real** d_r)
301 int i, j;
302 real r, r2;
304 r2 = 0.0;
305 for (i = 0; (i < isize - 1); i++)
307 for (j = i + 1; (j < isize); j++)
309 r = d[i][j] - d_r[i][j];
310 r2 += r * r;
313 r2 /= gmx::exactDiv(isize * (isize - 1), 2);
315 return std::sqrt(r2);
318 static bool rms_dist_comp(const t_dist& a, const t_dist& b)
320 return a.dist < b.dist;
323 static bool clust_id_comp(const t_clustid& a, const t_clustid& b)
325 return a.clust < b.clust;
328 static bool nrnb_comp(const t_nnb& a, const t_nnb& b)
330 /* return b<a, we want highest first */
331 return b.nr < a.nr;
334 static void gather(t_mat* m, real cutoff, t_clusters* clust)
336 t_clustid* c;
337 t_dist* d;
338 int i, j, k, nn, cid, n1, diff;
339 gmx_bool bChange;
341 /* First we sort the entries in the RMSD matrix */
342 n1 = m->nn;
343 nn = ((n1 - 1) * n1) / 2;
344 snew(d, nn);
345 for (i = k = 0; (i < n1); i++)
347 for (j = i + 1; (j < n1); j++, k++)
349 d[k].i = i;
350 d[k].j = j;
351 d[k].dist = m->mat[i][j];
354 if (k != nn)
356 gmx_incons("gather algortihm");
358 std::sort(d, d + nn, rms_dist_comp);
360 /* Now we make a cluster index for all of the conformations */
361 c = new_clustid(n1);
363 /* Now we check the closest structures, and equalize their cluster numbers */
364 fprintf(stderr, "Linking structures ");
367 fprintf(stderr, "*");
368 bChange = FALSE;
369 for (k = 0; (k < nn) && (d[k].dist < cutoff); k++)
371 diff = c[d[k].j].clust - c[d[k].i].clust;
372 if (diff)
374 bChange = TRUE;
375 if (diff > 0)
377 c[d[k].j].clust = c[d[k].i].clust;
379 else
381 c[d[k].i].clust = c[d[k].j].clust;
385 } while (bChange);
386 fprintf(stderr, "\nSorting and renumbering clusters\n");
387 /* Sort on cluster number */
388 std::sort(c, c + n1, clust_id_comp);
390 /* Renumber clusters */
391 cid = 1;
392 for (k = 1; k < n1; k++)
394 if (c[k].clust != c[k - 1].clust)
396 c[k - 1].clust = cid;
397 cid++;
399 else
401 c[k - 1].clust = cid;
404 c[k - 1].clust = cid;
405 if (debug)
407 for (k = 0; (k < n1); k++)
409 fprintf(debug, "Cluster index for conformation %d: %d\n", c[k].conf, c[k].clust);
412 clust->ncl = cid;
413 for (k = 0; k < n1; k++)
415 clust->cl[c[k].conf] = c[k].clust;
418 sfree(c);
419 sfree(d);
422 static gmx_bool jp_same(int** nnb, int i, int j, int P)
424 gmx_bool bIn;
425 int k, ii, jj, pp;
427 bIn = FALSE;
428 for (k = 0; nnb[i][k] >= 0; k++)
430 bIn = bIn || (nnb[i][k] == j);
432 if (!bIn)
434 return FALSE;
437 bIn = FALSE;
438 for (k = 0; nnb[j][k] >= 0; k++)
440 bIn = bIn || (nnb[j][k] == i);
442 if (!bIn)
444 return FALSE;
447 pp = 0;
448 for (ii = 0; nnb[i][ii] >= 0; ii++)
450 for (jj = 0; nnb[j][jj] >= 0; jj++)
452 if ((nnb[i][ii] == nnb[j][jj]) && (nnb[i][ii] != -1))
454 pp++;
459 return (pp >= P);
462 static void jarvis_patrick(int n1, real** mat, int M, int P, real rmsdcut, t_clusters* clust)
464 t_dist* row;
465 t_clustid* c;
466 int** nnb;
467 int i, j, k, cid, diff, maxval;
468 gmx_bool bChange;
469 real** mcpy = nullptr;
471 if (rmsdcut < 0)
473 rmsdcut = 10000;
476 /* First we sort the entries in the RMSD matrix row by row.
477 * This gives us the nearest neighbor list.
479 snew(nnb, n1);
480 snew(row, n1);
481 for (i = 0; (i < n1); i++)
483 for (j = 0; (j < n1); j++)
485 row[j].j = j;
486 row[j].dist = mat[i][j];
488 std::sort(row, row + n1, rms_dist_comp);
489 if (M > 0)
491 /* Put the M nearest neighbors in the list */
492 snew(nnb[i], M + 1);
493 for (j = k = 0; (k < M) && (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
495 if (row[j].j != i)
497 nnb[i][k] = row[j].j;
498 k++;
501 nnb[i][k] = -1;
503 else
505 /* Put all neighbors nearer than rmsdcut in the list */
506 maxval = 0;
507 k = 0;
508 for (j = 0; (j < n1) && (mat[i][row[j].j] < rmsdcut); j++)
510 if (row[j].j != i)
512 if (k >= maxval)
514 maxval += 10;
515 srenew(nnb[i], maxval);
517 nnb[i][k] = row[j].j;
518 k++;
521 if (k == maxval)
523 srenew(nnb[i], maxval + 1);
525 nnb[i][k] = -1;
528 sfree(row);
529 if (debug)
531 fprintf(debug, "Nearest neighborlist. M = %d, P = %d\n", M, P);
532 for (i = 0; (i < n1); i++)
534 fprintf(debug, "i:%5d nbs:", i);
535 for (j = 0; nnb[i][j] >= 0; j++)
537 fprintf(debug, "%5d[%5.3f]", nnb[i][j], mat[i][nnb[i][j]]);
539 fprintf(debug, "\n");
543 c = new_clustid(n1);
544 fprintf(stderr, "Linking structures ");
545 /* Use mcpy for temporary storage of booleans */
546 mcpy = mk_matrix(n1, n1, FALSE);
547 for (i = 0; i < n1; i++)
549 for (j = i + 1; j < n1; j++)
551 mcpy[i][j] = static_cast<real>(jp_same(nnb, i, j, P));
556 fprintf(stderr, "*");
557 bChange = FALSE;
558 for (i = 0; i < n1; i++)
560 for (j = i + 1; j < n1; j++)
562 if (mcpy[i][j] != 0.0F)
564 diff = c[j].clust - c[i].clust;
565 if (diff)
567 bChange = TRUE;
568 if (diff > 0)
570 c[j].clust = c[i].clust;
572 else
574 c[i].clust = c[j].clust;
580 } while (bChange);
582 fprintf(stderr, "\nSorting and renumbering clusters\n");
583 /* Sort on cluster number */
584 std::sort(c, c + n1, clust_id_comp);
586 /* Renumber clusters */
587 cid = 1;
588 for (k = 1; k < n1; k++)
590 if (c[k].clust != c[k - 1].clust)
592 c[k - 1].clust = cid;
593 cid++;
595 else
597 c[k - 1].clust = cid;
600 c[k - 1].clust = cid;
601 clust->ncl = cid;
602 for (k = 0; k < n1; k++)
604 clust->cl[c[k].conf] = c[k].clust;
606 if (debug)
608 for (k = 0; (k < n1); k++)
610 fprintf(debug, "Cluster index for conformation %d: %d\n", c[k].conf, c[k].clust);
614 done_matrix(n1, &mcpy);
616 sfree(c);
617 for (i = 0; (i < n1); i++)
619 sfree(nnb[i]);
621 sfree(nnb);
624 static void dump_nnb(FILE* fp, const char* title, int n1, t_nnb* nnb)
626 int i, j;
628 /* dump neighbor list */
629 fprintf(fp, "%s", title);
630 for (i = 0; (i < n1); i++)
632 fprintf(fp, "i:%5d #:%5d nbs:", i, nnb[i].nr);
633 for (j = 0; j < nnb[i].nr; j++)
635 fprintf(fp, "%5d", nnb[i].nb[j]);
637 fprintf(fp, "\n");
641 static void gromos(int n1, real** mat, real rmsdcut, t_clusters* clust)
643 t_nnb* nnb;
644 int i, j, k, j1, maxval;
646 /* Put all neighbors nearer than rmsdcut in the list */
647 fprintf(stderr, "Making list of neighbors within cutoff ");
648 snew(nnb, n1);
649 for (i = 0; (i < n1); i++)
651 maxval = 0;
652 k = 0;
653 /* put all neighbors within cut-off in list */
654 for (j = 0; j < n1; j++)
656 if (mat[i][j] < rmsdcut)
658 if (k >= maxval)
660 maxval += 10;
661 srenew(nnb[i].nb, maxval);
663 nnb[i].nb[k] = j;
664 k++;
667 /* store nr of neighbors, we'll need that */
668 nnb[i].nr = k;
669 if (i % (1 + n1 / 100) == 0)
671 fprintf(stderr, "%3d%%\b\b\b\b", (i * 100 + 1) / n1);
674 fprintf(stderr, "%3d%%\n", 100);
676 /* sort neighbor list on number of neighbors, largest first */
677 std::sort(nnb, nnb + n1, nrnb_comp);
679 if (debug)
681 dump_nnb(debug, "Nearest neighborlist after sort.\n", n1, nnb);
684 /* turn first structure with all its neighbors (largest) into cluster
685 remove them from pool of structures and repeat for all remaining */
686 fprintf(stderr, "Finding clusters %4d", 0);
687 /* cluster id's start at 1: */
688 k = 1;
689 while (nnb[0].nr)
691 /* set cluster id (k) for first item in neighborlist */
692 for (j = 0; j < nnb[0].nr; j++)
694 clust->cl[nnb[0].nb[j]] = k;
696 /* mark as done */
697 nnb[0].nr = 0;
698 sfree(nnb[0].nb);
700 /* adjust number of neighbors for others, taking removals into account: */
701 for (i = 1; i < n1 && nnb[i].nr; i++)
703 j1 = 0;
704 for (j = 0; j < nnb[i].nr; j++)
706 /* if this neighbor wasn't removed */
707 if (clust->cl[nnb[i].nb[j]] == 0)
709 /* shift the rest (j1<=j) */
710 nnb[i].nb[j1] = nnb[i].nb[j];
711 /* next */
712 j1++;
715 /* now j1 is the new number of neighbors */
716 nnb[i].nr = j1;
718 /* sort again on nnb[].nr, because we have new # neighbors: */
719 /* but we only need to sort upto i, i.e. when nnb[].nr>0 */
720 std::sort(nnb, nnb + i, nrnb_comp);
722 fprintf(stderr, "\b\b\b\b%4d", k);
723 /* new cluster id */
724 k++;
726 fprintf(stderr, "\n");
727 sfree(nnb);
728 if (debug)
730 fprintf(debug, "Clusters (%d):\n", k);
731 for (i = 0; i < n1; i++)
733 fprintf(debug, " %3d", clust->cl[i]);
735 fprintf(debug, "\n");
738 clust->ncl = k - 1;
741 static rvec** read_whole_trj(const char* fn,
742 int isize,
743 const int index[],
744 int skip,
745 int* nframe,
746 real** time,
747 matrix** boxes,
748 int** frameindices,
749 const gmx_output_env_t* oenv,
750 gmx_bool bPBC,
751 gmx_rmpbc_t gpbc)
753 rvec ** xx, *x;
754 matrix box;
755 real t;
756 int i, j, max_nf;
757 int natom;
758 t_trxstatus* status;
761 max_nf = 0;
762 xx = nullptr;
763 *time = nullptr;
764 natom = read_first_x(oenv, &status, fn, &t, &x, box);
765 i = 0;
766 int clusterIndex = 0;
769 if (bPBC)
771 gmx_rmpbc(gpbc, natom, box, x);
773 if (clusterIndex >= max_nf)
775 max_nf += 10;
776 srenew(xx, max_nf);
777 srenew(*time, max_nf);
778 srenew(*boxes, max_nf);
779 srenew(*frameindices, max_nf);
781 if ((i % skip) == 0)
783 snew(xx[clusterIndex], isize);
784 /* Store only the interesting atoms */
785 for (j = 0; (j < isize); j++)
787 copy_rvec(x[index[j]], xx[clusterIndex][j]);
789 (*time)[clusterIndex] = t;
790 copy_mat(box, (*boxes)[clusterIndex]);
791 (*frameindices)[clusterIndex] = nframes_read(status);
792 clusterIndex++;
794 i++;
795 } while (read_next_x(oenv, status, &t, x, box));
796 fprintf(stderr, "Allocated %zu bytes for frames\n", (max_nf * isize * sizeof(**xx)));
797 fprintf(stderr, "Read %d frames from trajectory %s\n", clusterIndex, fn);
798 *nframe = clusterIndex;
799 sfree(x);
801 return xx;
804 static int plot_clusters(int nf, real** mat, t_clusters* clust, int minstruct)
806 int i, j, ncluster, ci;
807 int *cl_id, *nstruct, *strind;
809 snew(cl_id, nf);
810 snew(nstruct, nf);
811 snew(strind, nf);
812 for (i = 0; i < nf; i++)
814 strind[i] = 0;
815 cl_id[i] = clust->cl[i];
816 nstruct[cl_id[i]]++;
818 ncluster = 0;
819 for (i = 0; i < nf; i++)
821 if (nstruct[i] >= minstruct)
823 ncluster++;
824 for (j = 0; (j < nf); j++)
826 if (cl_id[j] == i)
828 strind[j] = ncluster;
833 ncluster++;
834 fprintf(stderr, "There are %d clusters with at least %d conformations\n", ncluster, minstruct);
836 for (i = 0; (i < nf); i++)
838 ci = cl_id[i];
839 for (j = 0; j < i; j++)
841 if ((ci == cl_id[j]) && (nstruct[ci] >= minstruct))
843 /* color different clusters with different colors, as long as
844 we don't run out of colors */
845 mat[i][j] = strind[i];
847 else
849 mat[i][j] = 0;
853 sfree(strind);
854 sfree(nstruct);
855 sfree(cl_id);
857 return ncluster;
860 static void mark_clusters(int nf, real** mat, real val, t_clusters* clust)
862 int i, j;
864 for (i = 0; i < nf; i++)
866 for (j = 0; j < i; j++)
868 if (clust->cl[i] == clust->cl[j])
870 mat[i][j] = val;
872 else
874 mat[i][j] = 0;
880 static char* parse_filename(const char* fn, int maxnr)
882 int i;
883 char* fnout;
884 const char* ext;
885 char buf[STRLEN];
887 if (std::strchr(fn, '%'))
889 gmx_fatal(FARGS, "will not number filename %s containing '%c'", fn, '%');
891 /* number of digits needed in numbering */
892 i = static_cast<int>((std::log(static_cast<real>(maxnr)) / std::log(10.0)) + 1);
893 /* split fn and ext */
894 ext = std::strrchr(fn, '.');
895 if (!ext)
897 gmx_fatal(FARGS, "cannot separate extension in filename %s", fn);
899 ext++;
900 /* insert e.g. '%03d' between fn and ext */
901 sprintf(buf, "%s%%0%dd.%s", fn, i, ext);
902 snew(fnout, std::strlen(buf) + 1);
903 std::strcpy(fnout, buf);
905 return fnout;
908 static void ana_trans(t_clusters* clust,
909 int nf,
910 const char* transfn,
911 const char* ntransfn,
912 FILE* log,
913 t_rgb rlo,
914 t_rgb rhi,
915 const gmx_output_env_t* oenv)
917 FILE* fp;
918 real **trans, *axis;
919 int* ntrans;
920 int i, ntranst, maxtrans;
921 char buf[STRLEN];
923 snew(ntrans, clust->ncl);
924 snew(trans, clust->ncl);
925 snew(axis, clust->ncl);
926 for (i = 0; i < clust->ncl; i++)
928 axis[i] = i + 1;
929 snew(trans[i], clust->ncl);
931 ntranst = 0;
932 maxtrans = 0;
933 for (i = 1; i < nf; i++)
935 if (clust->cl[i] != clust->cl[i - 1])
937 ntranst++;
938 ntrans[clust->cl[i - 1] - 1]++;
939 ntrans[clust->cl[i] - 1]++;
940 trans[clust->cl[i - 1] - 1][clust->cl[i] - 1]++;
941 maxtrans = static_cast<int>(std::max(static_cast<real>(maxtrans),
942 trans[clust->cl[i] - 1][clust->cl[i - 1] - 1]));
945 ffprintf_dd(stderr, log, buf,
946 "Counted %d transitions in total, "
947 "max %d between two specific clusters\n",
948 ntranst, maxtrans);
949 if (transfn)
951 fp = gmx_ffopen(transfn, "w");
952 i = std::min(maxtrans + 1, 80);
953 write_xpm(fp, 0, "Cluster Transitions", "# transitions", "from cluster", "to cluster",
954 clust->ncl, clust->ncl, axis, axis, trans, 0, maxtrans, rlo, rhi, &i);
955 gmx_ffclose(fp);
957 if (ntransfn)
959 fp = xvgropen(ntransfn, "Cluster Transitions", "Cluster #", "# transitions", oenv);
960 for (i = 0; i < clust->ncl; i++)
962 fprintf(fp, "%5d %5d\n", i + 1, ntrans[i]);
964 xvgrclose(fp);
966 sfree(ntrans);
967 for (i = 0; i < clust->ncl; i++)
969 sfree(trans[i]);
971 sfree(trans);
972 sfree(axis);
975 static void analyze_clusters(int nf,
976 t_clusters* clust,
977 real** rmsd,
978 int natom,
979 t_atoms* atoms,
980 rvec* xtps,
981 real* mass,
982 rvec** xx,
983 real* time,
984 matrix* boxes,
985 int* frameindices,
986 int ifsize,
987 int* fitidx,
988 int iosize,
989 int* outidx,
990 const char* trxfn,
991 const char* sizefn,
992 const char* transfn,
993 const char* ntransfn,
994 const char* clustidfn,
995 const char* clustndxfn,
996 gmx_bool bAverage,
997 int write_ncl,
998 int write_nst,
999 real rmsmin,
1000 gmx_bool bFit,
1001 FILE* log,
1002 t_rgb rlo,
1003 t_rgb rhi,
1004 const gmx_output_env_t* oenv)
1006 FILE* size_fp = nullptr;
1007 FILE* ndxfn = nullptr;
1008 char buf[STRLEN], buf1[40], buf2[40], buf3[40], *trxsfn;
1009 t_trxstatus* trxout = nullptr;
1010 t_trxstatus* trxsout = nullptr;
1011 int i, i1, cl, nstr, *structure, first = 0, midstr;
1012 gmx_bool* bWrite = nullptr;
1013 real r, clrmsd, midrmsd;
1014 rvec* xav = nullptr;
1015 matrix zerobox;
1017 clear_mat(zerobox);
1019 ffprintf_d(stderr, log, buf, "\nFound %d clusters\n\n", clust->ncl);
1020 trxsfn = nullptr;
1021 if (trxfn)
1023 /* do we write all structures? */
1024 if (write_ncl)
1026 trxsfn = parse_filename(trxfn, std::max(write_ncl, clust->ncl));
1027 snew(bWrite, nf);
1029 ffprintf_ss(stderr, log, buf, "Writing %s structure for each cluster to %s\n",
1030 bAverage ? "average" : "middle", trxfn);
1031 if (write_ncl)
1033 /* find out what we want to tell the user:
1034 Writing [all structures|structures with rmsd > %g] for
1035 {all|first %d} clusters {with more than %d structures} to %s */
1036 if (rmsmin > 0.0)
1038 sprintf(buf1, "structures with rmsd > %g", rmsmin);
1040 else
1042 sprintf(buf1, "all structures");
1044 buf2[0] = buf3[0] = '\0';
1045 if (write_ncl >= clust->ncl)
1047 if (write_nst == 0)
1049 sprintf(buf2, "all ");
1052 else
1054 sprintf(buf2, "the first %d ", write_ncl);
1056 if (write_nst)
1058 sprintf(buf3, " with more than %d structures", write_nst);
1060 sprintf(buf, "Writing %s for %sclusters%s to %s\n", buf1, buf2, buf3, trxsfn);
1061 ffprintf(stderr, log, buf);
1064 /* Prepare a reference structure for the orientation of the clusters */
1065 if (bFit)
1067 reset_x(ifsize, fitidx, natom, nullptr, xtps, mass);
1069 trxout = open_trx(trxfn, "w");
1070 /* Calculate the average structure in each cluster, *
1071 * all structures are fitted to the first struture of the cluster */
1072 snew(xav, natom);
1073 GMX_ASSERT(xav, "");
1076 if (transfn || ntransfn)
1078 ana_trans(clust, nf, transfn, ntransfn, log, rlo, rhi, oenv);
1081 if (clustidfn)
1083 FILE* fp = xvgropen(clustidfn, "Clusters", output_env_get_xvgr_tlabel(oenv), "Cluster #", oenv);
1084 if (output_env_get_print_xvgr_codes(oenv))
1086 fprintf(fp, "@ s0 symbol 2\n");
1087 fprintf(fp, "@ s0 symbol size 0.2\n");
1088 fprintf(fp, "@ s0 linestyle 0\n");
1090 for (i = 0; i < nf; i++)
1092 fprintf(fp, "%8g %8d\n", time[i], clust->cl[i]);
1094 xvgrclose(fp);
1096 if (sizefn)
1098 size_fp = xvgropen(sizefn, "Cluster Sizes", "Cluster #", "# Structures", oenv);
1099 if (output_env_get_print_xvgr_codes(oenv))
1101 fprintf(size_fp, "@g%d type %s\n", 0, "bar");
1104 if (clustndxfn && frameindices)
1106 ndxfn = gmx_ffopen(clustndxfn, "w");
1109 snew(structure, nf);
1110 fprintf(log, "\n%3s | %3s %4s | %6s %4s | cluster members\n", "cl.", "#st", "rmsd", "middle",
1111 "rmsd");
1112 for (cl = 1; cl <= clust->ncl; cl++)
1114 /* prepare structures (fit, middle, average) */
1115 if (xav)
1117 for (i = 0; i < natom; i++)
1119 clear_rvec(xav[i]);
1122 nstr = 0;
1123 for (i1 = 0; i1 < nf; i1++)
1125 if (clust->cl[i1] == cl)
1127 structure[nstr] = i1;
1128 nstr++;
1129 if (trxfn && (bAverage || write_ncl))
1131 if (bFit)
1133 reset_x(ifsize, fitidx, natom, nullptr, xx[i1], mass);
1135 if (nstr == 1)
1137 first = i1;
1139 else if (bFit)
1141 do_fit(natom, mass, xx[first], xx[i1]);
1143 if (xav)
1145 for (i = 0; i < natom; i++)
1147 rvec_inc(xav[i], xx[i1][i]);
1153 if (sizefn)
1155 fprintf(size_fp, "%8d %8d\n", cl, nstr);
1157 if (ndxfn)
1159 fprintf(ndxfn, "[Cluster_%04d]\n", cl);
1161 clrmsd = 0;
1162 midstr = 0;
1163 midrmsd = 10000;
1164 for (i1 = 0; i1 < nstr; i1++)
1166 r = 0;
1167 if (nstr > 1)
1169 for (i = 0; i < nstr; i++)
1171 if (i < i1)
1173 r += rmsd[structure[i]][structure[i1]];
1175 else
1177 r += rmsd[structure[i1]][structure[i]];
1180 r /= (nstr - 1);
1182 if (r < midrmsd)
1184 midstr = structure[i1];
1185 midrmsd = r;
1187 clrmsd += r;
1189 clrmsd /= nstr;
1191 /* dump cluster info to logfile */
1192 if (nstr > 1)
1194 sprintf(buf1, "%6.3f", clrmsd);
1195 if (buf1[0] == '0')
1197 buf1[0] = ' ';
1199 sprintf(buf2, "%5.3f", midrmsd);
1200 if (buf2[0] == '0')
1202 buf2[0] = ' ';
1205 else
1207 sprintf(buf1, "%5s", "");
1208 sprintf(buf2, "%5s", "");
1210 fprintf(log, "%3d | %3d %s | %6g%s |", cl, nstr, buf1, time[midstr], buf2);
1211 for (i = 0; i < nstr; i++)
1213 if ((i % 7 == 0) && i)
1215 sprintf(buf, "\n%3s | %3s %4s | %6s %4s |", "", "", "", "", "");
1216 if (ndxfn)
1218 fprintf(ndxfn, "\n");
1221 else
1223 buf[0] = '\0';
1225 i1 = structure[i];
1226 fprintf(log, "%s %6g", buf, time[i1]);
1227 if (ndxfn)
1229 fprintf(ndxfn, " %6d", frameindices[i1] + 1);
1232 fprintf(log, "\n");
1233 if (ndxfn)
1235 fprintf(ndxfn, "\n");
1238 /* write structures to trajectory file(s) */
1239 if (trxfn)
1241 if (write_ncl)
1243 for (i = 0; i < nstr; i++)
1245 bWrite[i] = FALSE;
1248 if (cl < write_ncl + 1 && nstr > write_nst)
1250 /* Dump all structures for this cluster */
1251 /* generate numbered filename (there is a %d in trxfn!) */
1252 sprintf(buf, trxsfn, cl);
1253 trxsout = open_trx(buf, "w");
1254 for (i = 0; i < nstr; i++)
1256 bWrite[i] = TRUE;
1257 if (rmsmin > 0.0)
1259 for (i1 = 0; i1 < i && bWrite[i]; i1++)
1261 if (bWrite[i1])
1263 bWrite[i] = rmsd[structure[i1]][structure[i]] > rmsmin;
1267 if (bWrite[i])
1269 write_trx(trxsout, iosize, outidx, atoms, i, time[structure[i]],
1270 boxes[structure[i]], xx[structure[i]], nullptr, nullptr);
1273 close_trx(trxsout);
1275 /* Dump the average structure for this cluster */
1276 if (bAverage)
1278 for (i = 0; i < natom; i++)
1280 svmul(1.0 / nstr, xav[i], xav[i]);
1283 else
1285 for (i = 0; i < natom; i++)
1287 copy_rvec(xx[midstr][i], xav[i]);
1289 if (bFit)
1291 reset_x(ifsize, fitidx, natom, nullptr, xav, mass);
1294 if (bFit)
1296 do_fit(natom, mass, xtps, xav);
1298 write_trx(trxout, iosize, outidx, atoms, cl, time[midstr], boxes[midstr], xav, nullptr, nullptr);
1301 /* clean up */
1302 if (trxfn)
1304 close_trx(trxout);
1305 sfree(xav);
1306 if (write_ncl)
1308 sfree(bWrite);
1311 sfree(structure);
1312 if (trxsfn)
1314 sfree(trxsfn);
1317 if (size_fp)
1319 xvgrclose(size_fp);
1321 if (ndxfn)
1323 gmx_ffclose(ndxfn);
1327 static void convert_mat(t_matrix* mat, t_mat* rms)
1329 int i, j;
1331 rms->n1 = mat->nx;
1332 matrix2real(mat, rms->mat);
1334 for (i = 0; i < mat->nx; i++)
1336 for (j = i; j < mat->nx; j++)
1338 rms->sumrms += rms->mat[i][j];
1339 rms->maxrms = std::max(rms->maxrms, rms->mat[i][j]);
1340 if (j != i)
1342 rms->minrms = std::min(rms->minrms, rms->mat[i][j]);
1346 rms->nn = mat->nx;
1349 int gmx_cluster(int argc, char* argv[])
1351 const char* desc[] = {
1352 "[THISMODULE] can cluster structures using several different methods.",
1353 "Distances between structures can be determined from a trajectory",
1354 "or read from an [REF].xpm[ref] matrix file with the [TT]-dm[tt] option.",
1355 "RMS deviation after fitting or RMS deviation of atom-pair distances",
1356 "can be used to define the distance between structures.[PAR]",
1358 "single linkage: add a structure to a cluster when its distance to any",
1359 "element of the cluster is less than [TT]cutoff[tt].[PAR]",
1361 "Jarvis Patrick: add a structure to a cluster when this structure",
1362 "and a structure in the cluster have each other as neighbors and",
1363 "they have a least [TT]P[tt] neighbors in common. The neighbors",
1364 "of a structure are the M closest structures or all structures within",
1365 "[TT]cutoff[tt].[PAR]",
1367 "Monte Carlo: reorder the RMSD matrix using Monte Carlo such that",
1368 "the order of the frames is using the smallest possible increments.",
1369 "With this it is possible to make a smooth animation going from one",
1370 "structure to another with the largest possible (e.g.) RMSD between",
1371 "them, however the intermediate steps should be as small as possible.",
1372 "Applications could be to visualize a potential of mean force",
1373 "ensemble of simulations or a pulling simulation. Obviously the user",
1374 "has to prepare the trajectory well (e.g. by not superimposing frames).",
1375 "The final result can be inspect visually by looking at the matrix",
1376 "[REF].xpm[ref] file, which should vary smoothly from bottom to top.[PAR]",
1378 "diagonalization: diagonalize the RMSD matrix.[PAR]",
1380 "gromos: use algorithm as described in Daura [IT]et al.[it]",
1381 "([IT]Angew. Chem. Int. Ed.[it] [BB]1999[bb], [IT]38[it], pp 236-240).",
1382 "Count number of neighbors using cut-off, take structure with",
1383 "largest number of neighbors with all its neighbors as cluster",
1384 "and eliminate it from the pool of clusters. Repeat for remaining",
1385 "structures in pool.[PAR]",
1387 "When the clustering algorithm assigns each structure to exactly one",
1388 "cluster (single linkage, Jarvis Patrick and gromos) and a trajectory",
1389 "file is supplied, the structure with",
1390 "the smallest average distance to the others or the average structure",
1391 "or all structures for each cluster will be written to a trajectory",
1392 "file. When writing all structures, separate numbered files are made",
1393 "for each cluster.[PAR]",
1395 "Two output files are always written:",
1397 " * [TT]-o[tt] writes the RMSD values in the upper left half of the matrix",
1398 " and a graphical depiction of the clusters in the lower right half",
1399 " When [TT]-minstruct[tt] = 1 the graphical depiction is black",
1400 " when two structures are in the same cluster.",
1401 " When [TT]-minstruct[tt] > 1 different colors will be used for each",
1402 " cluster.",
1403 " * [TT]-g[tt] writes information on the options used and a detailed list",
1404 " of all clusters and their members.",
1407 "Additionally, a number of optional output files can be written:",
1409 " * [TT]-dist[tt] writes the RMSD distribution.",
1410 " * [TT]-ev[tt] writes the eigenvectors of the RMSD matrix",
1411 " diagonalization.",
1412 " * [TT]-sz[tt] writes the cluster sizes.",
1413 " * [TT]-tr[tt] writes a matrix of the number transitions between",
1414 " cluster pairs.",
1415 " * [TT]-ntr[tt] writes the total number of transitions to or from",
1416 " each cluster.",
1417 " * [TT]-clid[tt] writes the cluster number as a function of time.",
1418 " * [TT]-clndx[tt] writes the frame numbers corresponding to the clusters to the",
1419 " specified index file to be read into trjconv.",
1420 " * [TT]-cl[tt] writes average (with option [TT]-av[tt]) or central",
1421 " structure of each cluster or writes numbered files with cluster members",
1422 " for a selected set of clusters (with option [TT]-wcl[tt], depends on",
1423 " [TT]-nst[tt] and [TT]-rmsmin[tt]). The center of a cluster is the",
1424 " structure with the smallest average RMSD from all other structures",
1425 " of the cluster.",
1428 FILE * fp, *log;
1429 int nf = 0, i, i1, i2, j;
1430 int64_t nrms = 0;
1432 matrix box;
1433 matrix* boxes = nullptr;
1434 rvec * xtps, *usextps, *x1, **xx = nullptr;
1435 const char *fn, *trx_out_fn;
1436 t_clusters clust;
1437 t_mat * rms, *orig = nullptr;
1438 real* eigenvalues;
1439 t_topology top;
1440 PbcType pbcType;
1441 t_atoms useatoms;
1442 real* eigenvectors;
1444 int isize = 0, ifsize = 0, iosize = 0;
1445 int * index = nullptr, *fitidx = nullptr, *outidx = nullptr, *frameindices = nullptr;
1446 char* grpname;
1447 real rmsd, **d1, **d2, *time = nullptr, time_invfac, *mass = nullptr;
1448 char buf[STRLEN], buf1[80];
1449 gmx_bool bAnalyze, bUseRmsdCut, bJP_RMSD = FALSE, bReadMat, bReadTraj, bPBC = TRUE;
1451 int method, ncluster = 0;
1452 static const char* methodname[] = { nullptr, "linkage", "jarvis-patrick",
1453 "monte-carlo", "diagonalization", "gromos",
1454 nullptr };
1455 enum
1457 m_null,
1458 m_linkage,
1459 m_jarvis_patrick,
1460 m_monte_carlo,
1461 m_diagonalize,
1462 m_gromos,
1463 m_nr
1465 /* Set colors for plotting: white = zero RMS, black = maximum */
1466 static t_rgb rlo_top = { 1.0, 1.0, 1.0 };
1467 static t_rgb rhi_top = { 0.0, 0.0, 0.0 };
1468 static t_rgb rlo_bot = { 1.0, 1.0, 1.0 };
1469 static t_rgb rhi_bot = { 0.0, 0.0, 1.0 };
1470 static int nlevels = 40, skip = 1;
1471 static real scalemax = -1.0, rmsdcut = 0.1, rmsmin = 0.0;
1472 gmx_bool bRMSdist = FALSE, bBinary = FALSE, bAverage = FALSE, bFit = TRUE;
1473 static int niter = 10000, nrandom = 0, seed = 0, write_ncl = 0, write_nst = 1, minstruct = 1;
1474 static real kT = 1e-3;
1475 static int M = 10, P = 3;
1476 gmx_output_env_t* oenv;
1477 gmx_rmpbc_t gpbc = nullptr;
1479 t_pargs pa[] = {
1480 { "-dista", FALSE, etBOOL, { &bRMSdist }, "Use RMSD of distances instead of RMS deviation" },
1481 { "-nlevels",
1482 FALSE,
1483 etINT,
1484 { &nlevels },
1485 "Discretize RMSD matrix in this number of levels" },
1486 { "-cutoff",
1487 FALSE,
1488 etREAL,
1489 { &rmsdcut },
1490 "RMSD cut-off (nm) for two structures to be neighbor" },
1491 { "-fit", FALSE, etBOOL, { &bFit }, "Use least squares fitting before RMSD calculation" },
1492 { "-max", FALSE, etREAL, { &scalemax }, "Maximum level in RMSD matrix" },
1493 { "-skip", FALSE, etINT, { &skip }, "Only analyze every nr-th frame" },
1494 { "-av",
1495 FALSE,
1496 etBOOL,
1497 { &bAverage },
1498 "Write average instead of middle structure for each cluster" },
1499 { "-wcl",
1500 FALSE,
1501 etINT,
1502 { &write_ncl },
1503 "Write the structures for this number of clusters to numbered files" },
1504 { "-nst",
1505 FALSE,
1506 etINT,
1507 { &write_nst },
1508 "Only write all structures if more than this number of structures per cluster" },
1509 { "-rmsmin",
1510 FALSE,
1511 etREAL,
1512 { &rmsmin },
1513 "minimum rms difference with rest of cluster for writing structures" },
1514 { "-method", FALSE, etENUM, { methodname }, "Method for cluster determination" },
1515 { "-minstruct",
1516 FALSE,
1517 etINT,
1518 { &minstruct },
1519 "Minimum number of structures in cluster for coloring in the [REF].xpm[ref] file" },
1520 { "-binary",
1521 FALSE,
1522 etBOOL,
1523 { &bBinary },
1524 "Treat the RMSD matrix as consisting of 0 and 1, where the cut-off "
1525 "is given by [TT]-cutoff[tt]" },
1526 { "-M",
1527 FALSE,
1528 etINT,
1529 { &M },
1530 "Number of nearest neighbors considered for Jarvis-Patrick algorithm, "
1531 "0 is use cutoff" },
1532 { "-P",
1533 FALSE,
1534 etINT,
1535 { &P },
1536 "Number of identical nearest neighbors required to form a cluster" },
1537 { "-seed",
1538 FALSE,
1539 etINT,
1540 { &seed },
1541 "Random number seed for Monte Carlo clustering algorithm (0 means generate)" },
1542 { "-niter", FALSE, etINT, { &niter }, "Number of iterations for MC" },
1543 { "-nrandom",
1544 FALSE,
1545 etINT,
1546 { &nrandom },
1547 "The first iterations for MC may be done complete random, to shuffle the frames" },
1548 { "-kT",
1549 FALSE,
1550 etREAL,
1551 { &kT },
1552 "Boltzmann weighting factor for Monte Carlo optimization "
1553 "(zero turns off uphill steps)" },
1554 { "-pbc", FALSE, etBOOL, { &bPBC }, "PBC check" }
1556 t_filenm fnm[] = {
1557 { efTRX, "-f", nullptr, ffOPTRD }, { efTPS, "-s", nullptr, ffREAD },
1558 { efNDX, nullptr, nullptr, ffOPTRD }, { efXPM, "-dm", "rmsd", ffOPTRD },
1559 { efXPM, "-om", "rmsd-raw", ffWRITE }, { efXPM, "-o", "rmsd-clust", ffWRITE },
1560 { efLOG, "-g", "cluster", ffWRITE }, { efXVG, "-dist", "rmsd-dist", ffOPTWR },
1561 { efXVG, "-ev", "rmsd-eig", ffOPTWR }, { efXVG, "-conv", "mc-conv", ffOPTWR },
1562 { efXVG, "-sz", "clust-size", ffOPTWR }, { efXPM, "-tr", "clust-trans", ffOPTWR },
1563 { efXVG, "-ntr", "clust-trans", ffOPTWR }, { efXVG, "-clid", "clust-id", ffOPTWR },
1564 { efTRX, "-cl", "clusters.pdb", ffOPTWR }, { efNDX, "-clndx", "clusters.ndx", ffOPTWR }
1566 #define NFILE asize(fnm)
1568 if (!parse_common_args(&argc, argv, PCA_CAN_VIEW | PCA_CAN_TIME | PCA_TIME_UNIT, NFILE, fnm,
1569 asize(pa), pa, asize(desc), desc, 0, nullptr, &oenv))
1571 return 0;
1574 /* parse options */
1575 bReadMat = opt2bSet("-dm", NFILE, fnm);
1576 bReadTraj = opt2bSet("-f", NFILE, fnm) || !bReadMat;
1577 if (opt2parg_bSet("-av", asize(pa), pa) || opt2parg_bSet("-wcl", asize(pa), pa)
1578 || opt2parg_bSet("-nst", asize(pa), pa) || opt2parg_bSet("-rmsmin", asize(pa), pa)
1579 || opt2bSet("-cl", NFILE, fnm))
1581 trx_out_fn = opt2fn("-cl", NFILE, fnm);
1583 else
1585 trx_out_fn = nullptr;
1587 if (bReadMat && output_env_get_time_factor(oenv) != 1)
1589 fprintf(stderr, "\nWarning: assuming the time unit in %s is %s\n",
1590 opt2fn("-dm", NFILE, fnm), output_env_get_time_unit(oenv).c_str());
1592 if (trx_out_fn && !bReadTraj)
1594 fprintf(stderr,
1595 "\nWarning: "
1596 "cannot write cluster structures without reading trajectory\n"
1597 " ignoring option -cl %s\n",
1598 trx_out_fn);
1601 method = 1;
1602 while (method < m_nr && gmx_strcasecmp(methodname[0], methodname[method]) != 0)
1604 method++;
1606 if (method == m_nr)
1608 gmx_fatal(FARGS, "Invalid method");
1611 bAnalyze = (method == m_linkage || method == m_jarvis_patrick || method == m_gromos);
1613 /* Open log file */
1614 log = ftp2FILE(efLOG, NFILE, fnm, "w");
1616 fprintf(stderr, "Using %s method for clustering\n", methodname[0]);
1617 fprintf(log, "Using %s method for clustering\n", methodname[0]);
1619 /* check input and write parameters to log file */
1620 bUseRmsdCut = FALSE;
1621 if (method == m_jarvis_patrick)
1623 bJP_RMSD = (M == 0) || opt2parg_bSet("-cutoff", asize(pa), pa);
1624 if ((M < 0) || (M == 1))
1626 gmx_fatal(FARGS, "M (%d) must be 0 or larger than 1", M);
1628 if (M < 2)
1630 sprintf(buf1, "Will use P=%d and RMSD cutoff (%g)", P, rmsdcut);
1631 bUseRmsdCut = TRUE;
1633 else
1635 if (P >= M)
1637 gmx_fatal(FARGS, "Number of neighbors required (P) must be less than M");
1639 if (bJP_RMSD)
1641 sprintf(buf1, "Will use P=%d, M=%d and RMSD cutoff (%g)", P, M, rmsdcut);
1642 bUseRmsdCut = TRUE;
1644 else
1646 sprintf(buf1, "Will use P=%d, M=%d", P, M);
1649 ffprintf_s(stderr, log, buf, "%s for determining the neighbors\n\n", buf1);
1651 else /* method != m_jarvis */
1653 bUseRmsdCut = (bBinary || method == m_linkage || method == m_gromos);
1655 if (bUseRmsdCut && method != m_jarvis_patrick)
1657 fprintf(log, "Using RMSD cutoff %g nm\n", rmsdcut);
1659 if (method == m_monte_carlo)
1661 fprintf(log, "Using %d iterations\n", niter);
1664 if (skip < 1)
1666 gmx_fatal(FARGS, "skip (%d) should be >= 1", skip);
1669 /* get input */
1670 if (bReadTraj)
1672 /* don't read mass-database as masses (and top) are not used */
1673 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &pbcType, &xtps, nullptr, box, TRUE);
1674 if (bPBC)
1676 gpbc = gmx_rmpbc_init(&top.idef, pbcType, top.atoms.nr);
1679 fprintf(stderr, "\nSelect group for least squares fit%s:\n", bReadMat ? "" : " and RMSD calculation");
1680 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm), 1, &ifsize, &fitidx, &grpname);
1681 if (trx_out_fn)
1683 fprintf(stderr, "\nSelect group for output:\n");
1684 get_index(&(top.atoms), ftp2fn_null(efNDX, NFILE, fnm), 1, &iosize, &outidx, &grpname);
1685 /* merge and convert both index groups: */
1686 /* first copy outidx to index. let outidx refer to elements in index */
1687 snew(index, iosize);
1688 isize = iosize;
1689 for (i = 0; i < iosize; i++)
1691 index[i] = outidx[i];
1692 outidx[i] = i;
1694 /* now lookup elements from fitidx in index, add them if necessary
1695 and also let fitidx refer to elements in index */
1696 for (i = 0; i < ifsize; i++)
1698 j = 0;
1699 while (j < isize && index[j] != fitidx[i])
1701 j++;
1703 if (j >= isize)
1705 /* slow this way, but doesn't matter much */
1706 isize++;
1707 srenew(index, isize);
1709 index[j] = fitidx[i];
1710 fitidx[i] = j;
1713 else /* !trx_out_fn */
1715 isize = ifsize;
1716 snew(index, isize);
1717 for (i = 0; i < ifsize; i++)
1719 index[i] = fitidx[i];
1720 fitidx[i] = i;
1725 if (bReadTraj)
1727 /* Loop over first coordinate file */
1728 fn = opt2fn("-f", NFILE, fnm);
1730 xx = read_whole_trj(fn, isize, index, skip, &nf, &time, &boxes, &frameindices, oenv, bPBC, gpbc);
1731 output_env_conv_times(oenv, nf, time);
1732 if (!bRMSdist || bAnalyze)
1734 /* Center all frames on zero */
1735 snew(mass, isize);
1736 for (i = 0; i < ifsize; i++)
1738 mass[fitidx[i]] = top.atoms.atom[index[fitidx[i]]].m;
1740 if (bFit)
1742 for (i = 0; i < nf; i++)
1744 reset_x(ifsize, fitidx, isize, nullptr, xx[i], mass);
1748 if (bPBC)
1750 gmx_rmpbc_done(gpbc);
1754 std::vector<t_matrix> readmat;
1755 if (bReadMat)
1757 fprintf(stderr, "Reading rms distance matrix ");
1758 readmat = read_xpm_matrix(opt2fn("-dm", NFILE, fnm));
1759 fprintf(stderr, "\n");
1760 if (readmat[0].nx != readmat[0].ny)
1762 gmx_fatal(FARGS, "Matrix (%dx%d) is not square", readmat[0].nx, readmat[0].ny);
1764 if (bReadTraj && bAnalyze && (readmat[0].nx != nf))
1766 gmx_fatal(FARGS,
1767 "Matrix size (%dx%d) does not match the number of "
1768 "frames (%d)",
1769 readmat[0].nx, readmat[0].ny, nf);
1772 nf = readmat[0].nx;
1773 sfree(time);
1774 time = readmat[0].axis_x.data();
1775 time_invfac = output_env_get_time_invfactor(oenv);
1776 for (i = 0; i < nf; i++)
1778 time[i] *= time_invfac;
1781 rms = init_mat(readmat[0].nx, method == m_diagonalize);
1782 convert_mat(&(readmat[0]), rms);
1784 nlevels = gmx::ssize(readmat[0].map);
1786 else /* !bReadMat */
1788 rms = init_mat(nf, method == m_diagonalize);
1789 nrms = (static_cast<int64_t>(nf) * static_cast<int64_t>(nf - 1)) / 2;
1790 if (!bRMSdist)
1792 fprintf(stderr, "Computing %dx%d RMS deviation matrix\n", nf, nf);
1793 /* Initialize work array */
1794 snew(x1, isize);
1795 for (i1 = 0; i1 < nf; i1++)
1797 for (i2 = i1 + 1; i2 < nf; i2++)
1799 for (i = 0; i < isize; i++)
1801 copy_rvec(xx[i1][i], x1[i]);
1803 if (bFit)
1805 do_fit(isize, mass, xx[i2], x1);
1807 rmsd = rmsdev(isize, mass, xx[i2], x1);
1808 set_mat_entry(rms, i1, i2, rmsd);
1810 nrms -= nf - i1 - 1;
1811 fprintf(stderr,
1812 "\r# RMSD calculations left: "
1813 "%" PRId64 " ",
1814 nrms);
1815 fflush(stderr);
1817 sfree(x1);
1819 else /* bRMSdist */
1821 fprintf(stderr, "Computing %dx%d RMS distance deviation matrix\n", nf, nf);
1823 /* Initiate work arrays */
1824 snew(d1, isize);
1825 snew(d2, isize);
1826 for (i = 0; (i < isize); i++)
1828 snew(d1[i], isize);
1829 snew(d2[i], isize);
1831 for (i1 = 0; i1 < nf; i1++)
1833 calc_dist(isize, xx[i1], d1);
1834 for (i2 = i1 + 1; (i2 < nf); i2++)
1836 calc_dist(isize, xx[i2], d2);
1837 set_mat_entry(rms, i1, i2, rms_dist(isize, d1, d2));
1839 nrms -= nf - i1 - 1;
1840 fprintf(stderr,
1841 "\r# RMSD calculations left: "
1842 "%" PRId64 " ",
1843 nrms);
1844 fflush(stderr);
1846 /* Clean up work arrays */
1847 for (i = 0; (i < isize); i++)
1849 sfree(d1[i]);
1850 sfree(d2[i]);
1852 sfree(d1);
1853 sfree(d2);
1855 fprintf(stderr, "\n\n");
1857 ffprintf_gg(stderr, log, buf, "The RMSD ranges from %g to %g nm\n", rms->minrms, rms->maxrms);
1858 ffprintf_g(stderr, log, buf, "Average RMSD is %g\n", 2 * rms->sumrms / (nf * (nf - 1)));
1859 ffprintf_d(stderr, log, buf, "Number of structures for matrix %d\n", nf);
1860 ffprintf_g(stderr, log, buf, "Energy of the matrix is %g.\n", mat_energy(rms));
1861 if (bUseRmsdCut && (rmsdcut < rms->minrms || rmsdcut > rms->maxrms))
1863 fprintf(stderr,
1864 "WARNING: rmsd cutoff %g is outside range of rmsd values "
1865 "%g to %g\n",
1866 rmsdcut, rms->minrms, rms->maxrms);
1868 if (bAnalyze && (rmsmin < rms->minrms))
1870 fprintf(stderr, "WARNING: rmsd minimum %g is below lowest rmsd value %g\n", rmsmin, rms->minrms);
1872 if (bAnalyze && (rmsmin > rmsdcut))
1874 fprintf(stderr, "WARNING: rmsd minimum %g is above rmsd cutoff %g\n", rmsmin, rmsdcut);
1877 /* Plot the rmsd distribution */
1878 rmsd_distribution(opt2fn("-dist", NFILE, fnm), rms, oenv);
1880 if (bBinary)
1882 for (i1 = 0; (i1 < nf); i1++)
1884 for (i2 = 0; (i2 < nf); i2++)
1886 if (rms->mat[i1][i2] < rmsdcut)
1888 rms->mat[i1][i2] = 0;
1890 else
1892 rms->mat[i1][i2] = 1;
1898 snew(clust.cl, nf);
1899 switch (method)
1901 case m_linkage:
1902 /* Now sort the matrix and write it out again */
1903 gather(rms, rmsdcut, &clust);
1904 break;
1905 case m_diagonalize:
1906 /* Do a diagonalization */
1907 snew(eigenvalues, nf);
1908 snew(eigenvectors, nf * nf);
1909 std::memcpy(eigenvectors, rms->mat[0], nf * nf * sizeof(real));
1910 eigensolver(eigenvectors, nf, 0, nf, eigenvalues, rms->mat[0]);
1911 sfree(eigenvectors);
1913 fp = xvgropen(opt2fn("-ev", NFILE, fnm), "RMSD matrix Eigenvalues", "Eigenvector index",
1914 "Eigenvalues (nm\\S2\\N)", oenv);
1915 for (i = 0; (i < nf); i++)
1917 fprintf(fp, "%10d %10g\n", i, eigenvalues[i]);
1919 xvgrclose(fp);
1920 break;
1921 case m_monte_carlo:
1922 orig = init_mat(rms->nn, FALSE);
1923 orig->nn = rms->nn;
1924 copy_t_mat(orig, rms);
1925 mc_optimize(log, rms, time, niter, nrandom, seed, kT, opt2fn_null("-conv", NFILE, fnm), oenv);
1926 break;
1927 case m_jarvis_patrick:
1928 jarvis_patrick(rms->nn, rms->mat, M, P, bJP_RMSD ? rmsdcut : -1, &clust);
1929 break;
1930 case m_gromos: gromos(rms->nn, rms->mat, rmsdcut, &clust); break;
1931 default: gmx_fatal(FARGS, "DEATH HORROR unknown method \"%s\"", methodname[0]);
1934 if (method == m_monte_carlo || method == m_diagonalize)
1936 fprintf(stderr, "Energy of the matrix after clustering is %g.\n", mat_energy(rms));
1939 if (bAnalyze)
1941 if (minstruct > 1)
1943 ncluster = plot_clusters(nf, rms->mat, &clust, minstruct);
1945 else
1947 mark_clusters(nf, rms->mat, rms->maxrms, &clust);
1949 init_t_atoms(&useatoms, isize, FALSE);
1950 snew(usextps, isize);
1951 useatoms.resinfo = top.atoms.resinfo;
1952 for (i = 0; i < isize; i++)
1954 useatoms.atomname[i] = top.atoms.atomname[index[i]];
1955 useatoms.atom[i].resind = top.atoms.atom[index[i]].resind;
1956 useatoms.nres = std::max(useatoms.nres, useatoms.atom[i].resind + 1);
1957 copy_rvec(xtps[index[i]], usextps[i]);
1959 useatoms.nr = isize;
1960 analyze_clusters(nf, &clust, rms->mat, isize, &useatoms, usextps, mass, xx, time, boxes,
1961 frameindices, ifsize, fitidx, iosize, outidx,
1962 bReadTraj ? trx_out_fn : nullptr, opt2fn_null("-sz", NFILE, fnm),
1963 opt2fn_null("-tr", NFILE, fnm), opt2fn_null("-ntr", NFILE, fnm),
1964 opt2fn_null("-clid", NFILE, fnm), opt2fn_null("-clndx", NFILE, fnm),
1965 bAverage, write_ncl, write_nst, rmsmin, bFit, log, rlo_bot, rhi_bot, oenv);
1966 sfree(boxes);
1967 sfree(frameindices);
1969 gmx_ffclose(log);
1971 if (bBinary && !bAnalyze)
1973 /* Make the clustering visible */
1974 for (i2 = 0; (i2 < nf); i2++)
1976 for (i1 = i2 + 1; (i1 < nf); i1++)
1978 if (rms->mat[i1][i2] != 0.0F)
1980 rms->mat[i1][i2] = rms->maxrms;
1986 fp = opt2FILE("-o", NFILE, fnm, "w");
1987 fprintf(stderr, "Writing rms distance/clustering matrix ");
1988 if (bReadMat)
1990 write_xpm(fp, 0, readmat[0].title, readmat[0].legend, readmat[0].label_x,
1991 readmat[0].label_y, nf, nf, readmat[0].axis_x.data(), readmat[0].axis_y.data(),
1992 rms->mat, 0.0, rms->maxrms, rlo_top, rhi_top, &nlevels);
1994 else
1996 auto timeLabel = output_env_get_time_label(oenv);
1997 auto title = gmx::formatString("RMS%sDeviation / Cluster Index", bRMSdist ? " Distance " : " ");
1998 if (minstruct > 1)
2000 write_xpm_split(fp, 0, title, "RMSD (nm)", timeLabel, timeLabel, nf, nf, time, time,
2001 rms->mat, 0.0, rms->maxrms, &nlevels, rlo_top, rhi_top, 0.0, ncluster,
2002 &ncluster, TRUE, rlo_bot, rhi_bot);
2004 else
2006 write_xpm(fp, 0, title, "RMSD (nm)", timeLabel, timeLabel, nf, nf, time, time, rms->mat,
2007 0.0, rms->maxrms, rlo_top, rhi_top, &nlevels);
2010 fprintf(stderr, "\n");
2011 gmx_ffclose(fp);
2012 if (nullptr != orig)
2014 fp = opt2FILE("-om", NFILE, fnm, "w");
2015 auto timeLabel = output_env_get_time_label(oenv);
2016 auto title = gmx::formatString("RMS%sDeviation", bRMSdist ? " Distance " : " ");
2017 write_xpm(fp, 0, title, "RMSD (nm)", timeLabel, timeLabel, nf, nf, time, time, orig->mat,
2018 0.0, orig->maxrms, rlo_top, rhi_top, &nlevels);
2019 gmx_ffclose(fp);
2020 done_mat(&orig);
2021 sfree(orig);
2023 /* now show what we've done */
2024 do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
2025 do_view(oenv, opt2fn_null("-sz", NFILE, fnm), "-nxy");
2026 if (method == m_diagonalize)
2028 do_view(oenv, opt2fn_null("-ev", NFILE, fnm), "-nxy");
2030 do_view(oenv, opt2fn("-dist", NFILE, fnm), "-nxy");
2031 if (bAnalyze)
2033 do_view(oenv, opt2fn_null("-tr", NFILE, fnm), "-nxy");
2034 do_view(oenv, opt2fn_null("-ntr", NFILE, fnm), "-nxy");
2035 do_view(oenv, opt2fn_null("-clid", NFILE, fnm), "-nxy");
2037 do_view(oenv, opt2fn_null("-conv", NFILE, fnm), nullptr);
2039 return 0;