Split lines with many copyright years
[gromacs.git] / src / gromacs / gmxana / gmx_rmsf.cpp
blob1884a9f6a7853f8f5f8772b66f6d9399cbfd0bb1
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
7 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
8 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
9 * and including many others, as listed in the AUTHORS file in the
10 * top-level source directory and at http://www.gromacs.org.
12 * GROMACS is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public License
14 * as published by the Free Software Foundation; either version 2.1
15 * of the License, or (at your option) any later version.
17 * GROMACS is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with GROMACS; if not, see
24 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
25 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
27 * If you want to redistribute modifications to GROMACS, please
28 * consider that scientific software is very special. Version
29 * control is crucial - bugs must be traceable. We will be happy to
30 * consider code for inclusion in the official distribution, but
31 * derived work must not be called official GROMACS. Details are found
32 * in the README & COPYING files - if they are missing, get the
33 * official version at http://www.gromacs.org.
35 * To help us fund GROMACS development, we humbly ask that you cite
36 * the research papers on the package. Check out http://www.gromacs.org.
38 #include "gmxpre.h"
40 #include <cassert>
41 #include <cmath>
42 #include <cstring>
44 #include "gromacs/commandline/pargs.h"
45 #include "gromacs/commandline/viewit.h"
46 #include "gromacs/fileio/confio.h"
47 #include "gromacs/fileio/pdbio.h"
48 #include "gromacs/fileio/trxio.h"
49 #include "gromacs/fileio/xvgr.h"
50 #include "gromacs/gmxana/gmx_ana.h"
51 #include "gromacs/gmxana/princ.h"
52 #include "gromacs/linearalgebra/eigensolver.h"
53 #include "gromacs/math/do_fit.h"
54 #include "gromacs/math/functions.h"
55 #include "gromacs/math/utilities.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/pbcutil/rmpbc.h"
58 #include "gromacs/topology/index.h"
59 #include "gromacs/topology/topology.h"
60 #include "gromacs/utility/arraysize.h"
61 #include "gromacs/utility/cstringutil.h"
62 #include "gromacs/utility/futil.h"
63 #include "gromacs/utility/smalloc.h"
65 static real find_pdb_bfac(const t_atoms* atoms, t_resinfo* ri, char* atomnm)
67 char rresnm[8];
68 int i;
70 std::strcpy(rresnm, *ri->name);
71 rresnm[3] = '\0';
72 for (i = 0; (i < atoms->nr); i++)
74 if ((ri->nr == atoms->resinfo[atoms->atom[i].resind].nr)
75 && (ri->ic == atoms->resinfo[atoms->atom[i].resind].ic)
76 && (std::strcmp(*atoms->resinfo[atoms->atom[i].resind].name, rresnm) == 0)
77 && (std::strstr(*atoms->atomname[i], atomnm) != nullptr))
79 break;
82 if (i == atoms->nr)
84 fprintf(stderr, "\rCan not find %s%d-%s in pdbfile\n", rresnm, ri->nr, atomnm);
85 fflush(stderr);
86 return 0.0;
89 return atoms->pdbinfo[i].bfac;
92 static void correlate_aniso(const char* fn, t_atoms* ref, t_atoms* calc, const gmx_output_env_t* oenv)
94 FILE* fp;
95 int i, j;
97 fp = xvgropen(fn, "Correlation between X-Ray and Computed Uij", "X-Ray", "Computed", oenv);
98 for (i = 0; (i < ref->nr); i++)
100 if (ref->pdbinfo[i].bAnisotropic)
102 for (j = U11; (j <= U23); j++)
104 fprintf(fp, "%10d %10d\n", ref->pdbinfo[i].uij[j], calc->pdbinfo[i].uij[j]);
108 xvgrclose(fp);
111 static void average_residues(double f[],
112 double** U,
113 int uind,
114 int isize,
115 const int index[],
116 const real w_rls[],
117 const t_atoms* atoms)
119 int i, j, start;
120 double av, m;
122 start = 0;
123 av = 0;
124 m = 0;
125 if (!f)
127 assert(U);
129 for (i = 0; i < isize; i++)
131 av += w_rls[index[i]] * (f != nullptr ? f[i] : U[i][uind]);
132 m += w_rls[index[i]];
133 if (i + 1 == isize || atoms->atom[index[i]].resind != atoms->atom[index[i + 1]].resind)
135 av /= m;
136 if (f != nullptr)
138 for (j = start; j <= i; j++)
140 f[i] = av;
143 else
145 for (j = start; j <= i; j++)
147 U[j][uind] = av;
150 start = i + 1;
151 av = 0;
152 m = 0;
157 static void print_dir(FILE* fp, real* Uaver)
159 real eigvec[DIM * DIM];
160 real tmp[DIM * DIM];
161 rvec eigval;
162 int d, m;
164 fprintf(fp, "MSF X Y Z\n");
165 for (d = 0; d < DIM; d++)
167 fprintf(fp, " %c ", 'X' + d - XX);
168 for (m = 0; m < DIM; m++)
170 fprintf(fp, " %9.2e", Uaver[3 * m + d]);
172 fprintf(fp, "%s\n", m == DIM ? " (nm^2)" : "");
175 for (m = 0; m < DIM * DIM; m++)
177 tmp[m] = Uaver[m];
181 eigensolver(tmp, DIM, 0, DIM, eigval, eigvec);
183 fprintf(fp, "\n Eigenvectors\n\n");
184 fprintf(fp, "Eigv %-8.2e %-8.2e %-8.2e (nm^2)\n\n", eigval[2], eigval[1], eigval[0]);
185 for (d = 0; d < DIM; d++)
187 fprintf(fp, " %c ", 'X' + d - XX);
188 for (m = DIM - 1; m >= 0; m--)
190 fprintf(fp, "%7.4f ", eigvec[3 * m + d]);
192 fprintf(fp, "\n");
196 int gmx_rmsf(int argc, char* argv[])
198 const char* desc[] = {
199 "[THISMODULE] computes the root mean square fluctuation (RMSF, i.e. standard ",
200 "deviation) of atomic positions in the trajectory (supplied with [TT]-f[tt])",
201 "after (optionally) fitting to a reference frame (supplied with [TT]-s[tt]).[PAR]",
202 "With option [TT]-oq[tt] the RMSF values are converted to B-factor",
203 "values, which are written to a [REF].pdb[ref] file. By default, the coordinates",
204 "in this output file are taken from the structure file provided with [TT]-s[tt],"
205 "although you can also use coordinates read from a different [REF].pdb[ref] file"
206 "provided with [TT]-q[tt]. There is very little error checking, so in this case"
207 "it is your responsibility to make sure all atoms in the structure file"
208 "and [REF].pdb[ref] file correspond exactly to each other.[PAR]",
209 "Option [TT]-ox[tt] writes the B-factors to a file with the average",
210 "coordinates in the trajectory.[PAR]",
211 "With the option [TT]-od[tt] the root mean square deviation with",
212 "respect to the reference structure is calculated.[PAR]",
213 "With the option [TT]-aniso[tt], [THISMODULE] will compute anisotropic",
214 "temperature factors and then it will also output average coordinates",
215 "and a [REF].pdb[ref] file with ANISOU records (corresonding to the [TT]-oq[tt]",
216 "or [TT]-ox[tt] option). Please note that the U values",
217 "are orientation-dependent, so before comparison with experimental data",
218 "you should verify that you fit to the experimental coordinates.[PAR]",
219 "When a [REF].pdb[ref] input file is passed to the program and the [TT]-aniso[tt]",
220 "flag is set",
221 "a correlation plot of the Uij will be created, if any anisotropic",
222 "temperature factors are present in the [REF].pdb[ref] file.[PAR]",
223 "With option [TT]-dir[tt] the average MSF (3x3) matrix is diagonalized.",
224 "This shows the directions in which the atoms fluctuate the most and",
225 "the least."
227 static gmx_bool bRes = FALSE, bAniso = FALSE, bFit = TRUE;
228 t_pargs pargs[] = {
229 { "-res", FALSE, etBOOL, { &bRes }, "Calculate averages for each residue" },
230 { "-aniso", FALSE, etBOOL, { &bAniso }, "Compute anisotropic termperature factors" },
231 { "-fit",
232 FALSE,
233 etBOOL,
234 { &bFit },
235 "Do a least squares superposition before computing RMSF. Without this you must "
236 "make sure that the reference structure and the trajectory match." }
238 int natom;
239 int i, m, teller = 0;
240 real t, *w_rls;
242 t_topology top;
243 int ePBC;
244 t_atoms * pdbatoms, *refatoms;
246 matrix box, pdbbox;
247 rvec * x, *pdbx, *xref;
248 t_trxstatus* status;
249 const char* label;
251 FILE* fp; /* the graphics file */
252 const char *devfn, *dirfn;
253 int resind;
255 gmx_bool bReadPDB;
256 int* index;
257 int isize;
258 char* grpnames;
260 real bfac, pdb_bfac, *Uaver;
261 double ** U, *xav;
262 int aid;
263 rvec* rmsd_x = nullptr;
264 double * rmsf, invcount, totmass;
265 int d;
266 real count = 0;
267 rvec xcm;
268 gmx_rmpbc_t gpbc = nullptr;
270 gmx_output_env_t* oenv;
272 const char* leg[2] = { "MD", "X-Ray" };
274 t_filenm fnm[] = { { efTRX, "-f", nullptr, ffREAD }, { efTPS, nullptr, nullptr, ffREAD },
275 { efNDX, nullptr, nullptr, ffOPTRD }, { efPDB, "-q", nullptr, ffOPTRD },
276 { efPDB, "-oq", "bfac", ffOPTWR }, { efPDB, "-ox", "xaver", ffOPTWR },
277 { efXVG, "-o", "rmsf", ffWRITE }, { efXVG, "-od", "rmsdev", ffOPTWR },
278 { efXVG, "-oc", "correl", ffOPTWR }, { efLOG, "-dir", "rmsf", ffOPTWR } };
279 #define NFILE asize(fnm)
281 if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW, NFILE, fnm, asize(pargs),
282 pargs, asize(desc), desc, 0, nullptr, &oenv))
284 return 0;
287 bReadPDB = ftp2bSet(efPDB, NFILE, fnm);
288 devfn = opt2fn_null("-od", NFILE, fnm);
289 dirfn = opt2fn_null("-dir", NFILE, fnm);
291 read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xref, nullptr, box, TRUE);
292 const char* title = *top.name;
293 snew(w_rls, top.atoms.nr);
295 fprintf(stderr, "Select group(s) for root mean square calculation\n");
296 get_index(&top.atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &isize, &index, &grpnames);
298 /* Set the weight */
299 for (i = 0; i < isize; i++)
301 w_rls[index[i]] = top.atoms.atom[index[i]].m;
304 /* Malloc the rmsf arrays */
305 snew(xav, isize * DIM);
306 snew(U, isize);
307 for (i = 0; i < isize; i++)
309 snew(U[i], DIM * DIM);
311 snew(rmsf, isize);
312 if (devfn)
314 snew(rmsd_x, isize);
317 if (bReadPDB)
319 t_topology* top_pdb;
320 snew(top_pdb, 1);
321 /* Read coordinates twice */
322 read_tps_conf(opt2fn("-q", NFILE, fnm), top_pdb, nullptr, nullptr, nullptr, pdbbox, FALSE);
323 snew(pdbatoms, 1);
324 *pdbatoms = top_pdb->atoms;
325 read_tps_conf(opt2fn("-q", NFILE, fnm), top_pdb, nullptr, &pdbx, nullptr, pdbbox, FALSE);
326 /* TODO Should this assert that top_pdb->atoms.nr == top.atoms.nr?
327 * See discussion at https://gerrit.gromacs.org/#/c/6430/1 */
328 title = *top_pdb->name;
329 snew(refatoms, 1);
330 *refatoms = top_pdb->atoms;
331 sfree(top_pdb);
333 else
335 pdbatoms = &top.atoms;
336 refatoms = &top.atoms;
337 pdbx = xref;
338 snew(pdbatoms->pdbinfo, pdbatoms->nr);
339 pdbatoms->havePdbInfo = TRUE;
340 copy_mat(box, pdbbox);
343 if (bFit)
345 sub_xcm(xref, isize, index, top.atoms.atom, xcm, FALSE);
348 natom = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
350 if (bFit)
352 gpbc = gmx_rmpbc_init(&top.idef, ePBC, natom);
355 /* Now read the trj again to compute fluctuations */
356 teller = 0;
359 if (bFit)
361 /* Remove periodic boundary */
362 gmx_rmpbc(gpbc, natom, box, x);
364 /* Set center of mass to zero */
365 sub_xcm(x, isize, index, top.atoms.atom, xcm, FALSE);
367 /* Fit to reference structure */
368 do_fit(natom, w_rls, xref, x);
371 /* Calculate Anisotropic U Tensor */
372 for (i = 0; i < isize; i++)
374 aid = index[i];
375 for (d = 0; d < DIM; d++)
377 xav[i * DIM + d] += x[aid][d];
378 for (m = 0; m < DIM; m++)
380 U[i][d * DIM + m] += x[aid][d] * x[aid][m];
385 if (devfn)
387 /* Calculate RMS Deviation */
388 for (i = 0; (i < isize); i++)
390 aid = index[i];
391 for (d = 0; (d < DIM); d++)
393 rmsd_x[i][d] += gmx::square(x[aid][d] - xref[aid][d]);
397 count += 1.0;
398 teller++;
399 } while (read_next_x(oenv, status, &t, x, box));
400 close_trx(status);
402 if (bFit)
404 gmx_rmpbc_done(gpbc);
408 invcount = 1.0 / count;
409 snew(Uaver, DIM * DIM);
410 totmass = 0;
411 for (i = 0; i < isize; i++)
413 for (d = 0; d < DIM; d++)
415 xav[i * DIM + d] *= invcount;
417 for (d = 0; d < DIM; d++)
419 for (m = 0; m < DIM; m++)
421 U[i][d * DIM + m] = U[i][d * DIM + m] * invcount - xav[i * DIM + d] * xav[i * DIM + m];
422 Uaver[3 * d + m] += top.atoms.atom[index[i]].m * U[i][d * DIM + m];
425 totmass += top.atoms.atom[index[i]].m;
427 for (d = 0; d < DIM * DIM; d++)
429 Uaver[d] /= totmass;
432 if (bRes)
434 for (d = 0; d < DIM * DIM; d++)
436 average_residues(nullptr, U, d, isize, index, w_rls, &top.atoms);
440 if (bAniso)
442 for (i = 0; i < isize; i++)
444 aid = index[i];
445 pdbatoms->pdbinfo[aid].bAnisotropic = TRUE;
446 pdbatoms->pdbinfo[aid].uij[U11] = static_cast<int>(1e6 * U[i][XX * DIM + XX]);
447 pdbatoms->pdbinfo[aid].uij[U22] = static_cast<int>(1e6 * U[i][YY * DIM + YY]);
448 pdbatoms->pdbinfo[aid].uij[U33] = static_cast<int>(1e6 * U[i][ZZ * DIM + ZZ]);
449 pdbatoms->pdbinfo[aid].uij[U12] = static_cast<int>(1e6 * U[i][XX * DIM + YY]);
450 pdbatoms->pdbinfo[aid].uij[U13] = static_cast<int>(1e6 * U[i][XX * DIM + ZZ]);
451 pdbatoms->pdbinfo[aid].uij[U23] = static_cast<int>(1e6 * U[i][YY * DIM + ZZ]);
454 if (bRes)
456 label = "Residue";
458 else
460 label = "Atom";
463 for (i = 0; i < isize; i++)
465 rmsf[i] = U[i][XX * DIM + XX] + U[i][YY * DIM + YY] + U[i][ZZ * DIM + ZZ];
468 if (dirfn)
470 fprintf(stdout, "\n");
471 print_dir(stdout, Uaver);
472 fp = gmx_ffopen(dirfn, "w");
473 print_dir(fp, Uaver);
474 gmx_ffclose(fp);
477 for (i = 0; i < isize; i++)
479 sfree(U[i]);
481 sfree(U);
483 /* Write RMSF output */
484 if (bReadPDB)
486 bfac = 8.0 * M_PI * M_PI / 3.0 * 100;
487 fp = xvgropen(ftp2fn(efXVG, NFILE, fnm), "B-Factors", label, "(A\\b\\S\\So\\N\\S2\\N)", oenv);
488 xvgr_legend(fp, 2, leg, oenv);
489 for (i = 0; (i < isize); i++)
491 if (!bRes || i + 1 == isize
492 || top.atoms.atom[index[i]].resind != top.atoms.atom[index[i + 1]].resind)
494 resind = top.atoms.atom[index[i]].resind;
495 pdb_bfac = find_pdb_bfac(pdbatoms, &top.atoms.resinfo[resind],
496 *(top.atoms.atomname[index[i]]));
498 fprintf(fp, "%5d %10.5f %10.5f\n",
499 bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i] + 1,
500 rmsf[i] * bfac, pdb_bfac);
503 xvgrclose(fp);
505 else
507 fp = xvgropen(ftp2fn(efXVG, NFILE, fnm), "RMS fluctuation", label, "(nm)", oenv);
508 for (i = 0; i < isize; i++)
510 if (!bRes || i + 1 == isize
511 || top.atoms.atom[index[i]].resind != top.atoms.atom[index[i + 1]].resind)
513 fprintf(fp, "%5d %8.4f\n",
514 bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i] + 1,
515 std::sqrt(rmsf[i]));
518 xvgrclose(fp);
521 for (i = 0; i < isize; i++)
523 pdbatoms->pdbinfo[index[i]].bfac = 800 * M_PI * M_PI / 3.0 * rmsf[i];
526 if (devfn)
528 for (i = 0; i < isize; i++)
530 rmsf[i] = (rmsd_x[i][XX] + rmsd_x[i][YY] + rmsd_x[i][ZZ]) / count;
532 if (bRes)
534 average_residues(rmsf, nullptr, 0, isize, index, w_rls, &top.atoms);
536 /* Write RMSD output */
537 fp = xvgropen(devfn, "RMS Deviation", label, "(nm)", oenv);
538 for (i = 0; i < isize; i++)
540 if (!bRes || i + 1 == isize
541 || top.atoms.atom[index[i]].resind != top.atoms.atom[index[i + 1]].resind)
543 fprintf(fp, "%5d %8.4f\n",
544 bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i] + 1,
545 std::sqrt(rmsf[i]));
548 xvgrclose(fp);
551 if (opt2bSet("-oq", NFILE, fnm))
553 /* Write a .pdb file with B-factors and optionally anisou records */
554 for (i = 0; i < isize; i++)
556 rvec_inc(pdbx[index[i]], xcm);
558 write_sto_conf_indexed(opt2fn("-oq", NFILE, fnm), title, pdbatoms, pdbx, nullptr, ePBC,
559 pdbbox, isize, index);
561 if (opt2bSet("-ox", NFILE, fnm))
563 rvec* bFactorX;
564 snew(bFactorX, top.atoms.nr);
565 for (i = 0; i < isize; i++)
567 for (d = 0; d < DIM; d++)
569 bFactorX[index[i]][d] = xcm[d] + xav[i * DIM + d];
572 /* Write a .pdb file with B-factors and optionally anisou records */
573 write_sto_conf_indexed(opt2fn("-ox", NFILE, fnm), title, pdbatoms, bFactorX, nullptr, ePBC,
574 pdbbox, isize, index);
575 sfree(bFactorX);
577 if (bAniso)
579 correlate_aniso(opt2fn("-oc", NFILE, fnm), refatoms, pdbatoms, oenv);
580 do_view(oenv, opt2fn("-oc", NFILE, fnm), "-nxy");
582 do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
583 if (devfn)
585 do_view(oenv, opt2fn("-od", NFILE, fnm), "-nxy");
588 return 0;