src/gromacs/gmxana/gmx_rmsf.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
   5  * Copyright (c) 2001-2004, The GROMACS development team.
   6  * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
   7  * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
   8  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   9  * and including many others, as listed in the AUTHORS file in the
  10  * top-level source directory and at http://www.gromacs.org.
  11  *
  12  * GROMACS is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public License
  14  * as published by the Free Software Foundation; either version 2.1
  15  * of the License, or (at your option) any later version.
  16  *
  17  * GROMACS is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with GROMACS; if not, see
  24  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  25  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  26  *
  27  * If you want to redistribute modifications to GROMACS, please
  28  * consider that scientific software is very special. Version
  29  * control is crucial - bugs must be traceable. We will be happy to
  30  * consider code for inclusion in the official distribution, but
  31  * derived work must not be called official GROMACS. Details are found
  32  * in the README & COPYING files - if they are missing, get the
  33  * official version at http://www.gromacs.org.
  34  *
  35  * To help us fund GROMACS development, we humbly ask that you cite
  36  * the research papers on the package. Check out http://www.gromacs.org.
  37  */
  38 #include "gmxpre.h"
  39
  40 #include <cassert>
  41 #include <cmath>
  42 #include <cstring>
  43
  44 #include "gromacs/commandline/pargs.h"
  45 #include "gromacs/commandline/viewit.h"
  46 #include "gromacs/fileio/confio.h"
  47 #include "gromacs/fileio/pdbio.h"
  48 #include "gromacs/fileio/trxio.h"
  49 #include "gromacs/fileio/xvgr.h"
  50 #include "gromacs/gmxana/gmx_ana.h"
  51 #include "gromacs/gmxana/princ.h"
  52 #include "gromacs/linearalgebra/eigensolver.h"
  53 #include "gromacs/math/do_fit.h"
  54 #include "gromacs/math/functions.h"
  55 #include "gromacs/math/utilities.h"
  56 #include "gromacs/math/vec.h"
  57 #include "gromacs/pbcutil/rmpbc.h"
  58 #include "gromacs/topology/index.h"
  59 #include "gromacs/topology/topology.h"
  60 #include "gromacs/utility/arraysize.h"
  61 #include "gromacs/utility/cstringutil.h"
  62 #include "gromacs/utility/futil.h"
  63 #include "gromacs/utility/smalloc.h"
  64
  65 static real find_pdb_bfac(const t_atoms* atoms, t_resinfo* ri, char* atomnm)
  66 {
  67     char rresnm[8];
  68     int  i;
  69
  70     std::strcpy(rresnm, *ri->name);
  71     rresnm[3] = '\0';
  72     for (i = 0; (i < atoms->nr); i++)
  73     {
  74         if ((ri->nr == atoms->resinfo[atoms->atom[i].resind].nr)
  75             && (ri->ic == atoms->resinfo[atoms->atom[i].resind].ic)
  76             && (std::strcmp(*atoms->resinfo[atoms->atom[i].resind].name, rresnm) == 0)
  77             && (std::strstr(*atoms->atomname[i], atomnm) != nullptr))
  78         {
  79             break;
  80         }
  81     }
  82     if (i == atoms->nr)
  83     {
  84         fprintf(stderr, "\rCan not find %s%d-%s in pdbfile\n", rresnm, ri->nr, atomnm);
  85         fflush(stderr);
  86         return 0.0;
  87     }
  88
  89     return atoms->pdbinfo[i].bfac;
  90 }
  91
  92 static void correlate_aniso(const char* fn, t_atoms* ref, t_atoms* calc, const gmx_output_env_t* oenv)
  93 {
  94     FILE* fp;
  95     int   i, j;
  96
  97     fp = xvgropen(fn, "Correlation between X-Ray and Computed Uij", "X-Ray", "Computed", oenv);
  98     for (i = 0; (i < ref->nr); i++)
  99     {
 100         if (ref->pdbinfo[i].bAnisotropic)
 101         {
 102             for (j = U11; (j <= U23); j++)
 103             {
 104                 fprintf(fp, "%10d  %10d\n", ref->pdbinfo[i].uij[j], calc->pdbinfo[i].uij[j]);
 105             }
 106         }
 107     }
 108     xvgrclose(fp);
 109 }
 110
 111 static void average_residues(double         f[],
 112                              double**       U,
 113                              int            uind,
 114                              int            isize,
 115                              const int      index[],
 116                              const real     w_rls[],
 117                              const t_atoms* atoms)
 118 {
 119     int    i, j, start;
 120     double av, m;
 121
 122     start = 0;
 123     av    = 0;
 124     m     = 0;
 125     if (!f)
 126     {
 127         assert(U);
 128     }
 129     for (i = 0; i < isize; i++)
 130     {
 131         av += w_rls[index[i]] * (f != nullptr ? f[i] : U[i][uind]);
 132         m += w_rls[index[i]];
 133         if (i + 1 == isize || atoms->atom[index[i]].resind != atoms->atom[index[i + 1]].resind)
 134         {
 135             av /= m;
 136             if (f != nullptr)
 137             {
 138                 for (j = start; j <= i; j++)
 139                 {
 140                     f[i] = av;
 141                 }
 142             }
 143             else
 144             {
 145                 for (j = start; j <= i; j++)
 146                 {
 147                     U[j][uind] = av;
 148                 }
 149             }
 150             start = i + 1;
 151             av    = 0;
 152             m     = 0;
 153         }
 154     }
 155 }
 156
 157 static void print_dir(FILE* fp, real* Uaver)
 158 {
 159     real eigvec[DIM * DIM];
 160     real tmp[DIM * DIM];
 161     rvec eigval;
 162     int  d, m;
 163
 164     fprintf(fp, "MSF     X         Y         Z\n");
 165     for (d = 0; d < DIM; d++)
 166     {
 167         fprintf(fp, " %c ", 'X' + d - XX);
 168         for (m = 0; m < DIM; m++)
 169         {
 170             fprintf(fp, " %9.2e", Uaver[3 * m + d]);
 171         }
 172         fprintf(fp, "%s\n", m == DIM ? " (nm^2)" : "");
 173     }
 174
 175     for (m = 0; m < DIM * DIM; m++)
 176     {
 177         tmp[m] = Uaver[m];
 178     }
 179
 180
 181     eigensolver(tmp, DIM, 0, DIM, eigval, eigvec);
 182
 183     fprintf(fp, "\n             Eigenvectors\n\n");
 184     fprintf(fp, "Eigv  %-8.2e %-8.2e %-8.2e (nm^2)\n\n", eigval[2], eigval[1], eigval[0]);
 185     for (d = 0; d < DIM; d++)
 186     {
 187         fprintf(fp, "  %c   ", 'X' + d - XX);
 188         for (m = DIM - 1; m >= 0; m--)
 189         {
 190             fprintf(fp, "%7.4f  ", eigvec[3 * m + d]);
 191         }
 192         fprintf(fp, "\n");
 193     }
 194 }
 195
 196 int gmx_rmsf(int argc, char* argv[])
 197 {
 198     const char* desc[] = {
 199         "[THISMODULE] computes the root mean square fluctuation (RMSF, i.e. standard ",
 200         "deviation) of atomic positions in the trajectory (supplied with [TT]-f[tt])",
 201         "after (optionally) fitting to a reference frame (supplied with [TT]-s[tt]).[PAR]",
 202         "With option [TT]-oq[tt] the RMSF values are converted to B-factor",
 203         "values, which are written to a [REF].pdb[ref] file. By default, the coordinates",
 204         "in this output file are taken from the structure file provided with [TT]-s[tt],"
 205         "although you can also use coordinates read from a different [REF].pdb[ref] file"
 206         "provided with [TT]-q[tt]. There is very little error checking, so in this case"
 207         "it is your responsibility to make sure all atoms in the structure file"
 208         "and [REF].pdb[ref] file correspond exactly to each other.[PAR]",
 209         "Option [TT]-ox[tt] writes the B-factors to a file with the average",
 210         "coordinates in the trajectory.[PAR]",
 211         "With the option [TT]-od[tt] the root mean square deviation with",
 212         "respect to the reference structure is calculated.[PAR]",
 213         "With the option [TT]-aniso[tt], [THISMODULE] will compute anisotropic",
 214         "temperature factors and then it will also output average coordinates",
 215         "and a [REF].pdb[ref] file with ANISOU records (corresonding to the [TT]-oq[tt]",
 216         "or [TT]-ox[tt] option). Please note that the U values",
 217         "are orientation-dependent, so before comparison with experimental data",
 218         "you should verify that you fit to the experimental coordinates.[PAR]",
 219         "When a [REF].pdb[ref] input file is passed to the program and the [TT]-aniso[tt]",
 220         "flag is set",
 221         "a correlation plot of the Uij will be created, if any anisotropic",
 222         "temperature factors are present in the [REF].pdb[ref] file.[PAR]",
 223         "With option [TT]-dir[tt] the average MSF (3x3) matrix is diagonalized.",
 224         "This shows the directions in which the atoms fluctuate the most and",
 225         "the least."
 226     };
 227     static gmx_bool bRes = FALSE, bAniso = FALSE, bFit = TRUE;
 228     t_pargs         pargs[] = {
 229         { "-res", FALSE, etBOOL, { &bRes }, "Calculate averages for each residue" },
 230         { "-aniso", FALSE, etBOOL, { &bAniso }, "Compute anisotropic termperature factors" },
 231         { "-fit",
 232           FALSE,
 233           etBOOL,
 234           { &bFit },
 235           "Do a least squares superposition before computing RMSF. Without this you must "
 236           "make sure that the reference structure and the trajectory match." }
 237     };
 238     int  natom;
 239     int  i, m, teller = 0;
 240     real t, *w_rls;
 241
 242     t_topology top;
 243     int        ePBC;
 244     t_atoms *  pdbatoms, *refatoms;
 245
 246     matrix       box, pdbbox;
 247     rvec *       x, *pdbx, *xref;
 248     t_trxstatus* status;
 249     const char*  label;
 250
 251     FILE*       fp; /* the graphics file */
 252     const char *devfn, *dirfn;
 253     int         resind;
 254
 255     gmx_bool bReadPDB;
 256     int*     index;
 257     int      isize;
 258     char*    grpnames;
 259
 260     real        bfac, pdb_bfac, *Uaver;
 261     double **   U, *xav;
 262     int         aid;
 263     rvec*       rmsd_x = nullptr;
 264     double *    rmsf, invcount, totmass;
 265     int         d;
 266     real        count = 0;
 267     rvec        xcm;
 268     gmx_rmpbc_t gpbc = nullptr;
 269
 270     gmx_output_env_t* oenv;
 271
 272     const char* leg[2] = { "MD", "X-Ray" };
 273
 274     t_filenm fnm[] = { { efTRX, "-f", nullptr, ffREAD },     { efTPS, nullptr, nullptr, ffREAD },
 275                        { efNDX, nullptr, nullptr, ffOPTRD }, { efPDB, "-q", nullptr, ffOPTRD },
 276                        { efPDB, "-oq", "bfac", ffOPTWR },    { efPDB, "-ox", "xaver", ffOPTWR },
 277                        { efXVG, "-o", "rmsf", ffWRITE },     { efXVG, "-od", "rmsdev", ffOPTWR },
 278                        { efXVG, "-oc", "correl", ffOPTWR },  { efLOG, "-dir", "rmsf", ffOPTWR } };
 279 #define NFILE asize(fnm)
 280
 281     if (!parse_common_args(&argc, argv, PCA_CAN_TIME | PCA_CAN_VIEW, NFILE, fnm, asize(pargs),
 282                            pargs, asize(desc), desc, 0, nullptr, &oenv))
 283     {
 284         return 0;
 285     }
 286
 287     bReadPDB = ftp2bSet(efPDB, NFILE, fnm);
 288     devfn    = opt2fn_null("-od", NFILE, fnm);
 289     dirfn    = opt2fn_null("-dir", NFILE, fnm);
 290
 291     read_tps_conf(ftp2fn(efTPS, NFILE, fnm), &top, &ePBC, &xref, nullptr, box, TRUE);
 292     const char* title = *top.name;
 293     snew(w_rls, top.atoms.nr);
 294
 295     fprintf(stderr, "Select group(s) for root mean square calculation\n");
 296     get_index(&top.atoms, ftp2fn_null(efNDX, NFILE, fnm), 1, &isize, &index, &grpnames);
 297
 298     /* Set the weight */
 299     for (i = 0; i < isize; i++)
 300     {
 301         w_rls[index[i]] = top.atoms.atom[index[i]].m;
 302     }
 303
 304     /* Malloc the rmsf arrays */
 305     snew(xav, isize * DIM);
 306     snew(U, isize);
 307     for (i = 0; i < isize; i++)
 308     {
 309         snew(U[i], DIM * DIM);
 310     }
 311     snew(rmsf, isize);
 312     if (devfn)
 313     {
 314         snew(rmsd_x, isize);
 315     }
 316
 317     if (bReadPDB)
 318     {
 319         t_topology* top_pdb;
 320         snew(top_pdb, 1);
 321         /* Read coordinates twice */
 322         read_tps_conf(opt2fn("-q", NFILE, fnm), top_pdb, nullptr, nullptr, nullptr, pdbbox, FALSE);
 323         snew(pdbatoms, 1);
 324         *pdbatoms = top_pdb->atoms;
 325         read_tps_conf(opt2fn("-q", NFILE, fnm), top_pdb, nullptr, &pdbx, nullptr, pdbbox, FALSE);
 326         /* TODO Should this assert that top_pdb->atoms.nr == top.atoms.nr?
 327          * See discussion at https://gerrit.gromacs.org/#/c/6430/1 */
 328         title = *top_pdb->name;
 329         snew(refatoms, 1);
 330         *refatoms = top_pdb->atoms;
 331         sfree(top_pdb);
 332     }
 333     else
 334     {
 335         pdbatoms = &top.atoms;
 336         refatoms = &top.atoms;
 337         pdbx     = xref;
 338         snew(pdbatoms->pdbinfo, pdbatoms->nr);
 339         pdbatoms->havePdbInfo = TRUE;
 340         copy_mat(box, pdbbox);
 341     }
 342
 343     if (bFit)
 344     {
 345         sub_xcm(xref, isize, index, top.atoms.atom, xcm, FALSE);
 346     }
 347
 348     natom = read_first_x(oenv, &status, ftp2fn(efTRX, NFILE, fnm), &t, &x, box);
 349
 350     if (bFit)
 351     {
 352         gpbc = gmx_rmpbc_init(&top.idef, ePBC, natom);
 353     }
 354
 355     /* Now read the trj again to compute fluctuations */
 356     teller = 0;
 357     do
 358     {
 359         if (bFit)
 360         {
 361             /* Remove periodic boundary */
 362             gmx_rmpbc(gpbc, natom, box, x);
 363
 364             /* Set center of mass to zero */
 365             sub_xcm(x, isize, index, top.atoms.atom, xcm, FALSE);
 366
 367             /* Fit to reference structure */
 368             do_fit(natom, w_rls, xref, x);
 369         }
 370
 371         /* Calculate Anisotropic U Tensor */
 372         for (i = 0; i < isize; i++)
 373         {
 374             aid = index[i];
 375             for (d = 0; d < DIM; d++)
 376             {
 377                 xav[i * DIM + d] += x[aid][d];
 378                 for (m = 0; m < DIM; m++)
 379                 {
 380                     U[i][d * DIM + m] += x[aid][d] * x[aid][m];
 381                 }
 382             }
 383         }
 384
 385         if (devfn)
 386         {
 387             /* Calculate RMS Deviation */
 388             for (i = 0; (i < isize); i++)
 389             {
 390                 aid = index[i];
 391                 for (d = 0; (d < DIM); d++)
 392                 {
 393                     rmsd_x[i][d] += gmx::square(x[aid][d] - xref[aid][d]);
 394                 }
 395             }
 396         }
 397         count += 1.0;
 398         teller++;
 399     } while (read_next_x(oenv, status, &t, x, box));
 400     close_trx(status);
 401
 402     if (bFit)
 403     {
 404         gmx_rmpbc_done(gpbc);
 405     }
 406
 407
 408     invcount = 1.0 / count;
 409     snew(Uaver, DIM * DIM);
 410     totmass = 0;
 411     for (i = 0; i < isize; i++)
 412     {
 413         for (d = 0; d < DIM; d++)
 414         {
 415             xav[i * DIM + d] *= invcount;
 416         }
 417         for (d = 0; d < DIM; d++)
 418         {
 419             for (m = 0; m < DIM; m++)
 420             {
 421                 U[i][d * DIM + m] = U[i][d * DIM + m] * invcount - xav[i * DIM + d] * xav[i * DIM + m];
 422                 Uaver[3 * d + m] += top.atoms.atom[index[i]].m * U[i][d * DIM + m];
 423             }
 424         }
 425         totmass += top.atoms.atom[index[i]].m;
 426     }
 427     for (d = 0; d < DIM * DIM; d++)
 428     {
 429         Uaver[d] /= totmass;
 430     }
 431
 432     if (bRes)
 433     {
 434         for (d = 0; d < DIM * DIM; d++)
 435         {
 436             average_residues(nullptr, U, d, isize, index, w_rls, &top.atoms);
 437         }
 438     }
 439
 440     if (bAniso)
 441     {
 442         for (i = 0; i < isize; i++)
 443         {
 444             aid                                 = index[i];
 445             pdbatoms->pdbinfo[aid].bAnisotropic = TRUE;
 446             pdbatoms->pdbinfo[aid].uij[U11]     = static_cast<int>(1e6 * U[i][XX * DIM + XX]);
 447             pdbatoms->pdbinfo[aid].uij[U22]     = static_cast<int>(1e6 * U[i][YY * DIM + YY]);
 448             pdbatoms->pdbinfo[aid].uij[U33]     = static_cast<int>(1e6 * U[i][ZZ * DIM + ZZ]);
 449             pdbatoms->pdbinfo[aid].uij[U12]     = static_cast<int>(1e6 * U[i][XX * DIM + YY]);
 450             pdbatoms->pdbinfo[aid].uij[U13]     = static_cast<int>(1e6 * U[i][XX * DIM + ZZ]);
 451             pdbatoms->pdbinfo[aid].uij[U23]     = static_cast<int>(1e6 * U[i][YY * DIM + ZZ]);
 452         }
 453     }
 454     if (bRes)
 455     {
 456         label = "Residue";
 457     }
 458     else
 459     {
 460         label = "Atom";
 461     }
 462
 463     for (i = 0; i < isize; i++)
 464     {
 465         rmsf[i] = U[i][XX * DIM + XX] + U[i][YY * DIM + YY] + U[i][ZZ * DIM + ZZ];
 466     }
 467
 468     if (dirfn)
 469     {
 470         fprintf(stdout, "\n");
 471         print_dir(stdout, Uaver);
 472         fp = gmx_ffopen(dirfn, "w");
 473         print_dir(fp, Uaver);
 474         gmx_ffclose(fp);
 475     }
 476
 477     for (i = 0; i < isize; i++)
 478     {
 479         sfree(U[i]);
 480     }
 481     sfree(U);
 482
 483     /* Write RMSF output */
 484     if (bReadPDB)
 485     {
 486         bfac = 8.0 * M_PI * M_PI / 3.0 * 100;
 487         fp = xvgropen(ftp2fn(efXVG, NFILE, fnm), "B-Factors", label, "(A\\b\\S\\So\\N\\S2\\N)", oenv);
 488         xvgr_legend(fp, 2, leg, oenv);
 489         for (i = 0; (i < isize); i++)
 490         {
 491             if (!bRes || i + 1 == isize
 492                 || top.atoms.atom[index[i]].resind != top.atoms.atom[index[i + 1]].resind)
 493             {
 494                 resind   = top.atoms.atom[index[i]].resind;
 495                 pdb_bfac = find_pdb_bfac(pdbatoms, &top.atoms.resinfo[resind],
 496                                          *(top.atoms.atomname[index[i]]));
 497
 498                 fprintf(fp, "%5d  %10.5f  %10.5f\n",
 499                         bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i] + 1,
 500                         rmsf[i] * bfac, pdb_bfac);
 501             }
 502         }
 503         xvgrclose(fp);
 504     }
 505     else
 506     {
 507         fp = xvgropen(ftp2fn(efXVG, NFILE, fnm), "RMS fluctuation", label, "(nm)", oenv);
 508         for (i = 0; i < isize; i++)
 509         {
 510             if (!bRes || i + 1 == isize
 511                 || top.atoms.atom[index[i]].resind != top.atoms.atom[index[i + 1]].resind)
 512             {
 513                 fprintf(fp, "%5d %8.4f\n",
 514                         bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i] + 1,
 515                         std::sqrt(rmsf[i]));
 516             }
 517         }
 518         xvgrclose(fp);
 519     }
 520
 521     for (i = 0; i < isize; i++)
 522     {
 523         pdbatoms->pdbinfo[index[i]].bfac = 800 * M_PI * M_PI / 3.0 * rmsf[i];
 524     }
 525
 526     if (devfn)
 527     {
 528         for (i = 0; i < isize; i++)
 529         {
 530             rmsf[i] = (rmsd_x[i][XX] + rmsd_x[i][YY] + rmsd_x[i][ZZ]) / count;
 531         }
 532         if (bRes)
 533         {
 534             average_residues(rmsf, nullptr, 0, isize, index, w_rls, &top.atoms);
 535         }
 536         /* Write RMSD output */
 537         fp = xvgropen(devfn, "RMS Deviation", label, "(nm)", oenv);
 538         for (i = 0; i < isize; i++)
 539         {
 540             if (!bRes || i + 1 == isize
 541                 || top.atoms.atom[index[i]].resind != top.atoms.atom[index[i + 1]].resind)
 542             {
 543                 fprintf(fp, "%5d %8.4f\n",
 544                         bRes ? top.atoms.resinfo[top.atoms.atom[index[i]].resind].nr : index[i] + 1,
 545                         std::sqrt(rmsf[i]));
 546             }
 547         }
 548         xvgrclose(fp);
 549     }
 550
 551     if (opt2bSet("-oq", NFILE, fnm))
 552     {
 553         /* Write a .pdb file with B-factors and optionally anisou records */
 554         for (i = 0; i < isize; i++)
 555         {
 556             rvec_inc(pdbx[index[i]], xcm);
 557         }
 558         write_sto_conf_indexed(opt2fn("-oq", NFILE, fnm), title, pdbatoms, pdbx, nullptr, ePBC,
 559                                pdbbox, isize, index);
 560     }
 561     if (opt2bSet("-ox", NFILE, fnm))
 562     {
 563         rvec* bFactorX;
 564         snew(bFactorX, top.atoms.nr);
 565         for (i = 0; i < isize; i++)
 566         {
 567             for (d = 0; d < DIM; d++)
 568             {
 569                 bFactorX[index[i]][d] = xcm[d] + xav[i * DIM + d];
 570             }
 571         }
 572         /* Write a .pdb file with B-factors and optionally anisou records */
 573         write_sto_conf_indexed(opt2fn("-ox", NFILE, fnm), title, pdbatoms, bFactorX, nullptr, ePBC,
 574                                pdbbox, isize, index);
 575         sfree(bFactorX);
 576     }
 577     if (bAniso)
 578     {
 579         correlate_aniso(opt2fn("-oc", NFILE, fnm), refatoms, pdbatoms, oenv);
 580         do_view(oenv, opt2fn("-oc", NFILE, fnm), "-nxy");
 581     }
 582     do_view(oenv, opt2fn("-o", NFILE, fnm), "-nxy");
 583     if (devfn)
 584     {
 585         do_view(oenv, opt2fn("-od", NFILE, fnm), "-nxy");
 586     }
 587
 588     return 0;
 589 }