Introduce ObservablesHistory container
[gromacs.git] / src / gromacs / mdlib / csettle.cpp
blob1dbaff73fae5746eca598ffd21d196c57a440ae3
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 #include "gmxpre.h"
39 #include <assert.h>
40 #include <math.h>
41 #include <stdio.h>
43 #include <algorithm>
45 #include "gromacs/math/functions.h"
46 #include "gromacs/math/invertmatrix.h"
47 #include "gromacs/math/vec.h"
48 #include "gromacs/mdlib/constr.h"
49 #include "gromacs/mdtypes/mdatom.h"
50 #include "gromacs/pbcutil/ishift.h"
51 #include "gromacs/pbcutil/pbc.h"
52 #include "gromacs/pbcutil/pbc-simd.h"
53 #include "gromacs/simd/simd.h"
54 #include "gromacs/simd/simd_math.h"
55 #include "gromacs/topology/mtop_util.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/gmxassert.h"
58 #include "gromacs/utility/smalloc.h"
60 using namespace gmx; // TODO: Remove when this file is moved into gmx namespace
62 typedef struct
64 real mO;
65 real mH;
66 real wh;
67 real dOH;
68 real dHH;
69 real ra;
70 real rb;
71 real rc;
72 real irc2;
73 /* For projection */
74 real imO;
75 real imH;
76 real invdOH;
77 real invdHH;
78 matrix invmat;
79 } settleparam_t;
81 typedef struct gmx_settledata
83 settleparam_t massw; /* Parameters for SETTLE for coordinates */
84 settleparam_t mass1; /* Parameters with all masses 1, for forces */
86 int nsettle; /* The number of settles on our rank */
87 int *ow1; /* Index to OW1 atoms, size nsettle + SIMD padding */
88 int *hw2; /* Index to HW2 atoms, size nsettle + SIMD padding */
89 int *hw3; /* Index to HW3 atoms, size nsettle + SIMD padding */
90 real *virfac; /* Virial factor 0 or 1, size nsettle + SIMD pad. */
91 int nalloc; /* Allocation size of ow1, hw2, hw3, virfac */
93 bool bUseSimd; /* Use SIMD intrinsics code, if possible */
94 } t_gmx_settledata;
97 static void init_proj_matrix(real invmO, real invmH, real dOH, real dHH,
98 matrix inverseCouplingMatrix)
100 /* We normalize the inverse masses with invmO for the matrix inversion.
101 * so we can keep using masses of almost zero for frozen particles,
102 * without running out of the float range in invertMatrix.
104 double invmORelative = 1.0;
105 double invmHRelative = invmH/static_cast<double>(invmO);
106 double distanceRatio = dHH/static_cast<double>(dOH);
108 /* Construct the constraint coupling matrix */
109 matrix mat;
110 mat[0][0] = invmORelative + invmHRelative;
111 mat[0][1] = invmORelative*(1.0 - 0.5*gmx::square(distanceRatio));
112 mat[0][2] = invmHRelative*0.5*distanceRatio;
113 mat[1][1] = mat[0][0];
114 mat[1][2] = mat[0][2];
115 mat[2][2] = invmHRelative + invmHRelative;
116 mat[1][0] = mat[0][1];
117 mat[2][0] = mat[0][2];
118 mat[2][1] = mat[1][2];
120 invertMatrix(mat, inverseCouplingMatrix);
122 msmul(inverseCouplingMatrix, 1/invmO, inverseCouplingMatrix);
125 static void settleparam_init(settleparam_t *p,
126 real mO, real mH, real invmO, real invmH,
127 real dOH, real dHH)
129 /* We calculate parameters in double precision to minimize errors.
130 * The velocity correction applied during SETTLE coordinate constraining
131 * introduces a systematic error of approximately 1 bit per atom,
132 * depending on what the compiler does with the code.
134 double wohh;
136 p->mO = mO;
137 p->mH = mH;
138 wohh = mO + 2.0*mH;
139 p->wh = mH/wohh;
140 p->dOH = dOH;
141 p->dHH = dHH;
142 double rc = dHH/2.0;
143 double ra = 2.0*mH*std::sqrt(dOH*dOH - rc*rc)/wohh;
144 p->rb = std::sqrt(dOH*dOH - rc*rc) - ra;
145 p->rc = rc;
146 p->ra = ra;
147 p->irc2 = 1.0/dHH;
149 /* For projection: inverse masses and coupling matrix inversion */
150 p->imO = invmO;
151 p->imH = invmH;
153 p->invdOH = 1.0/dOH;
154 p->invdHH = 1.0/dHH;
156 init_proj_matrix(invmO, invmH, dOH, dHH, p->invmat);
158 if (debug)
160 fprintf(debug, "wh =%g, rc = %g, ra = %g\n",
161 p->wh, p->rc, p->ra);
162 fprintf(debug, "rb = %g, irc2 = %g, dHH = %g, dOH = %g\n",
163 p->rb, p->irc2, p->dHH, p->dOH);
167 gmx_settledata_t settle_init(const gmx_mtop_t *mtop)
169 /* Check that we have only one settle type */
170 int settle_type = -1;
171 gmx_mtop_ilistloop_t iloop = gmx_mtop_ilistloop_init(mtop);
172 t_ilist *ilist;
173 int nmol;
174 const int nral1 = 1 + NRAL(F_SETTLE);
175 while (gmx_mtop_ilistloop_next(iloop, &ilist, &nmol))
177 for (int i = 0; i < ilist[F_SETTLE].nr; i += nral1)
179 if (settle_type == -1)
181 settle_type = ilist[F_SETTLE].iatoms[i];
183 else if (ilist[F_SETTLE].iatoms[i] != settle_type)
185 gmx_fatal(FARGS,
186 "The [molecules] section of your topology specifies more than one block of\n"
187 "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
188 "If you are trying to partition your solvent into different *groups*\n"
189 "(e.g. for freezing, T-coupling, etc.), you are using the wrong approach. Index\n"
190 "files specify groups. Otherwise, you may wish to change the least-used\n"
191 "block of molecules with SETTLE constraints into 3 normal constraints.");
195 GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles");
197 gmx_settledata_t settled;
199 snew(settled, 1);
201 /* We will not initialize the normal SETTLE parameters here yet,
202 * since the atom (inv)masses can depend on the integrator and
203 * free-energy perturbation. We set mO=-1 to trigger later initialization.
205 settled->massw.mO = -1;
207 real dOH = mtop->ffparams.iparams[settle_type].settle.doh;
208 real dHH = mtop->ffparams.iparams[settle_type].settle.dhh;
209 settleparam_init(&settled->mass1, 1.0, 1.0, 1.0, 1.0, dOH, dHH);
211 settled->ow1 = nullptr;
212 settled->hw2 = nullptr;
213 settled->hw3 = nullptr;
214 settled->virfac = nullptr;
215 settled->nalloc = 0;
217 /* Without SIMD configured, this bool is not used */
218 settled->bUseSimd = (getenv("GMX_DISABLE_SIMD_KERNELS") == nullptr);
220 return settled;
223 void settle_free(gmx_settledata_t settled)
225 sfree_aligned(settled->ow1);
226 sfree_aligned(settled->hw2);
227 sfree_aligned(settled->hw3);
228 sfree_aligned(settled->virfac);
229 sfree(settled);
232 void settle_set_constraints(gmx_settledata_t settled,
233 const t_ilist *il_settle,
234 const t_mdatoms *mdatoms)
236 #if GMX_SIMD_HAVE_REAL
237 const int pack_size = GMX_SIMD_REAL_WIDTH;
238 #else
239 const int pack_size = 1;
240 #endif
242 const int nral1 = 1 + NRAL(F_SETTLE);
243 int nsettle = il_settle->nr/nral1;
244 settled->nsettle = nsettle;
246 if (nsettle > 0)
248 const t_iatom *iatoms = il_settle->iatoms;
250 /* Here we initialize the normal SETTLE parameters */
251 if (settled->massw.mO < 0)
253 int firstO = iatoms[1];
254 int firstH = iatoms[2];
255 settleparam_init(&settled->massw,
256 mdatoms->massT[firstO],
257 mdatoms->massT[firstH],
258 mdatoms->invmass[firstO],
259 mdatoms->invmass[firstH],
260 settled->mass1.dOH,
261 settled->mass1.dHH);
264 if (nsettle + pack_size > settled->nalloc)
266 settled->nalloc = over_alloc_dd(nsettle + pack_size);
267 sfree_aligned(settled->ow1);
268 sfree_aligned(settled->hw2);
269 sfree_aligned(settled->hw3);
270 sfree_aligned(settled->virfac);
271 snew_aligned(settled->ow1, settled->nalloc, 64);
272 snew_aligned(settled->hw2, settled->nalloc, 64);
273 snew_aligned(settled->hw3, settled->nalloc, 64);
274 snew_aligned(settled->virfac, settled->nalloc, 64);
277 for (int i = 0; i < nsettle; i++)
279 settled->ow1[i] = iatoms[i*nral1 + 1];
280 settled->hw2[i] = iatoms[i*nral1 + 2];
281 settled->hw3[i] = iatoms[i*nral1 + 3];
282 /* We should avoid double counting of virial contributions for
283 * SETTLEs that appear in multiple DD domains, so we only count
284 * the contribution on the home range of the oxygen atom.
286 settled->virfac[i] = (iatoms[i*nral1 + 1] < mdatoms->homenr ? 1 : 0);
289 /* Pack the index array to the full SIMD width with copies from
290 * the last normal entry, but with no virial contribution.
292 int end_packed = ((nsettle + pack_size - 1)/pack_size)*pack_size;
293 for (int i = nsettle; i < end_packed; i++)
295 settled->ow1[i] = settled->ow1[nsettle - 1];
296 settled->hw2[i] = settled->hw2[nsettle - 1];
297 settled->hw3[i] = settled->hw3[nsettle - 1];
298 settled->virfac[i] = 0;
303 void settle_proj(gmx_settledata_t settled, int econq,
304 int nsettle, t_iatom iatoms[],
305 const t_pbc *pbc,
306 rvec x[],
307 rvec *der, rvec *derp,
308 int calcvir_atom_end, tensor vir_r_m_dder)
310 /* Settle for projection out constraint components
311 * of derivatives of the coordinates.
312 * Berk Hess 2008-1-10
315 settleparam_t *p;
316 real imO, imH, dOH, dHH, invdOH, invdHH;
317 matrix invmat;
318 int i, m, m2, ow1, hw2, hw3;
319 rvec roh2, roh3, rhh, dc, fc;
321 calcvir_atom_end *= DIM;
323 if (econq == econqForce)
325 p = &settled->mass1;
327 else
329 p = &settled->massw;
331 imO = p->imO;
332 imH = p->imH;
333 copy_mat(p->invmat, invmat);
334 dOH = p->dOH;
335 dHH = p->dHH;
336 invdOH = p->invdOH;
337 invdHH = p->invdHH;
339 #ifdef PRAGMAS
340 #pragma ivdep
341 #endif
343 const int nral1 = 1 + NRAL(F_SETTLE);
345 for (i = 0; i < nsettle; i++)
347 ow1 = iatoms[i*nral1 + 1];
348 hw2 = iatoms[i*nral1 + 2];
349 hw3 = iatoms[i*nral1 + 3];
351 if (pbc == nullptr)
353 rvec_sub(x[ow1], x[hw2], roh2);
354 rvec_sub(x[ow1], x[hw3], roh3);
355 rvec_sub(x[hw2], x[hw3], rhh);
357 else
359 pbc_dx_aiuc(pbc, x[ow1], x[hw2], roh2);
360 pbc_dx_aiuc(pbc, x[ow1], x[hw3], roh3);
361 pbc_dx_aiuc(pbc, x[hw2], x[hw3], rhh);
363 svmul(invdOH, roh2, roh2);
364 svmul(invdOH, roh3, roh3);
365 svmul(invdHH, rhh, rhh);
366 /* 18 flops */
368 /* Determine the projections of der on the bonds */
369 clear_rvec(dc);
370 for (m = 0; m < DIM; m++)
372 dc[0] += (der[ow1][m] - der[hw2][m])*roh2[m];
373 dc[1] += (der[ow1][m] - der[hw3][m])*roh3[m];
374 dc[2] += (der[hw2][m] - der[hw3][m])*rhh [m];
376 /* 27 flops */
378 /* Determine the correction for the three bonds */
379 mvmul(invmat, dc, fc);
380 /* 15 flops */
382 /* Subtract the corrections from derp */
383 for (m = 0; m < DIM; m++)
385 derp[ow1][m] -= imO*( fc[0]*roh2[m] + fc[1]*roh3[m]);
386 derp[hw2][m] -= imH*(-fc[0]*roh2[m] + fc[2]*rhh [m]);
387 derp[hw3][m] -= imH*(-fc[1]*roh3[m] - fc[2]*rhh [m]);
390 /* 45 flops */
392 if (ow1 < calcvir_atom_end)
394 /* Determining r \dot m der is easy,
395 * since fc contains the mass weighted corrections for der.
398 for (m = 0; m < DIM; m++)
400 for (m2 = 0; m2 < DIM; m2++)
402 vir_r_m_dder[m][m2] +=
403 dOH*roh2[m]*roh2[m2]*fc[0] +
404 dOH*roh3[m]*roh3[m2]*fc[1] +
405 dHH*rhh [m]*rhh [m2]*fc[2];
413 /* The actual settle code, templated for real/SimdReal and for optimization */
414 template<typename T, typename TypeBool, int packSize,
415 typename TypePbc,
416 bool bCorrectVelocity,
417 bool bCalcVirial>
418 static void settleTemplate(const gmx_settledata_t settled,
419 int settleStart, int settleEnd,
420 const TypePbc pbc,
421 const real *x, real *xprime,
422 real invdt, real * gmx_restrict v,
423 tensor vir_r_m_dr,
424 bool *bErrorHasOccurred)
426 /* ******************************************************************* */
427 /* ** */
428 /* Original code by Shuichi Miyamoto, last update Oct. 1, 1992 ** */
429 /* ** */
430 /* Algorithm changes by Berk Hess: ** */
431 /* 2004-07-15 Convert COM to double precision to avoid drift ** */
432 /* 2006-10-16 Changed velocity update to use differences ** */
433 /* 2012-09-24 Use oxygen as reference instead of COM ** */
434 /* 2016-02 Complete rewrite of the code for SIMD ** */
435 /* ** */
436 /* Reference for the SETTLE algorithm ** */
437 /* S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992). ** */
438 /* ** */
439 /* ******************************************************************* */
441 assert(settleStart % packSize == 0);
442 assert(settleEnd % packSize == 0);
444 TypeBool bError = TypeBool(false);
446 settleparam_t *p = &settled->massw;
447 T wh = T(p->wh);
448 T rc = T(p->rc);
449 T ra = T(p->ra);
450 T rb = T(p->rb);
451 T irc2 = T(p->irc2);
452 T mO = T(p->mO);
453 T mH = T(p->mH);
455 T almost_zero = T(1e-12);
457 T sum_r_m_dr[DIM][DIM];
459 if (bCalcVirial)
461 for (int d2 = 0; d2 < DIM; d2++)
463 for (int d = 0; d < DIM; d++)
465 sum_r_m_dr[d2][d] = T(0);
470 for (int i = settleStart; i < settleEnd; i += packSize)
472 /* Here we pad up to packSize with copies from the last valid entry.
473 * This gives correct results, since we store (not increment) all
474 * output, so we store the same output multiple times.
476 const int *ow1 = settled->ow1 + i;
477 const int *hw2 = settled->hw2 + i;
478 const int *hw3 = settled->hw3 + i;
480 T x_ow1[DIM], x_hw2[DIM], x_hw3[DIM];
482 gatherLoadUTranspose<3>(x, ow1, &x_ow1[XX], &x_ow1[YY], &x_ow1[ZZ]);
483 gatherLoadUTranspose<3>(x, hw2, &x_hw2[XX], &x_hw2[YY], &x_hw2[ZZ]);
484 gatherLoadUTranspose<3>(x, hw3, &x_hw3[XX], &x_hw3[YY], &x_hw3[ZZ]);
486 T xprime_ow1[DIM], xprime_hw2[DIM], xprime_hw3[DIM];
488 gatherLoadUTranspose<3>(xprime, ow1, &xprime_ow1[XX], &xprime_ow1[YY], &xprime_ow1[ZZ]);
489 gatherLoadUTranspose<3>(xprime, hw2, &xprime_hw2[XX], &xprime_hw2[YY], &xprime_hw2[ZZ]);
490 gatherLoadUTranspose<3>(xprime, hw3, &xprime_hw3[XX], &xprime_hw3[YY], &xprime_hw3[ZZ]);
492 T dist21[DIM], dist31[DIM];
493 T doh2[DIM], doh3[DIM];
494 T sh_hw2[DIM], sh_hw3[DIM];
496 pbc_dx_aiuc(pbc, x_hw2, x_ow1, dist21);
498 pbc_dx_aiuc(pbc, x_hw3, x_ow1, dist31);
500 /* Tedious way of doing pbc */
501 pbc_dx_aiuc(pbc, xprime_hw2, xprime_ow1, doh2);
502 for (int d = 0; d < DIM; d++)
504 sh_hw2[d] = xprime_hw2[d] - (xprime_ow1[d] + doh2[d]);
505 xprime_hw2[d] = xprime_hw2[d] - sh_hw2[d];
507 pbc_dx_aiuc(pbc, xprime_hw3, xprime_ow1, doh3);
508 for (int d = 0; d < DIM; d++)
510 sh_hw3[d] = xprime_hw3[d] - (xprime_ow1[d] + doh3[d]);
511 xprime_hw3[d] = xprime_hw3[d] - sh_hw3[d];
514 /* Not calculating the center of mass using the oxygen position
515 * and the O-H distances, as done below, will make SETTLE
516 * the largest source of energy drift for simulations of water,
517 * as then the oxygen coordinate is multiplied by 0.89 at every step,
518 * which can then transfer a systematic rounding to the oxygen velocity.
520 T a1[DIM], com[DIM];
521 for (int d = 0; d < DIM; d++)
523 a1[d] = -(doh2[d] + doh3[d]) * wh;
524 com[d] = xprime_ow1[d] - a1[d];
526 T b1[DIM];
527 for (int d = 0; d < DIM; d++)
529 b1[d] = xprime_hw2[d] - com[d];
531 T c1[DIM];
532 for (int d = 0; d < DIM; d++)
534 c1[d] = xprime_hw3[d] - com[d];
536 /* 15 flops */
538 T xakszd = dist21[YY] * dist31[ZZ] - dist21[ZZ] * dist31[YY];
539 T yakszd = dist21[ZZ] * dist31[XX] - dist21[XX] * dist31[ZZ];
540 T zakszd = dist21[XX] * dist31[YY] - dist21[YY] * dist31[XX];
541 T xaksxd = a1[YY] * zakszd - a1[ZZ] * yakszd;
542 T yaksxd = a1[ZZ] * xakszd - a1[XX] * zakszd;
543 T zaksxd = a1[XX] * yakszd - a1[YY] * xakszd;
544 T xaksyd = yakszd * zaksxd - zakszd * yaksxd;
545 T yaksyd = zakszd * xaksxd - xakszd * zaksxd;
546 T zaksyd = xakszd * yaksxd - yakszd * xaksxd;
547 /* 27 flops */
549 T axlng = gmx::invsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd);
550 T aylng = gmx::invsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd);
551 T azlng = gmx::invsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd);
553 T trns1[DIM], trns2[DIM], trns3[DIM];
555 trns1[XX] = xaksxd * axlng;
556 trns2[XX] = yaksxd * axlng;
557 trns3[XX] = zaksxd * axlng;
558 trns1[YY] = xaksyd * aylng;
559 trns2[YY] = yaksyd * aylng;
560 trns3[YY] = zaksyd * aylng;
561 trns1[ZZ] = xakszd * azlng;
562 trns2[ZZ] = yakszd * azlng;
563 trns3[ZZ] = zakszd * azlng;
564 /* 24 flops */
566 T b0d[2], c0d[2];
568 for (int d = 0; d < 2; d++)
570 b0d[d] = trns1[d] * dist21[XX] + trns2[d] * dist21[YY] + trns3[d] * dist21[ZZ];
571 c0d[d] = trns1[d] * dist31[XX] + trns2[d] * dist31[YY] + trns3[d] * dist31[ZZ];
574 T a1d_z, b1d[DIM], c1d[DIM];
576 a1d_z = trns1[ZZ] * a1[XX] + trns2[ZZ] * a1[YY] + trns3[ZZ] * a1[ZZ];
577 for (int d = 0; d < DIM; d++)
579 b1d[d] = trns1[d] * b1[XX] + trns2[d] * b1[YY] + trns3[d] * b1[ZZ];
580 c1d[d] = trns1[d] * c1[XX] + trns2[d] * c1[YY] + trns3[d] * c1[ZZ];
582 /* 65 flops */
584 T tmp, tmp2;
586 T sinphi = a1d_z * gmx::invsqrt(ra*ra);
587 tmp2 = 1.0 - sinphi * sinphi;
589 /* If tmp2 gets close to or beyond zero we have severly distorted
590 * water molecules and we should terminate the simulation.
591 * Below we take the max with almost_zero to continue the loop.
593 bError = bError || (tmp2 <= almost_zero);
595 tmp2 = max(tmp2, almost_zero);
596 tmp = gmx::invsqrt(tmp2);
597 T cosphi = tmp2*tmp;
598 T sinpsi = (b1d[ZZ] - c1d[ZZ]) * irc2 * tmp;
599 tmp2 = 1.0 - sinpsi * sinpsi;
601 T cospsi = tmp2*gmx::invsqrt(tmp2);
602 /* 46 flops */
604 T a2d_y = ra * cosphi;
605 T b2d_x = -rc * cospsi;
606 T t1 = -rb * cosphi;
607 T t2 = rc * sinpsi * sinphi;
608 T b2d_y = t1 - t2;
609 T c2d_y = t1 + t2;
610 /* 7 flops */
612 /* --- Step3 al,be,ga --- */
613 T alpha = b2d_x * (b0d[XX] - c0d[XX]) + b0d[YY] * b2d_y + c0d[YY] * c2d_y;
614 T beta = b2d_x * (c0d[YY] - b0d[YY]) + b0d[XX] * b2d_y + c0d[XX] * c2d_y;
615 T gamma = b0d[XX] * b1d[YY] - b1d[XX] * b0d[YY] + c0d[XX] * c1d[YY] - c1d[XX] * c0d[YY];
616 T al2be2 = alpha * alpha + beta * beta;
617 tmp2 = (al2be2 - gamma * gamma);
618 T sinthe = (alpha * gamma - beta * tmp2*gmx::invsqrt(tmp2)) * gmx::invsqrt(al2be2*al2be2);
619 /* 47 flops */
621 /* --- Step4 A3' --- */
622 tmp2 = 1.0 - sinthe * sinthe;
623 T costhe = tmp2*gmx::invsqrt(tmp2);
625 T a3d[DIM], b3d[DIM], c3d[DIM];
627 a3d[XX] = -a2d_y * sinthe;
628 a3d[YY] = a2d_y * costhe;
629 a3d[ZZ] = a1d_z;
630 b3d[XX] = b2d_x * costhe - b2d_y * sinthe;
631 b3d[YY] = b2d_x * sinthe + b2d_y * costhe;
632 b3d[ZZ] = b1d[ZZ];
633 c3d[XX] = -b2d_x * costhe - c2d_y * sinthe;
634 c3d[YY] = -b2d_x * sinthe + c2d_y * costhe;
635 c3d[ZZ] = c1d[ZZ];
636 /* 26 flops */
638 /* --- Step5 A3 --- */
639 T a3[DIM], b3[DIM], c3[DIM];
641 a3[XX] = trns1[XX]*a3d[XX] + trns1[YY]*a3d[YY] + trns1[ZZ]*a3d[ZZ];
642 a3[YY] = trns2[XX]*a3d[XX] + trns2[YY]*a3d[YY] + trns2[ZZ]*a3d[ZZ];
643 a3[ZZ] = trns3[XX]*a3d[XX] + trns3[YY]*a3d[YY] + trns3[ZZ]*a3d[ZZ];
644 b3[XX] = trns1[XX]*b3d[XX] + trns1[YY]*b3d[YY] + trns1[ZZ]*b3d[ZZ];
645 b3[YY] = trns2[XX]*b3d[XX] + trns2[YY]*b3d[YY] + trns2[ZZ]*b3d[ZZ];
646 b3[ZZ] = trns3[XX]*b3d[XX] + trns3[YY]*b3d[YY] + trns3[ZZ]*b3d[ZZ];
647 c3[XX] = trns1[XX]*c3d[XX] + trns1[YY]*c3d[YY] + trns1[ZZ]*c3d[ZZ];
648 c3[YY] = trns2[XX]*c3d[XX] + trns2[YY]*c3d[YY] + trns2[ZZ]*c3d[ZZ];
649 c3[ZZ] = trns3[XX]*c3d[XX] + trns3[YY]*c3d[YY] + trns3[ZZ]*c3d[ZZ];
650 /* 45 flops */
652 /* Compute and store the corrected new coordinate */
653 for (int d = 0; d < DIM; d++)
655 xprime_ow1[d] = com[d] + a3[d];
657 for (int d = 0; d < DIM; d++)
659 xprime_hw2[d] = com[d] + b3[d] + sh_hw2[d];;
661 for (int d = 0; d < DIM; d++)
663 xprime_hw3[d] = com[d] + c3[d] + sh_hw3[d];
665 /* 9 flops + 6 pbc flops */
667 transposeScatterStoreU<3>(xprime, ow1, xprime_ow1[XX], xprime_ow1[YY], xprime_ow1[ZZ]);
668 transposeScatterStoreU<3>(xprime, hw2, xprime_hw2[XX], xprime_hw2[YY], xprime_hw2[ZZ]);
669 transposeScatterStoreU<3>(xprime, hw3, xprime_hw3[XX], xprime_hw3[YY], xprime_hw3[ZZ]);
671 // cppcheck-suppress duplicateExpression
672 if (bCorrectVelocity || bCalcVirial)
674 T da[DIM], db[DIM], dc[DIM];
675 for (int d = 0; d < DIM; d++)
677 da[d] = a3[d] - a1[d];
679 for (int d = 0; d < DIM; d++)
681 db[d] = b3[d] - b1[d];
683 for (int d = 0; d < DIM; d++)
685 dc[d] = c3[d] - c1[d];
687 /* 9 flops */
689 if (bCorrectVelocity)
691 T v_ow1[DIM], v_hw2[DIM], v_hw3[DIM];
693 gatherLoadUTranspose<3>(v, ow1, &v_ow1[XX], &v_ow1[YY], &v_ow1[ZZ]);
694 gatherLoadUTranspose<3>(v, hw2, &v_hw2[XX], &v_hw2[YY], &v_hw2[ZZ]);
695 gatherLoadUTranspose<3>(v, hw3, &v_hw3[XX], &v_hw3[YY], &v_hw3[ZZ]);
697 /* Add the position correction divided by dt to the velocity */
698 for (int d = 0; d < DIM; d++)
700 v_ow1[d] = gmx::fma(da[d], invdt, v_ow1[d]);
702 for (int d = 0; d < DIM; d++)
704 v_hw2[d] = gmx::fma(db[d], invdt, v_hw2[d]);
706 for (int d = 0; d < DIM; d++)
708 v_hw3[d] = gmx::fma(dc[d], invdt, v_hw3[d]);
710 /* 3*6 flops */
712 transposeScatterStoreU<3>(v, ow1, v_ow1[XX], v_ow1[YY], v_ow1[ZZ]);
713 transposeScatterStoreU<3>(v, hw2, v_hw2[XX], v_hw2[YY], v_hw2[ZZ]);
714 transposeScatterStoreU<3>(v, hw3, v_hw3[XX], v_hw3[YY], v_hw3[ZZ]);
717 if (bCalcVirial)
719 /* Filter out the non-local settles */
720 T filter = load(settled->virfac + i);
721 T mOf = filter*mO;
722 T mHf = filter*mH;
724 T mdo[DIM], mdb[DIM], mdc[DIM];
726 for (int d = 0; d < DIM; d++)
728 mdb[d] = mHf*db[d];
729 mdc[d] = mHf*dc[d];
730 mdo[d] = mOf*da[d] + mdb[d] + mdc[d];
733 for (int d2 = 0; d2 < DIM; d2++)
735 for (int d = 0; d < DIM; d++)
737 sum_r_m_dr[d2][d] = sum_r_m_dr[d2][d] -
738 (x_ow1[d2]*mdo[d] +
739 dist21[d2]*mdb[d] +
740 dist31[d2]*mdc[d]);
743 /* 71 flops */
748 if (bCalcVirial)
750 for (int d2 = 0; d2 < DIM; d2++)
752 for (int d = 0; d < DIM; d++)
754 vir_r_m_dr[d2][d] += reduce(sum_r_m_dr[d2][d]);
759 *bErrorHasOccurred = anyTrue(bError);
762 /* Wrapper template function that divides the settles over threads
763 * and instantiates the core template with instantiated booleans.
765 template<typename T, typename TypeBool, int packSize, typename TypePbc>
766 static void settleTemplateWrapper(gmx_settledata_t settled,
767 int nthread, int thread,
768 TypePbc pbc,
769 const real x[], real xprime[],
770 real invdt, real *v,
771 bool bCalcVirial, tensor vir_r_m_dr,
772 bool *bErrorHasOccurred)
774 /* We need to assign settles to threads in groups of pack_size */
775 int numSettlePacks = (settled->nsettle + packSize - 1)/packSize;
776 /* Round the end value up to give thread 0 more work */
777 int settleStart = ((numSettlePacks* thread + nthread - 1)/nthread)*packSize;
778 int settleEnd = ((numSettlePacks*(thread + 1) + nthread - 1)/nthread)*packSize;
780 if (v != nullptr)
782 if (!bCalcVirial)
784 settleTemplate<T, TypeBool, packSize,
785 TypePbc,
786 true,
787 false>
788 (settled, settleStart, settleEnd,
789 pbc,
790 x, xprime,
791 invdt, v,
792 nullptr,
793 bErrorHasOccurred);
795 else
797 settleTemplate<T, TypeBool, packSize,
798 TypePbc,
799 true,
800 true>
801 (settled, settleStart, settleEnd,
802 pbc,
803 x, xprime,
804 invdt, v,
805 vir_r_m_dr,
806 bErrorHasOccurred);
809 else
811 if (!bCalcVirial)
813 settleTemplate<T, TypeBool, packSize,
814 TypePbc,
815 false,
816 false>
817 (settled, settleStart, settleEnd,
818 pbc,
819 x, xprime,
820 invdt, v,
821 nullptr,
822 bErrorHasOccurred);
824 else
826 settleTemplate<T, TypeBool, packSize,
827 TypePbc,
828 false,
829 true>
830 (settled, settleStart, settleEnd,
831 pbc,
832 x, xprime,
833 invdt, v,
834 vir_r_m_dr,
835 bErrorHasOccurred);
840 void csettle(gmx_settledata_t settled,
841 int nthread, int thread,
842 const t_pbc *pbc,
843 const real x[], real xprime[],
844 real invdt, real *v,
845 bool bCalcVirial, tensor vir_r_m_dr,
846 bool *bErrorHasOccurred)
848 #if GMX_SIMD_HAVE_REAL
849 if (settled->bUseSimd)
851 /* Convert the pbc struct for SIMD */
852 GMX_ALIGNED(real, GMX_SIMD_REAL_WIDTH) pbcSimd[9*GMX_SIMD_REAL_WIDTH];
853 set_pbc_simd(pbc, pbcSimd);
855 settleTemplateWrapper<SimdReal, SimdBool, GMX_SIMD_REAL_WIDTH,
856 const real *>(settled,
857 nthread, thread,
858 pbcSimd,
859 x, xprime,
860 invdt,
862 bCalcVirial, vir_r_m_dr,
863 bErrorHasOccurred);
865 else
866 #endif
868 /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
869 t_pbc pbcNo;
870 const t_pbc *pbcNonNull;
872 if (pbc != nullptr)
874 pbcNonNull = pbc;
876 else
878 set_pbc(&pbcNo, epbcNONE, nullptr);
879 pbcNonNull = &pbcNo;
882 settleTemplateWrapper<real, bool, 1,
883 const t_pbc *>(settled,
884 nthread, thread,
885 pbcNonNull,
886 x, xprime,
887 invdt,
889 bCalcVirial, vir_r_m_dr,
890 bErrorHasOccurred);