2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
5 * Copyright (c) 2001-2004, The GROMACS development team.
6 * Copyright (c) 2013,2014,2015,2016,2017, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
45 #include "gromacs/math/functions.h"
46 #include "gromacs/math/invertmatrix.h"
47 #include "gromacs/math/vec.h"
48 #include "gromacs/mdlib/constr.h"
49 #include "gromacs/mdtypes/mdatom.h"
50 #include "gromacs/pbcutil/ishift.h"
51 #include "gromacs/pbcutil/pbc.h"
52 #include "gromacs/pbcutil/pbc-simd.h"
53 #include "gromacs/simd/simd.h"
54 #include "gromacs/simd/simd_math.h"
55 #include "gromacs/topology/mtop_util.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/gmxassert.h"
58 #include "gromacs/utility/smalloc.h"
60 using namespace gmx
; // TODO: Remove when this file is moved into gmx namespace
81 typedef struct gmx_settledata
83 settleparam_t massw
; /* Parameters for SETTLE for coordinates */
84 settleparam_t mass1
; /* Parameters with all masses 1, for forces */
86 int nsettle
; /* The number of settles on our rank */
87 int *ow1
; /* Index to OW1 atoms, size nsettle + SIMD padding */
88 int *hw2
; /* Index to HW2 atoms, size nsettle + SIMD padding */
89 int *hw3
; /* Index to HW3 atoms, size nsettle + SIMD padding */
90 real
*virfac
; /* Virial factor 0 or 1, size nsettle + SIMD pad. */
91 int nalloc
; /* Allocation size of ow1, hw2, hw3, virfac */
93 bool bUseSimd
; /* Use SIMD intrinsics code, if possible */
97 static void init_proj_matrix(real invmO
, real invmH
, real dOH
, real dHH
,
98 matrix inverseCouplingMatrix
)
100 /* We normalize the inverse masses with invmO for the matrix inversion.
101 * so we can keep using masses of almost zero for frozen particles,
102 * without running out of the float range in invertMatrix.
104 double invmORelative
= 1.0;
105 double invmHRelative
= invmH
/static_cast<double>(invmO
);
106 double distanceRatio
= dHH
/static_cast<double>(dOH
);
108 /* Construct the constraint coupling matrix */
110 mat
[0][0] = invmORelative
+ invmHRelative
;
111 mat
[0][1] = invmORelative
*(1.0 - 0.5*gmx::square(distanceRatio
));
112 mat
[0][2] = invmHRelative
*0.5*distanceRatio
;
113 mat
[1][1] = mat
[0][0];
114 mat
[1][2] = mat
[0][2];
115 mat
[2][2] = invmHRelative
+ invmHRelative
;
116 mat
[1][0] = mat
[0][1];
117 mat
[2][0] = mat
[0][2];
118 mat
[2][1] = mat
[1][2];
120 invertMatrix(mat
, inverseCouplingMatrix
);
122 msmul(inverseCouplingMatrix
, 1/invmO
, inverseCouplingMatrix
);
125 static void settleparam_init(settleparam_t
*p
,
126 real mO
, real mH
, real invmO
, real invmH
,
129 /* We calculate parameters in double precision to minimize errors.
130 * The velocity correction applied during SETTLE coordinate constraining
131 * introduces a systematic error of approximately 1 bit per atom,
132 * depending on what the compiler does with the code.
143 double ra
= 2.0*mH
*std::sqrt(dOH
*dOH
- rc
*rc
)/wohh
;
144 p
->rb
= std::sqrt(dOH
*dOH
- rc
*rc
) - ra
;
149 /* For projection: inverse masses and coupling matrix inversion */
156 init_proj_matrix(invmO
, invmH
, dOH
, dHH
, p
->invmat
);
160 fprintf(debug
, "wh =%g, rc = %g, ra = %g\n",
161 p
->wh
, p
->rc
, p
->ra
);
162 fprintf(debug
, "rb = %g, irc2 = %g, dHH = %g, dOH = %g\n",
163 p
->rb
, p
->irc2
, p
->dHH
, p
->dOH
);
167 gmx_settledata_t
settle_init(const gmx_mtop_t
*mtop
)
169 /* Check that we have only one settle type */
170 int settle_type
= -1;
171 gmx_mtop_ilistloop_t iloop
= gmx_mtop_ilistloop_init(mtop
);
174 const int nral1
= 1 + NRAL(F_SETTLE
);
175 while (gmx_mtop_ilistloop_next(iloop
, &ilist
, &nmol
))
177 for (int i
= 0; i
< ilist
[F_SETTLE
].nr
; i
+= nral1
)
179 if (settle_type
== -1)
181 settle_type
= ilist
[F_SETTLE
].iatoms
[i
];
183 else if (ilist
[F_SETTLE
].iatoms
[i
] != settle_type
)
186 "The [molecules] section of your topology specifies more than one block of\n"
187 "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
188 "If you are trying to partition your solvent into different *groups*\n"
189 "(e.g. for freezing, T-coupling, etc.), you are using the wrong approach. Index\n"
190 "files specify groups. Otherwise, you may wish to change the least-used\n"
191 "block of molecules with SETTLE constraints into 3 normal constraints.");
195 GMX_RELEASE_ASSERT(settle_type
>= 0, "settle_init called without settles");
197 gmx_settledata_t settled
;
201 /* We will not initialize the normal SETTLE parameters here yet,
202 * since the atom (inv)masses can depend on the integrator and
203 * free-energy perturbation. We set mO=-1 to trigger later initialization.
205 settled
->massw
.mO
= -1;
207 real dOH
= mtop
->ffparams
.iparams
[settle_type
].settle
.doh
;
208 real dHH
= mtop
->ffparams
.iparams
[settle_type
].settle
.dhh
;
209 settleparam_init(&settled
->mass1
, 1.0, 1.0, 1.0, 1.0, dOH
, dHH
);
211 settled
->ow1
= nullptr;
212 settled
->hw2
= nullptr;
213 settled
->hw3
= nullptr;
214 settled
->virfac
= nullptr;
217 /* Without SIMD configured, this bool is not used */
218 settled
->bUseSimd
= (getenv("GMX_DISABLE_SIMD_KERNELS") == nullptr);
223 void settle_free(gmx_settledata_t settled
)
225 sfree_aligned(settled
->ow1
);
226 sfree_aligned(settled
->hw2
);
227 sfree_aligned(settled
->hw3
);
228 sfree_aligned(settled
->virfac
);
232 void settle_set_constraints(gmx_settledata_t settled
,
233 const t_ilist
*il_settle
,
234 const t_mdatoms
*mdatoms
)
236 #if GMX_SIMD_HAVE_REAL
237 const int pack_size
= GMX_SIMD_REAL_WIDTH
;
239 const int pack_size
= 1;
242 const int nral1
= 1 + NRAL(F_SETTLE
);
243 int nsettle
= il_settle
->nr
/nral1
;
244 settled
->nsettle
= nsettle
;
248 const t_iatom
*iatoms
= il_settle
->iatoms
;
250 /* Here we initialize the normal SETTLE parameters */
251 if (settled
->massw
.mO
< 0)
253 int firstO
= iatoms
[1];
254 int firstH
= iatoms
[2];
255 settleparam_init(&settled
->massw
,
256 mdatoms
->massT
[firstO
],
257 mdatoms
->massT
[firstH
],
258 mdatoms
->invmass
[firstO
],
259 mdatoms
->invmass
[firstH
],
264 if (nsettle
+ pack_size
> settled
->nalloc
)
266 settled
->nalloc
= over_alloc_dd(nsettle
+ pack_size
);
267 sfree_aligned(settled
->ow1
);
268 sfree_aligned(settled
->hw2
);
269 sfree_aligned(settled
->hw3
);
270 sfree_aligned(settled
->virfac
);
271 snew_aligned(settled
->ow1
, settled
->nalloc
, 64);
272 snew_aligned(settled
->hw2
, settled
->nalloc
, 64);
273 snew_aligned(settled
->hw3
, settled
->nalloc
, 64);
274 snew_aligned(settled
->virfac
, settled
->nalloc
, 64);
277 for (int i
= 0; i
< nsettle
; i
++)
279 settled
->ow1
[i
] = iatoms
[i
*nral1
+ 1];
280 settled
->hw2
[i
] = iatoms
[i
*nral1
+ 2];
281 settled
->hw3
[i
] = iatoms
[i
*nral1
+ 3];
282 /* We should avoid double counting of virial contributions for
283 * SETTLEs that appear in multiple DD domains, so we only count
284 * the contribution on the home range of the oxygen atom.
286 settled
->virfac
[i
] = (iatoms
[i
*nral1
+ 1] < mdatoms
->homenr
? 1 : 0);
289 /* Pack the index array to the full SIMD width with copies from
290 * the last normal entry, but with no virial contribution.
292 int end_packed
= ((nsettle
+ pack_size
- 1)/pack_size
)*pack_size
;
293 for (int i
= nsettle
; i
< end_packed
; i
++)
295 settled
->ow1
[i
] = settled
->ow1
[nsettle
- 1];
296 settled
->hw2
[i
] = settled
->hw2
[nsettle
- 1];
297 settled
->hw3
[i
] = settled
->hw3
[nsettle
- 1];
298 settled
->virfac
[i
] = 0;
303 void settle_proj(gmx_settledata_t settled
, int econq
,
304 int nsettle
, t_iatom iatoms
[],
307 rvec
*der
, rvec
*derp
,
308 int calcvir_atom_end
, tensor vir_r_m_dder
)
310 /* Settle for projection out constraint components
311 * of derivatives of the coordinates.
312 * Berk Hess 2008-1-10
316 real imO
, imH
, dOH
, dHH
, invdOH
, invdHH
;
318 int i
, m
, m2
, ow1
, hw2
, hw3
;
319 rvec roh2
, roh3
, rhh
, dc
, fc
;
321 calcvir_atom_end
*= DIM
;
323 if (econq
== econqForce
)
333 copy_mat(p
->invmat
, invmat
);
343 const int nral1
= 1 + NRAL(F_SETTLE
);
345 for (i
= 0; i
< nsettle
; i
++)
347 ow1
= iatoms
[i
*nral1
+ 1];
348 hw2
= iatoms
[i
*nral1
+ 2];
349 hw3
= iatoms
[i
*nral1
+ 3];
353 rvec_sub(x
[ow1
], x
[hw2
], roh2
);
354 rvec_sub(x
[ow1
], x
[hw3
], roh3
);
355 rvec_sub(x
[hw2
], x
[hw3
], rhh
);
359 pbc_dx_aiuc(pbc
, x
[ow1
], x
[hw2
], roh2
);
360 pbc_dx_aiuc(pbc
, x
[ow1
], x
[hw3
], roh3
);
361 pbc_dx_aiuc(pbc
, x
[hw2
], x
[hw3
], rhh
);
363 svmul(invdOH
, roh2
, roh2
);
364 svmul(invdOH
, roh3
, roh3
);
365 svmul(invdHH
, rhh
, rhh
);
368 /* Determine the projections of der on the bonds */
370 for (m
= 0; m
< DIM
; m
++)
372 dc
[0] += (der
[ow1
][m
] - der
[hw2
][m
])*roh2
[m
];
373 dc
[1] += (der
[ow1
][m
] - der
[hw3
][m
])*roh3
[m
];
374 dc
[2] += (der
[hw2
][m
] - der
[hw3
][m
])*rhh
[m
];
378 /* Determine the correction for the three bonds */
379 mvmul(invmat
, dc
, fc
);
382 /* Subtract the corrections from derp */
383 for (m
= 0; m
< DIM
; m
++)
385 derp
[ow1
][m
] -= imO
*( fc
[0]*roh2
[m
] + fc
[1]*roh3
[m
]);
386 derp
[hw2
][m
] -= imH
*(-fc
[0]*roh2
[m
] + fc
[2]*rhh
[m
]);
387 derp
[hw3
][m
] -= imH
*(-fc
[1]*roh3
[m
] - fc
[2]*rhh
[m
]);
392 if (ow1
< calcvir_atom_end
)
394 /* Determining r \dot m der is easy,
395 * since fc contains the mass weighted corrections for der.
398 for (m
= 0; m
< DIM
; m
++)
400 for (m2
= 0; m2
< DIM
; m2
++)
402 vir_r_m_dder
[m
][m2
] +=
403 dOH
*roh2
[m
]*roh2
[m2
]*fc
[0] +
404 dOH
*roh3
[m
]*roh3
[m2
]*fc
[1] +
405 dHH
*rhh
[m
]*rhh
[m2
]*fc
[2];
413 /* The actual settle code, templated for real/SimdReal and for optimization */
414 template<typename T
, typename TypeBool
, int packSize
,
416 bool bCorrectVelocity
,
418 static void settleTemplate(const gmx_settledata_t settled
,
419 int settleStart
, int settleEnd
,
421 const real
*x
, real
*xprime
,
422 real invdt
, real
* gmx_restrict v
,
424 bool *bErrorHasOccurred
)
426 /* ******************************************************************* */
428 /* Original code by Shuichi Miyamoto, last update Oct. 1, 1992 ** */
430 /* Algorithm changes by Berk Hess: ** */
431 /* 2004-07-15 Convert COM to double precision to avoid drift ** */
432 /* 2006-10-16 Changed velocity update to use differences ** */
433 /* 2012-09-24 Use oxygen as reference instead of COM ** */
434 /* 2016-02 Complete rewrite of the code for SIMD ** */
436 /* Reference for the SETTLE algorithm ** */
437 /* S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992). ** */
439 /* ******************************************************************* */
441 assert(settleStart
% packSize
== 0);
442 assert(settleEnd
% packSize
== 0);
444 TypeBool bError
= TypeBool(false);
446 settleparam_t
*p
= &settled
->massw
;
455 T almost_zero
= T(1e-12);
457 T sum_r_m_dr
[DIM
][DIM
];
461 for (int d2
= 0; d2
< DIM
; d2
++)
463 for (int d
= 0; d
< DIM
; d
++)
465 sum_r_m_dr
[d2
][d
] = T(0);
470 for (int i
= settleStart
; i
< settleEnd
; i
+= packSize
)
472 /* Here we pad up to packSize with copies from the last valid entry.
473 * This gives correct results, since we store (not increment) all
474 * output, so we store the same output multiple times.
476 const int *ow1
= settled
->ow1
+ i
;
477 const int *hw2
= settled
->hw2
+ i
;
478 const int *hw3
= settled
->hw3
+ i
;
480 T x_ow1
[DIM
], x_hw2
[DIM
], x_hw3
[DIM
];
482 gatherLoadUTranspose
<3>(x
, ow1
, &x_ow1
[XX
], &x_ow1
[YY
], &x_ow1
[ZZ
]);
483 gatherLoadUTranspose
<3>(x
, hw2
, &x_hw2
[XX
], &x_hw2
[YY
], &x_hw2
[ZZ
]);
484 gatherLoadUTranspose
<3>(x
, hw3
, &x_hw3
[XX
], &x_hw3
[YY
], &x_hw3
[ZZ
]);
486 T xprime_ow1
[DIM
], xprime_hw2
[DIM
], xprime_hw3
[DIM
];
488 gatherLoadUTranspose
<3>(xprime
, ow1
, &xprime_ow1
[XX
], &xprime_ow1
[YY
], &xprime_ow1
[ZZ
]);
489 gatherLoadUTranspose
<3>(xprime
, hw2
, &xprime_hw2
[XX
], &xprime_hw2
[YY
], &xprime_hw2
[ZZ
]);
490 gatherLoadUTranspose
<3>(xprime
, hw3
, &xprime_hw3
[XX
], &xprime_hw3
[YY
], &xprime_hw3
[ZZ
]);
492 T dist21
[DIM
], dist31
[DIM
];
493 T doh2
[DIM
], doh3
[DIM
];
494 T sh_hw2
[DIM
], sh_hw3
[DIM
];
496 pbc_dx_aiuc(pbc
, x_hw2
, x_ow1
, dist21
);
498 pbc_dx_aiuc(pbc
, x_hw3
, x_ow1
, dist31
);
500 /* Tedious way of doing pbc */
501 pbc_dx_aiuc(pbc
, xprime_hw2
, xprime_ow1
, doh2
);
502 for (int d
= 0; d
< DIM
; d
++)
504 sh_hw2
[d
] = xprime_hw2
[d
] - (xprime_ow1
[d
] + doh2
[d
]);
505 xprime_hw2
[d
] = xprime_hw2
[d
] - sh_hw2
[d
];
507 pbc_dx_aiuc(pbc
, xprime_hw3
, xprime_ow1
, doh3
);
508 for (int d
= 0; d
< DIM
; d
++)
510 sh_hw3
[d
] = xprime_hw3
[d
] - (xprime_ow1
[d
] + doh3
[d
]);
511 xprime_hw3
[d
] = xprime_hw3
[d
] - sh_hw3
[d
];
514 /* Not calculating the center of mass using the oxygen position
515 * and the O-H distances, as done below, will make SETTLE
516 * the largest source of energy drift for simulations of water,
517 * as then the oxygen coordinate is multiplied by 0.89 at every step,
518 * which can then transfer a systematic rounding to the oxygen velocity.
521 for (int d
= 0; d
< DIM
; d
++)
523 a1
[d
] = -(doh2
[d
] + doh3
[d
]) * wh
;
524 com
[d
] = xprime_ow1
[d
] - a1
[d
];
527 for (int d
= 0; d
< DIM
; d
++)
529 b1
[d
] = xprime_hw2
[d
] - com
[d
];
532 for (int d
= 0; d
< DIM
; d
++)
534 c1
[d
] = xprime_hw3
[d
] - com
[d
];
538 T xakszd
= dist21
[YY
] * dist31
[ZZ
] - dist21
[ZZ
] * dist31
[YY
];
539 T yakszd
= dist21
[ZZ
] * dist31
[XX
] - dist21
[XX
] * dist31
[ZZ
];
540 T zakszd
= dist21
[XX
] * dist31
[YY
] - dist21
[YY
] * dist31
[XX
];
541 T xaksxd
= a1
[YY
] * zakszd
- a1
[ZZ
] * yakszd
;
542 T yaksxd
= a1
[ZZ
] * xakszd
- a1
[XX
] * zakszd
;
543 T zaksxd
= a1
[XX
] * yakszd
- a1
[YY
] * xakszd
;
544 T xaksyd
= yakszd
* zaksxd
- zakszd
* yaksxd
;
545 T yaksyd
= zakszd
* xaksxd
- xakszd
* zaksxd
;
546 T zaksyd
= xakszd
* yaksxd
- yakszd
* xaksxd
;
549 T axlng
= gmx::invsqrt(xaksxd
* xaksxd
+ yaksxd
* yaksxd
+ zaksxd
* zaksxd
);
550 T aylng
= gmx::invsqrt(xaksyd
* xaksyd
+ yaksyd
* yaksyd
+ zaksyd
* zaksyd
);
551 T azlng
= gmx::invsqrt(xakszd
* xakszd
+ yakszd
* yakszd
+ zakszd
* zakszd
);
553 T trns1
[DIM
], trns2
[DIM
], trns3
[DIM
];
555 trns1
[XX
] = xaksxd
* axlng
;
556 trns2
[XX
] = yaksxd
* axlng
;
557 trns3
[XX
] = zaksxd
* axlng
;
558 trns1
[YY
] = xaksyd
* aylng
;
559 trns2
[YY
] = yaksyd
* aylng
;
560 trns3
[YY
] = zaksyd
* aylng
;
561 trns1
[ZZ
] = xakszd
* azlng
;
562 trns2
[ZZ
] = yakszd
* azlng
;
563 trns3
[ZZ
] = zakszd
* azlng
;
568 for (int d
= 0; d
< 2; d
++)
570 b0d
[d
] = trns1
[d
] * dist21
[XX
] + trns2
[d
] * dist21
[YY
] + trns3
[d
] * dist21
[ZZ
];
571 c0d
[d
] = trns1
[d
] * dist31
[XX
] + trns2
[d
] * dist31
[YY
] + trns3
[d
] * dist31
[ZZ
];
574 T a1d_z
, b1d
[DIM
], c1d
[DIM
];
576 a1d_z
= trns1
[ZZ
] * a1
[XX
] + trns2
[ZZ
] * a1
[YY
] + trns3
[ZZ
] * a1
[ZZ
];
577 for (int d
= 0; d
< DIM
; d
++)
579 b1d
[d
] = trns1
[d
] * b1
[XX
] + trns2
[d
] * b1
[YY
] + trns3
[d
] * b1
[ZZ
];
580 c1d
[d
] = trns1
[d
] * c1
[XX
] + trns2
[d
] * c1
[YY
] + trns3
[d
] * c1
[ZZ
];
586 T sinphi
= a1d_z
* gmx::invsqrt(ra
*ra
);
587 tmp2
= 1.0 - sinphi
* sinphi
;
589 /* If tmp2 gets close to or beyond zero we have severly distorted
590 * water molecules and we should terminate the simulation.
591 * Below we take the max with almost_zero to continue the loop.
593 bError
= bError
|| (tmp2
<= almost_zero
);
595 tmp2
= max(tmp2
, almost_zero
);
596 tmp
= gmx::invsqrt(tmp2
);
598 T sinpsi
= (b1d
[ZZ
] - c1d
[ZZ
]) * irc2
* tmp
;
599 tmp2
= 1.0 - sinpsi
* sinpsi
;
601 T cospsi
= tmp2
*gmx::invsqrt(tmp2
);
604 T a2d_y
= ra
* cosphi
;
605 T b2d_x
= -rc
* cospsi
;
607 T t2
= rc
* sinpsi
* sinphi
;
612 /* --- Step3 al,be,ga --- */
613 T alpha
= b2d_x
* (b0d
[XX
] - c0d
[XX
]) + b0d
[YY
] * b2d_y
+ c0d
[YY
] * c2d_y
;
614 T beta
= b2d_x
* (c0d
[YY
] - b0d
[YY
]) + b0d
[XX
] * b2d_y
+ c0d
[XX
] * c2d_y
;
615 T gamma
= b0d
[XX
] * b1d
[YY
] - b1d
[XX
] * b0d
[YY
] + c0d
[XX
] * c1d
[YY
] - c1d
[XX
] * c0d
[YY
];
616 T al2be2
= alpha
* alpha
+ beta
* beta
;
617 tmp2
= (al2be2
- gamma
* gamma
);
618 T sinthe
= (alpha
* gamma
- beta
* tmp2
*gmx::invsqrt(tmp2
)) * gmx::invsqrt(al2be2
*al2be2
);
621 /* --- Step4 A3' --- */
622 tmp2
= 1.0 - sinthe
* sinthe
;
623 T costhe
= tmp2
*gmx::invsqrt(tmp2
);
625 T a3d
[DIM
], b3d
[DIM
], c3d
[DIM
];
627 a3d
[XX
] = -a2d_y
* sinthe
;
628 a3d
[YY
] = a2d_y
* costhe
;
630 b3d
[XX
] = b2d_x
* costhe
- b2d_y
* sinthe
;
631 b3d
[YY
] = b2d_x
* sinthe
+ b2d_y
* costhe
;
633 c3d
[XX
] = -b2d_x
* costhe
- c2d_y
* sinthe
;
634 c3d
[YY
] = -b2d_x
* sinthe
+ c2d_y
* costhe
;
638 /* --- Step5 A3 --- */
639 T a3
[DIM
], b3
[DIM
], c3
[DIM
];
641 a3
[XX
] = trns1
[XX
]*a3d
[XX
] + trns1
[YY
]*a3d
[YY
] + trns1
[ZZ
]*a3d
[ZZ
];
642 a3
[YY
] = trns2
[XX
]*a3d
[XX
] + trns2
[YY
]*a3d
[YY
] + trns2
[ZZ
]*a3d
[ZZ
];
643 a3
[ZZ
] = trns3
[XX
]*a3d
[XX
] + trns3
[YY
]*a3d
[YY
] + trns3
[ZZ
]*a3d
[ZZ
];
644 b3
[XX
] = trns1
[XX
]*b3d
[XX
] + trns1
[YY
]*b3d
[YY
] + trns1
[ZZ
]*b3d
[ZZ
];
645 b3
[YY
] = trns2
[XX
]*b3d
[XX
] + trns2
[YY
]*b3d
[YY
] + trns2
[ZZ
]*b3d
[ZZ
];
646 b3
[ZZ
] = trns3
[XX
]*b3d
[XX
] + trns3
[YY
]*b3d
[YY
] + trns3
[ZZ
]*b3d
[ZZ
];
647 c3
[XX
] = trns1
[XX
]*c3d
[XX
] + trns1
[YY
]*c3d
[YY
] + trns1
[ZZ
]*c3d
[ZZ
];
648 c3
[YY
] = trns2
[XX
]*c3d
[XX
] + trns2
[YY
]*c3d
[YY
] + trns2
[ZZ
]*c3d
[ZZ
];
649 c3
[ZZ
] = trns3
[XX
]*c3d
[XX
] + trns3
[YY
]*c3d
[YY
] + trns3
[ZZ
]*c3d
[ZZ
];
652 /* Compute and store the corrected new coordinate */
653 for (int d
= 0; d
< DIM
; d
++)
655 xprime_ow1
[d
] = com
[d
] + a3
[d
];
657 for (int d
= 0; d
< DIM
; d
++)
659 xprime_hw2
[d
] = com
[d
] + b3
[d
] + sh_hw2
[d
];;
661 for (int d
= 0; d
< DIM
; d
++)
663 xprime_hw3
[d
] = com
[d
] + c3
[d
] + sh_hw3
[d
];
665 /* 9 flops + 6 pbc flops */
667 transposeScatterStoreU
<3>(xprime
, ow1
, xprime_ow1
[XX
], xprime_ow1
[YY
], xprime_ow1
[ZZ
]);
668 transposeScatterStoreU
<3>(xprime
, hw2
, xprime_hw2
[XX
], xprime_hw2
[YY
], xprime_hw2
[ZZ
]);
669 transposeScatterStoreU
<3>(xprime
, hw3
, xprime_hw3
[XX
], xprime_hw3
[YY
], xprime_hw3
[ZZ
]);
671 // cppcheck-suppress duplicateExpression
672 if (bCorrectVelocity
|| bCalcVirial
)
674 T da
[DIM
], db
[DIM
], dc
[DIM
];
675 for (int d
= 0; d
< DIM
; d
++)
677 da
[d
] = a3
[d
] - a1
[d
];
679 for (int d
= 0; d
< DIM
; d
++)
681 db
[d
] = b3
[d
] - b1
[d
];
683 for (int d
= 0; d
< DIM
; d
++)
685 dc
[d
] = c3
[d
] - c1
[d
];
689 if (bCorrectVelocity
)
691 T v_ow1
[DIM
], v_hw2
[DIM
], v_hw3
[DIM
];
693 gatherLoadUTranspose
<3>(v
, ow1
, &v_ow1
[XX
], &v_ow1
[YY
], &v_ow1
[ZZ
]);
694 gatherLoadUTranspose
<3>(v
, hw2
, &v_hw2
[XX
], &v_hw2
[YY
], &v_hw2
[ZZ
]);
695 gatherLoadUTranspose
<3>(v
, hw3
, &v_hw3
[XX
], &v_hw3
[YY
], &v_hw3
[ZZ
]);
697 /* Add the position correction divided by dt to the velocity */
698 for (int d
= 0; d
< DIM
; d
++)
700 v_ow1
[d
] = gmx::fma(da
[d
], invdt
, v_ow1
[d
]);
702 for (int d
= 0; d
< DIM
; d
++)
704 v_hw2
[d
] = gmx::fma(db
[d
], invdt
, v_hw2
[d
]);
706 for (int d
= 0; d
< DIM
; d
++)
708 v_hw3
[d
] = gmx::fma(dc
[d
], invdt
, v_hw3
[d
]);
712 transposeScatterStoreU
<3>(v
, ow1
, v_ow1
[XX
], v_ow1
[YY
], v_ow1
[ZZ
]);
713 transposeScatterStoreU
<3>(v
, hw2
, v_hw2
[XX
], v_hw2
[YY
], v_hw2
[ZZ
]);
714 transposeScatterStoreU
<3>(v
, hw3
, v_hw3
[XX
], v_hw3
[YY
], v_hw3
[ZZ
]);
719 /* Filter out the non-local settles */
720 T filter
= load(settled
->virfac
+ i
);
724 T mdo
[DIM
], mdb
[DIM
], mdc
[DIM
];
726 for (int d
= 0; d
< DIM
; d
++)
730 mdo
[d
] = mOf
*da
[d
] + mdb
[d
] + mdc
[d
];
733 for (int d2
= 0; d2
< DIM
; d2
++)
735 for (int d
= 0; d
< DIM
; d
++)
737 sum_r_m_dr
[d2
][d
] = sum_r_m_dr
[d2
][d
] -
750 for (int d2
= 0; d2
< DIM
; d2
++)
752 for (int d
= 0; d
< DIM
; d
++)
754 vir_r_m_dr
[d2
][d
] += reduce(sum_r_m_dr
[d2
][d
]);
759 *bErrorHasOccurred
= anyTrue(bError
);
762 /* Wrapper template function that divides the settles over threads
763 * and instantiates the core template with instantiated booleans.
765 template<typename T
, typename TypeBool
, int packSize
, typename TypePbc
>
766 static void settleTemplateWrapper(gmx_settledata_t settled
,
767 int nthread
, int thread
,
769 const real x
[], real xprime
[],
771 bool bCalcVirial
, tensor vir_r_m_dr
,
772 bool *bErrorHasOccurred
)
774 /* We need to assign settles to threads in groups of pack_size */
775 int numSettlePacks
= (settled
->nsettle
+ packSize
- 1)/packSize
;
776 /* Round the end value up to give thread 0 more work */
777 int settleStart
= ((numSettlePacks
* thread
+ nthread
- 1)/nthread
)*packSize
;
778 int settleEnd
= ((numSettlePacks
*(thread
+ 1) + nthread
- 1)/nthread
)*packSize
;
784 settleTemplate
<T
, TypeBool
, packSize
,
788 (settled
, settleStart
, settleEnd
,
797 settleTemplate
<T
, TypeBool
, packSize
,
801 (settled
, settleStart
, settleEnd
,
813 settleTemplate
<T
, TypeBool
, packSize
,
817 (settled
, settleStart
, settleEnd
,
826 settleTemplate
<T
, TypeBool
, packSize
,
830 (settled
, settleStart
, settleEnd
,
840 void csettle(gmx_settledata_t settled
,
841 int nthread
, int thread
,
843 const real x
[], real xprime
[],
845 bool bCalcVirial
, tensor vir_r_m_dr
,
846 bool *bErrorHasOccurred
)
848 #if GMX_SIMD_HAVE_REAL
849 if (settled
->bUseSimd
)
851 /* Convert the pbc struct for SIMD */
852 GMX_ALIGNED(real
, GMX_SIMD_REAL_WIDTH
) pbcSimd
[9*GMX_SIMD_REAL_WIDTH
];
853 set_pbc_simd(pbc
, pbcSimd
);
855 settleTemplateWrapper
<SimdReal
, SimdBool
, GMX_SIMD_REAL_WIDTH
,
856 const real
*>(settled
,
862 bCalcVirial
, vir_r_m_dr
,
868 /* This construct is needed because pbc_dx_aiuc doesn't accept pbc=NULL */
870 const t_pbc
*pbcNonNull
;
878 set_pbc(&pbcNo
, epbcNONE
, nullptr);
882 settleTemplateWrapper
<real
, bool, 1,
883 const t_pbc
*>(settled
,
889 bCalcVirial
, vir_r_m_dr
,