2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Implements SETTLE using CUDA
39 * This file contains implementation of SETTLE constraints algorithm
40 * using CUDA, including class initialization, data-structures management
43 * \note Management of CUDA stream and periodic boundary exists here as a temporary
44 * scaffolding to allow this class to be used as a stand-alone unit. The scaffolding
45 * is intended to be removed once constraints are fully integrated with update module.
46 * \todo Reconsider naming to use "gpu" suffix instead of "cuda".
48 * \author Artem Zhmurov <zhmurov@gmail.com>
50 * \ingroup module_mdlib
54 #include "settle_cuda.cuh"
63 #include "gromacs/gpu_utils/cuda_arch_utils.cuh"
64 #include "gromacs/gpu_utils/cudautils.cuh"
65 #include "gromacs/gpu_utils/devicebuffer.h"
66 #include "gromacs/gpu_utils/gputraits.cuh"
67 #include "gromacs/gpu_utils/vectype_ops.cuh"
68 #include "gromacs/math/vec.h"
69 #include "gromacs/pbcutil/pbc.h"
70 #include "gromacs/pbcutil/pbc_aiuc_cuda.cuh"
75 //! Number of CUDA threads in a block
76 constexpr static int c_threadsPerBlock = 256;
77 //! Maximum number of threads in a block (for __launch_bounds__)
78 constexpr static int c_maxThreadsPerBlock = c_threadsPerBlock;
80 /*! \brief SETTLE constraints kernel
82 * Each thread corresponds to a single constraints triangle (i.e. single water molecule).
84 * See original CPU version in settle.cpp
86 * \param [in] numSettles Number of constraints triangles (water molecules).
87 * \param [in] gm_settles Indexes of three atoms in the constraints triangle. The field .x of int3
88 * data type corresponds to Oxygen, fields .y and .z are two hydrogen atoms.
89 * \param [in] pars Parameters for the algorithm (i.e. masses, target distances, etc.).
90 * \param [in] gm_x Coordinates of atoms before the timestep.
91 * \param [in,out] gm_x Coordinates of atoms after the timestep (constrained coordinates will be
93 * \param [in] pbcAiuc Periodic boundary conditions data.
94 * \param [in] invdt Reciprocal timestep.
95 * \param [in] gm_v Velocities of the particles.
96 * \param [in] gm_virialScaled Virial tensor.
98 template <bool updateVelocities, bool computeVirial>
99 __launch_bounds__(c_maxThreadsPerBlock)
100 __global__ void settle_kernel(const int numSettles,
101 const int3* __restrict__ gm_settles,
102 const SettleParameters pars,
103 const float3* __restrict__ gm_x,
104 float3* __restrict__ gm_xprime,
105 const PbcAiuc pbcAiuc,
107 float3* __restrict__ gm_v,
108 float* __restrict__ gm_virialScaled)
110 /* ******************************************************************* */
112 /* Original code by Shuichi Miyamoto, last update Oct. 1, 1992 ** */
114 /* Algorithm changes by Berk Hess: ** */
115 /* 2004-07-15 Convert COM to double precision to avoid drift ** */
116 /* 2006-10-16 Changed velocity update to use differences ** */
117 /* 2012-09-24 Use oxygen as reference instead of COM ** */
118 /* 2016-02 Complete rewrite of the code for SIMD ** */
120 /* Reference for the SETTLE algorithm ** */
121 /* S. Miyamoto et al., J. Comp. Chem., 13, 952 (1992). ** */
123 /* ******************************************************************* */
125 constexpr float almost_zero = real(1e-12);
127 extern __shared__ float sm_threadVirial[];
129 int tid = static_cast<int>(blockIdx.x*blockDim.x + threadIdx.x);
131 if (tid < numSettles)
133 // These are the indexes of three atoms in a single 'water' molecule.
134 // TODO Can be reduced to one integer if atoms are consecutive in memory.
135 int3 indices = gm_settles[tid];
137 float3 x_ow1 = gm_x[indices.x];
138 float3 x_hw2 = gm_x[indices.y];
139 float3 x_hw3 = gm_x[indices.z];
141 float3 xprime_ow1 = gm_xprime[indices.x];
142 float3 xprime_hw2 = gm_xprime[indices.y];
143 float3 xprime_hw3 = gm_xprime[indices.z];
145 float3 dist21 = pbcDxAiuc(pbcAiuc, x_hw2, x_ow1);
146 float3 dist31 = pbcDxAiuc(pbcAiuc, x_hw3, x_ow1);
147 float3 doh2 = pbcDxAiuc(pbcAiuc, xprime_hw2, xprime_ow1);
149 float3 sh_hw2 = xprime_hw2 - (xprime_ow1 + doh2);
150 xprime_hw2 = xprime_hw2 - sh_hw2;
152 float3 doh3 = pbcDxAiuc(pbcAiuc, xprime_hw3, xprime_ow1);
154 float3 sh_hw3 = xprime_hw3 - (xprime_ow1 + doh3);
155 xprime_hw3 = xprime_hw3 - sh_hw3;
157 float3 a1 = (-doh2 - doh3) * pars.wh;
158 float3 com = xprime_ow1 - a1;
160 float3 b1 = xprime_hw2 - com;
162 float3 c1 = xprime_hw3 - com;
164 float xakszd = dist21.y * dist31.z - dist21.z * dist31.y;
165 float yakszd = dist21.z * dist31.x - dist21.x * dist31.z;
166 float zakszd = dist21.x * dist31.y - dist21.y * dist31.x;
168 float xaksxd = a1.y * zakszd - a1.z * yakszd;
169 float yaksxd = a1.z * xakszd - a1.x * zakszd;
170 float zaksxd = a1.x * yakszd - a1.y * xakszd;
172 float xaksyd = yakszd * zaksxd - zakszd * yaksxd;
173 float yaksyd = zakszd * xaksxd - xakszd * zaksxd;
174 float zaksyd = xakszd * yaksxd - yakszd * xaksxd;
176 float axlng = rsqrt(xaksxd * xaksxd + yaksxd * yaksxd + zaksxd * zaksxd);
177 float aylng = rsqrt(xaksyd * xaksyd + yaksyd * yaksyd + zaksyd * zaksyd);
178 float azlng = rsqrt(xakszd * xakszd + yakszd * yakszd + zakszd * zakszd);
180 // TODO {1,2,3} indexes should be swapped with {.x, .y, .z} components.
181 // This way, we will be able to use vector ops more.
182 float3 trns1, trns2, trns3;
184 trns1.x = xaksxd * axlng;
185 trns2.x = yaksxd * axlng;
186 trns3.x = zaksxd * axlng;
188 trns1.y = xaksyd * aylng;
189 trns2.y = yaksyd * aylng;
190 trns3.y = zaksyd * aylng;
192 trns1.z = xakszd * azlng;
193 trns2.z = yakszd * azlng;
194 trns3.z = zakszd * azlng;
199 b0d.x = trns1.x * dist21.x + trns2.x * dist21.y + trns3.x * dist21.z;
200 b0d.y = trns1.y * dist21.x + trns2.y * dist21.y + trns3.y * dist21.z;
202 c0d.x = trns1.x * dist31.x + trns2.x * dist31.y + trns3.x * dist31.z;
203 c0d.y = trns1.y * dist31.x + trns2.y * dist31.y + trns3.y * dist31.z;
207 float a1d_z = trns1.z * a1.x + trns2.z * a1.y + trns3.z * a1.z;
209 b1d.x = trns1.x * b1.x + trns2.x * b1.y + trns3.x * b1.z;
210 b1d.y = trns1.y * b1.x + trns2.y * b1.y + trns3.y * b1.z;
211 b1d.z = trns1.z * b1.x + trns2.z * b1.y + trns3.z * b1.z;
213 c1d.x = trns1.x * c1.x + trns2.x * c1.y + trns3.x * c1.z;
214 c1d.y = trns1.y * c1.x + trns2.y * c1.y + trns3.y * c1.z;
215 c1d.z = trns1.z * c1.x + trns2.z * c1.y + trns3.z * c1.z;
218 float sinphi = a1d_z * rsqrt(pars.ra*pars.ra);
219 float tmp2 = 1.0f - sinphi * sinphi;
221 if (almost_zero > tmp2)
226 float tmp = rsqrt(tmp2);
227 float cosphi = tmp2*tmp;
228 float sinpsi = (b1d.z - c1d.z) * pars.irc2 * tmp;
229 tmp2 = 1.0f - sinpsi * sinpsi;
231 float cospsi = tmp2*rsqrt(tmp2);
233 float a2d_y = pars.ra * cosphi;
234 float b2d_x = -pars.rc * cospsi;
235 float t1 = -pars.rb * cosphi;
236 float t2 = pars.rc * sinpsi * sinphi;
237 float b2d_y = t1 - t2;
238 float c2d_y = t1 + t2;
240 /* --- Step3 al,be,ga --- */
241 float alpha = b2d_x * (b0d.x - c0d.x) + b0d.y * b2d_y + c0d.y * c2d_y;
242 float beta = b2d_x * (c0d.y - b0d.y) + b0d.x * b2d_y + c0d.x * c2d_y;
243 float gamma = b0d.x * b1d.y - b1d.x * b0d.y + c0d.x * c1d.y - c1d.x * c0d.y;
244 float al2be2 = alpha * alpha + beta * beta;
245 tmp2 = (al2be2 - gamma * gamma);
246 float sinthe = (alpha * gamma - beta * tmp2*rsqrt(tmp2)) * rsqrt(al2be2*al2be2);
248 /* --- Step4 A3' --- */
249 tmp2 = 1.0f - sinthe * sinthe;
250 float costhe = tmp2*rsqrt(tmp2);
252 float3 a3d, b3d, c3d;
254 a3d.x = -a2d_y * sinthe;
255 a3d.y = a2d_y * costhe;
257 b3d.x = b2d_x * costhe - b2d_y * sinthe;
258 b3d.y = b2d_x * sinthe + b2d_y * costhe;
260 c3d.x = -b2d_x * costhe - c2d_y * sinthe;
261 c3d.y = -b2d_x * sinthe + c2d_y * costhe;
264 /* --- Step5 A3 --- */
267 a3.x = trns1.x*a3d.x + trns1.y*a3d.y + trns1.z*a3d.z;
268 a3.y = trns2.x*a3d.x + trns2.y*a3d.y + trns2.z*a3d.z;
269 a3.z = trns3.x*a3d.x + trns3.y*a3d.y + trns3.z*a3d.z;
271 b3.x = trns1.x*b3d.x + trns1.y*b3d.y + trns1.z*b3d.z;
272 b3.y = trns2.x*b3d.x + trns2.y*b3d.y + trns2.z*b3d.z;
273 b3.z = trns3.x*b3d.x + trns3.y*b3d.y + trns3.z*b3d.z;
275 c3.x = trns1.x*c3d.x + trns1.y*c3d.y + trns1.z*c3d.z;
276 c3.y = trns2.x*c3d.x + trns2.y*c3d.y + trns2.z*c3d.z;
277 c3.z = trns3.x*c3d.x + trns3.y*c3d.y + trns3.z*c3d.z;
280 /* Compute and store the corrected new coordinate */
281 xprime_ow1 = com + a3;
282 xprime_hw2 = com + b3 + sh_hw2;
283 xprime_hw3 = com + c3 + sh_hw3;
285 gm_xprime[indices.x] = xprime_ow1;
286 gm_xprime[indices.y] = xprime_hw2;
287 gm_xprime[indices.z] = xprime_hw3;
290 if (updateVelocities || computeVirial)
297 if (updateVelocities)
300 float3 v_ow1 = gm_v[indices.x];
301 float3 v_hw2 = gm_v[indices.y];
302 float3 v_hw3 = gm_v[indices.z];
304 /* Add the position correction divided by dt to the velocity */
305 v_ow1 = da*invdt + v_ow1;
306 v_hw2 = db*invdt + v_hw2;
307 v_hw3 = dc*invdt + v_hw3;
309 gm_v[indices.x] = v_ow1;
310 gm_v[indices.y] = v_hw2;
311 gm_v[indices.z] = v_hw3;
318 float3 mdb = pars.mH*db;
319 float3 mdc = pars.mH*dc;
320 float3 mdo = pars.mO*da + mdb + mdc;
322 sm_threadVirial[0*blockDim.x + threadIdx.x] = -(x_ow1.x*mdo.x + dist21.x*mdb.x + dist31.x*mdc.x);
323 sm_threadVirial[1*blockDim.x + threadIdx.x] = -(x_ow1.x*mdo.y + dist21.x*mdb.y + dist31.x*mdc.y);
324 sm_threadVirial[2*blockDim.x + threadIdx.x] = -(x_ow1.x*mdo.z + dist21.x*mdb.z + dist31.x*mdc.z);
325 sm_threadVirial[3*blockDim.x + threadIdx.x] = -(x_ow1.y*mdo.y + dist21.y*mdb.y + dist31.y*mdc.y);
326 sm_threadVirial[4*blockDim.x + threadIdx.x] = -(x_ow1.y*mdo.z + dist21.y*mdb.z + dist31.y*mdc.z);
327 sm_threadVirial[5*blockDim.x + threadIdx.x] = -(x_ow1.z*mdo.z + dist21.z*mdb.z + dist31.z*mdc.z);
333 // Filling data for dummy threads with zeroes
336 for (int d = 0; d < 6; d++)
338 sm_threadVirial[d*blockDim.x + threadIdx.x] = 0.0f;
342 // Basic reduction for the values inside single thread block
343 // TODO what follows should be separated out as a standard virial reduction subroutine
346 // This is to ensure that all threads saved the data before reduction starts
348 // This casts unsigned into signed integers to avoid clang warnings
349 int tib = static_cast<int>(threadIdx.x);
350 int blockSize = static_cast<int>(blockDim.x);
351 // Reduce up to one virial per thread block
352 // All blocks are divided by half, the first half of threads sums
353 // two virials. Then the first half is divided by two and the first half
354 // of it sums two values... The procedure continues until only one thread left.
355 // Only works if the threads per blocks is a power of two.
356 for (int divideBy = 2; divideBy <= blockSize; divideBy *= 2)
358 int dividedAt = blockSize/divideBy;
361 for (int d = 0; d < 6; d++)
363 sm_threadVirial[d*blockSize + tib] += sm_threadVirial[d*blockSize + (tib + dividedAt)];
366 if (dividedAt > warpSize/2)
371 // First 6 threads in the block add the 6 components of virial to the global memory address
374 atomicAdd(&(gm_virialScaled[tib]), sm_threadVirial[tib*blockSize]);
381 /*! \brief Select templated kernel.
383 * Returns pointer to a CUDA kernel based on provided booleans.
385 * \param[in] updateVelocities If the velocities should be constrained.
386 * \param[in] bCalcVir If virial should be updated.
388 * \retrun Pointer to CUDA kernel
390 inline auto getSettleKernelPtr(const bool updateVelocities,
391 const bool computeVirial)
394 auto kernelPtr = settle_kernel<true, true>;
395 if (updateVelocities && computeVirial)
397 kernelPtr = settle_kernel<true, true>;
399 else if (updateVelocities && !computeVirial)
401 kernelPtr = settle_kernel<true, false>;
403 else if (!updateVelocities && computeVirial)
405 kernelPtr = settle_kernel<false, true>;
407 else if (!updateVelocities && !computeVirial)
409 kernelPtr = settle_kernel<false, false>;
414 void SettleCuda::apply(const float3 *d_x,
416 const bool updateVelocities,
419 const bool computeVirial,
423 ensureNoPendingCudaError("In CUDA version SETTLE");
425 // Early exit if no settles
426 if (numSettles_ == 0)
433 // Fill with zeros so the values can be reduced to it
434 // Only 6 values are needed because virial is symmetrical
435 clearDeviceBufferAsync(&d_virialScaled_, 0, 6, stream_);
438 auto kernelPtr = getSettleKernelPtr(updateVelocities, computeVirial);
440 KernelLaunchConfig config;
441 config.blockSize[0] = c_threadsPerBlock;
442 config.blockSize[1] = 1;
443 config.blockSize[2] = 1;
444 config.gridSize[0] = (numSettles_ + c_threadsPerBlock - 1)/c_threadsPerBlock;
445 config.gridSize[1] = 1;
446 config.gridSize[2] = 1;
447 // Shared memory is only used for virial reduction
450 config.sharedMemorySize = c_threadsPerBlock*6*sizeof(float);
454 config.sharedMemorySize = 0;
456 config.stream = stream_;
458 const auto kernelArgs = prepareGpuKernelArguments(kernelPtr, config,
469 launchGpuKernel(kernelPtr, config, nullptr,
470 "settle_kernel<updateVelocities, computeVirial>", kernelArgs);
474 copyFromDeviceBuffer(h_virialScaled_.data(), &d_virialScaled_,
476 stream_, GpuApiCallBehavior::Sync, nullptr);
478 // Mapping [XX, XY, XZ, YY, YZ, ZZ] internal format to a tensor object
479 virialScaled[XX][XX] += h_virialScaled_[0];
480 virialScaled[XX][YY] += h_virialScaled_[1];
481 virialScaled[XX][ZZ] += h_virialScaled_[2];
483 virialScaled[YY][XX] += h_virialScaled_[1];
484 virialScaled[YY][YY] += h_virialScaled_[3];
485 virialScaled[YY][ZZ] += h_virialScaled_[4];
487 virialScaled[ZZ][XX] += h_virialScaled_[2];
488 virialScaled[ZZ][YY] += h_virialScaled_[4];
489 virialScaled[ZZ][ZZ] += h_virialScaled_[5];
495 SettleCuda::SettleCuda(const gmx_mtop_t &mtop)
497 static_assert(sizeof(real) == sizeof(float),
498 "Real numbers should be in single precision in GPU code.");
499 static_assert(c_threadsPerBlock > 0 && ((c_threadsPerBlock & (c_threadsPerBlock - 1)) == 0),
500 "Number of threads per block should be a power of two in order for reduction to work.");
502 // This is to prevent the assertion failure for the systems without water
503 int totalSettles = 0;
504 for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
506 const int nral1 = 1 + NRAL(F_SETTLE);
507 InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE];
508 std::vector<int> iatoms = interactionList.iatoms;
509 totalSettles += iatoms.size()/nral1;
511 if (totalSettles == 0)
516 // TODO This should be lifted to a separate subroutine that gets the values of Oxygen and Hydrogen
517 // masses, checks if they are consistent across the topology and if there is no more than two values
518 // for each mass if the free energy perturbation is enabled. In later case, masses may need to be
519 // updated on a regular basis (i.e. in set(...) method).
520 // TODO Do the checks for FEP
524 for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
526 const int nral1 = 1 + NRAL(F_SETTLE);
527 InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE];
528 std::vector<int> iatoms = interactionList.iatoms;
529 for (unsigned i = 0; i < iatoms.size()/nral1; i++)
532 settler.x = iatoms[i*nral1 + 1]; // Oxygen index
533 settler.y = iatoms[i*nral1 + 2]; // First hydrogen index
534 settler.z = iatoms[i*nral1 + 3]; // Second hydrogen index
535 t_atom ow1 = mtop.moltype.at(mt).atoms.atom[settler.x];
536 t_atom hw2 = mtop.moltype.at(mt).atoms.atom[settler.y];
537 t_atom hw3 = mtop.moltype.at(mt).atoms.atom[settler.z];
547 GMX_RELEASE_ASSERT(mO == ow1.m, "Topology has different values for oxygen mass. Should be identical in order to use SETTLE.");
548 GMX_RELEASE_ASSERT(hw2.m == hw3.m && hw2.m == mH, "Topology has different values for hydrogen mass. Should be identical in order to use SETTLE.");
552 GMX_RELEASE_ASSERT(mO > 0, "Could not find oxygen mass in the topology. Needed in SETTLE.");
553 GMX_RELEASE_ASSERT(mH > 0, "Could not find hydrogen mass in the topology. Needed in SETTLE.");
555 // TODO Very similar to SETTLE initialization on CPU. Should be lifted to a separate method
556 // (one that gets dOH and dHH values and checks them for consistency)
557 int settle_type = -1;
558 for (unsigned mt = 0; mt < mtop.moltype.size(); mt++)
560 const int nral1 = 1 + NRAL(F_SETTLE);
561 InteractionList interactionList = mtop.moltype.at(mt).ilist[F_SETTLE];
562 for (int i = 0; i < interactionList.size(); i += nral1)
564 if (settle_type == -1)
566 settle_type = interactionList.iatoms[i];
568 else if (interactionList.iatoms[i] != settle_type)
571 "The [molecules] section of your topology specifies more than one block of\n"
572 "a [moleculetype] with a [settles] block. Only one such is allowed.\n"
573 "If you are trying to partition your solvent into different *groups*\n"
574 "(e.g. for freezing, T-coupling, etc.), you are using the wrong approach. Index\n"
575 "files specify groups. Otherwise, you may wish to change the least-used\n"
576 "block of molecules with SETTLE constraints into 3 normal constraints.");
581 GMX_RELEASE_ASSERT(settle_type >= 0, "settle_init called without settles");
583 real dOH = mtop.ffparams.iparams[settle_type].settle.doh;
584 real dHH = mtop.ffparams.iparams[settle_type].settle.dhh;
586 initSettleParameters(&settleParameters_, mO, mH, dOH, dHH);
588 allocateDeviceBuffer(&d_virialScaled_, 6, nullptr);
589 h_virialScaled_.resize(6);
591 // Use default stream
596 SettleCuda::SettleCuda(const real mO, const real mH,
597 const real dOH, const real dHH)
599 static_assert(sizeof(real) == sizeof(float), "Real numbers should be in single precision in GPU code.");
600 static_assert(c_threadsPerBlock > 0 && ((c_threadsPerBlock & (c_threadsPerBlock - 1)) == 0),
601 "Number of threads per block should be a power of two in order for reduction to work.");
603 initSettleParameters(&settleParameters_, mO, mH, dOH, dHH);
605 allocateDeviceBuffer(&d_virialScaled_, 6, nullptr);
606 h_virialScaled_.resize(6);
608 // Use default stream
613 SettleCuda::~SettleCuda()
615 // Early exit if there is no settles
616 if (numSettles_ == 0)
620 freeDeviceBuffer(&d_virialScaled_);
621 if (numAtomIdsAlloc_ > 0)
623 freeDeviceBuffer(&d_atomIds_);
627 void SettleCuda::set(const t_idef &idef,
628 const t_mdatoms gmx_unused &md)
630 const int nral1 = 1 + NRAL(F_SETTLE);
631 t_ilist il_settle = idef.il[F_SETTLE];
632 t_iatom *iatoms = il_settle.iatoms;
633 numSettles_ = il_settle.nr/nral1;
635 reallocateDeviceBuffer(&d_atomIds_, numSettles_, &numAtomIds_, &numAtomIdsAlloc_, nullptr);
636 h_atomIds_.resize(numSettles_);
637 for (int i = 0; i < numSettles_; i++)
640 settler.x = iatoms[i*nral1 + 1]; // Oxygen index
641 settler.y = iatoms[i*nral1 + 2]; // First hydrogen index
642 settler.z = iatoms[i*nral1 + 3]; // Second hydrogen index
643 h_atomIds_.at(i) = settler;
645 copyToDeviceBuffer(&d_atomIds_, h_atomIds_.data(),
647 stream_, GpuApiCallBehavior::Sync, nullptr);
651 void SettleCuda::setPbc(const t_pbc *pbc)
653 setPbcAiuc(pbc->ndim_ePBC, pbc->box, &pbcAiuc_);