src/gromacs/ewald/pme_redistribute.cpp
/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains function definitions for redistributing
 * atoms over the PME domains
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_ewald
 */
#include "gmxpre.h"

#include "pme_redistribute.h"

#include "config.h"

#include <algorithm>

#include "gromacs/math/vec.h"
#include "gromacs/mdtypes/commrec.h"
#include "gromacs/utility/exceptions.h"
#include "gromacs/utility/fatalerror.h"
#include "gromacs/utility/gmxmpi.h"
#include "gromacs/utility/smalloc.h"

#include "pme_internal.h"
//! Calculate the slab indices and store in \p atc, store counts in \p count
static void pme_calc_pidx(int                            start,
                          int                            end,
                          const matrix                   recipbox,
                          gmx::ArrayRef<const gmx::RVec> x,
                          PmeAtomComm*                   atc,
                          int*                           count)
{
    int         nslab, i;
    int         si;
    const real* xptr;
    real        s;
    real        rxx, ryx, rzx, ryy, rzy;
    int*        pd;

    /* Calculate the PME task index (pidx) for each atom.
     * Here we always assign equally sized slabs to each node
     * for load balancing reasons (the PME grid spacing is not used).
     */

    nslab = atc->nslab;
    pd    = atc->pd.data();

    /* Reset the count */
    for (i = 0; i < nslab; i++)
    {
        count[i] = 0;
    }

    if (atc->dimind == 0)
    {
        rxx = recipbox[XX][XX];
        ryx = recipbox[YY][XX];
        rzx = recipbox[ZZ][XX];
        /* Calculate the node index in x-dimension */
        for (i = start; i < end; i++)
        {
            xptr = x[i];
            /* Fractional coordinates along box vectors */
            s     = nslab * (xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx);
            si    = static_cast<int>(s + 2 * nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
    else
    {
        ryy = recipbox[YY][YY];
        rzy = recipbox[ZZ][YY];
        /* Calculate the node index in y-dimension */
        for (i = start; i < end; i++)
        {
            xptr = x[i];
            /* Fractional coordinates along box vectors */
            s     = nslab * (xptr[YY] * ryy + xptr[ZZ] * rzy);
            si    = static_cast<int>(s + 2 * nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
}
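/* A worked example of the slab mapping above, with nslab = 4. The mapping can
 * be read in isolation as the following sketch (names are illustrative only,
 * not part of this file):
 *
 *   static int slabIndex(real fractionalCoord, int nslab)
 *   {
 *       const real s = nslab * fractionalCoord;
 *       return static_cast<int>(s + 2 * nslab) % nslab;
 *   }
 *
 *   // slabIndex( 0.62, 4) == 2   since int( 2.48 + 8) % 4 == 10 % 4
 *   // slabIndex(-0.05, 4) == 3   since int(-0.20 + 8) % 4 ==  7 % 4
 *
 * The "+ 2 * nslab" term keeps the argument of the truncating cast positive,
 * so atoms sitting slightly outside the unit cell still wrap onto a valid
 * slab index in [0, nslab).
 */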
//! Wrapper function for calculating slab indices, stored in \p atc
static void pme_calc_pidx_wrapper(gmx::ArrayRef<const gmx::RVec> x, const matrix recipbox, PmeAtomComm* atc)
{
    int nthread = atc->nthread;

#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)
    {
        try
        {
            const int natoms = x.ssize();
            pme_calc_pidx(natoms * thread / nthread, natoms * (thread + 1) / nthread, recipbox, x,
                          atc, atc->count_thread[thread].data());
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
    /* Non-parallel reduction, since nslab is small */
    for (int thread = 1; thread < nthread; thread++)
    {
        for (int slab = 0; slab < atc->nslab; slab++)
        {
            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
        }
    }
}
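/* Note on the parallelization above: each thread handles the contiguous atom
 * range [natoms * thread / nthread, natoms * (thread + 1) / nthread), which
 * splits the atoms as evenly as integer division allows. For example,
 * natoms = 10 and nthread = 4 gives the ranges [0,2), [2,5), [5,7), [7,10).
 * Each thread writes only to its own count_thread buffer, so no locking is
 * needed; the small serial loop afterwards reduces the per-thread counts
 * into count_thread[0].
 */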
#ifndef DOXYGEN

void SplineCoefficients::realloc(const int nalloc)
{
    const int padding = 4;

    bufferX_.resize(nalloc);
    coefficients[XX] = bufferX_.data();
    bufferY_.resize(nalloc);
    coefficients[YY] = bufferY_.data();
    /* In z we add padding, this is only required for the aligned 4-wide SIMD code */
    bufferZ_.resize(nalloc + 2 * padding);
    coefficients[ZZ] = bufferZ_.data() + padding;
}

#endif // !DOXYGEN
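/* Resulting layout of the z buffer after SplineCoefficients::realloc():
 *
 *   bufferZ_:  [ 4 padding | nalloc coefficients | 4 padding ]
 *                            ^
 *                            coefficients[ZZ]
 *
 * Because coefficients[ZZ] points 4 elements into the allocation, the aligned
 * 4-wide SIMD kernels can safely access a few elements before or past the
 * valid z range without leaving the buffer; x and y need no such padding.
 */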
//! Reallocates all buffers in \p spline to fit atoms in \p atc
static void pme_realloc_splinedata(splinedata_t* spline, const PmeAtomComm* atc)
{
    if (spline->nalloc >= atc->x.ssize() && spline->nalloc >= atc->numAtoms())
    {
        return;
    }

    spline->nalloc = std::max(atc->x.capacity(), static_cast<size_t>(atc->numAtoms()));
    spline->ind.resize(spline->nalloc);
    /* Initialize the index to identity so it works without threads */
    for (int i = 0; i < spline->nalloc; i++)
    {
        spline->ind[i] = i;
    }

    spline->theta.realloc(atc->pme_order * spline->nalloc);
    spline->dtheta.realloc(atc->pme_order * spline->nalloc);
}
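/* Sizing note: theta and dtheta store the B-spline weights and their
 * derivatives, pme_order values per atom and per dimension, hence the
 * pme_order * nalloc allocation handled by SplineCoefficients::realloc()
 * above. E.g. with pme_order = 4 and room for 1000 atoms, each of the
 * x/y/z coefficient buffers holds 4000 values.
 */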
#ifndef DOXYGEN

void PmeAtomComm::setNumAtoms(const int numAtoms)
{
    numAtoms_ = numAtoms;

    if (nslab > 1)
    {
        /* We have to avoid a NULL pointer for atc->x to avoid
         * possible fatal errors in MPI routines.
         */
        xBuffer.resize(numAtoms_);
        if (xBuffer.capacity() == 0)
        {
            xBuffer.reserve(1);
        }
        x = xBuffer;
        coefficientBuffer.resize(numAtoms_);
        if (coefficientBuffer.capacity() == 0)
        {
            coefficientBuffer.reserve(1);
        }
        coefficient = coefficientBuffer;
        const int nalloc_old = fBuffer.size();
        fBuffer.resize(numAtoms_);
        for (int i = nalloc_old; i < numAtoms_; i++)
        {
            clear_rvec(fBuffer[i]);
        }
        f = fBuffer;
    }
    if (bSpread)
    {
        fractx.resize(numAtoms_);
        idx.resize(numAtoms_);

        if (nthread > 1)
        {
            thread_idx.resize(numAtoms_);
        }

        for (int i = 0; i < nthread; i++)
        {
            pme_realloc_splinedata(&spline[i], this);
        }
    }
}

#endif // !DOXYGEN
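/* Note on the reserve(1) calls above: resizing a buffer to zero atoms can
 * leave data() as a null pointer, and passing a null buffer to MPI routines
 * can trigger fatal errors even when the element count is zero. Reserving a
 * single element guarantees that x, coefficient and f always refer to valid
 * storage, as the comment in setNumAtoms() explains.
 */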
//! Communicates buffers between ranks separated by \p shift slabs
static void pme_dd_sendrecv(PmeAtomComm gmx_unused* atc,
                            gmx_bool gmx_unused bBackward,
                            int gmx_unused      shift,
                            void gmx_unused*    buf_s,
                            int gmx_unused      nbyte_s,
                            void gmx_unused*    buf_r,
                            int gmx_unused      nbyte_r)
{
#if GMX_MPI
    int        dest, src;
    MPI_Status stat;

    if (!bBackward)
    {
        dest = atc->slabCommSetup[shift].node_dest;
        src  = atc->slabCommSetup[shift].node_src;
    }
    else
    {
        dest = atc->slabCommSetup[shift].node_src;
        src  = atc->slabCommSetup[shift].node_dest;
    }

    if (nbyte_s > 0 && nbyte_r > 0)
    {
        MPI_Sendrecv(buf_s, nbyte_s, MPI_BYTE, dest, shift, buf_r, nbyte_r, MPI_BYTE, src, shift,
                     atc->mpi_comm, &stat);
    }
    else if (nbyte_s > 0)
    {
        MPI_Send(buf_s, nbyte_s, MPI_BYTE, dest, shift, atc->mpi_comm);
    }
    else if (nbyte_r > 0)
    {
        MPI_Recv(buf_r, nbyte_r, MPI_BYTE, src, shift, atc->mpi_comm, &stat);
    }
#endif
}
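/* Communication pattern: for a given slab shift, the forward direction sends
 * to slabCommSetup[shift].node_dest and receives from node_src; bBackward
 * swaps the two, which is what the force redistribution below uses to return
 * data along the same rank pairs. The node_dest/node_src values are set up
 * elsewhere in the PME initialization, typically as the ranks `shift` slabs
 * ahead of and behind this one. The size checks avoid issuing MPI calls for
 * empty buffers.
 */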
//! Redistributes \p data and optionally coordinates between MPI ranks
static void dd_pmeredist_pos_coeffs(gmx_pme_t*                     pme,
                                    const gmx_bool                 bX,
                                    gmx::ArrayRef<const gmx::RVec> x,
                                    const real*                    data,
                                    PmeAtomComm*                   atc)
{
    int nnodes_comm, i, local_pos, buf_pos, node;

    nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);

    auto sendCount = atc->sendCount();
    int  nsend     = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int commnode                     = atc->slabCommSetup[i].node_dest;
        atc->slabCommSetup[commnode].buf_index = nsend;
        nsend += sendCount[commnode];
    }
    if (bX)
    {
        if (sendCount[atc->nodeid] + nsend != x.ssize())
        {
            gmx_fatal(
                    FARGS,
                    "%zd particles communicated to PME rank %d are more than 2/3 times the cut-off "
                    "out of the domain decomposition cell of their charge group in dimension %c.\n"
                    "This usually means that your system is not well equilibrated.",
                    x.ssize() - (sendCount[atc->nodeid] + nsend), pme->nodeid, 'x' + atc->dimind);
        }

        if (nsend > pme->buf_nalloc)
        {
            pme->buf_nalloc = over_alloc_dd(nsend);
            srenew(pme->bufv, pme->buf_nalloc);
            srenew(pme->bufr, pme->buf_nalloc);
        }

        int numAtoms = sendCount[atc->nodeid];
        for (i = 0; i < nnodes_comm; i++)
        {
            const int commnode = atc->slabCommSetup[i].node_dest;
            int       scount   = sendCount[commnode];
            /* Communicate the count */
            if (debug)
            {
                fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n", atc->dimind,
                        atc->nodeid, commnode, scount);
            }
            pme_dd_sendrecv(atc, FALSE, i, &scount, sizeof(int), &atc->slabCommSetup[i].rcount,
                            sizeof(int));
            numAtoms += atc->slabCommSetup[i].rcount;
        }

        atc->setNumAtoms(numAtoms);
    }

    local_pos = 0;
    for (gmx::index i = 0; i < x.ssize(); i++)
    {
        node = atc->pd[i];
        if (node == atc->nodeid)
        {
            /* Copy directly to the receive buffer */
            if (bX)
            {
                copy_rvec(x[i], atc->xBuffer[local_pos]);
            }
            atc->coefficientBuffer[local_pos] = data[i];
            local_pos++;
        }
        else
        {
            /* Copy to the send buffer */
            int& buf_index = atc->slabCommSetup[node].buf_index;
            if (bX)
            {
                copy_rvec(x[i], pme->bufv[buf_index]);
            }
            pme->bufr[buf_index] = data[i];
            buf_index++;
        }
    }

    buf_pos = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int scount = atc->sendCount()[atc->slabCommSetup[i].node_dest];
        const int rcount = atc->slabCommSetup[i].rcount;
        if (scount > 0 || rcount > 0)
        {
            if (bX)
            {
                /* Communicate the coordinates */
                pme_dd_sendrecv(atc, FALSE, i, pme->bufv + buf_pos, scount * sizeof(rvec),
                                atc->xBuffer.data() + local_pos, rcount * sizeof(rvec));
            }
            /* Communicate the coefficients */
            pme_dd_sendrecv(atc, FALSE, i, pme->bufr + buf_pos, scount * sizeof(real),
                            atc->coefficientBuffer.data() + local_pos, rcount * sizeof(real));
            buf_pos += scount;
            local_pos += atc->slabCommSetup[i].rcount;
        }
    }
    GMX_ASSERT(local_pos == atc->numAtoms(), "After receiving we should have numAtoms coordinates");
}
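/* Summary of dd_pmeredist_pos_coeffs(), which proceeds in three steps:
 *  1) when coordinates are redistributed (bX), exchange the per-slab send
 *     counts so each rank learns how many atoms it will receive and can size
 *     its buffers through setNumAtoms(); coefficient-only calls reuse the
 *     counts from the preceding coordinate pass;
 *  2) walk the local atoms and either place them directly in the receive
 *     buffers (atoms that stay on this rank) or pack them into pme->bufv /
 *     pme->bufr, grouped per destination slab via buf_index;
 *  3) one pme_dd_sendrecv() per communicated slab, appending received atoms
 *     after the locally kept ones; the final assert checks that exactly
 *     numAtoms() entries were filled.
 */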
void dd_pmeredist_f(struct gmx_pme_t* pme, PmeAtomComm* atc, gmx::ArrayRef<gmx::RVec> f, gmx_bool bAddF)
{
    int nnodes_comm, local_pos, buf_pos, i, node;

    nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);

    local_pos = atc->sendCount()[atc->nodeid];
    buf_pos   = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int commnode = atc->slabCommSetup[i].node_dest;
        const int scount   = atc->slabCommSetup[i].rcount;
        const int rcount   = atc->sendCount()[commnode];
        if (scount > 0 || rcount > 0)
        {
            /* Communicate the forces */
            pme_dd_sendrecv(atc, TRUE, i, atc->f.data() + local_pos, scount * sizeof(rvec),
                            pme->bufv + buf_pos, rcount * sizeof(rvec));
            local_pos += scount;
        }
        atc->slabCommSetup[commnode].buf_index = buf_pos;
        buf_pos += rcount;
    }

    local_pos = 0;
    if (bAddF)
    {
        for (gmx::index i = 0; i < f.ssize(); i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Add from the local force array */
                rvec_inc(f[i], atc->f[local_pos]);
                local_pos++;
            }
            else
            {
                /* Add from the receive buffer */
                rvec_inc(f[i], pme->bufv[atc->slabCommSetup[node].buf_index]);
                atc->slabCommSetup[node].buf_index++;
            }
        }
    }
    else
    {
        for (gmx::index i = 0; i < f.ssize(); i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Copy from the local force array */
                copy_rvec(atc->f[local_pos], f[i]);
                local_pos++;
            }
            else
            {
                /* Copy from the receive buffer */
                copy_rvec(pme->bufv[atc->slabCommSetup[node].buf_index], f[i]);
                atc->slabCommSetup[node].buf_index++;
            }
        }
    }
}
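/* dd_pmeredist_f() is the reverse of dd_pmeredist_pos_coeffs(): the forces
 * computed on this PME rank travel backward (bBackward = TRUE) along the
 * same rank pairs, and are then scattered back into the caller's force
 * array in the original atom order using atc->pd, either accumulating into
 * f (bAddF) or overwriting it.
 */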
void do_redist_pos_coeffs(struct gmx_pme_t*              pme,
                          const t_commrec*               cr,
                          gmx_bool                       bFirst,
                          gmx::ArrayRef<const gmx::RVec> x,
                          const real*                    data)
{
    for (int d = pme->ndecompdim - 1; d >= 0; d--)
    {
        gmx::ArrayRef<const gmx::RVec> xRef;
        const real*                    param_d;
        if (d == pme->ndecompdim - 1)
        {
            /* Start out with the local coordinates and charges */
            xRef    = x;
            param_d = data;
        }
        else
        {
            /* Redistribute the data collected along the previous dimension */
            const PmeAtomComm& atc = pme->atc[d + 1];
            xRef                   = atc.x;
            param_d                = atc.coefficient.data();
        }
        PmeAtomComm& atc = pme->atc[d];
        atc.pd.resize(xRef.size());
        pme_calc_pidx_wrapper(xRef, pme->recipbox, &atc);
        /* Redistribute x (only once) and qA/c6A or qB/c6B */
        if (DOMAINDECOMP(cr))
        {
            dd_pmeredist_pos_coeffs(pme, bFirst, xRef, param_d, &atc);