/*
 * This file is part of the GROMACS molecular simulation package.
 *
 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
 * Copyright (c) 2001-2004, The GROMACS development team.
 * Copyright (c) 2013,2014,2015,2016,2018 by the GROMACS development team.
 * Copyright (c) 2019,2020, by the GROMACS development team, led by
 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
 * and including many others, as listed in the AUTHORS file in the
 * top-level source directory and at http://www.gromacs.org.
 *
 * GROMACS is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * GROMACS is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with GROMACS; if not, see
 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * If you want to redistribute modifications to GROMACS, please
 * consider that scientific software is very special. Version
 * control is crucial - bugs must be traceable. We will be happy to
 * consider code for inclusion in the official distribution, but
 * derived work must not be called official GROMACS. Details are found
 * in the README & COPYING files - if they are missing, get the
 * official version at http://www.gromacs.org.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org.
 */
/*! \internal \file
 *
 * \brief This file contains function definitions for redistributing
 * atoms over the PME domains
 *
 * \author Berk Hess <hess@kth.se>
 * \ingroup module_ewald
 */
50 #include "pme_redistribute.h"
56 #include "gromacs/math/vec.h"
57 #include "gromacs/mdtypes/commrec.h"
58 #include "gromacs/utility/exceptions.h"
59 #include "gromacs/utility/fatalerror.h"
60 #include "gromacs/utility/gmxmpi.h"
61 #include "gromacs/utility/smalloc.h"
63 #include "pme_internal.h"
//! Calculate the slab indices and store in \p atc, store counts in \p count
static void pme_calc_pidx(int                            start,
                          int                            end,
                          const matrix                   recipbox,
                          gmx::ArrayRef<const gmx::RVec> x,
                          PmeAtomComm*                   atc,
                          int*                           count)
{
    int         nslab, i;
    int         si;
    const real* xptr;
    real        s;
    real        rxx, ryx, rzx, ryy, rzy;
    int*        pd;

    /* Calculate PME task index (pidx) for each grid index.
     * Here we always assign equally sized slabs to each node
     * for load balancing reasons (the PME grid spacing is not used).
     */

    nslab = atc->nslab;
    pd    = atc->pd.data();

    /* Reset the count */
    for (i = 0; i < nslab; i++)
    {
        count[i] = 0;
    }

    if (atc->dimind == 0)
    {
        rxx = recipbox[XX][XX];
        ryx = recipbox[YY][XX];
        rzx = recipbox[ZZ][XX];
        /* Calculate the node index in x-dimension */
        for (i = start; i < end; i++)
        {
            xptr = x[i];
            /* Fractional coordinates along box vectors */
            s = nslab * (xptr[XX] * rxx + xptr[YY] * ryx + xptr[ZZ] * rzx);
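            /* The shift by 2*nslab below keeps the argument of the truncation
             * positive for coordinates slightly outside the unit cell before
             * the modulo wraps it back; e.g. with nslab = 4 and s = -0.3 this
             * gives static_cast<int>(7.7) % 4 = 3, the slab of the periodic image.
             */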
            si    = static_cast<int>(s + 2 * nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
    else
    {
        ryy = recipbox[YY][YY];
        rzy = recipbox[ZZ][YY];
        /* Calculate the node index in y-dimension */
        for (i = start; i < end; i++)
        {
            xptr = x[i];
            /* Fractional coordinates along box vectors */
            s = nslab * (xptr[YY] * ryy + xptr[ZZ] * rzy);
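            /* There is no xptr[XX] term in the expression above because
             * recipbox[XX][YY] is zero for the triclinic box convention
             * used by GROMACS.
             */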
            si    = static_cast<int>(s + 2 * nslab) % nslab;
            pd[i] = si;
            count[si]++;
        }
    }
}
//! Wrapper function for calculating slab indices, stored in \p atc
static void pme_calc_pidx_wrapper(gmx::ArrayRef<const gmx::RVec> x, const matrix recipbox, PmeAtomComm* atc)
{
    int nthread = atc->nthread;

#pragma omp parallel for num_threads(nthread) schedule(static)
    for (int thread = 0; thread < nthread; thread++)
    {
        try
        {
            const int natoms = x.ssize();
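            /* Each thread handles a contiguous range of atoms; the integer
             * division below partitions [0, natoms) with no gaps and no
             * overlap between threads.
             */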
            pme_calc_pidx(natoms * thread / nthread, natoms * (thread + 1) / nthread, recipbox, x,
                          atc, atc->count_thread[thread].data());
        }
        GMX_CATCH_ALL_AND_EXIT_WITH_FATAL_ERROR
    }
    /* Non-parallel reduction, since nslab is small */

    for (int thread = 1; thread < nthread; thread++)
    {
        for (int slab = 0; slab < atc->nslab; slab++)
        {
            atc->count_thread[0][slab] += atc->count_thread[thread][slab];
        }
    }
}
void SplineCoefficients::realloc(const int nalloc)
{
    const int padding = 4;

    bufferX_.resize(nalloc);
    coefficients[XX] = bufferX_.data();
    bufferY_.resize(nalloc);
    coefficients[YY] = bufferY_.data();
    /* In z we add padding, this is only required for the aligned 4-wide SIMD code */
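    /* The padding is added on both sides and coefficients[ZZ] points past the
     * leading pad, so aligned 4-wide loads just before or after the atom range
     * stay inside the allocation.
     */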
    bufferZ_.resize(nalloc + 2 * padding);
    coefficients[ZZ] = bufferZ_.data() + padding;
}
//! Reallocates all buffers in \p spline to fit atoms in \p atc
static void pme_realloc_splinedata(splinedata_t* spline, const PmeAtomComm* atc)
{
    if (spline->nalloc >= atc->x.ssize() && spline->nalloc >= atc->numAtoms())
    {
        return;
    }

    spline->nalloc = std::max(atc->x.capacity(), static_cast<size_t>(atc->numAtoms()));
    spline->ind.resize(spline->nalloc);
    /* Initialize the index to identity so it works without threads */
    for (int i = 0; i < spline->nalloc; i++)
    {
        spline->ind[i] = i;
    }

    spline->theta.realloc(atc->pme_order * spline->nalloc);
    spline->dtheta.realloc(atc->pme_order * spline->nalloc);
}
void PmeAtomComm::setNumAtoms(const int numAtoms)
{
    numAtoms_ = numAtoms;

    if (nslab > 1)
    {
        /* We have to avoid a NULL pointer for atc->x to avoid
         * possible fatal errors in MPI routines.
         */
        xBuffer.resize(numAtoms_);
        if (xBuffer.capacity() == 0)
        {
            xBuffer.reserve(1);
        }
        x = xBuffer;
        coefficientBuffer.resize(numAtoms_);
        if (coefficientBuffer.capacity() == 0)
        {
            coefficientBuffer.reserve(1);
        }
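        /* reserve(1) guarantees that .data() is non-null even when this rank
         * ends up with zero atoms; see the comment above about MPI routines.
         */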
        coefficient = coefficientBuffer;
        const int nalloc_old = fBuffer.size();
        fBuffer.resize(numAtoms_);
        for (int i = nalloc_old; i < numAtoms_; i++)
        {
            clear_rvec(fBuffer[i]);
        }
        f = fBuffer;
    }
    if (bSpread)
    {
        fractx.resize(numAtoms_);
        idx.resize(numAtoms_);

        if (nthread > 1)
        {
            thread_idx.resize(numAtoms_);
        }

        for (int i = 0; i < nthread; i++)
        {
            pme_realloc_splinedata(&spline[i], this);
        }
    }
}
//! Communicates buffers between ranks separated by \p shift slabs
static void pme_dd_sendrecv(PmeAtomComm gmx_unused* atc,
                            gmx_bool gmx_unused     bBackward,
                            int gmx_unused          shift,
                            void gmx_unused*        buf_s,
                            int gmx_unused          nbyte_s,
                            void gmx_unused*        buf_r,
                            int gmx_unused          nbyte_r)
{
#if GMX_MPI
    int        dest, src;
    MPI_Status stat;

    if (!bBackward)
    {
        dest = atc->slabCommSetup[shift].node_dest;
        src  = atc->slabCommSetup[shift].node_src;
    }
    else
    {
        dest = atc->slabCommSetup[shift].node_src;
        src  = atc->slabCommSetup[shift].node_dest;
    }

    if (nbyte_s > 0 && nbyte_r > 0)
    {
        MPI_Sendrecv(buf_s, nbyte_s, MPI_BYTE, dest, shift, buf_r, nbyte_r, MPI_BYTE, src, shift,
                     atc->mpi_comm, &stat);
    }
    else if (nbyte_s > 0)
    {
        MPI_Send(buf_s, nbyte_s, MPI_BYTE, dest, shift, atc->mpi_comm);
    }
    else if (nbyte_r > 0)
    {
        MPI_Recv(buf_r, nbyte_r, MPI_BYTE, src, shift, atc->mpi_comm, &stat);
    }
#endif
}
//! Redistributes \p data and optionally coordinates between MPI ranks
static void dd_pmeredist_pos_coeffs(gmx_pme_t*                     pme,
                                    const gmx_bool                 bX,
                                    gmx::ArrayRef<const gmx::RVec> x,
                                    const real*                    data,
                                    PmeAtomComm*                   atc)
{
    int nnodes_comm, i, local_pos, buf_pos, node;
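    /* Overview: first the per-slab send counts are exchanged (only when
     * coordinates are redistributed), then the local atoms are packed either
     * directly into the receive arrays or into the send buffers, and finally
     * the buffers are exchanged slab by slab with pme_dd_sendrecv().
     */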
    nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);

    auto sendCount = atc->sendCount();
    int  nsend     = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int commnode                     = atc->slabCommSetup[i].node_dest;
        atc->slabCommSetup[commnode].buf_index = nsend;
        nsend += sendCount[commnode];
    }
    if (bX)
    {
        if (sendCount[atc->nodeid] + nsend != x.ssize())
        {
            gmx_fatal(FARGS,
                      "%zd particles communicated to PME rank %d are more than 2/3 times the cut-off "
                      "out of the domain decomposition cell of their charge group in dimension %c.\n"
                      "This usually means that your system is not well equilibrated.",
                      x.ssize() - (sendCount[atc->nodeid] + nsend), pme->nodeid, 'x' + atc->dimind);
        }

        if (nsend > pme->buf_nalloc)
        {
            pme->buf_nalloc = over_alloc_dd(nsend);
            srenew(pme->bufv, pme->buf_nalloc);
            srenew(pme->bufr, pme->buf_nalloc);
        }

        int numAtoms = sendCount[atc->nodeid];
        for (i = 0; i < nnodes_comm; i++)
        {
            const int commnode = atc->slabCommSetup[i].node_dest;
            int       scount   = sendCount[commnode];
            /* Communicate the count */
            if (debug)
            {
                fprintf(debug, "dimind %d PME rank %d send to rank %d: %d\n", atc->dimind,
                        atc->nodeid, commnode, scount);
            }
            pme_dd_sendrecv(atc, FALSE, i, &scount, sizeof(int), &atc->slabCommSetup[i].rcount,
                            sizeof(int));
            numAtoms += atc->slabCommSetup[i].rcount;
        }

        atc->setNumAtoms(numAtoms);
    }

    local_pos = 0;
    for (gmx::index i = 0; i < x.ssize(); i++)
    {
        node = atc->pd[i];
        if (node == atc->nodeid)
        {
            /* Copy direct to the receive buffer */
            if (bX)
            {
                copy_rvec(x[i], atc->xBuffer[local_pos]);
            }
            atc->coefficientBuffer[local_pos] = data[i];
            local_pos++;
        }
        else
        {
            /* Copy to the send buffer */
            int& buf_index = atc->slabCommSetup[node].buf_index;
            if (bX)
            {
                copy_rvec(x[i], pme->bufv[buf_index]);
            }
            pme->bufr[buf_index] = data[i];
            buf_index++;
        }
    }

    buf_pos = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int scount = atc->sendCount()[atc->slabCommSetup[i].node_dest];
        const int rcount = atc->slabCommSetup[i].rcount;
        if (scount > 0 || rcount > 0)
        {
            if (bX)
            {
                /* Communicate the coordinates */
                pme_dd_sendrecv(atc, FALSE, i, pme->bufv + buf_pos, scount * sizeof(rvec),
                                atc->xBuffer.data() + local_pos, rcount * sizeof(rvec));
            }
            /* Communicate the coefficients */
            pme_dd_sendrecv(atc, FALSE, i, pme->bufr + buf_pos, scount * sizeof(real),
                            atc->coefficientBuffer.data() + local_pos, rcount * sizeof(real));
            buf_pos += scount;
            local_pos += atc->slabCommSetup[i].rcount;
        }
    }
    GMX_ASSERT(local_pos == atc->numAtoms(), "After receiving we should have numAtoms coordinates");
}
void dd_pmeredist_f(struct gmx_pme_t* pme, PmeAtomComm* atc, gmx::ArrayRef<gmx::RVec> f, gmx_bool bAddF)
{
    int nnodes_comm, local_pos, buf_pos, i, node;
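    /* This is the reverse of the coordinate redistribution: the send and
     * receive roles are swapped, so the send count here is the receive count
     * of dd_pmeredist_pos_coeffs() and pme_dd_sendrecv() is called backward.
     */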
    nnodes_comm = std::min(2 * atc->maxshift, atc->nslab - 1);

    local_pos = atc->sendCount()[atc->nodeid];
    buf_pos   = 0;
    for (i = 0; i < nnodes_comm; i++)
    {
        const int commnode = atc->slabCommSetup[i].node_dest;
        const int scount   = atc->slabCommSetup[i].rcount;
        const int rcount   = atc->sendCount()[commnode];
        if (scount > 0 || rcount > 0)
        {
            /* Communicate the forces */
            pme_dd_sendrecv(atc, TRUE, i, atc->f.data() + local_pos, scount * sizeof(rvec),
                            pme->bufv + buf_pos, rcount * sizeof(rvec));
            local_pos += scount;
        }
        atc->slabCommSetup[commnode].buf_index = buf_pos;
        buf_pos += rcount;
    }

    local_pos = 0;
    if (bAddF)
    {
        for (gmx::index i = 0; i < f.ssize(); i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Add from the local force array */
                rvec_inc(f[i], atc->f[local_pos]);
                local_pos++;
            }
            else
            {
                /* Add from the receive buffer */
                rvec_inc(f[i], pme->bufv[atc->slabCommSetup[node].buf_index]);
                atc->slabCommSetup[node].buf_index++;
            }
        }
    }
    else
    {
        for (gmx::index i = 0; i < f.ssize(); i++)
        {
            node = atc->pd[i];
            if (node == atc->nodeid)
            {
                /* Copy from the local force array */
                copy_rvec(atc->f[local_pos], f[i]);
                local_pos++;
            }
            else
            {
                /* Copy from the receive buffer */
                copy_rvec(pme->bufv[atc->slabCommSetup[node].buf_index], f[i]);
                atc->slabCommSetup[node].buf_index++;
            }
        }
    }
}
void do_redist_pos_coeffs(struct gmx_pme_t*              pme,
                          const t_commrec*               cr,
                          gmx_bool                       bFirst,
                          gmx::ArrayRef<const gmx::RVec> x,
                          const real*                    data)
{
    for (int d = pme->ndecompdim - 1; d >= 0; d--)
    {
        gmx::ArrayRef<const gmx::RVec> xRef;
        const real*                    param_d;
        if (d == pme->ndecompdim - 1)
        {
            /* Start out with the local coordinates and charges */
            xRef    = x;
            param_d = data;
        }
        else
        {
            /* Redistribute the data collected along the previous dimension */
            const PmeAtomComm& atc = pme->atc[d + 1];
            xRef                   = atc.x;
            param_d                = atc.coefficient.data();
        }
        PmeAtomComm& atc = pme->atc[d];
        atc.pd.resize(xRef.size());
        pme_calc_pidx_wrapper(xRef, pme->recipbox, &atc);
        /* Redistribute x (only once) and qA/c6A or qB/c6B */
        if (DOMAINDECOMP(cr))
        {
            dd_pmeredist_pos_coeffs(pme, bFirst, xRef, param_d, &atc);
        }
    }
}